diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..3bd14dd06b8170ed8820308cb74951c0d01c50c5 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-7400/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-7500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-7532/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..74bd120460692bb0e753677341dde9f04d12cda7 --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +--- +library_name: peft +license: other +base_model: Qwen/Qwen2.5-VL-7B-Instruct +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +pipeline_tag: text-generation +model-index: +- name: Qwen2.5-VL-7B-sft-generated + results: [] +--- + + + +# Qwen2.5-VL-7B-sft-generated + +This model is a fine-tuned version of [Qwen/Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) on the agent_sft_generated dataset. 
+ +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 2e-05 +- train_batch_size: 4 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- gradient_accumulation_steps: 4 +- total_train_batch_size: 16 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 0.05 +- num_epochs: 2 + +### Training results + + + +### Framework versions + +- PEFT 0.18.1 +- Transformers 5.2.0 +- Pytorch 2.5.1+cu124 +- Datasets 4.0.0 +- Tokenizers 0.22.2 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e68fb35d77856a51c03fe5e97700fc3194faedb5 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "layers.4.mlp.up_proj", + "k_proj", + "layers.16.mlp.up_proj", + "layers.15.mlp.down_proj", + 
"layers.2.mlp.down_proj", + "layers.14.mlp.up_proj", + "layers.26.mlp.down_proj", + "layers.1.mlp.up_proj", + "layers.14.mlp.down_proj", + "layers.20.mlp.up_proj", + "layers.14.mlp.gate_proj", + "layers.12.mlp.up_proj", + "layers.10.mlp.up_proj", + "layers.12.mlp.gate_proj", + "layers.22.mlp.down_proj", + "layers.9.mlp.up_proj", + "layers.9.mlp.gate_proj", + "layers.19.mlp.up_proj", + "layers.22.mlp.gate_proj", + "v_proj", + "layers.15.mlp.up_proj", + "layers.21.mlp.up_proj", + "layers.6.mlp.up_proj", + "layers.0.mlp.down_proj", + "layers.9.mlp.down_proj", + "layers.11.mlp.up_proj", + "layers.8.mlp.up_proj", + "layers.17.mlp.gate_proj", + "layers.1.mlp.down_proj", + "layers.21.mlp.down_proj", + "layers.27.mlp.up_proj", + "layers.16.mlp.gate_proj", + "q_proj", + "layers.10.mlp.down_proj", + "layers.7.mlp.gate_proj", + "layers.0.mlp.up_proj", + "layers.5.mlp.up_proj", + "layers.22.mlp.up_proj", + "layers.1.mlp.gate_proj", + "layers.25.mlp.up_proj", + "layers.7.mlp.down_proj", + "layers.25.mlp.down_proj", + "layers.13.mlp.down_proj", + "layers.2.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.13.mlp.gate_proj", + "layers.23.mlp.gate_proj", + "layers.3.mlp.gate_proj", + "layers.3.mlp.up_proj", + "layers.23.mlp.down_proj", + "o_proj", + "layers.6.mlp.gate_proj", + "layers.18.mlp.up_proj", + "layers.20.mlp.gate_proj", + "layers.26.mlp.gate_proj", + "layers.24.mlp.down_proj", + "layers.20.mlp.down_proj", + "layers.24.mlp.up_proj", + "layers.0.mlp.gate_proj", + "layers.15.mlp.gate_proj", + "layers.4.mlp.down_proj", + "layers.8.mlp.gate_proj", + "layers.12.mlp.down_proj", + "layers.8.mlp.down_proj", + "layers.25.mlp.gate_proj", + "layers.3.mlp.down_proj", + "layers.11.mlp.down_proj", + "layers.6.mlp.down_proj", + "layers.16.mlp.down_proj", + "layers.26.mlp.up_proj", + "layers.19.mlp.gate_proj", + "layers.10.mlp.gate_proj", + "layers.23.mlp.up_proj", + "layers.21.mlp.gate_proj", + "layers.13.mlp.up_proj", + "layers.18.mlp.gate_proj", + "layers.17.mlp.up_proj", + 
"layers.5.mlp.down_proj", + "layers.24.mlp.gate_proj", + "layers.4.mlp.gate_proj", + "layers.19.mlp.down_proj", + "layers.27.mlp.gate_proj", + "layers.18.mlp.down_proj", + "layers.7.mlp.up_proj", + "layers.27.mlp.down_proj", + "layers.2.mlp.gate_proj", + "layers.5.mlp.gate_proj", + "layers.11.mlp.gate_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9cb96688656e8f6e53f5d2b043739353eb69aa2d --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1994850f0e38a79ff5d308061112090688fcb346fcd1968cbb0e1c4b6b6f7e83 +size 323020440 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a574a917c05d1c53d5210b2a3351a5df4b586147 --- /dev/null +++ b/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 2.0, + "total_flos": 5704003196682240.0, + "train_loss": 0.29768029879729163, + "train_runtime": 98000.2149, + "train_samples_per_second": 1.229, + "train_steps_per_second": 0.077 +} \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' 
in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-7400/README.md b/checkpoint-7400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4005c4d8e7a819833408da4794e4e74d2ced6553 --- /dev/null +++ b/checkpoint-7400/README.md @@ -0,0 +1,208 @@ +--- +base_model: Qwen/Qwen2.5-VL-7B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should 
be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-7400/adapter_config.json b/checkpoint-7400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e68fb35d77856a51c03fe5e97700fc3194faedb5 --- /dev/null +++ b/checkpoint-7400/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + 
"layers.4.mlp.up_proj", + "k_proj", + "layers.16.mlp.up_proj", + "layers.15.mlp.down_proj", + "layers.2.mlp.down_proj", + "layers.14.mlp.up_proj", + "layers.26.mlp.down_proj", + "layers.1.mlp.up_proj", + "layers.14.mlp.down_proj", + "layers.20.mlp.up_proj", + "layers.14.mlp.gate_proj", + "layers.12.mlp.up_proj", + "layers.10.mlp.up_proj", + "layers.12.mlp.gate_proj", + "layers.22.mlp.down_proj", + "layers.9.mlp.up_proj", + "layers.9.mlp.gate_proj", + "layers.19.mlp.up_proj", + "layers.22.mlp.gate_proj", + "v_proj", + "layers.15.mlp.up_proj", + "layers.21.mlp.up_proj", + "layers.6.mlp.up_proj", + "layers.0.mlp.down_proj", + "layers.9.mlp.down_proj", + "layers.11.mlp.up_proj", + "layers.8.mlp.up_proj", + "layers.17.mlp.gate_proj", + "layers.1.mlp.down_proj", + "layers.21.mlp.down_proj", + "layers.27.mlp.up_proj", + "layers.16.mlp.gate_proj", + "q_proj", + "layers.10.mlp.down_proj", + "layers.7.mlp.gate_proj", + "layers.0.mlp.up_proj", + "layers.5.mlp.up_proj", + "layers.22.mlp.up_proj", + "layers.1.mlp.gate_proj", + "layers.25.mlp.up_proj", + "layers.7.mlp.down_proj", + "layers.25.mlp.down_proj", + "layers.13.mlp.down_proj", + "layers.2.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.13.mlp.gate_proj", + "layers.23.mlp.gate_proj", + "layers.3.mlp.gate_proj", + "layers.3.mlp.up_proj", + "layers.23.mlp.down_proj", + "o_proj", + "layers.6.mlp.gate_proj", + "layers.18.mlp.up_proj", + "layers.20.mlp.gate_proj", + "layers.26.mlp.gate_proj", + "layers.24.mlp.down_proj", + "layers.20.mlp.down_proj", + "layers.24.mlp.up_proj", + "layers.0.mlp.gate_proj", + "layers.15.mlp.gate_proj", + "layers.4.mlp.down_proj", + "layers.8.mlp.gate_proj", + "layers.12.mlp.down_proj", + "layers.8.mlp.down_proj", + "layers.25.mlp.gate_proj", + "layers.3.mlp.down_proj", + "layers.11.mlp.down_proj", + "layers.6.mlp.down_proj", + "layers.16.mlp.down_proj", + "layers.26.mlp.up_proj", + "layers.19.mlp.gate_proj", + "layers.10.mlp.gate_proj", + "layers.23.mlp.up_proj", + 
"layers.21.mlp.gate_proj", + "layers.13.mlp.up_proj", + "layers.18.mlp.gate_proj", + "layers.17.mlp.up_proj", + "layers.5.mlp.down_proj", + "layers.24.mlp.gate_proj", + "layers.4.mlp.gate_proj", + "layers.19.mlp.down_proj", + "layers.27.mlp.gate_proj", + "layers.18.mlp.down_proj", + "layers.7.mlp.up_proj", + "layers.27.mlp.down_proj", + "layers.2.mlp.gate_proj", + "layers.5.mlp.gate_proj", + "layers.11.mlp.gate_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-7400/adapter_model.safetensors b/checkpoint-7400/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..46f1d3ada17c59d545e50145eab39472066646a7 --- /dev/null +++ b/checkpoint-7400/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad1f8c77dd3c2a97de016a28bbc1b53a6efa05ce54e3dec7b361358bd0548be3 +size 323020440 diff --git a/checkpoint-7400/chat_template.jinja b/checkpoint-7400/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/checkpoint-7400/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in 
content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-7400/global_step7400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-7400/global_step7400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..11aa43e671e66f7b3a2bffa7f1513c7984a11487 --- /dev/null +++ b/checkpoint-7400/global_step7400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:512fd223fedf1c23c22c00497339f3770249a1728fdb30fe4f006c55b409991d +size 1937772272 diff --git a/checkpoint-7400/global_step7400/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-7400/global_step7400/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..eae52b1b20e560013b48f164b9aa72a92811d5a4 --- /dev/null +++ b/checkpoint-7400/global_step7400/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1bcbc7823af8766f5a2793d5b0074081bc4793a65700dbaaa5124937740e88b +size 460630 diff --git a/checkpoint-7400/latest b/checkpoint-7400/latest new file mode 100644 index 0000000000000000000000000000000000000000..5c95ca968b234f32f13ca2113b84d692d75cd2f2 --- /dev/null +++ b/checkpoint-7400/latest @@ -0,0 +1 @@ +global_step7400 \ No newline at end of file diff --git a/checkpoint-7400/processor_config.json b/checkpoint-7400/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/checkpoint-7400/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + 
"data_format": "channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": "Qwen2VLVideoProcessor" + } +} diff --git a/checkpoint-7400/rng_state.pth b/checkpoint-7400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ed66a0e0253676ad3e734c97eced355f8d032787 --- /dev/null +++ b/checkpoint-7400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0f5a6c7e436c5aa714190a2f7ef6e6e21b9d879baeb33d1b54d5d93cb7a8def +size 14244 diff --git a/checkpoint-7400/scheduler.pt b/checkpoint-7400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a2ba66a07a3293d7d4ec75dd743c0aeda8847ab --- /dev/null +++ b/checkpoint-7400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:5fef204a44a83cd76c07413958056c773c5ae3f1f6a86e041ac74ef8a3adc681 +size 1000 diff --git a/checkpoint-7400/tokenizer.json b/checkpoint-7400/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/checkpoint-7400/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/checkpoint-7400/tokenizer_config.json b/checkpoint-7400/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/checkpoint-7400/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-7400/trainer_state.json b/checkpoint-7400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..135051d1246a569e18fa0dabcc33d43f120fd9cd --- /dev/null +++ b/checkpoint-7400/trainer_state.json @@ -0,0 +1,51834 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9650776789271012, + "eval_steps": 500, + "global_step": 7400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + 
"log_history": [ + { + "epoch": 0.0002655689815429558, + "grad_norm": 1.8881195832990014, + "learning_rate": 0.0, + "loss": 1.1502833366394043, + "step": 1 + }, + { + "epoch": 0.0005311379630859116, + "grad_norm": 1.77718785062999, + "learning_rate": 5.3050397877984086e-08, + "loss": 1.1698756217956543, + "step": 2 + }, + { + "epoch": 0.0007967069446288673, + "grad_norm": 1.6766718507101437, + "learning_rate": 1.0610079575596817e-07, + "loss": 1.1060130596160889, + "step": 3 + }, + { + "epoch": 0.0010622759261718232, + "grad_norm": 1.876053682165919, + "learning_rate": 1.5915119363395226e-07, + "loss": 1.1075276136398315, + "step": 4 + }, + { + "epoch": 0.001327844907714779, + "grad_norm": 1.88228417845019, + "learning_rate": 2.1220159151193635e-07, + "loss": 1.2153511047363281, + "step": 5 + }, + { + "epoch": 0.0015934138892577346, + "grad_norm": 1.9273368394845023, + "learning_rate": 2.6525198938992043e-07, + "loss": 1.1400426626205444, + "step": 6 + }, + { + "epoch": 0.0018589828708006906, + "grad_norm": 1.904814034912833, + "learning_rate": 3.183023872679045e-07, + "loss": 1.2070660591125488, + "step": 7 + }, + { + "epoch": 0.0021245518523436463, + "grad_norm": 1.7346381008587795, + "learning_rate": 3.713527851458886e-07, + "loss": 1.1614588499069214, + "step": 8 + }, + { + "epoch": 0.002390120833886602, + "grad_norm": 1.817032704311048, + "learning_rate": 4.244031830238727e-07, + "loss": 1.1739476919174194, + "step": 9 + }, + { + "epoch": 0.002655689815429558, + "grad_norm": 1.8291974144657501, + "learning_rate": 4.774535809018568e-07, + "loss": 1.1559171676635742, + "step": 10 + }, + { + "epoch": 0.0029212587969725135, + "grad_norm": 2.0039010539208744, + "learning_rate": 5.305039787798409e-07, + "loss": 1.2086225748062134, + "step": 11 + }, + { + "epoch": 0.0031868277785154693, + "grad_norm": 1.876026657216244, + "learning_rate": 5.83554376657825e-07, + "loss": 1.227709174156189, + "step": 12 + }, + { + "epoch": 0.003452396760058425, + "grad_norm": 
2.0245192813139825, + "learning_rate": 6.36604774535809e-07, + "loss": 1.255577564239502, + "step": 13 + }, + { + "epoch": 0.003717965741601381, + "grad_norm": 1.8641260357218605, + "learning_rate": 6.896551724137931e-07, + "loss": 1.1953760385513306, + "step": 14 + }, + { + "epoch": 0.0039835347231443365, + "grad_norm": 1.9079733249323254, + "learning_rate": 7.427055702917772e-07, + "loss": 1.1325336694717407, + "step": 15 + }, + { + "epoch": 0.004249103704687293, + "grad_norm": 1.8230190567516942, + "learning_rate": 7.957559681697613e-07, + "loss": 1.232974648475647, + "step": 16 + }, + { + "epoch": 0.004514672686230248, + "grad_norm": 1.8532380418447003, + "learning_rate": 8.488063660477454e-07, + "loss": 1.1527395248413086, + "step": 17 + }, + { + "epoch": 0.004780241667773204, + "grad_norm": 1.986294801704247, + "learning_rate": 9.018567639257295e-07, + "loss": 1.151026964187622, + "step": 18 + }, + { + "epoch": 0.00504581064931616, + "grad_norm": 1.8048967405226255, + "learning_rate": 9.549071618037136e-07, + "loss": 1.155288815498352, + "step": 19 + }, + { + "epoch": 0.005311379630859116, + "grad_norm": 2.1631450267380767, + "learning_rate": 1.0079575596816979e-06, + "loss": 1.183434009552002, + "step": 20 + }, + { + "epoch": 0.005576948612402072, + "grad_norm": 1.88758019498484, + "learning_rate": 1.0610079575596817e-06, + "loss": 1.161030650138855, + "step": 21 + }, + { + "epoch": 0.005842517593945027, + "grad_norm": 1.9605989446426395, + "learning_rate": 1.1140583554376658e-06, + "loss": 1.123382806777954, + "step": 22 + }, + { + "epoch": 0.006108086575487983, + "grad_norm": 2.2042020560619306, + "learning_rate": 1.16710875331565e-06, + "loss": 1.238707423210144, + "step": 23 + }, + { + "epoch": 0.0063736555570309385, + "grad_norm": 2.289866056000848, + "learning_rate": 1.220159151193634e-06, + "loss": 1.2058464288711548, + "step": 24 + }, + { + "epoch": 0.006639224538573895, + "grad_norm": 2.724214643619529, + "learning_rate": 1.273209549071618e-06, + 
"loss": 1.2351092100143433, + "step": 25 + }, + { + "epoch": 0.00690479352011685, + "grad_norm": 2.5088520951326028, + "learning_rate": 1.3262599469496024e-06, + "loss": 1.1739860773086548, + "step": 26 + }, + { + "epoch": 0.007170362501659806, + "grad_norm": 2.3243798435890155, + "learning_rate": 1.3793103448275862e-06, + "loss": 1.1407617330551147, + "step": 27 + }, + { + "epoch": 0.007435931483202762, + "grad_norm": 2.533007430657115, + "learning_rate": 1.4323607427055705e-06, + "loss": 1.1844531297683716, + "step": 28 + }, + { + "epoch": 0.007701500464745718, + "grad_norm": 2.4702075978733804, + "learning_rate": 1.4854111405835544e-06, + "loss": 1.1293678283691406, + "step": 29 + }, + { + "epoch": 0.007967069446288673, + "grad_norm": 3.0873404038783963, + "learning_rate": 1.5384615384615387e-06, + "loss": 1.1310899257659912, + "step": 30 + }, + { + "epoch": 0.00823263842783163, + "grad_norm": 2.7098364862500013, + "learning_rate": 1.5915119363395226e-06, + "loss": 1.1015795469284058, + "step": 31 + }, + { + "epoch": 0.008498207409374585, + "grad_norm": 2.8074949689582476, + "learning_rate": 1.6445623342175069e-06, + "loss": 1.0756056308746338, + "step": 32 + }, + { + "epoch": 0.00876377639091754, + "grad_norm": 3.1563034348975676, + "learning_rate": 1.6976127320954908e-06, + "loss": 1.1496126651763916, + "step": 33 + }, + { + "epoch": 0.009029345372460496, + "grad_norm": 2.842390896608423, + "learning_rate": 1.750663129973475e-06, + "loss": 1.203465461730957, + "step": 34 + }, + { + "epoch": 0.009294914354003453, + "grad_norm": 2.6747271223349753, + "learning_rate": 1.803713527851459e-06, + "loss": 1.0613923072814941, + "step": 35 + }, + { + "epoch": 0.009560483335546408, + "grad_norm": 2.146709655536541, + "learning_rate": 1.8567639257294432e-06, + "loss": 1.06027090549469, + "step": 36 + }, + { + "epoch": 0.009826052317089363, + "grad_norm": 1.9942495143394863, + "learning_rate": 1.909814323607427e-06, + "loss": 1.0508522987365723, + "step": 37 + }, + { + 
"epoch": 0.01009162129863232, + "grad_norm": 2.1704927298148107, + "learning_rate": 1.9628647214854114e-06, + "loss": 1.0353929996490479, + "step": 38 + }, + { + "epoch": 0.010357190280175276, + "grad_norm": 1.8252380884349957, + "learning_rate": 2.0159151193633957e-06, + "loss": 0.9974027276039124, + "step": 39 + }, + { + "epoch": 0.010622759261718231, + "grad_norm": 1.7188806752497834, + "learning_rate": 2.0689655172413796e-06, + "loss": 1.0849467515945435, + "step": 40 + }, + { + "epoch": 0.010888328243261186, + "grad_norm": 1.3692667089198218, + "learning_rate": 2.1220159151193635e-06, + "loss": 1.005434274673462, + "step": 41 + }, + { + "epoch": 0.011153897224804143, + "grad_norm": 1.3465343019370317, + "learning_rate": 2.1750663129973478e-06, + "loss": 1.052631139755249, + "step": 42 + }, + { + "epoch": 0.011419466206347099, + "grad_norm": 1.352421126005469, + "learning_rate": 2.2281167108753316e-06, + "loss": 0.9470957517623901, + "step": 43 + }, + { + "epoch": 0.011685035187890054, + "grad_norm": 1.2219308328594767, + "learning_rate": 2.281167108753316e-06, + "loss": 0.9865130186080933, + "step": 44 + }, + { + "epoch": 0.01195060416943301, + "grad_norm": 1.19161259271228, + "learning_rate": 2.3342175066313e-06, + "loss": 0.9405577778816223, + "step": 45 + }, + { + "epoch": 0.012216173150975966, + "grad_norm": 1.1603073869733838, + "learning_rate": 2.387267904509284e-06, + "loss": 0.9418795108795166, + "step": 46 + }, + { + "epoch": 0.012481742132518922, + "grad_norm": 1.1897328813812988, + "learning_rate": 2.440318302387268e-06, + "loss": 0.9841142892837524, + "step": 47 + }, + { + "epoch": 0.012747311114061877, + "grad_norm": 1.159720101499262, + "learning_rate": 2.4933687002652523e-06, + "loss": 0.9412609338760376, + "step": 48 + }, + { + "epoch": 0.013012880095604834, + "grad_norm": 1.1421347262548374, + "learning_rate": 2.546419098143236e-06, + "loss": 0.9239889979362488, + "step": 49 + }, + { + "epoch": 0.01327844907714779, + "grad_norm": 
1.144363453746544, + "learning_rate": 2.59946949602122e-06, + "loss": 0.9212941527366638, + "step": 50 + }, + { + "epoch": 0.013544018058690745, + "grad_norm": 0.9916816911141796, + "learning_rate": 2.6525198938992047e-06, + "loss": 0.8863773345947266, + "step": 51 + }, + { + "epoch": 0.0138095870402337, + "grad_norm": 0.9890613082667745, + "learning_rate": 2.7055702917771886e-06, + "loss": 0.8990404009819031, + "step": 52 + }, + { + "epoch": 0.014075156021776657, + "grad_norm": 1.1123466462737277, + "learning_rate": 2.7586206896551725e-06, + "loss": 0.9257171154022217, + "step": 53 + }, + { + "epoch": 0.014340725003319612, + "grad_norm": 0.8689931750055545, + "learning_rate": 2.8116710875331564e-06, + "loss": 0.8239601254463196, + "step": 54 + }, + { + "epoch": 0.014606293984862568, + "grad_norm": 0.9936229603029793, + "learning_rate": 2.864721485411141e-06, + "loss": 0.8656830787658691, + "step": 55 + }, + { + "epoch": 0.014871862966405525, + "grad_norm": 1.0202371081091262, + "learning_rate": 2.917771883289125e-06, + "loss": 0.9470342397689819, + "step": 56 + }, + { + "epoch": 0.01513743194794848, + "grad_norm": 0.9663900963956384, + "learning_rate": 2.970822281167109e-06, + "loss": 0.8699859976768494, + "step": 57 + }, + { + "epoch": 0.015403000929491435, + "grad_norm": 0.940263545207204, + "learning_rate": 3.0238726790450927e-06, + "loss": 0.8668704628944397, + "step": 58 + }, + { + "epoch": 0.01566856991103439, + "grad_norm": 0.9865381848251076, + "learning_rate": 3.0769230769230774e-06, + "loss": 0.841624915599823, + "step": 59 + }, + { + "epoch": 0.015934138892577346, + "grad_norm": 0.8909972421095332, + "learning_rate": 3.1299734748010613e-06, + "loss": 0.8412661552429199, + "step": 60 + }, + { + "epoch": 0.0161997078741203, + "grad_norm": 0.8771283277278942, + "learning_rate": 3.183023872679045e-06, + "loss": 0.818957507610321, + "step": 61 + }, + { + "epoch": 0.01646527685566326, + "grad_norm": 0.9190140482494583, + "learning_rate": 3.23607427055703e-06, 
+ "loss": 0.8030763268470764, + "step": 62 + }, + { + "epoch": 0.016730845837206215, + "grad_norm": 0.8839367067386452, + "learning_rate": 3.2891246684350138e-06, + "loss": 0.7869359850883484, + "step": 63 + }, + { + "epoch": 0.01699641481874917, + "grad_norm": 0.8058255896640879, + "learning_rate": 3.3421750663129977e-06, + "loss": 0.7912170886993408, + "step": 64 + }, + { + "epoch": 0.017261983800292126, + "grad_norm": 0.8538938403853334, + "learning_rate": 3.3952254641909815e-06, + "loss": 0.7736695408821106, + "step": 65 + }, + { + "epoch": 0.01752755278183508, + "grad_norm": 0.8652625375848492, + "learning_rate": 3.448275862068966e-06, + "loss": 0.768275260925293, + "step": 66 + }, + { + "epoch": 0.017793121763378036, + "grad_norm": 0.8691478661970735, + "learning_rate": 3.50132625994695e-06, + "loss": 0.7210639119148254, + "step": 67 + }, + { + "epoch": 0.01805869074492099, + "grad_norm": 0.8378031795839386, + "learning_rate": 3.554376657824934e-06, + "loss": 0.7488028407096863, + "step": 68 + }, + { + "epoch": 0.01832425972646395, + "grad_norm": 0.8943989597273122, + "learning_rate": 3.607427055702918e-06, + "loss": 0.7329621911048889, + "step": 69 + }, + { + "epoch": 0.018589828708006906, + "grad_norm": 0.92104620358882, + "learning_rate": 3.660477453580902e-06, + "loss": 0.7270619869232178, + "step": 70 + }, + { + "epoch": 0.01885539768954986, + "grad_norm": 0.9782498013554233, + "learning_rate": 3.7135278514588865e-06, + "loss": 0.7271254658699036, + "step": 71 + }, + { + "epoch": 0.019120966671092816, + "grad_norm": 0.9115603845811348, + "learning_rate": 3.7665782493368703e-06, + "loss": 0.787033200263977, + "step": 72 + }, + { + "epoch": 0.01938653565263577, + "grad_norm": 0.8604692726067453, + "learning_rate": 3.819628647214854e-06, + "loss": 0.7049479484558105, + "step": 73 + }, + { + "epoch": 0.019652104634178727, + "grad_norm": 0.8610577281688413, + "learning_rate": 3.8726790450928385e-06, + "loss": 0.7146892547607422, + "step": 74 + }, + { + 
"epoch": 0.019917673615721682, + "grad_norm": 0.7602187567662452, + "learning_rate": 3.925729442970823e-06, + "loss": 0.7212516069412231, + "step": 75 + }, + { + "epoch": 0.02018324259726464, + "grad_norm": 0.6842508042039768, + "learning_rate": 3.978779840848806e-06, + "loss": 0.6612375378608704, + "step": 76 + }, + { + "epoch": 0.020448811578807596, + "grad_norm": 0.7781006919053841, + "learning_rate": 4.031830238726791e-06, + "loss": 0.7038244605064392, + "step": 77 + }, + { + "epoch": 0.02071438056035055, + "grad_norm": 0.7186592057129139, + "learning_rate": 4.084880636604775e-06, + "loss": 0.7081903219223022, + "step": 78 + }, + { + "epoch": 0.020979949541893507, + "grad_norm": 0.7655954113403886, + "learning_rate": 4.137931034482759e-06, + "loss": 0.7079841494560242, + "step": 79 + }, + { + "epoch": 0.021245518523436462, + "grad_norm": 0.7149787673446053, + "learning_rate": 4.190981432360743e-06, + "loss": 0.7090641260147095, + "step": 80 + }, + { + "epoch": 0.021511087504979418, + "grad_norm": 0.6657837070384769, + "learning_rate": 4.244031830238727e-06, + "loss": 0.6632575988769531, + "step": 81 + }, + { + "epoch": 0.021776656486522373, + "grad_norm": 0.6666401713606211, + "learning_rate": 4.297082228116711e-06, + "loss": 0.7231097221374512, + "step": 82 + }, + { + "epoch": 0.02204222546806533, + "grad_norm": 0.6804476609839887, + "learning_rate": 4.3501326259946955e-06, + "loss": 0.6696034669876099, + "step": 83 + }, + { + "epoch": 0.022307794449608287, + "grad_norm": 0.7073638927991296, + "learning_rate": 4.403183023872679e-06, + "loss": 0.7550696134567261, + "step": 84 + }, + { + "epoch": 0.022573363431151242, + "grad_norm": 0.7064770122504733, + "learning_rate": 4.456233421750663e-06, + "loss": 0.671328067779541, + "step": 85 + }, + { + "epoch": 0.022838932412694198, + "grad_norm": 0.6506139330803743, + "learning_rate": 4.5092838196286476e-06, + "loss": 0.6864410638809204, + "step": 86 + }, + { + "epoch": 0.023104501394237153, + "grad_norm": 
0.6642837777732639, + "learning_rate": 4.562334217506632e-06, + "loss": 0.6870769262313843, + "step": 87 + }, + { + "epoch": 0.023370070375780108, + "grad_norm": 0.6947506894199804, + "learning_rate": 4.615384615384616e-06, + "loss": 0.6539690494537354, + "step": 88 + }, + { + "epoch": 0.023635639357323063, + "grad_norm": 0.6446743321890098, + "learning_rate": 4.6684350132626e-06, + "loss": 0.6946991086006165, + "step": 89 + }, + { + "epoch": 0.02390120833886602, + "grad_norm": 0.6384512383480915, + "learning_rate": 4.721485411140584e-06, + "loss": 0.6177583932876587, + "step": 90 + }, + { + "epoch": 0.024166777320408978, + "grad_norm": 0.7150510018442997, + "learning_rate": 4.774535809018568e-06, + "loss": 0.6890037059783936, + "step": 91 + }, + { + "epoch": 0.024432346301951933, + "grad_norm": 0.6592991709316253, + "learning_rate": 4.8275862068965525e-06, + "loss": 0.6563063263893127, + "step": 92 + }, + { + "epoch": 0.024697915283494888, + "grad_norm": 0.6897740926797078, + "learning_rate": 4.880636604774536e-06, + "loss": 0.714318573474884, + "step": 93 + }, + { + "epoch": 0.024963484265037843, + "grad_norm": 0.6433596226177777, + "learning_rate": 4.93368700265252e-06, + "loss": 0.6720882654190063, + "step": 94 + }, + { + "epoch": 0.0252290532465808, + "grad_norm": 0.5910528348002435, + "learning_rate": 4.9867374005305045e-06, + "loss": 0.602899968624115, + "step": 95 + }, + { + "epoch": 0.025494622228123754, + "grad_norm": 0.6635651676723159, + "learning_rate": 5.039787798408489e-06, + "loss": 0.6628841161727905, + "step": 96 + }, + { + "epoch": 0.02576019120966671, + "grad_norm": 0.6070065577903714, + "learning_rate": 5.092838196286472e-06, + "loss": 0.6486932635307312, + "step": 97 + }, + { + "epoch": 0.026025760191209668, + "grad_norm": 0.6484848126679549, + "learning_rate": 5.145888594164457e-06, + "loss": 0.6719033122062683, + "step": 98 + }, + { + "epoch": 0.026291329172752623, + "grad_norm": 0.6856934201881044, + "learning_rate": 5.19893899204244e-06, + 
"loss": 0.6818530559539795, + "step": 99 + }, + { + "epoch": 0.02655689815429558, + "grad_norm": 0.6204811558305167, + "learning_rate": 5.251989389920424e-06, + "loss": 0.6306912899017334, + "step": 100 + }, + { + "epoch": 0.026822467135838534, + "grad_norm": 0.7820574736690976, + "learning_rate": 5.3050397877984095e-06, + "loss": 0.5952945351600647, + "step": 101 + }, + { + "epoch": 0.02708803611738149, + "grad_norm": 0.6546243503849497, + "learning_rate": 5.358090185676394e-06, + "loss": 0.6566107273101807, + "step": 102 + }, + { + "epoch": 0.027353605098924445, + "grad_norm": 0.707921645301647, + "learning_rate": 5.411140583554377e-06, + "loss": 0.6981694102287292, + "step": 103 + }, + { + "epoch": 0.0276191740804674, + "grad_norm": 0.6375441067969543, + "learning_rate": 5.4641909814323615e-06, + "loss": 0.6231328248977661, + "step": 104 + }, + { + "epoch": 0.02788474306201036, + "grad_norm": 0.6964560869475424, + "learning_rate": 5.517241379310345e-06, + "loss": 0.6414977312088013, + "step": 105 + }, + { + "epoch": 0.028150312043553314, + "grad_norm": 0.6835502446580011, + "learning_rate": 5.570291777188329e-06, + "loss": 0.6335234642028809, + "step": 106 + }, + { + "epoch": 0.02841588102509627, + "grad_norm": 0.6248033284508979, + "learning_rate": 5.623342175066313e-06, + "loss": 0.6040852665901184, + "step": 107 + }, + { + "epoch": 0.028681450006639225, + "grad_norm": 0.6645474785171195, + "learning_rate": 5.676392572944297e-06, + "loss": 0.6011114716529846, + "step": 108 + }, + { + "epoch": 0.02894701898818218, + "grad_norm": 0.655106623405533, + "learning_rate": 5.729442970822282e-06, + "loss": 0.6042627096176147, + "step": 109 + }, + { + "epoch": 0.029212587969725135, + "grad_norm": 0.720208539355598, + "learning_rate": 5.782493368700266e-06, + "loss": 0.6183412671089172, + "step": 110 + }, + { + "epoch": 0.02947815695126809, + "grad_norm": 0.6666287454908232, + "learning_rate": 5.83554376657825e-06, + "loss": 0.6150818467140198, + "step": 111 + }, + { + 
"epoch": 0.02974372593281105, + "grad_norm": 0.6840692324124527, + "learning_rate": 5.888594164456234e-06, + "loss": 0.6202039122581482, + "step": 112 + }, + { + "epoch": 0.030009294914354005, + "grad_norm": 0.6626407253242022, + "learning_rate": 5.941644562334218e-06, + "loss": 0.6334809064865112, + "step": 113 + }, + { + "epoch": 0.03027486389589696, + "grad_norm": 0.6319419097399773, + "learning_rate": 5.994694960212202e-06, + "loss": 0.5728089809417725, + "step": 114 + }, + { + "epoch": 0.030540432877439915, + "grad_norm": 0.6988175213443283, + "learning_rate": 6.0477453580901854e-06, + "loss": 0.6884603500366211, + "step": 115 + }, + { + "epoch": 0.03080600185898287, + "grad_norm": 0.6618120552387852, + "learning_rate": 6.1007957559681706e-06, + "loss": 0.5619829893112183, + "step": 116 + }, + { + "epoch": 0.031071570840525826, + "grad_norm": 0.6756012639437595, + "learning_rate": 6.153846153846155e-06, + "loss": 0.6224710941314697, + "step": 117 + }, + { + "epoch": 0.03133713982206878, + "grad_norm": 0.7208355833756769, + "learning_rate": 6.206896551724138e-06, + "loss": 0.6119496822357178, + "step": 118 + }, + { + "epoch": 0.03160270880361174, + "grad_norm": 0.6917782946677038, + "learning_rate": 6.259946949602123e-06, + "loss": 0.6190857887268066, + "step": 119 + }, + { + "epoch": 0.03186827778515469, + "grad_norm": 0.6704531181022263, + "learning_rate": 6.312997347480107e-06, + "loss": 0.6460769176483154, + "step": 120 + }, + { + "epoch": 0.03213384676669765, + "grad_norm": 0.7493511248909543, + "learning_rate": 6.36604774535809e-06, + "loss": 0.6148796677589417, + "step": 121 + }, + { + "epoch": 0.0323994157482406, + "grad_norm": 0.6359613412994526, + "learning_rate": 6.419098143236075e-06, + "loss": 0.558960497379303, + "step": 122 + }, + { + "epoch": 0.03266498472978356, + "grad_norm": 0.6785691051694177, + "learning_rate": 6.47214854111406e-06, + "loss": 0.5844984650611877, + "step": 123 + }, + { + "epoch": 0.03293055371132652, + "grad_norm": 
0.6692815537253501, + "learning_rate": 6.525198938992043e-06, + "loss": 0.5343623161315918, + "step": 124 + }, + { + "epoch": 0.03319612269286947, + "grad_norm": 0.6705726789318588, + "learning_rate": 6.5782493368700276e-06, + "loss": 0.5834348797798157, + "step": 125 + }, + { + "epoch": 0.03346169167441243, + "grad_norm": 0.7626576562771024, + "learning_rate": 6.631299734748011e-06, + "loss": 0.5997360944747925, + "step": 126 + }, + { + "epoch": 0.03372726065595538, + "grad_norm": 0.7117893752859364, + "learning_rate": 6.684350132625995e-06, + "loss": 0.5991666316986084, + "step": 127 + }, + { + "epoch": 0.03399282963749834, + "grad_norm": 0.7060406683837459, + "learning_rate": 6.737400530503979e-06, + "loss": 0.581120491027832, + "step": 128 + }, + { + "epoch": 0.03425839861904129, + "grad_norm": 0.6869761252397286, + "learning_rate": 6.790450928381963e-06, + "loss": 0.6219569444656372, + "step": 129 + }, + { + "epoch": 0.03452396760058425, + "grad_norm": 0.6916173566260286, + "learning_rate": 6.843501326259947e-06, + "loss": 0.5950608253479004, + "step": 130 + }, + { + "epoch": 0.03478953658212721, + "grad_norm": 0.6136480902733893, + "learning_rate": 6.896551724137932e-06, + "loss": 0.5762747526168823, + "step": 131 + }, + { + "epoch": 0.03505510556367016, + "grad_norm": 0.670368708945713, + "learning_rate": 6.949602122015916e-06, + "loss": 0.6003131866455078, + "step": 132 + }, + { + "epoch": 0.03532067454521312, + "grad_norm": 0.6439028776339482, + "learning_rate": 7.0026525198939e-06, + "loss": 0.5866605043411255, + "step": 133 + }, + { + "epoch": 0.03558624352675607, + "grad_norm": 0.8324202287699098, + "learning_rate": 7.055702917771884e-06, + "loss": 0.6668443083763123, + "step": 134 + }, + { + "epoch": 0.03585181250829903, + "grad_norm": 0.7064456856515898, + "learning_rate": 7.108753315649868e-06, + "loss": 0.5738306045532227, + "step": 135 + }, + { + "epoch": 0.03611738148984198, + "grad_norm": 0.6941604370641007, + "learning_rate": 
7.1618037135278515e-06, + "loss": 0.5774663686752319, + "step": 136 + }, + { + "epoch": 0.03638295047138494, + "grad_norm": 0.7648336305672251, + "learning_rate": 7.214854111405836e-06, + "loss": 0.5721150636672974, + "step": 137 + }, + { + "epoch": 0.0366485194529279, + "grad_norm": 0.7394576462203543, + "learning_rate": 7.267904509283821e-06, + "loss": 0.6350122690200806, + "step": 138 + }, + { + "epoch": 0.03691408843447085, + "grad_norm": 0.6540602529440619, + "learning_rate": 7.320954907161804e-06, + "loss": 0.5435039401054382, + "step": 139 + }, + { + "epoch": 0.03717965741601381, + "grad_norm": 0.6965351191908165, + "learning_rate": 7.374005305039789e-06, + "loss": 0.5869162678718567, + "step": 140 + }, + { + "epoch": 0.03744522639755676, + "grad_norm": 0.6664228073022063, + "learning_rate": 7.427055702917773e-06, + "loss": 0.5645807981491089, + "step": 141 + }, + { + "epoch": 0.03771079537909972, + "grad_norm": 0.6503771775205762, + "learning_rate": 7.480106100795756e-06, + "loss": 0.5502692461013794, + "step": 142 + }, + { + "epoch": 0.037976364360642674, + "grad_norm": 0.6223645459397411, + "learning_rate": 7.533156498673741e-06, + "loss": 0.5602732300758362, + "step": 143 + }, + { + "epoch": 0.03824193334218563, + "grad_norm": 0.8638951879324807, + "learning_rate": 7.586206896551724e-06, + "loss": 0.6011391282081604, + "step": 144 + }, + { + "epoch": 0.03850750232372859, + "grad_norm": 0.6930636234613441, + "learning_rate": 7.639257294429708e-06, + "loss": 0.5482327938079834, + "step": 145 + }, + { + "epoch": 0.03877307130527154, + "grad_norm": 0.6693652199128735, + "learning_rate": 7.692307692307694e-06, + "loss": 0.5926344394683838, + "step": 146 + }, + { + "epoch": 0.0390386402868145, + "grad_norm": 0.8434991800954339, + "learning_rate": 7.745358090185677e-06, + "loss": 0.6558316946029663, + "step": 147 + }, + { + "epoch": 0.039304209268357454, + "grad_norm": 0.6845819362079449, + "learning_rate": 7.79840848806366e-06, + "loss": 0.572425365447998, + 
"step": 148 + }, + { + "epoch": 0.03956977824990041, + "grad_norm": 0.696296152543372, + "learning_rate": 7.851458885941646e-06, + "loss": 0.5684784650802612, + "step": 149 + }, + { + "epoch": 0.039835347231443365, + "grad_norm": 0.6779490529346879, + "learning_rate": 7.904509283819629e-06, + "loss": 0.5843643546104431, + "step": 150 + }, + { + "epoch": 0.04010091621298632, + "grad_norm": 0.6894842979231472, + "learning_rate": 7.957559681697613e-06, + "loss": 0.5471494793891907, + "step": 151 + }, + { + "epoch": 0.04036648519452928, + "grad_norm": 0.7583250211136208, + "learning_rate": 8.010610079575598e-06, + "loss": 0.595018744468689, + "step": 152 + }, + { + "epoch": 0.040632054176072234, + "grad_norm": 0.6904128122756304, + "learning_rate": 8.063660477453583e-06, + "loss": 0.5431865453720093, + "step": 153 + }, + { + "epoch": 0.04089762315761519, + "grad_norm": 0.7943246581886504, + "learning_rate": 8.116710875331566e-06, + "loss": 0.5622385740280151, + "step": 154 + }, + { + "epoch": 0.041163192139158145, + "grad_norm": 0.7792002007338675, + "learning_rate": 8.16976127320955e-06, + "loss": 0.5795880556106567, + "step": 155 + }, + { + "epoch": 0.0414287611207011, + "grad_norm": 0.7432143976693507, + "learning_rate": 8.222811671087533e-06, + "loss": 0.5854965448379517, + "step": 156 + }, + { + "epoch": 0.041694330102244055, + "grad_norm": 0.8104825185442435, + "learning_rate": 8.275862068965518e-06, + "loss": 0.5374501943588257, + "step": 157 + }, + { + "epoch": 0.041959899083787014, + "grad_norm": 0.7598674115735401, + "learning_rate": 8.328912466843502e-06, + "loss": 0.5779006481170654, + "step": 158 + }, + { + "epoch": 0.04222546806532997, + "grad_norm": 0.7033741631796787, + "learning_rate": 8.381962864721485e-06, + "loss": 0.550236701965332, + "step": 159 + }, + { + "epoch": 0.042491037046872925, + "grad_norm": 0.7285453499901458, + "learning_rate": 8.43501326259947e-06, + "loss": 0.557443380355835, + "step": 160 + }, + { + "epoch": 0.04275660602841588, + 
"grad_norm": 0.7050753960524794, + "learning_rate": 8.488063660477454e-06, + "loss": 0.5875238180160522, + "step": 161 + }, + { + "epoch": 0.043022175009958835, + "grad_norm": 0.7215582793376403, + "learning_rate": 8.541114058355439e-06, + "loss": 0.510900616645813, + "step": 162 + }, + { + "epoch": 0.043287743991501794, + "grad_norm": 0.7559114001900116, + "learning_rate": 8.594164456233422e-06, + "loss": 0.5465859174728394, + "step": 163 + }, + { + "epoch": 0.043553312973044746, + "grad_norm": 0.7494489908601825, + "learning_rate": 8.647214854111406e-06, + "loss": 0.5508615970611572, + "step": 164 + }, + { + "epoch": 0.043818881954587705, + "grad_norm": 0.7714387963397975, + "learning_rate": 8.700265251989391e-06, + "loss": 0.5437714457511902, + "step": 165 + }, + { + "epoch": 0.04408445093613066, + "grad_norm": 0.7480600693956645, + "learning_rate": 8.753315649867374e-06, + "loss": 0.542698323726654, + "step": 166 + }, + { + "epoch": 0.044350019917673615, + "grad_norm": 0.7339141407878966, + "learning_rate": 8.806366047745358e-06, + "loss": 0.5169371962547302, + "step": 167 + }, + { + "epoch": 0.044615588899216574, + "grad_norm": 0.725595419270195, + "learning_rate": 8.859416445623343e-06, + "loss": 0.5436176061630249, + "step": 168 + }, + { + "epoch": 0.044881157880759526, + "grad_norm": 0.8205411933516983, + "learning_rate": 8.912466843501327e-06, + "loss": 0.568030834197998, + "step": 169 + }, + { + "epoch": 0.045146726862302484, + "grad_norm": 0.7544356200090666, + "learning_rate": 8.965517241379312e-06, + "loss": 0.5218889713287354, + "step": 170 + }, + { + "epoch": 0.045412295843845436, + "grad_norm": 0.7860957525035722, + "learning_rate": 9.018567639257295e-06, + "loss": 0.5275779962539673, + "step": 171 + }, + { + "epoch": 0.045677864825388395, + "grad_norm": 0.6938225497373272, + "learning_rate": 9.071618037135279e-06, + "loss": 0.5263184905052185, + "step": 172 + }, + { + "epoch": 0.045943433806931354, + "grad_norm": 0.7549069812662602, + 
"learning_rate": 9.124668435013264e-06, + "loss": 0.563044548034668, + "step": 173 + }, + { + "epoch": 0.046209002788474306, + "grad_norm": 0.9364041083837341, + "learning_rate": 9.177718832891247e-06, + "loss": 0.5896912217140198, + "step": 174 + }, + { + "epoch": 0.046474571770017264, + "grad_norm": 0.7219752548557496, + "learning_rate": 9.230769230769232e-06, + "loss": 0.5163949131965637, + "step": 175 + }, + { + "epoch": 0.046740140751560216, + "grad_norm": 0.8391633255974319, + "learning_rate": 9.283819628647216e-06, + "loss": 0.6203320026397705, + "step": 176 + }, + { + "epoch": 0.047005709733103175, + "grad_norm": 0.9119997852547688, + "learning_rate": 9.3368700265252e-06, + "loss": 0.5528024435043335, + "step": 177 + }, + { + "epoch": 0.04727127871464613, + "grad_norm": 0.8828541610102935, + "learning_rate": 9.389920424403184e-06, + "loss": 0.5657555460929871, + "step": 178 + }, + { + "epoch": 0.047536847696189086, + "grad_norm": 0.7671789386737649, + "learning_rate": 9.442970822281168e-06, + "loss": 0.5301925539970398, + "step": 179 + }, + { + "epoch": 0.04780241667773204, + "grad_norm": 0.8675940797859782, + "learning_rate": 9.496021220159151e-06, + "loss": 0.5388369560241699, + "step": 180 + }, + { + "epoch": 0.048067985659274996, + "grad_norm": 0.7966332028310692, + "learning_rate": 9.549071618037136e-06, + "loss": 0.5549717545509338, + "step": 181 + }, + { + "epoch": 0.048333554640817955, + "grad_norm": 0.8814678011939608, + "learning_rate": 9.60212201591512e-06, + "loss": 0.5959764719009399, + "step": 182 + }, + { + "epoch": 0.04859912362236091, + "grad_norm": 0.7841222204736121, + "learning_rate": 9.655172413793105e-06, + "loss": 0.5461844205856323, + "step": 183 + }, + { + "epoch": 0.048864692603903866, + "grad_norm": 0.7620084886447284, + "learning_rate": 9.708222811671088e-06, + "loss": 0.5428494811058044, + "step": 184 + }, + { + "epoch": 0.04913026158544682, + "grad_norm": 0.7918991595575344, + "learning_rate": 9.761273209549072e-06, + "loss": 
0.552198052406311, + "step": 185 + }, + { + "epoch": 0.049395830566989776, + "grad_norm": 0.6896394660507362, + "learning_rate": 9.814323607427057e-06, + "loss": 0.49992549419403076, + "step": 186 + }, + { + "epoch": 0.04966139954853273, + "grad_norm": 0.7875507527713166, + "learning_rate": 9.86737400530504e-06, + "loss": 0.557820200920105, + "step": 187 + }, + { + "epoch": 0.04992696853007569, + "grad_norm": 0.8883719893129148, + "learning_rate": 9.920424403183024e-06, + "loss": 0.5238749384880066, + "step": 188 + }, + { + "epoch": 0.050192537511618646, + "grad_norm": 0.988465476825029, + "learning_rate": 9.973474801061009e-06, + "loss": 0.5346978902816772, + "step": 189 + }, + { + "epoch": 0.0504581064931616, + "grad_norm": 0.8024883433630577, + "learning_rate": 1.0026525198938993e-05, + "loss": 0.5256577730178833, + "step": 190 + }, + { + "epoch": 0.050723675474704556, + "grad_norm": 0.8026852335394901, + "learning_rate": 1.0079575596816978e-05, + "loss": 0.5235393047332764, + "step": 191 + }, + { + "epoch": 0.05098924445624751, + "grad_norm": 0.6835673591276205, + "learning_rate": 1.013262599469496e-05, + "loss": 0.4984837472438812, + "step": 192 + }, + { + "epoch": 0.05125481343779047, + "grad_norm": 0.7829913352817355, + "learning_rate": 1.0185676392572945e-05, + "loss": 0.5209602117538452, + "step": 193 + }, + { + "epoch": 0.05152038241933342, + "grad_norm": 0.8334733472253096, + "learning_rate": 1.023872679045093e-05, + "loss": 0.5468267202377319, + "step": 194 + }, + { + "epoch": 0.05178595140087638, + "grad_norm": 0.8107908645155819, + "learning_rate": 1.0291777188328913e-05, + "loss": 0.5531667470932007, + "step": 195 + }, + { + "epoch": 0.052051520382419336, + "grad_norm": 0.8437904919697584, + "learning_rate": 1.0344827586206898e-05, + "loss": 0.5741526484489441, + "step": 196 + }, + { + "epoch": 0.05231708936396229, + "grad_norm": 0.6830882515315945, + "learning_rate": 1.039787798408488e-05, + "loss": 0.46132561564445496, + "step": 197 + }, + { + 
"epoch": 0.05258265834550525, + "grad_norm": 0.8402230890409916, + "learning_rate": 1.0450928381962865e-05, + "loss": 0.5074198842048645, + "step": 198 + }, + { + "epoch": 0.0528482273270482, + "grad_norm": 0.7476727742688456, + "learning_rate": 1.0503978779840849e-05, + "loss": 0.5193089842796326, + "step": 199 + }, + { + "epoch": 0.05311379630859116, + "grad_norm": 0.7814745235248249, + "learning_rate": 1.0557029177718834e-05, + "loss": 0.5209243297576904, + "step": 200 + }, + { + "epoch": 0.05337936529013411, + "grad_norm": 0.8844918483638834, + "learning_rate": 1.0610079575596819e-05, + "loss": 0.5607191920280457, + "step": 201 + }, + { + "epoch": 0.05364493427167707, + "grad_norm": 0.7926104097207243, + "learning_rate": 1.0663129973474802e-05, + "loss": 0.5482805371284485, + "step": 202 + }, + { + "epoch": 0.05391050325322003, + "grad_norm": 0.8109463956858287, + "learning_rate": 1.0716180371352788e-05, + "loss": 0.5579961538314819, + "step": 203 + }, + { + "epoch": 0.05417607223476298, + "grad_norm": 0.8246893162942163, + "learning_rate": 1.076923076923077e-05, + "loss": 0.5119072794914246, + "step": 204 + }, + { + "epoch": 0.05444164121630594, + "grad_norm": 0.8293246958439139, + "learning_rate": 1.0822281167108754e-05, + "loss": 0.5129292607307434, + "step": 205 + }, + { + "epoch": 0.05470721019784889, + "grad_norm": 0.6895550242199711, + "learning_rate": 1.0875331564986738e-05, + "loss": 0.500032901763916, + "step": 206 + }, + { + "epoch": 0.05497277917939185, + "grad_norm": 0.8385731092525408, + "learning_rate": 1.0928381962864723e-05, + "loss": 0.5264571309089661, + "step": 207 + }, + { + "epoch": 0.0552383481609348, + "grad_norm": 0.7915802802090326, + "learning_rate": 1.0981432360742708e-05, + "loss": 0.5569590330123901, + "step": 208 + }, + { + "epoch": 0.05550391714247776, + "grad_norm": 0.8546725938844908, + "learning_rate": 1.103448275862069e-05, + "loss": 0.5429908037185669, + "step": 209 + }, + { + "epoch": 0.05576948612402072, + "grad_norm": 
0.8175642333393268, + "learning_rate": 1.1087533156498675e-05, + "loss": 0.5073692202568054, + "step": 210 + }, + { + "epoch": 0.05603505510556367, + "grad_norm": 0.9551222157670755, + "learning_rate": 1.1140583554376659e-05, + "loss": 0.5613659620285034, + "step": 211 + }, + { + "epoch": 0.05630062408710663, + "grad_norm": 1.8348970874488084, + "learning_rate": 1.1193633952254644e-05, + "loss": 0.5197691917419434, + "step": 212 + }, + { + "epoch": 0.05656619306864958, + "grad_norm": 0.9173115658326468, + "learning_rate": 1.1246684350132625e-05, + "loss": 0.5410990715026855, + "step": 213 + }, + { + "epoch": 0.05683176205019254, + "grad_norm": 0.8562107533946397, + "learning_rate": 1.129973474801061e-05, + "loss": 0.5852477550506592, + "step": 214 + }, + { + "epoch": 0.05709733103173549, + "grad_norm": 0.8483195878163089, + "learning_rate": 1.1352785145888594e-05, + "loss": 0.5312488079071045, + "step": 215 + }, + { + "epoch": 0.05736290001327845, + "grad_norm": 0.8817111257753456, + "learning_rate": 1.140583554376658e-05, + "loss": 0.5075235366821289, + "step": 216 + }, + { + "epoch": 0.05762846899482141, + "grad_norm": 0.8014885700994473, + "learning_rate": 1.1458885941644564e-05, + "loss": 0.5213298797607422, + "step": 217 + }, + { + "epoch": 0.05789403797636436, + "grad_norm": 0.8852582070340804, + "learning_rate": 1.1511936339522548e-05, + "loss": 0.5564183592796326, + "step": 218 + }, + { + "epoch": 0.05815960695790732, + "grad_norm": 1.0148412469588788, + "learning_rate": 1.1564986737400531e-05, + "loss": 0.5328387022018433, + "step": 219 + }, + { + "epoch": 0.05842517593945027, + "grad_norm": 0.7824132338865165, + "learning_rate": 1.1618037135278515e-05, + "loss": 0.5010273456573486, + "step": 220 + }, + { + "epoch": 0.05869074492099323, + "grad_norm": 0.8493817546068081, + "learning_rate": 1.16710875331565e-05, + "loss": 0.5473708510398865, + "step": 221 + }, + { + "epoch": 0.05895631390253618, + "grad_norm": 1.1554913959885298, + "learning_rate": 
1.1724137931034483e-05, + "loss": 0.5359818339347839, + "step": 222 + }, + { + "epoch": 0.05922188288407914, + "grad_norm": 0.9663065987200732, + "learning_rate": 1.1777188328912468e-05, + "loss": 0.5274665951728821, + "step": 223 + }, + { + "epoch": 0.0594874518656221, + "grad_norm": 0.8158672021913522, + "learning_rate": 1.1830238726790454e-05, + "loss": 0.5463781952857971, + "step": 224 + }, + { + "epoch": 0.05975302084716505, + "grad_norm": 0.7817235200046289, + "learning_rate": 1.1883289124668435e-05, + "loss": 0.553212583065033, + "step": 225 + }, + { + "epoch": 0.06001858982870801, + "grad_norm": 0.8540074681170072, + "learning_rate": 1.193633952254642e-05, + "loss": 0.47144171595573425, + "step": 226 + }, + { + "epoch": 0.06028415881025096, + "grad_norm": 0.9191106803002166, + "learning_rate": 1.1989389920424404e-05, + "loss": 0.506844162940979, + "step": 227 + }, + { + "epoch": 0.06054972779179392, + "grad_norm": 0.794192267301098, + "learning_rate": 1.2042440318302389e-05, + "loss": 0.4965322017669678, + "step": 228 + }, + { + "epoch": 0.06081529677333687, + "grad_norm": 0.8421546110465796, + "learning_rate": 1.2095490716180371e-05, + "loss": 0.4815751612186432, + "step": 229 + }, + { + "epoch": 0.06108086575487983, + "grad_norm": 0.8107361719185122, + "learning_rate": 1.2148541114058356e-05, + "loss": 0.5245312452316284, + "step": 230 + }, + { + "epoch": 0.06134643473642279, + "grad_norm": 0.8749447967552209, + "learning_rate": 1.2201591511936341e-05, + "loss": 0.5215133428573608, + "step": 231 + }, + { + "epoch": 0.06161200371796574, + "grad_norm": 0.8315635530714504, + "learning_rate": 1.2254641909814325e-05, + "loss": 0.5039419531822205, + "step": 232 + }, + { + "epoch": 0.0618775726995087, + "grad_norm": 1.0583546039713638, + "learning_rate": 1.230769230769231e-05, + "loss": 0.5562925338745117, + "step": 233 + }, + { + "epoch": 0.06214314168105165, + "grad_norm": 1.069780059811152, + "learning_rate": 1.2360742705570291e-05, + "loss": 
0.5372984409332275, + "step": 234 + }, + { + "epoch": 0.06240871066259461, + "grad_norm": 0.8766841361731121, + "learning_rate": 1.2413793103448277e-05, + "loss": 0.44987717270851135, + "step": 235 + }, + { + "epoch": 0.06267427964413756, + "grad_norm": 0.9229136432445015, + "learning_rate": 1.246684350132626e-05, + "loss": 0.537068247795105, + "step": 236 + }, + { + "epoch": 0.06293984862568051, + "grad_norm": 0.9828329951785308, + "learning_rate": 1.2519893899204245e-05, + "loss": 0.504779577255249, + "step": 237 + }, + { + "epoch": 0.06320541760722348, + "grad_norm": 1.0061858451025696, + "learning_rate": 1.257294429708223e-05, + "loss": 0.5524113774299622, + "step": 238 + }, + { + "epoch": 0.06347098658876643, + "grad_norm": 0.9888885225244529, + "learning_rate": 1.2625994694960214e-05, + "loss": 0.5089439153671265, + "step": 239 + }, + { + "epoch": 0.06373655557030938, + "grad_norm": 0.8394940482178029, + "learning_rate": 1.2679045092838197e-05, + "loss": 0.4501679837703705, + "step": 240 + }, + { + "epoch": 0.06400212455185235, + "grad_norm": 0.8117693384854435, + "learning_rate": 1.273209549071618e-05, + "loss": 0.5360216498374939, + "step": 241 + }, + { + "epoch": 0.0642676935333953, + "grad_norm": 0.876954304053235, + "learning_rate": 1.2785145888594166e-05, + "loss": 0.5595712661743164, + "step": 242 + }, + { + "epoch": 0.06453326251493825, + "grad_norm": 1.080992038181853, + "learning_rate": 1.283819628647215e-05, + "loss": 0.5010904669761658, + "step": 243 + }, + { + "epoch": 0.0647988314964812, + "grad_norm": 1.0446842005075034, + "learning_rate": 1.2891246684350134e-05, + "loss": 0.5053697228431702, + "step": 244 + }, + { + "epoch": 0.06506440047802417, + "grad_norm": 0.803002193385922, + "learning_rate": 1.294429708222812e-05, + "loss": 0.5045514106750488, + "step": 245 + }, + { + "epoch": 0.06532996945956712, + "grad_norm": 0.7912163744531999, + "learning_rate": 1.2997347480106101e-05, + "loss": 0.5546073913574219, + "step": 246 + }, + { + "epoch": 
0.06559553844111007, + "grad_norm": 0.9572908035308383, + "learning_rate": 1.3050397877984087e-05, + "loss": 0.47276046872138977, + "step": 247 + }, + { + "epoch": 0.06586110742265304, + "grad_norm": 0.8233476091470914, + "learning_rate": 1.310344827586207e-05, + "loss": 0.4757889211177826, + "step": 248 + }, + { + "epoch": 0.06612667640419599, + "grad_norm": 0.8415305337388579, + "learning_rate": 1.3156498673740055e-05, + "loss": 0.5078848600387573, + "step": 249 + }, + { + "epoch": 0.06639224538573894, + "grad_norm": 0.8437984625649567, + "learning_rate": 1.3209549071618037e-05, + "loss": 0.4890335202217102, + "step": 250 + }, + { + "epoch": 0.0666578143672819, + "grad_norm": 0.8299999132068526, + "learning_rate": 1.3262599469496022e-05, + "loss": 0.5406580567359924, + "step": 251 + }, + { + "epoch": 0.06692338334882486, + "grad_norm": 0.9307594142144101, + "learning_rate": 1.3315649867374005e-05, + "loss": 0.5236875414848328, + "step": 252 + }, + { + "epoch": 0.06718895233036781, + "grad_norm": 1.0602580439454288, + "learning_rate": 1.336870026525199e-05, + "loss": 0.4991317391395569, + "step": 253 + }, + { + "epoch": 0.06745452131191076, + "grad_norm": 0.8277603880683132, + "learning_rate": 1.3421750663129976e-05, + "loss": 0.4234679639339447, + "step": 254 + }, + { + "epoch": 0.06772009029345373, + "grad_norm": 0.9984839302922622, + "learning_rate": 1.3474801061007958e-05, + "loss": 0.49749234318733215, + "step": 255 + }, + { + "epoch": 0.06798565927499668, + "grad_norm": 0.9543855303701088, + "learning_rate": 1.3527851458885943e-05, + "loss": 0.5049105286598206, + "step": 256 + }, + { + "epoch": 0.06825122825653963, + "grad_norm": 0.8443711840757044, + "learning_rate": 1.3580901856763926e-05, + "loss": 0.5355304479598999, + "step": 257 + }, + { + "epoch": 0.06851679723808259, + "grad_norm": 0.9255144140027944, + "learning_rate": 1.3633952254641911e-05, + "loss": 0.46302929520606995, + "step": 258 + }, + { + "epoch": 0.06878236621962555, + "grad_norm": 
0.953877794861965, + "learning_rate": 1.3687002652519895e-05, + "loss": 0.5054173469543457, + "step": 259 + }, + { + "epoch": 0.0690479352011685, + "grad_norm": 0.8214682466537866, + "learning_rate": 1.374005305039788e-05, + "loss": 0.5018566846847534, + "step": 260 + }, + { + "epoch": 0.06931350418271146, + "grad_norm": 0.878430758752321, + "learning_rate": 1.3793103448275863e-05, + "loss": 0.4938735365867615, + "step": 261 + }, + { + "epoch": 0.06957907316425442, + "grad_norm": 0.8343439459008911, + "learning_rate": 1.3846153846153847e-05, + "loss": 0.4605029225349426, + "step": 262 + }, + { + "epoch": 0.06984464214579737, + "grad_norm": 0.8260329604526515, + "learning_rate": 1.3899204244031832e-05, + "loss": 0.5056782960891724, + "step": 263 + }, + { + "epoch": 0.07011021112734032, + "grad_norm": 0.860551370737139, + "learning_rate": 1.3952254641909815e-05, + "loss": 0.5017784833908081, + "step": 264 + }, + { + "epoch": 0.07037578010888328, + "grad_norm": 0.8353804409772935, + "learning_rate": 1.40053050397878e-05, + "loss": 0.5132012367248535, + "step": 265 + }, + { + "epoch": 0.07064134909042624, + "grad_norm": 0.8151795113028358, + "learning_rate": 1.4058355437665782e-05, + "loss": 0.531212329864502, + "step": 266 + }, + { + "epoch": 0.0709069180719692, + "grad_norm": 0.8086605566204427, + "learning_rate": 1.4111405835543767e-05, + "loss": 0.4900968074798584, + "step": 267 + }, + { + "epoch": 0.07117248705351215, + "grad_norm": 0.8735731145360269, + "learning_rate": 1.4164456233421753e-05, + "loss": 0.45277124643325806, + "step": 268 + }, + { + "epoch": 0.07143805603505511, + "grad_norm": 0.8760293380808535, + "learning_rate": 1.4217506631299736e-05, + "loss": 0.48026078939437866, + "step": 269 + }, + { + "epoch": 0.07170362501659806, + "grad_norm": 0.9019281227597356, + "learning_rate": 1.4270557029177721e-05, + "loss": 0.5111234784126282, + "step": 270 + }, + { + "epoch": 0.07196919399814102, + "grad_norm": 0.9120608197487232, + "learning_rate": 
1.4323607427055703e-05, + "loss": 0.5448082685470581, + "step": 271 + }, + { + "epoch": 0.07223476297968397, + "grad_norm": 0.9400729117423203, + "learning_rate": 1.4376657824933688e-05, + "loss": 0.5242921113967896, + "step": 272 + }, + { + "epoch": 0.07250033196122693, + "grad_norm": 0.9404952891335322, + "learning_rate": 1.4429708222811672e-05, + "loss": 0.5194095373153687, + "step": 273 + }, + { + "epoch": 0.07276590094276988, + "grad_norm": 0.8893776382848525, + "learning_rate": 1.4482758620689657e-05, + "loss": 0.4620330333709717, + "step": 274 + }, + { + "epoch": 0.07303146992431284, + "grad_norm": 0.886983687866706, + "learning_rate": 1.4535809018567642e-05, + "loss": 0.4654063582420349, + "step": 275 + }, + { + "epoch": 0.0732970389058558, + "grad_norm": 0.7984003718276244, + "learning_rate": 1.4588859416445624e-05, + "loss": 0.4637746810913086, + "step": 276 + }, + { + "epoch": 0.07356260788739875, + "grad_norm": 0.8288882522584324, + "learning_rate": 1.4641909814323609e-05, + "loss": 0.47949421405792236, + "step": 277 + }, + { + "epoch": 0.0738281768689417, + "grad_norm": 1.0041804846004008, + "learning_rate": 1.4694960212201592e-05, + "loss": 0.49565935134887695, + "step": 278 + }, + { + "epoch": 0.07409374585048466, + "grad_norm": 0.9214786055945364, + "learning_rate": 1.4748010610079577e-05, + "loss": 0.5057941675186157, + "step": 279 + }, + { + "epoch": 0.07435931483202762, + "grad_norm": 0.9073397896109812, + "learning_rate": 1.480106100795756e-05, + "loss": 0.5495956540107727, + "step": 280 + }, + { + "epoch": 0.07462488381357057, + "grad_norm": 0.8743353741776648, + "learning_rate": 1.4854111405835546e-05, + "loss": 0.4502897560596466, + "step": 281 + }, + { + "epoch": 0.07489045279511353, + "grad_norm": 0.8694785116368758, + "learning_rate": 1.490716180371353e-05, + "loss": 0.4799070954322815, + "step": 282 + }, + { + "epoch": 0.07515602177665649, + "grad_norm": 0.886176954457428, + "learning_rate": 1.4960212201591513e-05, + "loss": 
0.45640307664871216, + "step": 283 + }, + { + "epoch": 0.07542159075819944, + "grad_norm": 0.8937725285994821, + "learning_rate": 1.5013262599469498e-05, + "loss": 0.47862207889556885, + "step": 284 + }, + { + "epoch": 0.0756871597397424, + "grad_norm": 0.8717898339198907, + "learning_rate": 1.5066312997347481e-05, + "loss": 0.48195987939834595, + "step": 285 + }, + { + "epoch": 0.07595272872128535, + "grad_norm": 0.9124586645482137, + "learning_rate": 1.5119363395225467e-05, + "loss": 0.518566370010376, + "step": 286 + }, + { + "epoch": 0.07621829770282831, + "grad_norm": 0.9766882853479317, + "learning_rate": 1.5172413793103448e-05, + "loss": 0.5034162402153015, + "step": 287 + }, + { + "epoch": 0.07648386668437127, + "grad_norm": 0.8995114639723897, + "learning_rate": 1.5225464190981433e-05, + "loss": 0.497822642326355, + "step": 288 + }, + { + "epoch": 0.07674943566591422, + "grad_norm": 0.8484786603983125, + "learning_rate": 1.5278514588859417e-05, + "loss": 0.510530412197113, + "step": 289 + }, + { + "epoch": 0.07701500464745718, + "grad_norm": 0.9406440408252492, + "learning_rate": 1.53315649867374e-05, + "loss": 0.5163881778717041, + "step": 290 + }, + { + "epoch": 0.07728057362900013, + "grad_norm": 0.9825958938719339, + "learning_rate": 1.5384615384615387e-05, + "loss": 0.5161621570587158, + "step": 291 + }, + { + "epoch": 0.07754614261054309, + "grad_norm": 0.8680267479326179, + "learning_rate": 1.543766578249337e-05, + "loss": 0.5260482430458069, + "step": 292 + }, + { + "epoch": 0.07781171159208604, + "grad_norm": 0.8791995274446183, + "learning_rate": 1.5490716180371354e-05, + "loss": 0.4946279227733612, + "step": 293 + }, + { + "epoch": 0.078077280573629, + "grad_norm": 0.9734620967906259, + "learning_rate": 1.5543766578249338e-05, + "loss": 0.5030514001846313, + "step": 294 + }, + { + "epoch": 0.07834284955517196, + "grad_norm": 0.899295097408943, + "learning_rate": 1.559681697612732e-05, + "loss": 0.48864102363586426, + "step": 295 + }, + { + 
"epoch": 0.07860841853671491, + "grad_norm": 0.8710376092284174, + "learning_rate": 1.5649867374005304e-05, + "loss": 0.48310425877571106, + "step": 296 + }, + { + "epoch": 0.07887398751825787, + "grad_norm": 1.0094258392730318, + "learning_rate": 1.570291777188329e-05, + "loss": 0.4451446533203125, + "step": 297 + }, + { + "epoch": 0.07913955649980083, + "grad_norm": 0.9863170561942101, + "learning_rate": 1.5755968169761275e-05, + "loss": 0.4884604811668396, + "step": 298 + }, + { + "epoch": 0.07940512548134378, + "grad_norm": 0.8355693003184833, + "learning_rate": 1.5809018567639258e-05, + "loss": 0.5047659873962402, + "step": 299 + }, + { + "epoch": 0.07967069446288673, + "grad_norm": 0.8879040718748079, + "learning_rate": 1.586206896551724e-05, + "loss": 0.49124205112457275, + "step": 300 + }, + { + "epoch": 0.0799362634444297, + "grad_norm": 0.9411885452551192, + "learning_rate": 1.5915119363395225e-05, + "loss": 0.5113086700439453, + "step": 301 + }, + { + "epoch": 0.08020183242597265, + "grad_norm": 0.9345380756850689, + "learning_rate": 1.5968169761273212e-05, + "loss": 0.5298338532447815, + "step": 302 + }, + { + "epoch": 0.0804674014075156, + "grad_norm": 0.9050429706274331, + "learning_rate": 1.6021220159151195e-05, + "loss": 0.4673181176185608, + "step": 303 + }, + { + "epoch": 0.08073297038905856, + "grad_norm": 0.8972864762330055, + "learning_rate": 1.607427055702918e-05, + "loss": 0.45361828804016113, + "step": 304 + }, + { + "epoch": 0.08099853937060152, + "grad_norm": 0.8848533583648175, + "learning_rate": 1.6127320954907166e-05, + "loss": 0.5144034624099731, + "step": 305 + }, + { + "epoch": 0.08126410835214447, + "grad_norm": 0.9263690972931414, + "learning_rate": 1.6180371352785146e-05, + "loss": 0.5027451515197754, + "step": 306 + }, + { + "epoch": 0.08152967733368742, + "grad_norm": 0.8575377500476566, + "learning_rate": 1.6233421750663133e-05, + "loss": 0.4987551271915436, + "step": 307 + }, + { + "epoch": 0.08179524631523039, + "grad_norm": 
1.0121964253373468, + "learning_rate": 1.6286472148541116e-05, + "loss": 0.5433062314987183, + "step": 308 + }, + { + "epoch": 0.08206081529677334, + "grad_norm": 0.8973695218716041, + "learning_rate": 1.63395225464191e-05, + "loss": 0.49603772163391113, + "step": 309 + }, + { + "epoch": 0.08232638427831629, + "grad_norm": 0.9033181815462389, + "learning_rate": 1.6392572944297083e-05, + "loss": 0.47990959882736206, + "step": 310 + }, + { + "epoch": 0.08259195325985925, + "grad_norm": 0.9843185449650845, + "learning_rate": 1.6445623342175066e-05, + "loss": 0.5196831226348877, + "step": 311 + }, + { + "epoch": 0.0828575222414022, + "grad_norm": 0.8589822510995361, + "learning_rate": 1.6498673740053053e-05, + "loss": 0.4664091467857361, + "step": 312 + }, + { + "epoch": 0.08312309122294516, + "grad_norm": 0.9077443936761218, + "learning_rate": 1.6551724137931037e-05, + "loss": 0.4405553936958313, + "step": 313 + }, + { + "epoch": 0.08338866020448811, + "grad_norm": 0.8561334135462362, + "learning_rate": 1.660477453580902e-05, + "loss": 0.46172815561294556, + "step": 314 + }, + { + "epoch": 0.08365422918603108, + "grad_norm": 0.8835708894071636, + "learning_rate": 1.6657824933687004e-05, + "loss": 0.5004327297210693, + "step": 315 + }, + { + "epoch": 0.08391979816757403, + "grad_norm": 0.8452618593185571, + "learning_rate": 1.6710875331564987e-05, + "loss": 0.4727814197540283, + "step": 316 + }, + { + "epoch": 0.08418536714911698, + "grad_norm": 0.7631381381409372, + "learning_rate": 1.676392572944297e-05, + "loss": 0.43602120876312256, + "step": 317 + }, + { + "epoch": 0.08445093613065995, + "grad_norm": 0.9092168864142193, + "learning_rate": 1.6816976127320957e-05, + "loss": 0.5110410451889038, + "step": 318 + }, + { + "epoch": 0.0847165051122029, + "grad_norm": 0.9902301773407237, + "learning_rate": 1.687002652519894e-05, + "loss": 0.4798283278942108, + "step": 319 + }, + { + "epoch": 0.08498207409374585, + "grad_norm": 0.8572923551208312, + "learning_rate": 
1.6923076923076924e-05, + "loss": 0.45690029859542847, + "step": 320 + }, + { + "epoch": 0.0852476430752888, + "grad_norm": 0.8864718165003516, + "learning_rate": 1.6976127320954908e-05, + "loss": 0.4770117998123169, + "step": 321 + }, + { + "epoch": 0.08551321205683177, + "grad_norm": 0.888032985544436, + "learning_rate": 1.702917771883289e-05, + "loss": 0.512240469455719, + "step": 322 + }, + { + "epoch": 0.08577878103837472, + "grad_norm": 0.8665270088700595, + "learning_rate": 1.7082228116710878e-05, + "loss": 0.4696195423603058, + "step": 323 + }, + { + "epoch": 0.08604435001991767, + "grad_norm": 0.8876364903970222, + "learning_rate": 1.713527851458886e-05, + "loss": 0.4779578149318695, + "step": 324 + }, + { + "epoch": 0.08630991900146064, + "grad_norm": 0.9604080935445363, + "learning_rate": 1.7188328912466845e-05, + "loss": 0.48670440912246704, + "step": 325 + }, + { + "epoch": 0.08657548798300359, + "grad_norm": 0.9813156772782552, + "learning_rate": 1.7241379310344828e-05, + "loss": 0.5285798907279968, + "step": 326 + }, + { + "epoch": 0.08684105696454654, + "grad_norm": 0.9264252564283505, + "learning_rate": 1.7294429708222812e-05, + "loss": 0.46095865964889526, + "step": 327 + }, + { + "epoch": 0.08710662594608949, + "grad_norm": 0.8953179311501671, + "learning_rate": 1.73474801061008e-05, + "loss": 0.44342565536499023, + "step": 328 + }, + { + "epoch": 0.08737219492763246, + "grad_norm": 0.9640917124230414, + "learning_rate": 1.7400530503978782e-05, + "loss": 0.48974257707595825, + "step": 329 + }, + { + "epoch": 0.08763776390917541, + "grad_norm": 1.3568266957703046, + "learning_rate": 1.7453580901856765e-05, + "loss": 0.4763977527618408, + "step": 330 + }, + { + "epoch": 0.08790333289071836, + "grad_norm": 1.0231360729141987, + "learning_rate": 1.750663129973475e-05, + "loss": 0.5390856266021729, + "step": 331 + }, + { + "epoch": 0.08816890187226133, + "grad_norm": 0.9254788253309115, + "learning_rate": 1.7559681697612732e-05, + "loss": 
0.4833192825317383, + "step": 332 + }, + { + "epoch": 0.08843447085380428, + "grad_norm": 0.9106057248503829, + "learning_rate": 1.7612732095490716e-05, + "loss": 0.47842955589294434, + "step": 333 + }, + { + "epoch": 0.08870003983534723, + "grad_norm": 0.8653538374375338, + "learning_rate": 1.7665782493368703e-05, + "loss": 0.4543060064315796, + "step": 334 + }, + { + "epoch": 0.08896560881689018, + "grad_norm": 0.9024795887264612, + "learning_rate": 1.7718832891246686e-05, + "loss": 0.4492039978504181, + "step": 335 + }, + { + "epoch": 0.08923117779843315, + "grad_norm": 0.9660730803540603, + "learning_rate": 1.777188328912467e-05, + "loss": 0.4930066466331482, + "step": 336 + }, + { + "epoch": 0.0894967467799761, + "grad_norm": 0.9494811659806174, + "learning_rate": 1.7824933687002653e-05, + "loss": 0.46343356370925903, + "step": 337 + }, + { + "epoch": 0.08976231576151905, + "grad_norm": 0.98824099461907, + "learning_rate": 1.7877984084880636e-05, + "loss": 0.5118839740753174, + "step": 338 + }, + { + "epoch": 0.09002788474306202, + "grad_norm": 0.9759312233085756, + "learning_rate": 1.7931034482758623e-05, + "loss": 0.4659194350242615, + "step": 339 + }, + { + "epoch": 0.09029345372460497, + "grad_norm": 0.868792760549277, + "learning_rate": 1.7984084880636607e-05, + "loss": 0.45929303765296936, + "step": 340 + }, + { + "epoch": 0.09055902270614792, + "grad_norm": 0.9774857416777888, + "learning_rate": 1.803713527851459e-05, + "loss": 0.5072556734085083, + "step": 341 + }, + { + "epoch": 0.09082459168769087, + "grad_norm": 0.8722377179138728, + "learning_rate": 1.8090185676392577e-05, + "loss": 0.42370402812957764, + "step": 342 + }, + { + "epoch": 0.09109016066923384, + "grad_norm": 0.9404121189660462, + "learning_rate": 1.8143236074270557e-05, + "loss": 0.5017818212509155, + "step": 343 + }, + { + "epoch": 0.09135572965077679, + "grad_norm": 1.0279846493738434, + "learning_rate": 1.8196286472148544e-05, + "loss": 0.4746384620666504, + "step": 344 + }, + { + 
"epoch": 0.09162129863231974, + "grad_norm": 1.0016746569872437, + "learning_rate": 1.8249336870026527e-05, + "loss": 0.49020540714263916, + "step": 345 + }, + { + "epoch": 0.09188686761386271, + "grad_norm": 0.8521475505102624, + "learning_rate": 1.830238726790451e-05, + "loss": 0.4569393992424011, + "step": 346 + }, + { + "epoch": 0.09215243659540566, + "grad_norm": 0.9587089968564823, + "learning_rate": 1.8355437665782494e-05, + "loss": 0.46831727027893066, + "step": 347 + }, + { + "epoch": 0.09241800557694861, + "grad_norm": 0.909230845841239, + "learning_rate": 1.8408488063660478e-05, + "loss": 0.4795265197753906, + "step": 348 + }, + { + "epoch": 0.09268357455849156, + "grad_norm": 0.9641043081337674, + "learning_rate": 1.8461538461538465e-05, + "loss": 0.5122503042221069, + "step": 349 + }, + { + "epoch": 0.09294914354003453, + "grad_norm": 0.8617611974669258, + "learning_rate": 1.8514588859416448e-05, + "loss": 0.4190404713153839, + "step": 350 + }, + { + "epoch": 0.09321471252157748, + "grad_norm": 0.9061006884991066, + "learning_rate": 1.856763925729443e-05, + "loss": 0.47778886556625366, + "step": 351 + }, + { + "epoch": 0.09348028150312043, + "grad_norm": 0.9208451846579827, + "learning_rate": 1.8620689655172415e-05, + "loss": 0.45851507782936096, + "step": 352 + }, + { + "epoch": 0.09374585048466338, + "grad_norm": 1.0050481975496854, + "learning_rate": 1.86737400530504e-05, + "loss": 0.4888782501220703, + "step": 353 + }, + { + "epoch": 0.09401141946620635, + "grad_norm": 0.9454138173982718, + "learning_rate": 1.8726790450928382e-05, + "loss": 0.5032983422279358, + "step": 354 + }, + { + "epoch": 0.0942769884477493, + "grad_norm": 0.9130362696106749, + "learning_rate": 1.877984084880637e-05, + "loss": 0.4754604697227478, + "step": 355 + }, + { + "epoch": 0.09454255742929225, + "grad_norm": 0.9970889038933597, + "learning_rate": 1.8832891246684352e-05, + "loss": 0.488397479057312, + "step": 356 + }, + { + "epoch": 0.09480812641083522, + "grad_norm": 
1.222649143916529, + "learning_rate": 1.8885941644562336e-05, + "loss": 0.4775403141975403, + "step": 357 + }, + { + "epoch": 0.09507369539237817, + "grad_norm": 0.9872263151320333, + "learning_rate": 1.893899204244032e-05, + "loss": 0.47063153982162476, + "step": 358 + }, + { + "epoch": 0.09533926437392112, + "grad_norm": 1.0222144168199743, + "learning_rate": 1.8992042440318303e-05, + "loss": 0.4856908321380615, + "step": 359 + }, + { + "epoch": 0.09560483335546408, + "grad_norm": 0.9195037496858368, + "learning_rate": 1.904509283819629e-05, + "loss": 0.440033495426178, + "step": 360 + }, + { + "epoch": 0.09587040233700704, + "grad_norm": 0.9961899484684762, + "learning_rate": 1.9098143236074273e-05, + "loss": 0.4825770854949951, + "step": 361 + }, + { + "epoch": 0.09613597131854999, + "grad_norm": 0.9443841189655576, + "learning_rate": 1.9151193633952256e-05, + "loss": 0.48192232847213745, + "step": 362 + }, + { + "epoch": 0.09640154030009294, + "grad_norm": 0.9065595450317342, + "learning_rate": 1.920424403183024e-05, + "loss": 0.4689444899559021, + "step": 363 + }, + { + "epoch": 0.09666710928163591, + "grad_norm": 0.9970961253516039, + "learning_rate": 1.9257294429708223e-05, + "loss": 0.47120895981788635, + "step": 364 + }, + { + "epoch": 0.09693267826317886, + "grad_norm": 1.0106028234477955, + "learning_rate": 1.931034482758621e-05, + "loss": 0.4968941807746887, + "step": 365 + }, + { + "epoch": 0.09719824724472181, + "grad_norm": 1.115125675989656, + "learning_rate": 1.9363395225464193e-05, + "loss": 0.46982288360595703, + "step": 366 + }, + { + "epoch": 0.09746381622626477, + "grad_norm": 0.9408972278578609, + "learning_rate": 1.9416445623342177e-05, + "loss": 0.4541531205177307, + "step": 367 + }, + { + "epoch": 0.09772938520780773, + "grad_norm": 0.9760564476186651, + "learning_rate": 1.946949602122016e-05, + "loss": 0.45576703548431396, + "step": 368 + }, + { + "epoch": 0.09799495418935068, + "grad_norm": 0.9893999168346334, + "learning_rate": 
1.9522546419098144e-05, + "loss": 0.48060357570648193, + "step": 369 + }, + { + "epoch": 0.09826052317089363, + "grad_norm": 0.9675810264832774, + "learning_rate": 1.9575596816976127e-05, + "loss": 0.47536781430244446, + "step": 370 + }, + { + "epoch": 0.0985260921524366, + "grad_norm": 0.9516181191759193, + "learning_rate": 1.9628647214854114e-05, + "loss": 0.46463894844055176, + "step": 371 + }, + { + "epoch": 0.09879166113397955, + "grad_norm": 1.0082712913027811, + "learning_rate": 1.9681697612732098e-05, + "loss": 0.49570178985595703, + "step": 372 + }, + { + "epoch": 0.0990572301155225, + "grad_norm": 1.0327922438955468, + "learning_rate": 1.973474801061008e-05, + "loss": 0.4764043390750885, + "step": 373 + }, + { + "epoch": 0.09932279909706546, + "grad_norm": 0.9227866290107449, + "learning_rate": 1.9787798408488064e-05, + "loss": 0.43582671880722046, + "step": 374 + }, + { + "epoch": 0.09958836807860842, + "grad_norm": 0.9360238854832598, + "learning_rate": 1.9840848806366048e-05, + "loss": 0.46077725291252136, + "step": 375 + }, + { + "epoch": 0.09985393706015137, + "grad_norm": 0.9607682273492437, + "learning_rate": 1.9893899204244035e-05, + "loss": 0.4794929027557373, + "step": 376 + }, + { + "epoch": 0.10011950604169433, + "grad_norm": 0.9619848398175739, + "learning_rate": 1.9946949602122018e-05, + "loss": 0.43174588680267334, + "step": 377 + }, + { + "epoch": 0.10038507502323729, + "grad_norm": 0.90095462919728, + "learning_rate": 2e-05, + "loss": 0.44885915517807007, + "step": 378 + }, + { + "epoch": 0.10065064400478024, + "grad_norm": 1.0789787198205218, + "learning_rate": 1.9999999036058974e-05, + "loss": 0.520150899887085, + "step": 379 + }, + { + "epoch": 0.1009162129863232, + "grad_norm": 0.9699182604374589, + "learning_rate": 1.9999996144236068e-05, + "loss": 0.5139277577400208, + "step": 380 + }, + { + "epoch": 0.10118178196786615, + "grad_norm": 1.0077278580199993, + "learning_rate": 1.999999132453184e-05, + "loss": 0.48935171961784363, + 
"step": 381 + }, + { + "epoch": 0.10144735094940911, + "grad_norm": 0.9095465340361383, + "learning_rate": 1.999998457694723e-05, + "loss": 0.4805561304092407, + "step": 382 + }, + { + "epoch": 0.10171291993095206, + "grad_norm": 0.9209321398292457, + "learning_rate": 1.9999975901483532e-05, + "loss": 0.4340912997722626, + "step": 383 + }, + { + "epoch": 0.10197848891249502, + "grad_norm": 1.0414639039942946, + "learning_rate": 1.999996529814242e-05, + "loss": 0.48282474279403687, + "step": 384 + }, + { + "epoch": 0.10224405789403798, + "grad_norm": 0.9753320144694753, + "learning_rate": 1.999995276692593e-05, + "loss": 0.4653206169605255, + "step": 385 + }, + { + "epoch": 0.10250962687558093, + "grad_norm": 0.919281113033857, + "learning_rate": 1.999993830783649e-05, + "loss": 0.48501014709472656, + "step": 386 + }, + { + "epoch": 0.10277519585712389, + "grad_norm": 1.0711296444042975, + "learning_rate": 1.9999921920876882e-05, + "loss": 0.48260143399238586, + "step": 387 + }, + { + "epoch": 0.10304076483866684, + "grad_norm": 0.9590085896328235, + "learning_rate": 1.9999903606050267e-05, + "loss": 0.44557270407676697, + "step": 388 + }, + { + "epoch": 0.1033063338202098, + "grad_norm": 1.111282066618818, + "learning_rate": 1.9999883363360175e-05, + "loss": 0.4843652546405792, + "step": 389 + }, + { + "epoch": 0.10357190280175275, + "grad_norm": 0.9708048507544866, + "learning_rate": 1.9999861192810508e-05, + "loss": 0.4536727964878082, + "step": 390 + }, + { + "epoch": 0.1038374717832957, + "grad_norm": 1.0216212958759847, + "learning_rate": 1.9999837094405538e-05, + "loss": 0.49557366967201233, + "step": 391 + }, + { + "epoch": 0.10410304076483867, + "grad_norm": 1.0254795167373827, + "learning_rate": 1.9999811068149917e-05, + "loss": 0.45077240467071533, + "step": 392 + }, + { + "epoch": 0.10436860974638162, + "grad_norm": 0.9857255709196505, + "learning_rate": 1.9999783114048658e-05, + "loss": 0.4554041624069214, + "step": 393 + }, + { + "epoch": 
0.10463417872792458, + "grad_norm": 0.8770920920154472, + "learning_rate": 1.999975323210715e-05, + "loss": 0.43526744842529297, + "step": 394 + }, + { + "epoch": 0.10489974770946753, + "grad_norm": 0.9824982196768539, + "learning_rate": 1.9999721422331154e-05, + "loss": 0.4097936749458313, + "step": 395 + }, + { + "epoch": 0.1051653166910105, + "grad_norm": 1.013432449022695, + "learning_rate": 1.9999687684726803e-05, + "loss": 0.4740130305290222, + "step": 396 + }, + { + "epoch": 0.10543088567255345, + "grad_norm": 0.9786752992542405, + "learning_rate": 1.9999652019300604e-05, + "loss": 0.43374374508857727, + "step": 397 + }, + { + "epoch": 0.1056964546540964, + "grad_norm": 0.9323415402935509, + "learning_rate": 1.999961442605943e-05, + "loss": 0.4423784911632538, + "step": 398 + }, + { + "epoch": 0.10596202363563936, + "grad_norm": 1.0497518439124596, + "learning_rate": 1.999957490501053e-05, + "loss": 0.4660544693470001, + "step": 399 + }, + { + "epoch": 0.10622759261718231, + "grad_norm": 1.11742327964835, + "learning_rate": 1.999953345616152e-05, + "loss": 0.4579896628856659, + "step": 400 + }, + { + "epoch": 0.10649316159872527, + "grad_norm": 1.0653029752390735, + "learning_rate": 1.9999490079520395e-05, + "loss": 0.4634096920490265, + "step": 401 + }, + { + "epoch": 0.10675873058026822, + "grad_norm": 0.9969566988589958, + "learning_rate": 1.9999444775095517e-05, + "loss": 0.45374077558517456, + "step": 402 + }, + { + "epoch": 0.10702429956181118, + "grad_norm": 1.1298291912896017, + "learning_rate": 1.9999397542895615e-05, + "loss": 0.49752670526504517, + "step": 403 + }, + { + "epoch": 0.10728986854335414, + "grad_norm": 1.049244919494092, + "learning_rate": 1.99993483829298e-05, + "loss": 0.4539335370063782, + "step": 404 + }, + { + "epoch": 0.10755543752489709, + "grad_norm": 1.0017841795942442, + "learning_rate": 1.999929729520755e-05, + "loss": 0.4665772616863251, + "step": 405 + }, + { + "epoch": 0.10782100650644005, + "grad_norm": 
1.023688686658119, + "learning_rate": 1.9999244279738713e-05, + "loss": 0.4850832223892212, + "step": 406 + }, + { + "epoch": 0.108086575487983, + "grad_norm": 0.9960763191436038, + "learning_rate": 1.9999189336533508e-05, + "loss": 0.43974876403808594, + "step": 407 + }, + { + "epoch": 0.10835214446952596, + "grad_norm": 1.0378626233602128, + "learning_rate": 1.9999132465602526e-05, + "loss": 0.46823856234550476, + "step": 408 + }, + { + "epoch": 0.10861771345106891, + "grad_norm": 1.0461372802003532, + "learning_rate": 1.9999073666956734e-05, + "loss": 0.49704545736312866, + "step": 409 + }, + { + "epoch": 0.10888328243261187, + "grad_norm": 1.03380477635781, + "learning_rate": 1.999901294060747e-05, + "loss": 0.3863454759120941, + "step": 410 + }, + { + "epoch": 0.10914885141415483, + "grad_norm": 1.1280569204620268, + "learning_rate": 1.9998950286566438e-05, + "loss": 0.4903780221939087, + "step": 411 + }, + { + "epoch": 0.10941442039569778, + "grad_norm": 0.9546134462956446, + "learning_rate": 1.9998885704845716e-05, + "loss": 0.4312375485897064, + "step": 412 + }, + { + "epoch": 0.10967998937724074, + "grad_norm": 0.9382591225300354, + "learning_rate": 1.9998819195457756e-05, + "loss": 0.4350954294204712, + "step": 413 + }, + { + "epoch": 0.1099455583587837, + "grad_norm": 0.9201016144754837, + "learning_rate": 1.999875075841538e-05, + "loss": 0.4364873766899109, + "step": 414 + }, + { + "epoch": 0.11021112734032665, + "grad_norm": 0.9578414566062486, + "learning_rate": 1.999868039373178e-05, + "loss": 0.42079728841781616, + "step": 415 + }, + { + "epoch": 0.1104766963218696, + "grad_norm": 1.0011321946551845, + "learning_rate": 1.9998608101420527e-05, + "loss": 0.4396737515926361, + "step": 416 + }, + { + "epoch": 0.11074226530341257, + "grad_norm": 0.9922478693245596, + "learning_rate": 1.9998533881495552e-05, + "loss": 0.44765806198120117, + "step": 417 + }, + { + "epoch": 0.11100783428495552, + "grad_norm": 1.0219437952159112, + "learning_rate": 
1.999845773397117e-05, + "loss": 0.46199291944503784, + "step": 418 + }, + { + "epoch": 0.11127340326649847, + "grad_norm": 0.9510961467421052, + "learning_rate": 1.9998379658862058e-05, + "loss": 0.44561129808425903, + "step": 419 + }, + { + "epoch": 0.11153897224804143, + "grad_norm": 1.0559368690309399, + "learning_rate": 1.9998299656183263e-05, + "loss": 0.46025681495666504, + "step": 420 + }, + { + "epoch": 0.11180454122958439, + "grad_norm": 0.9881679042322009, + "learning_rate": 1.999821772595022e-05, + "loss": 0.4408613443374634, + "step": 421 + }, + { + "epoch": 0.11207011021112734, + "grad_norm": 0.9620122842513851, + "learning_rate": 1.999813386817871e-05, + "loss": 0.4846842586994171, + "step": 422 + }, + { + "epoch": 0.11233567919267029, + "grad_norm": 0.9697081207450757, + "learning_rate": 1.999804808288491e-05, + "loss": 0.44503283500671387, + "step": 423 + }, + { + "epoch": 0.11260124817421326, + "grad_norm": 0.9687765160951803, + "learning_rate": 1.9997960370085355e-05, + "loss": 0.4090060293674469, + "step": 424 + }, + { + "epoch": 0.11286681715575621, + "grad_norm": 0.9575575943579401, + "learning_rate": 1.999787072979696e-05, + "loss": 0.43246471881866455, + "step": 425 + }, + { + "epoch": 0.11313238613729916, + "grad_norm": 1.001604978030575, + "learning_rate": 1.9997779162036996e-05, + "loss": 0.46283262968063354, + "step": 426 + }, + { + "epoch": 0.11339795511884213, + "grad_norm": 0.9108113962903395, + "learning_rate": 1.999768566682313e-05, + "loss": 0.3866165578365326, + "step": 427 + }, + { + "epoch": 0.11366352410038508, + "grad_norm": 0.9595506331685858, + "learning_rate": 1.9997590244173374e-05, + "loss": 0.4501144289970398, + "step": 428 + }, + { + "epoch": 0.11392909308192803, + "grad_norm": 0.9153639565172541, + "learning_rate": 1.9997492894106127e-05, + "loss": 0.43005290627479553, + "step": 429 + }, + { + "epoch": 0.11419466206347098, + "grad_norm": 0.9635360081712412, + "learning_rate": 1.9997393616640165e-05, + "loss": 
0.4427964985370636, + "step": 430 + }, + { + "epoch": 0.11446023104501395, + "grad_norm": 1.0560533392763956, + "learning_rate": 1.999729241179462e-05, + "loss": 0.4690951108932495, + "step": 431 + }, + { + "epoch": 0.1147258000265569, + "grad_norm": 0.9559285214931015, + "learning_rate": 1.9997189279589003e-05, + "loss": 0.456949919462204, + "step": 432 + }, + { + "epoch": 0.11499136900809985, + "grad_norm": 0.9851459681291062, + "learning_rate": 1.99970842200432e-05, + "loss": 0.456052303314209, + "step": 433 + }, + { + "epoch": 0.11525693798964282, + "grad_norm": 0.9609923633405658, + "learning_rate": 1.9996977233177466e-05, + "loss": 0.43220120668411255, + "step": 434 + }, + { + "epoch": 0.11552250697118577, + "grad_norm": 0.9022181145862976, + "learning_rate": 1.9996868319012422e-05, + "loss": 0.4237494170665741, + "step": 435 + }, + { + "epoch": 0.11578807595272872, + "grad_norm": 1.1387519975876466, + "learning_rate": 1.9996757477569072e-05, + "loss": 0.4713878631591797, + "step": 436 + }, + { + "epoch": 0.11605364493427167, + "grad_norm": 1.026114633188765, + "learning_rate": 1.9996644708868776e-05, + "loss": 0.4561111330986023, + "step": 437 + }, + { + "epoch": 0.11631921391581464, + "grad_norm": 1.0425252904592188, + "learning_rate": 1.9996530012933285e-05, + "loss": 0.468253493309021, + "step": 438 + }, + { + "epoch": 0.11658478289735759, + "grad_norm": 0.9323050726416767, + "learning_rate": 1.9996413389784704e-05, + "loss": 0.4815019369125366, + "step": 439 + }, + { + "epoch": 0.11685035187890054, + "grad_norm": 0.9369313249225236, + "learning_rate": 1.9996294839445518e-05, + "loss": 0.4235987663269043, + "step": 440 + }, + { + "epoch": 0.1171159208604435, + "grad_norm": 0.9217309559918773, + "learning_rate": 1.999617436193858e-05, + "loss": 0.40562817454338074, + "step": 441 + }, + { + "epoch": 0.11738148984198646, + "grad_norm": 1.1384168500780398, + "learning_rate": 1.999605195728712e-05, + "loss": 0.424539715051651, + "step": 442 + }, + { + "epoch": 
0.11764705882352941, + "grad_norm": 0.9616123874834243, + "learning_rate": 1.9995927625514736e-05, + "loss": 0.43677473068237305, + "step": 443 + }, + { + "epoch": 0.11791262780507236, + "grad_norm": 0.9761533315060044, + "learning_rate": 1.9995801366645396e-05, + "loss": 0.47325971722602844, + "step": 444 + }, + { + "epoch": 0.11817819678661533, + "grad_norm": 0.9447069768738408, + "learning_rate": 1.9995673180703443e-05, + "loss": 0.4206562638282776, + "step": 445 + }, + { + "epoch": 0.11844376576815828, + "grad_norm": 0.9743544240614231, + "learning_rate": 1.999554306771359e-05, + "loss": 0.4492834210395813, + "step": 446 + }, + { + "epoch": 0.11870933474970123, + "grad_norm": 1.0629000505790311, + "learning_rate": 1.9995411027700917e-05, + "loss": 0.4445284605026245, + "step": 447 + }, + { + "epoch": 0.1189749037312442, + "grad_norm": 0.9911650776890225, + "learning_rate": 1.9995277060690885e-05, + "loss": 0.4038352370262146, + "step": 448 + }, + { + "epoch": 0.11924047271278715, + "grad_norm": 0.9418518804089067, + "learning_rate": 1.9995141166709318e-05, + "loss": 0.4261324405670166, + "step": 449 + }, + { + "epoch": 0.1195060416943301, + "grad_norm": 1.067611227425969, + "learning_rate": 1.9995003345782416e-05, + "loss": 0.44187062978744507, + "step": 450 + }, + { + "epoch": 0.11977161067587305, + "grad_norm": 0.9191915914869351, + "learning_rate": 1.9994863597936752e-05, + "loss": 0.44672587513923645, + "step": 451 + }, + { + "epoch": 0.12003717965741602, + "grad_norm": 0.9882052007755191, + "learning_rate": 1.999472192319926e-05, + "loss": 0.44322314858436584, + "step": 452 + }, + { + "epoch": 0.12030274863895897, + "grad_norm": 0.9882289435866314, + "learning_rate": 1.9994578321597258e-05, + "loss": 0.4396611154079437, + "step": 453 + }, + { + "epoch": 0.12056831762050192, + "grad_norm": 0.9831868773412876, + "learning_rate": 1.9994432793158433e-05, + "loss": 0.4487733542919159, + "step": 454 + }, + { + "epoch": 0.12083388660204489, + "grad_norm": 
0.9360753951175719, + "learning_rate": 1.999428533791084e-05, + "loss": 0.3969653248786926, + "step": 455 + }, + { + "epoch": 0.12109945558358784, + "grad_norm": 0.9662346637828156, + "learning_rate": 1.9994135955882906e-05, + "loss": 0.39312344789505005, + "step": 456 + }, + { + "epoch": 0.12136502456513079, + "grad_norm": 0.9019524086641805, + "learning_rate": 1.9993984647103425e-05, + "loss": 0.3979804217815399, + "step": 457 + }, + { + "epoch": 0.12163059354667374, + "grad_norm": 1.0970468981958466, + "learning_rate": 1.9993831411601573e-05, + "loss": 0.4430229365825653, + "step": 458 + }, + { + "epoch": 0.12189616252821671, + "grad_norm": 0.994492352252997, + "learning_rate": 1.9993676249406895e-05, + "loss": 0.4511718451976776, + "step": 459 + }, + { + "epoch": 0.12216173150975966, + "grad_norm": 1.091979336298699, + "learning_rate": 1.9993519160549298e-05, + "loss": 0.4686455726623535, + "step": 460 + }, + { + "epoch": 0.12242730049130261, + "grad_norm": 1.0158374042593608, + "learning_rate": 1.9993360145059073e-05, + "loss": 0.4501730501651764, + "step": 461 + }, + { + "epoch": 0.12269286947284558, + "grad_norm": 0.8530053413909426, + "learning_rate": 1.999319920296687e-05, + "loss": 0.40718767046928406, + "step": 462 + }, + { + "epoch": 0.12295843845438853, + "grad_norm": 1.1181007301257784, + "learning_rate": 1.9993036334303716e-05, + "loss": 0.47313761711120605, + "step": 463 + }, + { + "epoch": 0.12322400743593148, + "grad_norm": 0.9710975932515886, + "learning_rate": 1.9992871539101018e-05, + "loss": 0.47417378425598145, + "step": 464 + }, + { + "epoch": 0.12348957641747443, + "grad_norm": 0.9297582414898758, + "learning_rate": 1.999270481739054e-05, + "loss": 0.44206154346466064, + "step": 465 + }, + { + "epoch": 0.1237551453990174, + "grad_norm": 0.8745553533375581, + "learning_rate": 1.9992536169204427e-05, + "loss": 0.3800848722457886, + "step": 466 + }, + { + "epoch": 0.12402071438056035, + "grad_norm": 0.9337162704530373, + "learning_rate": 
1.9992365594575194e-05, + "loss": 0.40339407324790955, + "step": 467 + }, + { + "epoch": 0.1242862833621033, + "grad_norm": 0.945328490567385, + "learning_rate": 1.999219309353572e-05, + "loss": 0.45280492305755615, + "step": 468 + }, + { + "epoch": 0.12455185234364627, + "grad_norm": 1.0911195899085697, + "learning_rate": 1.9992018666119266e-05, + "loss": 0.4600910544395447, + "step": 469 + }, + { + "epoch": 0.12481742132518922, + "grad_norm": 0.9649890056306747, + "learning_rate": 1.9991842312359458e-05, + "loss": 0.4475003480911255, + "step": 470 + }, + { + "epoch": 0.12508299030673217, + "grad_norm": 1.0493048741226816, + "learning_rate": 1.9991664032290297e-05, + "loss": 0.45377033948898315, + "step": 471 + }, + { + "epoch": 0.12534855928827512, + "grad_norm": 0.9964208438270044, + "learning_rate": 1.9991483825946147e-05, + "loss": 0.4397522509098053, + "step": 472 + }, + { + "epoch": 0.12561412826981808, + "grad_norm": 0.9309535511597795, + "learning_rate": 1.9991301693361756e-05, + "loss": 0.4258221387863159, + "step": 473 + }, + { + "epoch": 0.12587969725136103, + "grad_norm": 0.9120842027423138, + "learning_rate": 1.9991117634572234e-05, + "loss": 0.40272068977355957, + "step": 474 + }, + { + "epoch": 0.126145266232904, + "grad_norm": 0.8761120829975514, + "learning_rate": 1.9990931649613067e-05, + "loss": 0.3721206784248352, + "step": 475 + }, + { + "epoch": 0.12641083521444696, + "grad_norm": 0.9997105907953329, + "learning_rate": 1.9990743738520115e-05, + "loss": 0.4530203938484192, + "step": 476 + }, + { + "epoch": 0.1266764041959899, + "grad_norm": 0.999446109489731, + "learning_rate": 1.999055390132959e-05, + "loss": 0.4281614422798157, + "step": 477 + }, + { + "epoch": 0.12694197317753286, + "grad_norm": 1.3617327829527315, + "learning_rate": 1.999036213807811e-05, + "loss": 0.41965895891189575, + "step": 478 + }, + { + "epoch": 0.12720754215907581, + "grad_norm": 0.9525189428273744, + "learning_rate": 1.9990168448802633e-05, + "loss": 
0.40055203437805176, + "step": 479 + }, + { + "epoch": 0.12747311114061877, + "grad_norm": 1.0868137290392272, + "learning_rate": 1.99899728335405e-05, + "loss": 0.4266522526741028, + "step": 480 + }, + { + "epoch": 0.12773868012216172, + "grad_norm": 1.028316280940819, + "learning_rate": 1.9989775292329425e-05, + "loss": 0.42291250824928284, + "step": 481 + }, + { + "epoch": 0.1280042491037047, + "grad_norm": 1.0319881226067493, + "learning_rate": 1.9989575825207494e-05, + "loss": 0.41346436738967896, + "step": 482 + }, + { + "epoch": 0.12826981808524765, + "grad_norm": 1.0162482863207583, + "learning_rate": 1.998937443221316e-05, + "loss": 0.4092825651168823, + "step": 483 + }, + { + "epoch": 0.1285353870667906, + "grad_norm": 0.9789070022917183, + "learning_rate": 1.998917111338525e-05, + "loss": 0.39763280749320984, + "step": 484 + }, + { + "epoch": 0.12880095604833355, + "grad_norm": 1.1639998102533433, + "learning_rate": 1.9988965868762956e-05, + "loss": 0.45523273944854736, + "step": 485 + }, + { + "epoch": 0.1290665250298765, + "grad_norm": 0.9737102573843942, + "learning_rate": 1.9988758698385854e-05, + "loss": 0.40181300044059753, + "step": 486 + }, + { + "epoch": 0.12933209401141946, + "grad_norm": 1.0269411713354706, + "learning_rate": 1.9988549602293884e-05, + "loss": 0.42487743496894836, + "step": 487 + }, + { + "epoch": 0.1295976629929624, + "grad_norm": 0.9805378587174307, + "learning_rate": 1.998833858052735e-05, + "loss": 0.41672298312187195, + "step": 488 + }, + { + "epoch": 0.1298632319745054, + "grad_norm": 0.9804335652831319, + "learning_rate": 1.998812563312694e-05, + "loss": 0.36750108003616333, + "step": 489 + }, + { + "epoch": 0.13012880095604834, + "grad_norm": 1.0991024476796578, + "learning_rate": 1.9987910760133712e-05, + "loss": 0.49290573596954346, + "step": 490 + }, + { + "epoch": 0.1303943699375913, + "grad_norm": 0.9956647709409898, + "learning_rate": 1.9987693961589084e-05, + "loss": 0.460039347410202, + "step": 491 + }, + { + 
"epoch": 0.13065993891913424, + "grad_norm": 1.269757897267166, + "learning_rate": 1.998747523753485e-05, + "loss": 0.4471668303012848, + "step": 492 + }, + { + "epoch": 0.1309255079006772, + "grad_norm": 0.9411513149719377, + "learning_rate": 1.9987254588013184e-05, + "loss": 0.395844966173172, + "step": 493 + }, + { + "epoch": 0.13119107688222015, + "grad_norm": 0.9546844808839872, + "learning_rate": 1.9987032013066623e-05, + "loss": 0.4465745985507965, + "step": 494 + }, + { + "epoch": 0.1314566458637631, + "grad_norm": 1.0929917252775374, + "learning_rate": 1.9986807512738075e-05, + "loss": 0.43123912811279297, + "step": 495 + }, + { + "epoch": 0.13172221484530608, + "grad_norm": 0.9741124155963404, + "learning_rate": 1.9986581087070824e-05, + "loss": 0.40066564083099365, + "step": 496 + }, + { + "epoch": 0.13198778382684903, + "grad_norm": 0.9421948045046618, + "learning_rate": 1.9986352736108515e-05, + "loss": 0.38514643907546997, + "step": 497 + }, + { + "epoch": 0.13225335280839198, + "grad_norm": 0.9713567699891517, + "learning_rate": 1.9986122459895182e-05, + "loss": 0.37397241592407227, + "step": 498 + }, + { + "epoch": 0.13251892178993493, + "grad_norm": 0.9697777712481016, + "learning_rate": 1.9985890258475215e-05, + "loss": 0.44865745306015015, + "step": 499 + }, + { + "epoch": 0.1327844907714779, + "grad_norm": 1.000823551239605, + "learning_rate": 1.9985656131893374e-05, + "loss": 0.4161406457424164, + "step": 500 + }, + { + "epoch": 0.13305005975302084, + "grad_norm": 1.049045844462056, + "learning_rate": 1.9985420080194804e-05, + "loss": 0.41364359855651855, + "step": 501 + }, + { + "epoch": 0.1333156287345638, + "grad_norm": 0.9766347522178017, + "learning_rate": 1.9985182103425007e-05, + "loss": 0.38466009497642517, + "step": 502 + }, + { + "epoch": 0.13358119771610677, + "grad_norm": 0.9820108788569575, + "learning_rate": 1.9984942201629868e-05, + "loss": 0.4189472794532776, + "step": 503 + }, + { + "epoch": 0.13384676669764972, + "grad_norm": 
1.0124943582595707, + "learning_rate": 1.998470037485563e-05, + "loss": 0.4088754653930664, + "step": 504 + }, + { + "epoch": 0.13411233567919267, + "grad_norm": 0.9404621165531668, + "learning_rate": 1.9984456623148923e-05, + "loss": 0.4197084307670593, + "step": 505 + }, + { + "epoch": 0.13437790466073563, + "grad_norm": 1.022677047132229, + "learning_rate": 1.998421094655673e-05, + "loss": 0.4318644404411316, + "step": 506 + }, + { + "epoch": 0.13464347364227858, + "grad_norm": 0.9443470782499029, + "learning_rate": 1.9983963345126423e-05, + "loss": 0.38180238008499146, + "step": 507 + }, + { + "epoch": 0.13490904262382153, + "grad_norm": 0.9655473739081939, + "learning_rate": 1.9983713818905733e-05, + "loss": 0.38704103231430054, + "step": 508 + }, + { + "epoch": 0.13517461160536448, + "grad_norm": 1.050357567916831, + "learning_rate": 1.998346236794276e-05, + "loss": 0.4206693768501282, + "step": 509 + }, + { + "epoch": 0.13544018058690746, + "grad_norm": 1.1108901361228778, + "learning_rate": 1.9983208992285993e-05, + "loss": 0.42818987369537354, + "step": 510 + }, + { + "epoch": 0.1357057495684504, + "grad_norm": 1.0771548955106338, + "learning_rate": 1.9982953691984274e-05, + "loss": 0.44592660665512085, + "step": 511 + }, + { + "epoch": 0.13597131854999336, + "grad_norm": 1.006125968429414, + "learning_rate": 1.9982696467086815e-05, + "loss": 0.4272580146789551, + "step": 512 + }, + { + "epoch": 0.13623688753153632, + "grad_norm": 1.084212872761102, + "learning_rate": 1.9982437317643218e-05, + "loss": 0.4416295289993286, + "step": 513 + }, + { + "epoch": 0.13650245651307927, + "grad_norm": 1.1040865905907058, + "learning_rate": 1.998217624370343e-05, + "loss": 0.45108669996261597, + "step": 514 + }, + { + "epoch": 0.13676802549462222, + "grad_norm": 0.9866796372680723, + "learning_rate": 1.9981913245317802e-05, + "loss": 0.40311864018440247, + "step": 515 + }, + { + "epoch": 0.13703359447616517, + "grad_norm": 1.041531014011416, + "learning_rate": 
1.9981648322537017e-05, + "loss": 0.4388020932674408, + "step": 516 + }, + { + "epoch": 0.13729916345770815, + "grad_norm": 1.069295153220874, + "learning_rate": 1.9981381475412162e-05, + "loss": 0.42741361260414124, + "step": 517 + }, + { + "epoch": 0.1375647324392511, + "grad_norm": 0.8562984414004653, + "learning_rate": 1.9981112703994677e-05, + "loss": 0.3766555190086365, + "step": 518 + }, + { + "epoch": 0.13783030142079405, + "grad_norm": 0.9297024970383198, + "learning_rate": 1.998084200833638e-05, + "loss": 0.38618308305740356, + "step": 519 + }, + { + "epoch": 0.138095870402337, + "grad_norm": 1.0033450202172107, + "learning_rate": 1.9980569388489457e-05, + "loss": 0.4553264379501343, + "step": 520 + }, + { + "epoch": 0.13836143938387996, + "grad_norm": 1.024202819723292, + "learning_rate": 1.9980294844506468e-05, + "loss": 0.44632673263549805, + "step": 521 + }, + { + "epoch": 0.1386270083654229, + "grad_norm": 1.0907023510727254, + "learning_rate": 1.998001837644033e-05, + "loss": 0.4285067617893219, + "step": 522 + }, + { + "epoch": 0.13889257734696586, + "grad_norm": 0.9721672428790065, + "learning_rate": 1.9979739984344365e-05, + "loss": 0.39360538125038147, + "step": 523 + }, + { + "epoch": 0.13915814632850884, + "grad_norm": 0.9475835393492287, + "learning_rate": 1.9979459668272226e-05, + "loss": 0.4007593095302582, + "step": 524 + }, + { + "epoch": 0.1394237153100518, + "grad_norm": 1.028990364637073, + "learning_rate": 1.9979177428277955e-05, + "loss": 0.40176767110824585, + "step": 525 + }, + { + "epoch": 0.13968928429159475, + "grad_norm": 1.0167293750004343, + "learning_rate": 1.9978893264415978e-05, + "loss": 0.4190528392791748, + "step": 526 + }, + { + "epoch": 0.1399548532731377, + "grad_norm": 0.9871913820335487, + "learning_rate": 1.9978607176741063e-05, + "loss": 0.4139288067817688, + "step": 527 + }, + { + "epoch": 0.14022042225468065, + "grad_norm": 0.8610694360554231, + "learning_rate": 1.9978319165308373e-05, + "loss": 
0.3666151463985443, + "step": 528 + }, + { + "epoch": 0.1404859912362236, + "grad_norm": 1.016794526359022, + "learning_rate": 1.997802923017343e-05, + "loss": 0.44621142745018005, + "step": 529 + }, + { + "epoch": 0.14075156021776655, + "grad_norm": 0.9742602007181285, + "learning_rate": 1.9977737371392134e-05, + "loss": 0.4162977635860443, + "step": 530 + }, + { + "epoch": 0.14101712919930953, + "grad_norm": 1.0386051117102446, + "learning_rate": 1.997744358902075e-05, + "loss": 0.438882052898407, + "step": 531 + }, + { + "epoch": 0.14128269818085248, + "grad_norm": 0.9131334625730753, + "learning_rate": 1.997714788311591e-05, + "loss": 0.43381333351135254, + "step": 532 + }, + { + "epoch": 0.14154826716239544, + "grad_norm": 1.0341262373297713, + "learning_rate": 1.9976850253734633e-05, + "loss": 0.41925039887428284, + "step": 533 + }, + { + "epoch": 0.1418138361439384, + "grad_norm": 1.0366031704059997, + "learning_rate": 1.997655070093429e-05, + "loss": 0.40469998121261597, + "step": 534 + }, + { + "epoch": 0.14207940512548134, + "grad_norm": 1.069653848503876, + "learning_rate": 1.9976249224772638e-05, + "loss": 0.4252749979496002, + "step": 535 + }, + { + "epoch": 0.1423449741070243, + "grad_norm": 0.9131599330211423, + "learning_rate": 1.9975945825307788e-05, + "loss": 0.42437341809272766, + "step": 536 + }, + { + "epoch": 0.14261054308856724, + "grad_norm": 0.9295944144104017, + "learning_rate": 1.9975640502598243e-05, + "loss": 0.3435184955596924, + "step": 537 + }, + { + "epoch": 0.14287611207011022, + "grad_norm": 1.135805935036872, + "learning_rate": 1.9975333256702864e-05, + "loss": 0.4677535593509674, + "step": 538 + }, + { + "epoch": 0.14314168105165317, + "grad_norm": 0.9857610455714647, + "learning_rate": 1.9975024087680873e-05, + "loss": 0.3860551118850708, + "step": 539 + }, + { + "epoch": 0.14340725003319613, + "grad_norm": 1.0260051612127887, + "learning_rate": 1.9974712995591887e-05, + "loss": 0.4067271649837494, + "step": 540 + }, + { + 
"epoch": 0.14367281901473908, + "grad_norm": 1.0673102525592195, + "learning_rate": 1.9974399980495877e-05, + "loss": 0.42236536741256714, + "step": 541 + }, + { + "epoch": 0.14393838799628203, + "grad_norm": 0.9825710114440017, + "learning_rate": 1.9974085042453188e-05, + "loss": 0.45230624079704285, + "step": 542 + }, + { + "epoch": 0.14420395697782498, + "grad_norm": 1.0223761508252163, + "learning_rate": 1.997376818152453e-05, + "loss": 0.428194522857666, + "step": 543 + }, + { + "epoch": 0.14446952595936793, + "grad_norm": 1.0337438279048081, + "learning_rate": 1.9973449397771004e-05, + "loss": 0.40774789452552795, + "step": 544 + }, + { + "epoch": 0.1447350949409109, + "grad_norm": 0.9168779980285519, + "learning_rate": 1.9973128691254054e-05, + "loss": 0.4086815118789673, + "step": 545 + }, + { + "epoch": 0.14500066392245387, + "grad_norm": 0.9934439062572693, + "learning_rate": 1.997280606203552e-05, + "loss": 0.4045162796974182, + "step": 546 + }, + { + "epoch": 0.14526623290399682, + "grad_norm": 1.0110955437735047, + "learning_rate": 1.9972481510177594e-05, + "loss": 0.40463268756866455, + "step": 547 + }, + { + "epoch": 0.14553180188553977, + "grad_norm": 1.0029896014566093, + "learning_rate": 1.9972155035742847e-05, + "loss": 0.46733587980270386, + "step": 548 + }, + { + "epoch": 0.14579737086708272, + "grad_norm": 0.9683751197048177, + "learning_rate": 1.997182663879422e-05, + "loss": 0.45210930705070496, + "step": 549 + }, + { + "epoch": 0.14606293984862567, + "grad_norm": 0.9559484778346481, + "learning_rate": 1.9971496319395022e-05, + "loss": 0.39798587560653687, + "step": 550 + }, + { + "epoch": 0.14632850883016862, + "grad_norm": 1.0582410708312875, + "learning_rate": 1.9971164077608937e-05, + "loss": 0.4166080057621002, + "step": 551 + }, + { + "epoch": 0.1465940778117116, + "grad_norm": 0.99705391441119, + "learning_rate": 1.9970829913500017e-05, + "loss": 0.3995435833930969, + "step": 552 + }, + { + "epoch": 0.14685964679325456, + "grad_norm": 
0.9693599664680953, + "learning_rate": 1.9970493827132686e-05, + "loss": 0.39335039258003235, + "step": 553 + }, + { + "epoch": 0.1471252157747975, + "grad_norm": 1.0653128556742777, + "learning_rate": 1.9970155818571733e-05, + "loss": 0.3923008441925049, + "step": 554 + }, + { + "epoch": 0.14739078475634046, + "grad_norm": 1.1000528384874784, + "learning_rate": 1.996981588788233e-05, + "loss": 0.42148759961128235, + "step": 555 + }, + { + "epoch": 0.1476563537378834, + "grad_norm": 0.9532704289154984, + "learning_rate": 1.9969474035130005e-05, + "loss": 0.36099517345428467, + "step": 556 + }, + { + "epoch": 0.14792192271942636, + "grad_norm": 0.9498609858415961, + "learning_rate": 1.9969130260380663e-05, + "loss": 0.39650559425354004, + "step": 557 + }, + { + "epoch": 0.14818749170096931, + "grad_norm": 0.9667452630427784, + "learning_rate": 1.9968784563700586e-05, + "loss": 0.36410078406333923, + "step": 558 + }, + { + "epoch": 0.1484530606825123, + "grad_norm": 1.002419821858965, + "learning_rate": 1.996843694515641e-05, + "loss": 0.41312888264656067, + "step": 559 + }, + { + "epoch": 0.14871862966405525, + "grad_norm": 1.1088153047335336, + "learning_rate": 1.9968087404815162e-05, + "loss": 0.3895263373851776, + "step": 560 + }, + { + "epoch": 0.1489841986455982, + "grad_norm": 1.2422388501205763, + "learning_rate": 1.9967735942744226e-05, + "loss": 0.4400597810745239, + "step": 561 + }, + { + "epoch": 0.14924976762714115, + "grad_norm": 1.1300700300497077, + "learning_rate": 1.9967382559011356e-05, + "loss": 0.36712852120399475, + "step": 562 + }, + { + "epoch": 0.1495153366086841, + "grad_norm": 1.0425502358891738, + "learning_rate": 1.9967027253684685e-05, + "loss": 0.4043564200401306, + "step": 563 + }, + { + "epoch": 0.14978090559022705, + "grad_norm": 1.101160625764444, + "learning_rate": 1.9966670026832707e-05, + "loss": 0.45233044028282166, + "step": 564 + }, + { + "epoch": 0.15004647457177, + "grad_norm": 1.3277254520379258, + "learning_rate": 
1.9966310878524297e-05, + "loss": 0.441600501537323, + "step": 565 + }, + { + "epoch": 0.15031204355331299, + "grad_norm": 1.0833095900878238, + "learning_rate": 1.9965949808828687e-05, + "loss": 0.4268038868904114, + "step": 566 + }, + { + "epoch": 0.15057761253485594, + "grad_norm": 1.1492448156590855, + "learning_rate": 1.9965586817815494e-05, + "loss": 0.41927874088287354, + "step": 567 + }, + { + "epoch": 0.1508431815163989, + "grad_norm": 1.026170307581087, + "learning_rate": 1.9965221905554695e-05, + "loss": 0.41488781571388245, + "step": 568 + }, + { + "epoch": 0.15110875049794184, + "grad_norm": 0.9559142330236491, + "learning_rate": 1.9964855072116642e-05, + "loss": 0.3624749779701233, + "step": 569 + }, + { + "epoch": 0.1513743194794848, + "grad_norm": 1.254830306735622, + "learning_rate": 1.996448631757206e-05, + "loss": 0.45119866728782654, + "step": 570 + }, + { + "epoch": 0.15163988846102774, + "grad_norm": 1.095837461898702, + "learning_rate": 1.996411564199203e-05, + "loss": 0.41389739513397217, + "step": 571 + }, + { + "epoch": 0.1519054574425707, + "grad_norm": 0.9684460814064966, + "learning_rate": 1.996374304544802e-05, + "loss": 0.3640916347503662, + "step": 572 + }, + { + "epoch": 0.15217102642411368, + "grad_norm": 1.0711015344753547, + "learning_rate": 1.9963368528011867e-05, + "loss": 0.45648565888404846, + "step": 573 + }, + { + "epoch": 0.15243659540565663, + "grad_norm": 0.9722794055909949, + "learning_rate": 1.9962992089755765e-05, + "loss": 0.4335980713367462, + "step": 574 + }, + { + "epoch": 0.15270216438719958, + "grad_norm": 1.158400874054287, + "learning_rate": 1.996261373075229e-05, + "loss": 0.3908158540725708, + "step": 575 + }, + { + "epoch": 0.15296773336874253, + "grad_norm": 0.9311953954584888, + "learning_rate": 1.996223345107439e-05, + "loss": 0.36533305048942566, + "step": 576 + }, + { + "epoch": 0.15323330235028548, + "grad_norm": 0.9771467412652409, + "learning_rate": 1.9961851250795372e-05, + "loss": 
0.407212495803833, + "step": 577 + }, + { + "epoch": 0.15349887133182843, + "grad_norm": 0.9988499065644934, + "learning_rate": 1.996146712998892e-05, + "loss": 0.4266315698623657, + "step": 578 + }, + { + "epoch": 0.1537644403133714, + "grad_norm": 0.9843108485081927, + "learning_rate": 1.9961081088729092e-05, + "loss": 0.3806581199169159, + "step": 579 + }, + { + "epoch": 0.15403000929491437, + "grad_norm": 0.9497423806639163, + "learning_rate": 1.9960693127090312e-05, + "loss": 0.40962716937065125, + "step": 580 + }, + { + "epoch": 0.15429557827645732, + "grad_norm": 0.94680923059909, + "learning_rate": 1.996030324514737e-05, + "loss": 0.4195394515991211, + "step": 581 + }, + { + "epoch": 0.15456114725800027, + "grad_norm": 1.0211843119224446, + "learning_rate": 1.995991144297543e-05, + "loss": 0.4366803765296936, + "step": 582 + }, + { + "epoch": 0.15482671623954322, + "grad_norm": 1.1779341722116263, + "learning_rate": 1.995951772065004e-05, + "loss": 0.44951680302619934, + "step": 583 + }, + { + "epoch": 0.15509228522108617, + "grad_norm": 1.1165714790353467, + "learning_rate": 1.9959122078247088e-05, + "loss": 0.42920851707458496, + "step": 584 + }, + { + "epoch": 0.15535785420262913, + "grad_norm": 1.3260467831670406, + "learning_rate": 1.9958724515842856e-05, + "loss": 0.3805098533630371, + "step": 585 + }, + { + "epoch": 0.15562342318417208, + "grad_norm": 1.1544212798945541, + "learning_rate": 1.995832503351399e-05, + "loss": 0.439333438873291, + "step": 586 + }, + { + "epoch": 0.15588899216571506, + "grad_norm": 0.9414235863159184, + "learning_rate": 1.9957923631337505e-05, + "loss": 0.38338547945022583, + "step": 587 + }, + { + "epoch": 0.156154561147258, + "grad_norm": 0.9711288321476074, + "learning_rate": 1.9957520309390786e-05, + "loss": 0.40603697299957275, + "step": 588 + }, + { + "epoch": 0.15642013012880096, + "grad_norm": 0.9468286962292546, + "learning_rate": 1.9957115067751594e-05, + "loss": 0.42816999554634094, + "step": 589 + }, + { + 
"epoch": 0.1566856991103439, + "grad_norm": 0.979497417166178, + "learning_rate": 1.9956707906498046e-05, + "loss": 0.42367884516716003, + "step": 590 + }, + { + "epoch": 0.15695126809188686, + "grad_norm": 1.1158588594509518, + "learning_rate": 1.995629882570864e-05, + "loss": 0.4349297881126404, + "step": 591 + }, + { + "epoch": 0.15721683707342982, + "grad_norm": 0.9762108745852242, + "learning_rate": 1.995588782546225e-05, + "loss": 0.37990960478782654, + "step": 592 + }, + { + "epoch": 0.15748240605497277, + "grad_norm": 0.9495653219493333, + "learning_rate": 1.9955474905838102e-05, + "loss": 0.4085468649864197, + "step": 593 + }, + { + "epoch": 0.15774797503651575, + "grad_norm": 0.9419429879365407, + "learning_rate": 1.995506006691581e-05, + "loss": 0.41362464427948, + "step": 594 + }, + { + "epoch": 0.1580135440180587, + "grad_norm": 1.002559702640921, + "learning_rate": 1.9954643308775342e-05, + "loss": 0.3830018937587738, + "step": 595 + }, + { + "epoch": 0.15827911299960165, + "grad_norm": 1.1505182326275074, + "learning_rate": 1.995422463149705e-05, + "loss": 0.48350822925567627, + "step": 596 + }, + { + "epoch": 0.1585446819811446, + "grad_norm": 0.9889824166630486, + "learning_rate": 1.995380403516165e-05, + "loss": 0.4215185344219208, + "step": 597 + }, + { + "epoch": 0.15881025096268755, + "grad_norm": 1.06826056700577, + "learning_rate": 1.9953381519850224e-05, + "loss": 0.42061948776245117, + "step": 598 + }, + { + "epoch": 0.1590758199442305, + "grad_norm": 1.032451381790901, + "learning_rate": 1.995295708564423e-05, + "loss": 0.38956254720687866, + "step": 599 + }, + { + "epoch": 0.15934138892577346, + "grad_norm": 1.0492553607775368, + "learning_rate": 1.9952530732625492e-05, + "loss": 0.3864685893058777, + "step": 600 + }, + { + "epoch": 0.15960695790731644, + "grad_norm": 0.9770856461072062, + "learning_rate": 1.9952102460876214e-05, + "loss": 0.395724356174469, + "step": 601 + }, + { + "epoch": 0.1598725268888594, + "grad_norm": 
1.04245602393598, + "learning_rate": 1.995167227047895e-05, + "loss": 0.4220300316810608, + "step": 602 + }, + { + "epoch": 0.16013809587040234, + "grad_norm": 1.1406615370546667, + "learning_rate": 1.9951240161516643e-05, + "loss": 0.4129142165184021, + "step": 603 + }, + { + "epoch": 0.1604036648519453, + "grad_norm": 0.983753356740355, + "learning_rate": 1.9950806134072595e-05, + "loss": 0.3951375484466553, + "step": 604 + }, + { + "epoch": 0.16066923383348825, + "grad_norm": 1.0214548083454909, + "learning_rate": 1.9950370188230486e-05, + "loss": 0.4117582142353058, + "step": 605 + }, + { + "epoch": 0.1609348028150312, + "grad_norm": 1.0340746201961049, + "learning_rate": 1.994993232407436e-05, + "loss": 0.3920668363571167, + "step": 606 + }, + { + "epoch": 0.16120037179657415, + "grad_norm": 0.9768399206450091, + "learning_rate": 1.9949492541688626e-05, + "loss": 0.3756999373435974, + "step": 607 + }, + { + "epoch": 0.16146594077811713, + "grad_norm": 1.0034054922110034, + "learning_rate": 1.9949050841158078e-05, + "loss": 0.41009610891342163, + "step": 608 + }, + { + "epoch": 0.16173150975966008, + "grad_norm": 0.9847346075479474, + "learning_rate": 1.994860722256786e-05, + "loss": 0.3986571729183197, + "step": 609 + }, + { + "epoch": 0.16199707874120303, + "grad_norm": 0.9978440495541314, + "learning_rate": 1.994816168600351e-05, + "loss": 0.3903341591358185, + "step": 610 + }, + { + "epoch": 0.16226264772274598, + "grad_norm": 0.9992231775305654, + "learning_rate": 1.994771423155091e-05, + "loss": 0.39725261926651, + "step": 611 + }, + { + "epoch": 0.16252821670428894, + "grad_norm": 0.9446936558476315, + "learning_rate": 1.994726485929633e-05, + "loss": 0.39461129903793335, + "step": 612 + }, + { + "epoch": 0.1627937856858319, + "grad_norm": 1.0162077284831286, + "learning_rate": 1.99468135693264e-05, + "loss": 0.41346144676208496, + "step": 613 + }, + { + "epoch": 0.16305935466737484, + "grad_norm": 1.0305116850266922, + "learning_rate": 
1.9946360361728127e-05, + "loss": 0.41148197650909424, + "step": 614 + }, + { + "epoch": 0.16332492364891782, + "grad_norm": 0.9678436330540818, + "learning_rate": 1.9945905236588884e-05, + "loss": 0.38204139471054077, + "step": 615 + }, + { + "epoch": 0.16359049263046077, + "grad_norm": 0.9830320911733957, + "learning_rate": 1.9945448193996412e-05, + "loss": 0.41496896743774414, + "step": 616 + }, + { + "epoch": 0.16385606161200372, + "grad_norm": 0.9327494941136337, + "learning_rate": 1.994498923403882e-05, + "loss": 0.38998982310295105, + "step": 617 + }, + { + "epoch": 0.16412163059354667, + "grad_norm": 1.0310759290486786, + "learning_rate": 1.99445283568046e-05, + "loss": 0.39018991589546204, + "step": 618 + }, + { + "epoch": 0.16438719957508963, + "grad_norm": 1.1133251353738367, + "learning_rate": 1.9944065562382594e-05, + "loss": 0.41579991579055786, + "step": 619 + }, + { + "epoch": 0.16465276855663258, + "grad_norm": 1.1413714641323347, + "learning_rate": 1.9943600850862027e-05, + "loss": 0.426283061504364, + "step": 620 + }, + { + "epoch": 0.16491833753817553, + "grad_norm": 1.0537239280428552, + "learning_rate": 1.9943134222332493e-05, + "loss": 0.418672651052475, + "step": 621 + }, + { + "epoch": 0.1651839065197185, + "grad_norm": 1.0177048245128393, + "learning_rate": 1.9942665676883946e-05, + "loss": 0.4014776349067688, + "step": 622 + }, + { + "epoch": 0.16544947550126146, + "grad_norm": 0.9703989792649265, + "learning_rate": 1.994219521460672e-05, + "loss": 0.3714776933193207, + "step": 623 + }, + { + "epoch": 0.1657150444828044, + "grad_norm": 1.005321267739283, + "learning_rate": 1.9941722835591514e-05, + "loss": 0.39415785670280457, + "step": 624 + }, + { + "epoch": 0.16598061346434737, + "grad_norm": 1.739817458909074, + "learning_rate": 1.9941248539929395e-05, + "loss": 0.3706223964691162, + "step": 625 + }, + { + "epoch": 0.16624618244589032, + "grad_norm": 0.9887487099192142, + "learning_rate": 1.9940772327711807e-05, + "loss": 
0.4167429208755493, + "step": 626 + }, + { + "epoch": 0.16651175142743327, + "grad_norm": 1.0502993213264278, + "learning_rate": 1.9940294199030553e-05, + "loss": 0.38234227895736694, + "step": 627 + }, + { + "epoch": 0.16677732040897622, + "grad_norm": 0.9929957655695576, + "learning_rate": 1.9939814153977813e-05, + "loss": 0.4139519929885864, + "step": 628 + }, + { + "epoch": 0.1670428893905192, + "grad_norm": 1.0428716869119874, + "learning_rate": 1.9939332192646136e-05, + "loss": 0.44490402936935425, + "step": 629 + }, + { + "epoch": 0.16730845837206215, + "grad_norm": 0.9723220719956404, + "learning_rate": 1.993884831512843e-05, + "loss": 0.3870658278465271, + "step": 630 + }, + { + "epoch": 0.1675740273536051, + "grad_norm": 0.9337218443909966, + "learning_rate": 1.993836252151799e-05, + "loss": 0.3308948278427124, + "step": 631 + }, + { + "epoch": 0.16783959633514806, + "grad_norm": 1.1119638169858157, + "learning_rate": 1.993787481190847e-05, + "loss": 0.3727487623691559, + "step": 632 + }, + { + "epoch": 0.168105165316691, + "grad_norm": 1.0025380900585623, + "learning_rate": 1.9937385186393888e-05, + "loss": 0.4277465343475342, + "step": 633 + }, + { + "epoch": 0.16837073429823396, + "grad_norm": 1.2120120873899203, + "learning_rate": 1.9936893645068647e-05, + "loss": 0.4276485741138458, + "step": 634 + }, + { + "epoch": 0.1686363032797769, + "grad_norm": 1.000070161461063, + "learning_rate": 1.9936400188027502e-05, + "loss": 0.374578058719635, + "step": 635 + }, + { + "epoch": 0.1689018722613199, + "grad_norm": 1.113556890943216, + "learning_rate": 1.993590481536559e-05, + "loss": 0.4583400785923004, + "step": 636 + }, + { + "epoch": 0.16916744124286284, + "grad_norm": 0.9731147624235688, + "learning_rate": 1.9935407527178417e-05, + "loss": 0.3734489679336548, + "step": 637 + }, + { + "epoch": 0.1694330102244058, + "grad_norm": 1.0110441212525507, + "learning_rate": 1.9934908323561846e-05, + "loss": 0.39524513483047485, + "step": 638 + }, + { + "epoch": 
0.16969857920594875, + "grad_norm": 1.0264447655460065, + "learning_rate": 1.9934407204612124e-05, + "loss": 0.42300352454185486, + "step": 639 + }, + { + "epoch": 0.1699641481874917, + "grad_norm": 0.9950374891978715, + "learning_rate": 1.9933904170425858e-05, + "loss": 0.4152276813983917, + "step": 640 + }, + { + "epoch": 0.17022971716903465, + "grad_norm": 1.230783330329369, + "learning_rate": 1.9933399221100026e-05, + "loss": 0.43046653270721436, + "step": 641 + }, + { + "epoch": 0.1704952861505776, + "grad_norm": 1.0095783418631343, + "learning_rate": 1.993289235673198e-05, + "loss": 0.4134339392185211, + "step": 642 + }, + { + "epoch": 0.17076085513212058, + "grad_norm": 1.0051407398693462, + "learning_rate": 1.9932383577419432e-05, + "loss": 0.44028693437576294, + "step": 643 + }, + { + "epoch": 0.17102642411366353, + "grad_norm": 1.0208746920457954, + "learning_rate": 1.9931872883260473e-05, + "loss": 0.3790222704410553, + "step": 644 + }, + { + "epoch": 0.17129199309520649, + "grad_norm": 1.041462978505965, + "learning_rate": 1.9931360274353556e-05, + "loss": 0.3683086633682251, + "step": 645 + }, + { + "epoch": 0.17155756207674944, + "grad_norm": 1.0400069352454702, + "learning_rate": 1.993084575079751e-05, + "loss": 0.3630594313144684, + "step": 646 + }, + { + "epoch": 0.1718231310582924, + "grad_norm": 1.0694046561659416, + "learning_rate": 1.993032931269153e-05, + "loss": 0.4398641884326935, + "step": 647 + }, + { + "epoch": 0.17208870003983534, + "grad_norm": 1.107156801944608, + "learning_rate": 1.992981096013517e-05, + "loss": 0.42222845554351807, + "step": 648 + }, + { + "epoch": 0.1723542690213783, + "grad_norm": 1.043160064840446, + "learning_rate": 1.992929069322837e-05, + "loss": 0.38966643810272217, + "step": 649 + }, + { + "epoch": 0.17261983800292127, + "grad_norm": 1.0607803195691352, + "learning_rate": 1.992876851207143e-05, + "loss": 0.4394804835319519, + "step": 650 + }, + { + "epoch": 0.17288540698446422, + "grad_norm": 
0.9714467718451273, + "learning_rate": 1.9928244416765022e-05, + "loss": 0.3475287854671478, + "step": 651 + }, + { + "epoch": 0.17315097596600718, + "grad_norm": 0.9848879046616053, + "learning_rate": 1.992771840741018e-05, + "loss": 0.40047168731689453, + "step": 652 + }, + { + "epoch": 0.17341654494755013, + "grad_norm": 1.0744593937096147, + "learning_rate": 1.9927190484108315e-05, + "loss": 0.4028981328010559, + "step": 653 + }, + { + "epoch": 0.17368211392909308, + "grad_norm": 1.010491020672817, + "learning_rate": 1.9926660646961208e-05, + "loss": 0.3891482949256897, + "step": 654 + }, + { + "epoch": 0.17394768291063603, + "grad_norm": 1.1163232689680433, + "learning_rate": 1.9926128896071e-05, + "loss": 0.4570680856704712, + "step": 655 + }, + { + "epoch": 0.17421325189217898, + "grad_norm": 0.9509061944047602, + "learning_rate": 1.992559523154021e-05, + "loss": 0.392758309841156, + "step": 656 + }, + { + "epoch": 0.17447882087372196, + "grad_norm": 0.9648168194829144, + "learning_rate": 1.992505965347172e-05, + "loss": 0.39552047848701477, + "step": 657 + }, + { + "epoch": 0.17474438985526491, + "grad_norm": 1.045434666464082, + "learning_rate": 1.992452216196879e-05, + "loss": 0.4412619173526764, + "step": 658 + }, + { + "epoch": 0.17500995883680787, + "grad_norm": 1.033655605856329, + "learning_rate": 1.9923982757135028e-05, + "loss": 0.4075942635536194, + "step": 659 + }, + { + "epoch": 0.17527552781835082, + "grad_norm": 1.0660210414475448, + "learning_rate": 1.9923441439074434e-05, + "loss": 0.44615018367767334, + "step": 660 + }, + { + "epoch": 0.17554109679989377, + "grad_norm": 0.9504988883268379, + "learning_rate": 1.992289820789137e-05, + "loss": 0.3957441449165344, + "step": 661 + }, + { + "epoch": 0.17580666578143672, + "grad_norm": 0.9513339400965243, + "learning_rate": 1.992235306369056e-05, + "loss": 0.4014820158481598, + "step": 662 + }, + { + "epoch": 0.17607223476297967, + "grad_norm": 0.9988043316582222, + "learning_rate": 
1.9921806006577102e-05, + "loss": 0.39478158950805664, + "step": 663 + }, + { + "epoch": 0.17633780374452265, + "grad_norm": 1.0278124558587338, + "learning_rate": 1.9921257036656463e-05, + "loss": 0.45742082595825195, + "step": 664 + }, + { + "epoch": 0.1766033727260656, + "grad_norm": 0.9674516471555401, + "learning_rate": 1.9920706154034477e-05, + "loss": 0.36519041657447815, + "step": 665 + }, + { + "epoch": 0.17686894170760856, + "grad_norm": 1.0086354363577679, + "learning_rate": 1.992015335881735e-05, + "loss": 0.40599358081817627, + "step": 666 + }, + { + "epoch": 0.1771345106891515, + "grad_norm": 0.958585892866014, + "learning_rate": 1.991959865111165e-05, + "loss": 0.4064781367778778, + "step": 667 + }, + { + "epoch": 0.17740007967069446, + "grad_norm": 0.9430583774727941, + "learning_rate": 1.991904203102432e-05, + "loss": 0.4076484143733978, + "step": 668 + }, + { + "epoch": 0.1776656486522374, + "grad_norm": 1.1044553051326549, + "learning_rate": 1.9918483498662678e-05, + "loss": 0.42157143354415894, + "step": 669 + }, + { + "epoch": 0.17793121763378036, + "grad_norm": 1.005923050768092, + "learning_rate": 1.9917923054134388e-05, + "loss": 0.3814900517463684, + "step": 670 + }, + { + "epoch": 0.17819678661532334, + "grad_norm": 1.0156953904207233, + "learning_rate": 1.9917360697547506e-05, + "loss": 0.4211175739765167, + "step": 671 + }, + { + "epoch": 0.1784623555968663, + "grad_norm": 1.0530805044024834, + "learning_rate": 1.991679642901045e-05, + "loss": 0.3975893259048462, + "step": 672 + }, + { + "epoch": 0.17872792457840925, + "grad_norm": 0.9633270935214763, + "learning_rate": 1.9916230248631993e-05, + "loss": 0.36090826988220215, + "step": 673 + }, + { + "epoch": 0.1789934935599522, + "grad_norm": 0.9408638333666679, + "learning_rate": 1.99156621565213e-05, + "loss": 0.36511334776878357, + "step": 674 + }, + { + "epoch": 0.17925906254149515, + "grad_norm": 1.0839117569759185, + "learning_rate": 1.9915092152787888e-05, + "loss": 
0.4131924510002136, + "step": 675 + }, + { + "epoch": 0.1795246315230381, + "grad_norm": 1.1407281463751517, + "learning_rate": 1.9914520237541644e-05, + "loss": 0.4283728301525116, + "step": 676 + }, + { + "epoch": 0.17979020050458105, + "grad_norm": 0.9751873028047018, + "learning_rate": 1.991394641089283e-05, + "loss": 0.3855544924736023, + "step": 677 + }, + { + "epoch": 0.18005576948612403, + "grad_norm": 1.3517309919327671, + "learning_rate": 1.9913370672952074e-05, + "loss": 0.41288501024246216, + "step": 678 + }, + { + "epoch": 0.180321338467667, + "grad_norm": 1.1127679640996702, + "learning_rate": 1.9912793023830365e-05, + "loss": 0.3824073076248169, + "step": 679 + }, + { + "epoch": 0.18058690744920994, + "grad_norm": 1.0055812841256684, + "learning_rate": 1.9912213463639077e-05, + "loss": 0.39005106687545776, + "step": 680 + }, + { + "epoch": 0.1808524764307529, + "grad_norm": 1.0115332151563563, + "learning_rate": 1.9911631992489933e-05, + "loss": 0.3521374464035034, + "step": 681 + }, + { + "epoch": 0.18111804541229584, + "grad_norm": 0.983790464571211, + "learning_rate": 1.9911048610495037e-05, + "loss": 0.337347149848938, + "step": 682 + }, + { + "epoch": 0.1813836143938388, + "grad_norm": 1.1534370397304132, + "learning_rate": 1.9910463317766864e-05, + "loss": 0.4349983334541321, + "step": 683 + }, + { + "epoch": 0.18164918337538175, + "grad_norm": 1.059114838428009, + "learning_rate": 1.9909876114418242e-05, + "loss": 0.3783540427684784, + "step": 684 + }, + { + "epoch": 0.18191475235692472, + "grad_norm": 1.0050293498117582, + "learning_rate": 1.9909287000562383e-05, + "loss": 0.4065130054950714, + "step": 685 + }, + { + "epoch": 0.18218032133846768, + "grad_norm": 1.0122618604087057, + "learning_rate": 1.990869597631286e-05, + "loss": 0.3876315653324127, + "step": 686 + }, + { + "epoch": 0.18244589032001063, + "grad_norm": 0.9622962910168786, + "learning_rate": 1.9908103041783615e-05, + "loss": 0.3716024160385132, + "step": 687 + }, + { + 
"epoch": 0.18271145930155358, + "grad_norm": 1.086778230300176, + "learning_rate": 1.990750819708896e-05, + "loss": 0.4096733331680298, + "step": 688 + }, + { + "epoch": 0.18297702828309653, + "grad_norm": 1.131269280292305, + "learning_rate": 1.9906911442343567e-05, + "loss": 0.41432395577430725, + "step": 689 + }, + { + "epoch": 0.18324259726463948, + "grad_norm": 1.1182736792418642, + "learning_rate": 1.9906312777662493e-05, + "loss": 0.3934200406074524, + "step": 690 + }, + { + "epoch": 0.18350816624618244, + "grad_norm": 1.0493015785833109, + "learning_rate": 1.9905712203161148e-05, + "loss": 0.4246784746646881, + "step": 691 + }, + { + "epoch": 0.18377373522772542, + "grad_norm": 1.1362836227785695, + "learning_rate": 1.9905109718955323e-05, + "loss": 0.40027567744255066, + "step": 692 + }, + { + "epoch": 0.18403930420926837, + "grad_norm": 1.056262242708622, + "learning_rate": 1.990450532516116e-05, + "loss": 0.4162583351135254, + "step": 693 + }, + { + "epoch": 0.18430487319081132, + "grad_norm": 1.05760814074371, + "learning_rate": 1.990389902189518e-05, + "loss": 0.4133074879646301, + "step": 694 + }, + { + "epoch": 0.18457044217235427, + "grad_norm": 1.0438921885629904, + "learning_rate": 1.9903290809274277e-05, + "loss": 0.333192378282547, + "step": 695 + }, + { + "epoch": 0.18483601115389722, + "grad_norm": 0.9814281867123515, + "learning_rate": 1.9902680687415704e-05, + "loss": 0.39349496364593506, + "step": 696 + }, + { + "epoch": 0.18510158013544017, + "grad_norm": 1.0366332083029342, + "learning_rate": 1.9902068656437086e-05, + "loss": 0.39678412675857544, + "step": 697 + }, + { + "epoch": 0.18536714911698313, + "grad_norm": 1.0003960978434148, + "learning_rate": 1.9901454716456415e-05, + "loss": 0.3553932011127472, + "step": 698 + }, + { + "epoch": 0.18563271809852608, + "grad_norm": 1.0876315802223169, + "learning_rate": 1.990083886759205e-05, + "loss": 0.4264630079269409, + "step": 699 + }, + { + "epoch": 0.18589828708006906, + "grad_norm": 
1.0135520655053032, + "learning_rate": 1.9900221109962726e-05, + "loss": 0.3883950412273407, + "step": 700 + }, + { + "epoch": 0.186163856061612, + "grad_norm": 1.0408639715408188, + "learning_rate": 1.989960144368753e-05, + "loss": 0.38465407490730286, + "step": 701 + }, + { + "epoch": 0.18642942504315496, + "grad_norm": 2.2198594223984065, + "learning_rate": 1.9898979868885933e-05, + "loss": 0.39897871017456055, + "step": 702 + }, + { + "epoch": 0.1866949940246979, + "grad_norm": 1.120873004114704, + "learning_rate": 1.9898356385677762e-05, + "loss": 0.4386023283004761, + "step": 703 + }, + { + "epoch": 0.18696056300624087, + "grad_norm": 1.0254606123190075, + "learning_rate": 1.989773099418322e-05, + "loss": 0.42621874809265137, + "step": 704 + }, + { + "epoch": 0.18722613198778382, + "grad_norm": 1.0153284696458207, + "learning_rate": 1.9897103694522877e-05, + "loss": 0.3811546266078949, + "step": 705 + }, + { + "epoch": 0.18749170096932677, + "grad_norm": 1.0634877610237485, + "learning_rate": 1.989647448681767e-05, + "loss": 0.4018982946872711, + "step": 706 + }, + { + "epoch": 0.18775726995086975, + "grad_norm": 1.0316038713106725, + "learning_rate": 1.9895843371188897e-05, + "loss": 0.3920126259326935, + "step": 707 + }, + { + "epoch": 0.1880228389324127, + "grad_norm": 0.9767495366810068, + "learning_rate": 1.9895210347758233e-05, + "loss": 0.3598487973213196, + "step": 708 + }, + { + "epoch": 0.18828840791395565, + "grad_norm": 1.0286682270198635, + "learning_rate": 1.9894575416647717e-05, + "loss": 0.4204316735267639, + "step": 709 + }, + { + "epoch": 0.1885539768954986, + "grad_norm": 0.9653709480495668, + "learning_rate": 1.9893938577979755e-05, + "loss": 0.33814263343811035, + "step": 710 + }, + { + "epoch": 0.18881954587704156, + "grad_norm": 0.9588770367914977, + "learning_rate": 1.9893299831877124e-05, + "loss": 0.3788227140903473, + "step": 711 + }, + { + "epoch": 0.1890851148585845, + "grad_norm": 0.9974371582936609, + "learning_rate": 
1.989265917846297e-05, + "loss": 0.38141176104545593, + "step": 712 + }, + { + "epoch": 0.18935068384012746, + "grad_norm": 1.0051109402301954, + "learning_rate": 1.9892016617860793e-05, + "loss": 0.3757280707359314, + "step": 713 + }, + { + "epoch": 0.18961625282167044, + "grad_norm": 0.9863956856856875, + "learning_rate": 1.989137215019448e-05, + "loss": 0.37819087505340576, + "step": 714 + }, + { + "epoch": 0.1898818218032134, + "grad_norm": 1.1797000402703188, + "learning_rate": 1.9890725775588277e-05, + "loss": 0.46046000719070435, + "step": 715 + }, + { + "epoch": 0.19014739078475634, + "grad_norm": 0.9967163493181064, + "learning_rate": 1.9890077494166792e-05, + "loss": 0.33967363834381104, + "step": 716 + }, + { + "epoch": 0.1904129597662993, + "grad_norm": 0.9620841339155507, + "learning_rate": 1.988942730605501e-05, + "loss": 0.36672675609588623, + "step": 717 + }, + { + "epoch": 0.19067852874784225, + "grad_norm": 1.0666183498740949, + "learning_rate": 1.9888775211378278e-05, + "loss": 0.38705015182495117, + "step": 718 + }, + { + "epoch": 0.1909440977293852, + "grad_norm": 1.0696051052523068, + "learning_rate": 1.9888121210262313e-05, + "loss": 0.35257095098495483, + "step": 719 + }, + { + "epoch": 0.19120966671092815, + "grad_norm": 1.0337108803934987, + "learning_rate": 1.9887465302833194e-05, + "loss": 0.3803965449333191, + "step": 720 + }, + { + "epoch": 0.19147523569247113, + "grad_norm": 1.0097965015220993, + "learning_rate": 1.988680748921738e-05, + "loss": 0.38166487216949463, + "step": 721 + }, + { + "epoch": 0.19174080467401408, + "grad_norm": 0.971159209120872, + "learning_rate": 1.988614776954169e-05, + "loss": 0.4017483592033386, + "step": 722 + }, + { + "epoch": 0.19200637365555703, + "grad_norm": 1.0651840937747212, + "learning_rate": 1.98854861439333e-05, + "loss": 0.4343035817146301, + "step": 723 + }, + { + "epoch": 0.19227194263709999, + "grad_norm": 1.0527178531986199, + "learning_rate": 1.9884822612519773e-05, + "loss": 
0.4017031192779541, + "step": 724 + }, + { + "epoch": 0.19253751161864294, + "grad_norm": 0.9558335625340557, + "learning_rate": 1.988415717542903e-05, + "loss": 0.32294636964797974, + "step": 725 + }, + { + "epoch": 0.1928030806001859, + "grad_norm": 1.018550638071552, + "learning_rate": 1.988348983278935e-05, + "loss": 0.34661561250686646, + "step": 726 + }, + { + "epoch": 0.19306864958172884, + "grad_norm": 1.1264464061553692, + "learning_rate": 1.98828205847294e-05, + "loss": 0.3588724434375763, + "step": 727 + }, + { + "epoch": 0.19333421856327182, + "grad_norm": 1.151476031768393, + "learning_rate": 1.9882149431378194e-05, + "loss": 0.45439180731773376, + "step": 728 + }, + { + "epoch": 0.19359978754481477, + "grad_norm": 1.092854672146059, + "learning_rate": 1.988147637286513e-05, + "loss": 0.3916742205619812, + "step": 729 + }, + { + "epoch": 0.19386535652635772, + "grad_norm": 1.1073017625666908, + "learning_rate": 1.988080140931996e-05, + "loss": 0.3838115334510803, + "step": 730 + }, + { + "epoch": 0.19413092550790068, + "grad_norm": 1.0305888563782257, + "learning_rate": 1.9880124540872813e-05, + "loss": 0.3803096413612366, + "step": 731 + }, + { + "epoch": 0.19439649448944363, + "grad_norm": 1.0697488639709387, + "learning_rate": 1.987944576765418e-05, + "loss": 0.4180675446987152, + "step": 732 + }, + { + "epoch": 0.19466206347098658, + "grad_norm": 0.968492149308095, + "learning_rate": 1.987876508979492e-05, + "loss": 0.34485924243927, + "step": 733 + }, + { + "epoch": 0.19492763245252953, + "grad_norm": 1.0301319893667387, + "learning_rate": 1.987808250742626e-05, + "loss": 0.3696223795413971, + "step": 734 + }, + { + "epoch": 0.1951932014340725, + "grad_norm": 1.0070871597151176, + "learning_rate": 1.9877398020679796e-05, + "loss": 0.39920324087142944, + "step": 735 + }, + { + "epoch": 0.19545877041561546, + "grad_norm": 0.9772548764362861, + "learning_rate": 1.987671162968748e-05, + "loss": 0.33534419536590576, + "step": 736 + }, + { + "epoch": 
0.19572433939715841, + "grad_norm": 0.955184588375953, + "learning_rate": 1.9876023334581657e-05, + "loss": 0.3698185682296753, + "step": 737 + }, + { + "epoch": 0.19598990837870137, + "grad_norm": 1.0108475553340988, + "learning_rate": 1.9875333135495e-05, + "loss": 0.37388375401496887, + "step": 738 + }, + { + "epoch": 0.19625547736024432, + "grad_norm": 0.9685434293396273, + "learning_rate": 1.9874641032560594e-05, + "loss": 0.3285469114780426, + "step": 739 + }, + { + "epoch": 0.19652104634178727, + "grad_norm": 1.01794140535256, + "learning_rate": 1.9873947025911854e-05, + "loss": 0.3539549708366394, + "step": 740 + }, + { + "epoch": 0.19678661532333022, + "grad_norm": 1.0943847325994938, + "learning_rate": 1.9873251115682577e-05, + "loss": 0.4707021117210388, + "step": 741 + }, + { + "epoch": 0.1970521843048732, + "grad_norm": 0.9783865509799976, + "learning_rate": 1.987255330200693e-05, + "loss": 0.3871781826019287, + "step": 742 + }, + { + "epoch": 0.19731775328641615, + "grad_norm": 1.0462206197157178, + "learning_rate": 1.9871853585019446e-05, + "loss": 0.3890243172645569, + "step": 743 + }, + { + "epoch": 0.1975833222679591, + "grad_norm": 0.9914096392216383, + "learning_rate": 1.9871151964855013e-05, + "loss": 0.34914374351501465, + "step": 744 + }, + { + "epoch": 0.19784889124950206, + "grad_norm": 1.0157439665946277, + "learning_rate": 1.9870448441648905e-05, + "loss": 0.41009777784347534, + "step": 745 + }, + { + "epoch": 0.198114460231045, + "grad_norm": 1.0725931773033663, + "learning_rate": 1.9869743015536747e-05, + "loss": 0.39449363946914673, + "step": 746 + }, + { + "epoch": 0.19838002921258796, + "grad_norm": 1.081644116196219, + "learning_rate": 1.9869035686654538e-05, + "loss": 0.3530065417289734, + "step": 747 + }, + { + "epoch": 0.1986455981941309, + "grad_norm": 1.1338420898560146, + "learning_rate": 1.986832645513864e-05, + "loss": 0.4255196154117584, + "step": 748 + }, + { + "epoch": 0.1989111671756739, + "grad_norm": 
1.0625457917520444, + "learning_rate": 1.9867615321125796e-05, + "loss": 0.3921143114566803, + "step": 749 + }, + { + "epoch": 0.19917673615721684, + "grad_norm": 1.1076371778966394, + "learning_rate": 1.986690228475309e-05, + "loss": 0.4157381057739258, + "step": 750 + }, + { + "epoch": 0.1994423051387598, + "grad_norm": 0.9887260401437288, + "learning_rate": 1.986618734615799e-05, + "loss": 0.3922047019004822, + "step": 751 + }, + { + "epoch": 0.19970787412030275, + "grad_norm": 1.2477225666156357, + "learning_rate": 1.9865470505478335e-05, + "loss": 0.4378710985183716, + "step": 752 + }, + { + "epoch": 0.1999734431018457, + "grad_norm": 0.9960415180367619, + "learning_rate": 1.986475176285232e-05, + "loss": 0.3636753261089325, + "step": 753 + }, + { + "epoch": 0.20023901208338865, + "grad_norm": 1.0691751577172293, + "learning_rate": 1.986403111841851e-05, + "loss": 0.3509834408760071, + "step": 754 + }, + { + "epoch": 0.2005045810649316, + "grad_norm": 0.9490438891131449, + "learning_rate": 1.986330857231583e-05, + "loss": 0.3539624512195587, + "step": 755 + }, + { + "epoch": 0.20077015004647458, + "grad_norm": 1.002849163142055, + "learning_rate": 1.9862584124683587e-05, + "loss": 0.417904257774353, + "step": 756 + }, + { + "epoch": 0.20103571902801753, + "grad_norm": 0.9438738740406134, + "learning_rate": 1.9861857775661442e-05, + "loss": 0.3602277636528015, + "step": 757 + }, + { + "epoch": 0.2013012880095605, + "grad_norm": 1.0703002408877305, + "learning_rate": 1.986112952538943e-05, + "loss": 0.41064661741256714, + "step": 758 + }, + { + "epoch": 0.20156685699110344, + "grad_norm": 0.9789269746167363, + "learning_rate": 1.9860399374007944e-05, + "loss": 0.36313754320144653, + "step": 759 + }, + { + "epoch": 0.2018324259726464, + "grad_norm": 1.0711706181502203, + "learning_rate": 1.9859667321657755e-05, + "loss": 0.39497628808021545, + "step": 760 + }, + { + "epoch": 0.20209799495418934, + "grad_norm": 1.0173001682725575, + "learning_rate": 
1.9858933368479987e-05, + "loss": 0.405613511800766, + "step": 761 + }, + { + "epoch": 0.2023635639357323, + "grad_norm": 0.9881458101524105, + "learning_rate": 1.9858197514616142e-05, + "loss": 0.39093440771102905, + "step": 762 + }, + { + "epoch": 0.20262913291727527, + "grad_norm": 1.0330584509521943, + "learning_rate": 1.9857459760208084e-05, + "loss": 0.39908382296562195, + "step": 763 + }, + { + "epoch": 0.20289470189881822, + "grad_norm": 0.9416263868211369, + "learning_rate": 1.9856720105398038e-05, + "loss": 0.36787620186805725, + "step": 764 + }, + { + "epoch": 0.20316027088036118, + "grad_norm": 1.0128388377672763, + "learning_rate": 1.985597855032861e-05, + "loss": 0.390550822019577, + "step": 765 + }, + { + "epoch": 0.20342583986190413, + "grad_norm": 1.115759431869763, + "learning_rate": 1.9855235095142754e-05, + "loss": 0.4191611409187317, + "step": 766 + }, + { + "epoch": 0.20369140884344708, + "grad_norm": 1.1288935622655036, + "learning_rate": 1.985448973998381e-05, + "loss": 0.4060766100883484, + "step": 767 + }, + { + "epoch": 0.20395697782499003, + "grad_norm": 1.055264696895727, + "learning_rate": 1.985374248499546e-05, + "loss": 0.3906163275241852, + "step": 768 + }, + { + "epoch": 0.20422254680653298, + "grad_norm": 1.0101644212894914, + "learning_rate": 1.9852993330321774e-05, + "loss": 0.3926839828491211, + "step": 769 + }, + { + "epoch": 0.20448811578807596, + "grad_norm": 1.0474151984911524, + "learning_rate": 1.9852242276107182e-05, + "loss": 0.37276068329811096, + "step": 770 + }, + { + "epoch": 0.20475368476961892, + "grad_norm": 0.9531396793135881, + "learning_rate": 1.9851489322496476e-05, + "loss": 0.3765360414981842, + "step": 771 + }, + { + "epoch": 0.20501925375116187, + "grad_norm": 1.0017274873228423, + "learning_rate": 1.9850734469634815e-05, + "loss": 0.35091257095336914, + "step": 772 + }, + { + "epoch": 0.20528482273270482, + "grad_norm": 1.1164065944268338, + "learning_rate": 1.9849977717667725e-05, + "loss": 
0.4259791076183319, + "step": 773 + }, + { + "epoch": 0.20555039171424777, + "grad_norm": 0.9939508272565134, + "learning_rate": 1.9849219066741102e-05, + "loss": 0.3563114404678345, + "step": 774 + }, + { + "epoch": 0.20581596069579072, + "grad_norm": 1.0814350606971046, + "learning_rate": 1.9848458517001203e-05, + "loss": 0.4148223102092743, + "step": 775 + }, + { + "epoch": 0.20608152967733367, + "grad_norm": 1.0296405515766518, + "learning_rate": 1.9847696068594655e-05, + "loss": 0.3817785382270813, + "step": 776 + }, + { + "epoch": 0.20634709865887665, + "grad_norm": 1.115875170640065, + "learning_rate": 1.984693172166845e-05, + "loss": 0.41741886734962463, + "step": 777 + }, + { + "epoch": 0.2066126676404196, + "grad_norm": 1.0479957521256793, + "learning_rate": 1.9846165476369938e-05, + "loss": 0.34800025820732117, + "step": 778 + }, + { + "epoch": 0.20687823662196256, + "grad_norm": 1.0122784392492805, + "learning_rate": 1.9845397332846848e-05, + "loss": 0.38093405961990356, + "step": 779 + }, + { + "epoch": 0.2071438056035055, + "grad_norm": 1.0953515150858002, + "learning_rate": 1.9844627291247268e-05, + "loss": 0.40733009576797485, + "step": 780 + }, + { + "epoch": 0.20740937458504846, + "grad_norm": 1.1011295166986532, + "learning_rate": 1.9843855351719655e-05, + "loss": 0.3829066753387451, + "step": 781 + }, + { + "epoch": 0.2076749435665914, + "grad_norm": 1.0316161170996605, + "learning_rate": 1.9843081514412827e-05, + "loss": 0.3574868440628052, + "step": 782 + }, + { + "epoch": 0.20794051254813437, + "grad_norm": 1.071531696766489, + "learning_rate": 1.984230577947597e-05, + "loss": 0.3675144612789154, + "step": 783 + }, + { + "epoch": 0.20820608152967734, + "grad_norm": 0.9982781618225591, + "learning_rate": 1.9841528147058638e-05, + "loss": 0.36120525002479553, + "step": 784 + }, + { + "epoch": 0.2084716505112203, + "grad_norm": 1.0016427535647234, + "learning_rate": 1.984074861731075e-05, + "loss": 0.3651392459869385, + "step": 785 + }, + { + 
"epoch": 0.20873721949276325, + "grad_norm": 1.1254815799645344, + "learning_rate": 1.983996719038259e-05, + "loss": 0.4204651117324829, + "step": 786 + }, + { + "epoch": 0.2090027884743062, + "grad_norm": 1.0600310007301286, + "learning_rate": 1.9839183866424806e-05, + "loss": 0.4452149271965027, + "step": 787 + }, + { + "epoch": 0.20926835745584915, + "grad_norm": 1.000047138771705, + "learning_rate": 1.9838398645588418e-05, + "loss": 0.3931270241737366, + "step": 788 + }, + { + "epoch": 0.2095339264373921, + "grad_norm": 1.0009892054118905, + "learning_rate": 1.98376115280248e-05, + "loss": 0.3680538535118103, + "step": 789 + }, + { + "epoch": 0.20979949541893506, + "grad_norm": 0.9848864128393906, + "learning_rate": 1.9836822513885704e-05, + "loss": 0.3766820728778839, + "step": 790 + }, + { + "epoch": 0.21006506440047804, + "grad_norm": 1.0494510099931045, + "learning_rate": 1.9836031603323245e-05, + "loss": 0.3602439761161804, + "step": 791 + }, + { + "epoch": 0.210330633382021, + "grad_norm": 0.9790632198207762, + "learning_rate": 1.98352387964899e-05, + "loss": 0.38925549387931824, + "step": 792 + }, + { + "epoch": 0.21059620236356394, + "grad_norm": 1.0121548586068807, + "learning_rate": 1.9834444093538504e-05, + "loss": 0.3569640517234802, + "step": 793 + }, + { + "epoch": 0.2108617713451069, + "grad_norm": 1.0171085592107372, + "learning_rate": 1.9833647494622275e-05, + "loss": 0.3543340265750885, + "step": 794 + }, + { + "epoch": 0.21112734032664984, + "grad_norm": 1.0426744340585967, + "learning_rate": 1.983284899989479e-05, + "loss": 0.37313222885131836, + "step": 795 + }, + { + "epoch": 0.2113929093081928, + "grad_norm": 1.0940501026222131, + "learning_rate": 1.983204860950998e-05, + "loss": 0.3874257802963257, + "step": 796 + }, + { + "epoch": 0.21165847828973575, + "grad_norm": 1.005805069630653, + "learning_rate": 1.983124632362216e-05, + "loss": 0.3815164864063263, + "step": 797 + }, + { + "epoch": 0.21192404727127873, + "grad_norm": 
1.0879143214156584, + "learning_rate": 1.9830442142386e-05, + "loss": 0.39476731419563293, + "step": 798 + }, + { + "epoch": 0.21218961625282168, + "grad_norm": 1.0888281701524323, + "learning_rate": 1.9829636065956527e-05, + "loss": 0.399338036775589, + "step": 799 + }, + { + "epoch": 0.21245518523436463, + "grad_norm": 1.0679987938098825, + "learning_rate": 1.9828828094489157e-05, + "loss": 0.3940344452857971, + "step": 800 + }, + { + "epoch": 0.21272075421590758, + "grad_norm": 1.0124680733329086, + "learning_rate": 1.9828018228139647e-05, + "loss": 0.35597044229507446, + "step": 801 + }, + { + "epoch": 0.21298632319745053, + "grad_norm": 1.197291261672491, + "learning_rate": 1.9827206467064133e-05, + "loss": 0.4309435784816742, + "step": 802 + }, + { + "epoch": 0.21325189217899349, + "grad_norm": 1.0158009285134544, + "learning_rate": 1.9826392811419113e-05, + "loss": 0.37327438592910767, + "step": 803 + }, + { + "epoch": 0.21351746116053644, + "grad_norm": 0.9944187944281718, + "learning_rate": 1.9825577261361454e-05, + "loss": 0.35214242339134216, + "step": 804 + }, + { + "epoch": 0.21378303014207942, + "grad_norm": 1.1575422458756877, + "learning_rate": 1.982475981704838e-05, + "loss": 0.41114968061447144, + "step": 805 + }, + { + "epoch": 0.21404859912362237, + "grad_norm": 0.9719994027948292, + "learning_rate": 1.9823940478637486e-05, + "loss": 0.3632299304008484, + "step": 806 + }, + { + "epoch": 0.21431416810516532, + "grad_norm": 1.1699036102992622, + "learning_rate": 1.9823119246286727e-05, + "loss": 0.39640772342681885, + "step": 807 + }, + { + "epoch": 0.21457973708670827, + "grad_norm": 1.002397111320771, + "learning_rate": 1.9822296120154433e-05, + "loss": 0.39356929063796997, + "step": 808 + }, + { + "epoch": 0.21484530606825122, + "grad_norm": 1.061754718166072, + "learning_rate": 1.9821471100399294e-05, + "loss": 0.3710761070251465, + "step": 809 + }, + { + "epoch": 0.21511087504979418, + "grad_norm": 0.9713246248834058, + "learning_rate": 
1.9820644187180354e-05, + "loss": 0.35515087842941284, + "step": 810 + }, + { + "epoch": 0.21537644403133713, + "grad_norm": 1.0166244205196049, + "learning_rate": 1.981981538065704e-05, + "loss": 0.3803205192089081, + "step": 811 + }, + { + "epoch": 0.2156420130128801, + "grad_norm": 1.0421456761704733, + "learning_rate": 1.9818984680989134e-05, + "loss": 0.40275394916534424, + "step": 812 + }, + { + "epoch": 0.21590758199442306, + "grad_norm": 1.0872785008811605, + "learning_rate": 1.9818152088336786e-05, + "loss": 0.3711051344871521, + "step": 813 + }, + { + "epoch": 0.216173150975966, + "grad_norm": 1.0872190904032264, + "learning_rate": 1.9817317602860512e-05, + "loss": 0.4198985695838928, + "step": 814 + }, + { + "epoch": 0.21643871995750896, + "grad_norm": 0.9931448766878032, + "learning_rate": 1.9816481224721185e-05, + "loss": 0.38333773612976074, + "step": 815 + }, + { + "epoch": 0.21670428893905191, + "grad_norm": 1.1679000778390602, + "learning_rate": 1.9815642954080055e-05, + "loss": 0.3959774971008301, + "step": 816 + }, + { + "epoch": 0.21696985792059487, + "grad_norm": 1.1013876458182361, + "learning_rate": 1.9814802791098728e-05, + "loss": 0.3475337326526642, + "step": 817 + }, + { + "epoch": 0.21723542690213782, + "grad_norm": 1.06867842878894, + "learning_rate": 1.981396073593918e-05, + "loss": 0.369370698928833, + "step": 818 + }, + { + "epoch": 0.2175009958836808, + "grad_norm": 1.085763343280496, + "learning_rate": 1.9813116788763744e-05, + "loss": 0.3515776991844177, + "step": 819 + }, + { + "epoch": 0.21776656486522375, + "grad_norm": 1.0780206278908893, + "learning_rate": 1.9812270949735124e-05, + "loss": 0.3637402355670929, + "step": 820 + }, + { + "epoch": 0.2180321338467667, + "grad_norm": 1.0342672695189807, + "learning_rate": 1.9811423219016395e-05, + "loss": 0.3930947780609131, + "step": 821 + }, + { + "epoch": 0.21829770282830965, + "grad_norm": 1.102521832922822, + "learning_rate": 1.981057359677098e-05, + "loss": 
0.40081048011779785, + "step": 822 + }, + { + "epoch": 0.2185632718098526, + "grad_norm": 1.0386373096164698, + "learning_rate": 1.9809722083162682e-05, + "loss": 0.3831724226474762, + "step": 823 + }, + { + "epoch": 0.21882884079139556, + "grad_norm": 1.0516274934858763, + "learning_rate": 1.9808868678355662e-05, + "loss": 0.3919270932674408, + "step": 824 + }, + { + "epoch": 0.2190944097729385, + "grad_norm": 1.0623138704484363, + "learning_rate": 1.9808013382514448e-05, + "loss": 0.41782522201538086, + "step": 825 + }, + { + "epoch": 0.2193599787544815, + "grad_norm": 1.0570337251212087, + "learning_rate": 1.9807156195803926e-05, + "loss": 0.3751329779624939, + "step": 826 + }, + { + "epoch": 0.21962554773602444, + "grad_norm": 1.0009279652164118, + "learning_rate": 1.9806297118389353e-05, + "loss": 0.36451685428619385, + "step": 827 + }, + { + "epoch": 0.2198911167175674, + "grad_norm": 1.1911804759546862, + "learning_rate": 1.9805436150436352e-05, + "loss": 0.3924056887626648, + "step": 828 + }, + { + "epoch": 0.22015668569911034, + "grad_norm": 0.9887238598202497, + "learning_rate": 1.9804573292110906e-05, + "loss": 0.34744757413864136, + "step": 829 + }, + { + "epoch": 0.2204222546806533, + "grad_norm": 1.1506637434477502, + "learning_rate": 1.980370854357936e-05, + "loss": 0.4162982702255249, + "step": 830 + }, + { + "epoch": 0.22068782366219625, + "grad_norm": 1.103994708633239, + "learning_rate": 1.9802841905008434e-05, + "loss": 0.36572596430778503, + "step": 831 + }, + { + "epoch": 0.2209533926437392, + "grad_norm": 1.0028116020560682, + "learning_rate": 1.98019733765652e-05, + "loss": 0.3535170555114746, + "step": 832 + }, + { + "epoch": 0.22121896162528218, + "grad_norm": 1.061392974987333, + "learning_rate": 1.9801102958417107e-05, + "loss": 0.3906480073928833, + "step": 833 + }, + { + "epoch": 0.22148453060682513, + "grad_norm": 1.0646039703833918, + "learning_rate": 1.980023065073195e-05, + "loss": 0.34185755252838135, + "step": 834 + }, + { + 
"epoch": 0.22175009958836808, + "grad_norm": 1.1983506875652454, + "learning_rate": 1.9799356453677913e-05, + "loss": 0.4216359853744507, + "step": 835 + }, + { + "epoch": 0.22201566856991103, + "grad_norm": 1.038756499639493, + "learning_rate": 1.979848036742352e-05, + "loss": 0.365469366312027, + "step": 836 + }, + { + "epoch": 0.222281237551454, + "grad_norm": 1.0128951338762324, + "learning_rate": 1.9797602392137678e-05, + "loss": 0.3570204973220825, + "step": 837 + }, + { + "epoch": 0.22254680653299694, + "grad_norm": 1.0221196075964396, + "learning_rate": 1.9796722527989646e-05, + "loss": 0.3929975926876068, + "step": 838 + }, + { + "epoch": 0.2228123755145399, + "grad_norm": 1.1512146064832047, + "learning_rate": 1.979584077514905e-05, + "loss": 0.39064258337020874, + "step": 839 + }, + { + "epoch": 0.22307794449608287, + "grad_norm": 1.0559333522375243, + "learning_rate": 1.9794957133785884e-05, + "loss": 0.3626471757888794, + "step": 840 + }, + { + "epoch": 0.22334351347762582, + "grad_norm": 1.0867316997584564, + "learning_rate": 1.9794071604070506e-05, + "loss": 0.4337238371372223, + "step": 841 + }, + { + "epoch": 0.22360908245916877, + "grad_norm": 0.9358033183445809, + "learning_rate": 1.9793184186173632e-05, + "loss": 0.3361967206001282, + "step": 842 + }, + { + "epoch": 0.22387465144071172, + "grad_norm": 0.961043072021178, + "learning_rate": 1.9792294880266346e-05, + "loss": 0.3429332971572876, + "step": 843 + }, + { + "epoch": 0.22414022042225468, + "grad_norm": 1.012773989217256, + "learning_rate": 1.97914036865201e-05, + "loss": 0.39196616411209106, + "step": 844 + }, + { + "epoch": 0.22440578940379763, + "grad_norm": 1.1250916546708978, + "learning_rate": 1.9790510605106697e-05, + "loss": 0.3763045072555542, + "step": 845 + }, + { + "epoch": 0.22467135838534058, + "grad_norm": 1.1139610172600873, + "learning_rate": 1.978961563619832e-05, + "loss": 0.41614070534706116, + "step": 846 + }, + { + "epoch": 0.22493692736688356, + "grad_norm": 
1.065347693165354, + "learning_rate": 1.9788718779967506e-05, + "loss": 0.3834165334701538, + "step": 847 + }, + { + "epoch": 0.2252024963484265, + "grad_norm": 0.9834992911039661, + "learning_rate": 1.978782003658716e-05, + "loss": 0.3552364110946655, + "step": 848 + }, + { + "epoch": 0.22546806532996946, + "grad_norm": 1.0365749744504318, + "learning_rate": 1.9786919406230544e-05, + "loss": 0.3857925534248352, + "step": 849 + }, + { + "epoch": 0.22573363431151242, + "grad_norm": 1.0779836727772776, + "learning_rate": 1.9786016889071294e-05, + "loss": 0.3501393795013428, + "step": 850 + }, + { + "epoch": 0.22599920329305537, + "grad_norm": 1.1363104904390704, + "learning_rate": 1.9785112485283404e-05, + "loss": 0.36280643939971924, + "step": 851 + }, + { + "epoch": 0.22626477227459832, + "grad_norm": 1.1791591930929934, + "learning_rate": 1.978420619504123e-05, + "loss": 0.3713894486427307, + "step": 852 + }, + { + "epoch": 0.22653034125614127, + "grad_norm": 1.0682718312185442, + "learning_rate": 1.97832980185195e-05, + "loss": 0.3668733537197113, + "step": 853 + }, + { + "epoch": 0.22679591023768425, + "grad_norm": 1.06232834606136, + "learning_rate": 1.978238795589329e-05, + "loss": 0.4054701626300812, + "step": 854 + }, + { + "epoch": 0.2270614792192272, + "grad_norm": 1.1024819375758403, + "learning_rate": 1.9781476007338058e-05, + "loss": 0.3824681043624878, + "step": 855 + }, + { + "epoch": 0.22732704820077015, + "grad_norm": 1.0604830101195206, + "learning_rate": 1.978056217302961e-05, + "loss": 0.4009544253349304, + "step": 856 + }, + { + "epoch": 0.2275926171823131, + "grad_norm": 1.0150812264671392, + "learning_rate": 1.9779646453144133e-05, + "loss": 0.34773316979408264, + "step": 857 + }, + { + "epoch": 0.22785818616385606, + "grad_norm": 1.0737509474924387, + "learning_rate": 1.977872884785815e-05, + "loss": 0.4067278206348419, + "step": 858 + }, + { + "epoch": 0.228123755145399, + "grad_norm": 1.0566398666110703, + "learning_rate": 
1.9777809357348584e-05, + "loss": 0.3843458890914917, + "step": 859 + }, + { + "epoch": 0.22838932412694196, + "grad_norm": 1.083451143522079, + "learning_rate": 1.977688798179269e-05, + "loss": 0.4261704683303833, + "step": 860 + }, + { + "epoch": 0.22865489310848494, + "grad_norm": 1.0145015740681522, + "learning_rate": 1.9775964721368098e-05, + "loss": 0.39109086990356445, + "step": 861 + }, + { + "epoch": 0.2289204620900279, + "grad_norm": 1.1472642326588585, + "learning_rate": 1.9775039576252807e-05, + "loss": 0.39436954259872437, + "step": 862 + }, + { + "epoch": 0.22918603107157084, + "grad_norm": 0.9770870267905873, + "learning_rate": 1.9774112546625168e-05, + "loss": 0.3787967562675476, + "step": 863 + }, + { + "epoch": 0.2294516000531138, + "grad_norm": 1.5071435779935147, + "learning_rate": 1.9773183632663907e-05, + "loss": 0.3729320466518402, + "step": 864 + }, + { + "epoch": 0.22971716903465675, + "grad_norm": 1.0048578103437809, + "learning_rate": 1.9772252834548108e-05, + "loss": 0.3817081153392792, + "step": 865 + }, + { + "epoch": 0.2299827380161997, + "grad_norm": 0.9709592169890221, + "learning_rate": 1.9771320152457212e-05, + "loss": 0.3362218737602234, + "step": 866 + }, + { + "epoch": 0.23024830699774265, + "grad_norm": 1.0194192402395448, + "learning_rate": 1.9770385586571033e-05, + "loss": 0.37274059653282166, + "step": 867 + }, + { + "epoch": 0.23051387597928563, + "grad_norm": 1.058710969457703, + "learning_rate": 1.9769449137069746e-05, + "loss": 0.3832330107688904, + "step": 868 + }, + { + "epoch": 0.23077944496082858, + "grad_norm": 0.9857605594513371, + "learning_rate": 1.9768510804133886e-05, + "loss": 0.37420010566711426, + "step": 869 + }, + { + "epoch": 0.23104501394237154, + "grad_norm": 1.0333482020677847, + "learning_rate": 1.976757058794435e-05, + "loss": 0.35314565896987915, + "step": 870 + }, + { + "epoch": 0.2313105829239145, + "grad_norm": 1.0404097802666386, + "learning_rate": 1.97666284886824e-05, + "loss": 
0.34667372703552246, + "step": 871 + }, + { + "epoch": 0.23157615190545744, + "grad_norm": 1.1826768759617956, + "learning_rate": 1.976568450652967e-05, + "loss": 0.3465980589389801, + "step": 872 + }, + { + "epoch": 0.2318417208870004, + "grad_norm": 1.6479387485919323, + "learning_rate": 1.9764738641668137e-05, + "loss": 0.40539389848709106, + "step": 873 + }, + { + "epoch": 0.23210728986854334, + "grad_norm": 1.090454596374008, + "learning_rate": 1.976379089428016e-05, + "loss": 0.35154545307159424, + "step": 874 + }, + { + "epoch": 0.23237285885008632, + "grad_norm": 1.1033163387519414, + "learning_rate": 1.9762841264548453e-05, + "loss": 0.39748087525367737, + "step": 875 + }, + { + "epoch": 0.23263842783162927, + "grad_norm": 1.0600221119400453, + "learning_rate": 1.976188975265609e-05, + "loss": 0.41628387570381165, + "step": 876 + }, + { + "epoch": 0.23290399681317223, + "grad_norm": 1.0805125037340586, + "learning_rate": 1.976093635878652e-05, + "loss": 0.4076233208179474, + "step": 877 + }, + { + "epoch": 0.23316956579471518, + "grad_norm": 0.9221839355888705, + "learning_rate": 1.9759981083123533e-05, + "loss": 0.3262259364128113, + "step": 878 + }, + { + "epoch": 0.23343513477625813, + "grad_norm": 1.1690018828805817, + "learning_rate": 1.9759023925851302e-05, + "loss": 0.36561673879623413, + "step": 879 + }, + { + "epoch": 0.23370070375780108, + "grad_norm": 1.083829918240926, + "learning_rate": 1.9758064887154358e-05, + "loss": 0.36661773920059204, + "step": 880 + }, + { + "epoch": 0.23396627273934403, + "grad_norm": 1.0655263771494812, + "learning_rate": 1.9757103967217587e-05, + "loss": 0.34671685099601746, + "step": 881 + }, + { + "epoch": 0.234231841720887, + "grad_norm": 1.0056372913167473, + "learning_rate": 1.9756141166226246e-05, + "loss": 0.3486331105232239, + "step": 882 + }, + { + "epoch": 0.23449741070242996, + "grad_norm": 1.1177836982205323, + "learning_rate": 1.9755176484365953e-05, + "loss": 0.3883505165576935, + "step": 883 + }, + { + 
"epoch": 0.23476297968397292, + "grad_norm": 1.0548520245203914, + "learning_rate": 1.9754209921822683e-05, + "loss": 0.3832106590270996, + "step": 884 + }, + { + "epoch": 0.23502854866551587, + "grad_norm": 1.078830112662993, + "learning_rate": 1.975324147878278e-05, + "loss": 0.37876033782958984, + "step": 885 + }, + { + "epoch": 0.23529411764705882, + "grad_norm": 1.0689289829128008, + "learning_rate": 1.975227115543295e-05, + "loss": 0.38931846618652344, + "step": 886 + }, + { + "epoch": 0.23555968662860177, + "grad_norm": 0.956721500767322, + "learning_rate": 1.9751298951960258e-05, + "loss": 0.3581021726131439, + "step": 887 + }, + { + "epoch": 0.23582525561014472, + "grad_norm": 1.0206944172292924, + "learning_rate": 1.9750324868552133e-05, + "loss": 0.35196465253829956, + "step": 888 + }, + { + "epoch": 0.2360908245916877, + "grad_norm": 0.9996206423870837, + "learning_rate": 1.974934890539637e-05, + "loss": 0.3635658025741577, + "step": 889 + }, + { + "epoch": 0.23635639357323066, + "grad_norm": 0.9523927655707425, + "learning_rate": 1.9748371062681122e-05, + "loss": 0.345594197511673, + "step": 890 + }, + { + "epoch": 0.2366219625547736, + "grad_norm": 1.0443032231121456, + "learning_rate": 1.97473913405949e-05, + "loss": 0.357181191444397, + "step": 891 + }, + { + "epoch": 0.23688753153631656, + "grad_norm": 1.0008000126392016, + "learning_rate": 1.974640973932659e-05, + "loss": 0.3264622986316681, + "step": 892 + }, + { + "epoch": 0.2371531005178595, + "grad_norm": 0.9731630083329554, + "learning_rate": 1.9745426259065434e-05, + "loss": 0.37950894236564636, + "step": 893 + }, + { + "epoch": 0.23741866949940246, + "grad_norm": 1.1493289415276364, + "learning_rate": 1.9744440900001027e-05, + "loss": 0.37400782108306885, + "step": 894 + }, + { + "epoch": 0.23768423848094541, + "grad_norm": 1.0325785235739895, + "learning_rate": 1.974345366232334e-05, + "loss": 0.3455463945865631, + "step": 895 + }, + { + "epoch": 0.2379498074624884, + "grad_norm": 
1.1059511993758653, + "learning_rate": 1.9742464546222702e-05, + "loss": 0.3605351150035858, + "step": 896 + }, + { + "epoch": 0.23821537644403135, + "grad_norm": 0.9763906212855142, + "learning_rate": 1.97414735518898e-05, + "loss": 0.3839051127433777, + "step": 897 + }, + { + "epoch": 0.2384809454255743, + "grad_norm": 1.0304758127284366, + "learning_rate": 1.974048067951569e-05, + "loss": 0.34562867879867554, + "step": 898 + }, + { + "epoch": 0.23874651440711725, + "grad_norm": 1.1332867443652592, + "learning_rate": 1.9739485929291778e-05, + "loss": 0.3986506760120392, + "step": 899 + }, + { + "epoch": 0.2390120833886602, + "grad_norm": 1.1598961775072092, + "learning_rate": 1.9738489301409848e-05, + "loss": 0.3955162465572357, + "step": 900 + }, + { + "epoch": 0.23927765237020315, + "grad_norm": 1.080226447361195, + "learning_rate": 1.9737490796062036e-05, + "loss": 0.370066374540329, + "step": 901 + }, + { + "epoch": 0.2395432213517461, + "grad_norm": 1.0637004733407822, + "learning_rate": 1.973649041344084e-05, + "loss": 0.3777826726436615, + "step": 902 + }, + { + "epoch": 0.23980879033328908, + "grad_norm": 1.1358293788080334, + "learning_rate": 1.9735488153739128e-05, + "loss": 0.327572226524353, + "step": 903 + }, + { + "epoch": 0.24007435931483204, + "grad_norm": 1.071729158749965, + "learning_rate": 1.973448401715011e-05, + "loss": 0.3921743929386139, + "step": 904 + }, + { + "epoch": 0.240339928296375, + "grad_norm": 1.0635179670685195, + "learning_rate": 1.973347800386739e-05, + "loss": 0.3683379888534546, + "step": 905 + }, + { + "epoch": 0.24060549727791794, + "grad_norm": 1.023832589054702, + "learning_rate": 1.9732470114084905e-05, + "loss": 0.390872597694397, + "step": 906 + }, + { + "epoch": 0.2408710662594609, + "grad_norm": 1.0814023137489452, + "learning_rate": 1.9731460347996964e-05, + "loss": 0.3772459626197815, + "step": 907 + }, + { + "epoch": 0.24113663524100384, + "grad_norm": 1.0280982913686894, + "learning_rate": 
1.973044870579824e-05, + "loss": 0.37990954518318176, + "step": 908 + }, + { + "epoch": 0.2414022042225468, + "grad_norm": 1.0035238419205756, + "learning_rate": 1.972943518768377e-05, + "loss": 0.3380817770957947, + "step": 909 + }, + { + "epoch": 0.24166777320408978, + "grad_norm": 0.9879847056007396, + "learning_rate": 1.9728419793848935e-05, + "loss": 0.3348115384578705, + "step": 910 + }, + { + "epoch": 0.24193334218563273, + "grad_norm": 1.0561235323428824, + "learning_rate": 1.9727402524489505e-05, + "loss": 0.36936551332473755, + "step": 911 + }, + { + "epoch": 0.24219891116717568, + "grad_norm": 1.0744513063457712, + "learning_rate": 1.9726383379801593e-05, + "loss": 0.3871539235115051, + "step": 912 + }, + { + "epoch": 0.24246448014871863, + "grad_norm": 1.0904556770971818, + "learning_rate": 1.9725362359981676e-05, + "loss": 0.37087059020996094, + "step": 913 + }, + { + "epoch": 0.24273004913026158, + "grad_norm": 0.9802916629421812, + "learning_rate": 1.9724339465226595e-05, + "loss": 0.35582688450813293, + "step": 914 + }, + { + "epoch": 0.24299561811180453, + "grad_norm": 1.0947021466091125, + "learning_rate": 1.9723314695733557e-05, + "loss": 0.38500669598579407, + "step": 915 + }, + { + "epoch": 0.2432611870933475, + "grad_norm": 0.9834121517145057, + "learning_rate": 1.9722288051700116e-05, + "loss": 0.32470762729644775, + "step": 916 + }, + { + "epoch": 0.24352675607489047, + "grad_norm": 1.0805011919993295, + "learning_rate": 1.9721259533324207e-05, + "loss": 0.3822774589061737, + "step": 917 + }, + { + "epoch": 0.24379232505643342, + "grad_norm": 0.9937398719966192, + "learning_rate": 1.972022914080411e-05, + "loss": 0.38374873995780945, + "step": 918 + }, + { + "epoch": 0.24405789403797637, + "grad_norm": 1.0550770033370775, + "learning_rate": 1.9719196874338472e-05, + "loss": 0.3419352173805237, + "step": 919 + }, + { + "epoch": 0.24432346301951932, + "grad_norm": 1.0164630853495407, + "learning_rate": 1.9718162734126308e-05, + "loss": 
0.3294275403022766, + "step": 920 + }, + { + "epoch": 0.24458903200106227, + "grad_norm": 1.0668295499881337, + "learning_rate": 1.9717126720366982e-05, + "loss": 0.3585365414619446, + "step": 921 + }, + { + "epoch": 0.24485460098260522, + "grad_norm": 1.0609325079201495, + "learning_rate": 1.9716088833260225e-05, + "loss": 0.38130316138267517, + "step": 922 + }, + { + "epoch": 0.24512016996414818, + "grad_norm": 1.0577067392982809, + "learning_rate": 1.9715049073006133e-05, + "loss": 0.3745136260986328, + "step": 923 + }, + { + "epoch": 0.24538573894569116, + "grad_norm": 1.0457228779122651, + "learning_rate": 1.971400743980516e-05, + "loss": 0.3771660327911377, + "step": 924 + }, + { + "epoch": 0.2456513079272341, + "grad_norm": 1.0133861698501567, + "learning_rate": 1.971296393385812e-05, + "loss": 0.29661691188812256, + "step": 925 + }, + { + "epoch": 0.24591687690877706, + "grad_norm": 0.9516714902458889, + "learning_rate": 1.9711918555366184e-05, + "loss": 0.33783960342407227, + "step": 926 + }, + { + "epoch": 0.24618244589032, + "grad_norm": 1.2469460687001952, + "learning_rate": 1.971087130453089e-05, + "loss": 0.42983683943748474, + "step": 927 + }, + { + "epoch": 0.24644801487186296, + "grad_norm": 0.9725914261438413, + "learning_rate": 1.9709822181554142e-05, + "loss": 0.32242363691329956, + "step": 928 + }, + { + "epoch": 0.24671358385340592, + "grad_norm": 1.0989308968162201, + "learning_rate": 1.970877118663819e-05, + "loss": 0.3576955795288086, + "step": 929 + }, + { + "epoch": 0.24697915283494887, + "grad_norm": 1.116595385391156, + "learning_rate": 1.9707718319985663e-05, + "loss": 0.4185359477996826, + "step": 930 + }, + { + "epoch": 0.24724472181649185, + "grad_norm": 1.1178442474909813, + "learning_rate": 1.970666358179953e-05, + "loss": 0.35377705097198486, + "step": 931 + }, + { + "epoch": 0.2475102907980348, + "grad_norm": 1.1350743092525455, + "learning_rate": 1.9705606972283143e-05, + "loss": 0.3860151171684265, + "step": 932 + }, + { + 
"epoch": 0.24777585977957775, + "grad_norm": 1.1915035264404457, + "learning_rate": 1.9704548491640195e-05, + "loss": 0.39463168382644653, + "step": 933 + }, + { + "epoch": 0.2480414287611207, + "grad_norm": 1.0462444044755623, + "learning_rate": 1.9703488140074752e-05, + "loss": 0.3670084774494171, + "step": 934 + }, + { + "epoch": 0.24830699774266365, + "grad_norm": 1.2914788702644175, + "learning_rate": 1.9702425917791242e-05, + "loss": 0.388730525970459, + "step": 935 + }, + { + "epoch": 0.2485725667242066, + "grad_norm": 1.128517931307855, + "learning_rate": 1.970136182499444e-05, + "loss": 0.38767656683921814, + "step": 936 + }, + { + "epoch": 0.24883813570574956, + "grad_norm": 1.0771582387425684, + "learning_rate": 1.9700295861889497e-05, + "loss": 0.35394930839538574, + "step": 937 + }, + { + "epoch": 0.24910370468729254, + "grad_norm": 1.0639329095738126, + "learning_rate": 1.9699228028681917e-05, + "loss": 0.3360324501991272, + "step": 938 + }, + { + "epoch": 0.2493692736688355, + "grad_norm": 1.116621384383513, + "learning_rate": 1.9698158325577563e-05, + "loss": 0.390169233083725, + "step": 939 + }, + { + "epoch": 0.24963484265037844, + "grad_norm": 1.108635788765439, + "learning_rate": 1.9697086752782666e-05, + "loss": 0.3921571671962738, + "step": 940 + }, + { + "epoch": 0.2499004116319214, + "grad_norm": 1.0665933445619122, + "learning_rate": 1.9696013310503808e-05, + "loss": 0.3795739710330963, + "step": 941 + }, + { + "epoch": 0.25016598061346434, + "grad_norm": 1.2202319167117164, + "learning_rate": 1.9694937998947935e-05, + "loss": 0.3891025185585022, + "step": 942 + }, + { + "epoch": 0.2504315495950073, + "grad_norm": 0.9751921056908068, + "learning_rate": 1.9693860818322357e-05, + "loss": 0.3548225164413452, + "step": 943 + }, + { + "epoch": 0.25069711857655025, + "grad_norm": 1.0555900207888067, + "learning_rate": 1.9692781768834747e-05, + "loss": 0.3696819543838501, + "step": 944 + }, + { + "epoch": 0.2509626875580932, + "grad_norm": 
1.1322184210541604, + "learning_rate": 1.9691700850693126e-05, + "loss": 0.3906037211418152, + "step": 945 + }, + { + "epoch": 0.25122825653963615, + "grad_norm": 1.072434154806742, + "learning_rate": 1.9690618064105883e-05, + "loss": 0.38181206583976746, + "step": 946 + }, + { + "epoch": 0.2514938255211791, + "grad_norm": 1.0644124497842522, + "learning_rate": 1.9689533409281765e-05, + "loss": 0.36904582381248474, + "step": 947 + }, + { + "epoch": 0.25175939450272206, + "grad_norm": 1.097105891991116, + "learning_rate": 1.9688446886429885e-05, + "loss": 0.3635823130607605, + "step": 948 + }, + { + "epoch": 0.25202496348426506, + "grad_norm": 0.9954310874837226, + "learning_rate": 1.9687358495759713e-05, + "loss": 0.3527260422706604, + "step": 949 + }, + { + "epoch": 0.252290532465808, + "grad_norm": 1.1902017812011518, + "learning_rate": 1.968626823748107e-05, + "loss": 0.3781110346317291, + "step": 950 + }, + { + "epoch": 0.25255610144735097, + "grad_norm": 1.0346217070487125, + "learning_rate": 1.968517611180415e-05, + "loss": 0.3931560814380646, + "step": 951 + }, + { + "epoch": 0.2528216704288939, + "grad_norm": 1.0783245371828571, + "learning_rate": 1.9684082118939503e-05, + "loss": 0.39111074805259705, + "step": 952 + }, + { + "epoch": 0.25308723941043687, + "grad_norm": 1.2090013193363973, + "learning_rate": 1.9682986259098037e-05, + "loss": 0.385967880487442, + "step": 953 + }, + { + "epoch": 0.2533528083919798, + "grad_norm": 1.0103878099057118, + "learning_rate": 1.9681888532491022e-05, + "loss": 0.34006553888320923, + "step": 954 + }, + { + "epoch": 0.2536183773735228, + "grad_norm": 1.0077784550534965, + "learning_rate": 1.9680788939330086e-05, + "loss": 0.36069998145103455, + "step": 955 + }, + { + "epoch": 0.2538839463550657, + "grad_norm": 1.090649670414093, + "learning_rate": 1.9679687479827212e-05, + "loss": 0.3354898691177368, + "step": 956 + }, + { + "epoch": 0.2541495153366087, + "grad_norm": 1.0691933766101984, + "learning_rate": 
1.9678584154194756e-05, + "loss": 0.35667335987091064, + "step": 957 + }, + { + "epoch": 0.25441508431815163, + "grad_norm": 1.2652121820599898, + "learning_rate": 1.9677478962645422e-05, + "loss": 0.4003029465675354, + "step": 958 + }, + { + "epoch": 0.2546806532996946, + "grad_norm": 1.0313200756086844, + "learning_rate": 1.9676371905392278e-05, + "loss": 0.34397056698799133, + "step": 959 + }, + { + "epoch": 0.25494622228123753, + "grad_norm": 1.0544706314753822, + "learning_rate": 1.9675262982648757e-05, + "loss": 0.35319578647613525, + "step": 960 + }, + { + "epoch": 0.2552117912627805, + "grad_norm": 1.0179000224070893, + "learning_rate": 1.967415219462864e-05, + "loss": 0.34840327501296997, + "step": 961 + }, + { + "epoch": 0.25547736024432344, + "grad_norm": 0.9360325612494472, + "learning_rate": 1.9673039541546076e-05, + "loss": 0.3298989534378052, + "step": 962 + }, + { + "epoch": 0.25574292922586644, + "grad_norm": 1.0904225305922717, + "learning_rate": 1.9671925023615572e-05, + "loss": 0.38438719511032104, + "step": 963 + }, + { + "epoch": 0.2560084982074094, + "grad_norm": 1.128608711014793, + "learning_rate": 1.9670808641051994e-05, + "loss": 0.3834493160247803, + "step": 964 + }, + { + "epoch": 0.25627406718895235, + "grad_norm": 1.0456501331264114, + "learning_rate": 1.9669690394070564e-05, + "loss": 0.3713288903236389, + "step": 965 + }, + { + "epoch": 0.2565396361704953, + "grad_norm": 1.0864184401996346, + "learning_rate": 1.966857028288687e-05, + "loss": 0.37564241886138916, + "step": 966 + }, + { + "epoch": 0.25680520515203825, + "grad_norm": 1.0329676619050974, + "learning_rate": 1.9667448307716857e-05, + "loss": 0.30162689089775085, + "step": 967 + }, + { + "epoch": 0.2570707741335812, + "grad_norm": 1.0948768995323135, + "learning_rate": 1.9666324468776826e-05, + "loss": 0.35969680547714233, + "step": 968 + }, + { + "epoch": 0.25733634311512416, + "grad_norm": 1.206651724690857, + "learning_rate": 1.9665198766283444e-05, + "loss": 
0.40947285294532776, + "step": 969 + }, + { + "epoch": 0.2576019120966671, + "grad_norm": 1.0651964473806064, + "learning_rate": 1.9664071200453726e-05, + "loss": 0.35868343710899353, + "step": 970 + }, + { + "epoch": 0.25786748107821006, + "grad_norm": 1.1330033214419297, + "learning_rate": 1.966294177150506e-05, + "loss": 0.3569234311580658, + "step": 971 + }, + { + "epoch": 0.258133050059753, + "grad_norm": 1.1641224987322216, + "learning_rate": 1.9661810479655184e-05, + "loss": 0.3381764888763428, + "step": 972 + }, + { + "epoch": 0.25839861904129596, + "grad_norm": 1.535927577191984, + "learning_rate": 1.9660677325122196e-05, + "loss": 0.39847785234451294, + "step": 973 + }, + { + "epoch": 0.2586641880228389, + "grad_norm": 0.9608622914302752, + "learning_rate": 1.965954230812456e-05, + "loss": 0.33162468671798706, + "step": 974 + }, + { + "epoch": 0.25892975700438187, + "grad_norm": 1.0421688584245348, + "learning_rate": 1.9658405428881087e-05, + "loss": 0.3627605438232422, + "step": 975 + }, + { + "epoch": 0.2591953259859248, + "grad_norm": 1.0501672081861986, + "learning_rate": 1.9657266687610965e-05, + "loss": 0.3253796100616455, + "step": 976 + }, + { + "epoch": 0.2594608949674678, + "grad_norm": 1.0198628618780734, + "learning_rate": 1.9656126084533716e-05, + "loss": 0.3341265916824341, + "step": 977 + }, + { + "epoch": 0.2597264639490108, + "grad_norm": 1.0202967346949672, + "learning_rate": 1.9654983619869242e-05, + "loss": 0.3714970052242279, + "step": 978 + }, + { + "epoch": 0.25999203293055373, + "grad_norm": 1.0333982958482495, + "learning_rate": 1.9653839293837798e-05, + "loss": 0.3360912501811981, + "step": 979 + }, + { + "epoch": 0.2602576019120967, + "grad_norm": 1.0322459892827835, + "learning_rate": 1.9652693106659995e-05, + "loss": 0.3780854642391205, + "step": 980 + }, + { + "epoch": 0.26052317089363963, + "grad_norm": 1.1062219940451128, + "learning_rate": 1.9651545058556803e-05, + "loss": 0.33595478534698486, + "step": 981 + }, + { + 
"epoch": 0.2607887398751826, + "grad_norm": 1.111464982167328, + "learning_rate": 1.965039514974955e-05, + "loss": 0.3608357012271881, + "step": 982 + }, + { + "epoch": 0.26105430885672554, + "grad_norm": 1.0024532391943957, + "learning_rate": 1.964924338045993e-05, + "loss": 0.3807666599750519, + "step": 983 + }, + { + "epoch": 0.2613198778382685, + "grad_norm": 1.0213030373156555, + "learning_rate": 1.964808975090999e-05, + "loss": 0.3551647663116455, + "step": 984 + }, + { + "epoch": 0.26158544681981144, + "grad_norm": 1.0761922389740786, + "learning_rate": 1.9646934261322135e-05, + "loss": 0.3771904706954956, + "step": 985 + }, + { + "epoch": 0.2618510158013544, + "grad_norm": 1.1925998045571422, + "learning_rate": 1.964577691191913e-05, + "loss": 0.41103222966194153, + "step": 986 + }, + { + "epoch": 0.26211658478289734, + "grad_norm": 1.0270282722515527, + "learning_rate": 1.9644617702924093e-05, + "loss": 0.34439292550086975, + "step": 987 + }, + { + "epoch": 0.2623821537644403, + "grad_norm": 1.1578988390038234, + "learning_rate": 1.9643456634560515e-05, + "loss": 0.41214391589164734, + "step": 988 + }, + { + "epoch": 0.26264772274598325, + "grad_norm": 0.9879567855265076, + "learning_rate": 1.9642293707052232e-05, + "loss": 0.3186502754688263, + "step": 989 + }, + { + "epoch": 0.2629132917275262, + "grad_norm": 1.039224300824638, + "learning_rate": 1.9641128920623438e-05, + "loss": 0.3534559905529022, + "step": 990 + }, + { + "epoch": 0.2631788607090692, + "grad_norm": 1.0867820667103292, + "learning_rate": 1.96399622754987e-05, + "loss": 0.35217320919036865, + "step": 991 + }, + { + "epoch": 0.26344442969061216, + "grad_norm": 0.954421559413849, + "learning_rate": 1.9638793771902924e-05, + "loss": 0.31661587953567505, + "step": 992 + }, + { + "epoch": 0.2637099986721551, + "grad_norm": 0.9881195075112362, + "learning_rate": 1.9637623410061392e-05, + "loss": 0.32468482851982117, + "step": 993 + }, + { + "epoch": 0.26397556765369806, + "grad_norm": 
1.0355017939200293, + "learning_rate": 1.9636451190199727e-05, + "loss": 0.346771776676178, + "step": 994 + }, + { + "epoch": 0.264241136635241, + "grad_norm": 1.0997948902450267, + "learning_rate": 1.9635277112543928e-05, + "loss": 0.36409270763397217, + "step": 995 + }, + { + "epoch": 0.26450670561678397, + "grad_norm": 1.2132528670947562, + "learning_rate": 1.963410117732034e-05, + "loss": 0.404967725276947, + "step": 996 + }, + { + "epoch": 0.2647722745983269, + "grad_norm": 1.1962964423617835, + "learning_rate": 1.9632923384755666e-05, + "loss": 0.39506661891937256, + "step": 997 + }, + { + "epoch": 0.26503784357986987, + "grad_norm": 1.1967751692769375, + "learning_rate": 1.9631743735076972e-05, + "loss": 0.3833203911781311, + "step": 998 + }, + { + "epoch": 0.2653034125614128, + "grad_norm": 1.083140773107417, + "learning_rate": 1.9630562228511682e-05, + "loss": 0.34522518515586853, + "step": 999 + }, + { + "epoch": 0.2655689815429558, + "grad_norm": 1.1367328076589556, + "learning_rate": 1.962937886528758e-05, + "loss": 0.3818400800228119, + "step": 1000 + }, + { + "epoch": 0.2658345505244987, + "grad_norm": 1.2496699132911573, + "learning_rate": 1.9628193645632796e-05, + "loss": 0.40827828645706177, + "step": 1001 + }, + { + "epoch": 0.2661001195060417, + "grad_norm": 1.0406728708542907, + "learning_rate": 1.962700656977583e-05, + "loss": 0.3448852002620697, + "step": 1002 + }, + { + "epoch": 0.26636568848758463, + "grad_norm": 1.1035895986897222, + "learning_rate": 1.9625817637945542e-05, + "loss": 0.36560773849487305, + "step": 1003 + }, + { + "epoch": 0.2666312574691276, + "grad_norm": 1.1637977684704512, + "learning_rate": 1.962462685037114e-05, + "loss": 0.38305893540382385, + "step": 1004 + }, + { + "epoch": 0.2668968264506706, + "grad_norm": 1.0320363555261158, + "learning_rate": 1.962343420728219e-05, + "loss": 0.3562568426132202, + "step": 1005 + }, + { + "epoch": 0.26716239543221354, + "grad_norm": 1.18312934129538, + "learning_rate": 
1.9622239708908626e-05, + "loss": 0.37458860874176025, + "step": 1006 + }, + { + "epoch": 0.2674279644137565, + "grad_norm": 1.058042672523148, + "learning_rate": 1.9621043355480726e-05, + "loss": 0.35852503776550293, + "step": 1007 + }, + { + "epoch": 0.26769353339529944, + "grad_norm": 1.0975239398171568, + "learning_rate": 1.961984514722914e-05, + "loss": 0.4056578278541565, + "step": 1008 + }, + { + "epoch": 0.2679591023768424, + "grad_norm": 1.1773057151207822, + "learning_rate": 1.9618645084384863e-05, + "loss": 0.4531296491622925, + "step": 1009 + }, + { + "epoch": 0.26822467135838535, + "grad_norm": 0.9095840908563808, + "learning_rate": 1.9617443167179256e-05, + "loss": 0.3356376886367798, + "step": 1010 + }, + { + "epoch": 0.2684902403399283, + "grad_norm": 1.09880831555839, + "learning_rate": 1.9616239395844033e-05, + "loss": 0.38045161962509155, + "step": 1011 + }, + { + "epoch": 0.26875580932147125, + "grad_norm": 1.028451509847456, + "learning_rate": 1.9615033770611268e-05, + "loss": 0.3549511730670929, + "step": 1012 + }, + { + "epoch": 0.2690213783030142, + "grad_norm": 1.0546213631772847, + "learning_rate": 1.9613826291713393e-05, + "loss": 0.33363252878189087, + "step": 1013 + }, + { + "epoch": 0.26928694728455715, + "grad_norm": 0.9539256345754278, + "learning_rate": 1.961261695938319e-05, + "loss": 0.3443339467048645, + "step": 1014 + }, + { + "epoch": 0.2695525162661001, + "grad_norm": 0.9897755385014708, + "learning_rate": 1.9611405773853807e-05, + "loss": 0.3258364796638489, + "step": 1015 + }, + { + "epoch": 0.26981808524764306, + "grad_norm": 1.0357196980681809, + "learning_rate": 1.961019273535875e-05, + "loss": 0.357122540473938, + "step": 1016 + }, + { + "epoch": 0.270083654229186, + "grad_norm": 0.9668495504097999, + "learning_rate": 1.9608977844131875e-05, + "loss": 0.32092082500457764, + "step": 1017 + }, + { + "epoch": 0.27034922321072896, + "grad_norm": 1.0067299219043435, + "learning_rate": 1.96077611004074e-05, + "loss": 
0.36354511976242065, + "step": 1018 + }, + { + "epoch": 0.27061479219227197, + "grad_norm": 1.0982243281899924, + "learning_rate": 1.9606542504419895e-05, + "loss": 0.37128758430480957, + "step": 1019 + }, + { + "epoch": 0.2708803611738149, + "grad_norm": 1.1112959838703056, + "learning_rate": 1.9605322056404294e-05, + "loss": 0.3732859790325165, + "step": 1020 + }, + { + "epoch": 0.2711459301553579, + "grad_norm": 1.0058814849372155, + "learning_rate": 1.9604099756595885e-05, + "loss": 0.32642674446105957, + "step": 1021 + }, + { + "epoch": 0.2714114991369008, + "grad_norm": 1.10371255398192, + "learning_rate": 1.9602875605230313e-05, + "loss": 0.376791775226593, + "step": 1022 + }, + { + "epoch": 0.2716770681184438, + "grad_norm": 1.0603007725295257, + "learning_rate": 1.960164960254358e-05, + "loss": 0.34514784812927246, + "step": 1023 + }, + { + "epoch": 0.27194263709998673, + "grad_norm": 1.225533197470795, + "learning_rate": 1.9600421748772044e-05, + "loss": 0.3752189576625824, + "step": 1024 + }, + { + "epoch": 0.2722082060815297, + "grad_norm": 1.0783483670765837, + "learning_rate": 1.959919204415242e-05, + "loss": 0.33100831508636475, + "step": 1025 + }, + { + "epoch": 0.27247377506307263, + "grad_norm": 1.1910668751599112, + "learning_rate": 1.9597960488921785e-05, + "loss": 0.42713654041290283, + "step": 1026 + }, + { + "epoch": 0.2727393440446156, + "grad_norm": 1.110777223027095, + "learning_rate": 1.9596727083317565e-05, + "loss": 0.3746519684791565, + "step": 1027 + }, + { + "epoch": 0.27300491302615854, + "grad_norm": 1.1133725792972708, + "learning_rate": 1.9595491827577543e-05, + "loss": 0.39962098002433777, + "step": 1028 + }, + { + "epoch": 0.2732704820077015, + "grad_norm": 1.0544310192284179, + "learning_rate": 1.9594254721939866e-05, + "loss": 0.35112401843070984, + "step": 1029 + }, + { + "epoch": 0.27353605098924444, + "grad_norm": 1.0749153592990304, + "learning_rate": 1.9593015766643037e-05, + "loss": 0.3648139238357544, + "step": 1030 + 
}, + { + "epoch": 0.2738016199707874, + "grad_norm": 1.0268996180520502, + "learning_rate": 1.9591774961925902e-05, + "loss": 0.31544098258018494, + "step": 1031 + }, + { + "epoch": 0.27406718895233034, + "grad_norm": 1.1260952074052377, + "learning_rate": 1.959053230802768e-05, + "loss": 0.3593738079071045, + "step": 1032 + }, + { + "epoch": 0.27433275793387335, + "grad_norm": 1.1009303195981317, + "learning_rate": 1.958928780518794e-05, + "loss": 0.39784368872642517, + "step": 1033 + }, + { + "epoch": 0.2745983269154163, + "grad_norm": 1.1304731324804922, + "learning_rate": 1.9588041453646606e-05, + "loss": 0.3869936168193817, + "step": 1034 + }, + { + "epoch": 0.27486389589695925, + "grad_norm": 0.9803124730292929, + "learning_rate": 1.958679325364396e-05, + "loss": 0.31108593940734863, + "step": 1035 + }, + { + "epoch": 0.2751294648785022, + "grad_norm": 1.098791994520666, + "learning_rate": 1.958554320542064e-05, + "loss": 0.3917708098888397, + "step": 1036 + }, + { + "epoch": 0.27539503386004516, + "grad_norm": 0.9969159455112034, + "learning_rate": 1.958429130921764e-05, + "loss": 0.36782944202423096, + "step": 1037 + }, + { + "epoch": 0.2756606028415881, + "grad_norm": 0.9381100088398062, + "learning_rate": 1.9583037565276314e-05, + "loss": 0.36196422576904297, + "step": 1038 + }, + { + "epoch": 0.27592617182313106, + "grad_norm": 1.0783473143219733, + "learning_rate": 1.9581781973838368e-05, + "loss": 0.32208555936813354, + "step": 1039 + }, + { + "epoch": 0.276191740804674, + "grad_norm": 0.9653316626874986, + "learning_rate": 1.958052453514586e-05, + "loss": 0.33451759815216064, + "step": 1040 + }, + { + "epoch": 0.27645730978621696, + "grad_norm": 1.0328342572912144, + "learning_rate": 1.9579265249441216e-05, + "loss": 0.3228047788143158, + "step": 1041 + }, + { + "epoch": 0.2767228787677599, + "grad_norm": 1.0944658380016739, + "learning_rate": 1.957800411696721e-05, + "loss": 0.36992791295051575, + "step": 1042 + }, + { + "epoch": 0.27698844774930287, 
+ "grad_norm": 0.9799580951396849, + "learning_rate": 1.9576741137966967e-05, + "loss": 0.3072342276573181, + "step": 1043 + }, + { + "epoch": 0.2772540167308458, + "grad_norm": 1.0637046756594408, + "learning_rate": 1.9575476312683985e-05, + "loss": 0.3372080326080322, + "step": 1044 + }, + { + "epoch": 0.27751958571238877, + "grad_norm": 1.0509701364189301, + "learning_rate": 1.95742096413621e-05, + "loss": 0.34725332260131836, + "step": 1045 + }, + { + "epoch": 0.2777851546939317, + "grad_norm": 1.1053591471100805, + "learning_rate": 1.9572941124245516e-05, + "loss": 0.36714982986450195, + "step": 1046 + }, + { + "epoch": 0.27805072367547473, + "grad_norm": 1.208127444221669, + "learning_rate": 1.957167076157878e-05, + "loss": 0.4163498282432556, + "step": 1047 + }, + { + "epoch": 0.2783162926570177, + "grad_norm": 1.1861975128714084, + "learning_rate": 1.9570398553606815e-05, + "loss": 0.40059348940849304, + "step": 1048 + }, + { + "epoch": 0.27858186163856063, + "grad_norm": 1.085993120538819, + "learning_rate": 1.956912450057488e-05, + "loss": 0.3622320294380188, + "step": 1049 + }, + { + "epoch": 0.2788474306201036, + "grad_norm": 1.1326017870689584, + "learning_rate": 1.9567848602728595e-05, + "loss": 0.35159534215927124, + "step": 1050 + }, + { + "epoch": 0.27911299960164654, + "grad_norm": 0.9516936878211085, + "learning_rate": 1.9566570860313944e-05, + "loss": 0.3093762993812561, + "step": 1051 + }, + { + "epoch": 0.2793785685831895, + "grad_norm": 1.040326152894859, + "learning_rate": 1.9565291273577255e-05, + "loss": 0.341474324464798, + "step": 1052 + }, + { + "epoch": 0.27964413756473244, + "grad_norm": 1.0885626452470811, + "learning_rate": 1.9564009842765225e-05, + "loss": 0.35376566648483276, + "step": 1053 + }, + { + "epoch": 0.2799097065462754, + "grad_norm": 1.09154548256864, + "learning_rate": 1.9562726568124892e-05, + "loss": 0.3487662374973297, + "step": 1054 + }, + { + "epoch": 0.28017527552781835, + "grad_norm": 1.014222924008021, + 
"learning_rate": 1.956144144990366e-05, + "loss": 0.3610745370388031, + "step": 1055 + }, + { + "epoch": 0.2804408445093613, + "grad_norm": 0.9789890869027496, + "learning_rate": 1.9560154488349284e-05, + "loss": 0.33230137825012207, + "step": 1056 + }, + { + "epoch": 0.28070641349090425, + "grad_norm": 1.0104241821081763, + "learning_rate": 1.9558865683709875e-05, + "loss": 0.310351699590683, + "step": 1057 + }, + { + "epoch": 0.2809719824724472, + "grad_norm": 1.1188708821966176, + "learning_rate": 1.9557575036233897e-05, + "loss": 0.39930224418640137, + "step": 1058 + }, + { + "epoch": 0.28123755145399015, + "grad_norm": 1.0498907782820184, + "learning_rate": 1.955628254617017e-05, + "loss": 0.3345295488834381, + "step": 1059 + }, + { + "epoch": 0.2815031204355331, + "grad_norm": 1.1059864789744056, + "learning_rate": 1.9554988213767875e-05, + "loss": 0.37963107228279114, + "step": 1060 + }, + { + "epoch": 0.2817686894170761, + "grad_norm": 1.0825219178132603, + "learning_rate": 1.9553692039276545e-05, + "loss": 0.3923654854297638, + "step": 1061 + }, + { + "epoch": 0.28203425839861906, + "grad_norm": 1.0736283126776336, + "learning_rate": 1.9552394022946068e-05, + "loss": 0.363646924495697, + "step": 1062 + }, + { + "epoch": 0.282299827380162, + "grad_norm": 1.1051684289136041, + "learning_rate": 1.9551094165026677e-05, + "loss": 0.35486382246017456, + "step": 1063 + }, + { + "epoch": 0.28256539636170497, + "grad_norm": 1.0845117937449689, + "learning_rate": 1.954979246576898e-05, + "loss": 0.35215455293655396, + "step": 1064 + }, + { + "epoch": 0.2828309653432479, + "grad_norm": 1.1587243435425785, + "learning_rate": 1.9548488925423924e-05, + "loss": 0.3936809003353119, + "step": 1065 + }, + { + "epoch": 0.28309653432479087, + "grad_norm": 1.0399965264634783, + "learning_rate": 1.9547183544242817e-05, + "loss": 0.36852866411209106, + "step": 1066 + }, + { + "epoch": 0.2833621033063338, + "grad_norm": 1.0679817467710029, + "learning_rate": 
1.954587632247732e-05, + "loss": 0.3552001714706421, + "step": 1067 + }, + { + "epoch": 0.2836276722878768, + "grad_norm": 1.1330169189394568, + "learning_rate": 1.9544567260379455e-05, + "loss": 0.3684498965740204, + "step": 1068 + }, + { + "epoch": 0.2838932412694197, + "grad_norm": 0.9857931835351914, + "learning_rate": 1.9543256358201586e-05, + "loss": 0.3367026448249817, + "step": 1069 + }, + { + "epoch": 0.2841588102509627, + "grad_norm": 1.0677692738667734, + "learning_rate": 1.9541943616196443e-05, + "loss": 0.3702335059642792, + "step": 1070 + }, + { + "epoch": 0.28442437923250563, + "grad_norm": 1.1114119189633371, + "learning_rate": 1.9540629034617108e-05, + "loss": 0.3430984318256378, + "step": 1071 + }, + { + "epoch": 0.2846899482140486, + "grad_norm": 1.1406170357402363, + "learning_rate": 1.953931261371702e-05, + "loss": 0.36514735221862793, + "step": 1072 + }, + { + "epoch": 0.28495551719559153, + "grad_norm": 1.0428104806049732, + "learning_rate": 1.9537994353749963e-05, + "loss": 0.3524945080280304, + "step": 1073 + }, + { + "epoch": 0.2852210861771345, + "grad_norm": 1.0283973360981475, + "learning_rate": 1.9536674254970088e-05, + "loss": 0.32405683398246765, + "step": 1074 + }, + { + "epoch": 0.2854866551586775, + "grad_norm": 1.0649875575316718, + "learning_rate": 1.9535352317631888e-05, + "loss": 0.30863165855407715, + "step": 1075 + }, + { + "epoch": 0.28575222414022045, + "grad_norm": 1.0647565002745494, + "learning_rate": 1.953402854199022e-05, + "loss": 0.34343889355659485, + "step": 1076 + }, + { + "epoch": 0.2860177931217634, + "grad_norm": 1.2339349330872973, + "learning_rate": 1.9532702928300292e-05, + "loss": 0.3639434576034546, + "step": 1077 + }, + { + "epoch": 0.28628336210330635, + "grad_norm": 1.0888261251069975, + "learning_rate": 1.9531375476817667e-05, + "loss": 0.3380300998687744, + "step": 1078 + }, + { + "epoch": 0.2865489310848493, + "grad_norm": 1.1078839119175599, + "learning_rate": 1.9530046187798267e-05, + "loss": 
0.3323265016078949, + "step": 1079 + }, + { + "epoch": 0.28681450006639225, + "grad_norm": 1.0529271541493659, + "learning_rate": 1.9528715061498355e-05, + "loss": 0.3439220190048218, + "step": 1080 + }, + { + "epoch": 0.2870800690479352, + "grad_norm": 1.088357435010649, + "learning_rate": 1.952738209817456e-05, + "loss": 0.36376965045928955, + "step": 1081 + }, + { + "epoch": 0.28734563802947816, + "grad_norm": 1.0188116446188513, + "learning_rate": 1.952604729808386e-05, + "loss": 0.3281211853027344, + "step": 1082 + }, + { + "epoch": 0.2876112070110211, + "grad_norm": 1.0999135645201878, + "learning_rate": 1.9524710661483594e-05, + "loss": 0.3538089990615845, + "step": 1083 + }, + { + "epoch": 0.28787677599256406, + "grad_norm": 1.1475903462769852, + "learning_rate": 1.9523372188631442e-05, + "loss": 0.3982803225517273, + "step": 1084 + }, + { + "epoch": 0.288142344974107, + "grad_norm": 1.11408923860859, + "learning_rate": 1.9522031879785453e-05, + "loss": 0.3958810567855835, + "step": 1085 + }, + { + "epoch": 0.28840791395564996, + "grad_norm": 1.191451776763126, + "learning_rate": 1.9520689735204016e-05, + "loss": 0.40133988857269287, + "step": 1086 + }, + { + "epoch": 0.2886734829371929, + "grad_norm": 1.048862195613205, + "learning_rate": 1.9519345755145886e-05, + "loss": 0.32411646842956543, + "step": 1087 + }, + { + "epoch": 0.28893905191873587, + "grad_norm": 1.210003646730205, + "learning_rate": 1.9517999939870166e-05, + "loss": 0.38678207993507385, + "step": 1088 + }, + { + "epoch": 0.2892046209002789, + "grad_norm": 1.0663258874668164, + "learning_rate": 1.951665228963631e-05, + "loss": 0.36829686164855957, + "step": 1089 + }, + { + "epoch": 0.2894701898818218, + "grad_norm": 0.9884592653808488, + "learning_rate": 1.9515302804704134e-05, + "loss": 0.38631704449653625, + "step": 1090 + }, + { + "epoch": 0.2897357588633648, + "grad_norm": 1.1934503112083867, + "learning_rate": 1.9513951485333798e-05, + "loss": 0.39288902282714844, + "step": 1091 + }, + 
{ + "epoch": 0.29000132784490773, + "grad_norm": 1.0804742457342014, + "learning_rate": 1.9512598331785822e-05, + "loss": 0.3655658960342407, + "step": 1092 + }, + { + "epoch": 0.2902668968264507, + "grad_norm": 0.9929300268939649, + "learning_rate": 1.9511243344321076e-05, + "loss": 0.3263852596282959, + "step": 1093 + }, + { + "epoch": 0.29053246580799363, + "grad_norm": 1.1166275426043832, + "learning_rate": 1.9509886523200792e-05, + "loss": 0.37939125299453735, + "step": 1094 + }, + { + "epoch": 0.2907980347895366, + "grad_norm": 1.074761796186792, + "learning_rate": 1.9508527868686543e-05, + "loss": 0.34218865633010864, + "step": 1095 + }, + { + "epoch": 0.29106360377107954, + "grad_norm": 1.036633851483027, + "learning_rate": 1.9507167381040263e-05, + "loss": 0.368261456489563, + "step": 1096 + }, + { + "epoch": 0.2913291727526225, + "grad_norm": 1.083724731335207, + "learning_rate": 1.950580506052424e-05, + "loss": 0.36133286356925964, + "step": 1097 + }, + { + "epoch": 0.29159474173416544, + "grad_norm": 1.0542758401630365, + "learning_rate": 1.9504440907401113e-05, + "loss": 0.3667418658733368, + "step": 1098 + }, + { + "epoch": 0.2918603107157084, + "grad_norm": 0.9961595646698646, + "learning_rate": 1.950307492193387e-05, + "loss": 0.34444570541381836, + "step": 1099 + }, + { + "epoch": 0.29212587969725134, + "grad_norm": 1.1203470867439278, + "learning_rate": 1.9501707104385863e-05, + "loss": 0.41261589527130127, + "step": 1100 + }, + { + "epoch": 0.2923914486787943, + "grad_norm": 1.0847270622391922, + "learning_rate": 1.9500337455020788e-05, + "loss": 0.3762981593608856, + "step": 1101 + }, + { + "epoch": 0.29265701766033725, + "grad_norm": 1.108635996430537, + "learning_rate": 1.9498965974102697e-05, + "loss": 0.3527417480945587, + "step": 1102 + }, + { + "epoch": 0.29292258664188026, + "grad_norm": 1.1555485155020386, + "learning_rate": 1.9497592661895996e-05, + "loss": 0.34812286496162415, + "step": 1103 + }, + { + "epoch": 0.2931881556234232, + 
"grad_norm": 0.9844968948580171, + "learning_rate": 1.9496217518665444e-05, + "loss": 0.33663398027420044, + "step": 1104 + }, + { + "epoch": 0.29345372460496616, + "grad_norm": 0.997090208380272, + "learning_rate": 1.9494840544676156e-05, + "loss": 0.3632991313934326, + "step": 1105 + }, + { + "epoch": 0.2937192935865091, + "grad_norm": 1.3515018592791732, + "learning_rate": 1.9493461740193587e-05, + "loss": 0.37389490008354187, + "step": 1106 + }, + { + "epoch": 0.29398486256805206, + "grad_norm": 1.204356467911551, + "learning_rate": 1.949208110548356e-05, + "loss": 0.3634020686149597, + "step": 1107 + }, + { + "epoch": 0.294250431549595, + "grad_norm": 1.0778805299295515, + "learning_rate": 1.9490698640812247e-05, + "loss": 0.36032742261886597, + "step": 1108 + }, + { + "epoch": 0.29451600053113797, + "grad_norm": 1.1504972318858309, + "learning_rate": 1.9489314346446164e-05, + "loss": 0.3385765552520752, + "step": 1109 + }, + { + "epoch": 0.2947815695126809, + "grad_norm": 1.0946200184976398, + "learning_rate": 1.9487928222652195e-05, + "loss": 0.3751915991306305, + "step": 1110 + }, + { + "epoch": 0.29504713849422387, + "grad_norm": 1.0903856446796527, + "learning_rate": 1.9486540269697564e-05, + "loss": 0.36069825291633606, + "step": 1111 + }, + { + "epoch": 0.2953127074757668, + "grad_norm": 1.009573568422265, + "learning_rate": 1.948515048784985e-05, + "loss": 0.32703787088394165, + "step": 1112 + }, + { + "epoch": 0.2955782764573098, + "grad_norm": 0.9196963642088989, + "learning_rate": 1.948375887737699e-05, + "loss": 0.312494158744812, + "step": 1113 + }, + { + "epoch": 0.2958438454388527, + "grad_norm": 0.9880564768480579, + "learning_rate": 1.9482365438547272e-05, + "loss": 0.30626165866851807, + "step": 1114 + }, + { + "epoch": 0.2961094144203957, + "grad_norm": 1.07827456569524, + "learning_rate": 1.948097017162933e-05, + "loss": 0.3625817894935608, + "step": 1115 + }, + { + "epoch": 0.29637498340193863, + "grad_norm": 1.1789711489550672, + 
"learning_rate": 1.9479573076892152e-05, + "loss": 0.38403773307800293, + "step": 1116 + }, + { + "epoch": 0.2966405523834816, + "grad_norm": 1.0638061154391991, + "learning_rate": 1.9478174154605093e-05, + "loss": 0.3645164966583252, + "step": 1117 + }, + { + "epoch": 0.2969061213650246, + "grad_norm": 1.0428170431433939, + "learning_rate": 1.9476773405037836e-05, + "loss": 0.3714389503002167, + "step": 1118 + }, + { + "epoch": 0.29717169034656754, + "grad_norm": 1.1488169814057956, + "learning_rate": 1.9475370828460436e-05, + "loss": 0.39809900522232056, + "step": 1119 + }, + { + "epoch": 0.2974372593281105, + "grad_norm": 1.0702503358715294, + "learning_rate": 1.9473966425143292e-05, + "loss": 0.3698490262031555, + "step": 1120 + }, + { + "epoch": 0.29770282830965344, + "grad_norm": 1.0166542138266799, + "learning_rate": 1.947256019535716e-05, + "loss": 0.3072658181190491, + "step": 1121 + }, + { + "epoch": 0.2979683972911964, + "grad_norm": 1.0479599499698302, + "learning_rate": 1.947115213937314e-05, + "loss": 0.3294365406036377, + "step": 1122 + }, + { + "epoch": 0.29823396627273935, + "grad_norm": 1.007749929257712, + "learning_rate": 1.9469742257462684e-05, + "loss": 0.34933674335479736, + "step": 1123 + }, + { + "epoch": 0.2984995352542823, + "grad_norm": 1.133473784296847, + "learning_rate": 1.946833054989761e-05, + "loss": 0.34586772322654724, + "step": 1124 + }, + { + "epoch": 0.29876510423582525, + "grad_norm": 1.0225090189343862, + "learning_rate": 1.9466917016950076e-05, + "loss": 0.33158159255981445, + "step": 1125 + }, + { + "epoch": 0.2990306732173682, + "grad_norm": 1.0162208348084125, + "learning_rate": 1.946550165889259e-05, + "loss": 0.32665887475013733, + "step": 1126 + }, + { + "epoch": 0.29929624219891116, + "grad_norm": 1.1065475895733048, + "learning_rate": 1.946408447599802e-05, + "loss": 0.3333032429218292, + "step": 1127 + }, + { + "epoch": 0.2995618111804541, + "grad_norm": 1.0958997421479173, + "learning_rate": 
1.9462665468539582e-05, + "loss": 0.3747228980064392, + "step": 1128 + }, + { + "epoch": 0.29982738016199706, + "grad_norm": 0.9447906277138843, + "learning_rate": 1.9461244636790845e-05, + "loss": 0.34040436148643494, + "step": 1129 + }, + { + "epoch": 0.30009294914354, + "grad_norm": 1.0062775259583612, + "learning_rate": 1.9459821981025723e-05, + "loss": 0.3279584050178528, + "step": 1130 + }, + { + "epoch": 0.30035851812508296, + "grad_norm": 1.136819731097147, + "learning_rate": 1.9458397501518496e-05, + "loss": 0.33507707715034485, + "step": 1131 + }, + { + "epoch": 0.30062408710662597, + "grad_norm": 0.9978141677663763, + "learning_rate": 1.945697119854378e-05, + "loss": 0.3511529862880707, + "step": 1132 + }, + { + "epoch": 0.3008896560881689, + "grad_norm": 1.1038696900269844, + "learning_rate": 1.945554307237655e-05, + "loss": 0.33260345458984375, + "step": 1133 + }, + { + "epoch": 0.3011552250697119, + "grad_norm": 1.1267244347055163, + "learning_rate": 1.9454113123292133e-05, + "loss": 0.37698423862457275, + "step": 1134 + }, + { + "epoch": 0.3014207940512548, + "grad_norm": 1.0482054605062838, + "learning_rate": 1.945268135156621e-05, + "loss": 0.34843316674232483, + "step": 1135 + }, + { + "epoch": 0.3016863630327978, + "grad_norm": 1.1518938911568848, + "learning_rate": 1.9451247757474805e-05, + "loss": 0.38723987340927124, + "step": 1136 + }, + { + "epoch": 0.30195193201434073, + "grad_norm": 1.0597410032778982, + "learning_rate": 1.9449812341294302e-05, + "loss": 0.3836795389652252, + "step": 1137 + }, + { + "epoch": 0.3022175009958837, + "grad_norm": 0.9828275773453091, + "learning_rate": 1.9448375103301424e-05, + "loss": 0.3362433612346649, + "step": 1138 + }, + { + "epoch": 0.30248306997742663, + "grad_norm": 1.0750556057741842, + "learning_rate": 1.9446936043773264e-05, + "loss": 0.3615792393684387, + "step": 1139 + }, + { + "epoch": 0.3027486389589696, + "grad_norm": 1.0233339727957385, + "learning_rate": 1.944549516298725e-05, + "loss": 
0.33693915605545044, + "step": 1140 + }, + { + "epoch": 0.30301420794051254, + "grad_norm": 1.0074205515838075, + "learning_rate": 1.9444052461221167e-05, + "loss": 0.32611170411109924, + "step": 1141 + }, + { + "epoch": 0.3032797769220555, + "grad_norm": 1.0257687736898828, + "learning_rate": 1.9442607938753153e-05, + "loss": 0.3504132032394409, + "step": 1142 + }, + { + "epoch": 0.30354534590359844, + "grad_norm": 1.081217851264946, + "learning_rate": 1.944116159586169e-05, + "loss": 0.3598168194293976, + "step": 1143 + }, + { + "epoch": 0.3038109148851414, + "grad_norm": 1.025673115447757, + "learning_rate": 1.9439713432825625e-05, + "loss": 0.33447909355163574, + "step": 1144 + }, + { + "epoch": 0.30407648386668434, + "grad_norm": 0.9795127759513904, + "learning_rate": 1.943826344992414e-05, + "loss": 0.34026333689689636, + "step": 1145 + }, + { + "epoch": 0.30434205284822735, + "grad_norm": 1.070042442644686, + "learning_rate": 1.9436811647436772e-05, + "loss": 0.323203980922699, + "step": 1146 + }, + { + "epoch": 0.3046076218297703, + "grad_norm": 1.0588861737680213, + "learning_rate": 1.943535802564342e-05, + "loss": 0.332398921251297, + "step": 1147 + }, + { + "epoch": 0.30487319081131325, + "grad_norm": 1.175168490214782, + "learning_rate": 1.9433902584824316e-05, + "loss": 0.3882995545864105, + "step": 1148 + }, + { + "epoch": 0.3051387597928562, + "grad_norm": 1.093435738226519, + "learning_rate": 1.943244532526006e-05, + "loss": 0.35262739658355713, + "step": 1149 + }, + { + "epoch": 0.30540432877439916, + "grad_norm": 1.1043029209432185, + "learning_rate": 1.9430986247231586e-05, + "loss": 0.39694511890411377, + "step": 1150 + }, + { + "epoch": 0.3056698977559421, + "grad_norm": 1.1276348856512544, + "learning_rate": 1.9429525351020197e-05, + "loss": 0.3692580759525299, + "step": 1151 + }, + { + "epoch": 0.30593546673748506, + "grad_norm": 1.1284903074468042, + "learning_rate": 1.9428062636907526e-05, + "loss": 0.3685402572154999, + "step": 1152 + }, + 
{ + "epoch": 0.306201035719028, + "grad_norm": 1.1120189967723886, + "learning_rate": 1.9426598105175575e-05, + "loss": 0.37557253241539, + "step": 1153 + }, + { + "epoch": 0.30646660470057097, + "grad_norm": 0.9544414078231065, + "learning_rate": 1.9425131756106687e-05, + "loss": 0.3323203921318054, + "step": 1154 + }, + { + "epoch": 0.3067321736821139, + "grad_norm": 1.085159318227953, + "learning_rate": 1.9423663589983554e-05, + "loss": 0.37262290716171265, + "step": 1155 + }, + { + "epoch": 0.30699774266365687, + "grad_norm": 1.138203326668225, + "learning_rate": 1.9422193607089224e-05, + "loss": 0.36621618270874023, + "step": 1156 + }, + { + "epoch": 0.3072633116451998, + "grad_norm": 1.0326975743253168, + "learning_rate": 1.942072180770709e-05, + "loss": 0.3844982385635376, + "step": 1157 + }, + { + "epoch": 0.3075288806267428, + "grad_norm": 0.9983252957319158, + "learning_rate": 1.94192481921209e-05, + "loss": 0.3229531943798065, + "step": 1158 + }, + { + "epoch": 0.3077944496082857, + "grad_norm": 1.0805327657153956, + "learning_rate": 1.9417772760614745e-05, + "loss": 0.34862661361694336, + "step": 1159 + }, + { + "epoch": 0.30806001858982873, + "grad_norm": 1.0329581193958253, + "learning_rate": 1.941629551347308e-05, + "loss": 0.35496509075164795, + "step": 1160 + }, + { + "epoch": 0.3083255875713717, + "grad_norm": 1.051163133463375, + "learning_rate": 1.9414816450980686e-05, + "loss": 0.3695065975189209, + "step": 1161 + }, + { + "epoch": 0.30859115655291464, + "grad_norm": 1.0254769076684076, + "learning_rate": 1.9413335573422723e-05, + "loss": 0.3472525179386139, + "step": 1162 + }, + { + "epoch": 0.3088567255344576, + "grad_norm": 1.008969123299064, + "learning_rate": 1.9411852881084683e-05, + "loss": 0.3447483479976654, + "step": 1163 + }, + { + "epoch": 0.30912229451600054, + "grad_norm": 0.9333424416365893, + "learning_rate": 1.941036837425241e-05, + "loss": 0.31047824025154114, + "step": 1164 + }, + { + "epoch": 0.3093878634975435, + 
"grad_norm": 1.0570471012152007, + "learning_rate": 1.9408882053212094e-05, + "loss": 0.34502410888671875, + "step": 1165 + }, + { + "epoch": 0.30965343247908644, + "grad_norm": 1.1849442151759089, + "learning_rate": 1.940739391825029e-05, + "loss": 0.3663109540939331, + "step": 1166 + }, + { + "epoch": 0.3099190014606294, + "grad_norm": 1.1136723468346887, + "learning_rate": 1.9405903969653887e-05, + "loss": 0.3635792136192322, + "step": 1167 + }, + { + "epoch": 0.31018457044217235, + "grad_norm": 1.0769441486287206, + "learning_rate": 1.940441220771013e-05, + "loss": 0.359528124332428, + "step": 1168 + }, + { + "epoch": 0.3104501394237153, + "grad_norm": 1.043185528474707, + "learning_rate": 1.9402918632706618e-05, + "loss": 0.32566630840301514, + "step": 1169 + }, + { + "epoch": 0.31071570840525825, + "grad_norm": 1.0286897614370414, + "learning_rate": 1.940142324493129e-05, + "loss": 0.34758460521698, + "step": 1170 + }, + { + "epoch": 0.3109812773868012, + "grad_norm": 1.0148570847451444, + "learning_rate": 1.9399926044672438e-05, + "loss": 0.3484055995941162, + "step": 1171 + }, + { + "epoch": 0.31124684636834415, + "grad_norm": 1.1806099587394492, + "learning_rate": 1.93984270322187e-05, + "loss": 0.41958773136138916, + "step": 1172 + }, + { + "epoch": 0.3115124153498871, + "grad_norm": 1.085314216258339, + "learning_rate": 1.9396926207859085e-05, + "loss": 0.3578398525714874, + "step": 1173 + }, + { + "epoch": 0.3117779843314301, + "grad_norm": 1.0721505496116728, + "learning_rate": 1.9395423571882917e-05, + "loss": 0.38140422105789185, + "step": 1174 + }, + { + "epoch": 0.31204355331297307, + "grad_norm": 1.1224661464468277, + "learning_rate": 1.9393919124579898e-05, + "loss": 0.3782861828804016, + "step": 1175 + }, + { + "epoch": 0.312309122294516, + "grad_norm": 1.0482874367837718, + "learning_rate": 1.939241286624006e-05, + "loss": 0.3211040496826172, + "step": 1176 + }, + { + "epoch": 0.31257469127605897, + "grad_norm": 0.9909015391020882, + 
"learning_rate": 1.9390904797153795e-05, + "loss": 0.3090783953666687, + "step": 1177 + }, + { + "epoch": 0.3128402602576019, + "grad_norm": 1.0203166402095418, + "learning_rate": 1.938939491761184e-05, + "loss": 0.3542889654636383, + "step": 1178 + }, + { + "epoch": 0.3131058292391449, + "grad_norm": 1.016567110972503, + "learning_rate": 1.9387883227905285e-05, + "loss": 0.369164377450943, + "step": 1179 + }, + { + "epoch": 0.3133713982206878, + "grad_norm": 1.1492868354113897, + "learning_rate": 1.9386369728325562e-05, + "loss": 0.35200801491737366, + "step": 1180 + }, + { + "epoch": 0.3136369672022308, + "grad_norm": 1.1332626811675575, + "learning_rate": 1.9384854419164454e-05, + "loss": 0.3696276843547821, + "step": 1181 + }, + { + "epoch": 0.31390253618377373, + "grad_norm": 0.9856387823657043, + "learning_rate": 1.9383337300714104e-05, + "loss": 0.3403652012348175, + "step": 1182 + }, + { + "epoch": 0.3141681051653167, + "grad_norm": 0.9608300998441986, + "learning_rate": 1.9381818373266987e-05, + "loss": 0.3307063579559326, + "step": 1183 + }, + { + "epoch": 0.31443367414685963, + "grad_norm": 1.002604353314113, + "learning_rate": 1.9380297637115933e-05, + "loss": 0.3223465085029602, + "step": 1184 + }, + { + "epoch": 0.3146992431284026, + "grad_norm": 1.1668926481270334, + "learning_rate": 1.9378775092554124e-05, + "loss": 0.4013838768005371, + "step": 1185 + }, + { + "epoch": 0.31496481210994554, + "grad_norm": 1.2376602965184098, + "learning_rate": 1.9377250739875095e-05, + "loss": 0.3596574664115906, + "step": 1186 + }, + { + "epoch": 0.3152303810914885, + "grad_norm": 1.0683740579575798, + "learning_rate": 1.937572457937271e-05, + "loss": 0.41639968752861023, + "step": 1187 + }, + { + "epoch": 0.3154959500730315, + "grad_norm": 0.950341293536979, + "learning_rate": 1.9374196611341212e-05, + "loss": 0.3001318573951721, + "step": 1188 + }, + { + "epoch": 0.31576151905457445, + "grad_norm": 1.0390515723802394, + "learning_rate": 1.937266683607516e-05, + 
"loss": 0.33238667249679565, + "step": 1189 + }, + { + "epoch": 0.3160270880361174, + "grad_norm": 1.0559788990716998, + "learning_rate": 1.9371135253869483e-05, + "loss": 0.33638086915016174, + "step": 1190 + }, + { + "epoch": 0.31629265701766035, + "grad_norm": 1.0736881782093415, + "learning_rate": 1.9369601865019452e-05, + "loss": 0.34445878863334656, + "step": 1191 + }, + { + "epoch": 0.3165582259992033, + "grad_norm": 1.116672373820781, + "learning_rate": 1.9368066669820684e-05, + "loss": 0.33554553985595703, + "step": 1192 + }, + { + "epoch": 0.31682379498074625, + "grad_norm": 1.2940820576034424, + "learning_rate": 1.936652966856915e-05, + "loss": 0.3668493628501892, + "step": 1193 + }, + { + "epoch": 0.3170893639622892, + "grad_norm": 1.1460266164336763, + "learning_rate": 1.9364990861561163e-05, + "loss": 0.3813396990299225, + "step": 1194 + }, + { + "epoch": 0.31735493294383216, + "grad_norm": 1.048871056336621, + "learning_rate": 1.936345024909339e-05, + "loss": 0.33625900745391846, + "step": 1195 + }, + { + "epoch": 0.3176205019253751, + "grad_norm": 1.0238786804477913, + "learning_rate": 1.9361907831462836e-05, + "loss": 0.31131428480148315, + "step": 1196 + }, + { + "epoch": 0.31788607090691806, + "grad_norm": 0.9751456398999766, + "learning_rate": 1.936036360896687e-05, + "loss": 0.32571589946746826, + "step": 1197 + }, + { + "epoch": 0.318151639888461, + "grad_norm": 1.1296061558872548, + "learning_rate": 1.9358817581903193e-05, + "loss": 0.36207717657089233, + "step": 1198 + }, + { + "epoch": 0.31841720887000396, + "grad_norm": 1.062344543153862, + "learning_rate": 1.9357269750569864e-05, + "loss": 0.3743855059146881, + "step": 1199 + }, + { + "epoch": 0.3186827778515469, + "grad_norm": 1.1254060799620074, + "learning_rate": 1.9355720115265283e-05, + "loss": 0.3862137794494629, + "step": 1200 + }, + { + "epoch": 0.31894834683308987, + "grad_norm": 1.1135871061204583, + "learning_rate": 1.935416867628821e-05, + "loss": 0.33353424072265625, + 
"step": 1201 + }, + { + "epoch": 0.3192139158146329, + "grad_norm": 9.759113022509682, + "learning_rate": 1.9352615433937733e-05, + "loss": 0.3277953267097473, + "step": 1202 + }, + { + "epoch": 0.3194794847961758, + "grad_norm": 1.104737565124737, + "learning_rate": 1.9351060388513304e-05, + "loss": 0.38247692584991455, + "step": 1203 + }, + { + "epoch": 0.3197450537777188, + "grad_norm": 1.0645482624060865, + "learning_rate": 1.9349503540314724e-05, + "loss": 0.3330709934234619, + "step": 1204 + }, + { + "epoch": 0.32001062275926173, + "grad_norm": 1.1382102351287038, + "learning_rate": 1.9347944889642125e-05, + "loss": 0.3809449076652527, + "step": 1205 + }, + { + "epoch": 0.3202761917408047, + "grad_norm": 0.9591245399492223, + "learning_rate": 1.9346384436796e-05, + "loss": 0.33623188734054565, + "step": 1206 + }, + { + "epoch": 0.32054176072234764, + "grad_norm": 1.0414583731283242, + "learning_rate": 1.9344822182077184e-05, + "loss": 0.35465264320373535, + "step": 1207 + }, + { + "epoch": 0.3208073297038906, + "grad_norm": 1.0419539507532576, + "learning_rate": 1.9343258125786866e-05, + "loss": 0.3532233238220215, + "step": 1208 + }, + { + "epoch": 0.32107289868543354, + "grad_norm": 0.972348986123494, + "learning_rate": 1.9341692268226572e-05, + "loss": 0.3498903512954712, + "step": 1209 + }, + { + "epoch": 0.3213384676669765, + "grad_norm": 1.057700016356479, + "learning_rate": 1.9340124609698185e-05, + "loss": 0.36124879121780396, + "step": 1210 + }, + { + "epoch": 0.32160403664851944, + "grad_norm": 1.1891126233384992, + "learning_rate": 1.933855515050393e-05, + "loss": 0.38535434007644653, + "step": 1211 + }, + { + "epoch": 0.3218696056300624, + "grad_norm": 1.1201736183139164, + "learning_rate": 1.9336983890946383e-05, + "loss": 0.39999911189079285, + "step": 1212 + }, + { + "epoch": 0.32213517461160535, + "grad_norm": 1.1396977359685507, + "learning_rate": 1.9335410831328457e-05, + "loss": 0.3519791066646576, + "step": 1213 + }, + { + "epoch": 
0.3224007435931483, + "grad_norm": 1.1624196201646915, + "learning_rate": 1.9333835971953424e-05, + "loss": 0.35882368683815, + "step": 1214 + }, + { + "epoch": 0.32266631257469125, + "grad_norm": 1.2089532713833613, + "learning_rate": 1.93322593131249e-05, + "loss": 0.36132001876831055, + "step": 1215 + }, + { + "epoch": 0.32293188155623426, + "grad_norm": 1.0741169297687752, + "learning_rate": 1.9330680855146845e-05, + "loss": 0.36840832233428955, + "step": 1216 + }, + { + "epoch": 0.3231974505377772, + "grad_norm": 1.1553079333487188, + "learning_rate": 1.9329100598323563e-05, + "loss": 0.3755963444709778, + "step": 1217 + }, + { + "epoch": 0.32346301951932016, + "grad_norm": 1.1792888887437214, + "learning_rate": 1.9327518542959717e-05, + "loss": 0.400601863861084, + "step": 1218 + }, + { + "epoch": 0.3237285885008631, + "grad_norm": 1.0342294479515497, + "learning_rate": 1.93259346893603e-05, + "loss": 0.3100128769874573, + "step": 1219 + }, + { + "epoch": 0.32399415748240606, + "grad_norm": 1.0633052239431813, + "learning_rate": 1.9324349037830665e-05, + "loss": 0.3439880609512329, + "step": 1220 + }, + { + "epoch": 0.324259726463949, + "grad_norm": 1.1634088151631976, + "learning_rate": 1.9322761588676505e-05, + "loss": 0.3612631559371948, + "step": 1221 + }, + { + "epoch": 0.32452529544549197, + "grad_norm": 1.1292400605185824, + "learning_rate": 1.9321172342203863e-05, + "loss": 0.38202327489852905, + "step": 1222 + }, + { + "epoch": 0.3247908644270349, + "grad_norm": 1.0253004653890312, + "learning_rate": 1.9319581298719127e-05, + "loss": 0.3405265808105469, + "step": 1223 + }, + { + "epoch": 0.32505643340857787, + "grad_norm": 1.1499639639111883, + "learning_rate": 1.931798845852903e-05, + "loss": 0.4110907018184662, + "step": 1224 + }, + { + "epoch": 0.3253220023901208, + "grad_norm": 1.2758168253168263, + "learning_rate": 1.9316393821940654e-05, + "loss": 0.3007548451423645, + "step": 1225 + }, + { + "epoch": 0.3255875713716638, + "grad_norm": 
2.5438383009304673, + "learning_rate": 1.9314797389261426e-05, + "loss": 0.32769858837127686, + "step": 1226 + }, + { + "epoch": 0.3258531403532067, + "grad_norm": 1.0370704182885782, + "learning_rate": 1.931319916079912e-05, + "loss": 0.3619830310344696, + "step": 1227 + }, + { + "epoch": 0.3261187093347497, + "grad_norm": 1.2983573666738066, + "learning_rate": 1.9311599136861853e-05, + "loss": 0.3470210134983063, + "step": 1228 + }, + { + "epoch": 0.32638427831629263, + "grad_norm": 1.145435126731274, + "learning_rate": 1.9309997317758093e-05, + "loss": 0.3471665382385254, + "step": 1229 + }, + { + "epoch": 0.32664984729783564, + "grad_norm": 1.0757592201920594, + "learning_rate": 1.930839370379665e-05, + "loss": 0.3717760443687439, + "step": 1230 + }, + { + "epoch": 0.3269154162793786, + "grad_norm": 1.1173068015382108, + "learning_rate": 1.9306788295286687e-05, + "loss": 0.37279975414276123, + "step": 1231 + }, + { + "epoch": 0.32718098526092154, + "grad_norm": 1.1523781527891401, + "learning_rate": 1.93051810925377e-05, + "loss": 0.3884522020816803, + "step": 1232 + }, + { + "epoch": 0.3274465542424645, + "grad_norm": 1.1200431222189422, + "learning_rate": 1.9303572095859545e-05, + "loss": 0.4277604818344116, + "step": 1233 + }, + { + "epoch": 0.32771212322400745, + "grad_norm": 1.1197023145386935, + "learning_rate": 1.9301961305562415e-05, + "loss": 0.2888818681240082, + "step": 1234 + }, + { + "epoch": 0.3279776922055504, + "grad_norm": 1.0271311895282893, + "learning_rate": 1.9300348721956854e-05, + "loss": 0.3134511709213257, + "step": 1235 + }, + { + "epoch": 0.32824326118709335, + "grad_norm": 1.0800984792046815, + "learning_rate": 1.9298734345353745e-05, + "loss": 0.38525280356407166, + "step": 1236 + }, + { + "epoch": 0.3285088301686363, + "grad_norm": 1.134011749036063, + "learning_rate": 1.9297118176064324e-05, + "loss": 0.3692918121814728, + "step": 1237 + }, + { + "epoch": 0.32877439915017925, + "grad_norm": 1.0348260315377988, + "learning_rate": 
1.9295500214400165e-05, + "loss": 0.3443421721458435, + "step": 1238 + }, + { + "epoch": 0.3290399681317222, + "grad_norm": 1.0129455663017488, + "learning_rate": 1.9293880460673197e-05, + "loss": 0.3228621184825897, + "step": 1239 + }, + { + "epoch": 0.32930553711326516, + "grad_norm": 1.0116024279908165, + "learning_rate": 1.9292258915195688e-05, + "loss": 0.330943763256073, + "step": 1240 + }, + { + "epoch": 0.3295711060948081, + "grad_norm": 1.1814587344422625, + "learning_rate": 1.929063557828025e-05, + "loss": 0.356637567281723, + "step": 1241 + }, + { + "epoch": 0.32983667507635106, + "grad_norm": 0.9888159780201056, + "learning_rate": 1.9289010450239843e-05, + "loss": 0.3481113910675049, + "step": 1242 + }, + { + "epoch": 0.330102244057894, + "grad_norm": 1.1876931030431213, + "learning_rate": 1.928738353138778e-05, + "loss": 0.36579906940460205, + "step": 1243 + }, + { + "epoch": 0.330367813039437, + "grad_norm": 1.0281454378567854, + "learning_rate": 1.9285754822037705e-05, + "loss": 0.33025234937667847, + "step": 1244 + }, + { + "epoch": 0.33063338202097997, + "grad_norm": 1.0936673160473642, + "learning_rate": 1.9284124322503613e-05, + "loss": 0.34848469495773315, + "step": 1245 + }, + { + "epoch": 0.3308989510025229, + "grad_norm": 1.1232405017277023, + "learning_rate": 1.928249203309985e-05, + "loss": 0.3523876368999481, + "step": 1246 + }, + { + "epoch": 0.3311645199840659, + "grad_norm": 1.140153458583263, + "learning_rate": 1.92808579541411e-05, + "loss": 0.3695565462112427, + "step": 1247 + }, + { + "epoch": 0.3314300889656088, + "grad_norm": 1.0267337296320096, + "learning_rate": 1.9279222085942396e-05, + "loss": 0.3557945191860199, + "step": 1248 + }, + { + "epoch": 0.3316956579471518, + "grad_norm": 1.0261133198060035, + "learning_rate": 1.9277584428819113e-05, + "loss": 0.3015502989292145, + "step": 1249 + }, + { + "epoch": 0.33196122692869473, + "grad_norm": 0.9384869314897972, + "learning_rate": 1.9275944983086964e-05, + "loss": 
0.31333664059638977, + "step": 1250 + }, + { + "epoch": 0.3322267959102377, + "grad_norm": 1.103154580638619, + "learning_rate": 1.9274303749062028e-05, + "loss": 0.36595287919044495, + "step": 1251 + }, + { + "epoch": 0.33249236489178063, + "grad_norm": 1.0573816777840739, + "learning_rate": 1.9272660727060705e-05, + "loss": 0.3400266170501709, + "step": 1252 + }, + { + "epoch": 0.3327579338733236, + "grad_norm": 1.0994664368429343, + "learning_rate": 1.927101591739976e-05, + "loss": 0.3642529547214508, + "step": 1253 + }, + { + "epoch": 0.33302350285486654, + "grad_norm": 1.08059410662081, + "learning_rate": 1.926936932039628e-05, + "loss": 0.3418777287006378, + "step": 1254 + }, + { + "epoch": 0.3332890718364095, + "grad_norm": 1.0881678177934593, + "learning_rate": 1.9267720936367723e-05, + "loss": 0.33382388949394226, + "step": 1255 + }, + { + "epoch": 0.33355464081795244, + "grad_norm": 1.1227567600503816, + "learning_rate": 1.926607076563187e-05, + "loss": 0.36257779598236084, + "step": 1256 + }, + { + "epoch": 0.3338202097994954, + "grad_norm": 1.5546101865012443, + "learning_rate": 1.926441880850686e-05, + "loss": 0.3018002510070801, + "step": 1257 + }, + { + "epoch": 0.3340857787810384, + "grad_norm": 1.0263747105982135, + "learning_rate": 1.9262765065311165e-05, + "loss": 0.3373662233352661, + "step": 1258 + }, + { + "epoch": 0.33435134776258135, + "grad_norm": 1.0001644182280367, + "learning_rate": 1.9261109536363613e-05, + "loss": 0.3555397391319275, + "step": 1259 + }, + { + "epoch": 0.3346169167441243, + "grad_norm": 1.1519069907937776, + "learning_rate": 1.925945222198336e-05, + "loss": 0.3004256784915924, + "step": 1260 + }, + { + "epoch": 0.33488248572566726, + "grad_norm": 2.328412351070072, + "learning_rate": 1.925779312248993e-05, + "loss": 0.33299940824508667, + "step": 1261 + }, + { + "epoch": 0.3351480547072102, + "grad_norm": 1.0617967738999583, + "learning_rate": 1.9256132238203166e-05, + "loss": 0.3715725541114807, + "step": 1262 + }, + { 
+ "epoch": 0.33541362368875316, + "grad_norm": 1.0140049717249513, + "learning_rate": 1.9254469569443274e-05, + "loss": 0.35133951902389526, + "step": 1263 + }, + { + "epoch": 0.3356791926702961, + "grad_norm": 0.9980129680534503, + "learning_rate": 1.92528051165308e-05, + "loss": 0.3328818380832672, + "step": 1264 + }, + { + "epoch": 0.33594476165183906, + "grad_norm": 1.0764552464682182, + "learning_rate": 1.925113887978662e-05, + "loss": 0.3665468692779541, + "step": 1265 + }, + { + "epoch": 0.336210330633382, + "grad_norm": 1.0446302802374996, + "learning_rate": 1.9249470859531976e-05, + "loss": 0.3489571511745453, + "step": 1266 + }, + { + "epoch": 0.33647589961492497, + "grad_norm": 1.0629721705272823, + "learning_rate": 1.9247801056088433e-05, + "loss": 0.30038982629776, + "step": 1267 + }, + { + "epoch": 0.3367414685964679, + "grad_norm": 1.1798569183028156, + "learning_rate": 1.9246129469777918e-05, + "loss": 0.4163355827331543, + "step": 1268 + }, + { + "epoch": 0.33700703757801087, + "grad_norm": 1.0428552063046848, + "learning_rate": 1.924445610092269e-05, + "loss": 0.33687612414360046, + "step": 1269 + }, + { + "epoch": 0.3372726065595538, + "grad_norm": 1.0466869124167506, + "learning_rate": 1.924278094984535e-05, + "loss": 0.3448297679424286, + "step": 1270 + }, + { + "epoch": 0.3375381755410968, + "grad_norm": 1.0979384797680924, + "learning_rate": 1.9241104016868853e-05, + "loss": 0.35257208347320557, + "step": 1271 + }, + { + "epoch": 0.3378037445226398, + "grad_norm": 1.0794393535441016, + "learning_rate": 1.9239425302316487e-05, + "loss": 0.34880566596984863, + "step": 1272 + }, + { + "epoch": 0.33806931350418273, + "grad_norm": 1.1081978913885613, + "learning_rate": 1.9237744806511895e-05, + "loss": 0.33643782138824463, + "step": 1273 + }, + { + "epoch": 0.3383348824857257, + "grad_norm": 1.0185962864877929, + "learning_rate": 1.9236062529779057e-05, + "loss": 0.32345050573349, + "step": 1274 + }, + { + "epoch": 0.33860045146726864, + 
"grad_norm": 1.0547576972102612, + "learning_rate": 1.9234378472442286e-05, + "loss": 0.33983978629112244, + "step": 1275 + }, + { + "epoch": 0.3388660204488116, + "grad_norm": 1.0305326470674594, + "learning_rate": 1.923269263482626e-05, + "loss": 0.32825571298599243, + "step": 1276 + }, + { + "epoch": 0.33913158943035454, + "grad_norm": 1.0836151603415423, + "learning_rate": 1.923100501725598e-05, + "loss": 0.3434044122695923, + "step": 1277 + }, + { + "epoch": 0.3393971584118975, + "grad_norm": 1.1293248576076373, + "learning_rate": 1.9229315620056805e-05, + "loss": 0.3463204503059387, + "step": 1278 + }, + { + "epoch": 0.33966272739344044, + "grad_norm": 1.0476463818396518, + "learning_rate": 1.9227624443554425e-05, + "loss": 0.3608240485191345, + "step": 1279 + }, + { + "epoch": 0.3399282963749834, + "grad_norm": 1.111712780266586, + "learning_rate": 1.9225931488074882e-05, + "loss": 0.36131763458251953, + "step": 1280 + }, + { + "epoch": 0.34019386535652635, + "grad_norm": 0.9948222919660873, + "learning_rate": 1.922423675394456e-05, + "loss": 0.3270101547241211, + "step": 1281 + }, + { + "epoch": 0.3404594343380693, + "grad_norm": 1.1047356141038558, + "learning_rate": 1.922254024149018e-05, + "loss": 0.3551778495311737, + "step": 1282 + }, + { + "epoch": 0.34072500331961225, + "grad_norm": 1.1057498393465535, + "learning_rate": 1.9220841951038815e-05, + "loss": 0.3686622381210327, + "step": 1283 + }, + { + "epoch": 0.3409905723011552, + "grad_norm": 1.0810198379819234, + "learning_rate": 1.921914188291787e-05, + "loss": 0.35161536931991577, + "step": 1284 + }, + { + "epoch": 0.34125614128269816, + "grad_norm": 1.1489267376414198, + "learning_rate": 1.92174400374551e-05, + "loss": 0.3549870550632477, + "step": 1285 + }, + { + "epoch": 0.34152171026424116, + "grad_norm": 1.0904860537070935, + "learning_rate": 1.9215736414978593e-05, + "loss": 0.36780738830566406, + "step": 1286 + }, + { + "epoch": 0.3417872792457841, + "grad_norm": 1.132171748367688, + 
"learning_rate": 1.9214031015816803e-05, + "loss": 0.36060047149658203, + "step": 1287 + }, + { + "epoch": 0.34205284822732707, + "grad_norm": 1.0753334155968608, + "learning_rate": 1.9212323840298502e-05, + "loss": 0.32578715682029724, + "step": 1288 + }, + { + "epoch": 0.34231841720887, + "grad_norm": 1.0380534929488934, + "learning_rate": 1.9210614888752813e-05, + "loss": 0.3505493402481079, + "step": 1289 + }, + { + "epoch": 0.34258398619041297, + "grad_norm": 1.0227959332298084, + "learning_rate": 1.9208904161509203e-05, + "loss": 0.32681795954704285, + "step": 1290 + }, + { + "epoch": 0.3428495551719559, + "grad_norm": 1.0227973616384467, + "learning_rate": 1.9207191658897473e-05, + "loss": 0.34808459877967834, + "step": 1291 + }, + { + "epoch": 0.3431151241534989, + "grad_norm": 1.0810974703490968, + "learning_rate": 1.920547738124779e-05, + "loss": 0.3588678240776062, + "step": 1292 + }, + { + "epoch": 0.3433806931350418, + "grad_norm": 1.2030053357742059, + "learning_rate": 1.9203761328890626e-05, + "loss": 0.3528832495212555, + "step": 1293 + }, + { + "epoch": 0.3436462621165848, + "grad_norm": 1.35729757891191, + "learning_rate": 1.9202043502156833e-05, + "loss": 0.33549001812934875, + "step": 1294 + }, + { + "epoch": 0.34391183109812773, + "grad_norm": 1.0986147605525078, + "learning_rate": 1.920032390137758e-05, + "loss": 0.3466021418571472, + "step": 1295 + }, + { + "epoch": 0.3441774000796707, + "grad_norm": 1.0492164389172054, + "learning_rate": 1.9198602526884388e-05, + "loss": 0.35646146535873413, + "step": 1296 + }, + { + "epoch": 0.34444296906121363, + "grad_norm": 1.0348991752364494, + "learning_rate": 1.9196879379009112e-05, + "loss": 0.3442128300666809, + "step": 1297 + }, + { + "epoch": 0.3447085380427566, + "grad_norm": 1.083291442034964, + "learning_rate": 1.9195154458083962e-05, + "loss": 0.3854391872882843, + "step": 1298 + }, + { + "epoch": 0.34497410702429954, + "grad_norm": 1.202325074766952, + "learning_rate": 1.9193427764441477e-05, 
+ "loss": 0.376137375831604, + "step": 1299 + }, + { + "epoch": 0.34523967600584254, + "grad_norm": 1.1591691335477168, + "learning_rate": 1.9191699298414547e-05, + "loss": 0.3115769028663635, + "step": 1300 + }, + { + "epoch": 0.3455052449873855, + "grad_norm": 1.125127529667975, + "learning_rate": 1.9189969060336396e-05, + "loss": 0.32553282380104065, + "step": 1301 + }, + { + "epoch": 0.34577081396892845, + "grad_norm": 1.2442677252107, + "learning_rate": 1.9188237050540597e-05, + "loss": 0.39529356360435486, + "step": 1302 + }, + { + "epoch": 0.3460363829504714, + "grad_norm": 1.016155926476122, + "learning_rate": 1.9186503269361063e-05, + "loss": 0.3027458190917969, + "step": 1303 + }, + { + "epoch": 0.34630195193201435, + "grad_norm": 1.2178145504108082, + "learning_rate": 1.918476771713204e-05, + "loss": 0.39317795634269714, + "step": 1304 + }, + { + "epoch": 0.3465675209135573, + "grad_norm": 1.1358253756284789, + "learning_rate": 1.918303039418813e-05, + "loss": 0.3730325698852539, + "step": 1305 + }, + { + "epoch": 0.34683308989510025, + "grad_norm": 1.0835224567793253, + "learning_rate": 1.918129130086426e-05, + "loss": 0.34862780570983887, + "step": 1306 + }, + { + "epoch": 0.3470986588766432, + "grad_norm": 1.106131252801308, + "learning_rate": 1.9179550437495707e-05, + "loss": 0.32139018177986145, + "step": 1307 + }, + { + "epoch": 0.34736422785818616, + "grad_norm": 1.118754726003564, + "learning_rate": 1.91778078044181e-05, + "loss": 0.37246090173721313, + "step": 1308 + }, + { + "epoch": 0.3476297968397291, + "grad_norm": 1.035507147337034, + "learning_rate": 1.9176063401967386e-05, + "loss": 0.30985957384109497, + "step": 1309 + }, + { + "epoch": 0.34789536582127206, + "grad_norm": 1.1303664709170593, + "learning_rate": 1.917431723047987e-05, + "loss": 0.3713758587837219, + "step": 1310 + }, + { + "epoch": 0.348160934802815, + "grad_norm": 1.076206973404712, + "learning_rate": 1.9172569290292193e-05, + "loss": 0.3465833067893982, + "step": 1311 + 
}, + { + "epoch": 0.34842650378435797, + "grad_norm": 1.1789932919731194, + "learning_rate": 1.917081958174134e-05, + "loss": 0.34807220101356506, + "step": 1312 + }, + { + "epoch": 0.3486920727659009, + "grad_norm": 1.0178456651378849, + "learning_rate": 1.9169068105164627e-05, + "loss": 0.3369640111923218, + "step": 1313 + }, + { + "epoch": 0.3489576417474439, + "grad_norm": 1.1714339652663717, + "learning_rate": 1.9167314860899724e-05, + "loss": 0.3521544337272644, + "step": 1314 + }, + { + "epoch": 0.3492232107289869, + "grad_norm": 0.9756562815370131, + "learning_rate": 1.9165559849284635e-05, + "loss": 0.3256300687789917, + "step": 1315 + }, + { + "epoch": 0.34948877971052983, + "grad_norm": 1.1173269078403432, + "learning_rate": 1.9163803070657706e-05, + "loss": 0.32401931285858154, + "step": 1316 + }, + { + "epoch": 0.3497543486920728, + "grad_norm": 1.104564951170044, + "learning_rate": 1.916204452535762e-05, + "loss": 0.372749924659729, + "step": 1317 + }, + { + "epoch": 0.35001991767361573, + "grad_norm": 1.053240444697934, + "learning_rate": 1.9160284213723407e-05, + "loss": 0.35853224992752075, + "step": 1318 + }, + { + "epoch": 0.3502854866551587, + "grad_norm": 1.048325144857422, + "learning_rate": 1.9158522136094433e-05, + "loss": 0.32850801944732666, + "step": 1319 + }, + { + "epoch": 0.35055105563670164, + "grad_norm": 1.1274703494911789, + "learning_rate": 1.9156758292810404e-05, + "loss": 0.3548474907875061, + "step": 1320 + }, + { + "epoch": 0.3508166246182446, + "grad_norm": 1.10371779317482, + "learning_rate": 1.9154992684211372e-05, + "loss": 0.38709041476249695, + "step": 1321 + }, + { + "epoch": 0.35108219359978754, + "grad_norm": 1.1369910570736041, + "learning_rate": 1.9153225310637726e-05, + "loss": 0.40369266271591187, + "step": 1322 + }, + { + "epoch": 0.3513477625813305, + "grad_norm": 1.179710362637603, + "learning_rate": 1.9151456172430186e-05, + "loss": 0.3570155203342438, + "step": 1323 + }, + { + "epoch": 0.35161333156287344, + 
"grad_norm": 1.0315056954444073, + "learning_rate": 1.9149685269929833e-05, + "loss": 0.34426411986351013, + "step": 1324 + }, + { + "epoch": 0.3518789005444164, + "grad_norm": 1.0980268876500368, + "learning_rate": 1.9147912603478066e-05, + "loss": 0.35666006803512573, + "step": 1325 + }, + { + "epoch": 0.35214446952595935, + "grad_norm": 1.0320732816254274, + "learning_rate": 1.9146138173416643e-05, + "loss": 0.36225512623786926, + "step": 1326 + }, + { + "epoch": 0.3524100385075023, + "grad_norm": 1.0499655117353668, + "learning_rate": 1.9144361980087643e-05, + "loss": 0.3312349319458008, + "step": 1327 + }, + { + "epoch": 0.3526756074890453, + "grad_norm": 1.0828461821707789, + "learning_rate": 1.9142584023833506e-05, + "loss": 0.3590523302555084, + "step": 1328 + }, + { + "epoch": 0.35294117647058826, + "grad_norm": 1.2432343198034153, + "learning_rate": 1.9140804304996997e-05, + "loss": 0.341480016708374, + "step": 1329 + }, + { + "epoch": 0.3532067454521312, + "grad_norm": 1.0165353851066345, + "learning_rate": 1.913902282392122e-05, + "loss": 0.37246501445770264, + "step": 1330 + }, + { + "epoch": 0.35347231443367416, + "grad_norm": 1.0959834963108057, + "learning_rate": 1.913723958094963e-05, + "loss": 0.33834031224250793, + "step": 1331 + }, + { + "epoch": 0.3537378834152171, + "grad_norm": 1.0066884605687934, + "learning_rate": 1.913545457642601e-05, + "loss": 0.29285067319869995, + "step": 1332 + }, + { + "epoch": 0.35400345239676007, + "grad_norm": 1.0768479974972798, + "learning_rate": 1.913366781069449e-05, + "loss": 0.2903720736503601, + "step": 1333 + }, + { + "epoch": 0.354269021378303, + "grad_norm": 1.1311334028851072, + "learning_rate": 1.913187928409954e-05, + "loss": 0.36428314447402954, + "step": 1334 + }, + { + "epoch": 0.35453459035984597, + "grad_norm": 1.0473346547130091, + "learning_rate": 1.9130088996985967e-05, + "loss": 0.3379477560520172, + "step": 1335 + }, + { + "epoch": 0.3548001593413889, + "grad_norm": 1.0963924260325884, + 
"learning_rate": 1.912829694969891e-05, + "loss": 0.35286659002304077, + "step": 1336 + }, + { + "epoch": 0.3550657283229319, + "grad_norm": 1.1930831242867357, + "learning_rate": 1.9126503142583864e-05, + "loss": 0.3670174479484558, + "step": 1337 + }, + { + "epoch": 0.3553312973044748, + "grad_norm": 1.1294601866875984, + "learning_rate": 1.9124707575986642e-05, + "loss": 0.3422902226448059, + "step": 1338 + }, + { + "epoch": 0.3555968662860178, + "grad_norm": 0.9984746022499613, + "learning_rate": 1.912291025025342e-05, + "loss": 0.29778385162353516, + "step": 1339 + }, + { + "epoch": 0.35586243526756073, + "grad_norm": 1.1907673127670892, + "learning_rate": 1.91211111657307e-05, + "loss": 0.36249661445617676, + "step": 1340 + }, + { + "epoch": 0.3561280042491037, + "grad_norm": 1.1054946723600563, + "learning_rate": 1.9119310322765315e-05, + "loss": 0.340925395488739, + "step": 1341 + }, + { + "epoch": 0.3563935732306467, + "grad_norm": 1.1964466720866056, + "learning_rate": 1.9117507721704455e-05, + "loss": 0.35674089193344116, + "step": 1342 + }, + { + "epoch": 0.35665914221218964, + "grad_norm": 1.1077144979302902, + "learning_rate": 1.9115703362895636e-05, + "loss": 0.3602067828178406, + "step": 1343 + }, + { + "epoch": 0.3569247111937326, + "grad_norm": 1.1669501112510636, + "learning_rate": 1.9113897246686716e-05, + "loss": 0.35211697220802307, + "step": 1344 + }, + { + "epoch": 0.35719028017527554, + "grad_norm": 1.1098565168791754, + "learning_rate": 1.91120893734259e-05, + "loss": 0.3706115484237671, + "step": 1345 + }, + { + "epoch": 0.3574558491568185, + "grad_norm": 0.955637908965499, + "learning_rate": 1.9110279743461717e-05, + "loss": 0.3365110754966736, + "step": 1346 + }, + { + "epoch": 0.35772141813836145, + "grad_norm": 1.2071736385011052, + "learning_rate": 1.9108468357143047e-05, + "loss": 0.40012121200561523, + "step": 1347 + }, + { + "epoch": 0.3579869871199044, + "grad_norm": 1.1409634140225444, + "learning_rate": 1.91066552148191e-05, + 
"loss": 0.4003351926803589, + "step": 1348 + }, + { + "epoch": 0.35825255610144735, + "grad_norm": 1.0613274196364288, + "learning_rate": 1.910484031683943e-05, + "loss": 0.3574616014957428, + "step": 1349 + }, + { + "epoch": 0.3585181250829903, + "grad_norm": 1.0904662824068834, + "learning_rate": 1.910302366355393e-05, + "loss": 0.3345073461532593, + "step": 1350 + }, + { + "epoch": 0.35878369406453325, + "grad_norm": 1.0532412802136695, + "learning_rate": 1.910120525531283e-05, + "loss": 0.3467676341533661, + "step": 1351 + }, + { + "epoch": 0.3590492630460762, + "grad_norm": 1.0529131768701299, + "learning_rate": 1.9099385092466695e-05, + "loss": 0.32433655858039856, + "step": 1352 + }, + { + "epoch": 0.35931483202761916, + "grad_norm": 1.0442908892383016, + "learning_rate": 1.909756317536643e-05, + "loss": 0.3366447985172272, + "step": 1353 + }, + { + "epoch": 0.3595804010091621, + "grad_norm": 1.0770054348386777, + "learning_rate": 1.909573950436328e-05, + "loss": 0.310118168592453, + "step": 1354 + }, + { + "epoch": 0.35984596999070506, + "grad_norm": 1.4782002462322321, + "learning_rate": 1.909391407980883e-05, + "loss": 0.3503451943397522, + "step": 1355 + }, + { + "epoch": 0.36011153897224807, + "grad_norm": 1.0889726916887852, + "learning_rate": 1.9092086902054996e-05, + "loss": 0.3375343978404999, + "step": 1356 + }, + { + "epoch": 0.360377107953791, + "grad_norm": 0.9368081121032712, + "learning_rate": 1.909025797145404e-05, + "loss": 0.3056451082229614, + "step": 1357 + }, + { + "epoch": 0.360642676935334, + "grad_norm": 0.9554491579006472, + "learning_rate": 1.9088427288358556e-05, + "loss": 0.3063391447067261, + "step": 1358 + }, + { + "epoch": 0.3609082459168769, + "grad_norm": 0.9358824747825566, + "learning_rate": 1.908659485312148e-05, + "loss": 0.3055405616760254, + "step": 1359 + }, + { + "epoch": 0.3611738148984199, + "grad_norm": 1.1828231629690173, + "learning_rate": 1.908476066609608e-05, + "loss": 0.38323235511779785, + "step": 1360 + }, 
+ { + "epoch": 0.36143938387996283, + "grad_norm": 1.0971994038941366, + "learning_rate": 1.908292472763597e-05, + "loss": 0.33526092767715454, + "step": 1361 + }, + { + "epoch": 0.3617049528615058, + "grad_norm": 1.0449346093027478, + "learning_rate": 1.9081087038095094e-05, + "loss": 0.34485238790512085, + "step": 1362 + }, + { + "epoch": 0.36197052184304873, + "grad_norm": 1.0943982229718532, + "learning_rate": 1.907924759782774e-05, + "loss": 0.2963239252567291, + "step": 1363 + }, + { + "epoch": 0.3622360908245917, + "grad_norm": 1.2033822452903298, + "learning_rate": 1.9077406407188532e-05, + "loss": 0.3536864221096039, + "step": 1364 + }, + { + "epoch": 0.36250165980613464, + "grad_norm": 1.1739216512613182, + "learning_rate": 1.907556346653242e-05, + "loss": 0.3724798858165741, + "step": 1365 + }, + { + "epoch": 0.3627672287876776, + "grad_norm": 1.2035474175290464, + "learning_rate": 1.9073718776214717e-05, + "loss": 0.36241161823272705, + "step": 1366 + }, + { + "epoch": 0.36303279776922054, + "grad_norm": 1.2262905723198394, + "learning_rate": 1.9071872336591042e-05, + "loss": 0.3484225273132324, + "step": 1367 + }, + { + "epoch": 0.3632983667507635, + "grad_norm": 1.11285184075262, + "learning_rate": 1.9070024148017375e-05, + "loss": 0.33606311678886414, + "step": 1368 + }, + { + "epoch": 0.36356393573230644, + "grad_norm": 1.076908267109863, + "learning_rate": 1.906817421085002e-05, + "loss": 0.3263503909111023, + "step": 1369 + }, + { + "epoch": 0.36382950471384945, + "grad_norm": 1.126388175466026, + "learning_rate": 1.906632252544563e-05, + "loss": 0.33454492688179016, + "step": 1370 + }, + { + "epoch": 0.3640950736953924, + "grad_norm": 1.1264022314316273, + "learning_rate": 1.9064469092161185e-05, + "loss": 0.34858438372612, + "step": 1371 + }, + { + "epoch": 0.36436064267693535, + "grad_norm": 1.0527021112264499, + "learning_rate": 1.9062613911354005e-05, + "loss": 0.3466234505176544, + "step": 1372 + }, + { + "epoch": 0.3646262116584783, + 
"grad_norm": 1.0325760706581486, + "learning_rate": 1.9060756983381743e-05, + "loss": 0.33574312925338745, + "step": 1373 + }, + { + "epoch": 0.36489178064002126, + "grad_norm": 1.0321788657369535, + "learning_rate": 1.90588983086024e-05, + "loss": 0.3012363016605377, + "step": 1374 + }, + { + "epoch": 0.3651573496215642, + "grad_norm": 1.0033389586223882, + "learning_rate": 1.90570378873743e-05, + "loss": 0.3050191402435303, + "step": 1375 + }, + { + "epoch": 0.36542291860310716, + "grad_norm": 1.0078763869776561, + "learning_rate": 1.905517572005611e-05, + "loss": 0.35090070962905884, + "step": 1376 + }, + { + "epoch": 0.3656884875846501, + "grad_norm": 1.011051809727729, + "learning_rate": 1.9053311807006845e-05, + "loss": 0.3276262581348419, + "step": 1377 + }, + { + "epoch": 0.36595405656619306, + "grad_norm": 1.300904148134606, + "learning_rate": 1.9051446148585833e-05, + "loss": 0.3303500711917877, + "step": 1378 + }, + { + "epoch": 0.366219625547736, + "grad_norm": 1.113413634877815, + "learning_rate": 1.9049578745152754e-05, + "loss": 0.3748486042022705, + "step": 1379 + }, + { + "epoch": 0.36648519452927897, + "grad_norm": 0.8707302355459249, + "learning_rate": 1.9047709597067628e-05, + "loss": 0.30339744687080383, + "step": 1380 + }, + { + "epoch": 0.3667507635108219, + "grad_norm": 1.0245709544347914, + "learning_rate": 1.9045838704690796e-05, + "loss": 0.31811147928237915, + "step": 1381 + }, + { + "epoch": 0.36701633249236487, + "grad_norm": 1.1759156162745943, + "learning_rate": 1.9043966068382945e-05, + "loss": 0.3541119694709778, + "step": 1382 + }, + { + "epoch": 0.3672819014739078, + "grad_norm": 1.0874467494483675, + "learning_rate": 1.9042091688505104e-05, + "loss": 0.36639657616615295, + "step": 1383 + }, + { + "epoch": 0.36754747045545083, + "grad_norm": 1.0242460437241268, + "learning_rate": 1.9040215565418628e-05, + "loss": 0.35859787464141846, + "step": 1384 + }, + { + "epoch": 0.3678130394369938, + "grad_norm": 1.017105790679022, + 
"learning_rate": 1.9038337699485207e-05, + "loss": 0.3210521340370178, + "step": 1385 + }, + { + "epoch": 0.36807860841853673, + "grad_norm": 1.0362268895966902, + "learning_rate": 1.9036458091066875e-05, + "loss": 0.3207433819770813, + "step": 1386 + }, + { + "epoch": 0.3683441774000797, + "grad_norm": 0.9948382455278952, + "learning_rate": 1.9034576740526e-05, + "loss": 0.3475082218647003, + "step": 1387 + }, + { + "epoch": 0.36860974638162264, + "grad_norm": 1.167057707852143, + "learning_rate": 1.903269364822528e-05, + "loss": 0.33252987265586853, + "step": 1388 + }, + { + "epoch": 0.3688753153631656, + "grad_norm": 1.0281516525035093, + "learning_rate": 1.903080881452776e-05, + "loss": 0.32200103998184204, + "step": 1389 + }, + { + "epoch": 0.36914088434470854, + "grad_norm": 1.0752934055327636, + "learning_rate": 1.9028922239796803e-05, + "loss": 0.34780022501945496, + "step": 1390 + }, + { + "epoch": 0.3694064533262515, + "grad_norm": 1.1028643639363398, + "learning_rate": 1.902703392439613e-05, + "loss": 0.35411912202835083, + "step": 1391 + }, + { + "epoch": 0.36967202230779445, + "grad_norm": 1.6627965093255739, + "learning_rate": 1.9025143868689773e-05, + "loss": 0.35232803225517273, + "step": 1392 + }, + { + "epoch": 0.3699375912893374, + "grad_norm": 1.168292115519334, + "learning_rate": 1.9023252073042128e-05, + "loss": 0.38561391830444336, + "step": 1393 + }, + { + "epoch": 0.37020316027088035, + "grad_norm": 0.9982322437598163, + "learning_rate": 1.9021358537817897e-05, + "loss": 0.3184170126914978, + "step": 1394 + }, + { + "epoch": 0.3704687292524233, + "grad_norm": 1.0557333187102689, + "learning_rate": 1.9019463263382142e-05, + "loss": 0.32455068826675415, + "step": 1395 + }, + { + "epoch": 0.37073429823396625, + "grad_norm": 1.0862364532602506, + "learning_rate": 1.901756625010024e-05, + "loss": 0.32998934388160706, + "step": 1396 + }, + { + "epoch": 0.3709998672155092, + "grad_norm": 1.1350071137219766, + "learning_rate": 
1.901566749833792e-05, + "loss": 0.3361780643463135, + "step": 1397 + }, + { + "epoch": 0.37126543619705216, + "grad_norm": 1.1483051699341575, + "learning_rate": 1.9013767008461236e-05, + "loss": 0.3618711829185486, + "step": 1398 + }, + { + "epoch": 0.37153100517859516, + "grad_norm": 1.1250978483748488, + "learning_rate": 1.901186478083658e-05, + "loss": 0.3904131054878235, + "step": 1399 + }, + { + "epoch": 0.3717965741601381, + "grad_norm": 1.0885741580509858, + "learning_rate": 1.9009960815830676e-05, + "loss": 0.35742759704589844, + "step": 1400 + }, + { + "epoch": 0.37206214314168107, + "grad_norm": 1.073570835222054, + "learning_rate": 1.9008055113810595e-05, + "loss": 0.32880812883377075, + "step": 1401 + }, + { + "epoch": 0.372327712123224, + "grad_norm": 1.0645240727318732, + "learning_rate": 1.9006147675143724e-05, + "loss": 0.3379839360713959, + "step": 1402 + }, + { + "epoch": 0.37259328110476697, + "grad_norm": 1.1363528922504198, + "learning_rate": 1.90042385001978e-05, + "loss": 0.3635789453983307, + "step": 1403 + }, + { + "epoch": 0.3728588500863099, + "grad_norm": 1.1103620354136925, + "learning_rate": 1.900232758934089e-05, + "loss": 0.3462461233139038, + "step": 1404 + }, + { + "epoch": 0.3731244190678529, + "grad_norm": 1.1087128591527484, + "learning_rate": 1.900041494294139e-05, + "loss": 0.34578579664230347, + "step": 1405 + }, + { + "epoch": 0.3733899880493958, + "grad_norm": 1.1067984269435176, + "learning_rate": 1.899850056136804e-05, + "loss": 0.36266931891441345, + "step": 1406 + }, + { + "epoch": 0.3736555570309388, + "grad_norm": 1.089685836132972, + "learning_rate": 1.899658444498991e-05, + "loss": 0.34019365906715393, + "step": 1407 + }, + { + "epoch": 0.37392112601248173, + "grad_norm": 1.0009475991478056, + "learning_rate": 1.8994666594176404e-05, + "loss": 0.3057953119277954, + "step": 1408 + }, + { + "epoch": 0.3741866949940247, + "grad_norm": 1.1008245937613312, + "learning_rate": 1.8992747009297265e-05, + "loss": 
0.3663131892681122, + "step": 1409 + }, + { + "epoch": 0.37445226397556763, + "grad_norm": 1.0696938984110862, + "learning_rate": 1.8990825690722557e-05, + "loss": 0.3402065634727478, + "step": 1410 + }, + { + "epoch": 0.3747178329571106, + "grad_norm": 1.017664192724319, + "learning_rate": 1.8988902638822693e-05, + "loss": 0.3437868654727936, + "step": 1411 + }, + { + "epoch": 0.37498340193865354, + "grad_norm": 1.2246388577961873, + "learning_rate": 1.8986977853968416e-05, + "loss": 0.40972524881362915, + "step": 1412 + }, + { + "epoch": 0.37524897092019655, + "grad_norm": 1.0293557658064552, + "learning_rate": 1.89850513365308e-05, + "loss": 0.3237977921962738, + "step": 1413 + }, + { + "epoch": 0.3755145399017395, + "grad_norm": 0.9581631299919097, + "learning_rate": 1.8983123086881254e-05, + "loss": 0.3146173357963562, + "step": 1414 + }, + { + "epoch": 0.37578010888328245, + "grad_norm": 0.9942979474502576, + "learning_rate": 1.8981193105391524e-05, + "loss": 0.33485543727874756, + "step": 1415 + }, + { + "epoch": 0.3760456778648254, + "grad_norm": 1.0963696340494955, + "learning_rate": 1.8979261392433685e-05, + "loss": 0.36379897594451904, + "step": 1416 + }, + { + "epoch": 0.37631124684636835, + "grad_norm": 0.902828061805848, + "learning_rate": 1.8977327948380154e-05, + "loss": 0.2737882137298584, + "step": 1417 + }, + { + "epoch": 0.3765768158279113, + "grad_norm": 1.1168765744666191, + "learning_rate": 1.897539277360367e-05, + "loss": 0.3554575443267822, + "step": 1418 + }, + { + "epoch": 0.37684238480945426, + "grad_norm": 1.0021058464909711, + "learning_rate": 1.897345586847731e-05, + "loss": 0.3297621011734009, + "step": 1419 + }, + { + "epoch": 0.3771079537909972, + "grad_norm": 1.1638469907551372, + "learning_rate": 1.8971517233374497e-05, + "loss": 0.32272985577583313, + "step": 1420 + }, + { + "epoch": 0.37737352277254016, + "grad_norm": 1.0280583772355378, + "learning_rate": 1.8969576868668967e-05, + "loss": 0.32175642251968384, + "step": 1421 + 
}, + { + "epoch": 0.3776390917540831, + "grad_norm": 1.1136468557030246, + "learning_rate": 1.8967634774734807e-05, + "loss": 0.35973137617111206, + "step": 1422 + }, + { + "epoch": 0.37790466073562606, + "grad_norm": 1.1892680335343753, + "learning_rate": 1.8965690951946424e-05, + "loss": 0.3385169506072998, + "step": 1423 + }, + { + "epoch": 0.378170229717169, + "grad_norm": 1.1245023779822048, + "learning_rate": 1.8963745400678564e-05, + "loss": 0.3683067560195923, + "step": 1424 + }, + { + "epoch": 0.37843579869871197, + "grad_norm": 1.1630069521478075, + "learning_rate": 1.896179812130631e-05, + "loss": 0.3711622357368469, + "step": 1425 + }, + { + "epoch": 0.3787013676802549, + "grad_norm": 1.015020556732164, + "learning_rate": 1.895984911420507e-05, + "loss": 0.30416572093963623, + "step": 1426 + }, + { + "epoch": 0.3789669366617979, + "grad_norm": 1.079958708031102, + "learning_rate": 1.8957898379750598e-05, + "loss": 0.3439522385597229, + "step": 1427 + }, + { + "epoch": 0.3792325056433409, + "grad_norm": 1.1382084488728177, + "learning_rate": 1.895594591831896e-05, + "loss": 0.3663806617259979, + "step": 1428 + }, + { + "epoch": 0.37949807462488383, + "grad_norm": 1.0501527452156108, + "learning_rate": 1.895399173028658e-05, + "loss": 0.32132354378700256, + "step": 1429 + }, + { + "epoch": 0.3797636436064268, + "grad_norm": 0.9916462964383544, + "learning_rate": 1.8952035816030196e-05, + "loss": 0.3040635585784912, + "step": 1430 + }, + { + "epoch": 0.38002921258796973, + "grad_norm": 1.1155299107557486, + "learning_rate": 1.8950078175926886e-05, + "loss": 0.3548869788646698, + "step": 1431 + }, + { + "epoch": 0.3802947815695127, + "grad_norm": 1.1280933582225339, + "learning_rate": 1.894811881035406e-05, + "loss": 0.3114319443702698, + "step": 1432 + }, + { + "epoch": 0.38056035055105564, + "grad_norm": 1.151174980739505, + "learning_rate": 1.894615771968946e-05, + "loss": 0.3589673936367035, + "step": 1433 + }, + { + "epoch": 0.3808259195325986, + 
"grad_norm": 1.1074661491088642, + "learning_rate": 1.894419490431116e-05, + "loss": 0.3073863983154297, + "step": 1434 + }, + { + "epoch": 0.38109148851414154, + "grad_norm": 1.0689323921068359, + "learning_rate": 1.8942230364597572e-05, + "loss": 0.32474076747894287, + "step": 1435 + }, + { + "epoch": 0.3813570574956845, + "grad_norm": 2.6127931856999314, + "learning_rate": 1.8940264100927432e-05, + "loss": 0.3363546133041382, + "step": 1436 + }, + { + "epoch": 0.38162262647722744, + "grad_norm": 0.9995665434586938, + "learning_rate": 1.8938296113679814e-05, + "loss": 0.33679312467575073, + "step": 1437 + }, + { + "epoch": 0.3818881954587704, + "grad_norm": 1.0113319573344832, + "learning_rate": 1.8936326403234125e-05, + "loss": 0.33171382546424866, + "step": 1438 + }, + { + "epoch": 0.38215376444031335, + "grad_norm": 1.0880785150495547, + "learning_rate": 1.8934354969970097e-05, + "loss": 0.3717402219772339, + "step": 1439 + }, + { + "epoch": 0.3824193334218563, + "grad_norm": 1.1102375952968466, + "learning_rate": 1.8932381814267802e-05, + "loss": 0.335337370634079, + "step": 1440 + }, + { + "epoch": 0.3826849024033993, + "grad_norm": 1.010201255539417, + "learning_rate": 1.893040693650764e-05, + "loss": 0.32745444774627686, + "step": 1441 + }, + { + "epoch": 0.38295047138494226, + "grad_norm": 1.045820108792802, + "learning_rate": 1.892843033707035e-05, + "loss": 0.34863507747650146, + "step": 1442 + }, + { + "epoch": 0.3832160403664852, + "grad_norm": 1.0344465763282014, + "learning_rate": 1.8926452016336987e-05, + "loss": 0.3428313732147217, + "step": 1443 + }, + { + "epoch": 0.38348160934802816, + "grad_norm": 0.9882681324904586, + "learning_rate": 1.8924471974688956e-05, + "loss": 0.3223801851272583, + "step": 1444 + }, + { + "epoch": 0.3837471783295711, + "grad_norm": 1.2003387152989082, + "learning_rate": 1.8922490212507983e-05, + "loss": 0.33248746395111084, + "step": 1445 + }, + { + "epoch": 0.38401274731111407, + "grad_norm": 1.0404747226700646, + 
"learning_rate": 1.8920506730176125e-05, + "loss": 0.3472076654434204, + "step": 1446 + }, + { + "epoch": 0.384278316292657, + "grad_norm": 1.229166058737197, + "learning_rate": 1.891852152807578e-05, + "loss": 0.4385136365890503, + "step": 1447 + }, + { + "epoch": 0.38454388527419997, + "grad_norm": 1.0444838405880497, + "learning_rate": 1.8916534606589666e-05, + "loss": 0.36871540546417236, + "step": 1448 + }, + { + "epoch": 0.3848094542557429, + "grad_norm": 1.0803859921763799, + "learning_rate": 1.8914545966100843e-05, + "loss": 0.3136710524559021, + "step": 1449 + }, + { + "epoch": 0.3850750232372859, + "grad_norm": 1.0902031451870209, + "learning_rate": 1.891255560699269e-05, + "loss": 0.3236457109451294, + "step": 1450 + }, + { + "epoch": 0.3853405922188288, + "grad_norm": 0.9936714818929803, + "learning_rate": 1.8910563529648933e-05, + "loss": 0.3176822066307068, + "step": 1451 + }, + { + "epoch": 0.3856061612003718, + "grad_norm": 1.0635659473367998, + "learning_rate": 1.890856973445362e-05, + "loss": 0.3531719744205475, + "step": 1452 + }, + { + "epoch": 0.38587173018191473, + "grad_norm": 0.9470574553293423, + "learning_rate": 1.8906574221791127e-05, + "loss": 0.2911416292190552, + "step": 1453 + }, + { + "epoch": 0.3861372991634577, + "grad_norm": 1.0992858203425024, + "learning_rate": 1.890457699204617e-05, + "loss": 0.3522392511367798, + "step": 1454 + }, + { + "epoch": 0.3864028681450007, + "grad_norm": 1.1706910837372075, + "learning_rate": 1.8902578045603787e-05, + "loss": 0.3724471628665924, + "step": 1455 + }, + { + "epoch": 0.38666843712654364, + "grad_norm": 1.1807687078274312, + "learning_rate": 1.890057738284935e-05, + "loss": 0.2935449481010437, + "step": 1456 + }, + { + "epoch": 0.3869340061080866, + "grad_norm": 1.1181603604376231, + "learning_rate": 1.8898575004168568e-05, + "loss": 0.3413137197494507, + "step": 1457 + }, + { + "epoch": 0.38719957508962954, + "grad_norm": 1.1002740783107277, + "learning_rate": 1.8896570909947477e-05, + 
"loss": 0.32282277941703796, + "step": 1458 + }, + { + "epoch": 0.3874651440711725, + "grad_norm": 1.0071931608273124, + "learning_rate": 1.8894565100572435e-05, + "loss": 0.3285476565361023, + "step": 1459 + }, + { + "epoch": 0.38773071305271545, + "grad_norm": 1.010871057653593, + "learning_rate": 1.8892557576430147e-05, + "loss": 0.29517480731010437, + "step": 1460 + }, + { + "epoch": 0.3879962820342584, + "grad_norm": 0.9710184588467288, + "learning_rate": 1.8890548337907636e-05, + "loss": 0.2913149297237396, + "step": 1461 + }, + { + "epoch": 0.38826185101580135, + "grad_norm": 1.096024980027641, + "learning_rate": 1.8888537385392258e-05, + "loss": 0.32154160737991333, + "step": 1462 + }, + { + "epoch": 0.3885274199973443, + "grad_norm": 1.157775550745099, + "learning_rate": 1.88865247192717e-05, + "loss": 0.30677905678749084, + "step": 1463 + }, + { + "epoch": 0.38879298897888726, + "grad_norm": 1.1509749466488566, + "learning_rate": 1.888451033993399e-05, + "loss": 0.37568169832229614, + "step": 1464 + }, + { + "epoch": 0.3890585579604302, + "grad_norm": 1.0554287268781006, + "learning_rate": 1.8882494247767465e-05, + "loss": 0.34972083568573, + "step": 1465 + }, + { + "epoch": 0.38932412694197316, + "grad_norm": 1.1253148629548142, + "learning_rate": 1.888047644316081e-05, + "loss": 0.3198736906051636, + "step": 1466 + }, + { + "epoch": 0.3895896959235161, + "grad_norm": 1.0268445477998984, + "learning_rate": 1.887845692650303e-05, + "loss": 0.3405846953392029, + "step": 1467 + }, + { + "epoch": 0.38985526490505906, + "grad_norm": 1.1800981831391237, + "learning_rate": 1.8876435698183465e-05, + "loss": 0.3600257337093353, + "step": 1468 + }, + { + "epoch": 0.39012083388660207, + "grad_norm": 1.042232512137109, + "learning_rate": 1.887441275859179e-05, + "loss": 0.32415103912353516, + "step": 1469 + }, + { + "epoch": 0.390386402868145, + "grad_norm": 1.1736259107415346, + "learning_rate": 1.8872388108117995e-05, + "loss": 0.3450891673564911, + "step": 1470 + 
}, + { + "epoch": 0.390651971849688, + "grad_norm": 1.0534871304087963, + "learning_rate": 1.8870361747152416e-05, + "loss": 0.3210057318210602, + "step": 1471 + }, + { + "epoch": 0.3909175408312309, + "grad_norm": 1.1749127166764717, + "learning_rate": 1.8868333676085707e-05, + "loss": 0.3615706264972687, + "step": 1472 + }, + { + "epoch": 0.3911831098127739, + "grad_norm": 1.0750237065987462, + "learning_rate": 1.8866303895308856e-05, + "loss": 0.34149813652038574, + "step": 1473 + }, + { + "epoch": 0.39144867879431683, + "grad_norm": 0.91786674858188, + "learning_rate": 1.8864272405213188e-05, + "loss": 0.2795295715332031, + "step": 1474 + }, + { + "epoch": 0.3917142477758598, + "grad_norm": 1.1110559595870293, + "learning_rate": 1.8862239206190337e-05, + "loss": 0.3459053933620453, + "step": 1475 + }, + { + "epoch": 0.39197981675740273, + "grad_norm": 1.1048084354602663, + "learning_rate": 1.8860204298632294e-05, + "loss": 0.3531072735786438, + "step": 1476 + }, + { + "epoch": 0.3922453857389457, + "grad_norm": 1.128095083544478, + "learning_rate": 1.8858167682931357e-05, + "loss": 0.3788977265357971, + "step": 1477 + }, + { + "epoch": 0.39251095472048864, + "grad_norm": 1.3263027090109385, + "learning_rate": 1.8856129359480163e-05, + "loss": 0.3210671544075012, + "step": 1478 + }, + { + "epoch": 0.3927765237020316, + "grad_norm": 1.0773816671223826, + "learning_rate": 1.8854089328671673e-05, + "loss": 0.3442102074623108, + "step": 1479 + }, + { + "epoch": 0.39304209268357454, + "grad_norm": 1.0501956367137624, + "learning_rate": 1.885204759089919e-05, + "loss": 0.29128211736679077, + "step": 1480 + }, + { + "epoch": 0.3933076616651175, + "grad_norm": 1.1403330671915806, + "learning_rate": 1.885000414655633e-05, + "loss": 0.3601154088973999, + "step": 1481 + }, + { + "epoch": 0.39357323064666044, + "grad_norm": 1.032058056545269, + "learning_rate": 1.8847958996037042e-05, + "loss": 0.3173052668571472, + "step": 1482 + }, + { + "epoch": 0.39383879962820345, + 
"grad_norm": 1.0840123249628424, + "learning_rate": 1.8845912139735616e-05, + "loss": 0.32759106159210205, + "step": 1483 + }, + { + "epoch": 0.3941043686097464, + "grad_norm": 1.0868479290241493, + "learning_rate": 1.8843863578046657e-05, + "loss": 0.3213586211204529, + "step": 1484 + }, + { + "epoch": 0.39436993759128935, + "grad_norm": 1.0263834848721582, + "learning_rate": 1.8841813311365105e-05, + "loss": 0.342970073223114, + "step": 1485 + }, + { + "epoch": 0.3946355065728323, + "grad_norm": 1.1467746465148738, + "learning_rate": 1.883976134008622e-05, + "loss": 0.3852401375770569, + "step": 1486 + }, + { + "epoch": 0.39490107555437526, + "grad_norm": 1.0974253808771965, + "learning_rate": 1.883770766460561e-05, + "loss": 0.2965390682220459, + "step": 1487 + }, + { + "epoch": 0.3951666445359182, + "grad_norm": 1.1655078685340161, + "learning_rate": 1.883565228531919e-05, + "loss": 0.3899655044078827, + "step": 1488 + }, + { + "epoch": 0.39543221351746116, + "grad_norm": 1.1086105484757183, + "learning_rate": 1.8833595202623222e-05, + "loss": 0.339199423789978, + "step": 1489 + }, + { + "epoch": 0.3956977824990041, + "grad_norm": 1.049526058190211, + "learning_rate": 1.8831536416914278e-05, + "loss": 0.3121682405471802, + "step": 1490 + }, + { + "epoch": 0.39596335148054707, + "grad_norm": 1.073417591294797, + "learning_rate": 1.8829475928589272e-05, + "loss": 0.31947991251945496, + "step": 1491 + }, + { + "epoch": 0.39622892046209, + "grad_norm": 1.1660176936819076, + "learning_rate": 1.882741373804544e-05, + "loss": 0.3569333553314209, + "step": 1492 + }, + { + "epoch": 0.39649448944363297, + "grad_norm": 1.1521030930761056, + "learning_rate": 1.882534984568035e-05, + "loss": 0.3739020526409149, + "step": 1493 + }, + { + "epoch": 0.3967600584251759, + "grad_norm": 1.0930221251915908, + "learning_rate": 1.882328425189189e-05, + "loss": 0.34350353479385376, + "step": 1494 + }, + { + "epoch": 0.3970256274067189, + "grad_norm": 1.0780622136577362, + 
"learning_rate": 1.882121695707829e-05, + "loss": 0.3103981614112854, + "step": 1495 + }, + { + "epoch": 0.3972911963882618, + "grad_norm": 1.066229649085828, + "learning_rate": 1.8819147961638104e-05, + "loss": 0.33847716450691223, + "step": 1496 + }, + { + "epoch": 0.39755676536980483, + "grad_norm": 0.943119049120047, + "learning_rate": 1.8817077265970196e-05, + "loss": 0.3080996870994568, + "step": 1497 + }, + { + "epoch": 0.3978223343513478, + "grad_norm": 0.9758181744675688, + "learning_rate": 1.8815004870473777e-05, + "loss": 0.3247831463813782, + "step": 1498 + }, + { + "epoch": 0.39808790333289074, + "grad_norm": 0.9965389459031595, + "learning_rate": 1.8812930775548387e-05, + "loss": 0.2919698655605316, + "step": 1499 + }, + { + "epoch": 0.3983534723144337, + "grad_norm": 1.1815639690812958, + "learning_rate": 1.8810854981593883e-05, + "loss": 0.3627319931983948, + "step": 1500 + }, + { + "epoch": 0.39861904129597664, + "grad_norm": 1.0245222516327634, + "learning_rate": 1.880877748901045e-05, + "loss": 0.3619319796562195, + "step": 1501 + }, + { + "epoch": 0.3988846102775196, + "grad_norm": 1.0294076265521692, + "learning_rate": 1.8806698298198608e-05, + "loss": 0.3393789827823639, + "step": 1502 + }, + { + "epoch": 0.39915017925906254, + "grad_norm": 1.1375999694611314, + "learning_rate": 1.88046174095592e-05, + "loss": 0.3736116886138916, + "step": 1503 + }, + { + "epoch": 0.3994157482406055, + "grad_norm": 0.9615847393601772, + "learning_rate": 1.8802534823493395e-05, + "loss": 0.32829388976097107, + "step": 1504 + }, + { + "epoch": 0.39968131722214845, + "grad_norm": 1.004520084683698, + "learning_rate": 1.8800450540402694e-05, + "loss": 0.340041846036911, + "step": 1505 + }, + { + "epoch": 0.3999468862036914, + "grad_norm": 1.6423190284198783, + "learning_rate": 1.8798364560688917e-05, + "loss": 0.2830736041069031, + "step": 1506 + }, + { + "epoch": 0.40021245518523435, + "grad_norm": 1.126838308447994, + "learning_rate": 1.8796276884754224e-05, + 
"loss": 0.33011579513549805, + "step": 1507 + }, + { + "epoch": 0.4004780241667773, + "grad_norm": 1.0024833819275993, + "learning_rate": 1.8794187513001088e-05, + "loss": 0.2893834114074707, + "step": 1508 + }, + { + "epoch": 0.40074359314832025, + "grad_norm": 1.0682148927963429, + "learning_rate": 1.8792096445832317e-05, + "loss": 0.3590015172958374, + "step": 1509 + }, + { + "epoch": 0.4010091621298632, + "grad_norm": 1.1883404603513603, + "learning_rate": 1.8790003683651045e-05, + "loss": 0.3968508541584015, + "step": 1510 + }, + { + "epoch": 0.4012747311114062, + "grad_norm": 1.1506641785596874, + "learning_rate": 1.878790922686073e-05, + "loss": 0.324398934841156, + "step": 1511 + }, + { + "epoch": 0.40154030009294917, + "grad_norm": 1.0455658872732225, + "learning_rate": 1.8785813075865164e-05, + "loss": 0.35111895203590393, + "step": 1512 + }, + { + "epoch": 0.4018058690744921, + "grad_norm": 1.055231257150353, + "learning_rate": 1.8783715231068452e-05, + "loss": 0.28124356269836426, + "step": 1513 + }, + { + "epoch": 0.40207143805603507, + "grad_norm": 1.0070468428923411, + "learning_rate": 1.878161569287504e-05, + "loss": 0.28962311148643494, + "step": 1514 + }, + { + "epoch": 0.402337007037578, + "grad_norm": 1.0934983041480315, + "learning_rate": 1.877951446168969e-05, + "loss": 0.3646606206893921, + "step": 1515 + }, + { + "epoch": 0.402602576019121, + "grad_norm": 1.1065863254454682, + "learning_rate": 1.8777411537917497e-05, + "loss": 0.2815355360507965, + "step": 1516 + }, + { + "epoch": 0.4028681450006639, + "grad_norm": 1.1372178900816394, + "learning_rate": 1.877530692196388e-05, + "loss": 0.33208370208740234, + "step": 1517 + }, + { + "epoch": 0.4031337139822069, + "grad_norm": 1.0968319662456871, + "learning_rate": 1.8773200614234587e-05, + "loss": 0.33741289377212524, + "step": 1518 + }, + { + "epoch": 0.40339928296374983, + "grad_norm": 1.1178822197952292, + "learning_rate": 1.877109261513568e-05, + "loss": 0.31304073333740234, + "step": 
1519 + }, + { + "epoch": 0.4036648519452928, + "grad_norm": 1.264796618244999, + "learning_rate": 1.8768982925073566e-05, + "loss": 0.32556387782096863, + "step": 1520 + }, + { + "epoch": 0.40393042092683573, + "grad_norm": 1.1057344226732335, + "learning_rate": 1.8766871544454963e-05, + "loss": 0.3584224581718445, + "step": 1521 + }, + { + "epoch": 0.4041959899083787, + "grad_norm": 1.0109621512685618, + "learning_rate": 1.8764758473686918e-05, + "loss": 0.2864416837692261, + "step": 1522 + }, + { + "epoch": 0.40446155888992164, + "grad_norm": 1.0390539229722413, + "learning_rate": 1.8762643713176815e-05, + "loss": 0.28925320506095886, + "step": 1523 + }, + { + "epoch": 0.4047271278714646, + "grad_norm": 1.022628245189221, + "learning_rate": 1.876052726333235e-05, + "loss": 0.30940550565719604, + "step": 1524 + }, + { + "epoch": 0.4049926968530076, + "grad_norm": 1.1648500528958037, + "learning_rate": 1.875840912456155e-05, + "loss": 0.3463154733181, + "step": 1525 + }, + { + "epoch": 0.40525826583455055, + "grad_norm": 1.1823420506345301, + "learning_rate": 1.8756289297272764e-05, + "loss": 0.3349658250808716, + "step": 1526 + }, + { + "epoch": 0.4055238348160935, + "grad_norm": 1.0511817500052025, + "learning_rate": 1.8754167781874674e-05, + "loss": 0.32588714361190796, + "step": 1527 + }, + { + "epoch": 0.40578940379763645, + "grad_norm": 1.0750045197041278, + "learning_rate": 1.875204457877628e-05, + "loss": 0.33787310123443604, + "step": 1528 + }, + { + "epoch": 0.4060549727791794, + "grad_norm": 1.0444881434472735, + "learning_rate": 1.8749919688386912e-05, + "loss": 0.3223261833190918, + "step": 1529 + }, + { + "epoch": 0.40632054176072235, + "grad_norm": 1.2251483540500576, + "learning_rate": 1.8747793111116226e-05, + "loss": 0.38505882024765015, + "step": 1530 + }, + { + "epoch": 0.4065861107422653, + "grad_norm": 1.077913563059366, + "learning_rate": 1.8745664847374197e-05, + "loss": 0.33071833848953247, + "step": 1531 + }, + { + "epoch": 
0.40685167972380826, + "grad_norm": 1.2405893427169952, + "learning_rate": 1.874353489757113e-05, + "loss": 0.36603987216949463, + "step": 1532 + }, + { + "epoch": 0.4071172487053512, + "grad_norm": 0.9982674001932202, + "learning_rate": 1.874140326211766e-05, + "loss": 0.3103085160255432, + "step": 1533 + }, + { + "epoch": 0.40738281768689416, + "grad_norm": 1.1470515997968143, + "learning_rate": 1.873926994142473e-05, + "loss": 0.3471127152442932, + "step": 1534 + }, + { + "epoch": 0.4076483866684371, + "grad_norm": 1.0759117431352352, + "learning_rate": 1.873713493590363e-05, + "loss": 0.33152899146080017, + "step": 1535 + }, + { + "epoch": 0.40791395564998006, + "grad_norm": 1.0887192073538825, + "learning_rate": 1.8734998245965958e-05, + "loss": 0.340177059173584, + "step": 1536 + }, + { + "epoch": 0.408179524631523, + "grad_norm": 1.175803638176176, + "learning_rate": 1.8732859872023644e-05, + "loss": 0.3331618010997772, + "step": 1537 + }, + { + "epoch": 0.40844509361306597, + "grad_norm": 1.0971311272588662, + "learning_rate": 1.8730719814488937e-05, + "loss": 0.3911997675895691, + "step": 1538 + }, + { + "epoch": 0.408710662594609, + "grad_norm": 1.0986179012488992, + "learning_rate": 1.8728578073774427e-05, + "loss": 0.3699817955493927, + "step": 1539 + }, + { + "epoch": 0.4089762315761519, + "grad_norm": 1.086312859301249, + "learning_rate": 1.8726434650293e-05, + "loss": 0.31567275524139404, + "step": 1540 + }, + { + "epoch": 0.4092418005576949, + "grad_norm": 1.1099279461258769, + "learning_rate": 1.8724289544457897e-05, + "loss": 0.3387305438518524, + "step": 1541 + }, + { + "epoch": 0.40950736953923783, + "grad_norm": 1.6366665349052443, + "learning_rate": 1.8722142756682663e-05, + "loss": 0.3460234999656677, + "step": 1542 + }, + { + "epoch": 0.4097729385207808, + "grad_norm": 1.1109783591024025, + "learning_rate": 1.8719994287381173e-05, + "loss": 0.35653382539749146, + "step": 1543 + }, + { + "epoch": 0.41003850750232373, + "grad_norm": 
1.1054235252004945, + "learning_rate": 1.8717844136967626e-05, + "loss": 0.3828277885913849, + "step": 1544 + }, + { + "epoch": 0.4103040764838667, + "grad_norm": 1.0929819002464054, + "learning_rate": 1.871569230585655e-05, + "loss": 0.35883858799934387, + "step": 1545 + }, + { + "epoch": 0.41056964546540964, + "grad_norm": 0.988264800308937, + "learning_rate": 1.8713538794462783e-05, + "loss": 0.27414464950561523, + "step": 1546 + }, + { + "epoch": 0.4108352144469526, + "grad_norm": 1.0216234157414708, + "learning_rate": 1.871138360320151e-05, + "loss": 0.2924337387084961, + "step": 1547 + }, + { + "epoch": 0.41110078342849554, + "grad_norm": 1.1264719097344291, + "learning_rate": 1.8709226732488216e-05, + "loss": 0.34270918369293213, + "step": 1548 + }, + { + "epoch": 0.4113663524100385, + "grad_norm": 1.056133674601812, + "learning_rate": 1.870706818273872e-05, + "loss": 0.33866482973098755, + "step": 1549 + }, + { + "epoch": 0.41163192139158145, + "grad_norm": 1.0578429496037574, + "learning_rate": 1.8704907954369176e-05, + "loss": 0.3350633382797241, + "step": 1550 + }, + { + "epoch": 0.4118974903731244, + "grad_norm": 1.0981882806330738, + "learning_rate": 1.870274604779604e-05, + "loss": 0.32763785123825073, + "step": 1551 + }, + { + "epoch": 0.41216305935466735, + "grad_norm": 1.1235534336905566, + "learning_rate": 1.8700582463436102e-05, + "loss": 0.3130378723144531, + "step": 1552 + }, + { + "epoch": 0.41242862833621036, + "grad_norm": 1.1311593123986747, + "learning_rate": 1.8698417201706484e-05, + "loss": 0.34318777918815613, + "step": 1553 + }, + { + "epoch": 0.4126941973177533, + "grad_norm": 1.038517953287962, + "learning_rate": 1.8696250263024617e-05, + "loss": 0.3250104784965515, + "step": 1554 + }, + { + "epoch": 0.41295976629929626, + "grad_norm": 1.1047081419569766, + "learning_rate": 1.869408164780826e-05, + "loss": 0.3409217298030853, + "step": 1555 + }, + { + "epoch": 0.4132253352808392, + "grad_norm": 0.9892429720688775, + "learning_rate": 
1.86919113564755e-05, + "loss": 0.2885017395019531, + "step": 1556 + }, + { + "epoch": 0.41349090426238216, + "grad_norm": 0.9861078966083267, + "learning_rate": 1.8689739389444744e-05, + "loss": 0.31912562251091003, + "step": 1557 + }, + { + "epoch": 0.4137564732439251, + "grad_norm": 1.0037060940033242, + "learning_rate": 1.8687565747134716e-05, + "loss": 0.29874011874198914, + "step": 1558 + }, + { + "epoch": 0.41402204222546807, + "grad_norm": 1.0308167425812278, + "learning_rate": 1.8685390429964473e-05, + "loss": 0.3132701516151428, + "step": 1559 + }, + { + "epoch": 0.414287611207011, + "grad_norm": 1.0029824533275895, + "learning_rate": 1.868321343835339e-05, + "loss": 0.31158843636512756, + "step": 1560 + }, + { + "epoch": 0.41455318018855397, + "grad_norm": 0.959841401113078, + "learning_rate": 1.8681034772721167e-05, + "loss": 0.30490344762802124, + "step": 1561 + }, + { + "epoch": 0.4148187491700969, + "grad_norm": 1.1053356359227535, + "learning_rate": 1.867885443348782e-05, + "loss": 0.3150998055934906, + "step": 1562 + }, + { + "epoch": 0.4150843181516399, + "grad_norm": 1.0578010897773087, + "learning_rate": 1.86766724210737e-05, + "loss": 0.3391645550727844, + "step": 1563 + }, + { + "epoch": 0.4153498871331828, + "grad_norm": 1.1317933031731224, + "learning_rate": 1.8674488735899466e-05, + "loss": 0.35013002157211304, + "step": 1564 + }, + { + "epoch": 0.4156154561147258, + "grad_norm": 1.1514144052665038, + "learning_rate": 1.867230337838611e-05, + "loss": 0.3455789387226105, + "step": 1565 + }, + { + "epoch": 0.41588102509626873, + "grad_norm": 1.0985743755307058, + "learning_rate": 1.8670116348954945e-05, + "loss": 0.3179319500923157, + "step": 1566 + }, + { + "epoch": 0.41614659407781174, + "grad_norm": 1.046997092909125, + "learning_rate": 1.8667927648027596e-05, + "loss": 0.3628920018672943, + "step": 1567 + }, + { + "epoch": 0.4164121630593547, + "grad_norm": 1.1175553372657145, + "learning_rate": 1.8665737276026033e-05, + "loss": 
0.33599400520324707, + "step": 1568 + }, + { + "epoch": 0.41667773204089764, + "grad_norm": 1.0741100001694928, + "learning_rate": 1.8663545233372524e-05, + "loss": 0.31519144773483276, + "step": 1569 + }, + { + "epoch": 0.4169433010224406, + "grad_norm": 1.0564388001425704, + "learning_rate": 1.8661351520489667e-05, + "loss": 0.3326237201690674, + "step": 1570 + }, + { + "epoch": 0.41720887000398355, + "grad_norm": 1.0506499046982631, + "learning_rate": 1.865915613780039e-05, + "loss": 0.35254499316215515, + "step": 1571 + }, + { + "epoch": 0.4174744389855265, + "grad_norm": 1.134962500533026, + "learning_rate": 1.8656959085727936e-05, + "loss": 0.36689436435699463, + "step": 1572 + }, + { + "epoch": 0.41774000796706945, + "grad_norm": 1.104702895545828, + "learning_rate": 1.8654760364695873e-05, + "loss": 0.3113600015640259, + "step": 1573 + }, + { + "epoch": 0.4180055769486124, + "grad_norm": 1.0072243279377031, + "learning_rate": 1.865255997512808e-05, + "loss": 0.3336432874202728, + "step": 1574 + }, + { + "epoch": 0.41827114593015535, + "grad_norm": 1.1762721663897004, + "learning_rate": 1.8650357917448774e-05, + "loss": 0.3657492995262146, + "step": 1575 + }, + { + "epoch": 0.4185367149116983, + "grad_norm": 1.1286123264778107, + "learning_rate": 1.864815419208248e-05, + "loss": 0.3087846338748932, + "step": 1576 + }, + { + "epoch": 0.41880228389324126, + "grad_norm": 1.059893684126419, + "learning_rate": 1.8645948799454058e-05, + "loss": 0.31422343850135803, + "step": 1577 + }, + { + "epoch": 0.4190678528747842, + "grad_norm": 1.0232345658393134, + "learning_rate": 1.8643741739988672e-05, + "loss": 0.3172760009765625, + "step": 1578 + }, + { + "epoch": 0.41933342185632716, + "grad_norm": 1.131569038679809, + "learning_rate": 1.8641533014111824e-05, + "loss": 0.36819136142730713, + "step": 1579 + }, + { + "epoch": 0.4195989908378701, + "grad_norm": 1.0215370560204735, + "learning_rate": 1.863932262224933e-05, + "loss": 0.29081088304519653, + "step": 1580 + 
}, + { + "epoch": 0.4198645598194131, + "grad_norm": 1.0406040134422527, + "learning_rate": 1.8637110564827325e-05, + "loss": 0.3209632635116577, + "step": 1581 + }, + { + "epoch": 0.42013012880095607, + "grad_norm": 1.9161132832998955, + "learning_rate": 1.863489684227227e-05, + "loss": 0.3357914686203003, + "step": 1582 + }, + { + "epoch": 0.420395697782499, + "grad_norm": 1.0469990353974015, + "learning_rate": 1.8632681455010937e-05, + "loss": 0.285677969455719, + "step": 1583 + }, + { + "epoch": 0.420661266764042, + "grad_norm": 1.1491447855439996, + "learning_rate": 1.8630464403470435e-05, + "loss": 0.377876341342926, + "step": 1584 + }, + { + "epoch": 0.4209268357455849, + "grad_norm": 1.0642007656116979, + "learning_rate": 1.8628245688078187e-05, + "loss": 0.3141768276691437, + "step": 1585 + }, + { + "epoch": 0.4211924047271279, + "grad_norm": 1.078787810404599, + "learning_rate": 1.8626025309261927e-05, + "loss": 0.34249693155288696, + "step": 1586 + }, + { + "epoch": 0.42145797370867083, + "grad_norm": 1.1583509747022063, + "learning_rate": 1.8623803267449722e-05, + "loss": 0.32564717531204224, + "step": 1587 + }, + { + "epoch": 0.4217235426902138, + "grad_norm": 1.0623179841052965, + "learning_rate": 1.8621579563069957e-05, + "loss": 0.3425004184246063, + "step": 1588 + }, + { + "epoch": 0.42198911167175673, + "grad_norm": 1.05392590229203, + "learning_rate": 1.8619354196551333e-05, + "loss": 0.3676222562789917, + "step": 1589 + }, + { + "epoch": 0.4222546806532997, + "grad_norm": 0.9612536546184688, + "learning_rate": 1.8617127168322877e-05, + "loss": 0.28915971517562866, + "step": 1590 + }, + { + "epoch": 0.42252024963484264, + "grad_norm": 1.1293248025877465, + "learning_rate": 1.8614898478813933e-05, + "loss": 0.3387221097946167, + "step": 1591 + }, + { + "epoch": 0.4227858186163856, + "grad_norm": 1.0804518757125117, + "learning_rate": 1.8612668128454164e-05, + "loss": 0.33886784315109253, + "step": 1592 + }, + { + "epoch": 0.42305138759792854, + 
"grad_norm": 1.0780507904890781, + "learning_rate": 1.8610436117673557e-05, + "loss": 0.3364121913909912, + "step": 1593 + }, + { + "epoch": 0.4233169565794715, + "grad_norm": 1.0590527240631433, + "learning_rate": 1.8608202446902418e-05, + "loss": 0.3661370873451233, + "step": 1594 + }, + { + "epoch": 0.4235825255610145, + "grad_norm": 1.254416564930449, + "learning_rate": 1.8605967116571372e-05, + "loss": 0.2980557680130005, + "step": 1595 + }, + { + "epoch": 0.42384809454255745, + "grad_norm": 1.180518248335952, + "learning_rate": 1.8603730127111363e-05, + "loss": 0.36112043261528015, + "step": 1596 + }, + { + "epoch": 0.4241136635241004, + "grad_norm": 0.9967676484164163, + "learning_rate": 1.860149147895366e-05, + "loss": 0.30641958117485046, + "step": 1597 + }, + { + "epoch": 0.42437923250564336, + "grad_norm": 1.06006138769355, + "learning_rate": 1.8599251172529836e-05, + "loss": 0.3312561511993408, + "step": 1598 + }, + { + "epoch": 0.4246448014871863, + "grad_norm": 1.070580032885208, + "learning_rate": 1.859700920827181e-05, + "loss": 0.3757131099700928, + "step": 1599 + }, + { + "epoch": 0.42491037046872926, + "grad_norm": 1.0514692584176801, + "learning_rate": 1.8594765586611805e-05, + "loss": 0.3225080370903015, + "step": 1600 + }, + { + "epoch": 0.4251759394502722, + "grad_norm": 1.0857454483782787, + "learning_rate": 1.859252030798236e-05, + "loss": 0.35943928360939026, + "step": 1601 + }, + { + "epoch": 0.42544150843181516, + "grad_norm": 0.9907794348406631, + "learning_rate": 1.859027337281633e-05, + "loss": 0.29319390654563904, + "step": 1602 + }, + { + "epoch": 0.4257070774133581, + "grad_norm": 1.1441852776057728, + "learning_rate": 1.8588024781546914e-05, + "loss": 0.32320237159729004, + "step": 1603 + }, + { + "epoch": 0.42597264639490107, + "grad_norm": 1.1070076098385897, + "learning_rate": 1.8585774534607606e-05, + "loss": 0.3381520211696625, + "step": 1604 + }, + { + "epoch": 0.426238215376444, + "grad_norm": 0.9826840529093485, + 
"learning_rate": 1.858352263243223e-05, + "loss": 0.30010825395584106, + "step": 1605 + }, + { + "epoch": 0.42650378435798697, + "grad_norm": 0.9805553200940528, + "learning_rate": 1.8581269075454918e-05, + "loss": 0.26282748579978943, + "step": 1606 + }, + { + "epoch": 0.4267693533395299, + "grad_norm": 1.0395702570014627, + "learning_rate": 1.857901386411014e-05, + "loss": 0.33613401651382446, + "step": 1607 + }, + { + "epoch": 0.4270349223210729, + "grad_norm": 1.1625768546626036, + "learning_rate": 1.8576756998832667e-05, + "loss": 0.34522315859794617, + "step": 1608 + }, + { + "epoch": 0.4273004913026159, + "grad_norm": 1.0776480516530333, + "learning_rate": 1.8574498480057598e-05, + "loss": 0.3253153860569, + "step": 1609 + }, + { + "epoch": 0.42756606028415883, + "grad_norm": 1.177683979502923, + "learning_rate": 1.8572238308220347e-05, + "loss": 0.32180655002593994, + "step": 1610 + }, + { + "epoch": 0.4278316292657018, + "grad_norm": 1.2444289754345055, + "learning_rate": 1.856997648375665e-05, + "loss": 0.3274008333683014, + "step": 1611 + }, + { + "epoch": 0.42809719824724474, + "grad_norm": 1.006782047196068, + "learning_rate": 1.8567713007102565e-05, + "loss": 0.3196510374546051, + "step": 1612 + }, + { + "epoch": 0.4283627672287877, + "grad_norm": 1.0069133029708661, + "learning_rate": 1.8565447878694455e-05, + "loss": 0.2759617567062378, + "step": 1613 + }, + { + "epoch": 0.42862833621033064, + "grad_norm": 1.1572573238869637, + "learning_rate": 1.8563181098969017e-05, + "loss": 0.35069289803504944, + "step": 1614 + }, + { + "epoch": 0.4288939051918736, + "grad_norm": 1.1400434606874466, + "learning_rate": 1.8560912668363253e-05, + "loss": 0.3388484716415405, + "step": 1615 + }, + { + "epoch": 0.42915947417341654, + "grad_norm": 1.0338736294243014, + "learning_rate": 1.8558642587314496e-05, + "loss": 0.34116029739379883, + "step": 1616 + }, + { + "epoch": 0.4294250431549595, + "grad_norm": 1.0487376701262667, + "learning_rate": 
1.8556370856260387e-05, + "loss": 0.30212706327438354, + "step": 1617 + }, + { + "epoch": 0.42969061213650245, + "grad_norm": 1.0633174136084793, + "learning_rate": 1.855409747563889e-05, + "loss": 0.32250338792800903, + "step": 1618 + }, + { + "epoch": 0.4299561811180454, + "grad_norm": 1.132237618998821, + "learning_rate": 1.8551822445888285e-05, + "loss": 0.35972943902015686, + "step": 1619 + }, + { + "epoch": 0.43022175009958835, + "grad_norm": 0.9921112897877987, + "learning_rate": 1.8549545767447174e-05, + "loss": 0.3112533390522003, + "step": 1620 + }, + { + "epoch": 0.4304873190811313, + "grad_norm": 1.0331176116114555, + "learning_rate": 1.854726744075447e-05, + "loss": 0.3044458031654358, + "step": 1621 + }, + { + "epoch": 0.43075288806267426, + "grad_norm": 1.0421498129424722, + "learning_rate": 1.8544987466249412e-05, + "loss": 0.3261772096157074, + "step": 1622 + }, + { + "epoch": 0.43101845704421726, + "grad_norm": 1.3249821498842442, + "learning_rate": 1.8542705844371544e-05, + "loss": 0.3485907018184662, + "step": 1623 + }, + { + "epoch": 0.4312840260257602, + "grad_norm": 2.6643478315387576, + "learning_rate": 1.8540422575560747e-05, + "loss": 0.3016113340854645, + "step": 1624 + }, + { + "epoch": 0.43154959500730317, + "grad_norm": 1.021133157663628, + "learning_rate": 1.8538137660257198e-05, + "loss": 0.35383081436157227, + "step": 1625 + }, + { + "epoch": 0.4318151639888461, + "grad_norm": 1.170997891522692, + "learning_rate": 1.8535851098901406e-05, + "loss": 0.32015109062194824, + "step": 1626 + }, + { + "epoch": 0.43208073297038907, + "grad_norm": 1.1526156179794622, + "learning_rate": 1.8533562891934195e-05, + "loss": 0.3801743984222412, + "step": 1627 + }, + { + "epoch": 0.432346301951932, + "grad_norm": 1.0686097183664227, + "learning_rate": 1.85312730397967e-05, + "loss": 0.33140939474105835, + "step": 1628 + }, + { + "epoch": 0.432611870933475, + "grad_norm": 1.232101025230023, + "learning_rate": 1.8528981542930382e-05, + "loss": 
0.4052904546260834, + "step": 1629 + }, + { + "epoch": 0.4328774399150179, + "grad_norm": 1.0850305465298753, + "learning_rate": 1.8526688401777014e-05, + "loss": 0.3661607801914215, + "step": 1630 + }, + { + "epoch": 0.4331430088965609, + "grad_norm": 1.0520968780833948, + "learning_rate": 1.852439361677868e-05, + "loss": 0.33260756731033325, + "step": 1631 + }, + { + "epoch": 0.43340857787810383, + "grad_norm": 1.0137607762513057, + "learning_rate": 1.85220971883778e-05, + "loss": 0.30222776532173157, + "step": 1632 + }, + { + "epoch": 0.4336741468596468, + "grad_norm": 1.1138822281677037, + "learning_rate": 1.8519799117017086e-05, + "loss": 0.3444751799106598, + "step": 1633 + }, + { + "epoch": 0.43393971584118973, + "grad_norm": 1.0896517914007275, + "learning_rate": 1.8517499403139586e-05, + "loss": 0.33887404203414917, + "step": 1634 + }, + { + "epoch": 0.4342052848227327, + "grad_norm": 0.9260010903737679, + "learning_rate": 1.8515198047188652e-05, + "loss": 0.287893146276474, + "step": 1635 + }, + { + "epoch": 0.43447085380427564, + "grad_norm": 1.0080783350179279, + "learning_rate": 1.8512895049607965e-05, + "loss": 0.32236215472221375, + "step": 1636 + }, + { + "epoch": 0.43473642278581864, + "grad_norm": 1.0861808896793093, + "learning_rate": 1.8510590410841515e-05, + "loss": 0.30670079588890076, + "step": 1637 + }, + { + "epoch": 0.4350019917673616, + "grad_norm": 1.045996826542631, + "learning_rate": 1.8508284131333604e-05, + "loss": 0.34104713797569275, + "step": 1638 + }, + { + "epoch": 0.43526756074890455, + "grad_norm": 1.13616869746559, + "learning_rate": 1.8505976211528857e-05, + "loss": 0.3402378559112549, + "step": 1639 + }, + { + "epoch": 0.4355331297304475, + "grad_norm": 1.1414650328718847, + "learning_rate": 1.8503666651872217e-05, + "loss": 0.35236096382141113, + "step": 1640 + }, + { + "epoch": 0.43579869871199045, + "grad_norm": 1.1137846416322885, + "learning_rate": 1.850135545280894e-05, + "loss": 0.3385634422302246, + "step": 1641 + 
}, + { + "epoch": 0.4360642676935334, + "grad_norm": 1.0049349552180111, + "learning_rate": 1.849904261478459e-05, + "loss": 0.32222414016723633, + "step": 1642 + }, + { + "epoch": 0.43632983667507635, + "grad_norm": 1.1246487142505726, + "learning_rate": 1.8496728138245062e-05, + "loss": 0.3251120448112488, + "step": 1643 + }, + { + "epoch": 0.4365954056566193, + "grad_norm": 1.3230672810485753, + "learning_rate": 1.8494412023636563e-05, + "loss": 0.3199063837528229, + "step": 1644 + }, + { + "epoch": 0.43686097463816226, + "grad_norm": 1.031106173264746, + "learning_rate": 1.8492094271405605e-05, + "loss": 0.3470883071422577, + "step": 1645 + }, + { + "epoch": 0.4371265436197052, + "grad_norm": 1.1420067933967792, + "learning_rate": 1.848977488199903e-05, + "loss": 0.319596529006958, + "step": 1646 + }, + { + "epoch": 0.43739211260124816, + "grad_norm": 1.172387725238046, + "learning_rate": 1.848745385586398e-05, + "loss": 0.3445591628551483, + "step": 1647 + }, + { + "epoch": 0.4376576815827911, + "grad_norm": 1.0622512502557289, + "learning_rate": 1.848513119344793e-05, + "loss": 0.35861149430274963, + "step": 1648 + }, + { + "epoch": 0.43792325056433407, + "grad_norm": 1.3423176489021205, + "learning_rate": 1.8482806895198658e-05, + "loss": 0.36727622151374817, + "step": 1649 + }, + { + "epoch": 0.438188819545877, + "grad_norm": 1.0985203266462633, + "learning_rate": 1.848048096156426e-05, + "loss": 0.3505704402923584, + "step": 1650 + }, + { + "epoch": 0.43845438852742, + "grad_norm": 1.050005044594017, + "learning_rate": 1.8478153392993154e-05, + "loss": 0.3508742153644562, + "step": 1651 + }, + { + "epoch": 0.438719957508963, + "grad_norm": 1.0688095584032915, + "learning_rate": 1.8475824189934063e-05, + "loss": 0.32757264375686646, + "step": 1652 + }, + { + "epoch": 0.43898552649050593, + "grad_norm": 1.0768843323365103, + "learning_rate": 1.8473493352836032e-05, + "loss": 0.3117530643939972, + "step": 1653 + }, + { + "epoch": 0.4392510954720489, + 
"grad_norm": 1.1751248406507369, + "learning_rate": 1.8471160882148417e-05, + "loss": 0.3506043553352356, + "step": 1654 + }, + { + "epoch": 0.43951666445359183, + "grad_norm": 1.1247697965204402, + "learning_rate": 1.8468826778320892e-05, + "loss": 0.33997148275375366, + "step": 1655 + }, + { + "epoch": 0.4397822334351348, + "grad_norm": 1.007133328419329, + "learning_rate": 1.8466491041803446e-05, + "loss": 0.30060335993766785, + "step": 1656 + }, + { + "epoch": 0.44004780241667774, + "grad_norm": 0.9546594059496064, + "learning_rate": 1.846415367304638e-05, + "loss": 0.3057805597782135, + "step": 1657 + }, + { + "epoch": 0.4403133713982207, + "grad_norm": 1.006954520739026, + "learning_rate": 1.846181467250031e-05, + "loss": 0.30772098898887634, + "step": 1658 + }, + { + "epoch": 0.44057894037976364, + "grad_norm": 1.043209753174748, + "learning_rate": 1.845947404061617e-05, + "loss": 0.3183813989162445, + "step": 1659 + }, + { + "epoch": 0.4408445093613066, + "grad_norm": 1.0413807475941115, + "learning_rate": 1.8457131777845204e-05, + "loss": 0.2986184358596802, + "step": 1660 + }, + { + "epoch": 0.44111007834284954, + "grad_norm": 1.0330249735438937, + "learning_rate": 1.8454787884638973e-05, + "loss": 0.33342432975769043, + "step": 1661 + }, + { + "epoch": 0.4413756473243925, + "grad_norm": 1.6337494282252796, + "learning_rate": 1.8452442361449353e-05, + "loss": 0.33435192704200745, + "step": 1662 + }, + { + "epoch": 0.44164121630593545, + "grad_norm": 1.1084487395338765, + "learning_rate": 1.8450095208728537e-05, + "loss": 0.31596100330352783, + "step": 1663 + }, + { + "epoch": 0.4419067852874784, + "grad_norm": 1.0372033094770008, + "learning_rate": 1.8447746426929022e-05, + "loss": 0.29850512742996216, + "step": 1664 + }, + { + "epoch": 0.4421723542690214, + "grad_norm": 1.1891933812209383, + "learning_rate": 1.8445396016503628e-05, + "loss": 0.34898555278778076, + "step": 1665 + }, + { + "epoch": 0.44243792325056436, + "grad_norm": 1.0486597661615855, + 
"learning_rate": 1.8443043977905484e-05, + "loss": 0.283272385597229, + "step": 1666 + }, + { + "epoch": 0.4427034922321073, + "grad_norm": 1.041766578180328, + "learning_rate": 1.844069031158804e-05, + "loss": 0.32765433192253113, + "step": 1667 + }, + { + "epoch": 0.44296906121365026, + "grad_norm": 1.1465241668847563, + "learning_rate": 1.8438335018005052e-05, + "loss": 0.347957044839859, + "step": 1668 + }, + { + "epoch": 0.4432346301951932, + "grad_norm": 1.1330493919292772, + "learning_rate": 1.8435978097610594e-05, + "loss": 0.36188018321990967, + "step": 1669 + }, + { + "epoch": 0.44350019917673617, + "grad_norm": 1.1541714860130494, + "learning_rate": 1.843361955085905e-05, + "loss": 0.35944315791130066, + "step": 1670 + }, + { + "epoch": 0.4437657681582791, + "grad_norm": 1.0564596521414393, + "learning_rate": 1.8431259378205122e-05, + "loss": 0.33441367745399475, + "step": 1671 + }, + { + "epoch": 0.44403133713982207, + "grad_norm": 1.1043363461383413, + "learning_rate": 1.8428897580103827e-05, + "loss": 0.3157849907875061, + "step": 1672 + }, + { + "epoch": 0.444296906121365, + "grad_norm": 1.0760645254646117, + "learning_rate": 1.8426534157010486e-05, + "loss": 0.33416497707366943, + "step": 1673 + }, + { + "epoch": 0.444562475102908, + "grad_norm": 1.1629646905519946, + "learning_rate": 1.842416910938074e-05, + "loss": 0.3611617684364319, + "step": 1674 + }, + { + "epoch": 0.4448280440844509, + "grad_norm": 1.079831089952362, + "learning_rate": 1.8421802437670546e-05, + "loss": 0.3030395805835724, + "step": 1675 + }, + { + "epoch": 0.4450936130659939, + "grad_norm": 0.9867988845558019, + "learning_rate": 1.8419434142336167e-05, + "loss": 0.30281510949134827, + "step": 1676 + }, + { + "epoch": 0.44535918204753683, + "grad_norm": 1.2041533085675928, + "learning_rate": 1.8417064223834184e-05, + "loss": 0.3489738404750824, + "step": 1677 + }, + { + "epoch": 0.4456247510290798, + "grad_norm": 1.0320394434428715, + "learning_rate": 1.8414692682621487e-05, + 
"loss": 0.30453425645828247, + "step": 1678 + }, + { + "epoch": 0.44589032001062273, + "grad_norm": 0.9586890082829097, + "learning_rate": 1.841231951915528e-05, + "loss": 0.28717339038848877, + "step": 1679 + }, + { + "epoch": 0.44615588899216574, + "grad_norm": 1.0685350052372018, + "learning_rate": 1.840994473389309e-05, + "loss": 0.3227912187576294, + "step": 1680 + }, + { + "epoch": 0.4464214579737087, + "grad_norm": 1.0774879432227336, + "learning_rate": 1.8407568327292737e-05, + "loss": 0.3575928807258606, + "step": 1681 + }, + { + "epoch": 0.44668702695525164, + "grad_norm": 1.0240612597420884, + "learning_rate": 1.840519029981237e-05, + "loss": 0.35601454973220825, + "step": 1682 + }, + { + "epoch": 0.4469525959367946, + "grad_norm": 1.1829639598617365, + "learning_rate": 1.8402810651910444e-05, + "loss": 0.34867429733276367, + "step": 1683 + }, + { + "epoch": 0.44721816491833755, + "grad_norm": 1.0185115495756123, + "learning_rate": 1.8400429384045724e-05, + "loss": 0.3333359360694885, + "step": 1684 + }, + { + "epoch": 0.4474837338998805, + "grad_norm": 1.1658514468774803, + "learning_rate": 1.8398046496677296e-05, + "loss": 0.3269057273864746, + "step": 1685 + }, + { + "epoch": 0.44774930288142345, + "grad_norm": 1.0186865264151983, + "learning_rate": 1.839566199026455e-05, + "loss": 0.3507213890552521, + "step": 1686 + }, + { + "epoch": 0.4480148718629664, + "grad_norm": 1.0962029873559684, + "learning_rate": 1.8393275865267185e-05, + "loss": 0.32935822010040283, + "step": 1687 + }, + { + "epoch": 0.44828044084450935, + "grad_norm": 1.168811125319112, + "learning_rate": 1.8390888122145225e-05, + "loss": 0.3780096769332886, + "step": 1688 + }, + { + "epoch": 0.4485460098260523, + "grad_norm": 1.08432540630583, + "learning_rate": 1.8388498761358997e-05, + "loss": 0.3412250578403473, + "step": 1689 + }, + { + "epoch": 0.44881157880759526, + "grad_norm": 1.0725143861051711, + "learning_rate": 1.838610778336914e-05, + "loss": 0.33751022815704346, + "step": 
1690 + }, + { + "epoch": 0.4490771477891382, + "grad_norm": 1.113628501747759, + "learning_rate": 1.8383715188636608e-05, + "loss": 0.35736170411109924, + "step": 1691 + }, + { + "epoch": 0.44934271677068116, + "grad_norm": 1.0608679340591776, + "learning_rate": 1.8381320977622664e-05, + "loss": 0.3133913278579712, + "step": 1692 + }, + { + "epoch": 0.4496082857522241, + "grad_norm": 1.0696112323301112, + "learning_rate": 1.8378925150788886e-05, + "loss": 0.2890821099281311, + "step": 1693 + }, + { + "epoch": 0.4498738547337671, + "grad_norm": 1.0759892831738864, + "learning_rate": 1.8376527708597155e-05, + "loss": 0.34016966819763184, + "step": 1694 + }, + { + "epoch": 0.45013942371531007, + "grad_norm": 1.0933611032669988, + "learning_rate": 1.8374128651509676e-05, + "loss": 0.3502900302410126, + "step": 1695 + }, + { + "epoch": 0.450404992696853, + "grad_norm": 1.1956521483077693, + "learning_rate": 1.8371727979988957e-05, + "loss": 0.31828251481056213, + "step": 1696 + }, + { + "epoch": 0.450670561678396, + "grad_norm": 1.1739995891800665, + "learning_rate": 1.836932569449782e-05, + "loss": 0.33322471380233765, + "step": 1697 + }, + { + "epoch": 0.4509361306599389, + "grad_norm": 0.977715581129718, + "learning_rate": 1.8366921795499394e-05, + "loss": 0.28489458560943604, + "step": 1698 + }, + { + "epoch": 0.4512016996414819, + "grad_norm": 1.0351592490047028, + "learning_rate": 1.8364516283457127e-05, + "loss": 0.3125787079334259, + "step": 1699 + }, + { + "epoch": 0.45146726862302483, + "grad_norm": 1.6801930060854708, + "learning_rate": 1.8362109158834767e-05, + "loss": 0.3352596163749695, + "step": 1700 + }, + { + "epoch": 0.4517328376045678, + "grad_norm": 1.0152758212914303, + "learning_rate": 1.8359700422096385e-05, + "loss": 0.2986747622489929, + "step": 1701 + }, + { + "epoch": 0.45199840658611073, + "grad_norm": 1.0704573865215896, + "learning_rate": 1.8357290073706355e-05, + "loss": 0.3276829123497009, + "step": 1702 + }, + { + "epoch": 
0.4522639755676537, + "grad_norm": 1.05119725558451, + "learning_rate": 1.8354878114129368e-05, + "loss": 0.3183029890060425, + "step": 1703 + }, + { + "epoch": 0.45252954454919664, + "grad_norm": 1.0595099003295023, + "learning_rate": 1.835246454383041e-05, + "loss": 0.32149460911750793, + "step": 1704 + }, + { + "epoch": 0.4527951135307396, + "grad_norm": 1.0365725372264356, + "learning_rate": 1.8350049363274802e-05, + "loss": 0.2963859438896179, + "step": 1705 + }, + { + "epoch": 0.45306068251228254, + "grad_norm": 1.132218144997021, + "learning_rate": 1.8347632572928154e-05, + "loss": 0.35251080989837646, + "step": 1706 + }, + { + "epoch": 0.4533262514938255, + "grad_norm": 1.1840188868504486, + "learning_rate": 1.8345214173256395e-05, + "loss": 0.3585474491119385, + "step": 1707 + }, + { + "epoch": 0.4535918204753685, + "grad_norm": 1.1792148584627284, + "learning_rate": 1.834279416472577e-05, + "loss": 0.32339078187942505, + "step": 1708 + }, + { + "epoch": 0.45385738945691145, + "grad_norm": 1.030916532610971, + "learning_rate": 1.8340372547802822e-05, + "loss": 0.3473295569419861, + "step": 1709 + }, + { + "epoch": 0.4541229584384544, + "grad_norm": 1.149162033618886, + "learning_rate": 1.833794932295441e-05, + "loss": 0.35146117210388184, + "step": 1710 + }, + { + "epoch": 0.45438852741999736, + "grad_norm": 1.080751163824508, + "learning_rate": 1.833552449064771e-05, + "loss": 0.29697534441947937, + "step": 1711 + }, + { + "epoch": 0.4546540964015403, + "grad_norm": 1.0590764839143914, + "learning_rate": 1.8333098051350197e-05, + "loss": 0.30980685353279114, + "step": 1712 + }, + { + "epoch": 0.45491966538308326, + "grad_norm": 1.2023264217964575, + "learning_rate": 1.8330670005529657e-05, + "loss": 0.3271983861923218, + "step": 1713 + }, + { + "epoch": 0.4551852343646262, + "grad_norm": 1.061456665590969, + "learning_rate": 1.8328240353654193e-05, + "loss": 0.3421804904937744, + "step": 1714 + }, + { + "epoch": 0.45545080334616916, + "grad_norm": 
0.988281834877126, + "learning_rate": 1.8325809096192207e-05, + "loss": 0.2949771285057068, + "step": 1715 + }, + { + "epoch": 0.4557163723277121, + "grad_norm": 1.1467541005281106, + "learning_rate": 1.832337623361242e-05, + "loss": 0.35578668117523193, + "step": 1716 + }, + { + "epoch": 0.45598194130925507, + "grad_norm": 1.099618839558401, + "learning_rate": 1.832094176638387e-05, + "loss": 0.3714647889137268, + "step": 1717 + }, + { + "epoch": 0.456247510290798, + "grad_norm": 1.116087725713372, + "learning_rate": 1.8318505694975877e-05, + "loss": 0.36253875494003296, + "step": 1718 + }, + { + "epoch": 0.45651307927234097, + "grad_norm": 1.0310426822464949, + "learning_rate": 1.8316068019858093e-05, + "loss": 0.3148016035556793, + "step": 1719 + }, + { + "epoch": 0.4567786482538839, + "grad_norm": 1.0869949789046671, + "learning_rate": 1.8313628741500476e-05, + "loss": 0.3420512080192566, + "step": 1720 + }, + { + "epoch": 0.4570442172354269, + "grad_norm": 1.0955610437646774, + "learning_rate": 1.831118786037329e-05, + "loss": 0.2941698431968689, + "step": 1721 + }, + { + "epoch": 0.4573097862169699, + "grad_norm": 0.9987507632564111, + "learning_rate": 1.83087453769471e-05, + "loss": 0.3033481240272522, + "step": 1722 + }, + { + "epoch": 0.45757535519851283, + "grad_norm": 1.0508818993675257, + "learning_rate": 1.8306301291692798e-05, + "loss": 0.3405943810939789, + "step": 1723 + }, + { + "epoch": 0.4578409241800558, + "grad_norm": 1.0291343903638976, + "learning_rate": 1.8303855605081567e-05, + "loss": 0.32217931747436523, + "step": 1724 + }, + { + "epoch": 0.45810649316159874, + "grad_norm": 1.1797464113481113, + "learning_rate": 1.8301408317584913e-05, + "loss": 0.3627573847770691, + "step": 1725 + }, + { + "epoch": 0.4583720621431417, + "grad_norm": 1.1425882725361838, + "learning_rate": 1.829895942967464e-05, + "loss": 0.3512224853038788, + "step": 1726 + }, + { + "epoch": 0.45863763112468464, + "grad_norm": 1.1358093316461328, + "learning_rate": 
1.8296508941822868e-05, + "loss": 0.35433265566825867, + "step": 1727 + }, + { + "epoch": 0.4589032001062276, + "grad_norm": 1.1217406683513973, + "learning_rate": 1.829405685450202e-05, + "loss": 0.33105185627937317, + "step": 1728 + }, + { + "epoch": 0.45916876908777055, + "grad_norm": 1.0087946676492725, + "learning_rate": 1.829160316818483e-05, + "loss": 0.31765925884246826, + "step": 1729 + }, + { + "epoch": 0.4594343380693135, + "grad_norm": 1.0268902541251206, + "learning_rate": 1.8289147883344338e-05, + "loss": 0.3276101350784302, + "step": 1730 + }, + { + "epoch": 0.45969990705085645, + "grad_norm": 2.1185922480389676, + "learning_rate": 1.8286691000453895e-05, + "loss": 0.2921130061149597, + "step": 1731 + }, + { + "epoch": 0.4599654760323994, + "grad_norm": 0.9680106013727008, + "learning_rate": 1.828423251998716e-05, + "loss": 0.3025062382221222, + "step": 1732 + }, + { + "epoch": 0.46023104501394235, + "grad_norm": 1.0299077884479195, + "learning_rate": 1.82817724424181e-05, + "loss": 0.3128702640533447, + "step": 1733 + }, + { + "epoch": 0.4604966139954853, + "grad_norm": 0.9957682350134235, + "learning_rate": 1.8279310768220987e-05, + "loss": 0.31156033277511597, + "step": 1734 + }, + { + "epoch": 0.46076218297702826, + "grad_norm": 1.0327514294429654, + "learning_rate": 1.82768474978704e-05, + "loss": 0.30409976840019226, + "step": 1735 + }, + { + "epoch": 0.46102775195857126, + "grad_norm": 1.0533664417585449, + "learning_rate": 1.827438263184124e-05, + "loss": 0.305557519197464, + "step": 1736 + }, + { + "epoch": 0.4612933209401142, + "grad_norm": 1.1216722893854725, + "learning_rate": 1.827191617060869e-05, + "loss": 0.36079999804496765, + "step": 1737 + }, + { + "epoch": 0.46155888992165717, + "grad_norm": 1.0546022345807051, + "learning_rate": 1.8269448114648264e-05, + "loss": 0.3341830372810364, + "step": 1738 + }, + { + "epoch": 0.4618244589032001, + "grad_norm": 1.0085785444907966, + "learning_rate": 1.8266978464435764e-05, + "loss": 
0.3222450017929077, + "step": 1739 + }, + { + "epoch": 0.46209002788474307, + "grad_norm": 1.112818872130856, + "learning_rate": 1.826450722044732e-05, + "loss": 0.34665441513061523, + "step": 1740 + }, + { + "epoch": 0.462355596866286, + "grad_norm": 1.1112300040840664, + "learning_rate": 1.8262034383159357e-05, + "loss": 0.31024169921875, + "step": 1741 + }, + { + "epoch": 0.462621165847829, + "grad_norm": 1.2322752248386413, + "learning_rate": 1.8259559953048606e-05, + "loss": 0.2950369119644165, + "step": 1742 + }, + { + "epoch": 0.4628867348293719, + "grad_norm": 1.109045795536776, + "learning_rate": 1.8257083930592102e-05, + "loss": 0.3378523886203766, + "step": 1743 + }, + { + "epoch": 0.4631523038109149, + "grad_norm": 0.9899845397184047, + "learning_rate": 1.8254606316267204e-05, + "loss": 0.2930060923099518, + "step": 1744 + }, + { + "epoch": 0.46341787279245783, + "grad_norm": 1.079619676645024, + "learning_rate": 1.8252127110551564e-05, + "loss": 0.3236517012119293, + "step": 1745 + }, + { + "epoch": 0.4636834417740008, + "grad_norm": 0.9852877201201444, + "learning_rate": 1.824964631392314e-05, + "loss": 0.3010406196117401, + "step": 1746 + }, + { + "epoch": 0.46394901075554373, + "grad_norm": 1.0095585954453505, + "learning_rate": 1.8247163926860204e-05, + "loss": 0.3269607424736023, + "step": 1747 + }, + { + "epoch": 0.4642145797370867, + "grad_norm": 1.0474961373680607, + "learning_rate": 1.8244679949841328e-05, + "loss": 0.3437904715538025, + "step": 1748 + }, + { + "epoch": 0.46448014871862964, + "grad_norm": 1.1512723462780612, + "learning_rate": 1.8242194383345394e-05, + "loss": 0.37820738554000854, + "step": 1749 + }, + { + "epoch": 0.46474571770017264, + "grad_norm": 1.0989334641357904, + "learning_rate": 1.8239707227851592e-05, + "loss": 0.3365899920463562, + "step": 1750 + }, + { + "epoch": 0.4650112866817156, + "grad_norm": 0.9943228703349263, + "learning_rate": 1.8237218483839414e-05, + "loss": 0.30418774485588074, + "step": 1751 + }, + { 
+ "epoch": 0.46527685566325855, + "grad_norm": 0.9379554406122236, + "learning_rate": 1.823472815178866e-05, + "loss": 0.2923222780227661, + "step": 1752 + }, + { + "epoch": 0.4655424246448015, + "grad_norm": 1.1096787188742467, + "learning_rate": 1.823223623217944e-05, + "loss": 0.3358995020389557, + "step": 1753 + }, + { + "epoch": 0.46580799362634445, + "grad_norm": 1.0997620749237405, + "learning_rate": 1.822974272549216e-05, + "loss": 0.3413343131542206, + "step": 1754 + }, + { + "epoch": 0.4660735626078874, + "grad_norm": 1.0873990469892099, + "learning_rate": 1.822724763220755e-05, + "loss": 0.33553364872932434, + "step": 1755 + }, + { + "epoch": 0.46633913158943036, + "grad_norm": 1.0957210856960815, + "learning_rate": 1.8224750952806626e-05, + "loss": 0.35896626114845276, + "step": 1756 + }, + { + "epoch": 0.4666047005709733, + "grad_norm": 1.1032076691430248, + "learning_rate": 1.8222252687770718e-05, + "loss": 0.35345566272735596, + "step": 1757 + }, + { + "epoch": 0.46687026955251626, + "grad_norm": 1.0034635235769087, + "learning_rate": 1.8219752837581466e-05, + "loss": 0.3146013617515564, + "step": 1758 + }, + { + "epoch": 0.4671358385340592, + "grad_norm": 1.0191336075935247, + "learning_rate": 1.8217251402720807e-05, + "loss": 0.33270642161369324, + "step": 1759 + }, + { + "epoch": 0.46740140751560216, + "grad_norm": 1.030475428136688, + "learning_rate": 1.821474838367099e-05, + "loss": 0.3172033727169037, + "step": 1760 + }, + { + "epoch": 0.4676669764971451, + "grad_norm": 1.6535016363051902, + "learning_rate": 1.8212243780914578e-05, + "loss": 0.3277033567428589, + "step": 1761 + }, + { + "epoch": 0.46793254547868807, + "grad_norm": 1.1570228647748637, + "learning_rate": 1.820973759493441e-05, + "loss": 0.3523799777030945, + "step": 1762 + }, + { + "epoch": 0.468198114460231, + "grad_norm": 1.0907259849913267, + "learning_rate": 1.8207229826213664e-05, + "loss": 0.32437676191329956, + "step": 1763 + }, + { + "epoch": 0.468463683441774, + 
"grad_norm": 1.1347618214788342, + "learning_rate": 1.82047204752358e-05, + "loss": 0.34185051918029785, + "step": 1764 + }, + { + "epoch": 0.468729252423317, + "grad_norm": 1.0561382700570243, + "learning_rate": 1.8202209542484594e-05, + "loss": 0.32034197449684143, + "step": 1765 + }, + { + "epoch": 0.46899482140485993, + "grad_norm": 1.097207173265362, + "learning_rate": 1.8199697028444125e-05, + "loss": 0.30969515442848206, + "step": 1766 + }, + { + "epoch": 0.4692603903864029, + "grad_norm": 0.9320632629292236, + "learning_rate": 1.8197182933598776e-05, + "loss": 0.24751389026641846, + "step": 1767 + }, + { + "epoch": 0.46952595936794583, + "grad_norm": 1.2001835130139573, + "learning_rate": 1.8194667258433235e-05, + "loss": 0.3859948217868805, + "step": 1768 + }, + { + "epoch": 0.4697915283494888, + "grad_norm": 1.0989779617923678, + "learning_rate": 1.819215000343249e-05, + "loss": 0.29364967346191406, + "step": 1769 + }, + { + "epoch": 0.47005709733103174, + "grad_norm": 1.1161641657952082, + "learning_rate": 1.8189631169081845e-05, + "loss": 0.3560323715209961, + "step": 1770 + }, + { + "epoch": 0.4703226663125747, + "grad_norm": 1.6505675097600017, + "learning_rate": 1.8187110755866898e-05, + "loss": 0.3458098769187927, + "step": 1771 + }, + { + "epoch": 0.47058823529411764, + "grad_norm": 1.0148526914708587, + "learning_rate": 1.8184588764273555e-05, + "loss": 0.32131001353263855, + "step": 1772 + }, + { + "epoch": 0.4708538042756606, + "grad_norm": 1.0453234866463608, + "learning_rate": 1.8182065194788024e-05, + "loss": 0.3011054992675781, + "step": 1773 + }, + { + "epoch": 0.47111937325720354, + "grad_norm": 1.1076832582073854, + "learning_rate": 1.8179540047896827e-05, + "loss": 0.3314674496650696, + "step": 1774 + }, + { + "epoch": 0.4713849422387465, + "grad_norm": 1.0853788387965118, + "learning_rate": 1.8177013324086774e-05, + "loss": 0.3437536060810089, + "step": 1775 + }, + { + "epoch": 0.47165051122028945, + "grad_norm": 1.166112048160084, + 
"learning_rate": 1.8174485023844993e-05, + "loss": 0.36137935519218445, + "step": 1776 + }, + { + "epoch": 0.4719160802018324, + "grad_norm": 1.0726359370167762, + "learning_rate": 1.8171955147658905e-05, + "loss": 0.34018874168395996, + "step": 1777 + }, + { + "epoch": 0.4721816491833754, + "grad_norm": 1.0596665602066746, + "learning_rate": 1.8169423696016245e-05, + "loss": 0.33298587799072266, + "step": 1778 + }, + { + "epoch": 0.47244721816491836, + "grad_norm": 1.1107712039752602, + "learning_rate": 1.816689066940505e-05, + "loss": 0.3649418354034424, + "step": 1779 + }, + { + "epoch": 0.4727127871464613, + "grad_norm": 1.0148859742506888, + "learning_rate": 1.8164356068313646e-05, + "loss": 0.32419171929359436, + "step": 1780 + }, + { + "epoch": 0.47297835612800426, + "grad_norm": 1.047167823612948, + "learning_rate": 1.8161819893230688e-05, + "loss": 0.288555383682251, + "step": 1781 + }, + { + "epoch": 0.4732439251095472, + "grad_norm": 1.005455205363293, + "learning_rate": 1.815928214464511e-05, + "loss": 0.3231011629104614, + "step": 1782 + }, + { + "epoch": 0.47350949409109017, + "grad_norm": 1.0470674131364166, + "learning_rate": 1.815674282304617e-05, + "loss": 0.29310134053230286, + "step": 1783 + }, + { + "epoch": 0.4737750630726331, + "grad_norm": 1.0390137248114197, + "learning_rate": 1.815420192892341e-05, + "loss": 0.32683852314949036, + "step": 1784 + }, + { + "epoch": 0.47404063205417607, + "grad_norm": 1.0353379429668699, + "learning_rate": 1.8151659462766685e-05, + "loss": 0.3200969099998474, + "step": 1785 + }, + { + "epoch": 0.474306201035719, + "grad_norm": 1.051359679014311, + "learning_rate": 1.814911542506616e-05, + "loss": 0.3091360032558441, + "step": 1786 + }, + { + "epoch": 0.474571770017262, + "grad_norm": 1.1630088603070372, + "learning_rate": 1.814656981631229e-05, + "loss": 0.3679049611091614, + "step": 1787 + }, + { + "epoch": 0.4748373389988049, + "grad_norm": 1.1065634125772459, + "learning_rate": 1.814402263699584e-05, + 
"loss": 0.290119469165802, + "step": 1788 + }, + { + "epoch": 0.4751029079803479, + "grad_norm": 1.0987492456650414, + "learning_rate": 1.8141473887607874e-05, + "loss": 0.31878861784935, + "step": 1789 + }, + { + "epoch": 0.47536847696189083, + "grad_norm": 1.1254389921885528, + "learning_rate": 1.8138923568639763e-05, + "loss": 0.35820287466049194, + "step": 1790 + }, + { + "epoch": 0.4756340459434338, + "grad_norm": 1.0046454439717083, + "learning_rate": 1.8136371680583176e-05, + "loss": 0.2924647629261017, + "step": 1791 + }, + { + "epoch": 0.4758996149249768, + "grad_norm": 1.2202907606610718, + "learning_rate": 1.8133818223930092e-05, + "loss": 0.3799927234649658, + "step": 1792 + }, + { + "epoch": 0.47616518390651974, + "grad_norm": 1.1097316301591598, + "learning_rate": 1.8131263199172783e-05, + "loss": 0.3505420386791229, + "step": 1793 + }, + { + "epoch": 0.4764307528880627, + "grad_norm": 1.1021438648339534, + "learning_rate": 1.8128706606803823e-05, + "loss": 0.3291688859462738, + "step": 1794 + }, + { + "epoch": 0.47669632186960564, + "grad_norm": 1.0814065231113215, + "learning_rate": 1.8126148447316104e-05, + "loss": 0.34079697728157043, + "step": 1795 + }, + { + "epoch": 0.4769618908511486, + "grad_norm": 1.2185578909639558, + "learning_rate": 1.8123588721202802e-05, + "loss": 0.2898064851760864, + "step": 1796 + }, + { + "epoch": 0.47722745983269155, + "grad_norm": 1.0448194415877836, + "learning_rate": 1.8121027428957402e-05, + "loss": 0.32089224457740784, + "step": 1797 + }, + { + "epoch": 0.4774930288142345, + "grad_norm": 1.903396083379018, + "learning_rate": 1.8118464571073697e-05, + "loss": 0.3402039408683777, + "step": 1798 + }, + { + "epoch": 0.47775859779577745, + "grad_norm": 1.1693256768707747, + "learning_rate": 1.8115900148045767e-05, + "loss": 0.29904159903526306, + "step": 1799 + }, + { + "epoch": 0.4780241667773204, + "grad_norm": 1.0688058843932313, + "learning_rate": 1.8113334160368007e-05, + "loss": 0.34074240922927856, + "step": 
1800 + }, + { + "epoch": 0.47828973575886335, + "grad_norm": 1.0404364284009804, + "learning_rate": 1.811076660853511e-05, + "loss": 0.28566253185272217, + "step": 1801 + }, + { + "epoch": 0.4785553047404063, + "grad_norm": 1.0267154270839738, + "learning_rate": 1.8108197493042065e-05, + "loss": 0.34523358941078186, + "step": 1802 + }, + { + "epoch": 0.47882087372194926, + "grad_norm": 1.0082361251695107, + "learning_rate": 1.8105626814384173e-05, + "loss": 0.3261171281337738, + "step": 1803 + }, + { + "epoch": 0.4790864427034922, + "grad_norm": 1.0353580811121572, + "learning_rate": 1.8103054573057027e-05, + "loss": 0.2915942966938019, + "step": 1804 + }, + { + "epoch": 0.47935201168503516, + "grad_norm": 1.117140176261941, + "learning_rate": 1.810048076955653e-05, + "loss": 0.2999255657196045, + "step": 1805 + }, + { + "epoch": 0.47961758066657817, + "grad_norm": 1.0967176640726466, + "learning_rate": 1.8097905404378874e-05, + "loss": 0.3294594883918762, + "step": 1806 + }, + { + "epoch": 0.4798831496481211, + "grad_norm": 1.025641731681811, + "learning_rate": 1.8095328478020563e-05, + "loss": 0.30720093846321106, + "step": 1807 + }, + { + "epoch": 0.4801487186296641, + "grad_norm": 1.0583824100775536, + "learning_rate": 1.8092749990978395e-05, + "loss": 0.31076985597610474, + "step": 1808 + }, + { + "epoch": 0.480414287611207, + "grad_norm": 1.0650372083327142, + "learning_rate": 1.8090169943749477e-05, + "loss": 0.3182013928890228, + "step": 1809 + }, + { + "epoch": 0.48067985659275, + "grad_norm": 1.1560421045272382, + "learning_rate": 1.8087588336831206e-05, + "loss": 0.325716108083725, + "step": 1810 + }, + { + "epoch": 0.48094542557429293, + "grad_norm": 1.034822212222003, + "learning_rate": 1.8085005170721287e-05, + "loss": 0.3148769736289978, + "step": 1811 + }, + { + "epoch": 0.4812109945558359, + "grad_norm": 0.9998987744353804, + "learning_rate": 1.8082420445917727e-05, + "loss": 0.30645644664764404, + "step": 1812 + }, + { + "epoch": 
0.48147656353737883, + "grad_norm": 0.9765412034449941, + "learning_rate": 1.807983416291883e-05, + "loss": 0.2978900969028473, + "step": 1813 + }, + { + "epoch": 0.4817421325189218, + "grad_norm": 1.1281577444413164, + "learning_rate": 1.8077246322223194e-05, + "loss": 0.34340181946754456, + "step": 1814 + }, + { + "epoch": 0.48200770150046474, + "grad_norm": 1.0940690010095575, + "learning_rate": 1.8074656924329733e-05, + "loss": 0.3272106349468231, + "step": 1815 + }, + { + "epoch": 0.4822732704820077, + "grad_norm": 1.0823130111098402, + "learning_rate": 1.807206596973765e-05, + "loss": 0.31061962246894836, + "step": 1816 + }, + { + "epoch": 0.48253883946355064, + "grad_norm": 1.1134329507970786, + "learning_rate": 1.8069473458946445e-05, + "loss": 0.28947243094444275, + "step": 1817 + }, + { + "epoch": 0.4828044084450936, + "grad_norm": 1.066867737773279, + "learning_rate": 1.8066879392455932e-05, + "loss": 0.35057532787323, + "step": 1818 + }, + { + "epoch": 0.48306997742663654, + "grad_norm": 1.5202577425125505, + "learning_rate": 1.8064283770766212e-05, + "loss": 0.31032001972198486, + "step": 1819 + }, + { + "epoch": 0.48333554640817955, + "grad_norm": 1.1166414917810035, + "learning_rate": 1.8061686594377685e-05, + "loss": 0.3802293539047241, + "step": 1820 + }, + { + "epoch": 0.4836011153897225, + "grad_norm": 1.122052528401037, + "learning_rate": 1.8059087863791066e-05, + "loss": 0.3306402564048767, + "step": 1821 + }, + { + "epoch": 0.48386668437126545, + "grad_norm": 1.051177925612534, + "learning_rate": 1.8056487579507352e-05, + "loss": 0.32170724868774414, + "step": 1822 + }, + { + "epoch": 0.4841322533528084, + "grad_norm": 1.0182895505748566, + "learning_rate": 1.8053885742027854e-05, + "loss": 0.35058924555778503, + "step": 1823 + }, + { + "epoch": 0.48439782233435136, + "grad_norm": 1.079491665486815, + "learning_rate": 1.8051282351854168e-05, + "loss": 0.3796595335006714, + "step": 1824 + }, + { + "epoch": 0.4846633913158943, + "grad_norm": 
1.0882057457557335, + "learning_rate": 1.8048677409488205e-05, + "loss": 0.28997284173965454, + "step": 1825 + }, + { + "epoch": 0.48492896029743726, + "grad_norm": 1.7307038017833063, + "learning_rate": 1.804607091543216e-05, + "loss": 0.35110151767730713, + "step": 1826 + }, + { + "epoch": 0.4851945292789802, + "grad_norm": 1.1036882170711018, + "learning_rate": 1.8043462870188535e-05, + "loss": 0.3194088637828827, + "step": 1827 + }, + { + "epoch": 0.48546009826052317, + "grad_norm": 1.0664676604065728, + "learning_rate": 1.8040853274260137e-05, + "loss": 0.28777945041656494, + "step": 1828 + }, + { + "epoch": 0.4857256672420661, + "grad_norm": 1.0702584286398438, + "learning_rate": 1.803824212815006e-05, + "loss": 0.3642069697380066, + "step": 1829 + }, + { + "epoch": 0.48599123622360907, + "grad_norm": 1.0626897024145745, + "learning_rate": 1.80356294323617e-05, + "loss": 0.32396575808525085, + "step": 1830 + }, + { + "epoch": 0.486256805205152, + "grad_norm": 1.205959051296984, + "learning_rate": 1.8033015187398758e-05, + "loss": 0.36421436071395874, + "step": 1831 + }, + { + "epoch": 0.486522374186695, + "grad_norm": 1.0011906322370974, + "learning_rate": 1.8030399393765227e-05, + "loss": 0.3170832395553589, + "step": 1832 + }, + { + "epoch": 0.4867879431682379, + "grad_norm": 0.9739220394650455, + "learning_rate": 1.8027782051965408e-05, + "loss": 0.3003416359424591, + "step": 1833 + }, + { + "epoch": 0.48705351214978093, + "grad_norm": 1.0701369618567955, + "learning_rate": 1.802516316250388e-05, + "loss": 0.30362898111343384, + "step": 1834 + }, + { + "epoch": 0.4873190811313239, + "grad_norm": 1.0466563888798912, + "learning_rate": 1.802254272588555e-05, + "loss": 0.32721444964408875, + "step": 1835 + }, + { + "epoch": 0.48758465011286684, + "grad_norm": 1.345049864677536, + "learning_rate": 1.8019920742615596e-05, + "loss": 0.317483514547348, + "step": 1836 + }, + { + "epoch": 0.4878502190944098, + "grad_norm": 1.0589953518283157, + "learning_rate": 
1.801729721319951e-05, + "loss": 0.2928479015827179, + "step": 1837 + }, + { + "epoch": 0.48811578807595274, + "grad_norm": 1.1098495840377043, + "learning_rate": 1.8014672138143073e-05, + "loss": 0.3425772190093994, + "step": 1838 + }, + { + "epoch": 0.4883813570574957, + "grad_norm": 1.0286414092040284, + "learning_rate": 1.801204551795238e-05, + "loss": 0.334087997674942, + "step": 1839 + }, + { + "epoch": 0.48864692603903864, + "grad_norm": 1.0797374159140127, + "learning_rate": 1.80094173531338e-05, + "loss": 0.3186641335487366, + "step": 1840 + }, + { + "epoch": 0.4889124950205816, + "grad_norm": 1.0361897985848911, + "learning_rate": 1.800678764419401e-05, + "loss": 0.3153733015060425, + "step": 1841 + }, + { + "epoch": 0.48917806400212455, + "grad_norm": 1.070217807683518, + "learning_rate": 1.8004156391640004e-05, + "loss": 0.3323214054107666, + "step": 1842 + }, + { + "epoch": 0.4894436329836675, + "grad_norm": 0.9455521865874897, + "learning_rate": 1.8001523595979043e-05, + "loss": 0.2856762409210205, + "step": 1843 + }, + { + "epoch": 0.48970920196521045, + "grad_norm": 1.0256135363684138, + "learning_rate": 1.79988892577187e-05, + "loss": 0.32493725419044495, + "step": 1844 + }, + { + "epoch": 0.4899747709467534, + "grad_norm": 1.1082860888483268, + "learning_rate": 1.7996253377366846e-05, + "loss": 0.350448876619339, + "step": 1845 + }, + { + "epoch": 0.49024033992829635, + "grad_norm": 1.096249407467401, + "learning_rate": 1.7993615955431648e-05, + "loss": 0.32246965169906616, + "step": 1846 + }, + { + "epoch": 0.4905059089098393, + "grad_norm": 0.9715072313794847, + "learning_rate": 1.799097699242157e-05, + "loss": 0.302636057138443, + "step": 1847 + }, + { + "epoch": 0.4907714778913823, + "grad_norm": 1.1573319310132777, + "learning_rate": 1.7988336488845374e-05, + "loss": 0.34280693531036377, + "step": 1848 + }, + { + "epoch": 0.49103704687292526, + "grad_norm": 1.1205814585182334, + "learning_rate": 1.7985694445212118e-05, + "loss": 
0.3650673031806946, + "step": 1849 + }, + { + "epoch": 0.4913026158544682, + "grad_norm": 1.1348057531260405, + "learning_rate": 1.798305086203115e-05, + "loss": 0.33800822496414185, + "step": 1850 + }, + { + "epoch": 0.49156818483601117, + "grad_norm": 1.0428655272942455, + "learning_rate": 1.7980405739812134e-05, + "loss": 0.31522083282470703, + "step": 1851 + }, + { + "epoch": 0.4918337538175541, + "grad_norm": 1.177464907100392, + "learning_rate": 1.7977759079065003e-05, + "loss": 0.3374335765838623, + "step": 1852 + }, + { + "epoch": 0.49209932279909707, + "grad_norm": 1.060278247692231, + "learning_rate": 1.7975110880300018e-05, + "loss": 0.33803191781044006, + "step": 1853 + }, + { + "epoch": 0.49236489178064, + "grad_norm": 1.0982376140773644, + "learning_rate": 1.797246114402771e-05, + "loss": 0.37764933705329895, + "step": 1854 + }, + { + "epoch": 0.492630460762183, + "grad_norm": 0.9654297547716862, + "learning_rate": 1.796980987075892e-05, + "loss": 0.3075840473175049, + "step": 1855 + }, + { + "epoch": 0.4928960297437259, + "grad_norm": 0.9768928030686648, + "learning_rate": 1.7967157061004782e-05, + "loss": 0.306305855512619, + "step": 1856 + }, + { + "epoch": 0.4931615987252689, + "grad_norm": 1.0225684543938522, + "learning_rate": 1.796450271527673e-05, + "loss": 0.3474302291870117, + "step": 1857 + }, + { + "epoch": 0.49342716770681183, + "grad_norm": 1.0243106870487633, + "learning_rate": 1.7961846834086483e-05, + "loss": 0.31059685349464417, + "step": 1858 + }, + { + "epoch": 0.4936927366883548, + "grad_norm": 1.0236396527349367, + "learning_rate": 1.795918941794607e-05, + "loss": 0.346218079328537, + "step": 1859 + }, + { + "epoch": 0.49395830566989773, + "grad_norm": 0.9969229384493907, + "learning_rate": 1.7956530467367805e-05, + "loss": 0.28371214866638184, + "step": 1860 + }, + { + "epoch": 0.4942238746514407, + "grad_norm": 0.8979156608776232, + "learning_rate": 1.7953869982864306e-05, + "loss": 0.27775150537490845, + "step": 1861 + }, + { 
+ "epoch": 0.4944894436329837, + "grad_norm": 1.279703247293047, + "learning_rate": 1.795120796494848e-05, + "loss": 0.328782856464386, + "step": 1862 + }, + { + "epoch": 0.49475501261452665, + "grad_norm": 1.0950381369417217, + "learning_rate": 1.7948544414133534e-05, + "loss": 0.33220064640045166, + "step": 1863 + }, + { + "epoch": 0.4950205815960696, + "grad_norm": 1.0528449584388764, + "learning_rate": 1.794587933093297e-05, + "loss": 0.32681554555892944, + "step": 1864 + }, + { + "epoch": 0.49528615057761255, + "grad_norm": 1.1023465974826758, + "learning_rate": 1.7943212715860586e-05, + "loss": 0.32202866673469543, + "step": 1865 + }, + { + "epoch": 0.4955517195591555, + "grad_norm": 2.266456857585339, + "learning_rate": 1.7940544569430468e-05, + "loss": 0.3051350712776184, + "step": 1866 + }, + { + "epoch": 0.49581728854069845, + "grad_norm": 1.1617568134775966, + "learning_rate": 1.793787489215701e-05, + "loss": 0.3924705386161804, + "step": 1867 + }, + { + "epoch": 0.4960828575222414, + "grad_norm": 1.018817969430421, + "learning_rate": 1.793520368455489e-05, + "loss": 0.30267882347106934, + "step": 1868 + }, + { + "epoch": 0.49634842650378436, + "grad_norm": 1.0585020042998596, + "learning_rate": 1.793253094713909e-05, + "loss": 0.3150729238986969, + "step": 1869 + }, + { + "epoch": 0.4966139954853273, + "grad_norm": 1.314679145900761, + "learning_rate": 1.7929856680424872e-05, + "loss": 0.33814147114753723, + "step": 1870 + }, + { + "epoch": 0.49687956446687026, + "grad_norm": 1.010460021909887, + "learning_rate": 1.7927180884927814e-05, + "loss": 0.31929856538772583, + "step": 1871 + }, + { + "epoch": 0.4971451334484132, + "grad_norm": 1.1376790681693039, + "learning_rate": 1.7924503561163775e-05, + "loss": 0.3797461688518524, + "step": 1872 + }, + { + "epoch": 0.49741070242995616, + "grad_norm": 1.057594588942085, + "learning_rate": 1.792182470964891e-05, + "loss": 0.3056377172470093, + "step": 1873 + }, + { + "epoch": 0.4976762714114991, + 
"grad_norm": 1.1254473942016883, + "learning_rate": 1.7919144330899668e-05, + "loss": 0.3526398539543152, + "step": 1874 + }, + { + "epoch": 0.49794184039304207, + "grad_norm": 1.0289140670533532, + "learning_rate": 1.79164624254328e-05, + "loss": 0.3183595538139343, + "step": 1875 + }, + { + "epoch": 0.4982074093745851, + "grad_norm": 1.1908370019011798, + "learning_rate": 1.791377899376534e-05, + "loss": 0.3604113459587097, + "step": 1876 + }, + { + "epoch": 0.498472978356128, + "grad_norm": 1.1651856770093412, + "learning_rate": 1.7911094036414623e-05, + "loss": 0.3219848573207855, + "step": 1877 + }, + { + "epoch": 0.498738547337671, + "grad_norm": 1.0586801467718077, + "learning_rate": 1.7908407553898282e-05, + "loss": 0.28773394227027893, + "step": 1878 + }, + { + "epoch": 0.49900411631921393, + "grad_norm": 1.0649509880321448, + "learning_rate": 1.7905719546734233e-05, + "loss": 0.31453996896743774, + "step": 1879 + }, + { + "epoch": 0.4992696853007569, + "grad_norm": 0.9878415524405192, + "learning_rate": 1.7903030015440696e-05, + "loss": 0.2947153151035309, + "step": 1880 + }, + { + "epoch": 0.49953525428229983, + "grad_norm": 1.0652111521233423, + "learning_rate": 1.7900338960536178e-05, + "loss": 0.313723087310791, + "step": 1881 + }, + { + "epoch": 0.4998008232638428, + "grad_norm": 1.0853994840945123, + "learning_rate": 1.7897646382539485e-05, + "loss": 0.3385108709335327, + "step": 1882 + }, + { + "epoch": 0.5000663922453857, + "grad_norm": 1.0993457819479324, + "learning_rate": 1.7894952281969712e-05, + "loss": 0.31417039036750793, + "step": 1883 + }, + { + "epoch": 0.5003319612269287, + "grad_norm": 1.1452192213941934, + "learning_rate": 1.7892256659346253e-05, + "loss": 0.3555717468261719, + "step": 1884 + }, + { + "epoch": 0.5005975302084716, + "grad_norm": 1.1989261836629121, + "learning_rate": 1.7889559515188793e-05, + "loss": 0.3724518120288849, + "step": 1885 + }, + { + "epoch": 0.5008630991900146, + "grad_norm": 1.0516015708006068, + 
"learning_rate": 1.7886860850017306e-05, + "loss": 0.32646167278289795, + "step": 1886 + }, + { + "epoch": 0.5011286681715575, + "grad_norm": 1.079300223054909, + "learning_rate": 1.7884160664352062e-05, + "loss": 0.31072959303855896, + "step": 1887 + }, + { + "epoch": 0.5013942371531005, + "grad_norm": 0.9518526173941219, + "learning_rate": 1.7881458958713628e-05, + "loss": 0.26987242698669434, + "step": 1888 + }, + { + "epoch": 0.5016598061346434, + "grad_norm": 0.9908294117764815, + "learning_rate": 1.787875573362286e-05, + "loss": 0.30105817317962646, + "step": 1889 + }, + { + "epoch": 0.5019253751161864, + "grad_norm": 1.0444226583374554, + "learning_rate": 1.7876050989600908e-05, + "loss": 0.31277188658714294, + "step": 1890 + }, + { + "epoch": 0.5021909440977294, + "grad_norm": 1.0192470233304842, + "learning_rate": 1.7873344727169214e-05, + "loss": 0.31068161129951477, + "step": 1891 + }, + { + "epoch": 0.5024565130792723, + "grad_norm": 1.0797105219167356, + "learning_rate": 1.7870636946849512e-05, + "loss": 0.3491121530532837, + "step": 1892 + }, + { + "epoch": 0.5027220820608153, + "grad_norm": 1.0753654491775293, + "learning_rate": 1.7867927649163838e-05, + "loss": 0.3223581612110138, + "step": 1893 + }, + { + "epoch": 0.5029876510423582, + "grad_norm": 1.1295999155195493, + "learning_rate": 1.7865216834634506e-05, + "loss": 0.345224529504776, + "step": 1894 + }, + { + "epoch": 0.5032532200239012, + "grad_norm": 1.1419032071310418, + "learning_rate": 1.7862504503784123e-05, + "loss": 0.3408205211162567, + "step": 1895 + }, + { + "epoch": 0.5035187890054441, + "grad_norm": 0.9713066472066385, + "learning_rate": 1.7859790657135608e-05, + "loss": 0.2680068016052246, + "step": 1896 + }, + { + "epoch": 0.5037843579869872, + "grad_norm": 0.9186813995364894, + "learning_rate": 1.7857075295212148e-05, + "loss": 0.29733535647392273, + "step": 1897 + }, + { + "epoch": 0.5040499269685301, + "grad_norm": 1.1196248802118025, + "learning_rate": 1.785435841853724e-05, 
+ "loss": 0.34820133447647095, + "step": 1898 + }, + { + "epoch": 0.5043154959500731, + "grad_norm": 1.134445876132798, + "learning_rate": 1.785164002763466e-05, + "loss": 0.3306594491004944, + "step": 1899 + }, + { + "epoch": 0.504581064931616, + "grad_norm": 1.0579272410020724, + "learning_rate": 1.7848920123028482e-05, + "loss": 0.3166846036911011, + "step": 1900 + }, + { + "epoch": 0.504846633913159, + "grad_norm": 1.2213509498849395, + "learning_rate": 1.784619870524308e-05, + "loss": 0.3406408727169037, + "step": 1901 + }, + { + "epoch": 0.5051122028947019, + "grad_norm": 1.0410168562106317, + "learning_rate": 1.78434757748031e-05, + "loss": 0.36358171701431274, + "step": 1902 + }, + { + "epoch": 0.5053777718762449, + "grad_norm": 1.0510382236040618, + "learning_rate": 1.7840751332233498e-05, + "loss": 0.34045761823654175, + "step": 1903 + }, + { + "epoch": 0.5056433408577878, + "grad_norm": 1.0566120463915532, + "learning_rate": 1.783802537805951e-05, + "loss": 0.3442475199699402, + "step": 1904 + }, + { + "epoch": 0.5059089098393308, + "grad_norm": 1.1632822330113848, + "learning_rate": 1.7835297912806675e-05, + "loss": 0.3488585650920868, + "step": 1905 + }, + { + "epoch": 0.5061744788208737, + "grad_norm": 1.098650773563784, + "learning_rate": 1.7832568937000808e-05, + "loss": 0.3340107500553131, + "step": 1906 + }, + { + "epoch": 0.5064400478024167, + "grad_norm": 1.0195614065654457, + "learning_rate": 1.7829838451168027e-05, + "loss": 0.3206177353858948, + "step": 1907 + }, + { + "epoch": 0.5067056167839596, + "grad_norm": 1.0219563874782234, + "learning_rate": 1.782710645583473e-05, + "loss": 0.2851010262966156, + "step": 1908 + }, + { + "epoch": 0.5069711857655026, + "grad_norm": 1.0249326570563306, + "learning_rate": 1.782437295152763e-05, + "loss": 0.31850844621658325, + "step": 1909 + }, + { + "epoch": 0.5072367547470455, + "grad_norm": 1.0890541355083159, + "learning_rate": 1.7821637938773704e-05, + "loss": 0.3343108892440796, + "step": 1910 + }, 
+ { + "epoch": 0.5075023237285885, + "grad_norm": 1.1131994842325255, + "learning_rate": 1.781890141810023e-05, + "loss": 0.3423745930194855, + "step": 1911 + }, + { + "epoch": 0.5077678927101315, + "grad_norm": 1.057536319451762, + "learning_rate": 1.7816163390034775e-05, + "loss": 0.30980780720710754, + "step": 1912 + }, + { + "epoch": 0.5080334616916744, + "grad_norm": 1.0099692843485935, + "learning_rate": 1.7813423855105203e-05, + "loss": 0.31217479705810547, + "step": 1913 + }, + { + "epoch": 0.5082990306732174, + "grad_norm": 1.0721675523916532, + "learning_rate": 1.7810682813839664e-05, + "loss": 0.34741947054862976, + "step": 1914 + }, + { + "epoch": 0.5085645996547603, + "grad_norm": 1.1098427332228447, + "learning_rate": 1.7807940266766595e-05, + "loss": 0.32275527715682983, + "step": 1915 + }, + { + "epoch": 0.5088301686363033, + "grad_norm": 1.1130434711054393, + "learning_rate": 1.7805196214414728e-05, + "loss": 0.32760411500930786, + "step": 1916 + }, + { + "epoch": 0.5090957376178462, + "grad_norm": 1.1445787919507704, + "learning_rate": 1.7802450657313086e-05, + "loss": 0.3877720832824707, + "step": 1917 + }, + { + "epoch": 0.5093613065993892, + "grad_norm": 1.1135916509560913, + "learning_rate": 1.779970359599098e-05, + "loss": 0.33458876609802246, + "step": 1918 + }, + { + "epoch": 0.5096268755809321, + "grad_norm": 0.9826034605244246, + "learning_rate": 1.7796955030978007e-05, + "loss": 0.30603206157684326, + "step": 1919 + }, + { + "epoch": 0.5098924445624751, + "grad_norm": 0.9902684589377142, + "learning_rate": 1.7794204962804063e-05, + "loss": 0.2920286953449249, + "step": 1920 + }, + { + "epoch": 0.510158013544018, + "grad_norm": 1.1034173597508874, + "learning_rate": 1.7791453391999325e-05, + "loss": 0.32407981157302856, + "step": 1921 + }, + { + "epoch": 0.510423582525561, + "grad_norm": 1.3200648964540613, + "learning_rate": 1.7788700319094263e-05, + "loss": 0.30423563718795776, + "step": 1922 + }, + { + "epoch": 0.5106891515071039, + 
"grad_norm": 1.1213502448496324, + "learning_rate": 1.7785945744619642e-05, + "loss": 0.34691399335861206, + "step": 1923 + }, + { + "epoch": 0.5109547204886469, + "grad_norm": 1.0498801582672959, + "learning_rate": 1.7783189669106503e-05, + "loss": 0.3217603266239166, + "step": 1924 + }, + { + "epoch": 0.5112202894701899, + "grad_norm": 1.1943957961346587, + "learning_rate": 1.7780432093086198e-05, + "loss": 0.365132212638855, + "step": 1925 + }, + { + "epoch": 0.5114858584517329, + "grad_norm": 0.9783494867108459, + "learning_rate": 1.7777673017090344e-05, + "loss": 0.29662930965423584, + "step": 1926 + }, + { + "epoch": 0.5117514274332758, + "grad_norm": 1.0707541061431447, + "learning_rate": 1.7774912441650857e-05, + "loss": 0.3324819803237915, + "step": 1927 + }, + { + "epoch": 0.5120169964148188, + "grad_norm": 1.0040789031204058, + "learning_rate": 1.7772150367299953e-05, + "loss": 0.29331067204475403, + "step": 1928 + }, + { + "epoch": 0.5122825653963617, + "grad_norm": 1.064062495235822, + "learning_rate": 1.7769386794570117e-05, + "loss": 0.3158259987831116, + "step": 1929 + }, + { + "epoch": 0.5125481343779047, + "grad_norm": 1.020159871349018, + "learning_rate": 1.7766621723994145e-05, + "loss": 0.2824791967868805, + "step": 1930 + }, + { + "epoch": 0.5128137033594476, + "grad_norm": 1.0493215169042918, + "learning_rate": 1.7763855156105097e-05, + "loss": 0.2690732777118683, + "step": 1931 + }, + { + "epoch": 0.5130792723409906, + "grad_norm": 1.043157004637876, + "learning_rate": 1.7761087091436346e-05, + "loss": 0.31360942125320435, + "step": 1932 + }, + { + "epoch": 0.5133448413225336, + "grad_norm": 0.9858891902519169, + "learning_rate": 1.7758317530521535e-05, + "loss": 0.28334349393844604, + "step": 1933 + }, + { + "epoch": 0.5136104103040765, + "grad_norm": 1.1739380172138798, + "learning_rate": 1.7755546473894604e-05, + "loss": 0.3857404589653015, + "step": 1934 + }, + { + "epoch": 0.5138759792856195, + "grad_norm": 1.0280582546011092, + 
"learning_rate": 1.7752773922089784e-05, + "loss": 0.2852492332458496, + "step": 1935 + }, + { + "epoch": 0.5141415482671624, + "grad_norm": 1.003050995152578, + "learning_rate": 1.7749999875641585e-05, + "loss": 0.2959831953048706, + "step": 1936 + }, + { + "epoch": 0.5144071172487054, + "grad_norm": 1.100974201889633, + "learning_rate": 1.7747224335084815e-05, + "loss": 0.3129635453224182, + "step": 1937 + }, + { + "epoch": 0.5146726862302483, + "grad_norm": 1.0336946735940622, + "learning_rate": 1.774444730095456e-05, + "loss": 0.31391531229019165, + "step": 1938 + }, + { + "epoch": 0.5149382552117913, + "grad_norm": 1.0155253897885985, + "learning_rate": 1.7741668773786202e-05, + "loss": 0.30274757742881775, + "step": 1939 + }, + { + "epoch": 0.5152038241933342, + "grad_norm": 1.026561688701391, + "learning_rate": 1.7738888754115413e-05, + "loss": 0.29162222146987915, + "step": 1940 + }, + { + "epoch": 0.5154693931748772, + "grad_norm": 1.045931473256506, + "learning_rate": 1.7736107242478143e-05, + "loss": 0.30358970165252686, + "step": 1941 + }, + { + "epoch": 0.5157349621564201, + "grad_norm": 1.11915386227621, + "learning_rate": 1.7733324239410634e-05, + "loss": 0.32268065214157104, + "step": 1942 + }, + { + "epoch": 0.5160005311379631, + "grad_norm": 1.0626040245012975, + "learning_rate": 1.7730539745449417e-05, + "loss": 0.31925222277641296, + "step": 1943 + }, + { + "epoch": 0.516266100119506, + "grad_norm": 1.1170224886553113, + "learning_rate": 1.7727753761131312e-05, + "loss": 0.32883748412132263, + "step": 1944 + }, + { + "epoch": 0.516531669101049, + "grad_norm": 1.101510406621582, + "learning_rate": 1.7724966286993425e-05, + "loss": 0.3212829530239105, + "step": 1945 + }, + { + "epoch": 0.5167972380825919, + "grad_norm": 1.1477333753851342, + "learning_rate": 1.772217732357314e-05, + "loss": 0.32909759879112244, + "step": 1946 + }, + { + "epoch": 0.5170628070641349, + "grad_norm": 33.3722959000957, + "learning_rate": 1.7719386871408147e-05, + 
"loss": 0.3451213538646698, + "step": 1947 + }, + { + "epoch": 0.5173283760456778, + "grad_norm": 1.0792459943819739, + "learning_rate": 1.7716594931036402e-05, + "loss": 0.318422794342041, + "step": 1948 + }, + { + "epoch": 0.5175939450272208, + "grad_norm": 1.1243494025490273, + "learning_rate": 1.7713801502996166e-05, + "loss": 0.3165292739868164, + "step": 1949 + }, + { + "epoch": 0.5178595140087637, + "grad_norm": 1.1353818628503742, + "learning_rate": 1.7711006587825975e-05, + "loss": 0.3116700351238251, + "step": 1950 + }, + { + "epoch": 0.5181250829903067, + "grad_norm": 1.2005138291757869, + "learning_rate": 1.7708210186064656e-05, + "loss": 0.32102686166763306, + "step": 1951 + }, + { + "epoch": 0.5183906519718496, + "grad_norm": 1.079523368082095, + "learning_rate": 1.7705412298251323e-05, + "loss": 0.33025500178337097, + "step": 1952 + }, + { + "epoch": 0.5186562209533926, + "grad_norm": 1.2087703844513067, + "learning_rate": 1.7702612924925377e-05, + "loss": 0.36113062500953674, + "step": 1953 + }, + { + "epoch": 0.5189217899349357, + "grad_norm": 1.1242566727618883, + "learning_rate": 1.7699812066626503e-05, + "loss": 0.3092479109764099, + "step": 1954 + }, + { + "epoch": 0.5191873589164786, + "grad_norm": 1.117146005158035, + "learning_rate": 1.769700972389467e-05, + "loss": 0.3389117419719696, + "step": 1955 + }, + { + "epoch": 0.5194529278980216, + "grad_norm": 1.1525168535902064, + "learning_rate": 1.7694205897270147e-05, + "loss": 0.3225803077220917, + "step": 1956 + }, + { + "epoch": 0.5197184968795645, + "grad_norm": 1.0237361691251219, + "learning_rate": 1.7691400587293467e-05, + "loss": 0.3226786255836487, + "step": 1957 + }, + { + "epoch": 0.5199840658611075, + "grad_norm": 1.0060672564491426, + "learning_rate": 1.7688593794505466e-05, + "loss": 0.27708399295806885, + "step": 1958 + }, + { + "epoch": 0.5202496348426504, + "grad_norm": 1.0763214880079806, + "learning_rate": 1.768578551944726e-05, + "loss": 0.36100950837135315, + "step": 1959 
+ }, + { + "epoch": 0.5205152038241934, + "grad_norm": 1.043549985204807, + "learning_rate": 1.768297576266025e-05, + "loss": 0.3138211965560913, + "step": 1960 + }, + { + "epoch": 0.5207807728057363, + "grad_norm": 1.0618046264640966, + "learning_rate": 1.7680164524686128e-05, + "loss": 0.33959656953811646, + "step": 1961 + }, + { + "epoch": 0.5210463417872793, + "grad_norm": 0.9826913420332539, + "learning_rate": 1.7677351806066863e-05, + "loss": 0.3093605637550354, + "step": 1962 + }, + { + "epoch": 0.5213119107688222, + "grad_norm": 1.13307401094871, + "learning_rate": 1.7674537607344717e-05, + "loss": 0.3098641633987427, + "step": 1963 + }, + { + "epoch": 0.5215774797503652, + "grad_norm": 1.0810255128706003, + "learning_rate": 1.767172192906223e-05, + "loss": 0.35172683000564575, + "step": 1964 + }, + { + "epoch": 0.5218430487319081, + "grad_norm": 1.0729896509671073, + "learning_rate": 1.7668904771762242e-05, + "loss": 0.3535798192024231, + "step": 1965 + }, + { + "epoch": 0.5221086177134511, + "grad_norm": 1.2521081937006913, + "learning_rate": 1.766608613598785e-05, + "loss": 0.36183854937553406, + "step": 1966 + }, + { + "epoch": 0.522374186694994, + "grad_norm": 1.0735439944400962, + "learning_rate": 1.7663266022282473e-05, + "loss": 0.35995131731033325, + "step": 1967 + }, + { + "epoch": 0.522639755676537, + "grad_norm": 1.117054454049305, + "learning_rate": 1.766044443118978e-05, + "loss": 0.38672733306884766, + "step": 1968 + }, + { + "epoch": 0.5229053246580799, + "grad_norm": 1.0862044019422723, + "learning_rate": 1.765762136325375e-05, + "loss": 0.3389524221420288, + "step": 1969 + }, + { + "epoch": 0.5231708936396229, + "grad_norm": 0.9847521483407152, + "learning_rate": 1.7654796819018635e-05, + "loss": 0.3325779139995575, + "step": 1970 + }, + { + "epoch": 0.5234364626211658, + "grad_norm": 1.014607581135561, + "learning_rate": 1.7651970799028976e-05, + "loss": 0.328407347202301, + "step": 1971 + }, + { + "epoch": 0.5237020316027088, + 
"grad_norm": 0.9793310107257689, + "learning_rate": 1.764914330382959e-05, + "loss": 0.3050537705421448, + "step": 1972 + }, + { + "epoch": 0.5239676005842517, + "grad_norm": 1.1408686145630131, + "learning_rate": 1.7646314333965588e-05, + "loss": 0.35500285029411316, + "step": 1973 + }, + { + "epoch": 0.5242331695657947, + "grad_norm": 1.1035893819341516, + "learning_rate": 1.7643483889982364e-05, + "loss": 0.30319780111312866, + "step": 1974 + }, + { + "epoch": 0.5244987385473376, + "grad_norm": 1.0161223434375823, + "learning_rate": 1.7640651972425592e-05, + "loss": 0.315757691860199, + "step": 1975 + }, + { + "epoch": 0.5247643075288806, + "grad_norm": 1.0278713767432786, + "learning_rate": 1.7637818581841234e-05, + "loss": 0.28562331199645996, + "step": 1976 + }, + { + "epoch": 0.5250298765104235, + "grad_norm": 1.017204404946826, + "learning_rate": 1.763498371877553e-05, + "loss": 0.29798296093940735, + "step": 1977 + }, + { + "epoch": 0.5252954454919665, + "grad_norm": 1.1245986087835715, + "learning_rate": 1.763214738377501e-05, + "loss": 0.2923639416694641, + "step": 1978 + }, + { + "epoch": 0.5255610144735094, + "grad_norm": 1.0282257211254215, + "learning_rate": 1.7629309577386492e-05, + "loss": 0.2858009934425354, + "step": 1979 + }, + { + "epoch": 0.5258265834550524, + "grad_norm": 1.1185725636940211, + "learning_rate": 1.7626470300157064e-05, + "loss": 0.3615952134132385, + "step": 1980 + }, + { + "epoch": 0.5260921524365954, + "grad_norm": 1.1357118701340632, + "learning_rate": 1.762362955263411e-05, + "loss": 0.36142098903656006, + "step": 1981 + }, + { + "epoch": 0.5263577214181384, + "grad_norm": 1.1305105783283786, + "learning_rate": 1.762078733536529e-05, + "loss": 0.3335961699485779, + "step": 1982 + }, + { + "epoch": 0.5266232903996814, + "grad_norm": 1.2367655641806865, + "learning_rate": 1.761794364889855e-05, + "loss": 0.34549272060394287, + "step": 1983 + }, + { + "epoch": 0.5268888593812243, + "grad_norm": 1.1166612317693478, + 
"learning_rate": 1.761509849378212e-05, + "loss": 0.3177812993526459, + "step": 1984 + }, + { + "epoch": 0.5271544283627673, + "grad_norm": 1.1485560676920734, + "learning_rate": 1.7612251870564515e-05, + "loss": 0.33191388845443726, + "step": 1985 + }, + { + "epoch": 0.5274199973443102, + "grad_norm": 1.0807821541967428, + "learning_rate": 1.7609403779794523e-05, + "loss": 0.30732038617134094, + "step": 1986 + }, + { + "epoch": 0.5276855663258532, + "grad_norm": 1.1038043700347457, + "learning_rate": 1.7606554222021226e-05, + "loss": 0.33012068271636963, + "step": 1987 + }, + { + "epoch": 0.5279511353073961, + "grad_norm": 1.2233212729045404, + "learning_rate": 1.760370319779399e-05, + "loss": 0.3396066427230835, + "step": 1988 + }, + { + "epoch": 0.5282167042889391, + "grad_norm": 1.0755028443639627, + "learning_rate": 1.7600850707662454e-05, + "loss": 0.29053401947021484, + "step": 1989 + }, + { + "epoch": 0.528482273270482, + "grad_norm": 1.0859289781343007, + "learning_rate": 1.7597996752176545e-05, + "loss": 0.32927206158638, + "step": 1990 + }, + { + "epoch": 0.528747842252025, + "grad_norm": 1.0494460781018915, + "learning_rate": 1.759514133188647e-05, + "loss": 0.309224933385849, + "step": 1991 + }, + { + "epoch": 0.5290134112335679, + "grad_norm": 1.0870307368096292, + "learning_rate": 1.7592284447342725e-05, + "loss": 0.31973862648010254, + "step": 1992 + }, + { + "epoch": 0.5292789802151109, + "grad_norm": 1.0491029702582455, + "learning_rate": 1.758942609909608e-05, + "loss": 0.3331080377101898, + "step": 1993 + }, + { + "epoch": 0.5295445491966538, + "grad_norm": 1.0710245753206995, + "learning_rate": 1.7586566287697592e-05, + "loss": 0.32755160331726074, + "step": 1994 + }, + { + "epoch": 0.5298101181781968, + "grad_norm": 1.0377451052992368, + "learning_rate": 1.7583705013698602e-05, + "loss": 0.31942498683929443, + "step": 1995 + }, + { + "epoch": 0.5300756871597397, + "grad_norm": 1.1665695354682926, + "learning_rate": 1.7580842277650723e-05, + 
"loss": 0.3199199438095093, + "step": 1996 + }, + { + "epoch": 0.5303412561412827, + "grad_norm": 0.9680761404148592, + "learning_rate": 1.7577978080105864e-05, + "loss": 0.28153708577156067, + "step": 1997 + }, + { + "epoch": 0.5306068251228256, + "grad_norm": 1.0336529884327843, + "learning_rate": 1.7575112421616203e-05, + "loss": 0.3050921559333801, + "step": 1998 + }, + { + "epoch": 0.5308723941043686, + "grad_norm": 1.0836881519572394, + "learning_rate": 1.7572245302734208e-05, + "loss": 0.3242149353027344, + "step": 1999 + }, + { + "epoch": 0.5311379630859115, + "grad_norm": 0.9889139549595165, + "learning_rate": 1.7569376724012622e-05, + "loss": 0.29947227239608765, + "step": 2000 + }, + { + "epoch": 0.5314035320674545, + "grad_norm": 1.132976441688301, + "learning_rate": 1.756650668600448e-05, + "loss": 0.3229755163192749, + "step": 2001 + }, + { + "epoch": 0.5316691010489975, + "grad_norm": 1.0802391073518836, + "learning_rate": 1.7563635189263086e-05, + "loss": 0.3544544577598572, + "step": 2002 + }, + { + "epoch": 0.5319346700305404, + "grad_norm": 1.0996284853033707, + "learning_rate": 1.756076223434203e-05, + "loss": 0.32807621359825134, + "step": 2003 + }, + { + "epoch": 0.5322002390120834, + "grad_norm": 0.9920629294688551, + "learning_rate": 1.7557887821795192e-05, + "loss": 0.3057190477848053, + "step": 2004 + }, + { + "epoch": 0.5324658079936263, + "grad_norm": 1.0234244423063892, + "learning_rate": 1.7555011952176716e-05, + "loss": 0.29419198632240295, + "step": 2005 + }, + { + "epoch": 0.5327313769751693, + "grad_norm": 0.9799120327217228, + "learning_rate": 1.755213462604104e-05, + "loss": 0.3232089877128601, + "step": 2006 + }, + { + "epoch": 0.5329969459567122, + "grad_norm": 1.0186576745896931, + "learning_rate": 1.7549255843942875e-05, + "loss": 0.29784274101257324, + "step": 2007 + }, + { + "epoch": 0.5332625149382552, + "grad_norm": 1.0470325382276877, + "learning_rate": 1.7546375606437216e-05, + "loss": 0.31421899795532227, + "step": 
2008 + }, + { + "epoch": 0.5335280839197981, + "grad_norm": 1.0641694414781755, + "learning_rate": 1.7543493914079345e-05, + "loss": 0.30681121349334717, + "step": 2009 + }, + { + "epoch": 0.5337936529013412, + "grad_norm": 1.0092085906510277, + "learning_rate": 1.7540610767424813e-05, + "loss": 0.3114027976989746, + "step": 2010 + }, + { + "epoch": 0.5340592218828841, + "grad_norm": 1.0064230726553411, + "learning_rate": 1.753772616702946e-05, + "loss": 0.3030378520488739, + "step": 2011 + }, + { + "epoch": 0.5343247908644271, + "grad_norm": 1.1096181297712675, + "learning_rate": 1.75348401134494e-05, + "loss": 0.30272024869918823, + "step": 2012 + }, + { + "epoch": 0.53459035984597, + "grad_norm": 1.049795668852804, + "learning_rate": 1.7531952607241033e-05, + "loss": 0.35117241740226746, + "step": 2013 + }, + { + "epoch": 0.534855928827513, + "grad_norm": 1.2552056089457548, + "learning_rate": 1.7529063648961035e-05, + "loss": 0.297889769077301, + "step": 2014 + }, + { + "epoch": 0.5351214978090559, + "grad_norm": 1.1238332501182418, + "learning_rate": 1.752617323916636e-05, + "loss": 0.32858210802078247, + "step": 2015 + }, + { + "epoch": 0.5353870667905989, + "grad_norm": 1.117582559290418, + "learning_rate": 1.7523281378414246e-05, + "loss": 0.3095484673976898, + "step": 2016 + }, + { + "epoch": 0.5356526357721418, + "grad_norm": 1.1072331793921826, + "learning_rate": 1.752038806726222e-05, + "loss": 0.34490731358528137, + "step": 2017 + }, + { + "epoch": 0.5359182047536848, + "grad_norm": 1.1427367564985542, + "learning_rate": 1.751749330626806e-05, + "loss": 0.35144859552383423, + "step": 2018 + }, + { + "epoch": 0.5361837737352277, + "grad_norm": 1.0337528414474293, + "learning_rate": 1.751459709598985e-05, + "loss": 0.26337549090385437, + "step": 2019 + }, + { + "epoch": 0.5364493427167707, + "grad_norm": 1.0719958558069054, + "learning_rate": 1.7511699436985952e-05, + "loss": 0.3235297203063965, + "step": 2020 + }, + { + "epoch": 0.5367149116983136, + 
"grad_norm": 1.1655117185465573, + "learning_rate": 1.7508800329814993e-05, + "loss": 0.35195302963256836, + "step": 2021 + }, + { + "epoch": 0.5369804806798566, + "grad_norm": 1.0547432431007058, + "learning_rate": 1.7505899775035887e-05, + "loss": 0.3226467967033386, + "step": 2022 + }, + { + "epoch": 0.5372460496613995, + "grad_norm": 1.0406958245289468, + "learning_rate": 1.750299777320783e-05, + "loss": 0.30616605281829834, + "step": 2023 + }, + { + "epoch": 0.5375116186429425, + "grad_norm": 1.074902411593199, + "learning_rate": 1.7500094324890294e-05, + "loss": 0.3007400333881378, + "step": 2024 + }, + { + "epoch": 0.5377771876244855, + "grad_norm": 1.1883491645763606, + "learning_rate": 1.7497189430643025e-05, + "loss": 0.35409432649612427, + "step": 2025 + }, + { + "epoch": 0.5380427566060284, + "grad_norm": 1.6951314154408594, + "learning_rate": 1.7494283091026053e-05, + "loss": 0.33718281984329224, + "step": 2026 + }, + { + "epoch": 0.5383083255875714, + "grad_norm": 1.0940933435725269, + "learning_rate": 1.749137530659969e-05, + "loss": 0.3589650094509125, + "step": 2027 + }, + { + "epoch": 0.5385738945691143, + "grad_norm": 1.1114345705753812, + "learning_rate": 1.7488466077924525e-05, + "loss": 0.35314273834228516, + "step": 2028 + }, + { + "epoch": 0.5388394635506573, + "grad_norm": 1.017869922891923, + "learning_rate": 1.7485555405561412e-05, + "loss": 0.28393587470054626, + "step": 2029 + }, + { + "epoch": 0.5391050325322002, + "grad_norm": 1.0276825009259218, + "learning_rate": 1.7482643290071503e-05, + "loss": 0.3262496292591095, + "step": 2030 + }, + { + "epoch": 0.5393706015137432, + "grad_norm": 1.122887144479208, + "learning_rate": 1.7479729732016218e-05, + "loss": 0.3549670875072479, + "step": 2031 + }, + { + "epoch": 0.5396361704952861, + "grad_norm": 1.0211791251004596, + "learning_rate": 1.7476814731957253e-05, + "loss": 0.30668947100639343, + "step": 2032 + }, + { + "epoch": 0.5399017394768291, + "grad_norm": 0.9278865240006526, + 
"learning_rate": 1.747389829045659e-05, + "loss": 0.2942228317260742, + "step": 2033 + }, + { + "epoch": 0.540167308458372, + "grad_norm": 1.023956047651912, + "learning_rate": 1.7470980408076484e-05, + "loss": 0.3166583478450775, + "step": 2034 + }, + { + "epoch": 0.540432877439915, + "grad_norm": 1.1503051826481139, + "learning_rate": 1.7468061085379467e-05, + "loss": 0.35149675607681274, + "step": 2035 + }, + { + "epoch": 0.5406984464214579, + "grad_norm": 1.1081467050264138, + "learning_rate": 1.7465140322928353e-05, + "loss": 0.32645004987716675, + "step": 2036 + }, + { + "epoch": 0.5409640154030009, + "grad_norm": 1.1656339653416823, + "learning_rate": 1.7462218121286224e-05, + "loss": 0.3078027367591858, + "step": 2037 + }, + { + "epoch": 0.5412295843845439, + "grad_norm": 1.0310810248927436, + "learning_rate": 1.7459294481016452e-05, + "loss": 0.28726300597190857, + "step": 2038 + }, + { + "epoch": 0.5414951533660869, + "grad_norm": 1.028103971871598, + "learning_rate": 1.7456369402682675e-05, + "loss": 0.29330572485923767, + "step": 2039 + }, + { + "epoch": 0.5417607223476298, + "grad_norm": 1.176742297493161, + "learning_rate": 1.7453442886848818e-05, + "loss": 0.3151019215583801, + "step": 2040 + }, + { + "epoch": 0.5420262913291728, + "grad_norm": 1.0830810759861134, + "learning_rate": 1.745051493407908e-05, + "loss": 0.3267561197280884, + "step": 2041 + }, + { + "epoch": 0.5422918603107157, + "grad_norm": 1.0462822233377385, + "learning_rate": 1.7447585544937933e-05, + "loss": 0.2834410071372986, + "step": 2042 + }, + { + "epoch": 0.5425574292922587, + "grad_norm": 0.9922210453154783, + "learning_rate": 1.7444654719990128e-05, + "loss": 0.29896080493927, + "step": 2043 + }, + { + "epoch": 0.5428229982738016, + "grad_norm": 1.0716195406510356, + "learning_rate": 1.7441722459800695e-05, + "loss": 0.3084600865840912, + "step": 2044 + }, + { + "epoch": 0.5430885672553446, + "grad_norm": 1.100381998832612, + "learning_rate": 1.743878876493494e-05, + "loss": 
0.3178163170814514, + "step": 2045 + }, + { + "epoch": 0.5433541362368876, + "grad_norm": 1.1512124937535644, + "learning_rate": 1.743585363595844e-05, + "loss": 0.32886385917663574, + "step": 2046 + }, + { + "epoch": 0.5436197052184305, + "grad_norm": 1.0499932799675828, + "learning_rate": 1.743291707343706e-05, + "loss": 0.31810784339904785, + "step": 2047 + }, + { + "epoch": 0.5438852741999735, + "grad_norm": 0.994229574171737, + "learning_rate": 1.7429979077936928e-05, + "loss": 0.3003198504447937, + "step": 2048 + }, + { + "epoch": 0.5441508431815164, + "grad_norm": 1.1622503660754158, + "learning_rate": 1.7427039650024462e-05, + "loss": 0.33889323472976685, + "step": 2049 + }, + { + "epoch": 0.5444164121630594, + "grad_norm": 1.062972427778211, + "learning_rate": 1.7424098790266343e-05, + "loss": 0.3238763213157654, + "step": 2050 + }, + { + "epoch": 0.5446819811446023, + "grad_norm": 1.3651581380225686, + "learning_rate": 1.742115649922954e-05, + "loss": 0.34304776787757874, + "step": 2051 + }, + { + "epoch": 0.5449475501261453, + "grad_norm": 1.1192647204238841, + "learning_rate": 1.741821277748128e-05, + "loss": 0.31528347730636597, + "step": 2052 + }, + { + "epoch": 0.5452131191076882, + "grad_norm": 1.0728286121769783, + "learning_rate": 1.7415267625589094e-05, + "loss": 0.2992726266384125, + "step": 2053 + }, + { + "epoch": 0.5454786880892312, + "grad_norm": 1.0217638219637288, + "learning_rate": 1.741232104412076e-05, + "loss": 0.31706419587135315, + "step": 2054 + }, + { + "epoch": 0.5457442570707741, + "grad_norm": 1.8373163603702176, + "learning_rate": 1.7409373033644355e-05, + "loss": 0.2887676954269409, + "step": 2055 + }, + { + "epoch": 0.5460098260523171, + "grad_norm": 1.1434290988558236, + "learning_rate": 1.740642359472821e-05, + "loss": 0.3410964906215668, + "step": 2056 + }, + { + "epoch": 0.54627539503386, + "grad_norm": 1.0501323660770627, + "learning_rate": 1.740347272794095e-05, + "loss": 0.3711693286895752, + "step": 2057 + }, + { + 
"epoch": 0.546540964015403, + "grad_norm": 1.10922453334831, + "learning_rate": 1.7400520433851457e-05, + "loss": 0.3512499928474426, + "step": 2058 + }, + { + "epoch": 0.5468065329969459, + "grad_norm": 1.0790222544341648, + "learning_rate": 1.739756671302891e-05, + "loss": 0.3136678636074066, + "step": 2059 + }, + { + "epoch": 0.5470721019784889, + "grad_norm": 1.0417668658369865, + "learning_rate": 1.7394611566042748e-05, + "loss": 0.2983730435371399, + "step": 2060 + }, + { + "epoch": 0.5473376709600318, + "grad_norm": 1.1233530419836393, + "learning_rate": 1.7391654993462686e-05, + "loss": 0.36603933572769165, + "step": 2061 + }, + { + "epoch": 0.5476032399415748, + "grad_norm": 1.1758952832381078, + "learning_rate": 1.7388696995858717e-05, + "loss": 0.3651789128780365, + "step": 2062 + }, + { + "epoch": 0.5478688089231177, + "grad_norm": 1.2065493864331982, + "learning_rate": 1.7385737573801108e-05, + "loss": 0.30580615997314453, + "step": 2063 + }, + { + "epoch": 0.5481343779046607, + "grad_norm": 0.981372496476623, + "learning_rate": 1.7382776727860406e-05, + "loss": 0.2630755305290222, + "step": 2064 + }, + { + "epoch": 0.5483999468862036, + "grad_norm": 1.0020540486713174, + "learning_rate": 1.7379814458607416e-05, + "loss": 0.2947537899017334, + "step": 2065 + }, + { + "epoch": 0.5486655158677467, + "grad_norm": 1.034048631807644, + "learning_rate": 1.737685076661324e-05, + "loss": 0.3119455873966217, + "step": 2066 + }, + { + "epoch": 0.5489310848492897, + "grad_norm": 1.052273536899897, + "learning_rate": 1.7373885652449237e-05, + "loss": 0.3162347972393036, + "step": 2067 + }, + { + "epoch": 0.5491966538308326, + "grad_norm": 1.2320011234530202, + "learning_rate": 1.7370919116687047e-05, + "loss": 0.34120452404022217, + "step": 2068 + }, + { + "epoch": 0.5494622228123756, + "grad_norm": 1.095244169583748, + "learning_rate": 1.7367951159898583e-05, + "loss": 0.3126780092716217, + "step": 2069 + }, + { + "epoch": 0.5497277917939185, + "grad_norm": 
0.9591128480333501, + "learning_rate": 1.7364981782656033e-05, + "loss": 0.2833349406719208, + "step": 2070 + }, + { + "epoch": 0.5499933607754615, + "grad_norm": 1.0921809927618633, + "learning_rate": 1.7362010985531855e-05, + "loss": 0.31617453694343567, + "step": 2071 + }, + { + "epoch": 0.5502589297570044, + "grad_norm": 1.0809700153666713, + "learning_rate": 1.735903876909879e-05, + "loss": 0.31372442841529846, + "step": 2072 + }, + { + "epoch": 0.5505244987385474, + "grad_norm": 1.1616077591637106, + "learning_rate": 1.735606513392984e-05, + "loss": 0.3500489592552185, + "step": 2073 + }, + { + "epoch": 0.5507900677200903, + "grad_norm": 1.0373404262028456, + "learning_rate": 1.735309008059829e-05, + "loss": 0.3219031095504761, + "step": 2074 + }, + { + "epoch": 0.5510556367016333, + "grad_norm": 1.0701365395287485, + "learning_rate": 1.7350113609677694e-05, + "loss": 0.32419610023498535, + "step": 2075 + }, + { + "epoch": 0.5513212056831762, + "grad_norm": 1.1054492395059694, + "learning_rate": 1.7347135721741874e-05, + "loss": 0.34804612398147583, + "step": 2076 + }, + { + "epoch": 0.5515867746647192, + "grad_norm": 1.09814942010155, + "learning_rate": 1.7344156417364946e-05, + "loss": 0.33105939626693726, + "step": 2077 + }, + { + "epoch": 0.5518523436462621, + "grad_norm": 1.0139790776190714, + "learning_rate": 1.7341175697121273e-05, + "loss": 0.3426011800765991, + "step": 2078 + }, + { + "epoch": 0.5521179126278051, + "grad_norm": 1.1120942872149455, + "learning_rate": 1.7338193561585507e-05, + "loss": 0.33207643032073975, + "step": 2079 + }, + { + "epoch": 0.552383481609348, + "grad_norm": 0.9807946500665143, + "learning_rate": 1.7335210011332573e-05, + "loss": 0.31849467754364014, + "step": 2080 + }, + { + "epoch": 0.552649050590891, + "grad_norm": 1.081622565959563, + "learning_rate": 1.7332225046937655e-05, + "loss": 0.3549337685108185, + "step": 2081 + }, + { + "epoch": 0.5529146195724339, + "grad_norm": 0.9652343930669623, + "learning_rate": 
1.7329238668976224e-05, + "loss": 0.2850857377052307, + "step": 2082 + }, + { + "epoch": 0.5531801885539769, + "grad_norm": 1.1370461672740964, + "learning_rate": 1.732625087802402e-05, + "loss": 0.3277609348297119, + "step": 2083 + }, + { + "epoch": 0.5534457575355198, + "grad_norm": 1.0712095451099939, + "learning_rate": 1.732326167465705e-05, + "loss": 0.2951444983482361, + "step": 2084 + }, + { + "epoch": 0.5537113265170628, + "grad_norm": 1.0893938459197319, + "learning_rate": 1.7320271059451597e-05, + "loss": 0.36634138226509094, + "step": 2085 + }, + { + "epoch": 0.5539768954986057, + "grad_norm": 1.060256238160636, + "learning_rate": 1.7317279032984222e-05, + "loss": 0.3407907783985138, + "step": 2086 + }, + { + "epoch": 0.5542424644801487, + "grad_norm": 1.0563310141876696, + "learning_rate": 1.7314285595831747e-05, + "loss": 0.34038978815078735, + "step": 2087 + }, + { + "epoch": 0.5545080334616916, + "grad_norm": 1.0558109709205228, + "learning_rate": 1.7311290748571273e-05, + "loss": 0.337898313999176, + "step": 2088 + }, + { + "epoch": 0.5547736024432346, + "grad_norm": 1.1543867929059073, + "learning_rate": 1.7308294491780175e-05, + "loss": 0.3250765800476074, + "step": 2089 + }, + { + "epoch": 0.5550391714247775, + "grad_norm": 1.101568217376945, + "learning_rate": 1.730529682603609e-05, + "loss": 0.31562721729278564, + "step": 2090 + }, + { + "epoch": 0.5553047404063205, + "grad_norm": 1.2678079753749867, + "learning_rate": 1.730229775191693e-05, + "loss": 0.32757896184921265, + "step": 2091 + }, + { + "epoch": 0.5555703093878634, + "grad_norm": 1.1010819086774664, + "learning_rate": 1.7299297270000894e-05, + "loss": 0.35861605405807495, + "step": 2092 + }, + { + "epoch": 0.5558358783694064, + "grad_norm": 1.0999873688088635, + "learning_rate": 1.7296295380866425e-05, + "loss": 0.3383220434188843, + "step": 2093 + }, + { + "epoch": 0.5561014473509495, + "grad_norm": 1.1431134206724336, + "learning_rate": 1.7293292085092263e-05, + "loss": 
0.30144187808036804, + "step": 2094 + }, + { + "epoch": 0.5563670163324924, + "grad_norm": 1.0354659821546437, + "learning_rate": 1.72902873832574e-05, + "loss": 0.2626546323299408, + "step": 2095 + }, + { + "epoch": 0.5566325853140354, + "grad_norm": 1.0939710377386638, + "learning_rate": 1.7287281275941112e-05, + "loss": 0.3289363980293274, + "step": 2096 + }, + { + "epoch": 0.5568981542955783, + "grad_norm": 0.9797533003070389, + "learning_rate": 1.7284273763722943e-05, + "loss": 0.26631784439086914, + "step": 2097 + }, + { + "epoch": 0.5571637232771213, + "grad_norm": 1.0035421194069876, + "learning_rate": 1.7281264847182697e-05, + "loss": 0.3051939606666565, + "step": 2098 + }, + { + "epoch": 0.5574292922586642, + "grad_norm": 1.0515034870910809, + "learning_rate": 1.7278254526900468e-05, + "loss": 0.34456121921539307, + "step": 2099 + }, + { + "epoch": 0.5576948612402072, + "grad_norm": 1.2038994359149542, + "learning_rate": 1.72752428034566e-05, + "loss": 0.2747807502746582, + "step": 2100 + }, + { + "epoch": 0.5579604302217501, + "grad_norm": 2.186270123050143, + "learning_rate": 1.7272229677431723e-05, + "loss": 0.31111812591552734, + "step": 2101 + }, + { + "epoch": 0.5582259992032931, + "grad_norm": 1.0150701360001215, + "learning_rate": 1.7269215149406737e-05, + "loss": 0.29648226499557495, + "step": 2102 + }, + { + "epoch": 0.558491568184836, + "grad_norm": 0.9846402594569152, + "learning_rate": 1.72661992199628e-05, + "loss": 0.28303876519203186, + "step": 2103 + }, + { + "epoch": 0.558757137166379, + "grad_norm": 1.1069492435421613, + "learning_rate": 1.726318188968135e-05, + "loss": 0.30540165305137634, + "step": 2104 + }, + { + "epoch": 0.5590227061479219, + "grad_norm": 1.2177152582591586, + "learning_rate": 1.726016315914409e-05, + "loss": 0.31810393929481506, + "step": 2105 + }, + { + "epoch": 0.5592882751294649, + "grad_norm": 1.134577587954556, + "learning_rate": 1.7257143028933004e-05, + "loss": 0.33605068922042847, + "step": 2106 + }, + { + 
"epoch": 0.5595538441110078, + "grad_norm": 1.089019585879268, + "learning_rate": 1.725412149963033e-05, + "loss": 0.3340590298175812, + "step": 2107 + }, + { + "epoch": 0.5598194130925508, + "grad_norm": 0.9872121137775324, + "learning_rate": 1.7251098571818586e-05, + "loss": 0.29560500383377075, + "step": 2108 + }, + { + "epoch": 0.5600849820740937, + "grad_norm": 1.0964006197085026, + "learning_rate": 1.7248074246080555e-05, + "loss": 0.30100107192993164, + "step": 2109 + }, + { + "epoch": 0.5603505510556367, + "grad_norm": 1.1506338140671328, + "learning_rate": 1.7245048522999294e-05, + "loss": 0.35551172494888306, + "step": 2110 + }, + { + "epoch": 0.5606161200371796, + "grad_norm": 1.0513397818607815, + "learning_rate": 1.724202140315812e-05, + "loss": 0.3182663023471832, + "step": 2111 + }, + { + "epoch": 0.5608816890187226, + "grad_norm": 1.092960095111009, + "learning_rate": 1.723899288714064e-05, + "loss": 0.3160201609134674, + "step": 2112 + }, + { + "epoch": 0.5611472580002655, + "grad_norm": 1.0656744789709975, + "learning_rate": 1.72359629755307e-05, + "loss": 0.3126063942909241, + "step": 2113 + }, + { + "epoch": 0.5614128269818085, + "grad_norm": 1.0376603045942787, + "learning_rate": 1.723293166891244e-05, + "loss": 0.3222552239894867, + "step": 2114 + }, + { + "epoch": 0.5616783959633515, + "grad_norm": 1.1154320347150413, + "learning_rate": 1.722989896787026e-05, + "loss": 0.33601805567741394, + "step": 2115 + }, + { + "epoch": 0.5619439649448944, + "grad_norm": 1.0241046952841495, + "learning_rate": 1.722686487298883e-05, + "loss": 0.28679755330085754, + "step": 2116 + }, + { + "epoch": 0.5622095339264374, + "grad_norm": 0.9498185678215705, + "learning_rate": 1.722382938485308e-05, + "loss": 0.2895340323448181, + "step": 2117 + }, + { + "epoch": 0.5624751029079803, + "grad_norm": 1.3753225282493697, + "learning_rate": 1.7220792504048227e-05, + "loss": 0.310183048248291, + "step": 2118 + }, + { + "epoch": 0.5627406718895233, + "grad_norm": 
0.9776305745351022, + "learning_rate": 1.7217754231159737e-05, + "loss": 0.2768586277961731, + "step": 2119 + }, + { + "epoch": 0.5630062408710662, + "grad_norm": 0.9838874956474448, + "learning_rate": 1.7214714566773358e-05, + "loss": 0.2785574793815613, + "step": 2120 + }, + { + "epoch": 0.5632718098526092, + "grad_norm": 1.1815363465765012, + "learning_rate": 1.72116735114751e-05, + "loss": 0.30544358491897583, + "step": 2121 + }, + { + "epoch": 0.5635373788341522, + "grad_norm": 1.0704755380783626, + "learning_rate": 1.7208631065851243e-05, + "loss": 0.31662559509277344, + "step": 2122 + }, + { + "epoch": 0.5638029478156952, + "grad_norm": 0.9893085866675072, + "learning_rate": 1.7205587230488335e-05, + "loss": 0.31466105580329895, + "step": 2123 + }, + { + "epoch": 0.5640685167972381, + "grad_norm": 1.1520731756820097, + "learning_rate": 1.720254200597319e-05, + "loss": 0.3471367359161377, + "step": 2124 + }, + { + "epoch": 0.5643340857787811, + "grad_norm": 1.056530578075146, + "learning_rate": 1.7199495392892892e-05, + "loss": 0.3325269818305969, + "step": 2125 + }, + { + "epoch": 0.564599654760324, + "grad_norm": 1.1040662937900534, + "learning_rate": 1.7196447391834797e-05, + "loss": 0.32423460483551025, + "step": 2126 + }, + { + "epoch": 0.564865223741867, + "grad_norm": 1.0403895710374138, + "learning_rate": 1.7193398003386514e-05, + "loss": 0.3083527088165283, + "step": 2127 + }, + { + "epoch": 0.5651307927234099, + "grad_norm": 1.1794029606730059, + "learning_rate": 1.7190347228135933e-05, + "loss": 0.3418716490268707, + "step": 2128 + }, + { + "epoch": 0.5653963617049529, + "grad_norm": 1.0509473075306943, + "learning_rate": 1.7187295066671214e-05, + "loss": 0.33037957549095154, + "step": 2129 + }, + { + "epoch": 0.5656619306864958, + "grad_norm": 1.229094630243538, + "learning_rate": 1.7184241519580767e-05, + "loss": 0.3383673131465912, + "step": 2130 + }, + { + "epoch": 0.5659274996680388, + "grad_norm": 0.9364933789266218, + "learning_rate": 
1.718118658745329e-05, + "loss": 0.27756133675575256, + "step": 2131 + }, + { + "epoch": 0.5661930686495817, + "grad_norm": 1.1307081535546069, + "learning_rate": 1.717813027087773e-05, + "loss": 0.2987852692604065, + "step": 2132 + }, + { + "epoch": 0.5664586376311247, + "grad_norm": 1.0924971268375117, + "learning_rate": 1.717507257044331e-05, + "loss": 0.30016621947288513, + "step": 2133 + }, + { + "epoch": 0.5667242066126676, + "grad_norm": 1.0923612277165435, + "learning_rate": 1.7172013486739528e-05, + "loss": 0.31592345237731934, + "step": 2134 + }, + { + "epoch": 0.5669897755942106, + "grad_norm": 1.0932899901018698, + "learning_rate": 1.716895302035613e-05, + "loss": 0.3500048816204071, + "step": 2135 + }, + { + "epoch": 0.5672553445757536, + "grad_norm": 1.0529476139624208, + "learning_rate": 1.7165891171883134e-05, + "loss": 0.32069307565689087, + "step": 2136 + }, + { + "epoch": 0.5675209135572965, + "grad_norm": 1.10329279559138, + "learning_rate": 1.7162827941910837e-05, + "loss": 0.3100130558013916, + "step": 2137 + }, + { + "epoch": 0.5677864825388395, + "grad_norm": 1.080836142172887, + "learning_rate": 1.715976333102979e-05, + "loss": 0.3205985128879547, + "step": 2138 + }, + { + "epoch": 0.5680520515203824, + "grad_norm": 1.0861679281182697, + "learning_rate": 1.715669733983081e-05, + "loss": 0.3243224024772644, + "step": 2139 + }, + { + "epoch": 0.5683176205019254, + "grad_norm": 1.0818895017967487, + "learning_rate": 1.7153629968904997e-05, + "loss": 0.3278832733631134, + "step": 2140 + }, + { + "epoch": 0.5685831894834683, + "grad_norm": 0.9949896264020713, + "learning_rate": 1.7150561218843693e-05, + "loss": 0.29137033224105835, + "step": 2141 + }, + { + "epoch": 0.5688487584650113, + "grad_norm": 1.0470808838345107, + "learning_rate": 1.7147491090238516e-05, + "loss": 0.3065168857574463, + "step": 2142 + }, + { + "epoch": 0.5691143274465542, + "grad_norm": 1.0368441449557109, + "learning_rate": 1.7144419583681354e-05, + "loss": 
0.3367912173271179, + "step": 2143 + }, + { + "epoch": 0.5693798964280972, + "grad_norm": 1.086220090850542, + "learning_rate": 1.7141346699764357e-05, + "loss": 0.32278239727020264, + "step": 2144 + }, + { + "epoch": 0.5696454654096401, + "grad_norm": 1.080765529331453, + "learning_rate": 1.713827243907994e-05, + "loss": 0.2887166440486908, + "step": 2145 + }, + { + "epoch": 0.5699110343911831, + "grad_norm": 1.1353258061614586, + "learning_rate": 1.713519680222079e-05, + "loss": 0.33214619755744934, + "step": 2146 + }, + { + "epoch": 0.570176603372726, + "grad_norm": 1.1145274058321384, + "learning_rate": 1.7132119789779846e-05, + "loss": 0.2865470051765442, + "step": 2147 + }, + { + "epoch": 0.570442172354269, + "grad_norm": 1.1145678631141913, + "learning_rate": 1.7129041402350317e-05, + "loss": 0.32746967673301697, + "step": 2148 + }, + { + "epoch": 0.5707077413358119, + "grad_norm": 1.0454330804264187, + "learning_rate": 1.712596164052569e-05, + "loss": 0.3029513359069824, + "step": 2149 + }, + { + "epoch": 0.570973310317355, + "grad_norm": 0.9779058393705973, + "learning_rate": 1.7122880504899698e-05, + "loss": 0.3052698075771332, + "step": 2150 + }, + { + "epoch": 0.5712388792988979, + "grad_norm": 1.055591157713499, + "learning_rate": 1.7119797996066355e-05, + "loss": 0.29221272468566895, + "step": 2151 + }, + { + "epoch": 0.5715044482804409, + "grad_norm": 1.0014263274293047, + "learning_rate": 1.711671411461993e-05, + "loss": 0.3165368139743805, + "step": 2152 + }, + { + "epoch": 0.5717700172619838, + "grad_norm": 1.0763149059705845, + "learning_rate": 1.7113628861154953e-05, + "loss": 0.30877187848091125, + "step": 2153 + }, + { + "epoch": 0.5720355862435268, + "grad_norm": 1.0826550246568385, + "learning_rate": 1.711054223626623e-05, + "loss": 0.2985781729221344, + "step": 2154 + }, + { + "epoch": 0.5723011552250697, + "grad_norm": 1.1063225967671673, + "learning_rate": 1.7107454240548825e-05, + "loss": 0.3449699878692627, + "step": 2155 + }, + { + 
"epoch": 0.5725667242066127, + "grad_norm": 1.0430022801820942, + "learning_rate": 1.7104364874598066e-05, + "loss": 0.3219606578350067, + "step": 2156 + }, + { + "epoch": 0.5728322931881557, + "grad_norm": 1.0017795464639185, + "learning_rate": 1.710127413900955e-05, + "loss": 0.3059350550174713, + "step": 2157 + }, + { + "epoch": 0.5730978621696986, + "grad_norm": 1.0027463566346577, + "learning_rate": 1.7098182034379132e-05, + "loss": 0.29461371898651123, + "step": 2158 + }, + { + "epoch": 0.5733634311512416, + "grad_norm": 1.0159484116581767, + "learning_rate": 1.709508856130293e-05, + "loss": 0.2998795509338379, + "step": 2159 + }, + { + "epoch": 0.5736290001327845, + "grad_norm": 1.0092216110834475, + "learning_rate": 1.7091993720377336e-05, + "loss": 0.28214582800865173, + "step": 2160 + }, + { + "epoch": 0.5738945691143275, + "grad_norm": 1.2106483053766084, + "learning_rate": 1.708889751219899e-05, + "loss": 0.3036864697933197, + "step": 2161 + }, + { + "epoch": 0.5741601380958704, + "grad_norm": 1.1139097359759478, + "learning_rate": 1.7085799937364815e-05, + "loss": 0.34146320819854736, + "step": 2162 + }, + { + "epoch": 0.5744257070774134, + "grad_norm": 1.0631963944232283, + "learning_rate": 1.708270099647198e-05, + "loss": 0.33996909856796265, + "step": 2163 + }, + { + "epoch": 0.5746912760589563, + "grad_norm": 1.0779467399705778, + "learning_rate": 1.7079600690117924e-05, + "loss": 0.3308744728565216, + "step": 2164 + }, + { + "epoch": 0.5749568450404993, + "grad_norm": 1.0447240453690412, + "learning_rate": 1.707649901890035e-05, + "loss": 0.2945587933063507, + "step": 2165 + }, + { + "epoch": 0.5752224140220422, + "grad_norm": 1.0321317558144223, + "learning_rate": 1.7073395983417227e-05, + "loss": 0.30348697304725647, + "step": 2166 + }, + { + "epoch": 0.5754879830035852, + "grad_norm": 1.025806147580304, + "learning_rate": 1.707029158426678e-05, + "loss": 0.28789055347442627, + "step": 2167 + }, + { + "epoch": 0.5757535519851281, + "grad_norm": 
1.168965754707192, + "learning_rate": 1.7067185822047502e-05, + "loss": 0.3026643693447113, + "step": 2168 + }, + { + "epoch": 0.5760191209666711, + "grad_norm": 1.1108861255752682, + "learning_rate": 1.7064078697358147e-05, + "loss": 0.34021061658859253, + "step": 2169 + }, + { + "epoch": 0.576284689948214, + "grad_norm": 1.1062563353075296, + "learning_rate": 1.7060970210797735e-05, + "loss": 0.32793867588043213, + "step": 2170 + }, + { + "epoch": 0.576550258929757, + "grad_norm": 1.1692826638365306, + "learning_rate": 1.705786036296554e-05, + "loss": 0.36144691705703735, + "step": 2171 + }, + { + "epoch": 0.5768158279112999, + "grad_norm": 1.1177501875227254, + "learning_rate": 1.7054749154461105e-05, + "loss": 0.3630291223526001, + "step": 2172 + }, + { + "epoch": 0.5770813968928429, + "grad_norm": 1.144365708172633, + "learning_rate": 1.705163658588424e-05, + "loss": 0.34964969754219055, + "step": 2173 + }, + { + "epoch": 0.5773469658743858, + "grad_norm": 1.0298961015626151, + "learning_rate": 1.7048522657835004e-05, + "loss": 0.2877815067768097, + "step": 2174 + }, + { + "epoch": 0.5776125348559288, + "grad_norm": 1.1148926749607628, + "learning_rate": 1.7045407370913732e-05, + "loss": 0.3185664713382721, + "step": 2175 + }, + { + "epoch": 0.5778781038374717, + "grad_norm": 1.0393243287048395, + "learning_rate": 1.704229072572101e-05, + "loss": 0.3035257160663605, + "step": 2176 + }, + { + "epoch": 0.5781436728190147, + "grad_norm": 1.048139429574759, + "learning_rate": 1.7039172722857695e-05, + "loss": 0.325702965259552, + "step": 2177 + }, + { + "epoch": 0.5784092418005577, + "grad_norm": 1.1046410504333486, + "learning_rate": 1.7036053362924896e-05, + "loss": 0.32837462425231934, + "step": 2178 + }, + { + "epoch": 0.5786748107821007, + "grad_norm": 1.066094854816524, + "learning_rate": 1.703293264652399e-05, + "loss": 0.3430028259754181, + "step": 2179 + }, + { + "epoch": 0.5789403797636437, + "grad_norm": 1.1007701198247044, + "learning_rate": 
1.702981057425662e-05, + "loss": 0.32792964577674866, + "step": 2180 + }, + { + "epoch": 0.5792059487451866, + "grad_norm": 0.9964902607677808, + "learning_rate": 1.7026687146724675e-05, + "loss": 0.3037140965461731, + "step": 2181 + }, + { + "epoch": 0.5794715177267296, + "grad_norm": 0.9962684392556416, + "learning_rate": 1.7023562364530322e-05, + "loss": 0.33083540201187134, + "step": 2182 + }, + { + "epoch": 0.5797370867082725, + "grad_norm": 0.9979777099745417, + "learning_rate": 1.702043622827598e-05, + "loss": 0.3108663260936737, + "step": 2183 + }, + { + "epoch": 0.5800026556898155, + "grad_norm": 0.9618495492417584, + "learning_rate": 1.7017308738564336e-05, + "loss": 0.2939792573451996, + "step": 2184 + }, + { + "epoch": 0.5802682246713584, + "grad_norm": 1.1315656989934186, + "learning_rate": 1.7014179895998322e-05, + "loss": 0.3686106503009796, + "step": 2185 + }, + { + "epoch": 0.5805337936529014, + "grad_norm": 1.0524191997810952, + "learning_rate": 1.7011049701181152e-05, + "loss": 0.3497159779071808, + "step": 2186 + }, + { + "epoch": 0.5807993626344443, + "grad_norm": 1.0989364128809138, + "learning_rate": 1.7007918154716286e-05, + "loss": 0.31730401515960693, + "step": 2187 + }, + { + "epoch": 0.5810649316159873, + "grad_norm": 1.0000330799865447, + "learning_rate": 1.7004785257207456e-05, + "loss": 0.3064701557159424, + "step": 2188 + }, + { + "epoch": 0.5813305005975302, + "grad_norm": 1.1111458283716926, + "learning_rate": 1.7001651009258635e-05, + "loss": 0.37174129486083984, + "step": 2189 + }, + { + "epoch": 0.5815960695790732, + "grad_norm": 1.068050904458805, + "learning_rate": 1.699851541147408e-05, + "loss": 0.3548140823841095, + "step": 2190 + }, + { + "epoch": 0.5818616385606161, + "grad_norm": 1.2340650081251097, + "learning_rate": 1.6995378464458292e-05, + "loss": 0.3486049473285675, + "step": 2191 + }, + { + "epoch": 0.5821272075421591, + "grad_norm": 1.996025853729682, + "learning_rate": 1.6992240168816037e-05, + "loss": 
0.3083210587501526, + "step": 2192 + }, + { + "epoch": 0.582392776523702, + "grad_norm": 1.0284637251594817, + "learning_rate": 1.6989100525152346e-05, + "loss": 0.3006829619407654, + "step": 2193 + }, + { + "epoch": 0.582658345505245, + "grad_norm": 1.103386023825705, + "learning_rate": 1.6985959534072502e-05, + "loss": 0.32856425642967224, + "step": 2194 + }, + { + "epoch": 0.5829239144867879, + "grad_norm": 1.1293873964177752, + "learning_rate": 1.6982817196182052e-05, + "loss": 0.3382526934146881, + "step": 2195 + }, + { + "epoch": 0.5831894834683309, + "grad_norm": 1.0326113865244562, + "learning_rate": 1.69796735120868e-05, + "loss": 0.3311583399772644, + "step": 2196 + }, + { + "epoch": 0.5834550524498738, + "grad_norm": 1.0267321140886136, + "learning_rate": 1.6976528482392815e-05, + "loss": 0.312778115272522, + "step": 2197 + }, + { + "epoch": 0.5837206214314168, + "grad_norm": 1.0148067463802801, + "learning_rate": 1.697338210770642e-05, + "loss": 0.2996736466884613, + "step": 2198 + }, + { + "epoch": 0.5839861904129597, + "grad_norm": 1.1885772355333009, + "learning_rate": 1.6970234388634192e-05, + "loss": 0.344571590423584, + "step": 2199 + }, + { + "epoch": 0.5842517593945027, + "grad_norm": 0.9183671512098872, + "learning_rate": 1.6967085325782984e-05, + "loss": 0.25299468636512756, + "step": 2200 + }, + { + "epoch": 0.5845173283760456, + "grad_norm": 1.042142544774348, + "learning_rate": 1.6963934919759896e-05, + "loss": 0.3080691695213318, + "step": 2201 + }, + { + "epoch": 0.5847828973575886, + "grad_norm": 1.0216299822000434, + "learning_rate": 1.6960783171172286e-05, + "loss": 0.27491697669029236, + "step": 2202 + }, + { + "epoch": 0.5850484663391315, + "grad_norm": 1.1629234714983534, + "learning_rate": 1.6957630080627772e-05, + "loss": 0.3422500193119049, + "step": 2203 + }, + { + "epoch": 0.5853140353206745, + "grad_norm": 1.0832524871656921, + "learning_rate": 1.695447564873424e-05, + "loss": 0.27703234553337097, + "step": 2204 + }, + { + 
"epoch": 0.5855796043022174, + "grad_norm": 1.0275000328668338, + "learning_rate": 1.6951319876099825e-05, + "loss": 0.3088543117046356, + "step": 2205 + }, + { + "epoch": 0.5858451732837605, + "grad_norm": 1.0671359142705343, + "learning_rate": 1.694816276333292e-05, + "loss": 0.29875609278678894, + "step": 2206 + }, + { + "epoch": 0.5861107422653035, + "grad_norm": 1.0185982306074886, + "learning_rate": 1.6945004311042176e-05, + "loss": 0.30804386734962463, + "step": 2207 + }, + { + "epoch": 0.5863763112468464, + "grad_norm": 1.081134235929082, + "learning_rate": 1.694184451983651e-05, + "loss": 0.3324572741985321, + "step": 2208 + }, + { + "epoch": 0.5866418802283894, + "grad_norm": 1.0822730402391103, + "learning_rate": 1.6938683390325096e-05, + "loss": 0.30302488803863525, + "step": 2209 + }, + { + "epoch": 0.5869074492099323, + "grad_norm": 1.1499037543983048, + "learning_rate": 1.6935520923117355e-05, + "loss": 0.3264358341693878, + "step": 2210 + }, + { + "epoch": 0.5871730181914753, + "grad_norm": 1.1305858167915457, + "learning_rate": 1.693235711882298e-05, + "loss": 0.3172164261341095, + "step": 2211 + }, + { + "epoch": 0.5874385871730182, + "grad_norm": 0.9910314790510931, + "learning_rate": 1.6929191978051908e-05, + "loss": 0.300851047039032, + "step": 2212 + }, + { + "epoch": 0.5877041561545612, + "grad_norm": 1.1122516205102002, + "learning_rate": 1.6926025501414352e-05, + "loss": 0.2887764871120453, + "step": 2213 + }, + { + "epoch": 0.5879697251361041, + "grad_norm": 1.0991421920944897, + "learning_rate": 1.692285768952076e-05, + "loss": 0.3246796727180481, + "step": 2214 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 1.1069795382063548, + "learning_rate": 1.6919688542981852e-05, + "loss": 0.30595412850379944, + "step": 2215 + }, + { + "epoch": 0.58850086309919, + "grad_norm": 1.068918741300791, + "learning_rate": 1.6916518062408604e-05, + "loss": 0.2885501980781555, + "step": 2216 + }, + { + "epoch": 0.588766432080733, + "grad_norm": 
1.066918066226772, + "learning_rate": 1.6913346248412245e-05, + "loss": 0.34449082612991333, + "step": 2217 + }, + { + "epoch": 0.5890320010622759, + "grad_norm": 1.0585511422631098, + "learning_rate": 1.6910173101604267e-05, + "loss": 0.29410409927368164, + "step": 2218 + }, + { + "epoch": 0.5892975700438189, + "grad_norm": 1.1710793080996782, + "learning_rate": 1.690699862259641e-05, + "loss": 0.3250378370285034, + "step": 2219 + }, + { + "epoch": 0.5895631390253618, + "grad_norm": 1.3327292763951073, + "learning_rate": 1.690382281200068e-05, + "loss": 0.34420648217201233, + "step": 2220 + }, + { + "epoch": 0.5898287080069048, + "grad_norm": 1.1196949637967406, + "learning_rate": 1.6900645670429338e-05, + "loss": 0.33951860666275024, + "step": 2221 + }, + { + "epoch": 0.5900942769884477, + "grad_norm": 1.064177847952839, + "learning_rate": 1.6897467198494892e-05, + "loss": 0.35045644640922546, + "step": 2222 + }, + { + "epoch": 0.5903598459699907, + "grad_norm": 1.0378256375427404, + "learning_rate": 1.689428739681012e-05, + "loss": 0.3262789845466614, + "step": 2223 + }, + { + "epoch": 0.5906254149515336, + "grad_norm": 1.0662878016953237, + "learning_rate": 1.689110626598805e-05, + "loss": 0.2959234118461609, + "step": 2224 + }, + { + "epoch": 0.5908909839330766, + "grad_norm": 1.040953230887288, + "learning_rate": 1.6887923806641965e-05, + "loss": 0.3185187876224518, + "step": 2225 + }, + { + "epoch": 0.5911565529146195, + "grad_norm": 0.9754385668000993, + "learning_rate": 1.6884740019385403e-05, + "loss": 0.2861860692501068, + "step": 2226 + }, + { + "epoch": 0.5914221218961625, + "grad_norm": 1.0067160421449919, + "learning_rate": 1.6881554904832163e-05, + "loss": 0.28718897700309753, + "step": 2227 + }, + { + "epoch": 0.5916876908777055, + "grad_norm": 1.0412433017248806, + "learning_rate": 1.68783684635963e-05, + "loss": 0.2919235825538635, + "step": 2228 + }, + { + "epoch": 0.5919532598592484, + "grad_norm": 0.9981457951279066, + "learning_rate": 
1.687518069629212e-05, + "loss": 0.29265689849853516, + "step": 2229 + }, + { + "epoch": 0.5922188288407914, + "grad_norm": 1.105624159979672, + "learning_rate": 1.6871991603534183e-05, + "loss": 0.3257937431335449, + "step": 2230 + }, + { + "epoch": 0.5924843978223343, + "grad_norm": 0.9776528734928177, + "learning_rate": 1.6868801185937318e-05, + "loss": 0.30709922313690186, + "step": 2231 + }, + { + "epoch": 0.5927499668038773, + "grad_norm": 1.0470693079191735, + "learning_rate": 1.6865609444116594e-05, + "loss": 0.34016695618629456, + "step": 2232 + }, + { + "epoch": 0.5930155357854202, + "grad_norm": 3.119158292180646, + "learning_rate": 1.686241637868734e-05, + "loss": 0.27988332509994507, + "step": 2233 + }, + { + "epoch": 0.5932811047669632, + "grad_norm": 1.0478488923431404, + "learning_rate": 1.685922199026514e-05, + "loss": 0.33241748809814453, + "step": 2234 + }, + { + "epoch": 0.5935466737485062, + "grad_norm": 1.131470783603603, + "learning_rate": 1.685602627946584e-05, + "loss": 0.29636645317077637, + "step": 2235 + }, + { + "epoch": 0.5938122427300492, + "grad_norm": 1.0270882549188534, + "learning_rate": 1.6852829246905532e-05, + "loss": 0.32173705101013184, + "step": 2236 + }, + { + "epoch": 0.5940778117115921, + "grad_norm": 1.0825392737706068, + "learning_rate": 1.6849630893200567e-05, + "loss": 0.318726122379303, + "step": 2237 + }, + { + "epoch": 0.5943433806931351, + "grad_norm": 1.0382165285294276, + "learning_rate": 1.684643121896755e-05, + "loss": 0.3085494339466095, + "step": 2238 + }, + { + "epoch": 0.594608949674678, + "grad_norm": 1.0527313536489507, + "learning_rate": 1.684323022482334e-05, + "loss": 0.3402160406112671, + "step": 2239 + }, + { + "epoch": 0.594874518656221, + "grad_norm": 1.0380085019224927, + "learning_rate": 1.684002791138505e-05, + "loss": 0.28099578619003296, + "step": 2240 + }, + { + "epoch": 0.5951400876377639, + "grad_norm": 1.0821564922133853, + "learning_rate": 1.6836824279270053e-05, + "loss": 
0.3049670159816742, + "step": 2241 + }, + { + "epoch": 0.5954056566193069, + "grad_norm": 1.0644252940512267, + "learning_rate": 1.6833619329095966e-05, + "loss": 0.2999834716320038, + "step": 2242 + }, + { + "epoch": 0.5956712256008498, + "grad_norm": 1.0828247808996563, + "learning_rate": 1.6830413061480663e-05, + "loss": 0.2976648509502411, + "step": 2243 + }, + { + "epoch": 0.5959367945823928, + "grad_norm": 0.9516700397999099, + "learning_rate": 1.6827205477042282e-05, + "loss": 0.2937200963497162, + "step": 2244 + }, + { + "epoch": 0.5962023635639357, + "grad_norm": 0.9800041770842799, + "learning_rate": 1.6823996576399208e-05, + "loss": 0.27944231033325195, + "step": 2245 + }, + { + "epoch": 0.5964679325454787, + "grad_norm": 1.2497901059935828, + "learning_rate": 1.6820786360170073e-05, + "loss": 0.37821248173713684, + "step": 2246 + }, + { + "epoch": 0.5967335015270216, + "grad_norm": 1.0764913922139379, + "learning_rate": 1.681757482897377e-05, + "loss": 0.31929296255111694, + "step": 2247 + }, + { + "epoch": 0.5969990705085646, + "grad_norm": 1.0997353700477965, + "learning_rate": 1.6814361983429446e-05, + "loss": 0.29905542731285095, + "step": 2248 + }, + { + "epoch": 0.5972646394901076, + "grad_norm": 1.1012066663218303, + "learning_rate": 1.6811147824156503e-05, + "loss": 0.31056714057922363, + "step": 2249 + }, + { + "epoch": 0.5975302084716505, + "grad_norm": 1.0740873036211436, + "learning_rate": 1.6807932351774585e-05, + "loss": 0.3311445415019989, + "step": 2250 + }, + { + "epoch": 0.5977957774531935, + "grad_norm": 0.9539008733822649, + "learning_rate": 1.6804715566903603e-05, + "loss": 0.28413334488868713, + "step": 2251 + }, + { + "epoch": 0.5980613464347364, + "grad_norm": 1.068533794622215, + "learning_rate": 1.6801497470163717e-05, + "loss": 0.27681154012680054, + "step": 2252 + }, + { + "epoch": 0.5983269154162794, + "grad_norm": 1.0654200190327086, + "learning_rate": 1.679827806217533e-05, + "loss": 0.290216863155365, + "step": 2253 + }, 
+ { + "epoch": 0.5985924843978223, + "grad_norm": 1.1041469834048565, + "learning_rate": 1.6795057343559115e-05, + "loss": 0.31263259053230286, + "step": 2254 + }, + { + "epoch": 0.5988580533793653, + "grad_norm": 1.126601485756597, + "learning_rate": 1.6791835314935984e-05, + "loss": 0.31527474522590637, + "step": 2255 + }, + { + "epoch": 0.5991236223609082, + "grad_norm": 1.078203294441185, + "learning_rate": 1.6788611976927104e-05, + "loss": 0.308803915977478, + "step": 2256 + }, + { + "epoch": 0.5993891913424512, + "grad_norm": 1.0503773076355036, + "learning_rate": 1.6785387330153898e-05, + "loss": 0.3038686215877533, + "step": 2257 + }, + { + "epoch": 0.5996547603239941, + "grad_norm": 1.0216209005739547, + "learning_rate": 1.6782161375238045e-05, + "loss": 0.32485973834991455, + "step": 2258 + }, + { + "epoch": 0.5999203293055371, + "grad_norm": 1.182450532742011, + "learning_rate": 1.6778934112801467e-05, + "loss": 0.32350587844848633, + "step": 2259 + }, + { + "epoch": 0.60018589828708, + "grad_norm": 1.0888151703509321, + "learning_rate": 1.6775705543466337e-05, + "loss": 0.31593745946884155, + "step": 2260 + }, + { + "epoch": 0.600451467268623, + "grad_norm": 1.0882766479814592, + "learning_rate": 1.6772475667855098e-05, + "loss": 0.3266843855381012, + "step": 2261 + }, + { + "epoch": 0.6007170362501659, + "grad_norm": 1.1815872316974045, + "learning_rate": 1.676924448659042e-05, + "loss": 0.3334394693374634, + "step": 2262 + }, + { + "epoch": 0.600982605231709, + "grad_norm": 1.1019346354795203, + "learning_rate": 1.676601200029524e-05, + "loss": 0.29688704013824463, + "step": 2263 + }, + { + "epoch": 0.6012481742132519, + "grad_norm": 1.0675092497220116, + "learning_rate": 1.6762778209592744e-05, + "loss": 0.3163599967956543, + "step": 2264 + }, + { + "epoch": 0.6015137431947949, + "grad_norm": 3.310146638883422, + "learning_rate": 1.675954311510637e-05, + "loss": 0.3001909554004669, + "step": 2265 + }, + { + "epoch": 0.6017793121763378, + "grad_norm": 
1.052342150287052, + "learning_rate": 1.6756306717459804e-05, + "loss": 0.306442528963089, + "step": 2266 + }, + { + "epoch": 0.6020448811578808, + "grad_norm": 1.0462245388504205, + "learning_rate": 1.6753069017276988e-05, + "loss": 0.32714736461639404, + "step": 2267 + }, + { + "epoch": 0.6023104501394237, + "grad_norm": 1.1462408299032063, + "learning_rate": 1.6749830015182106e-05, + "loss": 0.3276352286338806, + "step": 2268 + }, + { + "epoch": 0.6025760191209667, + "grad_norm": 1.196238497855594, + "learning_rate": 1.6746589711799607e-05, + "loss": 0.3151017427444458, + "step": 2269 + }, + { + "epoch": 0.6028415881025097, + "grad_norm": 1.0342963680315473, + "learning_rate": 1.674334810775418e-05, + "loss": 0.30252715945243835, + "step": 2270 + }, + { + "epoch": 0.6031071570840526, + "grad_norm": 1.013150034994447, + "learning_rate": 1.674010520367077e-05, + "loss": 0.28994205594062805, + "step": 2271 + }, + { + "epoch": 0.6033727260655956, + "grad_norm": 1.060884408167446, + "learning_rate": 1.6736861000174566e-05, + "loss": 0.31821542978286743, + "step": 2272 + }, + { + "epoch": 0.6036382950471385, + "grad_norm": 1.0745731746159097, + "learning_rate": 1.6733615497891018e-05, + "loss": 0.33488404750823975, + "step": 2273 + }, + { + "epoch": 0.6039038640286815, + "grad_norm": 1.1687722013665731, + "learning_rate": 1.6730368697445815e-05, + "loss": 0.32545825839042664, + "step": 2274 + }, + { + "epoch": 0.6041694330102244, + "grad_norm": 1.0959659967153625, + "learning_rate": 1.6727120599464904e-05, + "loss": 0.3229105770587921, + "step": 2275 + }, + { + "epoch": 0.6044350019917674, + "grad_norm": 1.0190980223229251, + "learning_rate": 1.672387120457448e-05, + "loss": 0.29090648889541626, + "step": 2276 + }, + { + "epoch": 0.6047005709733103, + "grad_norm": 1.0135966931724694, + "learning_rate": 1.6720620513400993e-05, + "loss": 0.3102695345878601, + "step": 2277 + }, + { + "epoch": 0.6049661399548533, + "grad_norm": 0.9853472262099896, + "learning_rate": 
1.6717368526571133e-05, + "loss": 0.3104533851146698, + "step": 2278 + }, + { + "epoch": 0.6052317089363962, + "grad_norm": 1.0624907138843722, + "learning_rate": 1.671411524471184e-05, + "loss": 0.3340798616409302, + "step": 2279 + }, + { + "epoch": 0.6054972779179392, + "grad_norm": 0.9362556276145145, + "learning_rate": 1.6710860668450318e-05, + "loss": 0.2807982563972473, + "step": 2280 + }, + { + "epoch": 0.6057628468994821, + "grad_norm": 1.0604829312359818, + "learning_rate": 1.6707604798414005e-05, + "loss": 0.28892064094543457, + "step": 2281 + }, + { + "epoch": 0.6060284158810251, + "grad_norm": 1.1005771261022437, + "learning_rate": 1.6704347635230594e-05, + "loss": 0.29660698771476746, + "step": 2282 + }, + { + "epoch": 0.606293984862568, + "grad_norm": 1.0826898129560842, + "learning_rate": 1.6701089179528032e-05, + "loss": 0.32079893350601196, + "step": 2283 + }, + { + "epoch": 0.606559553844111, + "grad_norm": 1.0711524337358722, + "learning_rate": 1.6697829431934508e-05, + "loss": 0.3464012145996094, + "step": 2284 + }, + { + "epoch": 0.6068251228256539, + "grad_norm": 1.113831391037599, + "learning_rate": 1.669456839307846e-05, + "loss": 0.3378494381904602, + "step": 2285 + }, + { + "epoch": 0.6070906918071969, + "grad_norm": 1.1314381443012484, + "learning_rate": 1.6691306063588583e-05, + "loss": 0.2856704294681549, + "step": 2286 + }, + { + "epoch": 0.6073562607887398, + "grad_norm": 1.117095467957477, + "learning_rate": 1.6688042444093816e-05, + "loss": 0.317970871925354, + "step": 2287 + }, + { + "epoch": 0.6076218297702828, + "grad_norm": 0.9765740214705895, + "learning_rate": 1.6684777535223338e-05, + "loss": 0.3067381978034973, + "step": 2288 + }, + { + "epoch": 0.6078873987518257, + "grad_norm": 0.9795122588790717, + "learning_rate": 1.6681511337606594e-05, + "loss": 0.28682243824005127, + "step": 2289 + }, + { + "epoch": 0.6081529677333687, + "grad_norm": 1.0967806384391572, + "learning_rate": 1.667824385187327e-05, + "loss": 
0.30516478419303894, + "step": 2290 + }, + { + "epoch": 0.6084185367149118, + "grad_norm": 1.2090889717256932, + "learning_rate": 1.6674975078653284e-05, + "loss": 0.3114034831523895, + "step": 2291 + }, + { + "epoch": 0.6086841056964547, + "grad_norm": 1.045779035897072, + "learning_rate": 1.6671705018576837e-05, + "loss": 0.3119916617870331, + "step": 2292 + }, + { + "epoch": 0.6089496746779977, + "grad_norm": 1.0110290976394836, + "learning_rate": 1.666843367227434e-05, + "loss": 0.2695278823375702, + "step": 2293 + }, + { + "epoch": 0.6092152436595406, + "grad_norm": 1.1042693591067085, + "learning_rate": 1.6665161040376483e-05, + "loss": 0.32162508368492126, + "step": 2294 + }, + { + "epoch": 0.6094808126410836, + "grad_norm": 1.1533266295102853, + "learning_rate": 1.6661887123514183e-05, + "loss": 0.3115222752094269, + "step": 2295 + }, + { + "epoch": 0.6097463816226265, + "grad_norm": 1.1903173397636237, + "learning_rate": 1.6658611922318618e-05, + "loss": 0.3239362835884094, + "step": 2296 + }, + { + "epoch": 0.6100119506041695, + "grad_norm": 1.0224008240467277, + "learning_rate": 1.66553354374212e-05, + "loss": 0.29716256260871887, + "step": 2297 + }, + { + "epoch": 0.6102775195857124, + "grad_norm": 1.1579823586849616, + "learning_rate": 1.6652057669453606e-05, + "loss": 0.3337557911872864, + "step": 2298 + }, + { + "epoch": 0.6105430885672554, + "grad_norm": 1.0726602627394455, + "learning_rate": 1.6648778619047747e-05, + "loss": 0.30258649587631226, + "step": 2299 + }, + { + "epoch": 0.6108086575487983, + "grad_norm": 1.0836532202857172, + "learning_rate": 1.6645498286835784e-05, + "loss": 0.3151426315307617, + "step": 2300 + }, + { + "epoch": 0.6110742265303413, + "grad_norm": 0.9639622977001232, + "learning_rate": 1.664221667345013e-05, + "loss": 0.274954617023468, + "step": 2301 + }, + { + "epoch": 0.6113397955118842, + "grad_norm": 1.0454921478368049, + "learning_rate": 1.6638933779523437e-05, + "loss": 0.3055363893508911, + "step": 2302 + }, + { + 
"epoch": 0.6116053644934272, + "grad_norm": 1.0132221767482874, + "learning_rate": 1.663564960568861e-05, + "loss": 0.30296921730041504, + "step": 2303 + }, + { + "epoch": 0.6118709334749701, + "grad_norm": 1.0766188111034134, + "learning_rate": 1.66323641525788e-05, + "loss": 0.3118343651294708, + "step": 2304 + }, + { + "epoch": 0.6121365024565131, + "grad_norm": 1.164685781665666, + "learning_rate": 1.6629077420827405e-05, + "loss": 0.3277447819709778, + "step": 2305 + }, + { + "epoch": 0.612402071438056, + "grad_norm": 1.11996036014055, + "learning_rate": 1.6625789411068063e-05, + "loss": 0.307643860578537, + "step": 2306 + }, + { + "epoch": 0.612667640419599, + "grad_norm": 1.0752891079202938, + "learning_rate": 1.6622500123934665e-05, + "loss": 0.3043777346611023, + "step": 2307 + }, + { + "epoch": 0.6129332094011419, + "grad_norm": 1.1229566611504027, + "learning_rate": 1.6619209560061352e-05, + "loss": 0.28634852170944214, + "step": 2308 + }, + { + "epoch": 0.6131987783826849, + "grad_norm": 1.1746890844036781, + "learning_rate": 1.6615917720082503e-05, + "loss": 0.33200016617774963, + "step": 2309 + }, + { + "epoch": 0.6134643473642278, + "grad_norm": 1.0620493011215435, + "learning_rate": 1.661262460463274e-05, + "loss": 0.26568055152893066, + "step": 2310 + }, + { + "epoch": 0.6137299163457708, + "grad_norm": 1.0408157138123326, + "learning_rate": 1.6609330214346945e-05, + "loss": 0.2772855758666992, + "step": 2311 + }, + { + "epoch": 0.6139954853273137, + "grad_norm": 1.2060076126932109, + "learning_rate": 1.6606034549860236e-05, + "loss": 0.3330409824848175, + "step": 2312 + }, + { + "epoch": 0.6142610543088567, + "grad_norm": 1.0235644562455184, + "learning_rate": 1.6602737611807975e-05, + "loss": 0.27702978253364563, + "step": 2313 + }, + { + "epoch": 0.6145266232903996, + "grad_norm": 1.1266755606893777, + "learning_rate": 1.6599439400825775e-05, + "loss": 0.29985183477401733, + "step": 2314 + }, + { + "epoch": 0.6147921922719426, + "grad_norm": 
1.0266522277907775, + "learning_rate": 1.659613991754949e-05, + "loss": 0.2666100859642029, + "step": 2315 + }, + { + "epoch": 0.6150577612534855, + "grad_norm": 1.0676553477298287, + "learning_rate": 1.6592839162615223e-05, + "loss": 0.2968613803386688, + "step": 2316 + }, + { + "epoch": 0.6153233302350285, + "grad_norm": 1.26155090118547, + "learning_rate": 1.6589537136659326e-05, + "loss": 0.2693714499473572, + "step": 2317 + }, + { + "epoch": 0.6155888992165715, + "grad_norm": 1.1411779960646509, + "learning_rate": 1.658623384031838e-05, + "loss": 0.3192713260650635, + "step": 2318 + }, + { + "epoch": 0.6158544681981145, + "grad_norm": 1.099028639770974, + "learning_rate": 1.658292927422923e-05, + "loss": 0.2958469092845917, + "step": 2319 + }, + { + "epoch": 0.6161200371796575, + "grad_norm": 1.0613129939040433, + "learning_rate": 1.657962343902895e-05, + "loss": 0.28580743074417114, + "step": 2320 + }, + { + "epoch": 0.6163856061612004, + "grad_norm": 1.2105545865052383, + "learning_rate": 1.6576316335354875e-05, + "loss": 0.34325680136680603, + "step": 2321 + }, + { + "epoch": 0.6166511751427434, + "grad_norm": 1.076014963599046, + "learning_rate": 1.657300796384457e-05, + "loss": 0.3220894932746887, + "step": 2322 + }, + { + "epoch": 0.6169167441242863, + "grad_norm": 1.003861259990267, + "learning_rate": 1.656969832513585e-05, + "loss": 0.2934642434120178, + "step": 2323 + }, + { + "epoch": 0.6171823131058293, + "grad_norm": 1.0182182491222724, + "learning_rate": 1.656638741986677e-05, + "loss": 0.3066999912261963, + "step": 2324 + }, + { + "epoch": 0.6174478820873722, + "grad_norm": 1.0780285957414313, + "learning_rate": 1.6563075248675645e-05, + "loss": 0.2947896122932434, + "step": 2325 + }, + { + "epoch": 0.6177134510689152, + "grad_norm": 1.1567241875430703, + "learning_rate": 1.6559761812201018e-05, + "loss": 0.33616161346435547, + "step": 2326 + }, + { + "epoch": 0.6179790200504581, + "grad_norm": 1.0754490235924812, + "learning_rate": 
1.6556447111081678e-05, + "loss": 0.29555875062942505, + "step": 2327 + }, + { + "epoch": 0.6182445890320011, + "grad_norm": 1.0070791342344025, + "learning_rate": 1.655313114595666e-05, + "loss": 0.276498019695282, + "step": 2328 + }, + { + "epoch": 0.618510158013544, + "grad_norm": 1.0894248364537533, + "learning_rate": 1.6549813917465242e-05, + "loss": 0.3081165552139282, + "step": 2329 + }, + { + "epoch": 0.618775726995087, + "grad_norm": 1.2153046006588315, + "learning_rate": 1.654649542624695e-05, + "loss": 0.3610053062438965, + "step": 2330 + }, + { + "epoch": 0.6190412959766299, + "grad_norm": 1.0676492266011808, + "learning_rate": 1.654317567294155e-05, + "loss": 0.2775106430053711, + "step": 2331 + }, + { + "epoch": 0.6193068649581729, + "grad_norm": 4.371469554540211, + "learning_rate": 1.653985465818905e-05, + "loss": 0.2915893793106079, + "step": 2332 + }, + { + "epoch": 0.6195724339397158, + "grad_norm": 1.0032536414224313, + "learning_rate": 1.6536532382629696e-05, + "loss": 0.30868977308273315, + "step": 2333 + }, + { + "epoch": 0.6198380029212588, + "grad_norm": 1.1011191125099704, + "learning_rate": 1.6533208846903996e-05, + "loss": 0.3083038330078125, + "step": 2334 + }, + { + "epoch": 0.6201035719028017, + "grad_norm": 0.9895882037041855, + "learning_rate": 1.652988405165268e-05, + "loss": 0.25192466378211975, + "step": 2335 + }, + { + "epoch": 0.6203691408843447, + "grad_norm": 1.1020677364796136, + "learning_rate": 1.6526557997516737e-05, + "loss": 0.32154130935668945, + "step": 2336 + }, + { + "epoch": 0.6206347098658876, + "grad_norm": 1.1174587266065723, + "learning_rate": 1.6523230685137382e-05, + "loss": 0.2860945165157318, + "step": 2337 + }, + { + "epoch": 0.6209002788474306, + "grad_norm": 1.1647384960602913, + "learning_rate": 1.6519902115156084e-05, + "loss": 0.3279789984226227, + "step": 2338 + }, + { + "epoch": 0.6211658478289735, + "grad_norm": 1.062678685453679, + "learning_rate": 1.6516572288214555e-05, + "loss": 
0.3082200884819031, + "step": 2339 + }, + { + "epoch": 0.6214314168105165, + "grad_norm": 1.1253285275737313, + "learning_rate": 1.6513241204954745e-05, + "loss": 0.29032304883003235, + "step": 2340 + }, + { + "epoch": 0.6216969857920595, + "grad_norm": 1.004918906125766, + "learning_rate": 1.6509908866018843e-05, + "loss": 0.3096848130226135, + "step": 2341 + }, + { + "epoch": 0.6219625547736024, + "grad_norm": 1.021047856460921, + "learning_rate": 1.6506575272049294e-05, + "loss": 0.309989333152771, + "step": 2342 + }, + { + "epoch": 0.6222281237551454, + "grad_norm": 1.119097166323709, + "learning_rate": 1.6503240423688768e-05, + "loss": 0.311350554227829, + "step": 2343 + }, + { + "epoch": 0.6224936927366883, + "grad_norm": 1.0659510240862446, + "learning_rate": 1.6499904321580187e-05, + "loss": 0.3313952386379242, + "step": 2344 + }, + { + "epoch": 0.6227592617182313, + "grad_norm": 1.0702797293760455, + "learning_rate": 1.649656696636671e-05, + "loss": 0.2984781265258789, + "step": 2345 + }, + { + "epoch": 0.6230248306997742, + "grad_norm": 1.0312282361562104, + "learning_rate": 1.6493228358691748e-05, + "loss": 0.3058238625526428, + "step": 2346 + }, + { + "epoch": 0.6232903996813173, + "grad_norm": 1.0462474005488736, + "learning_rate": 1.6489888499198935e-05, + "loss": 0.33439138531684875, + "step": 2347 + }, + { + "epoch": 0.6235559686628602, + "grad_norm": 1.0386002000588619, + "learning_rate": 1.6486547388532157e-05, + "loss": 0.2883133292198181, + "step": 2348 + }, + { + "epoch": 0.6238215376444032, + "grad_norm": 0.9997410916606129, + "learning_rate": 1.648320502733555e-05, + "loss": 0.30258435010910034, + "step": 2349 + }, + { + "epoch": 0.6240871066259461, + "grad_norm": 1.0226158069339855, + "learning_rate": 1.6479861416253476e-05, + "loss": 0.316353440284729, + "step": 2350 + }, + { + "epoch": 0.6243526756074891, + "grad_norm": 1.0638089423798769, + "learning_rate": 1.647651655593054e-05, + "loss": 0.3230556547641754, + "step": 2351 + }, + { + 
"epoch": 0.624618244589032, + "grad_norm": 1.2043111611037318, + "learning_rate": 1.6473170447011593e-05, + "loss": 0.3327128291130066, + "step": 2352 + }, + { + "epoch": 0.624883813570575, + "grad_norm": 1.081123131766037, + "learning_rate": 1.6469823090141733e-05, + "loss": 0.3152993619441986, + "step": 2353 + }, + { + "epoch": 0.6251493825521179, + "grad_norm": 1.0655193061859811, + "learning_rate": 1.6466474485966286e-05, + "loss": 0.26792511343955994, + "step": 2354 + }, + { + "epoch": 0.6254149515336609, + "grad_norm": 1.121022507517606, + "learning_rate": 1.6463124635130824e-05, + "loss": 0.31665652990341187, + "step": 2355 + }, + { + "epoch": 0.6256805205152038, + "grad_norm": 1.0108098757868682, + "learning_rate": 1.645977353828115e-05, + "loss": 0.29573655128479004, + "step": 2356 + }, + { + "epoch": 0.6259460894967468, + "grad_norm": 1.0973823257435635, + "learning_rate": 1.6456421196063334e-05, + "loss": 0.3210436999797821, + "step": 2357 + }, + { + "epoch": 0.6262116584782897, + "grad_norm": 1.2424369194288305, + "learning_rate": 1.6453067609123656e-05, + "loss": 0.2837316691875458, + "step": 2358 + }, + { + "epoch": 0.6264772274598327, + "grad_norm": 1.0217734190114693, + "learning_rate": 1.6449712778108645e-05, + "loss": 0.2885812520980835, + "step": 2359 + }, + { + "epoch": 0.6267427964413756, + "grad_norm": 1.1369177274860889, + "learning_rate": 1.6446356703665078e-05, + "loss": 0.34908249974250793, + "step": 2360 + }, + { + "epoch": 0.6270083654229186, + "grad_norm": 0.9942151080492051, + "learning_rate": 1.6442999386439967e-05, + "loss": 0.30398470163345337, + "step": 2361 + }, + { + "epoch": 0.6272739344044616, + "grad_norm": 0.9838105681310805, + "learning_rate": 1.6439640827080565e-05, + "loss": 0.2780487537384033, + "step": 2362 + }, + { + "epoch": 0.6275395033860045, + "grad_norm": 0.956534505955689, + "learning_rate": 1.6436281026234357e-05, + "loss": 0.2575770616531372, + "step": 2363 + }, + { + "epoch": 0.6278050723675475, + "grad_norm": 
0.9675911826739493, + "learning_rate": 1.6432919984549077e-05, + "loss": 0.2888547480106354, + "step": 2364 + }, + { + "epoch": 0.6280706413490904, + "grad_norm": 1.2303845977564731, + "learning_rate": 1.6429557702672694e-05, + "loss": 0.3259009122848511, + "step": 2365 + }, + { + "epoch": 0.6283362103306334, + "grad_norm": 1.3923197622537806, + "learning_rate": 1.6426194181253415e-05, + "loss": 0.2899959683418274, + "step": 2366 + }, + { + "epoch": 0.6286017793121763, + "grad_norm": 1.058685915432802, + "learning_rate": 1.6422829420939688e-05, + "loss": 0.28471851348876953, + "step": 2367 + }, + { + "epoch": 0.6288673482937193, + "grad_norm": 1.0822140266216713, + "learning_rate": 1.64194634223802e-05, + "loss": 0.2958947420120239, + "step": 2368 + }, + { + "epoch": 0.6291329172752622, + "grad_norm": 1.1251439755337522, + "learning_rate": 1.6416096186223872e-05, + "loss": 0.3089750111103058, + "step": 2369 + }, + { + "epoch": 0.6293984862568052, + "grad_norm": 1.0517657351777636, + "learning_rate": 1.641272771311987e-05, + "loss": 0.31597089767456055, + "step": 2370 + }, + { + "epoch": 0.6296640552383481, + "grad_norm": 1.237586073778816, + "learning_rate": 1.6409358003717598e-05, + "loss": 0.2968488931655884, + "step": 2371 + }, + { + "epoch": 0.6299296242198911, + "grad_norm": 1.0062603647307793, + "learning_rate": 1.6405987058666694e-05, + "loss": 0.27532660961151123, + "step": 2372 + }, + { + "epoch": 0.630195193201434, + "grad_norm": 1.0061271713511417, + "learning_rate": 1.6402614878617037e-05, + "loss": 0.2800731956958771, + "step": 2373 + }, + { + "epoch": 0.630460762182977, + "grad_norm": 1.0867786948587836, + "learning_rate": 1.6399241464218744e-05, + "loss": 0.31728652119636536, + "step": 2374 + }, + { + "epoch": 0.63072633116452, + "grad_norm": 1.0634834793994077, + "learning_rate": 1.6395866816122167e-05, + "loss": 0.2776367664337158, + "step": 2375 + }, + { + "epoch": 0.630991900146063, + "grad_norm": 1.2696308030410766, + "learning_rate": 
1.63924909349779e-05, + "loss": 0.3308418095111847, + "step": 2376 + }, + { + "epoch": 0.6312574691276059, + "grad_norm": 1.027144235831433, + "learning_rate": 1.6389113821436775e-05, + "loss": 0.31589487195014954, + "step": 2377 + }, + { + "epoch": 0.6315230381091489, + "grad_norm": 0.9983142729953255, + "learning_rate": 1.6385735476149855e-05, + "loss": 0.27181899547576904, + "step": 2378 + }, + { + "epoch": 0.6317886070906918, + "grad_norm": 1.0656862561919935, + "learning_rate": 1.638235589976845e-05, + "loss": 0.2603747546672821, + "step": 2379 + }, + { + "epoch": 0.6320541760722348, + "grad_norm": 1.0543823342651422, + "learning_rate": 1.63789750929441e-05, + "loss": 0.29050707817077637, + "step": 2380 + }, + { + "epoch": 0.6323197450537777, + "grad_norm": 1.0310549396867945, + "learning_rate": 1.6375593056328586e-05, + "loss": 0.2979413866996765, + "step": 2381 + }, + { + "epoch": 0.6325853140353207, + "grad_norm": 1.0460005843129836, + "learning_rate": 1.6372209790573926e-05, + "loss": 0.30875420570373535, + "step": 2382 + }, + { + "epoch": 0.6328508830168637, + "grad_norm": 0.9698416111844145, + "learning_rate": 1.6368825296332366e-05, + "loss": 0.2755935788154602, + "step": 2383 + }, + { + "epoch": 0.6331164519984066, + "grad_norm": 1.1336778567410772, + "learning_rate": 1.6365439574256406e-05, + "loss": 0.3459136486053467, + "step": 2384 + }, + { + "epoch": 0.6333820209799496, + "grad_norm": 1.116018329054477, + "learning_rate": 1.6362052624998767e-05, + "loss": 0.29043829441070557, + "step": 2385 + }, + { + "epoch": 0.6336475899614925, + "grad_norm": 1.123039696178655, + "learning_rate": 1.635866444921242e-05, + "loss": 0.321551114320755, + "step": 2386 + }, + { + "epoch": 0.6339131589430355, + "grad_norm": 1.0451682936950502, + "learning_rate": 1.6355275047550553e-05, + "loss": 0.28478139638900757, + "step": 2387 + }, + { + "epoch": 0.6341787279245784, + "grad_norm": 1.060617338056141, + "learning_rate": 1.6351884420666616e-05, + "loss": 
0.30913087725639343, + "step": 2388 + }, + { + "epoch": 0.6344442969061214, + "grad_norm": 1.0996519301974148, + "learning_rate": 1.6348492569214275e-05, + "loss": 0.328342467546463, + "step": 2389 + }, + { + "epoch": 0.6347098658876643, + "grad_norm": 1.0657562962668374, + "learning_rate": 1.634509949384744e-05, + "loss": 0.3291119933128357, + "step": 2390 + }, + { + "epoch": 0.6349754348692073, + "grad_norm": 1.0805286951038287, + "learning_rate": 1.6341705195220257e-05, + "loss": 0.3542378544807434, + "step": 2391 + }, + { + "epoch": 0.6352410038507502, + "grad_norm": 1.1387422668526126, + "learning_rate": 1.63383096739871e-05, + "loss": 0.3167935609817505, + "step": 2392 + }, + { + "epoch": 0.6355065728322932, + "grad_norm": 0.9614211236141011, + "learning_rate": 1.63349129308026e-05, + "loss": 0.27623263001441956, + "step": 2393 + }, + { + "epoch": 0.6357721418138361, + "grad_norm": 1.1351525352268206, + "learning_rate": 1.6331514966321596e-05, + "loss": 0.3615761399269104, + "step": 2394 + }, + { + "epoch": 0.6360377107953791, + "grad_norm": 1.1430561223010627, + "learning_rate": 1.632811578119918e-05, + "loss": 0.3503292500972748, + "step": 2395 + }, + { + "epoch": 0.636303279776922, + "grad_norm": 1.0400637290516392, + "learning_rate": 1.6324715376090673e-05, + "loss": 0.2994767129421234, + "step": 2396 + }, + { + "epoch": 0.636568848758465, + "grad_norm": 1.2836743734514182, + "learning_rate": 1.6321313751651638e-05, + "loss": 0.29903143644332886, + "step": 2397 + }, + { + "epoch": 0.6368344177400079, + "grad_norm": 1.0273086079776361, + "learning_rate": 1.6317910908537865e-05, + "loss": 0.310536652803421, + "step": 2398 + }, + { + "epoch": 0.6370999867215509, + "grad_norm": 1.2820707601171073, + "learning_rate": 1.6314506847405382e-05, + "loss": 0.32584354281425476, + "step": 2399 + }, + { + "epoch": 0.6373655557030938, + "grad_norm": 1.186095937719991, + "learning_rate": 1.6311101568910448e-05, + "loss": 0.3536352217197418, + "step": 2400 + }, + { + 
"epoch": 0.6376311246846368, + "grad_norm": 1.0361661707144088, + "learning_rate": 1.6307695073709565e-05, + "loss": 0.3198434114456177, + "step": 2401 + }, + { + "epoch": 0.6378966936661797, + "grad_norm": 0.8809138916670839, + "learning_rate": 1.6304287362459462e-05, + "loss": 0.264182448387146, + "step": 2402 + }, + { + "epoch": 0.6381622626477228, + "grad_norm": 1.0526335869529386, + "learning_rate": 1.6300878435817115e-05, + "loss": 0.31182044744491577, + "step": 2403 + }, + { + "epoch": 0.6384278316292658, + "grad_norm": 1.0495886453587215, + "learning_rate": 1.6297468294439708e-05, + "loss": 0.28221404552459717, + "step": 2404 + }, + { + "epoch": 0.6386934006108087, + "grad_norm": 1.0211141314743026, + "learning_rate": 1.6294056938984693e-05, + "loss": 0.27788785099983215, + "step": 2405 + }, + { + "epoch": 0.6389589695923517, + "grad_norm": 1.068610455564362, + "learning_rate": 1.6290644370109728e-05, + "loss": 0.3300796151161194, + "step": 2406 + }, + { + "epoch": 0.6392245385738946, + "grad_norm": 1.0949996094795582, + "learning_rate": 1.628723058847272e-05, + "loss": 0.32170963287353516, + "step": 2407 + }, + { + "epoch": 0.6394901075554376, + "grad_norm": 1.1320309851276869, + "learning_rate": 1.628381559473181e-05, + "loss": 0.3243589997291565, + "step": 2408 + }, + { + "epoch": 0.6397556765369805, + "grad_norm": 1.4458945786524546, + "learning_rate": 1.6280399389545358e-05, + "loss": 0.311046838760376, + "step": 2409 + }, + { + "epoch": 0.6400212455185235, + "grad_norm": 1.0237689913585555, + "learning_rate": 1.6276981973571973e-05, + "loss": 0.2642543911933899, + "step": 2410 + }, + { + "epoch": 0.6402868145000664, + "grad_norm": 1.1424399755044237, + "learning_rate": 1.62735633474705e-05, + "loss": 0.3593730926513672, + "step": 2411 + }, + { + "epoch": 0.6405523834816094, + "grad_norm": 1.1145611429504636, + "learning_rate": 1.62701435119e-05, + "loss": 0.3147425353527069, + "step": 2412 + }, + { + "epoch": 0.6408179524631523, + "grad_norm": 
1.1400749315540035, + "learning_rate": 1.6266722467519783e-05, + "loss": 0.32639142870903015, + "step": 2413 + }, + { + "epoch": 0.6410835214446953, + "grad_norm": 1.1011849489387644, + "learning_rate": 1.626330021498938e-05, + "loss": 0.32113659381866455, + "step": 2414 + }, + { + "epoch": 0.6413490904262382, + "grad_norm": 1.0371621680767618, + "learning_rate": 1.6259876754968568e-05, + "loss": 0.3188290297985077, + "step": 2415 + }, + { + "epoch": 0.6416146594077812, + "grad_norm": 1.076893351246201, + "learning_rate": 1.625645208811734e-05, + "loss": 0.3145543932914734, + "step": 2416 + }, + { + "epoch": 0.6418802283893241, + "grad_norm": 1.1368093372185335, + "learning_rate": 1.6253026215095943e-05, + "loss": 0.30433323979377747, + "step": 2417 + }, + { + "epoch": 0.6421457973708671, + "grad_norm": 1.1042321396184265, + "learning_rate": 1.6249599136564837e-05, + "loss": 0.30946728587150574, + "step": 2418 + }, + { + "epoch": 0.64241136635241, + "grad_norm": 0.991248414026241, + "learning_rate": 1.6246170853184726e-05, + "loss": 0.26245906949043274, + "step": 2419 + }, + { + "epoch": 0.642676935333953, + "grad_norm": 1.1213671588278835, + "learning_rate": 1.624274136561654e-05, + "loss": 0.31468862295150757, + "step": 2420 + }, + { + "epoch": 0.6429425043154959, + "grad_norm": 1.0200744973975597, + "learning_rate": 1.6239310674521443e-05, + "loss": 0.28946155309677124, + "step": 2421 + }, + { + "epoch": 0.6432080732970389, + "grad_norm": 1.1088143851501708, + "learning_rate": 1.6235878780560835e-05, + "loss": 0.26272106170654297, + "step": 2422 + }, + { + "epoch": 0.6434736422785818, + "grad_norm": 1.1185700160494145, + "learning_rate": 1.6232445684396347e-05, + "loss": 0.3094574213027954, + "step": 2423 + }, + { + "epoch": 0.6437392112601248, + "grad_norm": 0.9377280048944331, + "learning_rate": 1.6229011386689832e-05, + "loss": 0.2503833770751953, + "step": 2424 + }, + { + "epoch": 0.6440047802416677, + "grad_norm": 0.9657663244207705, + "learning_rate": 
1.6225575888103387e-05, + "loss": 0.2655009627342224, + "step": 2425 + }, + { + "epoch": 0.6442703492232107, + "grad_norm": 1.123117061290067, + "learning_rate": 1.6222139189299336e-05, + "loss": 0.2819611728191376, + "step": 2426 + }, + { + "epoch": 0.6445359182047536, + "grad_norm": 1.0859641118248262, + "learning_rate": 1.6218701290940232e-05, + "loss": 0.2956068217754364, + "step": 2427 + }, + { + "epoch": 0.6448014871862966, + "grad_norm": 1.2445728810553593, + "learning_rate": 1.6215262193688862e-05, + "loss": 0.3330997824668884, + "step": 2428 + }, + { + "epoch": 0.6450670561678395, + "grad_norm": 1.0073602881165937, + "learning_rate": 1.6211821898208242e-05, + "loss": 0.25897055864334106, + "step": 2429 + }, + { + "epoch": 0.6453326251493825, + "grad_norm": 1.1228221759016932, + "learning_rate": 1.6208380405161623e-05, + "loss": 0.3119947016239166, + "step": 2430 + }, + { + "epoch": 0.6455981941309256, + "grad_norm": 1.143631742936843, + "learning_rate": 1.6204937715212482e-05, + "loss": 0.30833956599235535, + "step": 2431 + }, + { + "epoch": 0.6458637631124685, + "grad_norm": 1.1584271404994573, + "learning_rate": 1.620149382902453e-05, + "loss": 0.2935214638710022, + "step": 2432 + }, + { + "epoch": 0.6461293320940115, + "grad_norm": 1.6063755788258844, + "learning_rate": 1.619804874726171e-05, + "loss": 0.24297356605529785, + "step": 2433 + }, + { + "epoch": 0.6463949010755544, + "grad_norm": 1.14218339304969, + "learning_rate": 1.6194602470588186e-05, + "loss": 0.319774866104126, + "step": 2434 + }, + { + "epoch": 0.6466604700570974, + "grad_norm": 1.1751618225153557, + "learning_rate": 1.6191154999668368e-05, + "loss": 0.29197463393211365, + "step": 2435 + }, + { + "epoch": 0.6469260390386403, + "grad_norm": 1.1008916130088804, + "learning_rate": 1.6187706335166882e-05, + "loss": 0.2939727306365967, + "step": 2436 + }, + { + "epoch": 0.6471916080201833, + "grad_norm": 1.0935449463761302, + "learning_rate": 1.6184256477748595e-05, + "loss": 
0.2941162586212158, + "step": 2437 + }, + { + "epoch": 0.6474571770017262, + "grad_norm": 1.1336931987797143, + "learning_rate": 1.6180805428078593e-05, + "loss": 0.2823144197463989, + "step": 2438 + }, + { + "epoch": 0.6477227459832692, + "grad_norm": 1.0912252779984561, + "learning_rate": 1.61773531868222e-05, + "loss": 0.30048274993896484, + "step": 2439 + }, + { + "epoch": 0.6479883149648121, + "grad_norm": 1.183044095349839, + "learning_rate": 1.617389975464497e-05, + "loss": 0.30927354097366333, + "step": 2440 + }, + { + "epoch": 0.6482538839463551, + "grad_norm": 1.166570736507726, + "learning_rate": 1.6170445132212678e-05, + "loss": 0.34835004806518555, + "step": 2441 + }, + { + "epoch": 0.648519452927898, + "grad_norm": 1.0325781129961564, + "learning_rate": 1.616698932019134e-05, + "loss": 0.2890225648880005, + "step": 2442 + }, + { + "epoch": 0.648785021909441, + "grad_norm": 1.1182329319338478, + "learning_rate": 1.6163532319247195e-05, + "loss": 0.31410521268844604, + "step": 2443 + }, + { + "epoch": 0.6490505908909839, + "grad_norm": 0.9213656240638256, + "learning_rate": 1.616007413004671e-05, + "loss": 0.267375111579895, + "step": 2444 + }, + { + "epoch": 0.6493161598725269, + "grad_norm": 1.1587177777274813, + "learning_rate": 1.6156614753256583e-05, + "loss": 0.3300023376941681, + "step": 2445 + }, + { + "epoch": 0.6495817288540698, + "grad_norm": 1.0295072511714587, + "learning_rate": 1.615315418954374e-05, + "loss": 0.2822847366333008, + "step": 2446 + }, + { + "epoch": 0.6498472978356128, + "grad_norm": 1.1626615137060834, + "learning_rate": 1.6149692439575348e-05, + "loss": 0.3093401789665222, + "step": 2447 + }, + { + "epoch": 0.6501128668171557, + "grad_norm": 1.0475923101386018, + "learning_rate": 1.6146229504018777e-05, + "loss": 0.2892506718635559, + "step": 2448 + }, + { + "epoch": 0.6503784357986987, + "grad_norm": 0.9972012319936079, + "learning_rate": 1.6142765383541643e-05, + "loss": 0.2805558741092682, + "step": 2449 + }, + { + 
"epoch": 0.6506440047802416, + "grad_norm": 1.0535842654025462, + "learning_rate": 1.6139300078811794e-05, + "loss": 0.29852935671806335, + "step": 2450 + }, + { + "epoch": 0.6509095737617846, + "grad_norm": 1.193949473615032, + "learning_rate": 1.6135833590497295e-05, + "loss": 0.3567991256713867, + "step": 2451 + }, + { + "epoch": 0.6511751427433276, + "grad_norm": 1.1265709697559396, + "learning_rate": 1.6132365919266442e-05, + "loss": 0.29564782977104187, + "step": 2452 + }, + { + "epoch": 0.6514407117248705, + "grad_norm": 1.011180050217134, + "learning_rate": 1.612889706578777e-05, + "loss": 0.30027297139167786, + "step": 2453 + }, + { + "epoch": 0.6517062807064135, + "grad_norm": 1.0908136110597069, + "learning_rate": 1.6125427030730027e-05, + "loss": 0.3318096697330475, + "step": 2454 + }, + { + "epoch": 0.6519718496879564, + "grad_norm": 1.0728958387824694, + "learning_rate": 1.612195581476219e-05, + "loss": 0.30962997674942017, + "step": 2455 + }, + { + "epoch": 0.6522374186694994, + "grad_norm": 1.2969539714019946, + "learning_rate": 1.6118483418553476e-05, + "loss": 0.3152836859226227, + "step": 2456 + }, + { + "epoch": 0.6525029876510423, + "grad_norm": 1.0160215490589632, + "learning_rate": 1.6115009842773322e-05, + "loss": 0.26117920875549316, + "step": 2457 + }, + { + "epoch": 0.6527685566325853, + "grad_norm": 0.9780826840488046, + "learning_rate": 1.6111535088091388e-05, + "loss": 0.2705717384815216, + "step": 2458 + }, + { + "epoch": 0.6530341256141283, + "grad_norm": 1.112935626593024, + "learning_rate": 1.6108059155177568e-05, + "loss": 0.3281205892562866, + "step": 2459 + }, + { + "epoch": 0.6532996945956713, + "grad_norm": 1.0805050021999307, + "learning_rate": 1.6104582044701983e-05, + "loss": 0.3300125002861023, + "step": 2460 + }, + { + "epoch": 0.6535652635772142, + "grad_norm": 1.0596352955938992, + "learning_rate": 1.6101103757334973e-05, + "loss": 0.29286977648735046, + "step": 2461 + }, + { + "epoch": 0.6538308325587572, + 
"grad_norm": 1.114611766363321, + "learning_rate": 1.6097624293747115e-05, + "loss": 0.2920498847961426, + "step": 2462 + }, + { + "epoch": 0.6540964015403001, + "grad_norm": 1.0455118881549736, + "learning_rate": 1.609414365460921e-05, + "loss": 0.31018689274787903, + "step": 2463 + }, + { + "epoch": 0.6543619705218431, + "grad_norm": 1.0028130278859915, + "learning_rate": 1.609066184059228e-05, + "loss": 0.26806512475013733, + "step": 2464 + }, + { + "epoch": 0.654627539503386, + "grad_norm": 1.0385768164913443, + "learning_rate": 1.608717885236758e-05, + "loss": 0.29770639538764954, + "step": 2465 + }, + { + "epoch": 0.654893108484929, + "grad_norm": 1.0811683391440958, + "learning_rate": 1.6083694690606592e-05, + "loss": 0.36161965131759644, + "step": 2466 + }, + { + "epoch": 0.6551586774664719, + "grad_norm": 1.1455214370068598, + "learning_rate": 1.6080209355981016e-05, + "loss": 0.36114081740379333, + "step": 2467 + }, + { + "epoch": 0.6554242464480149, + "grad_norm": 0.9911085328884063, + "learning_rate": 1.6076722849162786e-05, + "loss": 0.28924882411956787, + "step": 2468 + }, + { + "epoch": 0.6556898154295578, + "grad_norm": 1.1198872767040324, + "learning_rate": 1.6073235170824058e-05, + "loss": 0.3088049292564392, + "step": 2469 + }, + { + "epoch": 0.6559553844111008, + "grad_norm": 1.062389027957873, + "learning_rate": 1.6069746321637216e-05, + "loss": 0.2684907615184784, + "step": 2470 + }, + { + "epoch": 0.6562209533926437, + "grad_norm": 0.9850175058697045, + "learning_rate": 1.6066256302274873e-05, + "loss": 0.2674641013145447, + "step": 2471 + }, + { + "epoch": 0.6564865223741867, + "grad_norm": 1.0658104164235327, + "learning_rate": 1.6062765113409854e-05, + "loss": 0.2865106165409088, + "step": 2472 + }, + { + "epoch": 0.6567520913557297, + "grad_norm": 1.1117203943537428, + "learning_rate": 1.605927275571523e-05, + "loss": 0.33163607120513916, + "step": 2473 + }, + { + "epoch": 0.6570176603372726, + "grad_norm": 1.1177244627769223, + 
"learning_rate": 1.6055779229864276e-05, + "loss": 0.32725927233695984, + "step": 2474 + }, + { + "epoch": 0.6572832293188156, + "grad_norm": 1.171322314473831, + "learning_rate": 1.605228453653051e-05, + "loss": 0.31537747383117676, + "step": 2475 + }, + { + "epoch": 0.6575487983003585, + "grad_norm": 1.0855461390356589, + "learning_rate": 1.604878867638767e-05, + "loss": 0.29331761598587036, + "step": 2476 + }, + { + "epoch": 0.6578143672819015, + "grad_norm": 1.0342424424241736, + "learning_rate": 1.6045291650109706e-05, + "loss": 0.315193772315979, + "step": 2477 + }, + { + "epoch": 0.6580799362634444, + "grad_norm": 1.2286540067411784, + "learning_rate": 1.6041793458370812e-05, + "loss": 0.3595796227455139, + "step": 2478 + }, + { + "epoch": 0.6583455052449874, + "grad_norm": 1.0251892797499218, + "learning_rate": 1.6038294101845394e-05, + "loss": 0.3069949150085449, + "step": 2479 + }, + { + "epoch": 0.6586110742265303, + "grad_norm": 1.1576253586981062, + "learning_rate": 1.603479358120809e-05, + "loss": 0.3154812455177307, + "step": 2480 + }, + { + "epoch": 0.6588766432080733, + "grad_norm": 1.1008921076459075, + "learning_rate": 1.6031291897133756e-05, + "loss": 0.3005039691925049, + "step": 2481 + }, + { + "epoch": 0.6591422121896162, + "grad_norm": 1.1463594149599334, + "learning_rate": 1.6027789050297476e-05, + "loss": 0.2885095775127411, + "step": 2482 + }, + { + "epoch": 0.6594077811711592, + "grad_norm": 1.002066881102099, + "learning_rate": 1.602428504137456e-05, + "loss": 0.291950523853302, + "step": 2483 + }, + { + "epoch": 0.6596733501527021, + "grad_norm": 1.0919380790727968, + "learning_rate": 1.6020779871040538e-05, + "loss": 0.31630760431289673, + "step": 2484 + }, + { + "epoch": 0.6599389191342451, + "grad_norm": 1.0827567425634856, + "learning_rate": 1.6017273539971167e-05, + "loss": 0.29767507314682007, + "step": 2485 + }, + { + "epoch": 0.660204488115788, + "grad_norm": 1.036820980968177, + "learning_rate": 1.601376604884242e-05, + 
"loss": 0.2882775664329529, + "step": 2486 + }, + { + "epoch": 0.6604700570973311, + "grad_norm": 1.0885135950320362, + "learning_rate": 1.601025739833051e-05, + "loss": 0.325736403465271, + "step": 2487 + }, + { + "epoch": 0.660735626078874, + "grad_norm": 1.048580856774253, + "learning_rate": 1.6006747589111854e-05, + "loss": 0.3007255792617798, + "step": 2488 + }, + { + "epoch": 0.661001195060417, + "grad_norm": 1.146836506523448, + "learning_rate": 1.6003236621863107e-05, + "loss": 0.33199968934059143, + "step": 2489 + }, + { + "epoch": 0.6612667640419599, + "grad_norm": 1.1430196866694278, + "learning_rate": 1.5999724497261138e-05, + "loss": 0.3784569799900055, + "step": 2490 + }, + { + "epoch": 0.6615323330235029, + "grad_norm": 1.0506667031587968, + "learning_rate": 1.5996211215983052e-05, + "loss": 0.28146931529045105, + "step": 2491 + }, + { + "epoch": 0.6617979020050458, + "grad_norm": 1.0621415260673002, + "learning_rate": 1.599269677870616e-05, + "loss": 0.32187730073928833, + "step": 2492 + }, + { + "epoch": 0.6620634709865888, + "grad_norm": 1.0631524880676668, + "learning_rate": 1.5989181186108003e-05, + "loss": 0.3021823465824127, + "step": 2493 + }, + { + "epoch": 0.6623290399681317, + "grad_norm": 1.0248198480240434, + "learning_rate": 1.5985664438866354e-05, + "loss": 0.3309648334980011, + "step": 2494 + }, + { + "epoch": 0.6625946089496747, + "grad_norm": 1.0183038789118495, + "learning_rate": 1.598214653765919e-05, + "loss": 0.2939694821834564, + "step": 2495 + }, + { + "epoch": 0.6628601779312177, + "grad_norm": 1.0091208408649601, + "learning_rate": 1.597862748316473e-05, + "loss": 0.31219810247421265, + "step": 2496 + }, + { + "epoch": 0.6631257469127606, + "grad_norm": 1.3669850946739606, + "learning_rate": 1.5975107276061405e-05, + "loss": 0.29435622692108154, + "step": 2497 + }, + { + "epoch": 0.6633913158943036, + "grad_norm": 1.0359724885535866, + "learning_rate": 1.5971585917027864e-05, + "loss": 0.27167004346847534, + "step": 2498 + 
}, + { + "epoch": 0.6636568848758465, + "grad_norm": 1.121619558624798, + "learning_rate": 1.5968063406742988e-05, + "loss": 0.3360658884048462, + "step": 2499 + }, + { + "epoch": 0.6639224538573895, + "grad_norm": 1.0767207810238415, + "learning_rate": 1.596453974588587e-05, + "loss": 0.2994089424610138, + "step": 2500 + }, + { + "epoch": 0.6641880228389324, + "grad_norm": 1.0997593865705806, + "learning_rate": 1.596101493513584e-05, + "loss": 0.32302889227867126, + "step": 2501 + }, + { + "epoch": 0.6644535918204754, + "grad_norm": 1.1249891187970829, + "learning_rate": 1.595748897517243e-05, + "loss": 0.3122987747192383, + "step": 2502 + }, + { + "epoch": 0.6647191608020183, + "grad_norm": 1.014108779554691, + "learning_rate": 1.5953961866675408e-05, + "loss": 0.2746438980102539, + "step": 2503 + }, + { + "epoch": 0.6649847297835613, + "grad_norm": 1.0758059481680302, + "learning_rate": 1.5950433610324758e-05, + "loss": 0.3043097257614136, + "step": 2504 + }, + { + "epoch": 0.6652502987651042, + "grad_norm": 1.2204942135197403, + "learning_rate": 1.594690420680069e-05, + "loss": 0.3208698332309723, + "step": 2505 + }, + { + "epoch": 0.6655158677466472, + "grad_norm": 1.1502218188727449, + "learning_rate": 1.5943373656783628e-05, + "loss": 0.317341148853302, + "step": 2506 + }, + { + "epoch": 0.6657814367281901, + "grad_norm": 1.1223078751349502, + "learning_rate": 1.5939841960954218e-05, + "loss": 0.3250347673892975, + "step": 2507 + }, + { + "epoch": 0.6660470057097331, + "grad_norm": 1.066903715567463, + "learning_rate": 1.5936309119993333e-05, + "loss": 0.32255828380584717, + "step": 2508 + }, + { + "epoch": 0.666312574691276, + "grad_norm": 1.0591506680476068, + "learning_rate": 1.593277513458206e-05, + "loss": 0.3247614800930023, + "step": 2509 + }, + { + "epoch": 0.666578143672819, + "grad_norm": 1.087253896768941, + "learning_rate": 1.5929240005401715e-05, + "loss": 0.34171730279922485, + "step": 2510 + }, + { + "epoch": 0.6668437126543619, + "grad_norm": 
1.092874100004657, + "learning_rate": 1.5925703733133823e-05, + "loss": 0.30671584606170654, + "step": 2511 + }, + { + "epoch": 0.6671092816359049, + "grad_norm": 1.1250075389065, + "learning_rate": 1.5922166318460138e-05, + "loss": 0.3387908339500427, + "step": 2512 + }, + { + "epoch": 0.6673748506174478, + "grad_norm": 1.0272141820522305, + "learning_rate": 1.5918627762062635e-05, + "loss": 0.2772873044013977, + "step": 2513 + }, + { + "epoch": 0.6676404195989908, + "grad_norm": 1.0802689739154336, + "learning_rate": 1.59150880646235e-05, + "loss": 0.31555238366127014, + "step": 2514 + }, + { + "epoch": 0.6679059885805337, + "grad_norm": 0.9930963010924009, + "learning_rate": 1.5911547226825154e-05, + "loss": 0.2821594476699829, + "step": 2515 + }, + { + "epoch": 0.6681715575620768, + "grad_norm": 1.098936156337469, + "learning_rate": 1.5908005249350217e-05, + "loss": 0.3176054358482361, + "step": 2516 + }, + { + "epoch": 0.6684371265436198, + "grad_norm": 1.083365844116071, + "learning_rate": 1.590446213288155e-05, + "loss": 0.28484907746315, + "step": 2517 + }, + { + "epoch": 0.6687026955251627, + "grad_norm": 1.0028500327966023, + "learning_rate": 1.590091787810222e-05, + "loss": 0.25227850675582886, + "step": 2518 + }, + { + "epoch": 0.6689682645067057, + "grad_norm": 0.993931866088294, + "learning_rate": 1.5897372485695514e-05, + "loss": 0.276819109916687, + "step": 2519 + }, + { + "epoch": 0.6692338334882486, + "grad_norm": 1.1883846939575156, + "learning_rate": 1.589382595634495e-05, + "loss": 0.27944183349609375, + "step": 2520 + }, + { + "epoch": 0.6694994024697916, + "grad_norm": 1.0217591474349375, + "learning_rate": 1.589027829073425e-05, + "loss": 0.295337975025177, + "step": 2521 + }, + { + "epoch": 0.6697649714513345, + "grad_norm": 1.0940479681497102, + "learning_rate": 1.5886729489547365e-05, + "loss": 0.31168580055236816, + "step": 2522 + }, + { + "epoch": 0.6700305404328775, + "grad_norm": 1.0847233646991081, + "learning_rate": 
1.5883179553468465e-05, + "loss": 0.34520941972732544, + "step": 2523 + }, + { + "epoch": 0.6702961094144204, + "grad_norm": 1.0941539012056998, + "learning_rate": 1.587962848318193e-05, + "loss": 0.3121863901615143, + "step": 2524 + }, + { + "epoch": 0.6705616783959634, + "grad_norm": 1.2414605611463847, + "learning_rate": 1.587607627937237e-05, + "loss": 0.3450377583503723, + "step": 2525 + }, + { + "epoch": 0.6708272473775063, + "grad_norm": 1.0575484463097053, + "learning_rate": 1.58725229427246e-05, + "loss": 0.33431196212768555, + "step": 2526 + }, + { + "epoch": 0.6710928163590493, + "grad_norm": 2.8101197900274433, + "learning_rate": 1.5868968473923675e-05, + "loss": 0.2753226161003113, + "step": 2527 + }, + { + "epoch": 0.6713583853405922, + "grad_norm": 1.1171540013343635, + "learning_rate": 1.586541287365484e-05, + "loss": 0.31394219398498535, + "step": 2528 + }, + { + "epoch": 0.6716239543221352, + "grad_norm": 1.0940027543433968, + "learning_rate": 1.586185614260358e-05, + "loss": 0.352859765291214, + "step": 2529 + }, + { + "epoch": 0.6718895233036781, + "grad_norm": 1.158790754412002, + "learning_rate": 1.5858298281455592e-05, + "loss": 0.3182204067707062, + "step": 2530 + }, + { + "epoch": 0.6721550922852211, + "grad_norm": 1.0901686159979078, + "learning_rate": 1.5854739290896785e-05, + "loss": 0.3107008934020996, + "step": 2531 + }, + { + "epoch": 0.672420661266764, + "grad_norm": 1.0367853416177613, + "learning_rate": 1.5851179171613294e-05, + "loss": 0.2737328112125397, + "step": 2532 + }, + { + "epoch": 0.672686230248307, + "grad_norm": 1.070700914663809, + "learning_rate": 1.5847617924291466e-05, + "loss": 0.2744509279727936, + "step": 2533 + }, + { + "epoch": 0.6729517992298499, + "grad_norm": 1.0763385778363233, + "learning_rate": 1.584405554961787e-05, + "loss": 0.3149082660675049, + "step": 2534 + }, + { + "epoch": 0.6732173682113929, + "grad_norm": 1.1199335422347676, + "learning_rate": 1.584049204827929e-05, + "loss": 
0.32643741369247437, + "step": 2535 + }, + { + "epoch": 0.6734829371929358, + "grad_norm": 1.1153920819002263, + "learning_rate": 1.583692742096272e-05, + "loss": 0.31901559233665466, + "step": 2536 + }, + { + "epoch": 0.6737485061744788, + "grad_norm": 1.037012713250851, + "learning_rate": 1.583336166835539e-05, + "loss": 0.3020802140235901, + "step": 2537 + }, + { + "epoch": 0.6740140751560217, + "grad_norm": 0.9884255382698084, + "learning_rate": 1.5829794791144723e-05, + "loss": 0.29683804512023926, + "step": 2538 + }, + { + "epoch": 0.6742796441375647, + "grad_norm": 1.0549080502640127, + "learning_rate": 1.582622679001838e-05, + "loss": 0.2898966073989868, + "step": 2539 + }, + { + "epoch": 0.6745452131191076, + "grad_norm": 1.0628349250468347, + "learning_rate": 1.582265766566422e-05, + "loss": 0.2665000855922699, + "step": 2540 + }, + { + "epoch": 0.6748107821006506, + "grad_norm": 1.1059852721256176, + "learning_rate": 1.581908741877034e-05, + "loss": 0.2987207770347595, + "step": 2541 + }, + { + "epoch": 0.6750763510821935, + "grad_norm": 1.1051901132495052, + "learning_rate": 1.5815516050025032e-05, + "loss": 0.32591086626052856, + "step": 2542 + }, + { + "epoch": 0.6753419200637365, + "grad_norm": 0.9752097662975195, + "learning_rate": 1.581194356011682e-05, + "loss": 0.28181299567222595, + "step": 2543 + }, + { + "epoch": 0.6756074890452796, + "grad_norm": 1.0983389872703522, + "learning_rate": 1.5808369949734433e-05, + "loss": 0.3256041407585144, + "step": 2544 + }, + { + "epoch": 0.6758730580268225, + "grad_norm": 1.1228012917357884, + "learning_rate": 1.5804795219566825e-05, + "loss": 0.3079703152179718, + "step": 2545 + }, + { + "epoch": 0.6761386270083655, + "grad_norm": 1.1504916593616519, + "learning_rate": 1.580121937030316e-05, + "loss": 0.3364162743091583, + "step": 2546 + }, + { + "epoch": 0.6764041959899084, + "grad_norm": 1.046870504650359, + "learning_rate": 1.5797642402632816e-05, + "loss": 0.2774898111820221, + "step": 2547 + }, + { + 
"epoch": 0.6766697649714514, + "grad_norm": 1.1108782100380157, + "learning_rate": 1.5794064317245396e-05, + "loss": 0.33260244131088257, + "step": 2548 + }, + { + "epoch": 0.6769353339529943, + "grad_norm": 1.16229568793775, + "learning_rate": 1.5790485114830708e-05, + "loss": 0.3327571153640747, + "step": 2549 + }, + { + "epoch": 0.6772009029345373, + "grad_norm": 1.1256526679188055, + "learning_rate": 1.5786904796078783e-05, + "loss": 0.28527912497520447, + "step": 2550 + }, + { + "epoch": 0.6774664719160802, + "grad_norm": 1.1757868172389025, + "learning_rate": 1.5783323361679865e-05, + "loss": 0.3100908100605011, + "step": 2551 + }, + { + "epoch": 0.6777320408976232, + "grad_norm": 1.1187226402475792, + "learning_rate": 1.577974081232441e-05, + "loss": 0.3434574007987976, + "step": 2552 + }, + { + "epoch": 0.6779976098791661, + "grad_norm": 1.0691671390255433, + "learning_rate": 1.5776157148703094e-05, + "loss": 0.3151341676712036, + "step": 2553 + }, + { + "epoch": 0.6782631788607091, + "grad_norm": 1.1432839314923735, + "learning_rate": 1.5772572371506803e-05, + "loss": 0.33334124088287354, + "step": 2554 + }, + { + "epoch": 0.678528747842252, + "grad_norm": 0.9718187941404679, + "learning_rate": 1.576898648142664e-05, + "loss": 0.26933547854423523, + "step": 2555 + }, + { + "epoch": 0.678794316823795, + "grad_norm": 1.0146251280063243, + "learning_rate": 1.576539947915392e-05, + "loss": 0.3087029755115509, + "step": 2556 + }, + { + "epoch": 0.6790598858053379, + "grad_norm": 2.0746649121309244, + "learning_rate": 1.576181136538018e-05, + "loss": 0.32620540261268616, + "step": 2557 + }, + { + "epoch": 0.6793254547868809, + "grad_norm": 1.0462752825892652, + "learning_rate": 1.575822214079716e-05, + "loss": 0.29112139344215393, + "step": 2558 + }, + { + "epoch": 0.6795910237684238, + "grad_norm": 1.108770761520566, + "learning_rate": 1.5754631806096822e-05, + "loss": 0.3394843339920044, + "step": 2559 + }, + { + "epoch": 0.6798565927499668, + "grad_norm": 
1.0789431162979184, + "learning_rate": 1.5751040361971342e-05, + "loss": 0.32754629850387573, + "step": 2560 + }, + { + "epoch": 0.6801221617315097, + "grad_norm": 1.055729440740922, + "learning_rate": 1.574744780911311e-05, + "loss": 0.2829592823982239, + "step": 2561 + }, + { + "epoch": 0.6803877307130527, + "grad_norm": 3.1916720491195423, + "learning_rate": 1.5743854148214724e-05, + "loss": 0.2718046307563782, + "step": 2562 + }, + { + "epoch": 0.6806532996945956, + "grad_norm": 1.0355755791413483, + "learning_rate": 1.5740259379969002e-05, + "loss": 0.29244256019592285, + "step": 2563 + }, + { + "epoch": 0.6809188686761386, + "grad_norm": 1.0678189150114252, + "learning_rate": 1.5736663505068972e-05, + "loss": 0.2925388514995575, + "step": 2564 + }, + { + "epoch": 0.6811844376576816, + "grad_norm": 1.109826571766002, + "learning_rate": 1.5733066524207875e-05, + "loss": 0.26742440462112427, + "step": 2565 + }, + { + "epoch": 0.6814500066392245, + "grad_norm": 1.0365586719986022, + "learning_rate": 1.5729468438079167e-05, + "loss": 0.33688807487487793, + "step": 2566 + }, + { + "epoch": 0.6817155756207675, + "grad_norm": 1.0939355325909954, + "learning_rate": 1.5725869247376514e-05, + "loss": 0.2953096330165863, + "step": 2567 + }, + { + "epoch": 0.6819811446023104, + "grad_norm": 1.081510188555139, + "learning_rate": 1.5722268952793806e-05, + "loss": 0.321500301361084, + "step": 2568 + }, + { + "epoch": 0.6822467135838534, + "grad_norm": 1.1427798210793014, + "learning_rate": 1.5718667555025127e-05, + "loss": 0.29148590564727783, + "step": 2569 + }, + { + "epoch": 0.6825122825653963, + "grad_norm": 1.0849106130015975, + "learning_rate": 1.5715065054764792e-05, + "loss": 0.26887139678001404, + "step": 2570 + }, + { + "epoch": 0.6827778515469393, + "grad_norm": 0.9118900514894542, + "learning_rate": 1.5711461452707316e-05, + "loss": 0.2698139250278473, + "step": 2571 + }, + { + "epoch": 0.6830434205284823, + "grad_norm": 0.9420578172190551, + "learning_rate": 
1.5707856749547433e-05, + "loss": 0.264956533908844, + "step": 2572 + }, + { + "epoch": 0.6833089895100253, + "grad_norm": 1.0786584040903482, + "learning_rate": 1.5704250945980085e-05, + "loss": 0.32535314559936523, + "step": 2573 + }, + { + "epoch": 0.6835745584915682, + "grad_norm": 1.1132312438200667, + "learning_rate": 1.5700644042700432e-05, + "loss": 0.30529654026031494, + "step": 2574 + }, + { + "epoch": 0.6838401274731112, + "grad_norm": 0.9518994724553314, + "learning_rate": 1.569703604040384e-05, + "loss": 0.27253150939941406, + "step": 2575 + }, + { + "epoch": 0.6841056964546541, + "grad_norm": 1.0559070796873817, + "learning_rate": 1.5693426939785886e-05, + "loss": 0.27451053261756897, + "step": 2576 + }, + { + "epoch": 0.6843712654361971, + "grad_norm": 1.1393124405849042, + "learning_rate": 1.5689816741542374e-05, + "loss": 0.33280283212661743, + "step": 2577 + }, + { + "epoch": 0.68463683441774, + "grad_norm": 1.1306113061745138, + "learning_rate": 1.5686205446369293e-05, + "loss": 0.2911887764930725, + "step": 2578 + }, + { + "epoch": 0.684902403399283, + "grad_norm": 1.0940465986734231, + "learning_rate": 1.5682593054962866e-05, + "loss": 0.2950279116630554, + "step": 2579 + }, + { + "epoch": 0.6851679723808259, + "grad_norm": 1.0911163136563768, + "learning_rate": 1.5678979568019518e-05, + "loss": 0.3267458975315094, + "step": 2580 + }, + { + "epoch": 0.6854335413623689, + "grad_norm": 1.2739312763430675, + "learning_rate": 1.5675364986235887e-05, + "loss": 0.3209132254123688, + "step": 2581 + }, + { + "epoch": 0.6856991103439118, + "grad_norm": 1.1101887519376679, + "learning_rate": 1.5671749310308818e-05, + "loss": 0.3186662197113037, + "step": 2582 + }, + { + "epoch": 0.6859646793254548, + "grad_norm": 0.9652854961372175, + "learning_rate": 1.566813254093538e-05, + "loss": 0.24875827133655548, + "step": 2583 + }, + { + "epoch": 0.6862302483069977, + "grad_norm": 1.0684425959326884, + "learning_rate": 1.5664514678812835e-05, + "loss": 
0.26657983660697937, + "step": 2584 + }, + { + "epoch": 0.6864958172885407, + "grad_norm": 1.0670123202559558, + "learning_rate": 1.5660895724638666e-05, + "loss": 0.2889682650566101, + "step": 2585 + }, + { + "epoch": 0.6867613862700837, + "grad_norm": 1.2310590689373582, + "learning_rate": 1.5657275679110564e-05, + "loss": 0.32035061717033386, + "step": 2586 + }, + { + "epoch": 0.6870269552516266, + "grad_norm": 0.9946580402808185, + "learning_rate": 1.5653654542926435e-05, + "loss": 0.2844264507293701, + "step": 2587 + }, + { + "epoch": 0.6872925242331696, + "grad_norm": 1.0738818938413612, + "learning_rate": 1.5650032316784388e-05, + "loss": 0.27645713090896606, + "step": 2588 + }, + { + "epoch": 0.6875580932147125, + "grad_norm": 1.0078062598096618, + "learning_rate": 1.5646409001382745e-05, + "loss": 0.29902809858322144, + "step": 2589 + }, + { + "epoch": 0.6878236621962555, + "grad_norm": 1.0662439819494403, + "learning_rate": 1.564278459742004e-05, + "loss": 0.28179824352264404, + "step": 2590 + }, + { + "epoch": 0.6880892311777984, + "grad_norm": 0.9959782320912598, + "learning_rate": 1.563915910559502e-05, + "loss": 0.30527305603027344, + "step": 2591 + }, + { + "epoch": 0.6883548001593414, + "grad_norm": 0.9640464455731136, + "learning_rate": 1.5635532526606625e-05, + "loss": 0.29411792755126953, + "step": 2592 + }, + { + "epoch": 0.6886203691408843, + "grad_norm": 1.0659796212639145, + "learning_rate": 1.563190486115403e-05, + "loss": 0.32294154167175293, + "step": 2593 + }, + { + "epoch": 0.6888859381224273, + "grad_norm": 1.0983041505312465, + "learning_rate": 1.5628276109936594e-05, + "loss": 0.31873172521591187, + "step": 2594 + }, + { + "epoch": 0.6891515071039702, + "grad_norm": 1.2163401358885952, + "learning_rate": 1.5624646273653908e-05, + "loss": 0.37790048122406006, + "step": 2595 + }, + { + "epoch": 0.6894170760855132, + "grad_norm": 1.0271206309222516, + "learning_rate": 1.5621015353005754e-05, + "loss": 0.27596205472946167, + "step": 2596 
+ }, + { + "epoch": 0.6896826450670561, + "grad_norm": 1.2915034278595348, + "learning_rate": 1.5617383348692135e-05, + "loss": 0.30952686071395874, + "step": 2597 + }, + { + "epoch": 0.6899482140485991, + "grad_norm": 1.089414433310086, + "learning_rate": 1.5613750261413256e-05, + "loss": 0.2933235764503479, + "step": 2598 + }, + { + "epoch": 0.690213783030142, + "grad_norm": 1.1151043496896997, + "learning_rate": 1.5610116091869538e-05, + "loss": 0.2961776554584503, + "step": 2599 + }, + { + "epoch": 0.6904793520116851, + "grad_norm": 1.0596230408388436, + "learning_rate": 1.56064808407616e-05, + "loss": 0.2843313217163086, + "step": 2600 + }, + { + "epoch": 0.690744920993228, + "grad_norm": 1.0545406618996236, + "learning_rate": 1.560284450879028e-05, + "loss": 0.29366564750671387, + "step": 2601 + }, + { + "epoch": 0.691010489974771, + "grad_norm": 1.028254286030692, + "learning_rate": 1.5599207096656614e-05, + "loss": 0.32668614387512207, + "step": 2602 + }, + { + "epoch": 0.6912760589563139, + "grad_norm": 1.1962201821774399, + "learning_rate": 1.5595568605061858e-05, + "loss": 0.344367653131485, + "step": 2603 + }, + { + "epoch": 0.6915416279378569, + "grad_norm": 1.2250839657368426, + "learning_rate": 1.5591929034707468e-05, + "loss": 0.2875809371471405, + "step": 2604 + }, + { + "epoch": 0.6918071969193998, + "grad_norm": 0.9717157700868733, + "learning_rate": 1.5588288386295113e-05, + "loss": 0.2688799202442169, + "step": 2605 + }, + { + "epoch": 0.6920727659009428, + "grad_norm": 1.2520016236289049, + "learning_rate": 1.558464666052667e-05, + "loss": 0.28575828671455383, + "step": 2606 + }, + { + "epoch": 0.6923383348824858, + "grad_norm": 1.0741907315089707, + "learning_rate": 1.5581003858104203e-05, + "loss": 0.2800632119178772, + "step": 2607 + }, + { + "epoch": 0.6926039038640287, + "grad_norm": 1.096176752690496, + "learning_rate": 1.5577359979730022e-05, + "loss": 0.3066416382789612, + "step": 2608 + }, + { + "epoch": 0.6928694728455717, + 
"grad_norm": 1.0146792499875503, + "learning_rate": 1.5573715026106617e-05, + "loss": 0.3164110779762268, + "step": 2609 + }, + { + "epoch": 0.6931350418271146, + "grad_norm": 1.0292100354922897, + "learning_rate": 1.5570068997936686e-05, + "loss": 0.2908422350883484, + "step": 2610 + }, + { + "epoch": 0.6934006108086576, + "grad_norm": 0.9996966110923509, + "learning_rate": 1.5566421895923148e-05, + "loss": 0.29055240750312805, + "step": 2611 + }, + { + "epoch": 0.6936661797902005, + "grad_norm": 1.1296077877181152, + "learning_rate": 1.556277372076912e-05, + "loss": 0.3247227370738983, + "step": 2612 + }, + { + "epoch": 0.6939317487717435, + "grad_norm": 1.0869397458201258, + "learning_rate": 1.555912447317792e-05, + "loss": 0.29944315552711487, + "step": 2613 + }, + { + "epoch": 0.6941973177532864, + "grad_norm": 1.140637727836958, + "learning_rate": 1.5555474153853092e-05, + "loss": 0.2984931170940399, + "step": 2614 + }, + { + "epoch": 0.6944628867348294, + "grad_norm": 1.0644561032518303, + "learning_rate": 1.5551822763498364e-05, + "loss": 0.301285982131958, + "step": 2615 + }, + { + "epoch": 0.6947284557163723, + "grad_norm": 1.0271314049069311, + "learning_rate": 1.5548170302817683e-05, + "loss": 0.2862967252731323, + "step": 2616 + }, + { + "epoch": 0.6949940246979153, + "grad_norm": 1.0216494335731472, + "learning_rate": 1.5544516772515207e-05, + "loss": 0.3071482181549072, + "step": 2617 + }, + { + "epoch": 0.6952595936794582, + "grad_norm": 1.153798162838472, + "learning_rate": 1.5540862173295285e-05, + "loss": 0.33668914437294006, + "step": 2618 + }, + { + "epoch": 0.6955251626610012, + "grad_norm": 1.0451730984690786, + "learning_rate": 1.5537206505862486e-05, + "loss": 0.32204627990722656, + "step": 2619 + }, + { + "epoch": 0.6957907316425441, + "grad_norm": 1.083101648134336, + "learning_rate": 1.5533549770921576e-05, + "loss": 0.30210041999816895, + "step": 2620 + }, + { + "epoch": 0.6960563006240871, + "grad_norm": 1.1518417167078652, + 
"learning_rate": 1.5529891969177535e-05, + "loss": 0.3116886019706726, + "step": 2621 + }, + { + "epoch": 0.69632186960563, + "grad_norm": 1.1473344970327815, + "learning_rate": 1.5526233101335543e-05, + "loss": 0.3460058867931366, + "step": 2622 + }, + { + "epoch": 0.696587438587173, + "grad_norm": 1.0477810576486106, + "learning_rate": 1.552257316810098e-05, + "loss": 0.30080512166023254, + "step": 2623 + }, + { + "epoch": 0.6968530075687159, + "grad_norm": 1.1107090823955428, + "learning_rate": 1.5518912170179447e-05, + "loss": 0.3381347954273224, + "step": 2624 + }, + { + "epoch": 0.6971185765502589, + "grad_norm": 1.0737064011248665, + "learning_rate": 1.5515250108276733e-05, + "loss": 0.30345672369003296, + "step": 2625 + }, + { + "epoch": 0.6973841455318018, + "grad_norm": 1.1809134250993814, + "learning_rate": 1.5511586983098847e-05, + "loss": 0.3002641797065735, + "step": 2626 + }, + { + "epoch": 0.6976497145133448, + "grad_norm": 0.9975793486319376, + "learning_rate": 1.5507922795351992e-05, + "loss": 0.2848126292228699, + "step": 2627 + }, + { + "epoch": 0.6979152834948879, + "grad_norm": 1.1203755244922207, + "learning_rate": 1.5504257545742585e-05, + "loss": 0.32360371947288513, + "step": 2628 + }, + { + "epoch": 0.6981808524764308, + "grad_norm": 1.0674295201271842, + "learning_rate": 1.5500591234977237e-05, + "loss": 0.2970595955848694, + "step": 2629 + }, + { + "epoch": 0.6984464214579738, + "grad_norm": 1.1343972682519483, + "learning_rate": 1.5496923863762773e-05, + "loss": 0.35431474447250366, + "step": 2630 + }, + { + "epoch": 0.6987119904395167, + "grad_norm": 1.027377246814574, + "learning_rate": 1.549325543280622e-05, + "loss": 0.30133551359176636, + "step": 2631 + }, + { + "epoch": 0.6989775594210597, + "grad_norm": 1.066148832325447, + "learning_rate": 1.5489585942814807e-05, + "loss": 0.3013160824775696, + "step": 2632 + }, + { + "epoch": 0.6992431284026026, + "grad_norm": 1.1981871164483473, + "learning_rate": 1.5485915394495967e-05, + 
"loss": 0.3291313052177429, + "step": 2633 + }, + { + "epoch": 0.6995086973841456, + "grad_norm": 1.3083774012082008, + "learning_rate": 1.5482243788557336e-05, + "loss": 0.32308053970336914, + "step": 2634 + }, + { + "epoch": 0.6997742663656885, + "grad_norm": 1.0802428984314951, + "learning_rate": 1.5478571125706762e-05, + "loss": 0.321450412273407, + "step": 2635 + }, + { + "epoch": 0.7000398353472315, + "grad_norm": 1.1144035500723286, + "learning_rate": 1.547489740665229e-05, + "loss": 0.30871254205703735, + "step": 2636 + }, + { + "epoch": 0.7003054043287744, + "grad_norm": 1.1599776854022048, + "learning_rate": 1.5471222632102168e-05, + "loss": 0.29414835572242737, + "step": 2637 + }, + { + "epoch": 0.7005709733103174, + "grad_norm": 1.019484878273918, + "learning_rate": 1.546754680276485e-05, + "loss": 0.2841604948043823, + "step": 2638 + }, + { + "epoch": 0.7008365422918603, + "grad_norm": 1.039625714192533, + "learning_rate": 1.546386991934899e-05, + "loss": 0.2895316183567047, + "step": 2639 + }, + { + "epoch": 0.7011021112734033, + "grad_norm": 1.0418724746200432, + "learning_rate": 1.546019198256345e-05, + "loss": 0.310278058052063, + "step": 2640 + }, + { + "epoch": 0.7013676802549462, + "grad_norm": 1.1737622034955963, + "learning_rate": 1.5456512993117297e-05, + "loss": 0.3000732660293579, + "step": 2641 + }, + { + "epoch": 0.7016332492364892, + "grad_norm": 1.034060473081883, + "learning_rate": 1.545283295171979e-05, + "loss": 0.2650133967399597, + "step": 2642 + }, + { + "epoch": 0.7018988182180321, + "grad_norm": 1.1833814596994714, + "learning_rate": 1.5449151859080395e-05, + "loss": 0.3414345681667328, + "step": 2643 + }, + { + "epoch": 0.7021643871995751, + "grad_norm": 0.9407765615747015, + "learning_rate": 1.5445469715908793e-05, + "loss": 0.26955321431159973, + "step": 2644 + }, + { + "epoch": 0.702429956181118, + "grad_norm": 1.0775826100815478, + "learning_rate": 1.5441786522914855e-05, + "loss": 0.3028743863105774, + "step": 2645 + }, + 
{ + "epoch": 0.702695525162661, + "grad_norm": 1.1630883359211883, + "learning_rate": 1.5438102280808653e-05, + "loss": 0.28710106015205383, + "step": 2646 + }, + { + "epoch": 0.7029610941442039, + "grad_norm": 1.0828201415955274, + "learning_rate": 1.543441699030047e-05, + "loss": 0.33343076705932617, + "step": 2647 + }, + { + "epoch": 0.7032266631257469, + "grad_norm": 2.8774903725783445, + "learning_rate": 1.543073065210078e-05, + "loss": 0.27760642766952515, + "step": 2648 + }, + { + "epoch": 0.7034922321072898, + "grad_norm": 1.0939125975780095, + "learning_rate": 1.5427043266920276e-05, + "loss": 0.2844334840774536, + "step": 2649 + }, + { + "epoch": 0.7037578010888328, + "grad_norm": 1.0671776711844796, + "learning_rate": 1.542335483546983e-05, + "loss": 0.28979432582855225, + "step": 2650 + }, + { + "epoch": 0.7040233700703757, + "grad_norm": 1.1018820862649594, + "learning_rate": 1.5419665358460537e-05, + "loss": 0.313267320394516, + "step": 2651 + }, + { + "epoch": 0.7042889390519187, + "grad_norm": 1.122792570050495, + "learning_rate": 1.5415974836603676e-05, + "loss": 0.26702141761779785, + "step": 2652 + }, + { + "epoch": 0.7045545080334616, + "grad_norm": 1.084104909381419, + "learning_rate": 1.5412283270610752e-05, + "loss": 0.3256012499332428, + "step": 2653 + }, + { + "epoch": 0.7048200770150046, + "grad_norm": 1.1096374178765924, + "learning_rate": 1.540859066119344e-05, + "loss": 0.3035642206668854, + "step": 2654 + }, + { + "epoch": 0.7050856459965475, + "grad_norm": 1.1410920430169775, + "learning_rate": 1.5404897009063636e-05, + "loss": 0.32206645607948303, + "step": 2655 + }, + { + "epoch": 0.7053512149780906, + "grad_norm": 0.9596610334229038, + "learning_rate": 1.5401202314933436e-05, + "loss": 0.3023940920829773, + "step": 2656 + }, + { + "epoch": 0.7056167839596336, + "grad_norm": 0.9678878502259071, + "learning_rate": 1.539750657951513e-05, + "loss": 0.2839987277984619, + "step": 2657 + }, + { + "epoch": 0.7058823529411765, + 
"grad_norm": 0.9744312269236198, + "learning_rate": 1.5393809803521213e-05, + "loss": 0.2488149106502533, + "step": 2658 + }, + { + "epoch": 0.7061479219227195, + "grad_norm": 1.0311988168007409, + "learning_rate": 1.539011198766438e-05, + "loss": 0.27156201004981995, + "step": 2659 + }, + { + "epoch": 0.7064134909042624, + "grad_norm": 1.0925039664890526, + "learning_rate": 1.5386413132657528e-05, + "loss": 0.3038437068462372, + "step": 2660 + }, + { + "epoch": 0.7066790598858054, + "grad_norm": 0.9713190505037098, + "learning_rate": 1.5382713239213746e-05, + "loss": 0.27626922726631165, + "step": 2661 + }, + { + "epoch": 0.7069446288673483, + "grad_norm": 1.9675808121081846, + "learning_rate": 1.537901230804634e-05, + "loss": 0.27338162064552307, + "step": 2662 + }, + { + "epoch": 0.7072101978488913, + "grad_norm": 0.9540020890839573, + "learning_rate": 1.5375310339868798e-05, + "loss": 0.2635098099708557, + "step": 2663 + }, + { + "epoch": 0.7074757668304342, + "grad_norm": 1.1274430903932144, + "learning_rate": 1.537160733539482e-05, + "loss": 0.3245551288127899, + "step": 2664 + }, + { + "epoch": 0.7077413358119772, + "grad_norm": 1.1100804783644485, + "learning_rate": 1.53679032953383e-05, + "loss": 0.3226238787174225, + "step": 2665 + }, + { + "epoch": 0.7080069047935201, + "grad_norm": 1.0972084780717322, + "learning_rate": 1.536419822041333e-05, + "loss": 0.31588318943977356, + "step": 2666 + }, + { + "epoch": 0.7082724737750631, + "grad_norm": 1.031778059845932, + "learning_rate": 1.536049211133421e-05, + "loss": 0.2494429647922516, + "step": 2667 + }, + { + "epoch": 0.708538042756606, + "grad_norm": 1.1110915785079796, + "learning_rate": 1.5356784968815436e-05, + "loss": 0.30966901779174805, + "step": 2668 + }, + { + "epoch": 0.708803611738149, + "grad_norm": 1.1803956993815392, + "learning_rate": 1.5353076793571692e-05, + "loss": 0.29383328557014465, + "step": 2669 + }, + { + "epoch": 0.7090691807196919, + "grad_norm": 1.086625008831518, + 
"learning_rate": 1.5349367586317875e-05, + "loss": 0.30337825417518616, + "step": 2670 + }, + { + "epoch": 0.7093347497012349, + "grad_norm": 1.0049086741144315, + "learning_rate": 1.5345657347769082e-05, + "loss": 0.28128665685653687, + "step": 2671 + }, + { + "epoch": 0.7096003186827778, + "grad_norm": 1.1819105498956106, + "learning_rate": 1.5341946078640594e-05, + "loss": 0.35167062282562256, + "step": 2672 + }, + { + "epoch": 0.7098658876643208, + "grad_norm": 1.0441531577784944, + "learning_rate": 1.533823377964791e-05, + "loss": 0.30409517884254456, + "step": 2673 + }, + { + "epoch": 0.7101314566458637, + "grad_norm": 1.013441954819978, + "learning_rate": 1.5334520451506706e-05, + "loss": 0.2667735815048218, + "step": 2674 + }, + { + "epoch": 0.7103970256274067, + "grad_norm": 1.130854753100919, + "learning_rate": 1.5330806094932876e-05, + "loss": 0.290219247341156, + "step": 2675 + }, + { + "epoch": 0.7106625946089496, + "grad_norm": 1.120803532670259, + "learning_rate": 1.5327090710642503e-05, + "loss": 0.33118927478790283, + "step": 2676 + }, + { + "epoch": 0.7109281635904926, + "grad_norm": 1.2896959817209073, + "learning_rate": 1.5323374299351867e-05, + "loss": 0.34287041425704956, + "step": 2677 + }, + { + "epoch": 0.7111937325720356, + "grad_norm": 1.0183367847991263, + "learning_rate": 1.531965686177745e-05, + "loss": 0.27093711495399475, + "step": 2678 + }, + { + "epoch": 0.7114593015535785, + "grad_norm": 1.0913550671130643, + "learning_rate": 1.531593839863593e-05, + "loss": 0.2987911105155945, + "step": 2679 + }, + { + "epoch": 0.7117248705351215, + "grad_norm": 1.0145664449432468, + "learning_rate": 1.5312218910644185e-05, + "loss": 0.2914583086967468, + "step": 2680 + }, + { + "epoch": 0.7119904395166644, + "grad_norm": 1.0712171950199525, + "learning_rate": 1.530849839851928e-05, + "loss": 0.34159964323043823, + "step": 2681 + }, + { + "epoch": 0.7122560084982074, + "grad_norm": 1.0132523095253043, + "learning_rate": 1.5304776862978496e-05, + 
"loss": 0.28327372670173645, + "step": 2682 + }, + { + "epoch": 0.7125215774797503, + "grad_norm": 1.0473430655235008, + "learning_rate": 1.5301054304739292e-05, + "loss": 0.2902851104736328, + "step": 2683 + }, + { + "epoch": 0.7127871464612934, + "grad_norm": 1.106440530120003, + "learning_rate": 1.5297330724519344e-05, + "loss": 0.3192726969718933, + "step": 2684 + }, + { + "epoch": 0.7130527154428363, + "grad_norm": 1.0682705697817987, + "learning_rate": 1.5293606123036508e-05, + "loss": 0.30242764949798584, + "step": 2685 + }, + { + "epoch": 0.7133182844243793, + "grad_norm": 1.0059439200202651, + "learning_rate": 1.528988050100884e-05, + "loss": 0.2718653082847595, + "step": 2686 + }, + { + "epoch": 0.7135838534059222, + "grad_norm": 1.019566462631627, + "learning_rate": 1.52861538591546e-05, + "loss": 0.3014821708202362, + "step": 2687 + }, + { + "epoch": 0.7138494223874652, + "grad_norm": 1.1473508187880241, + "learning_rate": 1.528242619819224e-05, + "loss": 0.3378177881240845, + "step": 2688 + }, + { + "epoch": 0.7141149913690081, + "grad_norm": 1.0632179838195628, + "learning_rate": 1.5278697518840415e-05, + "loss": 0.29286471009254456, + "step": 2689 + }, + { + "epoch": 0.7143805603505511, + "grad_norm": 1.1140242619678895, + "learning_rate": 1.527496782181796e-05, + "loss": 0.3371768593788147, + "step": 2690 + }, + { + "epoch": 0.714646129332094, + "grad_norm": 1.0421377750374783, + "learning_rate": 1.5271237107843925e-05, + "loss": 0.30571556091308594, + "step": 2691 + }, + { + "epoch": 0.714911698313637, + "grad_norm": 1.0650624138184501, + "learning_rate": 1.526750537763754e-05, + "loss": 0.33064618706703186, + "step": 2692 + }, + { + "epoch": 0.7151772672951799, + "grad_norm": 1.0787164498543842, + "learning_rate": 1.5263772631918242e-05, + "loss": 0.3369274139404297, + "step": 2693 + }, + { + "epoch": 0.7154428362767229, + "grad_norm": 1.079249778019668, + "learning_rate": 1.5260038871405663e-05, + "loss": 0.2422705739736557, + "step": 2694 + }, + 
{ + "epoch": 0.7157084052582658, + "grad_norm": 1.3990281605221084, + "learning_rate": 1.5256304096819628e-05, + "loss": 0.35786008834838867, + "step": 2695 + }, + { + "epoch": 0.7159739742398088, + "grad_norm": 1.0368618301698236, + "learning_rate": 1.5252568308880155e-05, + "loss": 0.2853243052959442, + "step": 2696 + }, + { + "epoch": 0.7162395432213517, + "grad_norm": 1.1300838792843926, + "learning_rate": 1.5248831508307459e-05, + "loss": 0.2903040051460266, + "step": 2697 + }, + { + "epoch": 0.7165051122028947, + "grad_norm": 1.0779989148221412, + "learning_rate": 1.5245093695821954e-05, + "loss": 0.3375359773635864, + "step": 2698 + }, + { + "epoch": 0.7167706811844377, + "grad_norm": 0.9828776196369989, + "learning_rate": 1.5241354872144242e-05, + "loss": 0.27855974435806274, + "step": 2699 + }, + { + "epoch": 0.7170362501659806, + "grad_norm": 1.0672391327565405, + "learning_rate": 1.5237615037995129e-05, + "loss": 0.32226768136024475, + "step": 2700 + }, + { + "epoch": 0.7173018191475236, + "grad_norm": 1.1089458515112456, + "learning_rate": 1.5233874194095606e-05, + "loss": 0.32856303453445435, + "step": 2701 + }, + { + "epoch": 0.7175673881290665, + "grad_norm": 1.15556869357308, + "learning_rate": 1.5230132341166868e-05, + "loss": 0.31619006395339966, + "step": 2702 + }, + { + "epoch": 0.7178329571106095, + "grad_norm": 1.09474796019269, + "learning_rate": 1.5226389479930296e-05, + "loss": 0.29736411571502686, + "step": 2703 + }, + { + "epoch": 0.7180985260921524, + "grad_norm": 1.0969127487202406, + "learning_rate": 1.5222645611107477e-05, + "loss": 0.2767728865146637, + "step": 2704 + }, + { + "epoch": 0.7183640950736954, + "grad_norm": 1.054074095850648, + "learning_rate": 1.5218900735420174e-05, + "loss": 0.30994221568107605, + "step": 2705 + }, + { + "epoch": 0.7186296640552383, + "grad_norm": 1.0931807335310835, + "learning_rate": 1.5215154853590362e-05, + "loss": 0.3419484496116638, + "step": 2706 + }, + { + "epoch": 0.7188952330367813, + 
"grad_norm": 1.0503021732812985, + "learning_rate": 1.5211407966340203e-05, + "loss": 0.3063664436340332, + "step": 2707 + }, + { + "epoch": 0.7191608020183242, + "grad_norm": 1.0345938706194526, + "learning_rate": 1.520766007439205e-05, + "loss": 0.2856604754924774, + "step": 2708 + }, + { + "epoch": 0.7194263709998672, + "grad_norm": 0.9757823992785323, + "learning_rate": 1.5203911178468453e-05, + "loss": 0.23257851600646973, + "step": 2709 + }, + { + "epoch": 0.7196919399814101, + "grad_norm": 1.0292145399058534, + "learning_rate": 1.5200161279292154e-05, + "loss": 0.31451839208602905, + "step": 2710 + }, + { + "epoch": 0.7199575089629531, + "grad_norm": 1.1017577588578753, + "learning_rate": 1.5196410377586095e-05, + "loss": 0.30298277735710144, + "step": 2711 + }, + { + "epoch": 0.7202230779444961, + "grad_norm": 1.0759590578514124, + "learning_rate": 1.5192658474073398e-05, + "loss": 0.28654640913009644, + "step": 2712 + }, + { + "epoch": 0.7204886469260391, + "grad_norm": 1.1189221983197806, + "learning_rate": 1.5188905569477391e-05, + "loss": 0.3148455023765564, + "step": 2713 + }, + { + "epoch": 0.720754215907582, + "grad_norm": 1.079970608729249, + "learning_rate": 1.5185151664521585e-05, + "loss": 0.3004840612411499, + "step": 2714 + }, + { + "epoch": 0.721019784889125, + "grad_norm": 1.206470642332625, + "learning_rate": 1.518139675992969e-05, + "loss": 0.3378010392189026, + "step": 2715 + }, + { + "epoch": 0.721285353870668, + "grad_norm": 1.0802971688897103, + "learning_rate": 1.517764085642561e-05, + "loss": 0.3084215223789215, + "step": 2716 + }, + { + "epoch": 0.7215509228522109, + "grad_norm": 1.1196175790564493, + "learning_rate": 1.517388395473344e-05, + "loss": 0.3434324264526367, + "step": 2717 + }, + { + "epoch": 0.7218164918337538, + "grad_norm": 1.2084125695848371, + "learning_rate": 1.517012605557746e-05, + "loss": 0.2862265706062317, + "step": 2718 + }, + { + "epoch": 0.7220820608152968, + "grad_norm": 0.9574562560549519, + 
"learning_rate": 1.5166367159682156e-05, + "loss": 0.2760370671749115, + "step": 2719 + }, + { + "epoch": 0.7223476297968398, + "grad_norm": 1.0623260792686084, + "learning_rate": 1.5162607267772194e-05, + "loss": 0.26659202575683594, + "step": 2720 + }, + { + "epoch": 0.7226131987783827, + "grad_norm": 1.069380288412464, + "learning_rate": 1.5158846380572439e-05, + "loss": 0.31900978088378906, + "step": 2721 + }, + { + "epoch": 0.7228787677599257, + "grad_norm": 0.9775730121294547, + "learning_rate": 1.5155084498807941e-05, + "loss": 0.2983658015727997, + "step": 2722 + }, + { + "epoch": 0.7231443367414686, + "grad_norm": 1.0202126383266699, + "learning_rate": 1.5151321623203953e-05, + "loss": 0.3086162805557251, + "step": 2723 + }, + { + "epoch": 0.7234099057230116, + "grad_norm": 1.2685875339489936, + "learning_rate": 1.5147557754485908e-05, + "loss": 0.3233461380004883, + "step": 2724 + }, + { + "epoch": 0.7236754747045545, + "grad_norm": 1.1386667332230644, + "learning_rate": 1.5143792893379441e-05, + "loss": 0.2979195713996887, + "step": 2725 + }, + { + "epoch": 0.7239410436860975, + "grad_norm": 0.9598628443474388, + "learning_rate": 1.5140027040610367e-05, + "loss": 0.27854713797569275, + "step": 2726 + }, + { + "epoch": 0.7242066126676404, + "grad_norm": 1.0735596908703036, + "learning_rate": 1.5136260196904704e-05, + "loss": 0.293560266494751, + "step": 2727 + }, + { + "epoch": 0.7244721816491834, + "grad_norm": 1.1273149809893865, + "learning_rate": 1.513249236298865e-05, + "loss": 0.3033742308616638, + "step": 2728 + }, + { + "epoch": 0.7247377506307263, + "grad_norm": 1.1425183002588892, + "learning_rate": 1.51287235395886e-05, + "loss": 0.27958324551582336, + "step": 2729 + }, + { + "epoch": 0.7250033196122693, + "grad_norm": 1.022839475112705, + "learning_rate": 1.512495372743114e-05, + "loss": 0.3063122034072876, + "step": 2730 + }, + { + "epoch": 0.7252688885938122, + "grad_norm": 1.0524007495354166, + "learning_rate": 1.5121182927243043e-05, + 
"loss": 0.29126864671707153, + "step": 2731 + }, + { + "epoch": 0.7255344575753552, + "grad_norm": 1.0517432179455284, + "learning_rate": 1.5117411139751279e-05, + "loss": 0.27507084608078003, + "step": 2732 + }, + { + "epoch": 0.7258000265568981, + "grad_norm": 1.1167955582078537, + "learning_rate": 1.5113638365682996e-05, + "loss": 0.3432404398918152, + "step": 2733 + }, + { + "epoch": 0.7260655955384411, + "grad_norm": 1.0687371329401973, + "learning_rate": 1.5109864605765552e-05, + "loss": 0.27633196115493774, + "step": 2734 + }, + { + "epoch": 0.726331164519984, + "grad_norm": 1.0811244514830984, + "learning_rate": 1.5106089860726474e-05, + "loss": 0.274509072303772, + "step": 2735 + }, + { + "epoch": 0.726596733501527, + "grad_norm": 0.97012581020674, + "learning_rate": 1.5102314131293494e-05, + "loss": 0.26650723814964294, + "step": 2736 + }, + { + "epoch": 0.7268623024830699, + "grad_norm": 0.9681782432226156, + "learning_rate": 1.5098537418194524e-05, + "loss": 0.24476298689842224, + "step": 2737 + }, + { + "epoch": 0.7271278714646129, + "grad_norm": 1.1154772400244737, + "learning_rate": 1.5094759722157671e-05, + "loss": 0.3337150812149048, + "step": 2738 + }, + { + "epoch": 0.7273934404461558, + "grad_norm": 1.0187825093211873, + "learning_rate": 1.509098104391123e-05, + "loss": 0.3147660195827484, + "step": 2739 + }, + { + "epoch": 0.7276590094276989, + "grad_norm": 0.969229068573487, + "learning_rate": 1.5087201384183687e-05, + "loss": 0.2613281309604645, + "step": 2740 + }, + { + "epoch": 0.7279245784092419, + "grad_norm": 1.0641712204852296, + "learning_rate": 1.5083420743703717e-05, + "loss": 0.2773926854133606, + "step": 2741 + }, + { + "epoch": 0.7281901473907848, + "grad_norm": 1.0826759541494775, + "learning_rate": 1.5079639123200179e-05, + "loss": 0.30515575408935547, + "step": 2742 + }, + { + "epoch": 0.7284557163723278, + "grad_norm": 1.0619554532285063, + "learning_rate": 1.5075856523402128e-05, + "loss": 0.3174355626106262, + "step": 2743 + 
}, + { + "epoch": 0.7287212853538707, + "grad_norm": 0.9676487172589012, + "learning_rate": 1.5072072945038802e-05, + "loss": 0.25163760781288147, + "step": 2744 + }, + { + "epoch": 0.7289868543354137, + "grad_norm": 1.009992458232401, + "learning_rate": 1.5068288388839634e-05, + "loss": 0.28822118043899536, + "step": 2745 + }, + { + "epoch": 0.7292524233169566, + "grad_norm": 1.1623698216562623, + "learning_rate": 1.5064502855534237e-05, + "loss": 0.3129134476184845, + "step": 2746 + }, + { + "epoch": 0.7295179922984996, + "grad_norm": 1.0993962878508883, + "learning_rate": 1.5060716345852423e-05, + "loss": 0.332313597202301, + "step": 2747 + }, + { + "epoch": 0.7297835612800425, + "grad_norm": 1.1989932540466257, + "learning_rate": 1.5056928860524181e-05, + "loss": 0.3425176739692688, + "step": 2748 + }, + { + "epoch": 0.7300491302615855, + "grad_norm": 1.006044605592889, + "learning_rate": 1.5053140400279693e-05, + "loss": 0.2737991511821747, + "step": 2749 + }, + { + "epoch": 0.7303146992431284, + "grad_norm": 0.963162900300573, + "learning_rate": 1.5049350965849337e-05, + "loss": 0.27506589889526367, + "step": 2750 + }, + { + "epoch": 0.7305802682246714, + "grad_norm": 0.9901021314780329, + "learning_rate": 1.5045560557963663e-05, + "loss": 0.25581830739974976, + "step": 2751 + }, + { + "epoch": 0.7308458372062143, + "grad_norm": 1.0977147554610498, + "learning_rate": 1.5041769177353423e-05, + "loss": 0.31746333837509155, + "step": 2752 + }, + { + "epoch": 0.7311114061877573, + "grad_norm": 1.142455577048558, + "learning_rate": 1.5037976824749545e-05, + "loss": 0.3119337260723114, + "step": 2753 + }, + { + "epoch": 0.7313769751693002, + "grad_norm": 1.0824713857839723, + "learning_rate": 1.5034183500883153e-05, + "loss": 0.3330266773700714, + "step": 2754 + }, + { + "epoch": 0.7316425441508432, + "grad_norm": 1.1870819737785345, + "learning_rate": 1.5030389206485554e-05, + "loss": 0.2794867753982544, + "step": 2755 + }, + { + "epoch": 0.7319081131323861, + 
"grad_norm": 1.0826714009199063, + "learning_rate": 1.5026593942288248e-05, + "loss": 0.33273079991340637, + "step": 2756 + }, + { + "epoch": 0.7321736821139291, + "grad_norm": 1.1000195904608074, + "learning_rate": 1.502279770902291e-05, + "loss": 0.30673256516456604, + "step": 2757 + }, + { + "epoch": 0.732439251095472, + "grad_norm": 1.1311236734843304, + "learning_rate": 1.5019000507421412e-05, + "loss": 0.3126910924911499, + "step": 2758 + }, + { + "epoch": 0.732704820077015, + "grad_norm": 1.1665747930638253, + "learning_rate": 1.5015202338215811e-05, + "loss": 0.35423290729522705, + "step": 2759 + }, + { + "epoch": 0.7329703890585579, + "grad_norm": 1.0691634248957984, + "learning_rate": 1.5011403202138346e-05, + "loss": 0.31541377305984497, + "step": 2760 + }, + { + "epoch": 0.7332359580401009, + "grad_norm": 3.4446251175420257, + "learning_rate": 1.5007603099921451e-05, + "loss": 0.31460440158843994, + "step": 2761 + }, + { + "epoch": 0.7335015270216438, + "grad_norm": 1.0828016056563536, + "learning_rate": 1.5003802032297735e-05, + "loss": 0.2786293923854828, + "step": 2762 + }, + { + "epoch": 0.7337670960031868, + "grad_norm": 1.1025311021139896, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.27977997064590454, + "step": 2763 + }, + { + "epoch": 0.7340326649847297, + "grad_norm": 1.1136339551828278, + "learning_rate": 1.4996197003761237e-05, + "loss": 0.2933383584022522, + "step": 2764 + }, + { + "epoch": 0.7342982339662727, + "grad_norm": 1.0743056930311463, + "learning_rate": 1.4992393044314617e-05, + "loss": 0.30623573064804077, + "step": 2765 + }, + { + "epoch": 0.7345638029478156, + "grad_norm": 1.112681662128017, + "learning_rate": 1.4988588122393497e-05, + "loss": 0.28665077686309814, + "step": 2766 + }, + { + "epoch": 0.7348293719293586, + "grad_norm": 1.0268941907147413, + "learning_rate": 1.4984782238731422e-05, + "loss": 0.3245697021484375, + "step": 2767 + }, + { + "epoch": 0.7350949409109017, + "grad_norm": 1.118864717612721, + 
"learning_rate": 1.4980975394062122e-05, + "loss": 0.29477447271347046, + "step": 2768 + }, + { + "epoch": 0.7353605098924446, + "grad_norm": 1.009879072463833, + "learning_rate": 1.4977167589119508e-05, + "loss": 0.29174134135246277, + "step": 2769 + }, + { + "epoch": 0.7356260788739876, + "grad_norm": 1.010733766191454, + "learning_rate": 1.4973358824637687e-05, + "loss": 0.29473474621772766, + "step": 2770 + }, + { + "epoch": 0.7358916478555305, + "grad_norm": 1.3454647120520804, + "learning_rate": 1.4969549101350938e-05, + "loss": 0.3095156252384186, + "step": 2771 + }, + { + "epoch": 0.7361572168370735, + "grad_norm": 1.0578448721867733, + "learning_rate": 1.4965738419993733e-05, + "loss": 0.26295265555381775, + "step": 2772 + }, + { + "epoch": 0.7364227858186164, + "grad_norm": 1.0590497560307077, + "learning_rate": 1.4961926781300723e-05, + "loss": 0.2989509701728821, + "step": 2773 + }, + { + "epoch": 0.7366883548001594, + "grad_norm": 1.0783454816561941, + "learning_rate": 1.4958114186006756e-05, + "loss": 0.31087079644203186, + "step": 2774 + }, + { + "epoch": 0.7369539237817023, + "grad_norm": 1.0953647378016445, + "learning_rate": 1.4954300634846845e-05, + "loss": 0.3063197433948517, + "step": 2775 + }, + { + "epoch": 0.7372194927632453, + "grad_norm": 1.0858506486148067, + "learning_rate": 1.4950486128556208e-05, + "loss": 0.3149424195289612, + "step": 2776 + }, + { + "epoch": 0.7374850617447882, + "grad_norm": 1.0199984929310564, + "learning_rate": 1.4946670667870224e-05, + "loss": 0.2724878191947937, + "step": 2777 + }, + { + "epoch": 0.7377506307263312, + "grad_norm": 1.0033150283887489, + "learning_rate": 1.4942854253524479e-05, + "loss": 0.2556690275669098, + "step": 2778 + }, + { + "epoch": 0.7380161997078741, + "grad_norm": 1.0594159401263619, + "learning_rate": 1.4939036886254727e-05, + "loss": 0.2704542875289917, + "step": 2779 + }, + { + "epoch": 0.7382817686894171, + "grad_norm": 1.052456117640013, + "learning_rate": 1.4935218566796918e-05, 
+ "loss": 0.26762163639068604, + "step": 2780 + }, + { + "epoch": 0.73854733767096, + "grad_norm": 1.1328164222449624, + "learning_rate": 1.4931399295887172e-05, + "loss": 0.3376831114292145, + "step": 2781 + }, + { + "epoch": 0.738812906652503, + "grad_norm": 1.0695003562166123, + "learning_rate": 1.4927579074261803e-05, + "loss": 0.2980082631111145, + "step": 2782 + }, + { + "epoch": 0.7390784756340459, + "grad_norm": 1.0340858480290613, + "learning_rate": 1.4923757902657306e-05, + "loss": 0.27693796157836914, + "step": 2783 + }, + { + "epoch": 0.7393440446155889, + "grad_norm": 1.0204290883803, + "learning_rate": 1.4919935781810353e-05, + "loss": 0.3109282851219177, + "step": 2784 + }, + { + "epoch": 0.7396096135971318, + "grad_norm": 1.12631585013599, + "learning_rate": 1.4916112712457807e-05, + "loss": 0.3123949468135834, + "step": 2785 + }, + { + "epoch": 0.7398751825786748, + "grad_norm": 1.143039341014623, + "learning_rate": 1.4912288695336709e-05, + "loss": 0.3232062757015228, + "step": 2786 + }, + { + "epoch": 0.7401407515602177, + "grad_norm": 1.0315778016896975, + "learning_rate": 1.4908463731184287e-05, + "loss": 0.2685563862323761, + "step": 2787 + }, + { + "epoch": 0.7404063205417607, + "grad_norm": 1.076569860938466, + "learning_rate": 1.4904637820737945e-05, + "loss": 0.25752881169319153, + "step": 2788 + }, + { + "epoch": 0.7406718895233037, + "grad_norm": 1.2236263687690485, + "learning_rate": 1.4900810964735279e-05, + "loss": 0.2887497544288635, + "step": 2789 + }, + { + "epoch": 0.7409374585048466, + "grad_norm": 1.126755867019387, + "learning_rate": 1.489698316391406e-05, + "loss": 0.28804779052734375, + "step": 2790 + }, + { + "epoch": 0.7412030274863896, + "grad_norm": 1.0931262335064922, + "learning_rate": 1.489315441901224e-05, + "loss": 0.2684408724308014, + "step": 2791 + }, + { + "epoch": 0.7414685964679325, + "grad_norm": 1.0509233991385625, + "learning_rate": 1.4889324730767959e-05, + "loss": 0.31945526599884033, + "step": 2792 + }, + 
{ + "epoch": 0.7417341654494755, + "grad_norm": 1.3391113530092205, + "learning_rate": 1.488549409991953e-05, + "loss": 0.34446024894714355, + "step": 2793 + }, + { + "epoch": 0.7419997344310184, + "grad_norm": 1.094751814978447, + "learning_rate": 1.488166252720546e-05, + "loss": 0.28849151730537415, + "step": 2794 + }, + { + "epoch": 0.7422653034125614, + "grad_norm": 1.0431424597135226, + "learning_rate": 1.4877830013364429e-05, + "loss": 0.2793633043766022, + "step": 2795 + }, + { + "epoch": 0.7425308723941043, + "grad_norm": 1.1811188011136542, + "learning_rate": 1.4873996559135298e-05, + "loss": 0.3211687505245209, + "step": 2796 + }, + { + "epoch": 0.7427964413756474, + "grad_norm": 1.004634818722801, + "learning_rate": 1.4870162165257114e-05, + "loss": 0.26225876808166504, + "step": 2797 + }, + { + "epoch": 0.7430620103571903, + "grad_norm": 1.7885293848946355, + "learning_rate": 1.4866326832469105e-05, + "loss": 0.3100029528141022, + "step": 2798 + }, + { + "epoch": 0.7433275793387333, + "grad_norm": 1.0428487423040855, + "learning_rate": 1.4862490561510675e-05, + "loss": 0.29399827122688293, + "step": 2799 + }, + { + "epoch": 0.7435931483202762, + "grad_norm": 0.9886298200418341, + "learning_rate": 1.4858653353121412e-05, + "loss": 0.27357399463653564, + "step": 2800 + }, + { + "epoch": 0.7438587173018192, + "grad_norm": 1.1101962385134683, + "learning_rate": 1.4854815208041087e-05, + "loss": 0.34575730562210083, + "step": 2801 + }, + { + "epoch": 0.7441242862833621, + "grad_norm": 1.0351474931606812, + "learning_rate": 1.4850976127009644e-05, + "loss": 0.28487247228622437, + "step": 2802 + }, + { + "epoch": 0.7443898552649051, + "grad_norm": 1.0283492066128257, + "learning_rate": 1.484713611076722e-05, + "loss": 0.264443576335907, + "step": 2803 + }, + { + "epoch": 0.744655424246448, + "grad_norm": 1.085429543255666, + "learning_rate": 1.4843295160054116e-05, + "loss": 0.32750973105430603, + "step": 2804 + }, + { + "epoch": 0.744920993227991, + 
"grad_norm": 1.0136013055294886, + "learning_rate": 1.4839453275610827e-05, + "loss": 0.24080191552639008, + "step": 2805 + }, + { + "epoch": 0.7451865622095339, + "grad_norm": 1.1486643921382949, + "learning_rate": 1.4835610458178025e-05, + "loss": 0.31667011976242065, + "step": 2806 + }, + { + "epoch": 0.7454521311910769, + "grad_norm": 1.0103490185384167, + "learning_rate": 1.4831766708496553e-05, + "loss": 0.2754175066947937, + "step": 2807 + }, + { + "epoch": 0.7457177001726198, + "grad_norm": 1.0607394107689443, + "learning_rate": 1.482792202730745e-05, + "loss": 0.2890132963657379, + "step": 2808 + }, + { + "epoch": 0.7459832691541628, + "grad_norm": 1.049970305589495, + "learning_rate": 1.4824076415351918e-05, + "loss": 0.3402877748012543, + "step": 2809 + }, + { + "epoch": 0.7462488381357057, + "grad_norm": 1.0879104018503691, + "learning_rate": 1.4820229873371347e-05, + "loss": 0.3167210519313812, + "step": 2810 + }, + { + "epoch": 0.7465144071172487, + "grad_norm": 0.9983910427341833, + "learning_rate": 1.4816382402107308e-05, + "loss": 0.2653643786907196, + "step": 2811 + }, + { + "epoch": 0.7467799760987917, + "grad_norm": 1.2191167585139304, + "learning_rate": 1.4812534002301547e-05, + "loss": 0.3202674984931946, + "step": 2812 + }, + { + "epoch": 0.7470455450803346, + "grad_norm": 1.0461975743299208, + "learning_rate": 1.4808684674695985e-05, + "loss": 0.2942724823951721, + "step": 2813 + }, + { + "epoch": 0.7473111140618776, + "grad_norm": 1.0581736193326858, + "learning_rate": 1.480483442003273e-05, + "loss": 0.28640663623809814, + "step": 2814 + }, + { + "epoch": 0.7475766830434205, + "grad_norm": 0.9932743335315769, + "learning_rate": 1.4800983239054071e-05, + "loss": 0.26214420795440674, + "step": 2815 + }, + { + "epoch": 0.7478422520249635, + "grad_norm": 1.0324489729554576, + "learning_rate": 1.4797131132502464e-05, + "loss": 0.3288992643356323, + "step": 2816 + }, + { + "epoch": 0.7481078210065064, + "grad_norm": 0.9775792939666473, + 
"learning_rate": 1.4793278101120551e-05, + "loss": 0.2622208297252655, + "step": 2817 + }, + { + "epoch": 0.7483733899880494, + "grad_norm": 1.0856486279870832, + "learning_rate": 1.4789424145651152e-05, + "loss": 0.3223533034324646, + "step": 2818 + }, + { + "epoch": 0.7486389589695923, + "grad_norm": 0.9640735701611682, + "learning_rate": 1.4785569266837264e-05, + "loss": 0.25849875807762146, + "step": 2819 + }, + { + "epoch": 0.7489045279511353, + "grad_norm": 1.20204465384733, + "learning_rate": 1.478171346542206e-05, + "loss": 0.3477833569049835, + "step": 2820 + }, + { + "epoch": 0.7491700969326782, + "grad_norm": 1.0577809669167442, + "learning_rate": 1.4777856742148897e-05, + "loss": 0.2799205780029297, + "step": 2821 + }, + { + "epoch": 0.7494356659142212, + "grad_norm": 1.624939710599736, + "learning_rate": 1.4773999097761304e-05, + "loss": 0.2591988444328308, + "step": 2822 + }, + { + "epoch": 0.7497012348957641, + "grad_norm": 1.2869478314125868, + "learning_rate": 1.477014053300299e-05, + "loss": 0.30161747336387634, + "step": 2823 + }, + { + "epoch": 0.7499668038773071, + "grad_norm": 1.0738509532979332, + "learning_rate": 1.4766281048617837e-05, + "loss": 0.28202176094055176, + "step": 2824 + }, + { + "epoch": 0.7502323728588501, + "grad_norm": 1.0042946509670743, + "learning_rate": 1.4762420645349912e-05, + "loss": 0.26074907183647156, + "step": 2825 + }, + { + "epoch": 0.7504979418403931, + "grad_norm": 1.1385436298617553, + "learning_rate": 1.4758559323943455e-05, + "loss": 0.2822819948196411, + "step": 2826 + }, + { + "epoch": 0.750763510821936, + "grad_norm": 1.1069166183989807, + "learning_rate": 1.4754697085142879e-05, + "loss": 0.2704991102218628, + "step": 2827 + }, + { + "epoch": 0.751029079803479, + "grad_norm": 1.1005590878466516, + "learning_rate": 1.4750833929692785e-05, + "loss": 0.2627401053905487, + "step": 2828 + }, + { + "epoch": 0.751294648785022, + "grad_norm": 1.0886740028659867, + "learning_rate": 1.474696985833794e-05, + 
"loss": 0.2898240089416504, + "step": 2829 + }, + { + "epoch": 0.7515602177665649, + "grad_norm": 1.0291450176805186, + "learning_rate": 1.4743104871823291e-05, + "loss": 0.30080029368400574, + "step": 2830 + }, + { + "epoch": 0.7518257867481078, + "grad_norm": 1.0953597523125502, + "learning_rate": 1.473923897089396e-05, + "loss": 0.2950359284877777, + "step": 2831 + }, + { + "epoch": 0.7520913557296508, + "grad_norm": 1.1129882579718784, + "learning_rate": 1.4735372156295253e-05, + "loss": 0.31936827301979065, + "step": 2832 + }, + { + "epoch": 0.7523569247111938, + "grad_norm": 1.1117484749822675, + "learning_rate": 1.4731504428772642e-05, + "loss": 0.2771468460559845, + "step": 2833 + }, + { + "epoch": 0.7526224936927367, + "grad_norm": 1.1332551367729735, + "learning_rate": 1.4727635789071779e-05, + "loss": 0.3135997951030731, + "step": 2834 + }, + { + "epoch": 0.7528880626742797, + "grad_norm": 1.1215560189558773, + "learning_rate": 1.4723766237938495e-05, + "loss": 0.29874372482299805, + "step": 2835 + }, + { + "epoch": 0.7531536316558226, + "grad_norm": 1.0292177835845961, + "learning_rate": 1.4719895776118789e-05, + "loss": 0.249681293964386, + "step": 2836 + }, + { + "epoch": 0.7534192006373656, + "grad_norm": 1.0567186687732057, + "learning_rate": 1.4716024404358847e-05, + "loss": 0.28544771671295166, + "step": 2837 + }, + { + "epoch": 0.7536847696189085, + "grad_norm": 1.1290911495331684, + "learning_rate": 1.4712152123405018e-05, + "loss": 0.32532355189323425, + "step": 2838 + }, + { + "epoch": 0.7539503386004515, + "grad_norm": 1.1212187873017119, + "learning_rate": 1.4708278934003835e-05, + "loss": 0.31663140654563904, + "step": 2839 + }, + { + "epoch": 0.7542159075819944, + "grad_norm": 1.123142254862964, + "learning_rate": 1.4704404836902005e-05, + "loss": 0.30552318692207336, + "step": 2840 + }, + { + "epoch": 0.7544814765635374, + "grad_norm": 1.1574657252500693, + "learning_rate": 1.47005298328464e-05, + "loss": 0.3019601106643677, + "step": 
2841 + }, + { + "epoch": 0.7547470455450803, + "grad_norm": 1.0814580547673966, + "learning_rate": 1.4696653922584084e-05, + "loss": 0.321606308221817, + "step": 2842 + }, + { + "epoch": 0.7550126145266233, + "grad_norm": 1.138590953455986, + "learning_rate": 1.4692777106862281e-05, + "loss": 0.2709462642669678, + "step": 2843 + }, + { + "epoch": 0.7552781835081662, + "grad_norm": 1.1366302949330385, + "learning_rate": 1.46888993864284e-05, + "loss": 0.2882609963417053, + "step": 2844 + }, + { + "epoch": 0.7555437524897092, + "grad_norm": 0.9948609987035232, + "learning_rate": 1.4685020762030019e-05, + "loss": 0.25843000411987305, + "step": 2845 + }, + { + "epoch": 0.7558093214712521, + "grad_norm": 1.1002004205654323, + "learning_rate": 1.4681141234414889e-05, + "loss": 0.30962038040161133, + "step": 2846 + }, + { + "epoch": 0.7560748904527951, + "grad_norm": 1.2025960097123465, + "learning_rate": 1.4677260804330938e-05, + "loss": 0.304874062538147, + "step": 2847 + }, + { + "epoch": 0.756340459434338, + "grad_norm": 1.2287867091921092, + "learning_rate": 1.4673379472526268e-05, + "loss": 0.3425619602203369, + "step": 2848 + }, + { + "epoch": 0.756606028415881, + "grad_norm": 1.0701256182117689, + "learning_rate": 1.4669497239749153e-05, + "loss": 0.3002302050590515, + "step": 2849 + }, + { + "epoch": 0.7568715973974239, + "grad_norm": 1.1005370830207322, + "learning_rate": 1.4665614106748038e-05, + "loss": 0.31008803844451904, + "step": 2850 + }, + { + "epoch": 0.7571371663789669, + "grad_norm": 1.0175712407141912, + "learning_rate": 1.4661730074271551e-05, + "loss": 0.27829408645629883, + "step": 2851 + }, + { + "epoch": 0.7574027353605098, + "grad_norm": 1.0501959661073665, + "learning_rate": 1.4657845143068488e-05, + "loss": 0.25915467739105225, + "step": 2852 + }, + { + "epoch": 0.7576683043420529, + "grad_norm": 1.0719536636155031, + "learning_rate": 1.4653959313887813e-05, + "loss": 0.2843416929244995, + "step": 2853 + }, + { + "epoch": 0.7579338733235959, 
+ "grad_norm": 1.0489373710223147, + "learning_rate": 1.465007258747867e-05, + "loss": 0.2851647138595581, + "step": 2854 + }, + { + "epoch": 0.7581994423051388, + "grad_norm": 1.085754694338766, + "learning_rate": 1.4646184964590378e-05, + "loss": 0.266017884016037, + "step": 2855 + }, + { + "epoch": 0.7584650112866818, + "grad_norm": 1.0789098348141843, + "learning_rate": 1.4642296445972421e-05, + "loss": 0.30142179131507874, + "step": 2856 + }, + { + "epoch": 0.7587305802682247, + "grad_norm": 0.9904299934324251, + "learning_rate": 1.463840703237446e-05, + "loss": 0.2878327965736389, + "step": 2857 + }, + { + "epoch": 0.7589961492497677, + "grad_norm": 1.114310168260114, + "learning_rate": 1.4634516724546326e-05, + "loss": 0.2919169068336487, + "step": 2858 + }, + { + "epoch": 0.7592617182313106, + "grad_norm": 0.9954308342175644, + "learning_rate": 1.4630625523238027e-05, + "loss": 0.2530924081802368, + "step": 2859 + }, + { + "epoch": 0.7595272872128536, + "grad_norm": 1.0858688189416337, + "learning_rate": 1.462673342919974e-05, + "loss": 0.3009106516838074, + "step": 2860 + }, + { + "epoch": 0.7597928561943965, + "grad_norm": 1.1572533440881312, + "learning_rate": 1.4622840443181817e-05, + "loss": 0.3114222288131714, + "step": 2861 + }, + { + "epoch": 0.7600584251759395, + "grad_norm": 1.2224434370177688, + "learning_rate": 1.4618946565934775e-05, + "loss": 0.344540536403656, + "step": 2862 + }, + { + "epoch": 0.7603239941574824, + "grad_norm": 1.0685722656113568, + "learning_rate": 1.4615051798209312e-05, + "loss": 0.263607919216156, + "step": 2863 + }, + { + "epoch": 0.7605895631390254, + "grad_norm": 1.018611353798299, + "learning_rate": 1.4611156140756293e-05, + "loss": 0.2685706317424774, + "step": 2864 + }, + { + "epoch": 0.7608551321205683, + "grad_norm": 1.1431197890714058, + "learning_rate": 1.4607259594326752e-05, + "loss": 0.32342326641082764, + "step": 2865 + }, + { + "epoch": 0.7611207011021113, + "grad_norm": 1.182050624874759, + 
"learning_rate": 1.4603362159671902e-05, + "loss": 0.3088849186897278, + "step": 2866 + }, + { + "epoch": 0.7613862700836542, + "grad_norm": 1.0482348167122462, + "learning_rate": 1.4599463837543114e-05, + "loss": 0.26718589663505554, + "step": 2867 + }, + { + "epoch": 0.7616518390651972, + "grad_norm": 1.0051992534296357, + "learning_rate": 1.4595564628691944e-05, + "loss": 0.29511263966560364, + "step": 2868 + }, + { + "epoch": 0.7619174080467401, + "grad_norm": 1.0974088254649037, + "learning_rate": 1.4591664533870118e-05, + "loss": 0.2940484285354614, + "step": 2869 + }, + { + "epoch": 0.7621829770282831, + "grad_norm": 1.1564456059915547, + "learning_rate": 1.4587763553829521e-05, + "loss": 0.28167295455932617, + "step": 2870 + }, + { + "epoch": 0.762448546009826, + "grad_norm": 1.0590804851451585, + "learning_rate": 1.4583861689322219e-05, + "loss": 0.3362962007522583, + "step": 2871 + }, + { + "epoch": 0.762714114991369, + "grad_norm": 1.1206777555300773, + "learning_rate": 1.4579958941100445e-05, + "loss": 0.3003339171409607, + "step": 2872 + }, + { + "epoch": 0.7629796839729119, + "grad_norm": 1.0572512051509857, + "learning_rate": 1.4576055309916602e-05, + "loss": 0.3191443979740143, + "step": 2873 + }, + { + "epoch": 0.7632452529544549, + "grad_norm": 1.0684782615871369, + "learning_rate": 1.4572150796523265e-05, + "loss": 0.30804574489593506, + "step": 2874 + }, + { + "epoch": 0.7635108219359978, + "grad_norm": 1.0214046475154577, + "learning_rate": 1.4568245401673178e-05, + "loss": 0.32462549209594727, + "step": 2875 + }, + { + "epoch": 0.7637763909175408, + "grad_norm": 1.1357318078490404, + "learning_rate": 1.4564339126119254e-05, + "loss": 0.27751386165618896, + "step": 2876 + }, + { + "epoch": 0.7640419598990837, + "grad_norm": 1.0701221152994065, + "learning_rate": 1.4560431970614578e-05, + "loss": 0.27194011211395264, + "step": 2877 + }, + { + "epoch": 0.7643075288806267, + "grad_norm": 1.134082938487784, + "learning_rate": 
1.4556523935912406e-05, + "loss": 0.28701072931289673, + "step": 2878 + }, + { + "epoch": 0.7645730978621696, + "grad_norm": 1.0814539768930527, + "learning_rate": 1.4552615022766156e-05, + "loss": 0.3278783857822418, + "step": 2879 + }, + { + "epoch": 0.7648386668437126, + "grad_norm": 1.096499511679905, + "learning_rate": 1.4548705231929426e-05, + "loss": 0.3292006254196167, + "step": 2880 + }, + { + "epoch": 0.7651042358252557, + "grad_norm": 1.30563906707581, + "learning_rate": 1.4544794564155971e-05, + "loss": 0.33038759231567383, + "step": 2881 + }, + { + "epoch": 0.7653698048067986, + "grad_norm": 1.0799053745016685, + "learning_rate": 1.4540883020199725e-05, + "loss": 0.29183000326156616, + "step": 2882 + }, + { + "epoch": 0.7656353737883416, + "grad_norm": 1.049945067498866, + "learning_rate": 1.4536970600814789e-05, + "loss": 0.28066399693489075, + "step": 2883 + }, + { + "epoch": 0.7659009427698845, + "grad_norm": 1.0673215015420034, + "learning_rate": 1.4533057306755427e-05, + "loss": 0.2832046151161194, + "step": 2884 + }, + { + "epoch": 0.7661665117514275, + "grad_norm": 1.0799218487874103, + "learning_rate": 1.4529143138776078e-05, + "loss": 0.3006540834903717, + "step": 2885 + }, + { + "epoch": 0.7664320807329704, + "grad_norm": 0.965945374746046, + "learning_rate": 1.4525228097631351e-05, + "loss": 0.2793240547180176, + "step": 2886 + }, + { + "epoch": 0.7666976497145134, + "grad_norm": 1.0791298696355873, + "learning_rate": 1.452131218407602e-05, + "loss": 0.2895192503929138, + "step": 2887 + }, + { + "epoch": 0.7669632186960563, + "grad_norm": 1.1085071656285739, + "learning_rate": 1.4517395398865022e-05, + "loss": 0.27707618474960327, + "step": 2888 + }, + { + "epoch": 0.7672287876775993, + "grad_norm": 0.9801959170871006, + "learning_rate": 1.4513477742753465e-05, + "loss": 0.29167065024375916, + "step": 2889 + }, + { + "epoch": 0.7674943566591422, + "grad_norm": 0.9760628575291594, + "learning_rate": 1.4509559216496631e-05, + "loss": 
0.2670987844467163, + "step": 2890 + }, + { + "epoch": 0.7677599256406852, + "grad_norm": 1.0541213606202946, + "learning_rate": 1.4505639820849968e-05, + "loss": 0.3025206923484802, + "step": 2891 + }, + { + "epoch": 0.7680254946222281, + "grad_norm": 1.0721054101606857, + "learning_rate": 1.4501719556569087e-05, + "loss": 0.3104705512523651, + "step": 2892 + }, + { + "epoch": 0.7682910636037711, + "grad_norm": 1.1715745485021363, + "learning_rate": 1.4497798424409766e-05, + "loss": 0.2972267270088196, + "step": 2893 + }, + { + "epoch": 0.768556632585314, + "grad_norm": 1.3084992927105763, + "learning_rate": 1.4493876425127957e-05, + "loss": 0.34956347942352295, + "step": 2894 + }, + { + "epoch": 0.768822201566857, + "grad_norm": 1.0910589486872886, + "learning_rate": 1.4489953559479775e-05, + "loss": 0.3122873902320862, + "step": 2895 + }, + { + "epoch": 0.7690877705483999, + "grad_norm": 1.0070263080445798, + "learning_rate": 1.4486029828221497e-05, + "loss": 0.29645755887031555, + "step": 2896 + }, + { + "epoch": 0.7693533395299429, + "grad_norm": 1.1312479199974272, + "learning_rate": 1.448210523210958e-05, + "loss": 0.33357223868370056, + "step": 2897 + }, + { + "epoch": 0.7696189085114858, + "grad_norm": 1.0807209302083978, + "learning_rate": 1.4478179771900634e-05, + "loss": 0.2780191898345947, + "step": 2898 + }, + { + "epoch": 0.7698844774930288, + "grad_norm": 1.098992372480737, + "learning_rate": 1.447425344835144e-05, + "loss": 0.31503236293792725, + "step": 2899 + }, + { + "epoch": 0.7701500464745717, + "grad_norm": 1.0152023365250116, + "learning_rate": 1.4470326262218955e-05, + "loss": 0.2843332290649414, + "step": 2900 + }, + { + "epoch": 0.7704156154561147, + "grad_norm": 1.1041753681410225, + "learning_rate": 1.4466398214260286e-05, + "loss": 0.305475652217865, + "step": 2901 + }, + { + "epoch": 0.7706811844376577, + "grad_norm": 1.0159008972115877, + "learning_rate": 1.446246930523272e-05, + "loss": 0.28418007493019104, + "step": 2902 + }, + { + 
"epoch": 0.7709467534192006, + "grad_norm": 2.0289726917266027, + "learning_rate": 1.44585395358937e-05, + "loss": 0.28237032890319824, + "step": 2903 + }, + { + "epoch": 0.7712123224007436, + "grad_norm": 1.1334683720848762, + "learning_rate": 1.4454608907000843e-05, + "loss": 0.33727777004241943, + "step": 2904 + }, + { + "epoch": 0.7714778913822865, + "grad_norm": 1.1393257541232447, + "learning_rate": 1.4450677419311925e-05, + "loss": 0.2977198660373688, + "step": 2905 + }, + { + "epoch": 0.7717434603638295, + "grad_norm": 1.0793508547506123, + "learning_rate": 1.4446745073584891e-05, + "loss": 0.3095981776714325, + "step": 2906 + }, + { + "epoch": 0.7720090293453724, + "grad_norm": 1.138471500425881, + "learning_rate": 1.4442811870577851e-05, + "loss": 0.29808440804481506, + "step": 2907 + }, + { + "epoch": 0.7722745983269154, + "grad_norm": 1.2668271633221484, + "learning_rate": 1.4438877811049079e-05, + "loss": 0.32444530725479126, + "step": 2908 + }, + { + "epoch": 0.7725401673084584, + "grad_norm": 1.0229226464155372, + "learning_rate": 1.443494289575702e-05, + "loss": 0.24782602488994598, + "step": 2909 + }, + { + "epoch": 0.7728057362900014, + "grad_norm": 1.079755307057506, + "learning_rate": 1.4431007125460274e-05, + "loss": 0.31289762258529663, + "step": 2910 + }, + { + "epoch": 0.7730713052715443, + "grad_norm": 1.0928540626872372, + "learning_rate": 1.4427070500917615e-05, + "loss": 0.31444042921066284, + "step": 2911 + }, + { + "epoch": 0.7733368742530873, + "grad_norm": 1.1235251868548595, + "learning_rate": 1.4423133022887973e-05, + "loss": 0.31347882747650146, + "step": 2912 + }, + { + "epoch": 0.7736024432346302, + "grad_norm": 1.1449169077961199, + "learning_rate": 1.4419194692130453e-05, + "loss": 0.3025411367416382, + "step": 2913 + }, + { + "epoch": 0.7738680122161732, + "grad_norm": 0.9734590933720824, + "learning_rate": 1.4415255509404316e-05, + "loss": 0.2954581081867218, + "step": 2914 + }, + { + "epoch": 0.7741335811977161, + 
"grad_norm": 1.051295802747811, + "learning_rate": 1.4411315475468988e-05, + "loss": 0.2675531506538391, + "step": 2915 + }, + { + "epoch": 0.7743991501792591, + "grad_norm": 1.0207923958770302, + "learning_rate": 1.4407374591084064e-05, + "loss": 0.29307854175567627, + "step": 2916 + }, + { + "epoch": 0.774664719160802, + "grad_norm": 0.9134258889524259, + "learning_rate": 1.4403432857009295e-05, + "loss": 0.2805953025817871, + "step": 2917 + }, + { + "epoch": 0.774930288142345, + "grad_norm": 1.1114518211112974, + "learning_rate": 1.439949027400461e-05, + "loss": 0.30805838108062744, + "step": 2918 + }, + { + "epoch": 0.7751958571238879, + "grad_norm": 1.063187320260136, + "learning_rate": 1.4395546842830085e-05, + "loss": 0.31501835584640503, + "step": 2919 + }, + { + "epoch": 0.7754614261054309, + "grad_norm": 1.025310766436644, + "learning_rate": 1.4391602564245975e-05, + "loss": 0.2719186246395111, + "step": 2920 + }, + { + "epoch": 0.7757269950869738, + "grad_norm": 1.0474571998069828, + "learning_rate": 1.4387657439012677e-05, + "loss": 0.29554325342178345, + "step": 2921 + }, + { + "epoch": 0.7759925640685168, + "grad_norm": 1.0103166752174864, + "learning_rate": 1.4383711467890776e-05, + "loss": 0.2993816137313843, + "step": 2922 + }, + { + "epoch": 0.7762581330500598, + "grad_norm": 1.087143911717871, + "learning_rate": 1.4379764651641004e-05, + "loss": 0.3412264883518219, + "step": 2923 + }, + { + "epoch": 0.7765237020316027, + "grad_norm": 1.3163055539647115, + "learning_rate": 1.4375816991024263e-05, + "loss": 0.3137913942337036, + "step": 2924 + }, + { + "epoch": 0.7767892710131457, + "grad_norm": 1.0026858390591848, + "learning_rate": 1.4371868486801611e-05, + "loss": 0.2710151672363281, + "step": 2925 + }, + { + "epoch": 0.7770548399946886, + "grad_norm": 1.060508746597415, + "learning_rate": 1.4367919139734279e-05, + "loss": 0.28521692752838135, + "step": 2926 + }, + { + "epoch": 0.7773204089762316, + "grad_norm": 0.9938687291505847, + 
"learning_rate": 1.4363968950583651e-05, + "loss": 0.2889919579029083, + "step": 2927 + }, + { + "epoch": 0.7775859779577745, + "grad_norm": 1.0641534591195945, + "learning_rate": 1.436001792011128e-05, + "loss": 0.31562381982803345, + "step": 2928 + }, + { + "epoch": 0.7778515469393175, + "grad_norm": 0.980719397790632, + "learning_rate": 1.4356066049078871e-05, + "loss": 0.2747528553009033, + "step": 2929 + }, + { + "epoch": 0.7781171159208604, + "grad_norm": 1.0890864939874727, + "learning_rate": 1.4352113338248303e-05, + "loss": 0.2918938398361206, + "step": 2930 + }, + { + "epoch": 0.7783826849024034, + "grad_norm": 1.1375978489291394, + "learning_rate": 1.4348159788381615e-05, + "loss": 0.3348507285118103, + "step": 2931 + }, + { + "epoch": 0.7786482538839463, + "grad_norm": 1.049930284325584, + "learning_rate": 1.4344205400241e-05, + "loss": 0.27206242084503174, + "step": 2932 + }, + { + "epoch": 0.7789138228654893, + "grad_norm": 1.0635705360778813, + "learning_rate": 1.434025017458882e-05, + "loss": 0.28496092557907104, + "step": 2933 + }, + { + "epoch": 0.7791793918470322, + "grad_norm": 1.1207237235097192, + "learning_rate": 1.4336294112187595e-05, + "loss": 0.3080131411552429, + "step": 2934 + }, + { + "epoch": 0.7794449608285752, + "grad_norm": 1.1562549835000784, + "learning_rate": 1.4332337213800008e-05, + "loss": 0.3116779029369354, + "step": 2935 + }, + { + "epoch": 0.7797105298101181, + "grad_norm": 1.0230593279992428, + "learning_rate": 1.43283794801889e-05, + "loss": 0.26526543498039246, + "step": 2936 + }, + { + "epoch": 0.7799760987916612, + "grad_norm": 1.0768548459396885, + "learning_rate": 1.4324420912117274e-05, + "loss": 0.2829325497150421, + "step": 2937 + }, + { + "epoch": 0.7802416677732041, + "grad_norm": 1.197165846783245, + "learning_rate": 1.43204615103483e-05, + "loss": 0.34146445989608765, + "step": 2938 + }, + { + "epoch": 0.7805072367547471, + "grad_norm": 1.1418950254878286, + "learning_rate": 1.43165012756453e-05, + "loss": 
0.316609650850296, + "step": 2939 + }, + { + "epoch": 0.78077280573629, + "grad_norm": 1.119861281862994, + "learning_rate": 1.4312540208771766e-05, + "loss": 0.3215107321739197, + "step": 2940 + }, + { + "epoch": 0.781038374717833, + "grad_norm": 1.0591732101512668, + "learning_rate": 1.4308578310491342e-05, + "loss": 0.2834000587463379, + "step": 2941 + }, + { + "epoch": 0.781303943699376, + "grad_norm": 1.1186376453102755, + "learning_rate": 1.430461558156783e-05, + "loss": 0.30184993147850037, + "step": 2942 + }, + { + "epoch": 0.7815695126809189, + "grad_norm": 1.1319557052801907, + "learning_rate": 1.4300652022765207e-05, + "loss": 0.3299996256828308, + "step": 2943 + }, + { + "epoch": 0.7818350816624619, + "grad_norm": 1.1269288601015153, + "learning_rate": 1.4296687634847592e-05, + "loss": 0.27565228939056396, + "step": 2944 + }, + { + "epoch": 0.7821006506440048, + "grad_norm": 1.1019395409868211, + "learning_rate": 1.4292722418579278e-05, + "loss": 0.30347493290901184, + "step": 2945 + }, + { + "epoch": 0.7823662196255478, + "grad_norm": 1.125677517693181, + "learning_rate": 1.4288756374724709e-05, + "loss": 0.31469428539276123, + "step": 2946 + }, + { + "epoch": 0.7826317886070907, + "grad_norm": 1.0500101449680372, + "learning_rate": 1.4284789504048493e-05, + "loss": 0.27361029386520386, + "step": 2947 + }, + { + "epoch": 0.7828973575886337, + "grad_norm": 1.057442611584268, + "learning_rate": 1.428082180731539e-05, + "loss": 0.29180705547332764, + "step": 2948 + }, + { + "epoch": 0.7831629265701766, + "grad_norm": 1.0218659697209738, + "learning_rate": 1.4276853285290334e-05, + "loss": 0.281120628118515, + "step": 2949 + }, + { + "epoch": 0.7834284955517196, + "grad_norm": 1.0029783457826962, + "learning_rate": 1.4272883938738406e-05, + "loss": 0.26144471764564514, + "step": 2950 + }, + { + "epoch": 0.7836940645332625, + "grad_norm": 1.0904458839940374, + "learning_rate": 1.4268913768424848e-05, + "loss": 0.3118991255760193, + "step": 2951 + }, + { + 
"epoch": 0.7839596335148055, + "grad_norm": 1.0581869365443632, + "learning_rate": 1.4264942775115065e-05, + "loss": 0.29352328181266785, + "step": 2952 + }, + { + "epoch": 0.7842252024963484, + "grad_norm": 1.025234952757571, + "learning_rate": 1.426097095957461e-05, + "loss": 0.2687748968601227, + "step": 2953 + }, + { + "epoch": 0.7844907714778914, + "grad_norm": 1.0817782920006436, + "learning_rate": 1.4256998322569212e-05, + "loss": 0.3106890916824341, + "step": 2954 + }, + { + "epoch": 0.7847563404594343, + "grad_norm": 1.0039841255701216, + "learning_rate": 1.4253024864864742e-05, + "loss": 0.2522161304950714, + "step": 2955 + }, + { + "epoch": 0.7850219094409773, + "grad_norm": 1.031799618380073, + "learning_rate": 1.424905058722724e-05, + "loss": 0.2994377613067627, + "step": 2956 + }, + { + "epoch": 0.7852874784225202, + "grad_norm": 1.295564211303899, + "learning_rate": 1.4245075490422893e-05, + "loss": 0.3753565549850464, + "step": 2957 + }, + { + "epoch": 0.7855530474040632, + "grad_norm": 1.2386689798654595, + "learning_rate": 1.424109957521806e-05, + "loss": 0.29544737935066223, + "step": 2958 + }, + { + "epoch": 0.7858186163856061, + "grad_norm": 1.0381164701705432, + "learning_rate": 1.423712284237925e-05, + "loss": 0.307847797870636, + "step": 2959 + }, + { + "epoch": 0.7860841853671491, + "grad_norm": 1.1107576873332587, + "learning_rate": 1.4233145292673127e-05, + "loss": 0.31758183240890503, + "step": 2960 + }, + { + "epoch": 0.786349754348692, + "grad_norm": 1.0358601319268448, + "learning_rate": 1.4229166926866517e-05, + "loss": 0.307254433631897, + "step": 2961 + }, + { + "epoch": 0.786615323330235, + "grad_norm": 1.2228062733167704, + "learning_rate": 1.42251877457264e-05, + "loss": 0.3513748049736023, + "step": 2962 + }, + { + "epoch": 0.7868808923117779, + "grad_norm": 1.1359729522705007, + "learning_rate": 1.422120775001992e-05, + "loss": 0.3025718629360199, + "step": 2963 + }, + { + "epoch": 0.7871464612933209, + "grad_norm": 
1.076503168390535, + "learning_rate": 1.4217226940514367e-05, + "loss": 0.2922811508178711, + "step": 2964 + }, + { + "epoch": 0.787412030274864, + "grad_norm": 1.07297262661661, + "learning_rate": 1.42132453179772e-05, + "loss": 0.29599297046661377, + "step": 2965 + }, + { + "epoch": 0.7876775992564069, + "grad_norm": 0.992121967255531, + "learning_rate": 1.4209262883176025e-05, + "loss": 0.28336548805236816, + "step": 2966 + }, + { + "epoch": 0.7879431682379499, + "grad_norm": 1.0655541697156172, + "learning_rate": 1.4205279636878613e-05, + "loss": 0.3100801110267639, + "step": 2967 + }, + { + "epoch": 0.7882087372194928, + "grad_norm": 1.165527486411767, + "learning_rate": 1.4201295579852881e-05, + "loss": 0.33067989349365234, + "step": 2968 + }, + { + "epoch": 0.7884743062010358, + "grad_norm": 1.1896877635723886, + "learning_rate": 1.4197310712866909e-05, + "loss": 0.282347172498703, + "step": 2969 + }, + { + "epoch": 0.7887398751825787, + "grad_norm": 1.0769183433483809, + "learning_rate": 1.419332503668894e-05, + "loss": 0.30585426092147827, + "step": 2970 + }, + { + "epoch": 0.7890054441641217, + "grad_norm": 1.0616062054836604, + "learning_rate": 1.4189338552087351e-05, + "loss": 0.3011561632156372, + "step": 2971 + }, + { + "epoch": 0.7892710131456646, + "grad_norm": 0.9722574451184507, + "learning_rate": 1.4185351259830705e-05, + "loss": 0.2700524926185608, + "step": 2972 + }, + { + "epoch": 0.7895365821272076, + "grad_norm": 1.0849811262666431, + "learning_rate": 1.4181363160687693e-05, + "loss": 0.2963382303714752, + "step": 2973 + }, + { + "epoch": 0.7898021511087505, + "grad_norm": 1.0388990841328773, + "learning_rate": 1.4177374255427183e-05, + "loss": 0.27132824063301086, + "step": 2974 + }, + { + "epoch": 0.7900677200902935, + "grad_norm": 0.9602477794817199, + "learning_rate": 1.417338454481818e-05, + "loss": 0.2539706826210022, + "step": 2975 + }, + { + "epoch": 0.7903332890718364, + "grad_norm": 1.0972216427869486, + "learning_rate": 
1.416939402962986e-05, + "loss": 0.28465601801872253, + "step": 2976 + }, + { + "epoch": 0.7905988580533794, + "grad_norm": 1.1885027397372414, + "learning_rate": 1.4165402710631544e-05, + "loss": 0.3020748198032379, + "step": 2977 + }, + { + "epoch": 0.7908644270349223, + "grad_norm": 1.0709231597298363, + "learning_rate": 1.416141058859271e-05, + "loss": 0.3157690465450287, + "step": 2978 + }, + { + "epoch": 0.7911299960164653, + "grad_norm": 1.0874979641604023, + "learning_rate": 1.4157417664282994e-05, + "loss": 0.2720191776752472, + "step": 2979 + }, + { + "epoch": 0.7913955649980082, + "grad_norm": 1.0670143355557837, + "learning_rate": 1.4153423938472185e-05, + "loss": 0.2931746542453766, + "step": 2980 + }, + { + "epoch": 0.7916611339795512, + "grad_norm": 1.0836941185599118, + "learning_rate": 1.4149429411930226e-05, + "loss": 0.2683875560760498, + "step": 2981 + }, + { + "epoch": 0.7919267029610941, + "grad_norm": 1.0454189872619364, + "learning_rate": 1.4145434085427216e-05, + "loss": 0.2559819519519806, + "step": 2982 + }, + { + "epoch": 0.7921922719426371, + "grad_norm": 1.1028368657772893, + "learning_rate": 1.4141437959733404e-05, + "loss": 0.2845582365989685, + "step": 2983 + }, + { + "epoch": 0.79245784092418, + "grad_norm": 1.05827279827959, + "learning_rate": 1.4137441035619197e-05, + "loss": 0.26766544580459595, + "step": 2984 + }, + { + "epoch": 0.792723409905723, + "grad_norm": 1.2459472391823172, + "learning_rate": 1.4133443313855155e-05, + "loss": 0.32089024782180786, + "step": 2985 + }, + { + "epoch": 0.7929889788872659, + "grad_norm": 1.053106908199776, + "learning_rate": 1.4129444795211993e-05, + "loss": 0.2756182551383972, + "step": 2986 + }, + { + "epoch": 0.7932545478688089, + "grad_norm": 1.231241306668284, + "learning_rate": 1.4125445480460573e-05, + "loss": 0.29487302899360657, + "step": 2987 + }, + { + "epoch": 0.7935201168503518, + "grad_norm": 1.1738297230948855, + "learning_rate": 1.4121445370371922e-05, + "loss": 
0.3362561762332916, + "step": 2988 + }, + { + "epoch": 0.7937856858318948, + "grad_norm": 1.1591988507026376, + "learning_rate": 1.4117444465717209e-05, + "loss": 0.2986692488193512, + "step": 2989 + }, + { + "epoch": 0.7940512548134377, + "grad_norm": 1.0341012671875776, + "learning_rate": 1.4113442767267766e-05, + "loss": 0.2725266218185425, + "step": 2990 + }, + { + "epoch": 0.7943168237949807, + "grad_norm": 1.1125466640148414, + "learning_rate": 1.4109440275795071e-05, + "loss": 0.29827257990837097, + "step": 2991 + }, + { + "epoch": 0.7945823927765236, + "grad_norm": 1.0512885973195232, + "learning_rate": 1.410543699207076e-05, + "loss": 0.2506203055381775, + "step": 2992 + }, + { + "epoch": 0.7948479617580667, + "grad_norm": 0.9867416114744889, + "learning_rate": 1.410143291686661e-05, + "loss": 0.2675034701824188, + "step": 2993 + }, + { + "epoch": 0.7951135307396097, + "grad_norm": 1.1763547306282318, + "learning_rate": 1.4097428050954571e-05, + "loss": 0.34528690576553345, + "step": 2994 + }, + { + "epoch": 0.7953790997211526, + "grad_norm": 1.1374135219725177, + "learning_rate": 1.4093422395106726e-05, + "loss": 0.27551063895225525, + "step": 2995 + }, + { + "epoch": 0.7956446687026956, + "grad_norm": 1.1195982376159075, + "learning_rate": 1.408941595009532e-05, + "loss": 0.3176268935203552, + "step": 2996 + }, + { + "epoch": 0.7959102376842385, + "grad_norm": 1.1804373403956752, + "learning_rate": 1.408540871669275e-05, + "loss": 0.30056723952293396, + "step": 2997 + }, + { + "epoch": 0.7961758066657815, + "grad_norm": 1.124570387942151, + "learning_rate": 1.4081400695671562e-05, + "loss": 0.32109886407852173, + "step": 2998 + }, + { + "epoch": 0.7964413756473244, + "grad_norm": 1.1262740571855958, + "learning_rate": 1.4077391887804457e-05, + "loss": 0.33622005581855774, + "step": 2999 + }, + { + "epoch": 0.7967069446288674, + "grad_norm": 1.1195153536613822, + "learning_rate": 1.4073382293864283e-05, + "loss": 0.3054961860179901, + "step": 3000 + }, + 
{ + "epoch": 0.7969725136104103, + "grad_norm": 1.1210721039096916, + "learning_rate": 1.4069371914624044e-05, + "loss": 0.3022462725639343, + "step": 3001 + }, + { + "epoch": 0.7972380825919533, + "grad_norm": 1.0116555063320039, + "learning_rate": 1.4065360750856891e-05, + "loss": 0.2500512897968292, + "step": 3002 + }, + { + "epoch": 0.7975036515734962, + "grad_norm": 1.233947002119444, + "learning_rate": 1.4061348803336135e-05, + "loss": 0.2960171699523926, + "step": 3003 + }, + { + "epoch": 0.7977692205550392, + "grad_norm": 3.53476121579318, + "learning_rate": 1.4057336072835228e-05, + "loss": 0.2941724359989166, + "step": 3004 + }, + { + "epoch": 0.7980347895365821, + "grad_norm": 1.0143157952003843, + "learning_rate": 1.4053322560127779e-05, + "loss": 0.2827858328819275, + "step": 3005 + }, + { + "epoch": 0.7983003585181251, + "grad_norm": 1.34417890867956, + "learning_rate": 1.4049308265987544e-05, + "loss": 0.32525116205215454, + "step": 3006 + }, + { + "epoch": 0.798565927499668, + "grad_norm": 1.1622605286979444, + "learning_rate": 1.4045293191188431e-05, + "loss": 0.26509979367256165, + "step": 3007 + }, + { + "epoch": 0.798831496481211, + "grad_norm": 1.1649049829769997, + "learning_rate": 1.4041277336504503e-05, + "loss": 0.3462742567062378, + "step": 3008 + }, + { + "epoch": 0.7990970654627539, + "grad_norm": 1.118975693723979, + "learning_rate": 1.4037260702709967e-05, + "loss": 0.2971092164516449, + "step": 3009 + }, + { + "epoch": 0.7993626344442969, + "grad_norm": 1.0541078602131526, + "learning_rate": 1.4033243290579182e-05, + "loss": 0.32359808683395386, + "step": 3010 + }, + { + "epoch": 0.7996282034258398, + "grad_norm": 0.9819968107477214, + "learning_rate": 1.4029225100886657e-05, + "loss": 0.2949031591415405, + "step": 3011 + }, + { + "epoch": 0.7998937724073828, + "grad_norm": 0.9639154080405838, + "learning_rate": 1.4025206134407051e-05, + "loss": 0.29888901114463806, + "step": 3012 + }, + { + "epoch": 0.8001593413889257, + "grad_norm": 
1.0921369087209054, + "learning_rate": 1.4021186391915181e-05, + "loss": 0.2999705672264099, + "step": 3013 + }, + { + "epoch": 0.8004249103704687, + "grad_norm": 1.027092536189555, + "learning_rate": 1.4017165874185996e-05, + "loss": 0.2725638449192047, + "step": 3014 + }, + { + "epoch": 0.8006904793520117, + "grad_norm": 1.6251260873819724, + "learning_rate": 1.4013144581994609e-05, + "loss": 0.2809314727783203, + "step": 3015 + }, + { + "epoch": 0.8009560483335546, + "grad_norm": 1.194026798460289, + "learning_rate": 1.400912251611628e-05, + "loss": 0.30335327982902527, + "step": 3016 + }, + { + "epoch": 0.8012216173150976, + "grad_norm": 1.0526756572542106, + "learning_rate": 1.400509967732641e-05, + "loss": 0.27780598402023315, + "step": 3017 + }, + { + "epoch": 0.8014871862966405, + "grad_norm": 1.0036615790617616, + "learning_rate": 1.400107606640056e-05, + "loss": 0.2865309715270996, + "step": 3018 + }, + { + "epoch": 0.8017527552781835, + "grad_norm": 1.067182271229665, + "learning_rate": 1.3997051684114431e-05, + "loss": 0.2691546082496643, + "step": 3019 + }, + { + "epoch": 0.8020183242597264, + "grad_norm": 1.0174199108878024, + "learning_rate": 1.3993026531243876e-05, + "loss": 0.30289226770401, + "step": 3020 + }, + { + "epoch": 0.8022838932412695, + "grad_norm": 1.1180967643802684, + "learning_rate": 1.3989000608564905e-05, + "loss": 0.2767682671546936, + "step": 3021 + }, + { + "epoch": 0.8025494622228124, + "grad_norm": 1.1982508587685934, + "learning_rate": 1.3984973916853657e-05, + "loss": 0.3423742353916168, + "step": 3022 + }, + { + "epoch": 0.8028150312043554, + "grad_norm": 1.1718790013716964, + "learning_rate": 1.3980946456886439e-05, + "loss": 0.3000536561012268, + "step": 3023 + }, + { + "epoch": 0.8030806001858983, + "grad_norm": 1.1431161282459077, + "learning_rate": 1.3976918229439698e-05, + "loss": 0.3071063756942749, + "step": 3024 + }, + { + "epoch": 0.8033461691674413, + "grad_norm": 1.6885640285561154, + "learning_rate": 
1.397288923529002e-05, + "loss": 0.31261157989501953, + "step": 3025 + }, + { + "epoch": 0.8036117381489842, + "grad_norm": 1.0076153318556622, + "learning_rate": 1.3968859475214156e-05, + "loss": 0.2658939063549042, + "step": 3026 + }, + { + "epoch": 0.8038773071305272, + "grad_norm": 1.0309089161631302, + "learning_rate": 1.3964828949988993e-05, + "loss": 0.2772905230522156, + "step": 3027 + }, + { + "epoch": 0.8041428761120701, + "grad_norm": 1.1271894525974708, + "learning_rate": 1.396079766039157e-05, + "loss": 0.2903479337692261, + "step": 3028 + }, + { + "epoch": 0.8044084450936131, + "grad_norm": 1.2165332424367126, + "learning_rate": 1.3956765607199069e-05, + "loss": 0.35709524154663086, + "step": 3029 + }, + { + "epoch": 0.804674014075156, + "grad_norm": 1.0863328323430816, + "learning_rate": 1.3952732791188828e-05, + "loss": 0.2929389774799347, + "step": 3030 + }, + { + "epoch": 0.804939583056699, + "grad_norm": 0.999480167032172, + "learning_rate": 1.3948699213138321e-05, + "loss": 0.2609884440898895, + "step": 3031 + }, + { + "epoch": 0.805205152038242, + "grad_norm": 1.0946442757602284, + "learning_rate": 1.394466487382518e-05, + "loss": 0.3026544749736786, + "step": 3032 + }, + { + "epoch": 0.8054707210197849, + "grad_norm": 1.0415601836945267, + "learning_rate": 1.394062977402717e-05, + "loss": 0.28281137347221375, + "step": 3033 + }, + { + "epoch": 0.8057362900013278, + "grad_norm": 0.9908513124522437, + "learning_rate": 1.3936593914522214e-05, + "loss": 0.26189178228378296, + "step": 3034 + }, + { + "epoch": 0.8060018589828708, + "grad_norm": 1.0541854732158313, + "learning_rate": 1.3932557296088383e-05, + "loss": 0.27987509965896606, + "step": 3035 + }, + { + "epoch": 0.8062674279644138, + "grad_norm": 0.9961129101435677, + "learning_rate": 1.3928519919503884e-05, + "loss": 0.2857724130153656, + "step": 3036 + }, + { + "epoch": 0.8065329969459567, + "grad_norm": 0.9752377302684325, + "learning_rate": 1.3924481785547076e-05, + "loss": 
0.28102418780326843, + "step": 3037 + }, + { + "epoch": 0.8067985659274997, + "grad_norm": 1.06882045524996, + "learning_rate": 1.3920442894996464e-05, + "loss": 0.30250412225723267, + "step": 3038 + }, + { + "epoch": 0.8070641349090426, + "grad_norm": 0.9854538363943691, + "learning_rate": 1.3916403248630703e-05, + "loss": 0.28951483964920044, + "step": 3039 + }, + { + "epoch": 0.8073297038905856, + "grad_norm": 0.990016753911339, + "learning_rate": 1.3912362847228585e-05, + "loss": 0.28455328941345215, + "step": 3040 + }, + { + "epoch": 0.8075952728721285, + "grad_norm": 1.0887176497400486, + "learning_rate": 1.3908321691569048e-05, + "loss": 0.29541105031967163, + "step": 3041 + }, + { + "epoch": 0.8078608418536715, + "grad_norm": 1.162648796815669, + "learning_rate": 1.3904279782431187e-05, + "loss": 0.3057629466056824, + "step": 3042 + }, + { + "epoch": 0.8081264108352144, + "grad_norm": 1.0909846424659564, + "learning_rate": 1.3900237120594226e-05, + "loss": 0.3204082250595093, + "step": 3043 + }, + { + "epoch": 0.8083919798167574, + "grad_norm": 0.9793203113476959, + "learning_rate": 1.3896193706837551e-05, + "loss": 0.28629523515701294, + "step": 3044 + }, + { + "epoch": 0.8086575487983003, + "grad_norm": 1.1874958252714642, + "learning_rate": 1.389214954194068e-05, + "loss": 0.298164427280426, + "step": 3045 + }, + { + "epoch": 0.8089231177798433, + "grad_norm": 1.005892758898695, + "learning_rate": 1.3888104626683282e-05, + "loss": 0.27309298515319824, + "step": 3046 + }, + { + "epoch": 0.8091886867613862, + "grad_norm": 0.9950263488620656, + "learning_rate": 1.3884058961845166e-05, + "loss": 0.25635263323783875, + "step": 3047 + }, + { + "epoch": 0.8094542557429292, + "grad_norm": 1.002808171969614, + "learning_rate": 1.3880012548206292e-05, + "loss": 0.29926127195358276, + "step": 3048 + }, + { + "epoch": 0.8097198247244722, + "grad_norm": 0.9867331912864394, + "learning_rate": 1.387596538654676e-05, + "loss": 0.26633137464523315, + "step": 3049 + }, + 
{ + "epoch": 0.8099853937060152, + "grad_norm": 1.0757993931692869, + "learning_rate": 1.387191747764681e-05, + "loss": 0.28725534677505493, + "step": 3050 + }, + { + "epoch": 0.8102509626875581, + "grad_norm": 1.4955713597704303, + "learning_rate": 1.3867868822286838e-05, + "loss": 0.3015314042568207, + "step": 3051 + }, + { + "epoch": 0.8105165316691011, + "grad_norm": 1.048643971484194, + "learning_rate": 1.3863819421247375e-05, + "loss": 0.3054691553115845, + "step": 3052 + }, + { + "epoch": 0.810782100650644, + "grad_norm": 1.1596568650600225, + "learning_rate": 1.3859769275309097e-05, + "loss": 0.26315444707870483, + "step": 3053 + }, + { + "epoch": 0.811047669632187, + "grad_norm": 1.024319547072995, + "learning_rate": 1.3855718385252824e-05, + "loss": 0.2973077595233917, + "step": 3054 + }, + { + "epoch": 0.81131323861373, + "grad_norm": 1.1845129171721744, + "learning_rate": 1.385166675185952e-05, + "loss": 0.32824432849884033, + "step": 3055 + }, + { + "epoch": 0.8115788075952729, + "grad_norm": 1.2351976774044444, + "learning_rate": 1.3847614375910292e-05, + "loss": 0.3127811849117279, + "step": 3056 + }, + { + "epoch": 0.8118443765768159, + "grad_norm": 1.0840317870226388, + "learning_rate": 1.384356125818639e-05, + "loss": 0.2631932497024536, + "step": 3057 + }, + { + "epoch": 0.8121099455583588, + "grad_norm": 1.0251225163823416, + "learning_rate": 1.3839507399469213e-05, + "loss": 0.2856106162071228, + "step": 3058 + }, + { + "epoch": 0.8123755145399018, + "grad_norm": 1.2604810760435325, + "learning_rate": 1.3835452800540288e-05, + "loss": 0.28986629843711853, + "step": 3059 + }, + { + "epoch": 0.8126410835214447, + "grad_norm": 1.0804422287227695, + "learning_rate": 1.3831397462181298e-05, + "loss": 0.28411972522735596, + "step": 3060 + }, + { + "epoch": 0.8129066525029877, + "grad_norm": 1.117697190248139, + "learning_rate": 1.3827341385174063e-05, + "loss": 0.3234354853630066, + "step": 3061 + }, + { + "epoch": 0.8131722214845306, + "grad_norm": 
0.9917598533716923, + "learning_rate": 1.3823284570300551e-05, + "loss": 0.24779736995697021, + "step": 3062 + }, + { + "epoch": 0.8134377904660736, + "grad_norm": 1.1743500466494587, + "learning_rate": 1.3819227018342865e-05, + "loss": 0.3306904137134552, + "step": 3063 + }, + { + "epoch": 0.8137033594476165, + "grad_norm": 1.1120224667451313, + "learning_rate": 1.3815168730083254e-05, + "loss": 0.31705451011657715, + "step": 3064 + }, + { + "epoch": 0.8139689284291595, + "grad_norm": 1.1351768868234977, + "learning_rate": 1.3811109706304105e-05, + "loss": 0.29830047488212585, + "step": 3065 + }, + { + "epoch": 0.8142344974107024, + "grad_norm": 1.1496885073051233, + "learning_rate": 1.3807049947787954e-05, + "loss": 0.30605942010879517, + "step": 3066 + }, + { + "epoch": 0.8145000663922454, + "grad_norm": 1.0745429008877887, + "learning_rate": 1.3802989455317475e-05, + "loss": 0.3139193058013916, + "step": 3067 + }, + { + "epoch": 0.8147656353737883, + "grad_norm": 1.0541430221228831, + "learning_rate": 1.3798928229675478e-05, + "loss": 0.3175879716873169, + "step": 3068 + }, + { + "epoch": 0.8150312043553313, + "grad_norm": 1.0450888698469754, + "learning_rate": 1.3794866271644922e-05, + "loss": 0.26391106843948364, + "step": 3069 + }, + { + "epoch": 0.8152967733368742, + "grad_norm": 0.945534402365018, + "learning_rate": 1.3790803582008906e-05, + "loss": 0.24128863215446472, + "step": 3070 + }, + { + "epoch": 0.8155623423184172, + "grad_norm": 1.1627322372772537, + "learning_rate": 1.378674016155067e-05, + "loss": 0.3249368965625763, + "step": 3071 + }, + { + "epoch": 0.8158279112999601, + "grad_norm": 1.0060562228451158, + "learning_rate": 1.3782676011053592e-05, + "loss": 0.2871986925601959, + "step": 3072 + }, + { + "epoch": 0.8160934802815031, + "grad_norm": 1.1624248444882197, + "learning_rate": 1.377861113130119e-05, + "loss": 0.29047372937202454, + "step": 3073 + }, + { + "epoch": 0.816359049263046, + "grad_norm": 1.0925698386610025, + "learning_rate": 
1.3774545523077122e-05, + "loss": 0.3055281341075897, + "step": 3074 + }, + { + "epoch": 0.816624618244589, + "grad_norm": 0.9197098274775629, + "learning_rate": 1.37704791871652e-05, + "loss": 0.2565494179725647, + "step": 3075 + }, + { + "epoch": 0.8168901872261319, + "grad_norm": 1.0377185359248249, + "learning_rate": 1.3766412124349358e-05, + "loss": 0.3016049861907959, + "step": 3076 + }, + { + "epoch": 0.8171557562076749, + "grad_norm": 1.0790995041055653, + "learning_rate": 1.3762344335413677e-05, + "loss": 0.3021200895309448, + "step": 3077 + }, + { + "epoch": 0.817421325189218, + "grad_norm": 1.0643017770253544, + "learning_rate": 1.3758275821142382e-05, + "loss": 0.3024774193763733, + "step": 3078 + }, + { + "epoch": 0.8176868941707609, + "grad_norm": 1.0591328005001268, + "learning_rate": 1.3754206582319836e-05, + "loss": 0.33114269375801086, + "step": 3079 + }, + { + "epoch": 0.8179524631523039, + "grad_norm": 1.0815809107319383, + "learning_rate": 1.3750136619730534e-05, + "loss": 0.27339494228363037, + "step": 3080 + }, + { + "epoch": 0.8182180321338468, + "grad_norm": 1.170674128986789, + "learning_rate": 1.3746065934159123e-05, + "loss": 0.2827128767967224, + "step": 3081 + }, + { + "epoch": 0.8184836011153898, + "grad_norm": 1.1064880736532463, + "learning_rate": 1.3741994526390379e-05, + "loss": 0.2972746193408966, + "step": 3082 + }, + { + "epoch": 0.8187491700969327, + "grad_norm": 1.143548636761381, + "learning_rate": 1.3737922397209222e-05, + "loss": 0.29932117462158203, + "step": 3083 + }, + { + "epoch": 0.8190147390784757, + "grad_norm": 1.0415876434255473, + "learning_rate": 1.3733849547400713e-05, + "loss": 0.28307998180389404, + "step": 3084 + }, + { + "epoch": 0.8192803080600186, + "grad_norm": 1.1070561443231863, + "learning_rate": 1.3729775977750048e-05, + "loss": 0.2885883152484894, + "step": 3085 + }, + { + "epoch": 0.8195458770415616, + "grad_norm": 1.1106477390667713, + "learning_rate": 1.3725701689042564e-05, + "loss": 
0.28837913274765015, + "step": 3086 + }, + { + "epoch": 0.8198114460231045, + "grad_norm": 1.0553526039271008, + "learning_rate": 1.3721626682063733e-05, + "loss": 0.2775058150291443, + "step": 3087 + }, + { + "epoch": 0.8200770150046475, + "grad_norm": 1.153176622627066, + "learning_rate": 1.3717550957599172e-05, + "loss": 0.2813493609428406, + "step": 3088 + }, + { + "epoch": 0.8203425839861904, + "grad_norm": 1.1477738573738745, + "learning_rate": 1.371347451643463e-05, + "loss": 0.2677592933177948, + "step": 3089 + }, + { + "epoch": 0.8206081529677334, + "grad_norm": 1.184705398593534, + "learning_rate": 1.3709397359355998e-05, + "loss": 0.3104957938194275, + "step": 3090 + }, + { + "epoch": 0.8208737219492763, + "grad_norm": 1.1714327280441006, + "learning_rate": 1.3705319487149303e-05, + "loss": 0.29315799474716187, + "step": 3091 + }, + { + "epoch": 0.8211392909308193, + "grad_norm": 1.1179168081295616, + "learning_rate": 1.370124090060071e-05, + "loss": 0.3044348657131195, + "step": 3092 + }, + { + "epoch": 0.8214048599123622, + "grad_norm": 1.1122209585212142, + "learning_rate": 1.3697161600496525e-05, + "loss": 0.2918691635131836, + "step": 3093 + }, + { + "epoch": 0.8216704288939052, + "grad_norm": 1.0702091422822353, + "learning_rate": 1.3693081587623187e-05, + "loss": 0.2887750267982483, + "step": 3094 + }, + { + "epoch": 0.8219359978754481, + "grad_norm": 1.1155429990394359, + "learning_rate": 1.3689000862767274e-05, + "loss": 0.3055661916732788, + "step": 3095 + }, + { + "epoch": 0.8222015668569911, + "grad_norm": 1.0251756704247361, + "learning_rate": 1.3684919426715504e-05, + "loss": 0.271525114774704, + "step": 3096 + }, + { + "epoch": 0.822467135838534, + "grad_norm": 1.1269584199088303, + "learning_rate": 1.3680837280254726e-05, + "loss": 0.3220426142215729, + "step": 3097 + }, + { + "epoch": 0.822732704820077, + "grad_norm": 1.0149552227204566, + "learning_rate": 1.3676754424171935e-05, + "loss": 0.29091203212738037, + "step": 3098 + }, + { + 
"epoch": 0.8229982738016199, + "grad_norm": 1.051328362150218, + "learning_rate": 1.3672670859254252e-05, + "loss": 0.2928692102432251, + "step": 3099 + }, + { + "epoch": 0.8232638427831629, + "grad_norm": 1.0366528987524315, + "learning_rate": 1.3668586586288942e-05, + "loss": 0.28635919094085693, + "step": 3100 + }, + { + "epoch": 0.8235294117647058, + "grad_norm": 1.0374876833794577, + "learning_rate": 1.3664501606063402e-05, + "loss": 0.2912571430206299, + "step": 3101 + }, + { + "epoch": 0.8237949807462488, + "grad_norm": 1.051516198651511, + "learning_rate": 1.3660415919365178e-05, + "loss": 0.2783615291118622, + "step": 3102 + }, + { + "epoch": 0.8240605497277917, + "grad_norm": 1.088921494432588, + "learning_rate": 1.365632952698193e-05, + "loss": 0.3064395785331726, + "step": 3103 + }, + { + "epoch": 0.8243261187093347, + "grad_norm": 1.023130230207284, + "learning_rate": 1.3652242429701477e-05, + "loss": 0.2528907358646393, + "step": 3104 + }, + { + "epoch": 0.8245916876908777, + "grad_norm": 1.0503421945431453, + "learning_rate": 1.3648154628311754e-05, + "loss": 0.2648676633834839, + "step": 3105 + }, + { + "epoch": 0.8248572566724207, + "grad_norm": 1.2732480631249905, + "learning_rate": 1.3644066123600846e-05, + "loss": 0.33425620198249817, + "step": 3106 + }, + { + "epoch": 0.8251228256539637, + "grad_norm": 1.0925062122156084, + "learning_rate": 1.3639976916356965e-05, + "loss": 0.3108072280883789, + "step": 3107 + }, + { + "epoch": 0.8253883946355066, + "grad_norm": 1.0815679409684162, + "learning_rate": 1.3635887007368467e-05, + "loss": 0.2860543131828308, + "step": 3108 + }, + { + "epoch": 0.8256539636170496, + "grad_norm": 1.0711932859903586, + "learning_rate": 1.3631796397423833e-05, + "loss": 0.25440749526023865, + "step": 3109 + }, + { + "epoch": 0.8259195325985925, + "grad_norm": 1.1006663978120534, + "learning_rate": 1.3627705087311687e-05, + "loss": 0.2676115334033966, + "step": 3110 + }, + { + "epoch": 0.8261851015801355, + "grad_norm": 
1.1597529133358384, + "learning_rate": 1.3623613077820788e-05, + "loss": 0.28977078199386597, + "step": 3111 + }, + { + "epoch": 0.8264506705616784, + "grad_norm": 1.1046761011596355, + "learning_rate": 1.361952036974002e-05, + "loss": 0.30161401629447937, + "step": 3112 + }, + { + "epoch": 0.8267162395432214, + "grad_norm": 1.135120464396266, + "learning_rate": 1.3615426963858416e-05, + "loss": 0.28676310181617737, + "step": 3113 + }, + { + "epoch": 0.8269818085247643, + "grad_norm": 1.100109147839879, + "learning_rate": 1.361133286096513e-05, + "loss": 0.2957243323326111, + "step": 3114 + }, + { + "epoch": 0.8272473775063073, + "grad_norm": 1.0691905028493969, + "learning_rate": 1.3607238061849461e-05, + "loss": 0.3036375343799591, + "step": 3115 + }, + { + "epoch": 0.8275129464878502, + "grad_norm": 1.1142331461612014, + "learning_rate": 1.360314256730084e-05, + "loss": 0.31175294518470764, + "step": 3116 + }, + { + "epoch": 0.8277785154693932, + "grad_norm": 1.0665802680669934, + "learning_rate": 1.3599046378108825e-05, + "loss": 0.30212485790252686, + "step": 3117 + }, + { + "epoch": 0.8280440844509361, + "grad_norm": 1.1992776426845386, + "learning_rate": 1.3594949495063117e-05, + "loss": 0.3290692865848541, + "step": 3118 + }, + { + "epoch": 0.8283096534324791, + "grad_norm": 1.007005509411099, + "learning_rate": 1.3590851918953542e-05, + "loss": 0.25952839851379395, + "step": 3119 + }, + { + "epoch": 0.828575222414022, + "grad_norm": 1.0949064818424232, + "learning_rate": 1.3586753650570069e-05, + "loss": 0.27737247943878174, + "step": 3120 + }, + { + "epoch": 0.828840791395565, + "grad_norm": 1.0156990629875267, + "learning_rate": 1.3582654690702795e-05, + "loss": 0.29415374994277954, + "step": 3121 + }, + { + "epoch": 0.8291063603771079, + "grad_norm": 1.066804105313739, + "learning_rate": 1.3578555040141948e-05, + "loss": 0.29197627305984497, + "step": 3122 + }, + { + "epoch": 0.8293719293586509, + "grad_norm": 1.1089730397237387, + "learning_rate": 
1.3574454699677893e-05, + "loss": 0.30318522453308105, + "step": 3123 + }, + { + "epoch": 0.8296374983401938, + "grad_norm": 1.0916871079120407, + "learning_rate": 1.357035367010113e-05, + "loss": 0.3184241056442261, + "step": 3124 + }, + { + "epoch": 0.8299030673217368, + "grad_norm": 1.3286365770942894, + "learning_rate": 1.3566251952202288e-05, + "loss": 0.30330199003219604, + "step": 3125 + }, + { + "epoch": 0.8301686363032797, + "grad_norm": 1.1117453782986153, + "learning_rate": 1.356214954677213e-05, + "loss": 0.25366994738578796, + "step": 3126 + }, + { + "epoch": 0.8304342052848227, + "grad_norm": 1.109752753436135, + "learning_rate": 1.3558046454601552e-05, + "loss": 0.3213343918323517, + "step": 3127 + }, + { + "epoch": 0.8306997742663657, + "grad_norm": 1.0918389418395038, + "learning_rate": 1.355394267648158e-05, + "loss": 0.3012468218803406, + "step": 3128 + }, + { + "epoch": 0.8309653432479086, + "grad_norm": 1.1319633441718049, + "learning_rate": 1.3549838213203374e-05, + "loss": 0.3272971510887146, + "step": 3129 + }, + { + "epoch": 0.8312309122294516, + "grad_norm": 1.0778057413430624, + "learning_rate": 1.354573306555823e-05, + "loss": 0.30032482743263245, + "step": 3130 + }, + { + "epoch": 0.8314964812109945, + "grad_norm": 1.0778331818873157, + "learning_rate": 1.3541627234337567e-05, + "loss": 0.2820669412612915, + "step": 3131 + }, + { + "epoch": 0.8317620501925375, + "grad_norm": 1.0187129279356677, + "learning_rate": 1.3537520720332943e-05, + "loss": 0.2638673782348633, + "step": 3132 + }, + { + "epoch": 0.8320276191740804, + "grad_norm": 1.0843507637886551, + "learning_rate": 1.3533413524336043e-05, + "loss": 0.2766842246055603, + "step": 3133 + }, + { + "epoch": 0.8322931881556235, + "grad_norm": 1.2660530642163288, + "learning_rate": 1.3529305647138689e-05, + "loss": 0.330536425113678, + "step": 3134 + }, + { + "epoch": 0.8325587571371664, + "grad_norm": 1.0925834195413107, + "learning_rate": 1.3525197089532833e-05, + "loss": 
0.30375364422798157, + "step": 3135 + }, + { + "epoch": 0.8328243261187094, + "grad_norm": 1.1657669106128519, + "learning_rate": 1.3521087852310555e-05, + "loss": 0.3092171549797058, + "step": 3136 + }, + { + "epoch": 0.8330898951002523, + "grad_norm": 1.1686338102407274, + "learning_rate": 1.3516977936264062e-05, + "loss": 0.28651195764541626, + "step": 3137 + }, + { + "epoch": 0.8333554640817953, + "grad_norm": 1.0845327487717817, + "learning_rate": 1.3512867342185705e-05, + "loss": 0.2882133722305298, + "step": 3138 + }, + { + "epoch": 0.8336210330633382, + "grad_norm": 1.1325019700739036, + "learning_rate": 1.3508756070867955e-05, + "loss": 0.30633628368377686, + "step": 3139 + }, + { + "epoch": 0.8338866020448812, + "grad_norm": 1.090943303162736, + "learning_rate": 1.3504644123103415e-05, + "loss": 0.2819565236568451, + "step": 3140 + }, + { + "epoch": 0.8341521710264241, + "grad_norm": 1.0804420637943886, + "learning_rate": 1.3500531499684819e-05, + "loss": 0.29544374346733093, + "step": 3141 + }, + { + "epoch": 0.8344177400079671, + "grad_norm": 1.10400689114043, + "learning_rate": 1.3496418201405037e-05, + "loss": 0.29383376240730286, + "step": 3142 + }, + { + "epoch": 0.83468330898951, + "grad_norm": 0.9862964562028984, + "learning_rate": 1.3492304229057062e-05, + "loss": 0.24945983290672302, + "step": 3143 + }, + { + "epoch": 0.834948877971053, + "grad_norm": 1.2055608503616826, + "learning_rate": 1.3488189583434023e-05, + "loss": 0.338919997215271, + "step": 3144 + }, + { + "epoch": 0.835214446952596, + "grad_norm": 1.071166648249549, + "learning_rate": 1.348407426532917e-05, + "loss": 0.29555821418762207, + "step": 3145 + }, + { + "epoch": 0.8354800159341389, + "grad_norm": 1.0650010322896095, + "learning_rate": 1.3479958275535887e-05, + "loss": 0.31038299202919006, + "step": 3146 + }, + { + "epoch": 0.8357455849156818, + "grad_norm": 1.021351909092412, + "learning_rate": 1.347584161484769e-05, + "loss": 0.2595089077949524, + "step": 3147 + }, + { + 
"epoch": 0.8360111538972248, + "grad_norm": 1.1885926674667484, + "learning_rate": 1.3471724284058227e-05, + "loss": 0.3287338614463806, + "step": 3148 + }, + { + "epoch": 0.8362767228787678, + "grad_norm": 1.1997618392346763, + "learning_rate": 1.3467606283961268e-05, + "loss": 0.3109680414199829, + "step": 3149 + }, + { + "epoch": 0.8365422918603107, + "grad_norm": 1.0762954067078139, + "learning_rate": 1.346348761535071e-05, + "loss": 0.2584227919578552, + "step": 3150 + }, + { + "epoch": 0.8368078608418537, + "grad_norm": 1.137771769139511, + "learning_rate": 1.345936827902059e-05, + "loss": 0.3038554787635803, + "step": 3151 + }, + { + "epoch": 0.8370734298233966, + "grad_norm": 1.029659281383911, + "learning_rate": 1.3455248275765067e-05, + "loss": 0.28267812728881836, + "step": 3152 + }, + { + "epoch": 0.8373389988049396, + "grad_norm": 1.163661242492436, + "learning_rate": 1.3451127606378425e-05, + "loss": 0.3328094184398651, + "step": 3153 + }, + { + "epoch": 0.8376045677864825, + "grad_norm": 1.084045978606854, + "learning_rate": 1.3447006271655082e-05, + "loss": 0.3235865533351898, + "step": 3154 + }, + { + "epoch": 0.8378701367680255, + "grad_norm": 1.037100355990568, + "learning_rate": 1.3442884272389583e-05, + "loss": 0.25394493341445923, + "step": 3155 + }, + { + "epoch": 0.8381357057495684, + "grad_norm": 1.1250984496593863, + "learning_rate": 1.3438761609376604e-05, + "loss": 0.29841768741607666, + "step": 3156 + }, + { + "epoch": 0.8384012747311114, + "grad_norm": 1.1999100818775306, + "learning_rate": 1.3434638283410942e-05, + "loss": 0.3161924183368683, + "step": 3157 + }, + { + "epoch": 0.8386668437126543, + "grad_norm": 0.9017579941601053, + "learning_rate": 1.3430514295287526e-05, + "loss": 0.22781039774417877, + "step": 3158 + }, + { + "epoch": 0.8389324126941973, + "grad_norm": 1.0534948555265085, + "learning_rate": 1.3426389645801415e-05, + "loss": 0.2947984039783478, + "step": 3159 + }, + { + "epoch": 0.8391979816757402, + "grad_norm": 
1.0286789238265646, + "learning_rate": 1.342226433574779e-05, + "loss": 0.2827467918395996, + "step": 3160 + }, + { + "epoch": 0.8394635506572832, + "grad_norm": 1.0453932660244052, + "learning_rate": 1.3418138365921962e-05, + "loss": 0.3149232268333435, + "step": 3161 + }, + { + "epoch": 0.8397291196388262, + "grad_norm": 1.2487567497076437, + "learning_rate": 1.3414011737119373e-05, + "loss": 0.33154603838920593, + "step": 3162 + }, + { + "epoch": 0.8399946886203692, + "grad_norm": 1.074983718750332, + "learning_rate": 1.3409884450135581e-05, + "loss": 0.28532034158706665, + "step": 3163 + }, + { + "epoch": 0.8402602576019121, + "grad_norm": 1.0695327636228384, + "learning_rate": 1.3405756505766286e-05, + "loss": 0.2539500892162323, + "step": 3164 + }, + { + "epoch": 0.8405258265834551, + "grad_norm": 1.0653532722719707, + "learning_rate": 1.3401627904807302e-05, + "loss": 0.3023888170719147, + "step": 3165 + }, + { + "epoch": 0.840791395564998, + "grad_norm": 1.0811844194203637, + "learning_rate": 1.3397498648054579e-05, + "loss": 0.3088506758213043, + "step": 3166 + }, + { + "epoch": 0.841056964546541, + "grad_norm": 1.2249048833028835, + "learning_rate": 1.3393368736304184e-05, + "loss": 0.3223467469215393, + "step": 3167 + }, + { + "epoch": 0.841322533528084, + "grad_norm": 1.0772937869709083, + "learning_rate": 1.3389238170352318e-05, + "loss": 0.2541419565677643, + "step": 3168 + }, + { + "epoch": 0.8415881025096269, + "grad_norm": 1.0463826735598363, + "learning_rate": 1.3385106950995308e-05, + "loss": 0.2915497422218323, + "step": 3169 + }, + { + "epoch": 0.8418536714911699, + "grad_norm": 1.1726858597591174, + "learning_rate": 1.3380975079029598e-05, + "loss": 0.2907465994358063, + "step": 3170 + }, + { + "epoch": 0.8421192404727128, + "grad_norm": 1.0581221380369799, + "learning_rate": 1.337684255525177e-05, + "loss": 0.2587417960166931, + "step": 3171 + }, + { + "epoch": 0.8423848094542558, + "grad_norm": 1.1080472137531636, + "learning_rate": 
1.3372709380458522e-05, + "loss": 0.2932469844818115, + "step": 3172 + }, + { + "epoch": 0.8426503784357987, + "grad_norm": 1.2359417241278925, + "learning_rate": 1.3368575555446681e-05, + "loss": 0.31451860070228577, + "step": 3173 + }, + { + "epoch": 0.8429159474173417, + "grad_norm": 1.067745190297883, + "learning_rate": 1.3364441081013205e-05, + "loss": 0.24513742327690125, + "step": 3174 + }, + { + "epoch": 0.8431815163988846, + "grad_norm": 1.0795526820997523, + "learning_rate": 1.3360305957955166e-05, + "loss": 0.29781201481819153, + "step": 3175 + }, + { + "epoch": 0.8434470853804276, + "grad_norm": 1.3176130252584213, + "learning_rate": 1.3356170187069775e-05, + "loss": 0.30925726890563965, + "step": 3176 + }, + { + "epoch": 0.8437126543619705, + "grad_norm": 1.1110632932678028, + "learning_rate": 1.3352033769154347e-05, + "loss": 0.2822851538658142, + "step": 3177 + }, + { + "epoch": 0.8439782233435135, + "grad_norm": 1.0033731418220575, + "learning_rate": 1.3347896705006344e-05, + "loss": 0.2511071264743805, + "step": 3178 + }, + { + "epoch": 0.8442437923250564, + "grad_norm": 1.1921629041957855, + "learning_rate": 1.3343758995423344e-05, + "loss": 0.3002505302429199, + "step": 3179 + }, + { + "epoch": 0.8445093613065994, + "grad_norm": 0.9942107511416755, + "learning_rate": 1.3339620641203043e-05, + "loss": 0.285504549741745, + "step": 3180 + }, + { + "epoch": 0.8447749302881423, + "grad_norm": 1.1880306222164103, + "learning_rate": 1.3335481643143271e-05, + "loss": 0.31988856196403503, + "step": 3181 + }, + { + "epoch": 0.8450404992696853, + "grad_norm": 1.0905691447057935, + "learning_rate": 1.3331342002041973e-05, + "loss": 0.29330819845199585, + "step": 3182 + }, + { + "epoch": 0.8453060682512282, + "grad_norm": 1.049547579497453, + "learning_rate": 1.3327201718697232e-05, + "loss": 0.28694427013397217, + "step": 3183 + }, + { + "epoch": 0.8455716372327712, + "grad_norm": 1.0561569710297949, + "learning_rate": 1.3323060793907239e-05, + "loss": 
0.24912211298942566, + "step": 3184 + }, + { + "epoch": 0.8458372062143141, + "grad_norm": 1.1346018526864223, + "learning_rate": 1.3318919228470315e-05, + "loss": 0.28117647767066956, + "step": 3185 + }, + { + "epoch": 0.8461027751958571, + "grad_norm": 1.2524387900920857, + "learning_rate": 1.3314777023184907e-05, + "loss": 0.3176446557044983, + "step": 3186 + }, + { + "epoch": 0.8463683441774, + "grad_norm": 1.0728463380702977, + "learning_rate": 1.3310634178849583e-05, + "loss": 0.31205689907073975, + "step": 3187 + }, + { + "epoch": 0.846633913158943, + "grad_norm": 1.1500545538779043, + "learning_rate": 1.3306490696263034e-05, + "loss": 0.29942232370376587, + "step": 3188 + }, + { + "epoch": 0.8468994821404859, + "grad_norm": 1.161750107962421, + "learning_rate": 1.3302346576224077e-05, + "loss": 0.3149508833885193, + "step": 3189 + }, + { + "epoch": 0.847165051122029, + "grad_norm": 1.0924626607758976, + "learning_rate": 1.3298201819531646e-05, + "loss": 0.2930619418621063, + "step": 3190 + }, + { + "epoch": 0.847430620103572, + "grad_norm": 1.0958680594537196, + "learning_rate": 1.3294056426984804e-05, + "loss": 0.3089582920074463, + "step": 3191 + }, + { + "epoch": 0.8476961890851149, + "grad_norm": 1.2175163313381927, + "learning_rate": 1.3289910399382733e-05, + "loss": 0.3120991587638855, + "step": 3192 + }, + { + "epoch": 0.8479617580666579, + "grad_norm": 1.0535688994558223, + "learning_rate": 1.3285763737524738e-05, + "loss": 0.2728833258152008, + "step": 3193 + }, + { + "epoch": 0.8482273270482008, + "grad_norm": 1.0457465617551238, + "learning_rate": 1.3281616442210246e-05, + "loss": 0.2833358347415924, + "step": 3194 + }, + { + "epoch": 0.8484928960297438, + "grad_norm": 1.0714039101779447, + "learning_rate": 1.3277468514238803e-05, + "loss": 0.26218950748443604, + "step": 3195 + }, + { + "epoch": 0.8487584650112867, + "grad_norm": 1.0938436245702892, + "learning_rate": 1.3273319954410088e-05, + "loss": 0.3120720386505127, + "step": 3196 + }, + { + 
"epoch": 0.8490240339928297, + "grad_norm": 1.0412833763909957, + "learning_rate": 1.3269170763523892e-05, + "loss": 0.2748696208000183, + "step": 3197 + }, + { + "epoch": 0.8492896029743726, + "grad_norm": 1.0148051769031237, + "learning_rate": 1.326502094238013e-05, + "loss": 0.2892690598964691, + "step": 3198 + }, + { + "epoch": 0.8495551719559156, + "grad_norm": 1.068648430192615, + "learning_rate": 1.3260870491778835e-05, + "loss": 0.26583510637283325, + "step": 3199 + }, + { + "epoch": 0.8498207409374585, + "grad_norm": 1.105620955007001, + "learning_rate": 1.325671941252017e-05, + "loss": 0.31602388620376587, + "step": 3200 + }, + { + "epoch": 0.8500863099190015, + "grad_norm": 1.068517421778971, + "learning_rate": 1.3252567705404409e-05, + "loss": 0.2980017364025116, + "step": 3201 + }, + { + "epoch": 0.8503518789005444, + "grad_norm": 1.0740685936810315, + "learning_rate": 1.3248415371231957e-05, + "loss": 0.27081727981567383, + "step": 3202 + }, + { + "epoch": 0.8506174478820874, + "grad_norm": 1.2590520587844396, + "learning_rate": 1.3244262410803333e-05, + "loss": 0.28895002603530884, + "step": 3203 + }, + { + "epoch": 0.8508830168636303, + "grad_norm": 1.1373552047630993, + "learning_rate": 1.3240108824919176e-05, + "loss": 0.30804315209388733, + "step": 3204 + }, + { + "epoch": 0.8511485858451733, + "grad_norm": 1.1074447190812993, + "learning_rate": 1.3235954614380253e-05, + "loss": 0.28173667192459106, + "step": 3205 + }, + { + "epoch": 0.8514141548267162, + "grad_norm": 1.097058715769224, + "learning_rate": 1.3231799779987445e-05, + "loss": 0.3113047778606415, + "step": 3206 + }, + { + "epoch": 0.8516797238082592, + "grad_norm": 1.0285862677327642, + "learning_rate": 1.3227644322541754e-05, + "loss": 0.247248113155365, + "step": 3207 + }, + { + "epoch": 0.8519452927898021, + "grad_norm": 1.1032823581833329, + "learning_rate": 1.3223488242844309e-05, + "loss": 0.27078187465667725, + "step": 3208 + }, + { + "epoch": 0.8522108617713451, + "grad_norm": 
1.0635139884249352, + "learning_rate": 1.321933154169634e-05, + "loss": 0.2749357223510742, + "step": 3209 + }, + { + "epoch": 0.852476430752888, + "grad_norm": 1.0129100217319345, + "learning_rate": 1.3215174219899224e-05, + "loss": 0.25382956862449646, + "step": 3210 + }, + { + "epoch": 0.852741999734431, + "grad_norm": 1.0528151094235563, + "learning_rate": 1.3211016278254436e-05, + "loss": 0.3237685263156891, + "step": 3211 + }, + { + "epoch": 0.8530075687159739, + "grad_norm": 1.273911241149791, + "learning_rate": 1.3206857717563581e-05, + "loss": 0.2899032235145569, + "step": 3212 + }, + { + "epoch": 0.8532731376975169, + "grad_norm": 1.040323856520164, + "learning_rate": 1.3202698538628376e-05, + "loss": 0.25997933745384216, + "step": 3213 + }, + { + "epoch": 0.8535387066790598, + "grad_norm": 1.121125084608177, + "learning_rate": 1.3198538742250668e-05, + "loss": 0.3228183090686798, + "step": 3214 + }, + { + "epoch": 0.8538042756606028, + "grad_norm": 1.1002230220524851, + "learning_rate": 1.3194378329232413e-05, + "loss": 0.31993368268013, + "step": 3215 + }, + { + "epoch": 0.8540698446421457, + "grad_norm": 1.157115702913611, + "learning_rate": 1.3190217300375694e-05, + "loss": 0.29520007967948914, + "step": 3216 + }, + { + "epoch": 0.8543354136236887, + "grad_norm": 1.0898926058638614, + "learning_rate": 1.3186055656482702e-05, + "loss": 0.31073522567749023, + "step": 3217 + }, + { + "epoch": 0.8546009826052318, + "grad_norm": 1.1465583376043518, + "learning_rate": 1.3181893398355752e-05, + "loss": 0.34354183077812195, + "step": 3218 + }, + { + "epoch": 0.8548665515867747, + "grad_norm": 1.179928846812524, + "learning_rate": 1.3177730526797286e-05, + "loss": 0.27676698565483093, + "step": 3219 + }, + { + "epoch": 0.8551321205683177, + "grad_norm": 1.0792983255501365, + "learning_rate": 1.3173567042609852e-05, + "loss": 0.27313530445098877, + "step": 3220 + }, + { + "epoch": 0.8553976895498606, + "grad_norm": 0.9249374113484707, + "learning_rate": 
1.3169402946596119e-05, + "loss": 0.2517555058002472, + "step": 3221 + }, + { + "epoch": 0.8556632585314036, + "grad_norm": 1.0684778793194236, + "learning_rate": 1.3165238239558878e-05, + "loss": 0.29700207710266113, + "step": 3222 + }, + { + "epoch": 0.8559288275129465, + "grad_norm": 1.1262235464302217, + "learning_rate": 1.3161072922301037e-05, + "loss": 0.3182620704174042, + "step": 3223 + }, + { + "epoch": 0.8561943964944895, + "grad_norm": 1.123570804553303, + "learning_rate": 1.3156906995625615e-05, + "loss": 0.3112961947917938, + "step": 3224 + }, + { + "epoch": 0.8564599654760324, + "grad_norm": 1.1746597736734636, + "learning_rate": 1.3152740460335757e-05, + "loss": 0.3080563545227051, + "step": 3225 + }, + { + "epoch": 0.8567255344575754, + "grad_norm": 1.1646363575237453, + "learning_rate": 1.3148573317234726e-05, + "loss": 0.31197935342788696, + "step": 3226 + }, + { + "epoch": 0.8569911034391183, + "grad_norm": 1.0455051980244612, + "learning_rate": 1.3144405567125886e-05, + "loss": 0.27377086877822876, + "step": 3227 + }, + { + "epoch": 0.8572566724206613, + "grad_norm": 1.050528412475655, + "learning_rate": 1.3140237210812741e-05, + "loss": 0.25303182005882263, + "step": 3228 + }, + { + "epoch": 0.8575222414022042, + "grad_norm": 1.0664458431943622, + "learning_rate": 1.3136068249098899e-05, + "loss": 0.27949726581573486, + "step": 3229 + }, + { + "epoch": 0.8577878103837472, + "grad_norm": 1.0907347405782384, + "learning_rate": 1.3131898682788082e-05, + "loss": 0.278359055519104, + "step": 3230 + }, + { + "epoch": 0.8580533793652901, + "grad_norm": 1.081462335761227, + "learning_rate": 1.312772851268414e-05, + "loss": 0.28507643938064575, + "step": 3231 + }, + { + "epoch": 0.8583189483468331, + "grad_norm": 1.0256133822907842, + "learning_rate": 1.3123557739591026e-05, + "loss": 0.2689790427684784, + "step": 3232 + }, + { + "epoch": 0.858584517328376, + "grad_norm": 1.1569049456144243, + "learning_rate": 1.3119386364312821e-05, + "loss": 
0.31956973671913147, + "step": 3233 + }, + { + "epoch": 0.858850086309919, + "grad_norm": 1.0914807974802394, + "learning_rate": 1.3115214387653711e-05, + "loss": 0.2837323546409607, + "step": 3234 + }, + { + "epoch": 0.8591156552914619, + "grad_norm": 1.0015578039784754, + "learning_rate": 1.3111041810418011e-05, + "loss": 0.2756272554397583, + "step": 3235 + }, + { + "epoch": 0.8593812242730049, + "grad_norm": 1.0283979772106548, + "learning_rate": 1.3106868633410139e-05, + "loss": 0.2664923369884491, + "step": 3236 + }, + { + "epoch": 0.8596467932545478, + "grad_norm": 1.2217960050611696, + "learning_rate": 1.3102694857434637e-05, + "loss": 0.2842246890068054, + "step": 3237 + }, + { + "epoch": 0.8599123622360908, + "grad_norm": 1.0632739499737671, + "learning_rate": 1.3098520483296159e-05, + "loss": 0.3066467344760895, + "step": 3238 + }, + { + "epoch": 0.8601779312176338, + "grad_norm": 1.148754786147734, + "learning_rate": 1.3094345511799478e-05, + "loss": 0.3042510151863098, + "step": 3239 + }, + { + "epoch": 0.8604435001991767, + "grad_norm": 0.9995895975923785, + "learning_rate": 1.3090169943749475e-05, + "loss": 0.2753696143627167, + "step": 3240 + }, + { + "epoch": 0.8607090691807197, + "grad_norm": 1.0325788591675433, + "learning_rate": 1.3085993779951154e-05, + "loss": 0.2561766803264618, + "step": 3241 + }, + { + "epoch": 0.8609746381622626, + "grad_norm": 1.2136300404308455, + "learning_rate": 1.3081817021209626e-05, + "loss": 0.297982782125473, + "step": 3242 + }, + { + "epoch": 0.8612402071438056, + "grad_norm": 1.0615498924909679, + "learning_rate": 1.3077639668330124e-05, + "loss": 0.2961920499801636, + "step": 3243 + }, + { + "epoch": 0.8615057761253485, + "grad_norm": 1.1445145037694135, + "learning_rate": 1.3073461722117991e-05, + "loss": 0.2868857979774475, + "step": 3244 + }, + { + "epoch": 0.8617713451068915, + "grad_norm": 0.9475657969770804, + "learning_rate": 1.3069283183378683e-05, + "loss": 0.22930951416492462, + "step": 3245 + }, + { 
+ "epoch": 0.8620369140884345, + "grad_norm": 1.1416904771862697, + "learning_rate": 1.306510405291778e-05, + "loss": 0.29737964272499084, + "step": 3246 + }, + { + "epoch": 0.8623024830699775, + "grad_norm": 1.0401904023883137, + "learning_rate": 1.3060924331540964e-05, + "loss": 0.2764522433280945, + "step": 3247 + }, + { + "epoch": 0.8625680520515204, + "grad_norm": 0.9863739655208709, + "learning_rate": 1.3056744020054039e-05, + "loss": 0.27608832716941833, + "step": 3248 + }, + { + "epoch": 0.8628336210330634, + "grad_norm": 1.0115944755696356, + "learning_rate": 1.3052563119262915e-05, + "loss": 0.25667035579681396, + "step": 3249 + }, + { + "epoch": 0.8630991900146063, + "grad_norm": 1.1289498412687866, + "learning_rate": 1.3048381629973622e-05, + "loss": 0.3015863597393036, + "step": 3250 + }, + { + "epoch": 0.8633647589961493, + "grad_norm": 1.123802742380982, + "learning_rate": 1.3044199552992307e-05, + "loss": 0.2798422873020172, + "step": 3251 + }, + { + "epoch": 0.8636303279776922, + "grad_norm": 1.1385670465264601, + "learning_rate": 1.304001688912522e-05, + "loss": 0.2856596112251282, + "step": 3252 + }, + { + "epoch": 0.8638958969592352, + "grad_norm": 1.2094473565150297, + "learning_rate": 1.303583363917873e-05, + "loss": 0.30247554183006287, + "step": 3253 + }, + { + "epoch": 0.8641614659407781, + "grad_norm": 1.1517937069448307, + "learning_rate": 1.303164980395932e-05, + "loss": 0.26817965507507324, + "step": 3254 + }, + { + "epoch": 0.8644270349223211, + "grad_norm": 1.197653632931973, + "learning_rate": 1.3027465384273579e-05, + "loss": 0.26919034123420715, + "step": 3255 + }, + { + "epoch": 0.864692603903864, + "grad_norm": 1.1206851183742237, + "learning_rate": 1.3023280380928223e-05, + "loss": 0.29495447874069214, + "step": 3256 + }, + { + "epoch": 0.864958172885407, + "grad_norm": 1.0428738517831404, + "learning_rate": 1.3019094794730063e-05, + "loss": 0.26766717433929443, + "step": 3257 + }, + { + "epoch": 0.86522374186695, + "grad_norm": 
0.9998039586765358, + "learning_rate": 1.3014908626486032e-05, + "loss": 0.2573341131210327, + "step": 3258 + }, + { + "epoch": 0.8654893108484929, + "grad_norm": 1.226366277313196, + "learning_rate": 1.3010721877003177e-05, + "loss": 0.32776498794555664, + "step": 3259 + }, + { + "epoch": 0.8657548798300359, + "grad_norm": 1.1631189448763641, + "learning_rate": 1.3006534547088651e-05, + "loss": 0.3107950687408447, + "step": 3260 + }, + { + "epoch": 0.8660204488115788, + "grad_norm": 1.0476224109192296, + "learning_rate": 1.3002346637549726e-05, + "loss": 0.26143360137939453, + "step": 3261 + }, + { + "epoch": 0.8662860177931218, + "grad_norm": 1.035123297672666, + "learning_rate": 1.2998158149193773e-05, + "loss": 0.25666722655296326, + "step": 3262 + }, + { + "epoch": 0.8665515867746647, + "grad_norm": 1.1492097701405037, + "learning_rate": 1.2993969082828296e-05, + "loss": 0.2982695698738098, + "step": 3263 + }, + { + "epoch": 0.8668171557562077, + "grad_norm": 1.0937256102841277, + "learning_rate": 1.2989779439260888e-05, + "loss": 0.30144304037094116, + "step": 3264 + }, + { + "epoch": 0.8670827247377506, + "grad_norm": 1.0563159913050848, + "learning_rate": 1.2985589219299264e-05, + "loss": 0.30421534180641174, + "step": 3265 + }, + { + "epoch": 0.8673482937192936, + "grad_norm": 1.0698350081311019, + "learning_rate": 1.298139842375125e-05, + "loss": 0.23653842508792877, + "step": 3266 + }, + { + "epoch": 0.8676138627008365, + "grad_norm": 1.2059661362441823, + "learning_rate": 1.2977207053424781e-05, + "loss": 0.284118115901947, + "step": 3267 + }, + { + "epoch": 0.8678794316823795, + "grad_norm": 1.0387152548948486, + "learning_rate": 1.2973015109127907e-05, + "loss": 0.30857348442077637, + "step": 3268 + }, + { + "epoch": 0.8681450006639224, + "grad_norm": 1.0987728632322369, + "learning_rate": 1.2968822591668784e-05, + "loss": 0.2826589047908783, + "step": 3269 + }, + { + "epoch": 0.8684105696454654, + "grad_norm": 1.109218087764862, + "learning_rate": 
1.2964629501855678e-05, + "loss": 0.27634552121162415, + "step": 3270 + }, + { + "epoch": 0.8686761386270083, + "grad_norm": 1.0217259699141916, + "learning_rate": 1.296043584049697e-05, + "loss": 0.25823545455932617, + "step": 3271 + }, + { + "epoch": 0.8689417076085513, + "grad_norm": 1.148249635090711, + "learning_rate": 1.2956241608401145e-05, + "loss": 0.28939294815063477, + "step": 3272 + }, + { + "epoch": 0.8692072765900942, + "grad_norm": 1.0622455952024017, + "learning_rate": 1.2952046806376806e-05, + "loss": 0.3042459785938263, + "step": 3273 + }, + { + "epoch": 0.8694728455716373, + "grad_norm": 1.042505415392428, + "learning_rate": 1.2947851435232658e-05, + "loss": 0.2834415137767792, + "step": 3274 + }, + { + "epoch": 0.8697384145531802, + "grad_norm": 1.144903021800522, + "learning_rate": 1.2943655495777518e-05, + "loss": 0.28226330876350403, + "step": 3275 + }, + { + "epoch": 0.8700039835347232, + "grad_norm": 1.023547316743189, + "learning_rate": 1.2939458988820317e-05, + "loss": 0.2796105742454529, + "step": 3276 + }, + { + "epoch": 0.8702695525162661, + "grad_norm": 0.9903193313068561, + "learning_rate": 1.2935261915170091e-05, + "loss": 0.24790553748607635, + "step": 3277 + }, + { + "epoch": 0.8705351214978091, + "grad_norm": 1.0279177898991045, + "learning_rate": 1.2931064275635987e-05, + "loss": 0.25101587176322937, + "step": 3278 + }, + { + "epoch": 0.870800690479352, + "grad_norm": 1.1728597267839225, + "learning_rate": 1.2926866071027257e-05, + "loss": 0.3060816526412964, + "step": 3279 + }, + { + "epoch": 0.871066259460895, + "grad_norm": 1.1510511467115991, + "learning_rate": 1.2922667302153268e-05, + "loss": 0.3137212097644806, + "step": 3280 + }, + { + "epoch": 0.871331828442438, + "grad_norm": 0.9977159840643061, + "learning_rate": 1.2918467969823497e-05, + "loss": 0.2391548752784729, + "step": 3281 + }, + { + "epoch": 0.8715973974239809, + "grad_norm": 1.2003880700717509, + "learning_rate": 1.2914268074847516e-05, + "loss": 
0.3219330608844757, + "step": 3282 + }, + { + "epoch": 0.8718629664055239, + "grad_norm": 1.126134187698585, + "learning_rate": 1.2910067618035025e-05, + "loss": 0.2934436798095703, + "step": 3283 + }, + { + "epoch": 0.8721285353870668, + "grad_norm": 1.2016016844780073, + "learning_rate": 1.2905866600195815e-05, + "loss": 0.2919486165046692, + "step": 3284 + }, + { + "epoch": 0.8723941043686098, + "grad_norm": 1.1895929482131946, + "learning_rate": 1.2901665022139796e-05, + "loss": 0.2840641438961029, + "step": 3285 + }, + { + "epoch": 0.8726596733501527, + "grad_norm": 1.0215741253911979, + "learning_rate": 1.2897462884676983e-05, + "loss": 0.24151530861854553, + "step": 3286 + }, + { + "epoch": 0.8729252423316957, + "grad_norm": 1.0040194757671277, + "learning_rate": 1.28932601886175e-05, + "loss": 0.24515505135059357, + "step": 3287 + }, + { + "epoch": 0.8731908113132386, + "grad_norm": 1.2173512735867882, + "learning_rate": 1.2889056934771577e-05, + "loss": 0.2561264634132385, + "step": 3288 + }, + { + "epoch": 0.8734563802947816, + "grad_norm": 1.1645401251165897, + "learning_rate": 1.2884853123949547e-05, + "loss": 0.2798641622066498, + "step": 3289 + }, + { + "epoch": 0.8737219492763245, + "grad_norm": 1.2693161910394721, + "learning_rate": 1.288064875696186e-05, + "loss": 0.35207298398017883, + "step": 3290 + }, + { + "epoch": 0.8739875182578675, + "grad_norm": 1.0184365377421387, + "learning_rate": 1.2876443834619066e-05, + "loss": 0.2778821289539337, + "step": 3291 + }, + { + "epoch": 0.8742530872394104, + "grad_norm": 1.044209880952949, + "learning_rate": 1.2872238357731825e-05, + "loss": 0.2691737413406372, + "step": 3292 + }, + { + "epoch": 0.8745186562209534, + "grad_norm": 1.1392637940929287, + "learning_rate": 1.2868032327110904e-05, + "loss": 0.25476595759391785, + "step": 3293 + }, + { + "epoch": 0.8747842252024963, + "grad_norm": 1.012064080488804, + "learning_rate": 1.2863825743567174e-05, + "loss": 0.258474737405777, + "step": 3294 + }, + { + 
"epoch": 0.8750497941840393, + "grad_norm": 1.17733236715245, + "learning_rate": 1.285961860791162e-05, + "loss": 0.32421568036079407, + "step": 3295 + }, + { + "epoch": 0.8753153631655822, + "grad_norm": 1.0747747984737868, + "learning_rate": 1.2855410920955323e-05, + "loss": 0.3090333342552185, + "step": 3296 + }, + { + "epoch": 0.8755809321471252, + "grad_norm": 1.1729934635240566, + "learning_rate": 1.2851202683509476e-05, + "loss": 0.26548707485198975, + "step": 3297 + }, + { + "epoch": 0.8758465011286681, + "grad_norm": 2.497627852681845, + "learning_rate": 1.2846993896385378e-05, + "loss": 0.3002355098724365, + "step": 3298 + }, + { + "epoch": 0.8761120701102111, + "grad_norm": 1.1706582997439863, + "learning_rate": 1.2842784560394433e-05, + "loss": 0.2924933135509491, + "step": 3299 + }, + { + "epoch": 0.876377639091754, + "grad_norm": 1.1544391256229967, + "learning_rate": 1.2838574676348155e-05, + "loss": 0.2886514663696289, + "step": 3300 + }, + { + "epoch": 0.876643208073297, + "grad_norm": 1.1131138367993383, + "learning_rate": 1.2834364245058155e-05, + "loss": 0.29821154475212097, + "step": 3301 + }, + { + "epoch": 0.87690877705484, + "grad_norm": 1.0278540671542709, + "learning_rate": 1.2830153267336159e-05, + "loss": 0.2656530737876892, + "step": 3302 + }, + { + "epoch": 0.877174346036383, + "grad_norm": 1.2018449655833119, + "learning_rate": 1.282594174399399e-05, + "loss": 0.3437826633453369, + "step": 3303 + }, + { + "epoch": 0.877439915017926, + "grad_norm": 1.0564301800372577, + "learning_rate": 1.2821729675843581e-05, + "loss": 0.29773175716400146, + "step": 3304 + }, + { + "epoch": 0.8777054839994689, + "grad_norm": 1.0707167209814024, + "learning_rate": 1.2817517063696973e-05, + "loss": 0.29772818088531494, + "step": 3305 + }, + { + "epoch": 0.8779710529810119, + "grad_norm": 1.1530012432828134, + "learning_rate": 1.2813303908366303e-05, + "loss": 0.3266611099243164, + "step": 3306 + }, + { + "epoch": 0.8782366219625548, + "grad_norm": 
1.0044541774243023, + "learning_rate": 1.2809090210663818e-05, + "loss": 0.26599690318107605, + "step": 3307 + }, + { + "epoch": 0.8785021909440978, + "grad_norm": 1.0142651525790767, + "learning_rate": 1.2804875971401872e-05, + "loss": 0.27988117933273315, + "step": 3308 + }, + { + "epoch": 0.8787677599256407, + "grad_norm": 1.0221522532224918, + "learning_rate": 1.2800661191392916e-05, + "loss": 0.2630334496498108, + "step": 3309 + }, + { + "epoch": 0.8790333289071837, + "grad_norm": 1.022950247187023, + "learning_rate": 1.2796445871449517e-05, + "loss": 0.2628091871738434, + "step": 3310 + }, + { + "epoch": 0.8792988978887266, + "grad_norm": 1.1994310454875075, + "learning_rate": 1.2792230012384333e-05, + "loss": 0.3443898558616638, + "step": 3311 + }, + { + "epoch": 0.8795644668702696, + "grad_norm": 1.0673533832636588, + "learning_rate": 1.2788013615010136e-05, + "loss": 0.2966022491455078, + "step": 3312 + }, + { + "epoch": 0.8798300358518125, + "grad_norm": 1.1030087744198647, + "learning_rate": 1.2783796680139793e-05, + "loss": 0.2995494604110718, + "step": 3313 + }, + { + "epoch": 0.8800956048333555, + "grad_norm": 1.0504434000468303, + "learning_rate": 1.2779579208586283e-05, + "loss": 0.2652590870857239, + "step": 3314 + }, + { + "epoch": 0.8803611738148984, + "grad_norm": 1.1388460976467547, + "learning_rate": 1.2775361201162684e-05, + "loss": 0.3145690858364105, + "step": 3315 + }, + { + "epoch": 0.8806267427964414, + "grad_norm": 1.040210802651612, + "learning_rate": 1.2771142658682175e-05, + "loss": 0.25744086503982544, + "step": 3316 + }, + { + "epoch": 0.8808923117779843, + "grad_norm": 1.1618029117732733, + "learning_rate": 1.2766923581958046e-05, + "loss": 0.3129793405532837, + "step": 3317 + }, + { + "epoch": 0.8811578807595273, + "grad_norm": 1.166975234876197, + "learning_rate": 1.2762703971803684e-05, + "loss": 0.233384907245636, + "step": 3318 + }, + { + "epoch": 0.8814234497410702, + "grad_norm": 0.9242808009438505, + "learning_rate": 
1.2758483829032579e-05, + "loss": 0.2422962635755539, + "step": 3319 + }, + { + "epoch": 0.8816890187226132, + "grad_norm": 1.0844595421589949, + "learning_rate": 1.2754263154458328e-05, + "loss": 0.2801973819732666, + "step": 3320 + }, + { + "epoch": 0.8819545877041561, + "grad_norm": 1.294346594070355, + "learning_rate": 1.2750041948894621e-05, + "loss": 0.30659937858581543, + "step": 3321 + }, + { + "epoch": 0.8822201566856991, + "grad_norm": 1.0921019252616484, + "learning_rate": 1.274582021315526e-05, + "loss": 0.28527066111564636, + "step": 3322 + }, + { + "epoch": 0.882485725667242, + "grad_norm": 1.0598264473011552, + "learning_rate": 1.2741597948054146e-05, + "loss": 0.23065675795078278, + "step": 3323 + }, + { + "epoch": 0.882751294648785, + "grad_norm": 1.0918730747592962, + "learning_rate": 1.2737375154405283e-05, + "loss": 0.2727832794189453, + "step": 3324 + }, + { + "epoch": 0.8830168636303279, + "grad_norm": 1.0789259788038712, + "learning_rate": 1.273315183302277e-05, + "loss": 0.26809507608413696, + "step": 3325 + }, + { + "epoch": 0.8832824326118709, + "grad_norm": 1.1647625824499415, + "learning_rate": 1.2728927984720823e-05, + "loss": 0.3250407576560974, + "step": 3326 + }, + { + "epoch": 0.8835480015934138, + "grad_norm": 1.0915300736309757, + "learning_rate": 1.2724703610313742e-05, + "loss": 0.2651330232620239, + "step": 3327 + }, + { + "epoch": 0.8838135705749568, + "grad_norm": 1.206298710080754, + "learning_rate": 1.2720478710615944e-05, + "loss": 0.27337920665740967, + "step": 3328 + }, + { + "epoch": 0.8840791395564997, + "grad_norm": 1.0282478968996285, + "learning_rate": 1.2716253286441935e-05, + "loss": 0.2664092183113098, + "step": 3329 + }, + { + "epoch": 0.8843447085380428, + "grad_norm": 1.1354570950284573, + "learning_rate": 1.2712027338606323e-05, + "loss": 0.27927765250205994, + "step": 3330 + }, + { + "epoch": 0.8846102775195858, + "grad_norm": 1.1204979208217445, + "learning_rate": 1.270780086792383e-05, + "loss": 
0.27241113781929016, + "step": 3331 + }, + { + "epoch": 0.8848758465011287, + "grad_norm": 1.0795162414965664, + "learning_rate": 1.2703573875209264e-05, + "loss": 0.28279373049736023, + "step": 3332 + }, + { + "epoch": 0.8851414154826717, + "grad_norm": 1.1634487658284207, + "learning_rate": 1.2699346361277538e-05, + "loss": 0.3011108934879303, + "step": 3333 + }, + { + "epoch": 0.8854069844642146, + "grad_norm": 2.772716513531517, + "learning_rate": 1.2695118326943671e-05, + "loss": 0.3071288764476776, + "step": 3334 + }, + { + "epoch": 0.8856725534457576, + "grad_norm": 1.0969950934626527, + "learning_rate": 1.2690889773022778e-05, + "loss": 0.2688761353492737, + "step": 3335 + }, + { + "epoch": 0.8859381224273005, + "grad_norm": 1.1363327585955358, + "learning_rate": 1.2686660700330074e-05, + "loss": 0.2788669466972351, + "step": 3336 + }, + { + "epoch": 0.8862036914088435, + "grad_norm": 1.0884694079711634, + "learning_rate": 1.268243110968087e-05, + "loss": 0.2801516652107239, + "step": 3337 + }, + { + "epoch": 0.8864692603903864, + "grad_norm": 1.0414904749451368, + "learning_rate": 1.2678201001890587e-05, + "loss": 0.2876908779144287, + "step": 3338 + }, + { + "epoch": 0.8867348293719294, + "grad_norm": 1.1731879069090343, + "learning_rate": 1.2673970377774733e-05, + "loss": 0.27709734439849854, + "step": 3339 + }, + { + "epoch": 0.8870003983534723, + "grad_norm": 1.2053408848372587, + "learning_rate": 1.266973923814893e-05, + "loss": 0.3191622793674469, + "step": 3340 + }, + { + "epoch": 0.8872659673350153, + "grad_norm": 1.098682297791164, + "learning_rate": 1.2665507583828889e-05, + "loss": 0.2873385548591614, + "step": 3341 + }, + { + "epoch": 0.8875315363165582, + "grad_norm": 1.1730973936717166, + "learning_rate": 1.2661275415630421e-05, + "loss": 0.2922922372817993, + "step": 3342 + }, + { + "epoch": 0.8877971052981012, + "grad_norm": 1.1127017834272521, + "learning_rate": 1.2657042734369443e-05, + "loss": 0.305694043636322, + "step": 3343 + }, + { + 
"epoch": 0.8880626742796441, + "grad_norm": 1.120364019457983, + "learning_rate": 1.2652809540861958e-05, + "loss": 0.29108062386512756, + "step": 3344 + }, + { + "epoch": 0.8883282432611871, + "grad_norm": 1.076655765525218, + "learning_rate": 1.2648575835924084e-05, + "loss": 0.24170495569705963, + "step": 3345 + }, + { + "epoch": 0.88859381224273, + "grad_norm": 1.4853370236272063, + "learning_rate": 1.2644341620372025e-05, + "loss": 0.2987719476222992, + "step": 3346 + }, + { + "epoch": 0.888859381224273, + "grad_norm": 0.9743774864126274, + "learning_rate": 1.2640106895022088e-05, + "loss": 0.21037599444389343, + "step": 3347 + }, + { + "epoch": 0.889124950205816, + "grad_norm": 1.034527053965976, + "learning_rate": 1.2635871660690677e-05, + "loss": 0.25263655185699463, + "step": 3348 + }, + { + "epoch": 0.8893905191873589, + "grad_norm": 1.2196740502064325, + "learning_rate": 1.2631635918194301e-05, + "loss": 0.30169543623924255, + "step": 3349 + }, + { + "epoch": 0.8896560881689018, + "grad_norm": 1.0624381650731511, + "learning_rate": 1.2627399668349554e-05, + "loss": 0.26982420682907104, + "step": 3350 + }, + { + "epoch": 0.8899216571504448, + "grad_norm": 1.1785068724165282, + "learning_rate": 1.262316291197314e-05, + "loss": 0.3281899690628052, + "step": 3351 + }, + { + "epoch": 0.8901872261319878, + "grad_norm": 1.1157278400935415, + "learning_rate": 1.2618925649881852e-05, + "loss": 0.30140435695648193, + "step": 3352 + }, + { + "epoch": 0.8904527951135307, + "grad_norm": 0.9928732296573972, + "learning_rate": 1.261468788289259e-05, + "loss": 0.22343885898590088, + "step": 3353 + }, + { + "epoch": 0.8907183640950737, + "grad_norm": 1.0410264886026745, + "learning_rate": 1.261044961182234e-05, + "loss": 0.2889901399612427, + "step": 3354 + }, + { + "epoch": 0.8909839330766166, + "grad_norm": 1.0933214790144683, + "learning_rate": 1.260621083748819e-05, + "loss": 0.27896153926849365, + "step": 3355 + }, + { + "epoch": 0.8912495020581596, + "grad_norm": 
1.077111437166839, + "learning_rate": 1.2601971560707328e-05, + "loss": 0.29390811920166016, + "step": 3356 + }, + { + "epoch": 0.8915150710397025, + "grad_norm": 1.0468332572471015, + "learning_rate": 1.2597731782297036e-05, + "loss": 0.2872384190559387, + "step": 3357 + }, + { + "epoch": 0.8917806400212455, + "grad_norm": 1.3094137802442116, + "learning_rate": 1.2593491503074698e-05, + "loss": 0.29753726720809937, + "step": 3358 + }, + { + "epoch": 0.8920462090027885, + "grad_norm": 1.1441306843080605, + "learning_rate": 1.2589250723857782e-05, + "loss": 0.31631946563720703, + "step": 3359 + }, + { + "epoch": 0.8923117779843315, + "grad_norm": 1.1374138683367387, + "learning_rate": 1.2585009445463867e-05, + "loss": 0.2932048738002777, + "step": 3360 + }, + { + "epoch": 0.8925773469658744, + "grad_norm": 1.0483655110874528, + "learning_rate": 1.2580767668710614e-05, + "loss": 0.2902034521102905, + "step": 3361 + }, + { + "epoch": 0.8928429159474174, + "grad_norm": 1.0712531988705474, + "learning_rate": 1.2576525394415795e-05, + "loss": 0.2596299648284912, + "step": 3362 + }, + { + "epoch": 0.8931084849289603, + "grad_norm": 1.1916540375753872, + "learning_rate": 1.2572282623397268e-05, + "loss": 0.29102641344070435, + "step": 3363 + }, + { + "epoch": 0.8933740539105033, + "grad_norm": 1.236954620143465, + "learning_rate": 1.2568039356472985e-05, + "loss": 0.2970406711101532, + "step": 3364 + }, + { + "epoch": 0.8936396228920462, + "grad_norm": 1.1384210267422126, + "learning_rate": 1.2563795594461003e-05, + "loss": 0.2916618585586548, + "step": 3365 + }, + { + "epoch": 0.8939051918735892, + "grad_norm": 1.1769911575713834, + "learning_rate": 1.2559551338179468e-05, + "loss": 0.3217374086380005, + "step": 3366 + }, + { + "epoch": 0.8941707608551321, + "grad_norm": 1.1228623922561494, + "learning_rate": 1.255530658844662e-05, + "loss": 0.3000059425830841, + "step": 3367 + }, + { + "epoch": 0.8944363298366751, + "grad_norm": 1.2170346898517979, + "learning_rate": 
1.2551061346080804e-05, + "loss": 0.2848728895187378, + "step": 3368 + }, + { + "epoch": 0.894701898818218, + "grad_norm": 1.3197542136745113, + "learning_rate": 1.2546815611900442e-05, + "loss": 0.3328903317451477, + "step": 3369 + }, + { + "epoch": 0.894967467799761, + "grad_norm": 1.0838958961687528, + "learning_rate": 1.2542569386724069e-05, + "loss": 0.2920045256614685, + "step": 3370 + }, + { + "epoch": 0.895233036781304, + "grad_norm": 1.0679716869166582, + "learning_rate": 1.2538322671370305e-05, + "loss": 0.30370092391967773, + "step": 3371 + }, + { + "epoch": 0.8954986057628469, + "grad_norm": 1.069215534600395, + "learning_rate": 1.2534075466657866e-05, + "loss": 0.24454624950885773, + "step": 3372 + }, + { + "epoch": 0.8957641747443899, + "grad_norm": 1.172481734803523, + "learning_rate": 1.2529827773405566e-05, + "loss": 0.30908581614494324, + "step": 3373 + }, + { + "epoch": 0.8960297437259328, + "grad_norm": 1.1095939186212227, + "learning_rate": 1.2525579592432304e-05, + "loss": 0.2792360782623291, + "step": 3374 + }, + { + "epoch": 0.8962953127074758, + "grad_norm": 1.0658472517819026, + "learning_rate": 1.2521330924557087e-05, + "loss": 0.285555362701416, + "step": 3375 + }, + { + "epoch": 0.8965608816890187, + "grad_norm": 1.1649386203925687, + "learning_rate": 1.2517081770599002e-05, + "loss": 0.3159451484680176, + "step": 3376 + }, + { + "epoch": 0.8968264506705617, + "grad_norm": 1.2867424735092035, + "learning_rate": 1.2512832131377237e-05, + "loss": 0.35929200053215027, + "step": 3377 + }, + { + "epoch": 0.8970920196521046, + "grad_norm": 1.0781651079446009, + "learning_rate": 1.2508582007711074e-05, + "loss": 0.28624874353408813, + "step": 3378 + }, + { + "epoch": 0.8973575886336476, + "grad_norm": 1.0156684050998903, + "learning_rate": 1.2504331400419884e-05, + "loss": 0.27670109272003174, + "step": 3379 + }, + { + "epoch": 0.8976231576151905, + "grad_norm": 1.0786636895703534, + "learning_rate": 1.2500080310323139e-05, + "loss": 
0.2894589304924011, + "step": 3380 + }, + { + "epoch": 0.8978887265967335, + "grad_norm": 1.1385795160382524, + "learning_rate": 1.2495828738240396e-05, + "loss": 0.31378716230392456, + "step": 3381 + }, + { + "epoch": 0.8981542955782764, + "grad_norm": 1.3149597134232174, + "learning_rate": 1.2491576684991306e-05, + "loss": 0.33676713705062866, + "step": 3382 + }, + { + "epoch": 0.8984198645598194, + "grad_norm": 0.9814689350619926, + "learning_rate": 1.2487324151395618e-05, + "loss": 0.2875351011753082, + "step": 3383 + }, + { + "epoch": 0.8986854335413623, + "grad_norm": 1.1646557221945626, + "learning_rate": 1.2483071138273168e-05, + "loss": 0.29729989171028137, + "step": 3384 + }, + { + "epoch": 0.8989510025229053, + "grad_norm": 1.0864970585536224, + "learning_rate": 1.2478817646443888e-05, + "loss": 0.3227398991584778, + "step": 3385 + }, + { + "epoch": 0.8992165715044482, + "grad_norm": 1.1586445900518523, + "learning_rate": 1.2474563676727803e-05, + "loss": 0.2664690315723419, + "step": 3386 + }, + { + "epoch": 0.8994821404859913, + "grad_norm": 1.1748792923054732, + "learning_rate": 1.2470309229945021e-05, + "loss": 0.29543352127075195, + "step": 3387 + }, + { + "epoch": 0.8997477094675342, + "grad_norm": 0.9899792334789409, + "learning_rate": 1.2466054306915756e-05, + "loss": 0.26658856868743896, + "step": 3388 + }, + { + "epoch": 0.9000132784490772, + "grad_norm": 1.123207894421506, + "learning_rate": 1.2461798908460305e-05, + "loss": 0.2899627387523651, + "step": 3389 + }, + { + "epoch": 0.9002788474306201, + "grad_norm": 1.1137567335053833, + "learning_rate": 1.245754303539906e-05, + "loss": 0.2708336114883423, + "step": 3390 + }, + { + "epoch": 0.9005444164121631, + "grad_norm": 1.1459655330577214, + "learning_rate": 1.2453286688552502e-05, + "loss": 0.28124746680259705, + "step": 3391 + }, + { + "epoch": 0.900809985393706, + "grad_norm": 1.0470005335558448, + "learning_rate": 1.2449029868741202e-05, + "loss": 0.2599399983882904, + "step": 3392 + }, 
+ { + "epoch": 0.901075554375249, + "grad_norm": 0.9576026734877732, + "learning_rate": 1.2444772576785828e-05, + "loss": 0.25035667419433594, + "step": 3393 + }, + { + "epoch": 0.901341123356792, + "grad_norm": 1.1148471766082222, + "learning_rate": 1.2440514813507136e-05, + "loss": 0.2772521376609802, + "step": 3394 + }, + { + "epoch": 0.9016066923383349, + "grad_norm": 1.103787889433512, + "learning_rate": 1.2436256579725969e-05, + "loss": 0.3282839357852936, + "step": 3395 + }, + { + "epoch": 0.9018722613198779, + "grad_norm": 1.080988888326222, + "learning_rate": 1.2431997876263269e-05, + "loss": 0.2507914900779724, + "step": 3396 + }, + { + "epoch": 0.9021378303014208, + "grad_norm": 1.1123927965933749, + "learning_rate": 1.2427738703940055e-05, + "loss": 0.2620914876461029, + "step": 3397 + }, + { + "epoch": 0.9024033992829638, + "grad_norm": 1.0713438905056172, + "learning_rate": 1.2423479063577458e-05, + "loss": 0.26561641693115234, + "step": 3398 + }, + { + "epoch": 0.9026689682645067, + "grad_norm": 1.151582271756571, + "learning_rate": 1.2419218955996677e-05, + "loss": 0.2998678386211395, + "step": 3399 + }, + { + "epoch": 0.9029345372460497, + "grad_norm": 1.0484454707225395, + "learning_rate": 1.2414958382019017e-05, + "loss": 0.2368398755788803, + "step": 3400 + }, + { + "epoch": 0.9032001062275926, + "grad_norm": 1.0429929570241405, + "learning_rate": 1.241069734246586e-05, + "loss": 0.2623558044433594, + "step": 3401 + }, + { + "epoch": 0.9034656752091356, + "grad_norm": 1.0283944167565489, + "learning_rate": 1.2406435838158686e-05, + "loss": 0.2693074941635132, + "step": 3402 + }, + { + "epoch": 0.9037312441906785, + "grad_norm": 1.1211950634171715, + "learning_rate": 1.2402173869919063e-05, + "loss": 0.2933652698993683, + "step": 3403 + }, + { + "epoch": 0.9039968131722215, + "grad_norm": 1.0858313001207585, + "learning_rate": 1.2397911438568651e-05, + "loss": 0.28515487909317017, + "step": 3404 + }, + { + "epoch": 0.9042623821537644, + 
"grad_norm": 1.1243916508543286, + "learning_rate": 1.2393648544929193e-05, + "loss": 0.282942533493042, + "step": 3405 + }, + { + "epoch": 0.9045279511353074, + "grad_norm": 1.112018853789466, + "learning_rate": 1.2389385189822526e-05, + "loss": 0.28300392627716064, + "step": 3406 + }, + { + "epoch": 0.9047935201168503, + "grad_norm": 1.0490322847853841, + "learning_rate": 1.2385121374070577e-05, + "loss": 0.25697019696235657, + "step": 3407 + }, + { + "epoch": 0.9050590890983933, + "grad_norm": 1.15038978087342, + "learning_rate": 1.2380857098495355e-05, + "loss": 0.31156057119369507, + "step": 3408 + }, + { + "epoch": 0.9053246580799362, + "grad_norm": 1.1544066045654053, + "learning_rate": 1.2376592363918967e-05, + "loss": 0.2943422794342041, + "step": 3409 + }, + { + "epoch": 0.9055902270614792, + "grad_norm": 0.9968457114080438, + "learning_rate": 1.2372327171163596e-05, + "loss": 0.2792074680328369, + "step": 3410 + }, + { + "epoch": 0.9058557960430221, + "grad_norm": 1.0328662447203703, + "learning_rate": 1.2368061521051526e-05, + "loss": 0.2547443211078644, + "step": 3411 + }, + { + "epoch": 0.9061213650245651, + "grad_norm": 1.068901181257851, + "learning_rate": 1.2363795414405125e-05, + "loss": 0.25637373328208923, + "step": 3412 + }, + { + "epoch": 0.906386934006108, + "grad_norm": 1.1660475318941728, + "learning_rate": 1.2359528852046844e-05, + "loss": 0.3269123435020447, + "step": 3413 + }, + { + "epoch": 0.906652502987651, + "grad_norm": 1.0197427295072394, + "learning_rate": 1.2355261834799232e-05, + "loss": 0.28538423776626587, + "step": 3414 + }, + { + "epoch": 0.906918071969194, + "grad_norm": 1.1343354993973966, + "learning_rate": 1.2350994363484915e-05, + "loss": 0.2961096167564392, + "step": 3415 + }, + { + "epoch": 0.907183640950737, + "grad_norm": 1.0930595123597455, + "learning_rate": 1.2346726438926613e-05, + "loss": 0.3134537935256958, + "step": 3416 + }, + { + "epoch": 0.90744920993228, + "grad_norm": 1.018679268761631, + 
"learning_rate": 1.2342458061947129e-05, + "loss": 0.2614031434059143, + "step": 3417 + }, + { + "epoch": 0.9077147789138229, + "grad_norm": 1.0403373381004117, + "learning_rate": 1.2338189233369357e-05, + "loss": 0.27166056632995605, + "step": 3418 + }, + { + "epoch": 0.9079803478953659, + "grad_norm": 1.0735839504787106, + "learning_rate": 1.2333919954016277e-05, + "loss": 0.26053497195243835, + "step": 3419 + }, + { + "epoch": 0.9082459168769088, + "grad_norm": 1.1112591016079632, + "learning_rate": 1.2329650224710956e-05, + "loss": 0.3109636902809143, + "step": 3420 + }, + { + "epoch": 0.9085114858584518, + "grad_norm": 1.081828404421451, + "learning_rate": 1.232538004627655e-05, + "loss": 0.2576507329940796, + "step": 3421 + }, + { + "epoch": 0.9087770548399947, + "grad_norm": 1.0981308884589311, + "learning_rate": 1.2321109419536292e-05, + "loss": 0.2525216341018677, + "step": 3422 + }, + { + "epoch": 0.9090426238215377, + "grad_norm": 1.0732531844020532, + "learning_rate": 1.2316838345313517e-05, + "loss": 0.2483336180448532, + "step": 3423 + }, + { + "epoch": 0.9093081928030806, + "grad_norm": 1.1592146270526706, + "learning_rate": 1.2312566824431631e-05, + "loss": 0.26372796297073364, + "step": 3424 + }, + { + "epoch": 0.9095737617846236, + "grad_norm": 1.1537675520237485, + "learning_rate": 1.2308294857714138e-05, + "loss": 0.2933644950389862, + "step": 3425 + }, + { + "epoch": 0.9098393307661665, + "grad_norm": 1.0330883162146767, + "learning_rate": 1.2304022445984618e-05, + "loss": 0.2543371915817261, + "step": 3426 + }, + { + "epoch": 0.9101048997477095, + "grad_norm": 1.1689002717846686, + "learning_rate": 1.2299749590066745e-05, + "loss": 0.29246431589126587, + "step": 3427 + }, + { + "epoch": 0.9103704687292524, + "grad_norm": 1.0141798843769114, + "learning_rate": 1.2295476290784273e-05, + "loss": 0.2475431263446808, + "step": 3428 + }, + { + "epoch": 0.9106360377107954, + "grad_norm": 1.1845034794986053, + "learning_rate": 1.2291202548961042e-05, 
+ "loss": 0.3312363624572754, + "step": 3429 + }, + { + "epoch": 0.9109016066923383, + "grad_norm": 1.0459618447051044, + "learning_rate": 1.2286928365420987e-05, + "loss": 0.25192639231681824, + "step": 3430 + }, + { + "epoch": 0.9111671756738813, + "grad_norm": 1.2038671566275931, + "learning_rate": 1.2282653740988114e-05, + "loss": 0.23189345002174377, + "step": 3431 + }, + { + "epoch": 0.9114327446554242, + "grad_norm": 1.17767221221897, + "learning_rate": 1.2278378676486522e-05, + "loss": 0.2888398766517639, + "step": 3432 + }, + { + "epoch": 0.9116983136369672, + "grad_norm": 1.1295595703903276, + "learning_rate": 1.2274103172740387e-05, + "loss": 0.2857785224914551, + "step": 3433 + }, + { + "epoch": 0.9119638826185101, + "grad_norm": 1.039533312390003, + "learning_rate": 1.2269827230573986e-05, + "loss": 0.23961025476455688, + "step": 3434 + }, + { + "epoch": 0.9122294516000531, + "grad_norm": 1.1192521835175562, + "learning_rate": 1.2265550850811663e-05, + "loss": 0.2791004478931427, + "step": 3435 + }, + { + "epoch": 0.912495020581596, + "grad_norm": 1.052040685054951, + "learning_rate": 1.2261274034277858e-05, + "loss": 0.2875480651855469, + "step": 3436 + }, + { + "epoch": 0.912760589563139, + "grad_norm": 1.12188070500717, + "learning_rate": 1.2256996781797086e-05, + "loss": 0.29422929883003235, + "step": 3437 + }, + { + "epoch": 0.9130261585446819, + "grad_norm": 1.2976046274469295, + "learning_rate": 1.225271909419395e-05, + "loss": 0.27114444971084595, + "step": 3438 + }, + { + "epoch": 0.9132917275262249, + "grad_norm": 1.0684416452719028, + "learning_rate": 1.2248440972293146e-05, + "loss": 0.3007166385650635, + "step": 3439 + }, + { + "epoch": 0.9135572965077678, + "grad_norm": 1.1408150577224654, + "learning_rate": 1.224416241691944e-05, + "loss": 0.28550055623054504, + "step": 3440 + }, + { + "epoch": 0.9138228654893108, + "grad_norm": 1.1159473328967766, + "learning_rate": 1.2239883428897687e-05, + "loss": 0.2861761450767517, + "step": 3441 + 
}, + { + "epoch": 0.9140884344708538, + "grad_norm": 1.1186358936011263, + "learning_rate": 1.2235604009052823e-05, + "loss": 0.3288506865501404, + "step": 3442 + }, + { + "epoch": 0.9143540034523968, + "grad_norm": 1.2101661293343442, + "learning_rate": 1.2231324158209876e-05, + "loss": 0.33189019560813904, + "step": 3443 + }, + { + "epoch": 0.9146195724339398, + "grad_norm": 0.9931883995236199, + "learning_rate": 1.2227043877193947e-05, + "loss": 0.20846885442733765, + "step": 3444 + }, + { + "epoch": 0.9148851414154827, + "grad_norm": 0.9579263575635046, + "learning_rate": 1.2222763166830223e-05, + "loss": 0.25184741616249084, + "step": 3445 + }, + { + "epoch": 0.9151507103970257, + "grad_norm": 1.0775642304955, + "learning_rate": 1.2218482027943977e-05, + "loss": 0.2954701781272888, + "step": 3446 + }, + { + "epoch": 0.9154162793785686, + "grad_norm": 1.055908963813806, + "learning_rate": 1.221420046136056e-05, + "loss": 0.263336718082428, + "step": 3447 + }, + { + "epoch": 0.9156818483601116, + "grad_norm": 1.2181481624195412, + "learning_rate": 1.2209918467905405e-05, + "loss": 0.31178128719329834, + "step": 3448 + }, + { + "epoch": 0.9159474173416545, + "grad_norm": 1.1248939907914326, + "learning_rate": 1.2205636048404037e-05, + "loss": 0.30373090505599976, + "step": 3449 + }, + { + "epoch": 0.9162129863231975, + "grad_norm": 1.1316476755108689, + "learning_rate": 1.2201353203682052e-05, + "loss": 0.31057459115982056, + "step": 3450 + }, + { + "epoch": 0.9164785553047404, + "grad_norm": 1.0432699213656527, + "learning_rate": 1.2197069934565126e-05, + "loss": 0.26834744215011597, + "step": 3451 + }, + { + "epoch": 0.9167441242862834, + "grad_norm": 1.0235490532622333, + "learning_rate": 1.2192786241879033e-05, + "loss": 0.30224066972732544, + "step": 3452 + }, + { + "epoch": 0.9170096932678263, + "grad_norm": 1.1136690118430506, + "learning_rate": 1.2188502126449616e-05, + "loss": 0.28249508142471313, + "step": 3453 + }, + { + "epoch": 0.9172752622493693, + 
"grad_norm": 1.0210144972314754, + "learning_rate": 1.2184217589102798e-05, + "loss": 0.24823793768882751, + "step": 3454 + }, + { + "epoch": 0.9175408312309122, + "grad_norm": 1.1878687209379464, + "learning_rate": 1.2179932630664589e-05, + "loss": 0.32556289434432983, + "step": 3455 + }, + { + "epoch": 0.9178064002124552, + "grad_norm": 1.0899520670240972, + "learning_rate": 1.217564725196108e-05, + "loss": 0.29420584440231323, + "step": 3456 + }, + { + "epoch": 0.9180719691939981, + "grad_norm": 1.028247015068141, + "learning_rate": 1.2171361453818437e-05, + "loss": 0.29294469952583313, + "step": 3457 + }, + { + "epoch": 0.9183375381755411, + "grad_norm": 1.0399893903415627, + "learning_rate": 1.2167075237062918e-05, + "loss": 0.3173823952674866, + "step": 3458 + }, + { + "epoch": 0.918603107157084, + "grad_norm": 1.1571492956528482, + "learning_rate": 1.2162788602520851e-05, + "loss": 0.32950159907341003, + "step": 3459 + }, + { + "epoch": 0.918868676138627, + "grad_norm": 1.0478118037587627, + "learning_rate": 1.2158501551018647e-05, + "loss": 0.3011544942855835, + "step": 3460 + }, + { + "epoch": 0.91913424512017, + "grad_norm": 1.0135067760604335, + "learning_rate": 1.2154214083382802e-05, + "loss": 0.25775954127311707, + "step": 3461 + }, + { + "epoch": 0.9193998141017129, + "grad_norm": 1.0514508898774713, + "learning_rate": 1.214992620043989e-05, + "loss": 0.286748468875885, + "step": 3462 + }, + { + "epoch": 0.9196653830832558, + "grad_norm": 1.1050004366949897, + "learning_rate": 1.214563790301656e-05, + "loss": 0.30588221549987793, + "step": 3463 + }, + { + "epoch": 0.9199309520647988, + "grad_norm": 1.0079666808538812, + "learning_rate": 1.214134919193955e-05, + "loss": 0.23506608605384827, + "step": 3464 + }, + { + "epoch": 0.9201965210463418, + "grad_norm": 1.037364536446331, + "learning_rate": 1.2137060068035672e-05, + "loss": 0.2612350285053253, + "step": 3465 + }, + { + "epoch": 0.9204620900278847, + "grad_norm": 1.0810309706979688, + 
"learning_rate": 1.2132770532131815e-05, + "loss": 0.3268318772315979, + "step": 3466 + }, + { + "epoch": 0.9207276590094277, + "grad_norm": 1.0723394192428657, + "learning_rate": 1.2128480585054951e-05, + "loss": 0.2970179319381714, + "step": 3467 + }, + { + "epoch": 0.9209932279909706, + "grad_norm": 1.0036147426745694, + "learning_rate": 1.2124190227632138e-05, + "loss": 0.2910206615924835, + "step": 3468 + }, + { + "epoch": 0.9212587969725136, + "grad_norm": 1.1089890742219906, + "learning_rate": 1.2119899460690496e-05, + "loss": 0.3000222444534302, + "step": 3469 + }, + { + "epoch": 0.9215243659540565, + "grad_norm": 1.1166450826016983, + "learning_rate": 1.2115608285057242e-05, + "loss": 0.30304765701293945, + "step": 3470 + }, + { + "epoch": 0.9217899349355996, + "grad_norm": 0.9893826238823328, + "learning_rate": 1.2111316701559663e-05, + "loss": 0.26393038034439087, + "step": 3471 + }, + { + "epoch": 0.9220555039171425, + "grad_norm": 1.1384217438340345, + "learning_rate": 1.2107024711025128e-05, + "loss": 0.3111063838005066, + "step": 3472 + }, + { + "epoch": 0.9223210728986855, + "grad_norm": 0.9599961450252364, + "learning_rate": 1.2102732314281073e-05, + "loss": 0.2897321581840515, + "step": 3473 + }, + { + "epoch": 0.9225866418802284, + "grad_norm": 1.1396280258666305, + "learning_rate": 1.2098439512155028e-05, + "loss": 0.2835896611213684, + "step": 3474 + }, + { + "epoch": 0.9228522108617714, + "grad_norm": 1.0165194494005183, + "learning_rate": 1.2094146305474596e-05, + "loss": 0.27648821473121643, + "step": 3475 + }, + { + "epoch": 0.9231177798433143, + "grad_norm": 1.1221504506656363, + "learning_rate": 1.2089852695067457e-05, + "loss": 0.2528097629547119, + "step": 3476 + }, + { + "epoch": 0.9233833488248573, + "grad_norm": 1.1105562286202324, + "learning_rate": 1.2085558681761361e-05, + "loss": 0.2750067412853241, + "step": 3477 + }, + { + "epoch": 0.9236489178064002, + "grad_norm": 1.1199967050670125, + "learning_rate": 1.2081264266384148e-05, 
+ "loss": 0.3115938901901245, + "step": 3478 + }, + { + "epoch": 0.9239144867879432, + "grad_norm": 1.1203071431737686, + "learning_rate": 1.2076969449763734e-05, + "loss": 0.2858419418334961, + "step": 3479 + }, + { + "epoch": 0.9241800557694861, + "grad_norm": 1.051118385350032, + "learning_rate": 1.2072674232728105e-05, + "loss": 0.24990032613277435, + "step": 3480 + }, + { + "epoch": 0.9244456247510291, + "grad_norm": 1.2991104394876676, + "learning_rate": 1.206837861610533e-05, + "loss": 0.23106999695301056, + "step": 3481 + }, + { + "epoch": 0.924711193732572, + "grad_norm": 1.0396779513824141, + "learning_rate": 1.2064082600723546e-05, + "loss": 0.2737967371940613, + "step": 3482 + }, + { + "epoch": 0.924976762714115, + "grad_norm": 1.1890061925781694, + "learning_rate": 1.2059786187410984e-05, + "loss": 0.2810317873954773, + "step": 3483 + }, + { + "epoch": 0.925242331695658, + "grad_norm": 1.1358698893490913, + "learning_rate": 1.2055489376995938e-05, + "loss": 0.30852559208869934, + "step": 3484 + }, + { + "epoch": 0.9255079006772009, + "grad_norm": 1.1003932874354148, + "learning_rate": 1.2051192170306784e-05, + "loss": 0.2956348657608032, + "step": 3485 + }, + { + "epoch": 0.9257734696587439, + "grad_norm": 1.18261367067389, + "learning_rate": 1.204689456817197e-05, + "loss": 0.2825953960418701, + "step": 3486 + }, + { + "epoch": 0.9260390386402868, + "grad_norm": 1.2502616697865143, + "learning_rate": 1.2042596571420025e-05, + "loss": 0.3351168632507324, + "step": 3487 + }, + { + "epoch": 0.9263046076218298, + "grad_norm": 1.2354469073344645, + "learning_rate": 1.2038298180879548e-05, + "loss": 0.2718926668167114, + "step": 3488 + }, + { + "epoch": 0.9265701766033727, + "grad_norm": 1.1387239259181285, + "learning_rate": 1.2033999397379223e-05, + "loss": 0.29036587476730347, + "step": 3489 + }, + { + "epoch": 0.9268357455849157, + "grad_norm": 0.9499049433325992, + "learning_rate": 1.2029700221747804e-05, + "loss": 0.22917689383029938, + "step": 3490 + 
}, + { + "epoch": 0.9271013145664586, + "grad_norm": 1.2322966399012754, + "learning_rate": 1.2025400654814119e-05, + "loss": 0.2963443398475647, + "step": 3491 + }, + { + "epoch": 0.9273668835480016, + "grad_norm": 1.100231072465541, + "learning_rate": 1.2021100697407075e-05, + "loss": 0.2866464853286743, + "step": 3492 + }, + { + "epoch": 0.9276324525295445, + "grad_norm": 1.1717529025248212, + "learning_rate": 1.2016800350355654e-05, + "loss": 0.3069216012954712, + "step": 3493 + }, + { + "epoch": 0.9278980215110875, + "grad_norm": 1.0745448017128252, + "learning_rate": 1.2012499614488913e-05, + "loss": 0.27206870913505554, + "step": 3494 + }, + { + "epoch": 0.9281635904926304, + "grad_norm": 1.0995365532444106, + "learning_rate": 1.2008198490635978e-05, + "loss": 0.32130372524261475, + "step": 3495 + }, + { + "epoch": 0.9284291594741734, + "grad_norm": 1.151015013814654, + "learning_rate": 1.2003896979626061e-05, + "loss": 0.30631259083747864, + "step": 3496 + }, + { + "epoch": 0.9286947284557163, + "grad_norm": 1.125856079122124, + "learning_rate": 1.199959508228844e-05, + "loss": 0.3005716800689697, + "step": 3497 + }, + { + "epoch": 0.9289602974372593, + "grad_norm": 0.9983757548693274, + "learning_rate": 1.1995292799452472e-05, + "loss": 0.2381039410829544, + "step": 3498 + }, + { + "epoch": 0.9292258664188023, + "grad_norm": 1.1338580261514946, + "learning_rate": 1.1990990131947582e-05, + "loss": 0.31764286756515503, + "step": 3499 + }, + { + "epoch": 0.9294914354003453, + "grad_norm": 1.1445030838538803, + "learning_rate": 1.1986687080603273e-05, + "loss": 0.3029370903968811, + "step": 3500 + }, + { + "epoch": 0.9297570043818882, + "grad_norm": 1.0814133109661386, + "learning_rate": 1.198238364624913e-05, + "loss": 0.30967646837234497, + "step": 3501 + }, + { + "epoch": 0.9300225733634312, + "grad_norm": 1.0376796287878236, + "learning_rate": 1.1978079829714799e-05, + "loss": 0.24687506258487701, + "step": 3502 + }, + { + "epoch": 0.9302881423449741, + 
"grad_norm": 1.0529899744692286, + "learning_rate": 1.1973775631830007e-05, + "loss": 0.25909408926963806, + "step": 3503 + }, + { + "epoch": 0.9305537113265171, + "grad_norm": 1.1136411983367804, + "learning_rate": 1.196947105342455e-05, + "loss": 0.281025230884552, + "step": 3504 + }, + { + "epoch": 0.93081928030806, + "grad_norm": 1.2858712177395888, + "learning_rate": 1.1965166095328302e-05, + "loss": 0.33401811122894287, + "step": 3505 + }, + { + "epoch": 0.931084849289603, + "grad_norm": 0.9732764276792689, + "learning_rate": 1.1960860758371208e-05, + "loss": 0.25839388370513916, + "step": 3506 + }, + { + "epoch": 0.931350418271146, + "grad_norm": 0.954364218435113, + "learning_rate": 1.1956555043383286e-05, + "loss": 0.23343560099601746, + "step": 3507 + }, + { + "epoch": 0.9316159872526889, + "grad_norm": 1.176408931412559, + "learning_rate": 1.1952248951194629e-05, + "loss": 0.31106436252593994, + "step": 3508 + }, + { + "epoch": 0.9318815562342319, + "grad_norm": 1.108418204277134, + "learning_rate": 1.1947942482635395e-05, + "loss": 0.29152095317840576, + "step": 3509 + }, + { + "epoch": 0.9321471252157748, + "grad_norm": 1.2651732065185788, + "learning_rate": 1.1943635638535827e-05, + "loss": 0.31517675518989563, + "step": 3510 + }, + { + "epoch": 0.9324126941973178, + "grad_norm": 1.2309480505410157, + "learning_rate": 1.1939328419726231e-05, + "loss": 0.33221137523651123, + "step": 3511 + }, + { + "epoch": 0.9326782631788607, + "grad_norm": 1.2277892053470791, + "learning_rate": 1.193502082703699e-05, + "loss": 0.314359575510025, + "step": 3512 + }, + { + "epoch": 0.9329438321604037, + "grad_norm": 1.129757464324541, + "learning_rate": 1.1930712861298553e-05, + "loss": 0.2879924178123474, + "step": 3513 + }, + { + "epoch": 0.9332094011419466, + "grad_norm": 1.1622909402406336, + "learning_rate": 1.1926404523341443e-05, + "loss": 0.2732955515384674, + "step": 3514 + }, + { + "epoch": 0.9334749701234896, + "grad_norm": 1.1586501434218468, + 
"learning_rate": 1.1922095813996264e-05, + "loss": 0.32156097888946533, + "step": 3515 + }, + { + "epoch": 0.9337405391050325, + "grad_norm": 1.110486475282156, + "learning_rate": 1.1917786734093682e-05, + "loss": 0.2694319486618042, + "step": 3516 + }, + { + "epoch": 0.9340061080865755, + "grad_norm": 1.0871387001943549, + "learning_rate": 1.1913477284464434e-05, + "loss": 0.3049655258655548, + "step": 3517 + }, + { + "epoch": 0.9342716770681184, + "grad_norm": 1.0962864613999421, + "learning_rate": 1.1909167465939334e-05, + "loss": 0.30053725838661194, + "step": 3518 + }, + { + "epoch": 0.9345372460496614, + "grad_norm": 1.0261517334123498, + "learning_rate": 1.1904857279349265e-05, + "loss": 0.2611788809299469, + "step": 3519 + }, + { + "epoch": 0.9348028150312043, + "grad_norm": 1.1400957154071245, + "learning_rate": 1.1900546725525175e-05, + "loss": 0.28344646096229553, + "step": 3520 + }, + { + "epoch": 0.9350683840127473, + "grad_norm": 1.067093022484818, + "learning_rate": 1.1896235805298093e-05, + "loss": 0.2504042685031891, + "step": 3521 + }, + { + "epoch": 0.9353339529942902, + "grad_norm": 1.0534608212516616, + "learning_rate": 1.1891924519499113e-05, + "loss": 0.27877938747406006, + "step": 3522 + }, + { + "epoch": 0.9355995219758332, + "grad_norm": 1.046331705593262, + "learning_rate": 1.1887612868959394e-05, + "loss": 0.28176525235176086, + "step": 3523 + }, + { + "epoch": 0.9358650909573761, + "grad_norm": 1.1750063194789062, + "learning_rate": 1.1883300854510178e-05, + "loss": 0.32376354932785034, + "step": 3524 + }, + { + "epoch": 0.9361306599389191, + "grad_norm": 1.0908366283033504, + "learning_rate": 1.1878988476982772e-05, + "loss": 0.2846054434776306, + "step": 3525 + }, + { + "epoch": 0.936396228920462, + "grad_norm": 1.0507783491664777, + "learning_rate": 1.1874675737208546e-05, + "loss": 0.25711044669151306, + "step": 3526 + }, + { + "epoch": 0.9366617979020051, + "grad_norm": 1.078360429057703, + "learning_rate": 1.1870362636018946e-05, 
+ "loss": 0.2810837924480438, + "step": 3527 + }, + { + "epoch": 0.936927366883548, + "grad_norm": 1.2088151262046463, + "learning_rate": 1.186604917424549e-05, + "loss": 0.3090322017669678, + "step": 3528 + }, + { + "epoch": 0.937192935865091, + "grad_norm": 1.061646146170892, + "learning_rate": 1.1861735352719763e-05, + "loss": 0.2797972559928894, + "step": 3529 + }, + { + "epoch": 0.937458504846634, + "grad_norm": 1.3937474116807773, + "learning_rate": 1.1857421172273415e-05, + "loss": 0.3124893605709076, + "step": 3530 + }, + { + "epoch": 0.9377240738281769, + "grad_norm": 1.1043040217194096, + "learning_rate": 1.1853106633738174e-05, + "loss": 0.28317195177078247, + "step": 3531 + }, + { + "epoch": 0.9379896428097199, + "grad_norm": 1.0483798154842934, + "learning_rate": 1.1848791737945823e-05, + "loss": 0.27804574370384216, + "step": 3532 + }, + { + "epoch": 0.9382552117912628, + "grad_norm": 1.1007797171562173, + "learning_rate": 1.1844476485728236e-05, + "loss": 0.24936731159687042, + "step": 3533 + }, + { + "epoch": 0.9385207807728058, + "grad_norm": 1.16922301793574, + "learning_rate": 1.1840160877917335e-05, + "loss": 0.296974778175354, + "step": 3534 + }, + { + "epoch": 0.9387863497543487, + "grad_norm": 1.1172266681075624, + "learning_rate": 1.1835844915345117e-05, + "loss": 0.3048890233039856, + "step": 3535 + }, + { + "epoch": 0.9390519187358917, + "grad_norm": 1.0372698095624082, + "learning_rate": 1.1831528598843654e-05, + "loss": 0.2703601121902466, + "step": 3536 + }, + { + "epoch": 0.9393174877174346, + "grad_norm": 1.123009081238491, + "learning_rate": 1.1827211929245075e-05, + "loss": 0.30738013982772827, + "step": 3537 + }, + { + "epoch": 0.9395830566989776, + "grad_norm": 1.0660333251952498, + "learning_rate": 1.1822894907381589e-05, + "loss": 0.26538529992103577, + "step": 3538 + }, + { + "epoch": 0.9398486256805205, + "grad_norm": 1.1050453871275616, + "learning_rate": 1.1818577534085462e-05, + "loss": 0.26795464754104614, + "step": 3539 + 
}, + { + "epoch": 0.9401141946620635, + "grad_norm": 1.1533311536850575, + "learning_rate": 1.1814259810189034e-05, + "loss": 0.30891868472099304, + "step": 3540 + }, + { + "epoch": 0.9403797636436064, + "grad_norm": 1.8167204702159565, + "learning_rate": 1.1809941736524713e-05, + "loss": 0.29164037108421326, + "step": 3541 + }, + { + "epoch": 0.9406453326251494, + "grad_norm": 1.0875424396631934, + "learning_rate": 1.180562331392497e-05, + "loss": 0.30322739481925964, + "step": 3542 + }, + { + "epoch": 0.9409109016066923, + "grad_norm": 1.0765622649066557, + "learning_rate": 1.1801304543222349e-05, + "loss": 0.275432288646698, + "step": 3543 + }, + { + "epoch": 0.9411764705882353, + "grad_norm": 1.1566847425916267, + "learning_rate": 1.1796985425249459e-05, + "loss": 0.2788141965866089, + "step": 3544 + }, + { + "epoch": 0.9414420395697782, + "grad_norm": 1.203313197377309, + "learning_rate": 1.1792665960838967e-05, + "loss": 0.24254676699638367, + "step": 3545 + }, + { + "epoch": 0.9417076085513212, + "grad_norm": 1.1050026210111878, + "learning_rate": 1.1788346150823625e-05, + "loss": 0.2803058326244354, + "step": 3546 + }, + { + "epoch": 0.9419731775328641, + "grad_norm": 1.0993090963339842, + "learning_rate": 1.1784025996036232e-05, + "loss": 0.3068317174911499, + "step": 3547 + }, + { + "epoch": 0.9422387465144071, + "grad_norm": 0.9977731134117688, + "learning_rate": 1.1779705497309673e-05, + "loss": 0.23124024271965027, + "step": 3548 + }, + { + "epoch": 0.94250431549595, + "grad_norm": 1.080710306089679, + "learning_rate": 1.177538465547688e-05, + "loss": 0.2815462648868561, + "step": 3549 + }, + { + "epoch": 0.942769884477493, + "grad_norm": 1.1118952137889662, + "learning_rate": 1.1771063471370862e-05, + "loss": 0.29448196291923523, + "step": 3550 + }, + { + "epoch": 0.9430354534590359, + "grad_norm": 1.2691077751501818, + "learning_rate": 1.1766741945824698e-05, + "loss": 0.3176615834236145, + "step": 3551 + }, + { + "epoch": 0.9433010224405789, + 
"grad_norm": 1.1390071879475103, + "learning_rate": 1.1762420079671527e-05, + "loss": 0.29126274585723877, + "step": 3552 + }, + { + "epoch": 0.9435665914221218, + "grad_norm": 1.084504171285626, + "learning_rate": 1.1758097873744547e-05, + "loss": 0.27074337005615234, + "step": 3553 + }, + { + "epoch": 0.9438321604036648, + "grad_norm": 1.0495499557301764, + "learning_rate": 1.175377532887703e-05, + "loss": 0.2756083011627197, + "step": 3554 + }, + { + "epoch": 0.9440977293852079, + "grad_norm": 1.1028881447166687, + "learning_rate": 1.1749452445902315e-05, + "loss": 0.26918384432792664, + "step": 3555 + }, + { + "epoch": 0.9443632983667508, + "grad_norm": 1.0856468025535497, + "learning_rate": 1.17451292256538e-05, + "loss": 0.2550349235534668, + "step": 3556 + }, + { + "epoch": 0.9446288673482938, + "grad_norm": 1.0791996633460945, + "learning_rate": 1.1740805668964954e-05, + "loss": 0.2601481080055237, + "step": 3557 + }, + { + "epoch": 0.9448944363298367, + "grad_norm": 1.1367109564667788, + "learning_rate": 1.1736481776669307e-05, + "loss": 0.2848352789878845, + "step": 3558 + }, + { + "epoch": 0.9451600053113797, + "grad_norm": 1.1168278064757895, + "learning_rate": 1.173215754960045e-05, + "loss": 0.266584575176239, + "step": 3559 + }, + { + "epoch": 0.9454255742929226, + "grad_norm": 0.9979692557530664, + "learning_rate": 1.172783298859205e-05, + "loss": 0.25037410855293274, + "step": 3560 + }, + { + "epoch": 0.9456911432744656, + "grad_norm": 1.1049326363207628, + "learning_rate": 1.1723508094477825e-05, + "loss": 0.30239278078079224, + "step": 3561 + }, + { + "epoch": 0.9459567122560085, + "grad_norm": 1.0413977608943958, + "learning_rate": 1.1719182868091567e-05, + "loss": 0.2893553078174591, + "step": 3562 + }, + { + "epoch": 0.9462222812375515, + "grad_norm": 1.215187947788902, + "learning_rate": 1.1714857310267124e-05, + "loss": 0.2840202748775482, + "step": 3563 + }, + { + "epoch": 0.9464878502190944, + "grad_norm": 1.0615180068139964, + 
"learning_rate": 1.1710531421838422e-05, + "loss": 0.2614031732082367, + "step": 3564 + }, + { + "epoch": 0.9467534192006374, + "grad_norm": 1.0290230331800772, + "learning_rate": 1.1706205203639433e-05, + "loss": 0.267095148563385, + "step": 3565 + }, + { + "epoch": 0.9470189881821803, + "grad_norm": 1.2397291626994196, + "learning_rate": 1.1701878656504206e-05, + "loss": 0.25835227966308594, + "step": 3566 + }, + { + "epoch": 0.9472845571637233, + "grad_norm": 1.1319162410146095, + "learning_rate": 1.1697551781266845e-05, + "loss": 0.27547580003738403, + "step": 3567 + }, + { + "epoch": 0.9475501261452662, + "grad_norm": 1.089656044815204, + "learning_rate": 1.169322457876152e-05, + "loss": 0.251165509223938, + "step": 3568 + }, + { + "epoch": 0.9478156951268092, + "grad_norm": 1.2350323802819905, + "learning_rate": 1.1688897049822467e-05, + "loss": 0.2738516926765442, + "step": 3569 + }, + { + "epoch": 0.9480812641083521, + "grad_norm": 1.0315369616879289, + "learning_rate": 1.1684569195283981e-05, + "loss": 0.2745274305343628, + "step": 3570 + }, + { + "epoch": 0.9483468330898951, + "grad_norm": 1.180099592022995, + "learning_rate": 1.1680241015980423e-05, + "loss": 0.28586819767951965, + "step": 3571 + }, + { + "epoch": 0.948612402071438, + "grad_norm": 1.2233918967574897, + "learning_rate": 1.167591251274621e-05, + "loss": 0.2559577524662018, + "step": 3572 + }, + { + "epoch": 0.948877971052981, + "grad_norm": 1.155824963337958, + "learning_rate": 1.1671583686415833e-05, + "loss": 0.26069143414497375, + "step": 3573 + }, + { + "epoch": 0.949143540034524, + "grad_norm": 1.078529730225554, + "learning_rate": 1.1667254537823838e-05, + "loss": 0.26866453886032104, + "step": 3574 + }, + { + "epoch": 0.9494091090160669, + "grad_norm": 1.0772599867154102, + "learning_rate": 1.166292506780483e-05, + "loss": 0.25285348296165466, + "step": 3575 + }, + { + "epoch": 0.9496746779976099, + "grad_norm": 1.1335172942215501, + "learning_rate": 1.1658595277193479e-05, + 
"loss": 0.3330434262752533, + "step": 3576 + }, + { + "epoch": 0.9499402469791528, + "grad_norm": 1.076438251163932, + "learning_rate": 1.1654265166824522e-05, + "loss": 0.2789473533630371, + "step": 3577 + }, + { + "epoch": 0.9502058159606958, + "grad_norm": 1.2746037306212283, + "learning_rate": 1.164993473753275e-05, + "loss": 0.30984824895858765, + "step": 3578 + }, + { + "epoch": 0.9504713849422387, + "grad_norm": 1.0517088315750878, + "learning_rate": 1.164560399015302e-05, + "loss": 0.23881833255290985, + "step": 3579 + }, + { + "epoch": 0.9507369539237817, + "grad_norm": 1.1012484750770577, + "learning_rate": 1.164127292552025e-05, + "loss": 0.3027937114238739, + "step": 3580 + }, + { + "epoch": 0.9510025229053246, + "grad_norm": 1.1998484228117954, + "learning_rate": 1.1636941544469413e-05, + "loss": 0.2901906371116638, + "step": 3581 + }, + { + "epoch": 0.9512680918868676, + "grad_norm": 1.069491787313744, + "learning_rate": 1.1632609847835556e-05, + "loss": 0.28961148858070374, + "step": 3582 + }, + { + "epoch": 0.9515336608684106, + "grad_norm": 1.0782542825887276, + "learning_rate": 1.1628277836453774e-05, + "loss": 0.2730783224105835, + "step": 3583 + }, + { + "epoch": 0.9517992298499536, + "grad_norm": 1.0952017771476839, + "learning_rate": 1.1623945511159232e-05, + "loss": 0.3195485770702362, + "step": 3584 + }, + { + "epoch": 0.9520647988314965, + "grad_norm": 1.1514370971708257, + "learning_rate": 1.1619612872787144e-05, + "loss": 0.3097516894340515, + "step": 3585 + }, + { + "epoch": 0.9523303678130395, + "grad_norm": 1.0422990071728377, + "learning_rate": 1.1615279922172796e-05, + "loss": 0.2716284692287445, + "step": 3586 + }, + { + "epoch": 0.9525959367945824, + "grad_norm": 0.9669355988334725, + "learning_rate": 1.1610946660151531e-05, + "loss": 0.2601209878921509, + "step": 3587 + }, + { + "epoch": 0.9528615057761254, + "grad_norm": 1.1027425019898653, + "learning_rate": 1.1606613087558748e-05, + "loss": 0.28665289282798767, + "step": 3588 + 
}, + { + "epoch": 0.9531270747576683, + "grad_norm": 1.082078861677668, + "learning_rate": 1.1602279205229912e-05, + "loss": 0.3019893765449524, + "step": 3589 + }, + { + "epoch": 0.9533926437392113, + "grad_norm": 0.9778282797717269, + "learning_rate": 1.1597945014000537e-05, + "loss": 0.2635146677494049, + "step": 3590 + }, + { + "epoch": 0.9536582127207542, + "grad_norm": 1.0527782897227813, + "learning_rate": 1.1593610514706217e-05, + "loss": 0.2704858183860779, + "step": 3591 + }, + { + "epoch": 0.9539237817022972, + "grad_norm": 1.2295509988273574, + "learning_rate": 1.1589275708182581e-05, + "loss": 0.31997931003570557, + "step": 3592 + }, + { + "epoch": 0.9541893506838401, + "grad_norm": 1.1529907760165448, + "learning_rate": 1.1584940595265332e-05, + "loss": 0.2308788150548935, + "step": 3593 + }, + { + "epoch": 0.9544549196653831, + "grad_norm": 1.0980235303762964, + "learning_rate": 1.1580605176790229e-05, + "loss": 0.28886470198631287, + "step": 3594 + }, + { + "epoch": 0.954720488646926, + "grad_norm": 1.313883667721807, + "learning_rate": 1.157626945359309e-05, + "loss": 0.30698686838150024, + "step": 3595 + }, + { + "epoch": 0.954986057628469, + "grad_norm": 1.1087251273709688, + "learning_rate": 1.1571933426509789e-05, + "loss": 0.27475905418395996, + "step": 3596 + }, + { + "epoch": 0.955251626610012, + "grad_norm": 1.1064883207545173, + "learning_rate": 1.1567597096376264e-05, + "loss": 0.2568071484565735, + "step": 3597 + }, + { + "epoch": 0.9555171955915549, + "grad_norm": 1.28706485993144, + "learning_rate": 1.1563260464028507e-05, + "loss": 0.2574060261249542, + "step": 3598 + }, + { + "epoch": 0.9557827645730979, + "grad_norm": 1.193494963897618, + "learning_rate": 1.1558923530302571e-05, + "loss": 0.2847997546195984, + "step": 3599 + }, + { + "epoch": 0.9560483335546408, + "grad_norm": 1.0723094070831873, + "learning_rate": 1.155458629603456e-05, + "loss": 0.2594734728336334, + "step": 3600 + }, + { + "epoch": 0.9563139025361838, + 
"grad_norm": 1.0020160427681732, + "learning_rate": 1.155024876206065e-05, + "loss": 0.2300589680671692, + "step": 3601 + }, + { + "epoch": 0.9565794715177267, + "grad_norm": 1.1475438454718678, + "learning_rate": 1.1545910929217059e-05, + "loss": 0.29174795746803284, + "step": 3602 + }, + { + "epoch": 0.9568450404992697, + "grad_norm": 1.0425930414114217, + "learning_rate": 1.1541572798340076e-05, + "loss": 0.2666400074958801, + "step": 3603 + }, + { + "epoch": 0.9571106094808126, + "grad_norm": 1.0067559469755134, + "learning_rate": 1.1537234370266035e-05, + "loss": 0.24651308357715607, + "step": 3604 + }, + { + "epoch": 0.9573761784623556, + "grad_norm": 1.1542471481522265, + "learning_rate": 1.1532895645831339e-05, + "loss": 0.29991376399993896, + "step": 3605 + }, + { + "epoch": 0.9576417474438985, + "grad_norm": 1.0631305192934537, + "learning_rate": 1.1528556625872443e-05, + "loss": 0.27713578939437866, + "step": 3606 + }, + { + "epoch": 0.9579073164254415, + "grad_norm": 1.0497999275546905, + "learning_rate": 1.1524217311225857e-05, + "loss": 0.26503294706344604, + "step": 3607 + }, + { + "epoch": 0.9581728854069844, + "grad_norm": 1.1479000180189152, + "learning_rate": 1.1519877702728149e-05, + "loss": 0.28627675771713257, + "step": 3608 + }, + { + "epoch": 0.9584384543885274, + "grad_norm": 1.0333891142616893, + "learning_rate": 1.1515537801215944e-05, + "loss": 0.26862916350364685, + "step": 3609 + }, + { + "epoch": 0.9587040233700703, + "grad_norm": 1.2518522451268181, + "learning_rate": 1.1511197607525926e-05, + "loss": 0.29697147011756897, + "step": 3610 + }, + { + "epoch": 0.9589695923516134, + "grad_norm": 1.0668919106736792, + "learning_rate": 1.1506857122494832e-05, + "loss": 0.2980155944824219, + "step": 3611 + }, + { + "epoch": 0.9592351613331563, + "grad_norm": 1.1016644329026075, + "learning_rate": 1.1502516346959458e-05, + "loss": 0.2847440838813782, + "step": 3612 + }, + { + "epoch": 0.9595007303146993, + "grad_norm": 1.1131533712076647, + 
"learning_rate": 1.149817528175665e-05, + "loss": 0.2812016010284424, + "step": 3613 + }, + { + "epoch": 0.9597662992962422, + "grad_norm": 1.0387818826049915, + "learning_rate": 1.1493833927723319e-05, + "loss": 0.26856982707977295, + "step": 3614 + }, + { + "epoch": 0.9600318682777852, + "grad_norm": 1.0595715138301371, + "learning_rate": 1.1489492285696424e-05, + "loss": 0.2651693820953369, + "step": 3615 + }, + { + "epoch": 0.9602974372593281, + "grad_norm": 1.1384265947297394, + "learning_rate": 1.1485150356512986e-05, + "loss": 0.29811644554138184, + "step": 3616 + }, + { + "epoch": 0.9605630062408711, + "grad_norm": 1.0449713925688802, + "learning_rate": 1.1480808141010071e-05, + "loss": 0.2622855007648468, + "step": 3617 + }, + { + "epoch": 0.960828575222414, + "grad_norm": 1.1964334046740135, + "learning_rate": 1.1476465640024814e-05, + "loss": 0.3067246377468109, + "step": 3618 + }, + { + "epoch": 0.961094144203957, + "grad_norm": 1.0999678942020576, + "learning_rate": 1.1472122854394394e-05, + "loss": 0.25928011536598206, + "step": 3619 + }, + { + "epoch": 0.9613597131855, + "grad_norm": 1.0356853160291564, + "learning_rate": 1.146777978495605e-05, + "loss": 0.2574170231819153, + "step": 3620 + }, + { + "epoch": 0.9616252821670429, + "grad_norm": 1.1366453776894136, + "learning_rate": 1.1463436432547073e-05, + "loss": 0.2845388650894165, + "step": 3621 + }, + { + "epoch": 0.9618908511485859, + "grad_norm": 1.1067131961561003, + "learning_rate": 1.145909279800481e-05, + "loss": 0.28735876083374023, + "step": 3622 + }, + { + "epoch": 0.9621564201301288, + "grad_norm": 1.100639151702203, + "learning_rate": 1.1454748882166666e-05, + "loss": 0.25739723443984985, + "step": 3623 + }, + { + "epoch": 0.9624219891116718, + "grad_norm": 1.0743852778260963, + "learning_rate": 1.1450404685870098e-05, + "loss": 0.25144338607788086, + "step": 3624 + }, + { + "epoch": 0.9626875580932147, + "grad_norm": 1.0451944769292063, + "learning_rate": 1.144606020995261e-05, + 
"loss": 0.23981891572475433, + "step": 3625 + }, + { + "epoch": 0.9629531270747577, + "grad_norm": 1.1215387475511582, + "learning_rate": 1.1441715455251764e-05, + "loss": 0.30925339460372925, + "step": 3626 + }, + { + "epoch": 0.9632186960563006, + "grad_norm": 1.1193965021491372, + "learning_rate": 1.1437370422605184e-05, + "loss": 0.2559184432029724, + "step": 3627 + }, + { + "epoch": 0.9634842650378436, + "grad_norm": 1.221260182162867, + "learning_rate": 1.1433025112850542e-05, + "loss": 0.3001229166984558, + "step": 3628 + }, + { + "epoch": 0.9637498340193865, + "grad_norm": 0.9957913669659347, + "learning_rate": 1.1428679526825557e-05, + "loss": 0.24304218590259552, + "step": 3629 + }, + { + "epoch": 0.9640154030009295, + "grad_norm": 1.0405086595778643, + "learning_rate": 1.1424333665368011e-05, + "loss": 0.25677186250686646, + "step": 3630 + }, + { + "epoch": 0.9642809719824724, + "grad_norm": 1.0362119568252992, + "learning_rate": 1.141998752931573e-05, + "loss": 0.2589085102081299, + "step": 3631 + }, + { + "epoch": 0.9645465409640154, + "grad_norm": 1.1004952842028541, + "learning_rate": 1.1415641119506601e-05, + "loss": 0.2588059604167938, + "step": 3632 + }, + { + "epoch": 0.9648121099455583, + "grad_norm": 1.1379378571012249, + "learning_rate": 1.1411294436778562e-05, + "loss": 0.26097869873046875, + "step": 3633 + }, + { + "epoch": 0.9650776789271013, + "grad_norm": 1.2218308438631786, + "learning_rate": 1.1406947481969598e-05, + "loss": 0.26022520661354065, + "step": 3634 + }, + { + "epoch": 0.9653432479086442, + "grad_norm": 1.0737420773814035, + "learning_rate": 1.140260025591775e-05, + "loss": 0.26242876052856445, + "step": 3635 + }, + { + "epoch": 0.9656088168901872, + "grad_norm": 1.1396910340144906, + "learning_rate": 1.1398252759461119e-05, + "loss": 0.30035555362701416, + "step": 3636 + }, + { + "epoch": 0.9658743858717301, + "grad_norm": 1.1365210980452296, + "learning_rate": 1.1393904993437848e-05, + "loss": 0.26388341188430786, + "step": 
3637 + }, + { + "epoch": 0.9661399548532731, + "grad_norm": 1.06242333907382, + "learning_rate": 1.1389556958686132e-05, + "loss": 0.28116434812545776, + "step": 3638 + }, + { + "epoch": 0.966405523834816, + "grad_norm": 1.0513966621960738, + "learning_rate": 1.1385208656044222e-05, + "loss": 0.25372493267059326, + "step": 3639 + }, + { + "epoch": 0.9666710928163591, + "grad_norm": 1.1171784181414381, + "learning_rate": 1.1380860086350422e-05, + "loss": 0.2648317813873291, + "step": 3640 + }, + { + "epoch": 0.966936661797902, + "grad_norm": 1.0508956007113521, + "learning_rate": 1.1376511250443082e-05, + "loss": 0.26981276273727417, + "step": 3641 + }, + { + "epoch": 0.967202230779445, + "grad_norm": 1.1513465918880585, + "learning_rate": 1.1372162149160608e-05, + "loss": 0.2934207618236542, + "step": 3642 + }, + { + "epoch": 0.967467799760988, + "grad_norm": 0.9705407845284122, + "learning_rate": 1.1367812783341454e-05, + "loss": 0.24250900745391846, + "step": 3643 + }, + { + "epoch": 0.9677333687425309, + "grad_norm": 1.0409007473472116, + "learning_rate": 1.1363463153824125e-05, + "loss": 0.2565772235393524, + "step": 3644 + }, + { + "epoch": 0.9679989377240739, + "grad_norm": 1.2386980142351325, + "learning_rate": 1.1359113261447183e-05, + "loss": 0.28407829999923706, + "step": 3645 + }, + { + "epoch": 0.9682645067056168, + "grad_norm": 1.1134220293120092, + "learning_rate": 1.1354763107049234e-05, + "loss": 0.2974489629268646, + "step": 3646 + }, + { + "epoch": 0.9685300756871598, + "grad_norm": 1.1611486704366027, + "learning_rate": 1.1350412691468935e-05, + "loss": 0.27539899945259094, + "step": 3647 + }, + { + "epoch": 0.9687956446687027, + "grad_norm": 1.1777496863563888, + "learning_rate": 1.1346062015544997e-05, + "loss": 0.28256523609161377, + "step": 3648 + }, + { + "epoch": 0.9690612136502457, + "grad_norm": 1.0910813538672366, + "learning_rate": 1.1341711080116176e-05, + "loss": 0.27582883834838867, + "step": 3649 + }, + { + "epoch": 
0.9693267826317886, + "grad_norm": 1.2299419127493794, + "learning_rate": 1.1337359886021285e-05, + "loss": 0.3199389576911926, + "step": 3650 + }, + { + "epoch": 0.9695923516133316, + "grad_norm": 1.078226808322517, + "learning_rate": 1.1333008434099178e-05, + "loss": 0.2922326922416687, + "step": 3651 + }, + { + "epoch": 0.9698579205948745, + "grad_norm": 1.1833154338367669, + "learning_rate": 1.1328656725188767e-05, + "loss": 0.285635381937027, + "step": 3652 + }, + { + "epoch": 0.9701234895764175, + "grad_norm": 1.1606724829825772, + "learning_rate": 1.1324304760129009e-05, + "loss": 0.3347492814064026, + "step": 3653 + }, + { + "epoch": 0.9703890585579604, + "grad_norm": 1.1079831575977723, + "learning_rate": 1.1319952539758912e-05, + "loss": 0.27379873394966125, + "step": 3654 + }, + { + "epoch": 0.9706546275395034, + "grad_norm": 1.2487680540467303, + "learning_rate": 1.1315600064917534e-05, + "loss": 0.27911311388015747, + "step": 3655 + }, + { + "epoch": 0.9709201965210463, + "grad_norm": 1.187492816658345, + "learning_rate": 1.1311247336443982e-05, + "loss": 0.25750118494033813, + "step": 3656 + }, + { + "epoch": 0.9711857655025893, + "grad_norm": 1.1010343448161526, + "learning_rate": 1.1306894355177405e-05, + "loss": 0.28723078966140747, + "step": 3657 + }, + { + "epoch": 0.9714513344841322, + "grad_norm": 1.0378840795289885, + "learning_rate": 1.1302541121957008e-05, + "loss": 0.25269389152526855, + "step": 3658 + }, + { + "epoch": 0.9717169034656752, + "grad_norm": 1.1923604766845932, + "learning_rate": 1.1298187637622046e-05, + "loss": 0.3041607439517975, + "step": 3659 + }, + { + "epoch": 0.9719824724472181, + "grad_norm": 1.0812687625707742, + "learning_rate": 1.1293833903011819e-05, + "loss": 0.2826605439186096, + "step": 3660 + }, + { + "epoch": 0.9722480414287611, + "grad_norm": 1.1010565715724137, + "learning_rate": 1.1289479918965675e-05, + "loss": 0.2830520570278168, + "step": 3661 + }, + { + "epoch": 0.972513610410304, + "grad_norm": 
1.0160541896764337, + "learning_rate": 1.1285125686323011e-05, + "loss": 0.24295952916145325, + "step": 3662 + }, + { + "epoch": 0.972779179391847, + "grad_norm": 1.108181435484162, + "learning_rate": 1.1280771205923269e-05, + "loss": 0.28775808215141296, + "step": 3663 + }, + { + "epoch": 0.97304474837339, + "grad_norm": 0.9715417125511246, + "learning_rate": 1.127641647860595e-05, + "loss": 0.24650296568870544, + "step": 3664 + }, + { + "epoch": 0.9733103173549329, + "grad_norm": 0.9305293200248026, + "learning_rate": 1.1272061505210584e-05, + "loss": 0.22344040870666504, + "step": 3665 + }, + { + "epoch": 0.9735758863364758, + "grad_norm": 1.0859092127038839, + "learning_rate": 1.1267706286576759e-05, + "loss": 0.26920852065086365, + "step": 3666 + }, + { + "epoch": 0.9738414553180188, + "grad_norm": 1.1792674236289236, + "learning_rate": 1.1263350823544115e-05, + "loss": 0.27615875005722046, + "step": 3667 + }, + { + "epoch": 0.9741070242995619, + "grad_norm": 1.0470064037587914, + "learning_rate": 1.1258995116952334e-05, + "loss": 0.2768712043762207, + "step": 3668 + }, + { + "epoch": 0.9743725932811048, + "grad_norm": 1.0568329464095596, + "learning_rate": 1.1254639167641141e-05, + "loss": 0.27764153480529785, + "step": 3669 + }, + { + "epoch": 0.9746381622626478, + "grad_norm": 1.139437307258024, + "learning_rate": 1.1250282976450316e-05, + "loss": 0.27423611283302307, + "step": 3670 + }, + { + "epoch": 0.9749037312441907, + "grad_norm": 1.1238013222894891, + "learning_rate": 1.1245926544219676e-05, + "loss": 0.2626228332519531, + "step": 3671 + }, + { + "epoch": 0.9751693002257337, + "grad_norm": 1.2807555997920204, + "learning_rate": 1.1241569871789096e-05, + "loss": 0.25524014234542847, + "step": 3672 + }, + { + "epoch": 0.9754348692072766, + "grad_norm": 1.1042234540757712, + "learning_rate": 1.1237212959998485e-05, + "loss": 0.30857735872268677, + "step": 3673 + }, + { + "epoch": 0.9757004381888196, + "grad_norm": 1.0235359310129009, + "learning_rate": 
1.1232855809687807e-05, + "loss": 0.25099021196365356, + "step": 3674 + }, + { + "epoch": 0.9759660071703625, + "grad_norm": 1.0116202981123898, + "learning_rate": 1.1228498421697068e-05, + "loss": 0.22664576768875122, + "step": 3675 + }, + { + "epoch": 0.9762315761519055, + "grad_norm": 1.151038777130998, + "learning_rate": 1.1224140796866322e-05, + "loss": 0.24727366864681244, + "step": 3676 + }, + { + "epoch": 0.9764971451334484, + "grad_norm": 1.160849411640656, + "learning_rate": 1.121978293603567e-05, + "loss": 0.2561935782432556, + "step": 3677 + }, + { + "epoch": 0.9767627141149914, + "grad_norm": 1.10648815955184, + "learning_rate": 1.1215424840045254e-05, + "loss": 0.2594214677810669, + "step": 3678 + }, + { + "epoch": 0.9770282830965343, + "grad_norm": 1.130419852826836, + "learning_rate": 1.1211066509735265e-05, + "loss": 0.2383778691291809, + "step": 3679 + }, + { + "epoch": 0.9772938520780773, + "grad_norm": 1.2393377504128167, + "learning_rate": 1.1206707945945934e-05, + "loss": 0.2864387035369873, + "step": 3680 + }, + { + "epoch": 0.9775594210596202, + "grad_norm": 1.2012269867709167, + "learning_rate": 1.1202349149517541e-05, + "loss": 0.30415672063827515, + "step": 3681 + }, + { + "epoch": 0.9778249900411632, + "grad_norm": 1.1590063847406842, + "learning_rate": 1.1197990121290415e-05, + "loss": 0.3030807375907898, + "step": 3682 + }, + { + "epoch": 0.9780905590227061, + "grad_norm": 1.1251124481371277, + "learning_rate": 1.1193630862104922e-05, + "loss": 0.2518938481807709, + "step": 3683 + }, + { + "epoch": 0.9783561280042491, + "grad_norm": 1.2096921428918863, + "learning_rate": 1.1189271372801474e-05, + "loss": 0.25353187322616577, + "step": 3684 + }, + { + "epoch": 0.978621696985792, + "grad_norm": 1.401372369430627, + "learning_rate": 1.1184911654220534e-05, + "loss": 0.30639684200286865, + "step": 3685 + }, + { + "epoch": 0.978887265967335, + "grad_norm": 1.1636733460077495, + "learning_rate": 1.1180551707202602e-05, + "loss": 
0.295099139213562, + "step": 3686 + }, + { + "epoch": 0.979152834948878, + "grad_norm": 1.0596592048702305, + "learning_rate": 1.1176191532588224e-05, + "loss": 0.2428167164325714, + "step": 3687 + }, + { + "epoch": 0.9794184039304209, + "grad_norm": 1.0401088292404943, + "learning_rate": 1.1171831131217989e-05, + "loss": 0.2716362774372101, + "step": 3688 + }, + { + "epoch": 0.9796839729119639, + "grad_norm": 1.1130709970940986, + "learning_rate": 1.1167470503932534e-05, + "loss": 0.28350287675857544, + "step": 3689 + }, + { + "epoch": 0.9799495418935068, + "grad_norm": 1.0214004744947676, + "learning_rate": 1.1163109651572535e-05, + "loss": 0.2776945233345032, + "step": 3690 + }, + { + "epoch": 0.9802151108750498, + "grad_norm": 1.041237294346951, + "learning_rate": 1.115874857497871e-05, + "loss": 0.2712942063808441, + "step": 3691 + }, + { + "epoch": 0.9804806798565927, + "grad_norm": 1.058232702389033, + "learning_rate": 1.1154387274991829e-05, + "loss": 0.2530008852481842, + "step": 3692 + }, + { + "epoch": 0.9807462488381357, + "grad_norm": 1.0327043619893976, + "learning_rate": 1.1150025752452693e-05, + "loss": 0.24889500439167023, + "step": 3693 + }, + { + "epoch": 0.9810118178196786, + "grad_norm": 1.1013842404358833, + "learning_rate": 1.1145664008202158e-05, + "loss": 0.3051255941390991, + "step": 3694 + }, + { + "epoch": 0.9812773868012216, + "grad_norm": 1.0503003262830894, + "learning_rate": 1.1141302043081112e-05, + "loss": 0.24781765043735504, + "step": 3695 + }, + { + "epoch": 0.9815429557827646, + "grad_norm": 1.2510153019418302, + "learning_rate": 1.1136939857930497e-05, + "loss": 0.3021858036518097, + "step": 3696 + }, + { + "epoch": 0.9818085247643076, + "grad_norm": 1.1052947984569603, + "learning_rate": 1.1132577453591284e-05, + "loss": 0.3026372194290161, + "step": 3697 + }, + { + "epoch": 0.9820740937458505, + "grad_norm": 1.2367828155450835, + "learning_rate": 1.1128214830904494e-05, + "loss": 0.31511861085891724, + "step": 3698 + }, + { 
+ "epoch": 0.9823396627273935, + "grad_norm": 1.076549494496895, + "learning_rate": 1.112385199071119e-05, + "loss": 0.27885258197784424, + "step": 3699 + }, + { + "epoch": 0.9826052317089364, + "grad_norm": 1.0546536629749794, + "learning_rate": 1.1119488933852477e-05, + "loss": 0.2724893391132355, + "step": 3700 + }, + { + "epoch": 0.9828708006904794, + "grad_norm": 1.0683428715266594, + "learning_rate": 1.1115125661169503e-05, + "loss": 0.2836218774318695, + "step": 3701 + }, + { + "epoch": 0.9831363696720223, + "grad_norm": 1.1039385208642913, + "learning_rate": 1.111076217350345e-05, + "loss": 0.24220457673072815, + "step": 3702 + }, + { + "epoch": 0.9834019386535653, + "grad_norm": 1.1586770288767172, + "learning_rate": 1.1106398471695554e-05, + "loss": 0.28599557280540466, + "step": 3703 + }, + { + "epoch": 0.9836675076351082, + "grad_norm": 1.0806945340822165, + "learning_rate": 1.110203455658708e-05, + "loss": 0.30559849739074707, + "step": 3704 + }, + { + "epoch": 0.9839330766166512, + "grad_norm": 1.0573640293446354, + "learning_rate": 1.109767042901934e-05, + "loss": 0.2763117551803589, + "step": 3705 + }, + { + "epoch": 0.9841986455981941, + "grad_norm": 0.9563131800944344, + "learning_rate": 1.109330608983369e-05, + "loss": 0.2028101086616516, + "step": 3706 + }, + { + "epoch": 0.9844642145797371, + "grad_norm": 0.9787835815750591, + "learning_rate": 1.1088941539871515e-05, + "loss": 0.25386112928390503, + "step": 3707 + }, + { + "epoch": 0.98472978356128, + "grad_norm": 1.075996733851366, + "learning_rate": 1.1084576779974257e-05, + "loss": 0.2588289976119995, + "step": 3708 + }, + { + "epoch": 0.984995352542823, + "grad_norm": 1.3003014971272602, + "learning_rate": 1.1080211810983385e-05, + "loss": 0.3201071321964264, + "step": 3709 + }, + { + "epoch": 0.985260921524366, + "grad_norm": 1.2030478206249715, + "learning_rate": 1.107584663374042e-05, + "loss": 0.28439003229141235, + "step": 3710 + }, + { + "epoch": 0.9855264905059089, + "grad_norm": 
1.060347062251152, + "learning_rate": 1.1071481249086908e-05, + "loss": 0.2734091579914093, + "step": 3711 + }, + { + "epoch": 0.9857920594874519, + "grad_norm": 1.2115603819692051, + "learning_rate": 1.1067115657864451e-05, + "loss": 0.2917581796646118, + "step": 3712 + }, + { + "epoch": 0.9860576284689948, + "grad_norm": 1.2063997459644484, + "learning_rate": 1.1062749860914681e-05, + "loss": 0.3569914996623993, + "step": 3713 + }, + { + "epoch": 0.9863231974505378, + "grad_norm": 1.127711451799425, + "learning_rate": 1.1058383859079271e-05, + "loss": 0.2574514150619507, + "step": 3714 + }, + { + "epoch": 0.9865887664320807, + "grad_norm": 1.119813552337215, + "learning_rate": 1.1054017653199936e-05, + "loss": 0.3035826086997986, + "step": 3715 + }, + { + "epoch": 0.9868543354136237, + "grad_norm": 1.5863085854725767, + "learning_rate": 1.1049651244118424e-05, + "loss": 0.28067824244499207, + "step": 3716 + }, + { + "epoch": 0.9871199043951666, + "grad_norm": 1.0916600834300794, + "learning_rate": 1.1045284632676535e-05, + "loss": 0.2511579394340515, + "step": 3717 + }, + { + "epoch": 0.9873854733767096, + "grad_norm": 1.2657546371764674, + "learning_rate": 1.1040917819716097e-05, + "loss": 0.3059889078140259, + "step": 3718 + }, + { + "epoch": 0.9876510423582525, + "grad_norm": 1.1224253435238671, + "learning_rate": 1.103655080607898e-05, + "loss": 0.2642200291156769, + "step": 3719 + }, + { + "epoch": 0.9879166113397955, + "grad_norm": 1.0969568004465404, + "learning_rate": 1.1032183592607094e-05, + "loss": 0.2743483781814575, + "step": 3720 + }, + { + "epoch": 0.9881821803213384, + "grad_norm": 1.1317768374698567, + "learning_rate": 1.1027816180142383e-05, + "loss": 0.2597433030605316, + "step": 3721 + }, + { + "epoch": 0.9884477493028814, + "grad_norm": 1.0759312888673545, + "learning_rate": 1.1023448569526834e-05, + "loss": 0.24439337849617004, + "step": 3722 + }, + { + "epoch": 0.9887133182844243, + "grad_norm": 1.0386429343076329, + "learning_rate": 
1.1019080761602473e-05, + "loss": 0.2520195245742798, + "step": 3723 + }, + { + "epoch": 0.9889788872659674, + "grad_norm": 1.0921837996926786, + "learning_rate": 1.1014712757211359e-05, + "loss": 0.2904737889766693, + "step": 3724 + }, + { + "epoch": 0.9892444562475103, + "grad_norm": 1.12008182824954, + "learning_rate": 1.1010344557195588e-05, + "loss": 0.28096869587898254, + "step": 3725 + }, + { + "epoch": 0.9895100252290533, + "grad_norm": 1.8392230806075218, + "learning_rate": 1.1005976162397309e-05, + "loss": 0.317839652299881, + "step": 3726 + }, + { + "epoch": 0.9897755942105962, + "grad_norm": 1.19381185696067, + "learning_rate": 1.100160757365869e-05, + "loss": 0.29213201999664307, + "step": 3727 + }, + { + "epoch": 0.9900411631921392, + "grad_norm": 1.215113877896921, + "learning_rate": 1.0997238791821943e-05, + "loss": 0.27034991979599, + "step": 3728 + }, + { + "epoch": 0.9903067321736821, + "grad_norm": 1.2893524723691567, + "learning_rate": 1.0992869817729317e-05, + "loss": 0.30504971742630005, + "step": 3729 + }, + { + "epoch": 0.9905723011552251, + "grad_norm": 1.109889585740049, + "learning_rate": 1.09885006522231e-05, + "loss": 0.30673110485076904, + "step": 3730 + }, + { + "epoch": 0.990837870136768, + "grad_norm": 1.0963153712692437, + "learning_rate": 1.0984131296145616e-05, + "loss": 0.27990686893463135, + "step": 3731 + }, + { + "epoch": 0.991103439118311, + "grad_norm": 1.0228240366531471, + "learning_rate": 1.0979761750339225e-05, + "loss": 0.24379019439220428, + "step": 3732 + }, + { + "epoch": 0.991369008099854, + "grad_norm": 1.1055702239918885, + "learning_rate": 1.0975392015646323e-05, + "loss": 0.30554595589637756, + "step": 3733 + }, + { + "epoch": 0.9916345770813969, + "grad_norm": 1.062606047652276, + "learning_rate": 1.0971022092909342e-05, + "loss": 0.245269775390625, + "step": 3734 + }, + { + "epoch": 0.9919001460629399, + "grad_norm": 1.0977829197687445, + "learning_rate": 1.0966651982970757e-05, + "loss": 0.2732948064804077, 
+ "step": 3735 + }, + { + "epoch": 0.9921657150444828, + "grad_norm": 0.992060831416128, + "learning_rate": 1.0962281686673071e-05, + "loss": 0.25989004969596863, + "step": 3736 + }, + { + "epoch": 0.9924312840260258, + "grad_norm": 1.1415489224758493, + "learning_rate": 1.0957911204858824e-05, + "loss": 0.32891198992729187, + "step": 3737 + }, + { + "epoch": 0.9926968530075687, + "grad_norm": 1.094277657297916, + "learning_rate": 1.0953540538370591e-05, + "loss": 0.29184675216674805, + "step": 3738 + }, + { + "epoch": 0.9929624219891117, + "grad_norm": 1.1381026162174743, + "learning_rate": 1.094916968805099e-05, + "loss": 0.2784018814563751, + "step": 3739 + }, + { + "epoch": 0.9932279909706546, + "grad_norm": 1.1670677505581852, + "learning_rate": 1.094479865474267e-05, + "loss": 0.26586195826530457, + "step": 3740 + }, + { + "epoch": 0.9934935599521976, + "grad_norm": 0.9575913416137994, + "learning_rate": 1.094042743928831e-05, + "loss": 0.24593298137187958, + "step": 3741 + }, + { + "epoch": 0.9937591289337405, + "grad_norm": 1.065966707682552, + "learning_rate": 1.0936056042530632e-05, + "loss": 0.2462792694568634, + "step": 3742 + }, + { + "epoch": 0.9940246979152835, + "grad_norm": 1.2074020558104472, + "learning_rate": 1.0931684465312388e-05, + "loss": 0.2688900828361511, + "step": 3743 + }, + { + "epoch": 0.9942902668968264, + "grad_norm": 1.099682442025033, + "learning_rate": 1.0927312708476367e-05, + "loss": 0.2842782735824585, + "step": 3744 + }, + { + "epoch": 0.9945558358783694, + "grad_norm": 1.0548829148077135, + "learning_rate": 1.0922940772865393e-05, + "loss": 0.249299556016922, + "step": 3745 + }, + { + "epoch": 0.9948214048599123, + "grad_norm": 1.175705262338143, + "learning_rate": 1.0918568659322325e-05, + "loss": 0.2765413522720337, + "step": 3746 + }, + { + "epoch": 0.9950869738414553, + "grad_norm": 1.1414819691892306, + "learning_rate": 1.0914196368690049e-05, + "loss": 0.29750365018844604, + "step": 3747 + }, + { + "epoch": 
0.9953525428229982, + "grad_norm": 1.153321336461836, + "learning_rate": 1.0909823901811496e-05, + "loss": 0.25272879004478455, + "step": 3748 + }, + { + "epoch": 0.9956181118045412, + "grad_norm": 1.1906489486154657, + "learning_rate": 1.0905451259529626e-05, + "loss": 0.3056861460208893, + "step": 3749 + }, + { + "epoch": 0.9958836807860841, + "grad_norm": 1.1596775625362263, + "learning_rate": 1.090107844268743e-05, + "loss": 0.26723814010620117, + "step": 3750 + }, + { + "epoch": 0.9961492497676271, + "grad_norm": 1.167023454532776, + "learning_rate": 1.0896705452127943e-05, + "loss": 0.29998716711997986, + "step": 3751 + }, + { + "epoch": 0.9964148187491702, + "grad_norm": 1.1519689723038142, + "learning_rate": 1.0892332288694216e-05, + "loss": 0.2690891623497009, + "step": 3752 + }, + { + "epoch": 0.9966803877307131, + "grad_norm": 1.1385088428140973, + "learning_rate": 1.0887958953229349e-05, + "loss": 0.25555333495140076, + "step": 3753 + }, + { + "epoch": 0.996945956712256, + "grad_norm": 1.1617836993376212, + "learning_rate": 1.088358544657647e-05, + "loss": 0.27788421511650085, + "step": 3754 + }, + { + "epoch": 0.997211525693799, + "grad_norm": 1.0981105518173184, + "learning_rate": 1.0879211769578734e-05, + "loss": 0.2566586136817932, + "step": 3755 + }, + { + "epoch": 0.997477094675342, + "grad_norm": 1.1742409056404244, + "learning_rate": 1.0874837923079339e-05, + "loss": 0.3028980493545532, + "step": 3756 + }, + { + "epoch": 0.9977426636568849, + "grad_norm": 1.151070664269376, + "learning_rate": 1.0870463907921512e-05, + "loss": 0.30244824290275574, + "step": 3757 + }, + { + "epoch": 0.9980082326384279, + "grad_norm": 1.0175517300218122, + "learning_rate": 1.086608972494851e-05, + "loss": 0.2610962390899658, + "step": 3758 + }, + { + "epoch": 0.9982738016199708, + "grad_norm": 1.1587347636182326, + "learning_rate": 1.0861715375003623e-05, + "loss": 0.2733536660671234, + "step": 3759 + }, + { + "epoch": 0.9985393706015138, + "grad_norm": 
1.094010099730521, + "learning_rate": 1.0857340858930175e-05, + "loss": 0.2915020287036896, + "step": 3760 + }, + { + "epoch": 0.9988049395830567, + "grad_norm": 1.1164899423303463, + "learning_rate": 1.085296617757152e-05, + "loss": 0.2940186560153961, + "step": 3761 + }, + { + "epoch": 0.9990705085645997, + "grad_norm": 1.1441195343158572, + "learning_rate": 1.0848591331771045e-05, + "loss": 0.3002738952636719, + "step": 3762 + }, + { + "epoch": 0.9993360775461426, + "grad_norm": 1.0530840422742196, + "learning_rate": 1.0844216322372172e-05, + "loss": 0.284588485956192, + "step": 3763 + }, + { + "epoch": 0.9996016465276856, + "grad_norm": 1.0971261053209735, + "learning_rate": 1.0839841150218347e-05, + "loss": 0.29395923018455505, + "step": 3764 + }, + { + "epoch": 0.9998672155092285, + "grad_norm": 1.1355876604442514, + "learning_rate": 1.083546581615305e-05, + "loss": 0.2574613094329834, + "step": 3765 + }, + { + "epoch": 1.0, + "grad_norm": 1.535375625820537, + "learning_rate": 1.0831090321019801e-05, + "loss": 0.177712082862854, + "step": 3766 + }, + { + "epoch": 1.000265568981543, + "grad_norm": 1.1101315935040728, + "learning_rate": 1.0826714665662139e-05, + "loss": 0.29758381843566895, + "step": 3767 + }, + { + "epoch": 1.000531137963086, + "grad_norm": 1.055973006911073, + "learning_rate": 1.0822338850923644e-05, + "loss": 0.23377545177936554, + "step": 3768 + }, + { + "epoch": 1.0007967069446289, + "grad_norm": 1.1573191222761028, + "learning_rate": 1.0817962877647911e-05, + "loss": 0.2505020797252655, + "step": 3769 + }, + { + "epoch": 1.0010622759261718, + "grad_norm": 1.0395021899779042, + "learning_rate": 1.0813586746678584e-05, + "loss": 0.26122647523880005, + "step": 3770 + }, + { + "epoch": 1.0013278449077148, + "grad_norm": 1.1508778318464672, + "learning_rate": 1.0809210458859327e-05, + "loss": 0.27962177991867065, + "step": 3771 + }, + { + "epoch": 1.0015934138892577, + "grad_norm": 1.0479777844917506, + "learning_rate": 1.080483401503384e-05, 
+ "loss": 0.21921640634536743, + "step": 3772 + }, + { + "epoch": 1.0018589828708007, + "grad_norm": 1.1277812491041006, + "learning_rate": 1.0800457416045845e-05, + "loss": 0.24623796343803406, + "step": 3773 + }, + { + "epoch": 1.0021245518523436, + "grad_norm": 1.259401152466985, + "learning_rate": 1.0796080662739098e-05, + "loss": 0.3130728006362915, + "step": 3774 + }, + { + "epoch": 1.0023901208338866, + "grad_norm": 1.1209083810179328, + "learning_rate": 1.0791703755957392e-05, + "loss": 0.2548064589500427, + "step": 3775 + }, + { + "epoch": 1.0026556898154295, + "grad_norm": 1.1167206534835417, + "learning_rate": 1.078732669654454e-05, + "loss": 0.20517288148403168, + "step": 3776 + }, + { + "epoch": 1.0029212587969725, + "grad_norm": 1.1055374385175383, + "learning_rate": 1.0782949485344385e-05, + "loss": 0.2634897530078888, + "step": 3777 + }, + { + "epoch": 1.0031868277785154, + "grad_norm": 1.3696848286677328, + "learning_rate": 1.0778572123200804e-05, + "loss": 0.2743223309516907, + "step": 3778 + }, + { + "epoch": 1.0034523967600584, + "grad_norm": 0.9930991365195264, + "learning_rate": 1.0774194610957695e-05, + "loss": 0.24595436453819275, + "step": 3779 + }, + { + "epoch": 1.0037179657416013, + "grad_norm": 1.0885778480679946, + "learning_rate": 1.0769816949459002e-05, + "loss": 0.2508128881454468, + "step": 3780 + }, + { + "epoch": 1.0039835347231443, + "grad_norm": 1.1243431648812525, + "learning_rate": 1.0765439139548677e-05, + "loss": 0.2326367199420929, + "step": 3781 + }, + { + "epoch": 1.0042491037046872, + "grad_norm": 1.1514050771182385, + "learning_rate": 1.0761061182070716e-05, + "loss": 0.2888404130935669, + "step": 3782 + }, + { + "epoch": 1.0045146726862302, + "grad_norm": 1.1399638718055765, + "learning_rate": 1.0756683077869133e-05, + "loss": 0.2804296612739563, + "step": 3783 + }, + { + "epoch": 1.0047802416677731, + "grad_norm": 1.1286027319524963, + "learning_rate": 1.0752304827787979e-05, + "loss": 0.2644953429698944, + "step": 
3784 + }, + { + "epoch": 1.005045810649316, + "grad_norm": 1.2396532451569051, + "learning_rate": 1.0747926432671323e-05, + "loss": 0.297788143157959, + "step": 3785 + }, + { + "epoch": 1.005311379630859, + "grad_norm": 1.065071455363874, + "learning_rate": 1.0743547893363276e-05, + "loss": 0.2644156515598297, + "step": 3786 + }, + { + "epoch": 1.005576948612402, + "grad_norm": 1.1640867578019738, + "learning_rate": 1.073916921070796e-05, + "loss": 0.23818905651569366, + "step": 3787 + }, + { + "epoch": 1.005842517593945, + "grad_norm": 1.11872081222192, + "learning_rate": 1.0734790385549538e-05, + "loss": 0.2544933259487152, + "step": 3788 + }, + { + "epoch": 1.006108086575488, + "grad_norm": 1.0836442452511366, + "learning_rate": 1.0730411418732198e-05, + "loss": 0.2569275498390198, + "step": 3789 + }, + { + "epoch": 1.0063736555570308, + "grad_norm": 1.0348585374954582, + "learning_rate": 1.0726032311100153e-05, + "loss": 0.2248159945011139, + "step": 3790 + }, + { + "epoch": 1.0066392245385738, + "grad_norm": 1.1242207493876892, + "learning_rate": 1.072165306349764e-05, + "loss": 0.25541940331459045, + "step": 3791 + }, + { + "epoch": 1.0069047935201167, + "grad_norm": 9.328291099250833, + "learning_rate": 1.0717273676768924e-05, + "loss": 0.24429568648338318, + "step": 3792 + }, + { + "epoch": 1.0071703625016597, + "grad_norm": 1.0574884647737486, + "learning_rate": 1.0712894151758306e-05, + "loss": 0.2586621344089508, + "step": 3793 + }, + { + "epoch": 1.0074359314832027, + "grad_norm": 1.165205157800888, + "learning_rate": 1.0708514489310103e-05, + "loss": 0.28685104846954346, + "step": 3794 + }, + { + "epoch": 1.0077015004647458, + "grad_norm": 1.1536672746294196, + "learning_rate": 1.0704134690268661e-05, + "loss": 0.2847924530506134, + "step": 3795 + }, + { + "epoch": 1.0079670694462888, + "grad_norm": 1.1168453704329862, + "learning_rate": 1.0699754755478358e-05, + "loss": 0.24646440148353577, + "step": 3796 + }, + { + "epoch": 1.0082326384278317, + 
"grad_norm": 1.217438590106057, + "learning_rate": 1.0695374685783586e-05, + "loss": 0.22286385297775269, + "step": 3797 + }, + { + "epoch": 1.0084982074093747, + "grad_norm": 1.1352166249232278, + "learning_rate": 1.069099448202878e-05, + "loss": 0.2524179518222809, + "step": 3798 + }, + { + "epoch": 1.0087637763909176, + "grad_norm": 1.109981913009372, + "learning_rate": 1.0686614145058387e-05, + "loss": 0.2625758647918701, + "step": 3799 + }, + { + "epoch": 1.0090293453724606, + "grad_norm": 1.0622342238121125, + "learning_rate": 1.0682233675716884e-05, + "loss": 0.25318068265914917, + "step": 3800 + }, + { + "epoch": 1.0092949143540035, + "grad_norm": 1.073699024276181, + "learning_rate": 1.0677853074848774e-05, + "loss": 0.24224570393562317, + "step": 3801 + }, + { + "epoch": 1.0095604833355465, + "grad_norm": 1.1995813349182267, + "learning_rate": 1.0673472343298588e-05, + "loss": 0.28595417737960815, + "step": 3802 + }, + { + "epoch": 1.0098260523170894, + "grad_norm": 1.1558738404506108, + "learning_rate": 1.0669091481910874e-05, + "loss": 0.26894015073776245, + "step": 3803 + }, + { + "epoch": 1.0100916212986324, + "grad_norm": 1.0901744125075639, + "learning_rate": 1.0664710491530214e-05, + "loss": 0.2605208158493042, + "step": 3804 + }, + { + "epoch": 1.0103571902801753, + "grad_norm": 1.082458382717597, + "learning_rate": 1.0660329373001212e-05, + "loss": 0.2595113515853882, + "step": 3805 + }, + { + "epoch": 1.0106227592617183, + "grad_norm": 1.2467081294979763, + "learning_rate": 1.0655948127168494e-05, + "loss": 0.27478674054145813, + "step": 3806 + }, + { + "epoch": 1.0108883282432612, + "grad_norm": 1.0742167098010935, + "learning_rate": 1.0651566754876715e-05, + "loss": 0.2587064504623413, + "step": 3807 + }, + { + "epoch": 1.0111538972248042, + "grad_norm": 1.0593019665426413, + "learning_rate": 1.064718525697055e-05, + "loss": 0.2420537769794464, + "step": 3808 + }, + { + "epoch": 1.0114194662063472, + "grad_norm": 1.1660072059036033, + 
"learning_rate": 1.0642803634294699e-05, + "loss": 0.29424652457237244, + "step": 3809 + }, + { + "epoch": 1.01168503518789, + "grad_norm": 1.0902934718743655, + "learning_rate": 1.0638421887693887e-05, + "loss": 0.25162142515182495, + "step": 3810 + }, + { + "epoch": 1.011950604169433, + "grad_norm": 1.1456242703963635, + "learning_rate": 1.0634040018012865e-05, + "loss": 0.25661247968673706, + "step": 3811 + }, + { + "epoch": 1.012216173150976, + "grad_norm": 1.0060634238068926, + "learning_rate": 1.0629658026096408e-05, + "loss": 0.2042091339826584, + "step": 3812 + }, + { + "epoch": 1.012481742132519, + "grad_norm": 1.0129340658577524, + "learning_rate": 1.0625275912789307e-05, + "loss": 0.22496266663074493, + "step": 3813 + }, + { + "epoch": 1.012747311114062, + "grad_norm": 1.1382961966722176, + "learning_rate": 1.0620893678936385e-05, + "loss": 0.23609521985054016, + "step": 3814 + }, + { + "epoch": 1.0130128800956049, + "grad_norm": 1.2645443214744188, + "learning_rate": 1.0616511325382486e-05, + "loss": 0.2561722993850708, + "step": 3815 + }, + { + "epoch": 1.0132784490771478, + "grad_norm": 1.1379816472778304, + "learning_rate": 1.0612128852972474e-05, + "loss": 0.2617529630661011, + "step": 3816 + }, + { + "epoch": 1.0135440180586908, + "grad_norm": 1.1862833237483508, + "learning_rate": 1.060774626255124e-05, + "loss": 0.2633543014526367, + "step": 3817 + }, + { + "epoch": 1.0138095870402337, + "grad_norm": 1.0263666085354948, + "learning_rate": 1.0603363554963693e-05, + "loss": 0.19401729106903076, + "step": 3818 + }, + { + "epoch": 1.0140751560217767, + "grad_norm": 1.0891094169836097, + "learning_rate": 1.0598980731054765e-05, + "loss": 0.2583369016647339, + "step": 3819 + }, + { + "epoch": 1.0143407250033196, + "grad_norm": 1.1826598806695992, + "learning_rate": 1.0594597791669419e-05, + "loss": 0.26138922572135925, + "step": 3820 + }, + { + "epoch": 1.0146062939848626, + "grad_norm": 1.1580137447688548, + "learning_rate": 1.0590214737652632e-05, + 
"loss": 0.2506800591945648, + "step": 3821 + }, + { + "epoch": 1.0148718629664055, + "grad_norm": 1.032579662550809, + "learning_rate": 1.0585831569849405e-05, + "loss": 0.21569974720478058, + "step": 3822 + }, + { + "epoch": 1.0151374319479485, + "grad_norm": 1.37079648056154, + "learning_rate": 1.0581448289104759e-05, + "loss": 0.2765602767467499, + "step": 3823 + }, + { + "epoch": 1.0154030009294914, + "grad_norm": 1.2046968903946047, + "learning_rate": 1.0577064896263743e-05, + "loss": 0.25180384516716003, + "step": 3824 + }, + { + "epoch": 1.0156685699110344, + "grad_norm": 1.0796182560924539, + "learning_rate": 1.0572681392171417e-05, + "loss": 0.24164071679115295, + "step": 3825 + }, + { + "epoch": 1.0159341388925773, + "grad_norm": 1.1523354919316235, + "learning_rate": 1.0568297777672875e-05, + "loss": 0.24206972122192383, + "step": 3826 + }, + { + "epoch": 1.0161997078741203, + "grad_norm": 1.115771237946875, + "learning_rate": 1.0563914053613227e-05, + "loss": 0.24563468992710114, + "step": 3827 + }, + { + "epoch": 1.0164652768556632, + "grad_norm": 1.121826691352643, + "learning_rate": 1.0559530220837593e-05, + "loss": 0.23226243257522583, + "step": 3828 + }, + { + "epoch": 1.0167308458372062, + "grad_norm": 1.4499652400392462, + "learning_rate": 1.0555146280191137e-05, + "loss": 0.2245083749294281, + "step": 3829 + }, + { + "epoch": 1.0169964148187491, + "grad_norm": 1.1230707875328865, + "learning_rate": 1.0550762232519023e-05, + "loss": 0.24455049633979797, + "step": 3830 + }, + { + "epoch": 1.017261983800292, + "grad_norm": 1.1434011419253403, + "learning_rate": 1.0546378078666448e-05, + "loss": 0.2540651857852936, + "step": 3831 + }, + { + "epoch": 1.017527552781835, + "grad_norm": 1.222189193306495, + "learning_rate": 1.0541993819478622e-05, + "loss": 0.23392565548419952, + "step": 3832 + }, + { + "epoch": 1.017793121763378, + "grad_norm": 1.239236731837986, + "learning_rate": 1.053760945580078e-05, + "loss": 0.21601927280426025, + "step": 3833 + 
}, + { + "epoch": 1.018058690744921, + "grad_norm": 1.1697918037357793, + "learning_rate": 1.0533224988478176e-05, + "loss": 0.24622616171836853, + "step": 3834 + }, + { + "epoch": 1.018324259726464, + "grad_norm": 1.186224891573799, + "learning_rate": 1.0528840418356086e-05, + "loss": 0.2774650752544403, + "step": 3835 + }, + { + "epoch": 1.0185898287080069, + "grad_norm": 1.1218094293898884, + "learning_rate": 1.0524455746279795e-05, + "loss": 0.22323890030384064, + "step": 3836 + }, + { + "epoch": 1.0188553976895498, + "grad_norm": 1.0569207532138136, + "learning_rate": 1.0520070973094622e-05, + "loss": 0.21901552379131317, + "step": 3837 + }, + { + "epoch": 1.0191209666710928, + "grad_norm": 1.1936231752235407, + "learning_rate": 1.0515686099645901e-05, + "loss": 0.3037784695625305, + "step": 3838 + }, + { + "epoch": 1.0193865356526357, + "grad_norm": 1.0847362828180318, + "learning_rate": 1.0511301126778984e-05, + "loss": 0.22658365964889526, + "step": 3839 + }, + { + "epoch": 1.0196521046341787, + "grad_norm": 1.09040618490447, + "learning_rate": 1.0506916055339237e-05, + "loss": 0.23144160211086273, + "step": 3840 + }, + { + "epoch": 1.0199176736157216, + "grad_norm": 1.28339134317777, + "learning_rate": 1.0502530886172055e-05, + "loss": 0.25658899545669556, + "step": 3841 + }, + { + "epoch": 1.0201832425972646, + "grad_norm": 0.9689646092731519, + "learning_rate": 1.0498145620122845e-05, + "loss": 0.19658756256103516, + "step": 3842 + }, + { + "epoch": 1.0204488115788075, + "grad_norm": 1.0949311372526576, + "learning_rate": 1.049376025803703e-05, + "loss": 0.19045208394527435, + "step": 3843 + }, + { + "epoch": 1.0207143805603505, + "grad_norm": 1.1626763108379607, + "learning_rate": 1.0489374800760066e-05, + "loss": 0.2577810287475586, + "step": 3844 + }, + { + "epoch": 1.0209799495418934, + "grad_norm": 1.1521055149329589, + "learning_rate": 1.048498924913741e-05, + "loss": 0.2807403802871704, + "step": 3845 + }, + { + "epoch": 1.0212455185234364, + 
"grad_norm": 1.2275557893789377, + "learning_rate": 1.0480603604014545e-05, + "loss": 0.2710269093513489, + "step": 3846 + }, + { + "epoch": 1.0215110875049793, + "grad_norm": 1.173604136076929, + "learning_rate": 1.0476217866236974e-05, + "loss": 0.2560620903968811, + "step": 3847 + }, + { + "epoch": 1.0217766564865223, + "grad_norm": 1.1571778426612858, + "learning_rate": 1.0471832036650217e-05, + "loss": 0.2599894404411316, + "step": 3848 + }, + { + "epoch": 1.0220422254680652, + "grad_norm": 1.1339420848197217, + "learning_rate": 1.046744611609981e-05, + "loss": 0.2411944717168808, + "step": 3849 + }, + { + "epoch": 1.0223077944496084, + "grad_norm": 1.1528658942490468, + "learning_rate": 1.0463060105431303e-05, + "loss": 0.25216251611709595, + "step": 3850 + }, + { + "epoch": 1.0225733634311513, + "grad_norm": 1.1884423925105638, + "learning_rate": 1.0458674005490263e-05, + "loss": 0.255629301071167, + "step": 3851 + }, + { + "epoch": 1.0228389324126943, + "grad_norm": 1.0777718220336832, + "learning_rate": 1.0454287817122291e-05, + "loss": 0.24032849073410034, + "step": 3852 + }, + { + "epoch": 1.0231045013942373, + "grad_norm": 1.1154013609024198, + "learning_rate": 1.0449901541172983e-05, + "loss": 0.23188306391239166, + "step": 3853 + }, + { + "epoch": 1.0233700703757802, + "grad_norm": 1.149374478972437, + "learning_rate": 1.0445515178487965e-05, + "loss": 0.2718146741390228, + "step": 3854 + }, + { + "epoch": 1.0236356393573232, + "grad_norm": 1.460691184866812, + "learning_rate": 1.0441128729912876e-05, + "loss": 0.30279839038848877, + "step": 3855 + }, + { + "epoch": 1.023901208338866, + "grad_norm": 1.0711762201816422, + "learning_rate": 1.0436742196293368e-05, + "loss": 0.2185024917125702, + "step": 3856 + }, + { + "epoch": 1.024166777320409, + "grad_norm": 1.2737960148140446, + "learning_rate": 1.0432355578475118e-05, + "loss": 0.2956481873989105, + "step": 3857 + }, + { + "epoch": 1.024432346301952, + "grad_norm": 1.1913794327080105, + 
"learning_rate": 1.0427968877303809e-05, + "loss": 0.28460678458213806, + "step": 3858 + }, + { + "epoch": 1.024697915283495, + "grad_norm": 1.1716718579119476, + "learning_rate": 1.0423582093625146e-05, + "loss": 0.24597057700157166, + "step": 3859 + }, + { + "epoch": 1.024963484265038, + "grad_norm": 0.987642591779768, + "learning_rate": 1.0419195228284856e-05, + "loss": 0.23986583948135376, + "step": 3860 + }, + { + "epoch": 1.0252290532465809, + "grad_norm": 1.0867576400643644, + "learning_rate": 1.0414808282128668e-05, + "loss": 0.2489446997642517, + "step": 3861 + }, + { + "epoch": 1.0254946222281238, + "grad_norm": 1.1200031637603385, + "learning_rate": 1.0410421256002334e-05, + "loss": 0.26777884364128113, + "step": 3862 + }, + { + "epoch": 1.0257601912096668, + "grad_norm": 1.1645962699086565, + "learning_rate": 1.0406034150751625e-05, + "loss": 0.23506489396095276, + "step": 3863 + }, + { + "epoch": 1.0260257601912097, + "grad_norm": 1.1861093965134106, + "learning_rate": 1.040164696722232e-05, + "loss": 0.2526484429836273, + "step": 3864 + }, + { + "epoch": 1.0262913291727527, + "grad_norm": 1.1320109702434422, + "learning_rate": 1.0397259706260216e-05, + "loss": 0.2179267853498459, + "step": 3865 + }, + { + "epoch": 1.0265568981542956, + "grad_norm": 1.0267487594121727, + "learning_rate": 1.0392872368711126e-05, + "loss": 0.2431088387966156, + "step": 3866 + }, + { + "epoch": 1.0268224671358386, + "grad_norm": 1.1394336459602463, + "learning_rate": 1.0388484955420877e-05, + "loss": 0.26101407408714294, + "step": 3867 + }, + { + "epoch": 1.0270880361173815, + "grad_norm": 1.0741553283028158, + "learning_rate": 1.0384097467235308e-05, + "loss": 0.23780573904514313, + "step": 3868 + }, + { + "epoch": 1.0273536050989245, + "grad_norm": 1.467981467949694, + "learning_rate": 1.0379709905000278e-05, + "loss": 0.2469894289970398, + "step": 3869 + }, + { + "epoch": 1.0276191740804674, + "grad_norm": 1.074989572738127, + "learning_rate": 1.0375322269561658e-05, + 
"loss": 0.21271926164627075, + "step": 3870 + }, + { + "epoch": 1.0278847430620104, + "grad_norm": 1.1192343716648714, + "learning_rate": 1.0370934561765331e-05, + "loss": 0.22995726764202118, + "step": 3871 + }, + { + "epoch": 1.0281503120435533, + "grad_norm": 1.2051770162428763, + "learning_rate": 1.0366546782457196e-05, + "loss": 0.27448171377182007, + "step": 3872 + }, + { + "epoch": 1.0284158810250963, + "grad_norm": 1.232887313588547, + "learning_rate": 1.0362158932483165e-05, + "loss": 0.25459539890289307, + "step": 3873 + }, + { + "epoch": 1.0286814500066392, + "grad_norm": 1.1436601222318827, + "learning_rate": 1.0357771012689162e-05, + "loss": 0.23213380575180054, + "step": 3874 + }, + { + "epoch": 1.0289470189881822, + "grad_norm": 1.107979602389345, + "learning_rate": 1.0353383023921127e-05, + "loss": 0.2219776064157486, + "step": 3875 + }, + { + "epoch": 1.0292125879697251, + "grad_norm": 1.2445278934711803, + "learning_rate": 1.0348994967025012e-05, + "loss": 0.27059125900268555, + "step": 3876 + }, + { + "epoch": 1.029478156951268, + "grad_norm": 1.2314072238589235, + "learning_rate": 1.034460684284678e-05, + "loss": 0.26921501755714417, + "step": 3877 + }, + { + "epoch": 1.029743725932811, + "grad_norm": 1.153389282583655, + "learning_rate": 1.0340218652232419e-05, + "loss": 0.24727991223335266, + "step": 3878 + }, + { + "epoch": 1.030009294914354, + "grad_norm": 1.2105369925319034, + "learning_rate": 1.0335830396027912e-05, + "loss": 0.26276054978370667, + "step": 3879 + }, + { + "epoch": 1.030274863895897, + "grad_norm": 1.1222835146983237, + "learning_rate": 1.0331442075079268e-05, + "loss": 0.25906458497047424, + "step": 3880 + }, + { + "epoch": 1.03054043287744, + "grad_norm": 1.1936099182612667, + "learning_rate": 1.0327053690232498e-05, + "loss": 0.2708794176578522, + "step": 3881 + }, + { + "epoch": 1.0308060018589829, + "grad_norm": 1.1283814494585969, + "learning_rate": 1.0322665242333634e-05, + "loss": 0.24968653917312622, + "step": 3882 
+ }, + { + "epoch": 1.0310715708405258, + "grad_norm": 1.1912763351930955, + "learning_rate": 1.0318276732228716e-05, + "loss": 0.2669135332107544, + "step": 3883 + }, + { + "epoch": 1.0313371398220688, + "grad_norm": 1.0733368423352447, + "learning_rate": 1.0313888160763799e-05, + "loss": 0.24173730611801147, + "step": 3884 + }, + { + "epoch": 1.0316027088036117, + "grad_norm": 1.4084549111395024, + "learning_rate": 1.0309499528784948e-05, + "loss": 0.27513059973716736, + "step": 3885 + }, + { + "epoch": 1.0318682777851547, + "grad_norm": 1.163470416419209, + "learning_rate": 1.0305110837138235e-05, + "loss": 0.2512688934803009, + "step": 3886 + }, + { + "epoch": 1.0321338467666976, + "grad_norm": 1.100016135139411, + "learning_rate": 1.0300722086669753e-05, + "loss": 0.2584962844848633, + "step": 3887 + }, + { + "epoch": 1.0323994157482406, + "grad_norm": 1.1125458904355436, + "learning_rate": 1.0296333278225599e-05, + "loss": 0.23692303895950317, + "step": 3888 + }, + { + "epoch": 1.0326649847297835, + "grad_norm": 1.1981051682884363, + "learning_rate": 1.0291944412651884e-05, + "loss": 0.2570871114730835, + "step": 3889 + }, + { + "epoch": 1.0329305537113265, + "grad_norm": 1.1839354606788588, + "learning_rate": 1.028755549079473e-05, + "loss": 0.2896367609500885, + "step": 3890 + }, + { + "epoch": 1.0331961226928694, + "grad_norm": 0.958593784491898, + "learning_rate": 1.0283166513500267e-05, + "loss": 0.19990365207195282, + "step": 3891 + }, + { + "epoch": 1.0334616916744124, + "grad_norm": 1.1157517117826752, + "learning_rate": 1.0278777481614639e-05, + "loss": 0.25235646963119507, + "step": 3892 + }, + { + "epoch": 1.0337272606559553, + "grad_norm": 1.1808927381569394, + "learning_rate": 1.0274388395984003e-05, + "loss": 0.23675012588500977, + "step": 3893 + }, + { + "epoch": 1.0339928296374983, + "grad_norm": 1.1370597202642294, + "learning_rate": 1.026999925745452e-05, + "loss": 0.250516414642334, + "step": 3894 + }, + { + "epoch": 1.0342583986190412, + 
"grad_norm": 1.0692414219621886, + "learning_rate": 1.0265610066872365e-05, + "loss": 0.24573490023612976, + "step": 3895 + }, + { + "epoch": 1.0345239676005842, + "grad_norm": 1.085358990363196, + "learning_rate": 1.026122082508372e-05, + "loss": 0.2473086714744568, + "step": 3896 + }, + { + "epoch": 1.0347895365821271, + "grad_norm": 1.162338198859519, + "learning_rate": 1.0256831532934783e-05, + "loss": 0.26546406745910645, + "step": 3897 + }, + { + "epoch": 1.03505510556367, + "grad_norm": 1.1034436628854154, + "learning_rate": 1.0252442191271754e-05, + "loss": 0.2565246522426605, + "step": 3898 + }, + { + "epoch": 1.035320674545213, + "grad_norm": 1.0272875416109402, + "learning_rate": 1.0248052800940846e-05, + "loss": 0.24923476576805115, + "step": 3899 + }, + { + "epoch": 1.035586243526756, + "grad_norm": 1.1519345059696067, + "learning_rate": 1.0243663362788286e-05, + "loss": 0.3079240322113037, + "step": 3900 + }, + { + "epoch": 1.035851812508299, + "grad_norm": 1.0586971174066726, + "learning_rate": 1.0239273877660302e-05, + "loss": 0.2482951581478119, + "step": 3901 + }, + { + "epoch": 1.036117381489842, + "grad_norm": 1.1495296797401515, + "learning_rate": 1.0234884346403138e-05, + "loss": 0.2626204192638397, + "step": 3902 + }, + { + "epoch": 1.0363829504713848, + "grad_norm": 1.0578834148114886, + "learning_rate": 1.023049476986304e-05, + "loss": 0.23181654512882233, + "step": 3903 + }, + { + "epoch": 1.0366485194529278, + "grad_norm": 1.2527800012652353, + "learning_rate": 1.0226105148886272e-05, + "loss": 0.29164040088653564, + "step": 3904 + }, + { + "epoch": 1.0369140884344707, + "grad_norm": 1.034136654365203, + "learning_rate": 1.0221715484319094e-05, + "loss": 0.22025801241397858, + "step": 3905 + }, + { + "epoch": 1.0371796574160137, + "grad_norm": 1.1162047929812215, + "learning_rate": 1.021732577700779e-05, + "loss": 0.2819385826587677, + "step": 3906 + }, + { + "epoch": 1.0374452263975567, + "grad_norm": 1.0524498644463125, + 
"learning_rate": 1.0212936027798637e-05, + "loss": 0.24709002673625946, + "step": 3907 + }, + { + "epoch": 1.0377107953790998, + "grad_norm": 0.9984579723832369, + "learning_rate": 1.0208546237537928e-05, + "loss": 0.22570034861564636, + "step": 3908 + }, + { + "epoch": 1.0379763643606428, + "grad_norm": 1.1543900299803864, + "learning_rate": 1.0204156407071964e-05, + "loss": 0.25642865896224976, + "step": 3909 + }, + { + "epoch": 1.0382419333421857, + "grad_norm": 1.1657404882715603, + "learning_rate": 1.0199766537247053e-05, + "loss": 0.25970256328582764, + "step": 3910 + }, + { + "epoch": 1.0385075023237287, + "grad_norm": 1.1347864223586095, + "learning_rate": 1.019537662890951e-05, + "loss": 0.2560003101825714, + "step": 3911 + }, + { + "epoch": 1.0387730713052716, + "grad_norm": 1.3160565196765366, + "learning_rate": 1.0190986682905656e-05, + "loss": 0.28138649463653564, + "step": 3912 + }, + { + "epoch": 1.0390386402868146, + "grad_norm": 1.4353879235637104, + "learning_rate": 1.0186596700081825e-05, + "loss": 0.23531222343444824, + "step": 3913 + }, + { + "epoch": 1.0393042092683575, + "grad_norm": 1.1850676655471586, + "learning_rate": 1.018220668128435e-05, + "loss": 0.24912862479686737, + "step": 3914 + }, + { + "epoch": 1.0395697782499005, + "grad_norm": 1.0811585337632708, + "learning_rate": 1.0177816627359575e-05, + "loss": 0.24188724160194397, + "step": 3915 + }, + { + "epoch": 1.0398353472314434, + "grad_norm": 1.2093489820950423, + "learning_rate": 1.0173426539153853e-05, + "loss": 0.2709474563598633, + "step": 3916 + }, + { + "epoch": 1.0401009162129864, + "grad_norm": 1.1793292324294091, + "learning_rate": 1.0169036417513538e-05, + "loss": 0.2400204837322235, + "step": 3917 + }, + { + "epoch": 1.0403664851945293, + "grad_norm": 1.0489256907825586, + "learning_rate": 1.0164646263284993e-05, + "loss": 0.2687132954597473, + "step": 3918 + }, + { + "epoch": 1.0406320541760723, + "grad_norm": 1.1628887826217675, + "learning_rate": 
1.0160256077314592e-05, + "loss": 0.25139346718788147, + "step": 3919 + }, + { + "epoch": 1.0408976231576152, + "grad_norm": 1.1762633281473511, + "learning_rate": 1.0155865860448712e-05, + "loss": 0.25873464345932007, + "step": 3920 + }, + { + "epoch": 1.0411631921391582, + "grad_norm": 1.1207165962030725, + "learning_rate": 1.0151475613533732e-05, + "loss": 0.2510434687137604, + "step": 3921 + }, + { + "epoch": 1.0414287611207012, + "grad_norm": 1.2260247662339232, + "learning_rate": 1.0147085337416036e-05, + "loss": 0.24567106366157532, + "step": 3922 + }, + { + "epoch": 1.041694330102244, + "grad_norm": 1.1642096823951156, + "learning_rate": 1.0142695032942024e-05, + "loss": 0.25028282403945923, + "step": 3923 + }, + { + "epoch": 1.041959899083787, + "grad_norm": 1.140963361472911, + "learning_rate": 1.0138304700958096e-05, + "loss": 0.23542484641075134, + "step": 3924 + }, + { + "epoch": 1.04222546806533, + "grad_norm": 1.2475887570620718, + "learning_rate": 1.0133914342310649e-05, + "loss": 0.28974449634552, + "step": 3925 + }, + { + "epoch": 1.042491037046873, + "grad_norm": 1.0648736453755918, + "learning_rate": 1.0129523957846097e-05, + "loss": 0.23417247831821442, + "step": 3926 + }, + { + "epoch": 1.042756606028416, + "grad_norm": 1.1427047582178407, + "learning_rate": 1.0125133548410852e-05, + "loss": 0.23247018456459045, + "step": 3927 + }, + { + "epoch": 1.0430221750099589, + "grad_norm": 1.1496713132119072, + "learning_rate": 1.0120743114851337e-05, + "loss": 0.23860129714012146, + "step": 3928 + }, + { + "epoch": 1.0432877439915018, + "grad_norm": 1.1567405333157526, + "learning_rate": 1.0116352658013973e-05, + "loss": 0.2609105706214905, + "step": 3929 + }, + { + "epoch": 1.0435533129730448, + "grad_norm": 1.2453984448185509, + "learning_rate": 1.0111962178745187e-05, + "loss": 0.2559507489204407, + "step": 3930 + }, + { + "epoch": 1.0438188819545877, + "grad_norm": 1.2247288020965454, + "learning_rate": 1.0107571677891415e-05, + "loss": 
0.2708527147769928, + "step": 3931 + }, + { + "epoch": 1.0440844509361307, + "grad_norm": 1.2373037230453465, + "learning_rate": 1.0103181156299091e-05, + "loss": 0.25884875655174255, + "step": 3932 + }, + { + "epoch": 1.0443500199176736, + "grad_norm": 1.3022673165052032, + "learning_rate": 1.0098790614814658e-05, + "loss": 0.2631877660751343, + "step": 3933 + }, + { + "epoch": 1.0446155888992166, + "grad_norm": 1.0267097797291302, + "learning_rate": 1.0094400054284559e-05, + "loss": 0.27179086208343506, + "step": 3934 + }, + { + "epoch": 1.0448811578807595, + "grad_norm": 2.1081344450494144, + "learning_rate": 1.0090009475555245e-05, + "loss": 0.21690386533737183, + "step": 3935 + }, + { + "epoch": 1.0451467268623025, + "grad_norm": 1.0188398651288513, + "learning_rate": 1.0085618879473162e-05, + "loss": 0.20192815363407135, + "step": 3936 + }, + { + "epoch": 1.0454122958438454, + "grad_norm": 1.213624997308106, + "learning_rate": 1.0081228266884773e-05, + "loss": 0.2680777907371521, + "step": 3937 + }, + { + "epoch": 1.0456778648253884, + "grad_norm": 1.1871222610891168, + "learning_rate": 1.007683763863653e-05, + "loss": 0.2566579580307007, + "step": 3938 + }, + { + "epoch": 1.0459434338069313, + "grad_norm": 1.1229802475790265, + "learning_rate": 1.0072446995574895e-05, + "loss": 0.2508152723312378, + "step": 3939 + }, + { + "epoch": 1.0462090027884743, + "grad_norm": 1.0850640213400236, + "learning_rate": 1.0068056338546335e-05, + "loss": 0.2880190908908844, + "step": 3940 + }, + { + "epoch": 1.0464745717700172, + "grad_norm": 1.1129549761108044, + "learning_rate": 1.0063665668397316e-05, + "loss": 0.2646787464618683, + "step": 3941 + }, + { + "epoch": 1.0467401407515602, + "grad_norm": 1.1116528447502043, + "learning_rate": 1.0059274985974305e-05, + "loss": 0.2327616810798645, + "step": 3942 + }, + { + "epoch": 1.0470057097331031, + "grad_norm": 1.1644185595792014, + "learning_rate": 1.0054884292123778e-05, + "loss": 0.24756258726119995, + "step": 3943 + }, 
+ { + "epoch": 1.047271278714646, + "grad_norm": 1.1010853288322209, + "learning_rate": 1.0050493587692207e-05, + "loss": 0.23657771944999695, + "step": 3944 + }, + { + "epoch": 1.047536847696189, + "grad_norm": 1.1386107444709148, + "learning_rate": 1.0046102873526068e-05, + "loss": 0.2541351616382599, + "step": 3945 + }, + { + "epoch": 1.047802416677732, + "grad_norm": 1.0912263009271301, + "learning_rate": 1.0041712150471839e-05, + "loss": 0.2330317348241806, + "step": 3946 + }, + { + "epoch": 1.048067985659275, + "grad_norm": 1.0696190454357721, + "learning_rate": 1.0037321419375997e-05, + "loss": 0.23411181569099426, + "step": 3947 + }, + { + "epoch": 1.048333554640818, + "grad_norm": 1.1223872975815399, + "learning_rate": 1.0032930681085028e-05, + "loss": 0.2605017125606537, + "step": 3948 + }, + { + "epoch": 1.0485991236223609, + "grad_norm": 1.1766579775240698, + "learning_rate": 1.0028539936445407e-05, + "loss": 0.28651514649391174, + "step": 3949 + }, + { + "epoch": 1.0488646926039038, + "grad_norm": 1.1469362905517786, + "learning_rate": 1.0024149186303628e-05, + "loss": 0.22912876307964325, + "step": 3950 + }, + { + "epoch": 1.0491302615854468, + "grad_norm": 1.206814749340921, + "learning_rate": 1.001975843150617e-05, + "loss": 0.24032847583293915, + "step": 3951 + }, + { + "epoch": 1.0493958305669897, + "grad_norm": 1.0089656289438405, + "learning_rate": 1.0015367672899521e-05, + "loss": 0.17826229333877563, + "step": 3952 + }, + { + "epoch": 1.0496613995485327, + "grad_norm": 1.1440301784208975, + "learning_rate": 1.0010976911330163e-05, + "loss": 0.2619745433330536, + "step": 3953 + }, + { + "epoch": 1.0499269685300756, + "grad_norm": 1.1124743886634039, + "learning_rate": 1.0006586147644585e-05, + "loss": 0.24104374647140503, + "step": 3954 + }, + { + "epoch": 1.0501925375116186, + "grad_norm": 1.2465051058358483, + "learning_rate": 1.0002195382689277e-05, + "loss": 0.22913998365402222, + "step": 3955 + }, + { + "epoch": 1.0504581064931615, + 
"grad_norm": 1.2288244416278613, + "learning_rate": 9.997804617310724e-06, + "loss": 0.2625126838684082, + "step": 3956 + }, + { + "epoch": 1.0507236754747045, + "grad_norm": 1.1016811290492863, + "learning_rate": 9.993413852355416e-06, + "loss": 0.23098430037498474, + "step": 3957 + }, + { + "epoch": 1.0509892444562474, + "grad_norm": 1.2581954843436995, + "learning_rate": 9.98902308866984e-06, + "loss": 0.2866731882095337, + "step": 3958 + }, + { + "epoch": 1.0512548134377904, + "grad_norm": 1.2595027481112393, + "learning_rate": 9.984632327100482e-06, + "loss": 0.2520306706428528, + "step": 3959 + }, + { + "epoch": 1.0515203824193333, + "grad_norm": 1.2731218614589663, + "learning_rate": 9.980241568493834e-06, + "loss": 0.29688766598701477, + "step": 3960 + }, + { + "epoch": 1.0517859514008763, + "grad_norm": 1.2865298416208544, + "learning_rate": 9.975850813696375e-06, + "loss": 0.2876695990562439, + "step": 3961 + }, + { + "epoch": 1.0520515203824194, + "grad_norm": 1.1190033835182807, + "learning_rate": 9.971460063554595e-06, + "loss": 0.2402629554271698, + "step": 3962 + }, + { + "epoch": 1.0523170893639624, + "grad_norm": 1.288030170241207, + "learning_rate": 9.967069318914977e-06, + "loss": 0.32080164551734924, + "step": 3963 + }, + { + "epoch": 1.0525826583455054, + "grad_norm": 1.3484684025161604, + "learning_rate": 9.962678580624008e-06, + "loss": 0.2642936110496521, + "step": 3964 + }, + { + "epoch": 1.0528482273270483, + "grad_norm": 1.1668064537758471, + "learning_rate": 9.958287849528163e-06, + "loss": 0.255870521068573, + "step": 3965 + }, + { + "epoch": 1.0531137963085913, + "grad_norm": 1.1779058124731279, + "learning_rate": 9.953897126473933e-06, + "loss": 0.2695184350013733, + "step": 3966 + }, + { + "epoch": 1.0533793652901342, + "grad_norm": 1.1937956388734083, + "learning_rate": 9.949506412307795e-06, + "loss": 0.24576464295387268, + "step": 3967 + }, + { + "epoch": 1.0536449342716772, + "grad_norm": 1.210893055599799, + "learning_rate": 
9.945115707876224e-06, + "loss": 0.26517459750175476, + "step": 3968 + }, + { + "epoch": 1.05391050325322, + "grad_norm": 1.261309936483727, + "learning_rate": 9.940725014025696e-06, + "loss": 0.30468082427978516, + "step": 3969 + }, + { + "epoch": 1.054176072234763, + "grad_norm": 1.1007633858966879, + "learning_rate": 9.936334331602687e-06, + "loss": 0.25299298763275146, + "step": 3970 + }, + { + "epoch": 1.054441641216306, + "grad_norm": 1.1621642625136148, + "learning_rate": 9.931943661453668e-06, + "loss": 0.2659488320350647, + "step": 3971 + }, + { + "epoch": 1.054707210197849, + "grad_norm": 1.129768041847351, + "learning_rate": 9.92755300442511e-06, + "loss": 0.25957295298576355, + "step": 3972 + }, + { + "epoch": 1.054972779179392, + "grad_norm": 1.0969185518732962, + "learning_rate": 9.923162361363476e-06, + "loss": 0.2416645884513855, + "step": 3973 + }, + { + "epoch": 1.0552383481609349, + "grad_norm": 1.1032067417924427, + "learning_rate": 9.91877173311523e-06, + "loss": 0.2627662122249603, + "step": 3974 + }, + { + "epoch": 1.0555039171424778, + "grad_norm": 1.1485553701369502, + "learning_rate": 9.91438112052684e-06, + "loss": 0.2876631021499634, + "step": 3975 + }, + { + "epoch": 1.0557694861240208, + "grad_norm": 1.1306607772682384, + "learning_rate": 9.90999052444476e-06, + "loss": 0.28336596488952637, + "step": 3976 + }, + { + "epoch": 1.0560350551055637, + "grad_norm": 1.266085815857313, + "learning_rate": 9.905599945715443e-06, + "loss": 0.2970484495162964, + "step": 3977 + }, + { + "epoch": 1.0563006240871067, + "grad_norm": 1.188464425479595, + "learning_rate": 9.901209385185345e-06, + "loss": 0.27202755212783813, + "step": 3978 + }, + { + "epoch": 1.0565661930686496, + "grad_norm": 1.0823738866829473, + "learning_rate": 9.896818843700912e-06, + "loss": 0.2702459990978241, + "step": 3979 + }, + { + "epoch": 1.0568317620501926, + "grad_norm": 1.2166105195755876, + "learning_rate": 9.89242832210859e-06, + "loss": 0.26057881116867065, + "step": 
3980 + }, + { + "epoch": 1.0570973310317355, + "grad_norm": 1.1526398422075472, + "learning_rate": 9.888037821254816e-06, + "loss": 0.24006876349449158, + "step": 3981 + }, + { + "epoch": 1.0573629000132785, + "grad_norm": 1.0864441989704317, + "learning_rate": 9.883647341986032e-06, + "loss": 0.2437625676393509, + "step": 3982 + }, + { + "epoch": 1.0576284689948214, + "grad_norm": 1.0572722810626467, + "learning_rate": 9.879256885148666e-06, + "loss": 0.24256819486618042, + "step": 3983 + }, + { + "epoch": 1.0578940379763644, + "grad_norm": 1.2008491436753201, + "learning_rate": 9.874866451589151e-06, + "loss": 0.2714581787586212, + "step": 3984 + }, + { + "epoch": 1.0581596069579073, + "grad_norm": 1.1859043120388024, + "learning_rate": 9.870476042153907e-06, + "loss": 0.30309075117111206, + "step": 3985 + }, + { + "epoch": 1.0584251759394503, + "grad_norm": 1.3001941243887445, + "learning_rate": 9.866085657689355e-06, + "loss": 0.2938288450241089, + "step": 3986 + }, + { + "epoch": 1.0586907449209932, + "grad_norm": 1.1041962963159588, + "learning_rate": 9.86169529904191e-06, + "loss": 0.23748518526554108, + "step": 3987 + }, + { + "epoch": 1.0589563139025362, + "grad_norm": 1.2345572480055271, + "learning_rate": 9.857304967057977e-06, + "loss": 0.2883969247341156, + "step": 3988 + }, + { + "epoch": 1.0592218828840791, + "grad_norm": 1.0871048681541509, + "learning_rate": 9.852914662583966e-06, + "loss": 0.28301289677619934, + "step": 3989 + }, + { + "epoch": 1.059487451865622, + "grad_norm": 1.0733060702724175, + "learning_rate": 9.848524386466273e-06, + "loss": 0.22616548836231232, + "step": 3990 + }, + { + "epoch": 1.059753020847165, + "grad_norm": 1.06530549901144, + "learning_rate": 9.844134139551291e-06, + "loss": 0.2282804250717163, + "step": 3991 + }, + { + "epoch": 1.060018589828708, + "grad_norm": 1.154557745213229, + "learning_rate": 9.839743922685408e-06, + "loss": 0.2407834678888321, + "step": 3992 + }, + { + "epoch": 1.060284158810251, + 
"grad_norm": 1.0504099183304738, + "learning_rate": 9.835353736715007e-06, + "loss": 0.22690361738204956, + "step": 3993 + }, + { + "epoch": 1.060549727791794, + "grad_norm": 1.529267187296219, + "learning_rate": 9.830963582486465e-06, + "loss": 0.23291411995887756, + "step": 3994 + }, + { + "epoch": 1.0608152967733369, + "grad_norm": 1.0804914844168854, + "learning_rate": 9.82657346084615e-06, + "loss": 0.24524198472499847, + "step": 3995 + }, + { + "epoch": 1.0610808657548798, + "grad_norm": 1.130929241291739, + "learning_rate": 9.822183372640426e-06, + "loss": 0.22087743878364563, + "step": 3996 + }, + { + "epoch": 1.0613464347364228, + "grad_norm": 1.1374060021264791, + "learning_rate": 9.817793318715652e-06, + "loss": 0.2459079772233963, + "step": 3997 + }, + { + "epoch": 1.0616120037179657, + "grad_norm": 1.1393890830478974, + "learning_rate": 9.813403299918178e-06, + "loss": 0.24429920315742493, + "step": 3998 + }, + { + "epoch": 1.0618775726995087, + "grad_norm": 1.140499707599593, + "learning_rate": 9.809013317094345e-06, + "loss": 0.2332335114479065, + "step": 3999 + }, + { + "epoch": 1.0621431416810516, + "grad_norm": 1.2157908167694267, + "learning_rate": 9.804623371090493e-06, + "loss": 0.2861659526824951, + "step": 4000 + }, + { + "epoch": 1.0624087106625946, + "grad_norm": 1.1293440606459217, + "learning_rate": 9.800233462752949e-06, + "loss": 0.22731532156467438, + "step": 4001 + }, + { + "epoch": 1.0626742796441375, + "grad_norm": 1.127775309467411, + "learning_rate": 9.795843592928036e-06, + "loss": 0.245025634765625, + "step": 4002 + }, + { + "epoch": 1.0629398486256805, + "grad_norm": 1.2380242649872155, + "learning_rate": 9.791453762462075e-06, + "loss": 0.2826273441314697, + "step": 4003 + }, + { + "epoch": 1.0632054176072234, + "grad_norm": 1.1330484645300947, + "learning_rate": 9.787063972201368e-06, + "loss": 0.24737229943275452, + "step": 4004 + }, + { + "epoch": 1.0634709865887664, + "grad_norm": 1.3814870803010457, + "learning_rate": 
9.782674222992214e-06, + "loss": 0.23368477821350098, + "step": 4005 + }, + { + "epoch": 1.0637365555703093, + "grad_norm": 1.2631953536046527, + "learning_rate": 9.778284515680908e-06, + "loss": 0.2754492461681366, + "step": 4006 + }, + { + "epoch": 1.0640021245518523, + "grad_norm": 1.1906091191722363, + "learning_rate": 9.773894851113732e-06, + "loss": 0.2814168334007263, + "step": 4007 + }, + { + "epoch": 1.0642676935333952, + "grad_norm": 1.1594492512554253, + "learning_rate": 9.769505230136962e-06, + "loss": 0.25388047099113464, + "step": 4008 + }, + { + "epoch": 1.0645332625149382, + "grad_norm": 1.2618382745485697, + "learning_rate": 9.765115653596867e-06, + "loss": 0.25435230135917664, + "step": 4009 + }, + { + "epoch": 1.0647988314964811, + "grad_norm": 1.2251032153283614, + "learning_rate": 9.760726122339698e-06, + "loss": 0.265840083360672, + "step": 4010 + }, + { + "epoch": 1.065064400478024, + "grad_norm": 1.1297656349054435, + "learning_rate": 9.756336637211716e-06, + "loss": 0.2533451020717621, + "step": 4011 + }, + { + "epoch": 1.065329969459567, + "grad_norm": 1.0890158421111886, + "learning_rate": 9.751947199059155e-06, + "loss": 0.25214290618896484, + "step": 4012 + }, + { + "epoch": 1.06559553844111, + "grad_norm": 1.0603532415232781, + "learning_rate": 9.74755780872825e-06, + "loss": 0.25039419531822205, + "step": 4013 + }, + { + "epoch": 1.065861107422653, + "grad_norm": 1.0177623632775965, + "learning_rate": 9.74316846706522e-06, + "loss": 0.21251091361045837, + "step": 4014 + }, + { + "epoch": 1.066126676404196, + "grad_norm": 1.123294230398497, + "learning_rate": 9.738779174916281e-06, + "loss": 0.25898969173431396, + "step": 4015 + }, + { + "epoch": 1.0663922453857388, + "grad_norm": 1.1054663361669936, + "learning_rate": 9.734389933127639e-06, + "loss": 0.2655499577522278, + "step": 4016 + }, + { + "epoch": 1.0666578143672818, + "grad_norm": 1.1153507141873742, + "learning_rate": 9.730000742545485e-06, + "loss": 0.2221338450908661, + 
"step": 4017 + }, + { + "epoch": 1.0669233833488247, + "grad_norm": 1.1746716643835395, + "learning_rate": 9.725611604016002e-06, + "loss": 0.2567589581012726, + "step": 4018 + }, + { + "epoch": 1.0671889523303677, + "grad_norm": 1.1090772377521565, + "learning_rate": 9.721222518385361e-06, + "loss": 0.24440976977348328, + "step": 4019 + }, + { + "epoch": 1.0674545213119107, + "grad_norm": 1.061787642846094, + "learning_rate": 9.716833486499735e-06, + "loss": 0.2229192852973938, + "step": 4020 + }, + { + "epoch": 1.0677200902934538, + "grad_norm": 1.1014121727705226, + "learning_rate": 9.712444509205273e-06, + "loss": 0.26231470704078674, + "step": 4021 + }, + { + "epoch": 1.0679856592749968, + "grad_norm": 1.2531191320236732, + "learning_rate": 9.708055587348119e-06, + "loss": 0.25099092721939087, + "step": 4022 + }, + { + "epoch": 1.0682512282565397, + "grad_norm": 1.1402160070516023, + "learning_rate": 9.703666721774403e-06, + "loss": 0.22979633510112762, + "step": 4023 + }, + { + "epoch": 1.0685167972380827, + "grad_norm": 1.09571485621585, + "learning_rate": 9.699277913330252e-06, + "loss": 0.2361093908548355, + "step": 4024 + }, + { + "epoch": 1.0687823662196256, + "grad_norm": 1.0765448804717204, + "learning_rate": 9.694889162861768e-06, + "loss": 0.2390863001346588, + "step": 4025 + }, + { + "epoch": 1.0690479352011686, + "grad_norm": 1.2569917808844517, + "learning_rate": 9.690500471215057e-06, + "loss": 0.24917885661125183, + "step": 4026 + }, + { + "epoch": 1.0693135041827115, + "grad_norm": 1.1387127210628816, + "learning_rate": 9.686111839236206e-06, + "loss": 0.24215272068977356, + "step": 4027 + }, + { + "epoch": 1.0695790731642545, + "grad_norm": 1.2809085503832063, + "learning_rate": 9.681723267771284e-06, + "loss": 0.27874231338500977, + "step": 4028 + }, + { + "epoch": 1.0698446421457974, + "grad_norm": 1.1707122559783085, + "learning_rate": 9.677334757666368e-06, + "loss": 0.24076086282730103, + "step": 4029 + }, + { + "epoch": 
1.0701102111273404, + "grad_norm": 1.1092369229920938, + "learning_rate": 9.672946309767504e-06, + "loss": 0.2444242238998413, + "step": 4030 + }, + { + "epoch": 1.0703757801088833, + "grad_norm": 1.2086874522857378, + "learning_rate": 9.668557924920735e-06, + "loss": 0.2737279236316681, + "step": 4031 + }, + { + "epoch": 1.0706413490904263, + "grad_norm": 1.1006436240463247, + "learning_rate": 9.664169603972091e-06, + "loss": 0.24105575680732727, + "step": 4032 + }, + { + "epoch": 1.0709069180719692, + "grad_norm": 1.336482466569566, + "learning_rate": 9.659781347767584e-06, + "loss": 0.27791836857795715, + "step": 4033 + }, + { + "epoch": 1.0711724870535122, + "grad_norm": 1.1518461528529822, + "learning_rate": 9.655393157153221e-06, + "loss": 0.255472868680954, + "step": 4034 + }, + { + "epoch": 1.0714380560350552, + "grad_norm": 1.371220848551681, + "learning_rate": 9.651005032974994e-06, + "loss": 0.2523707151412964, + "step": 4035 + }, + { + "epoch": 1.071703625016598, + "grad_norm": 1.235756547113907, + "learning_rate": 9.64661697607888e-06, + "loss": 0.24584606289863586, + "step": 4036 + }, + { + "epoch": 1.071969193998141, + "grad_norm": 1.1497174260677319, + "learning_rate": 9.64222898731084e-06, + "loss": 0.25182732939720154, + "step": 4037 + }, + { + "epoch": 1.072234762979684, + "grad_norm": 1.0822892740683951, + "learning_rate": 9.637841067516837e-06, + "loss": 0.254008412361145, + "step": 4038 + }, + { + "epoch": 1.072500331961227, + "grad_norm": 1.080204167750926, + "learning_rate": 9.633453217542806e-06, + "loss": 0.2314324826002121, + "step": 4039 + }, + { + "epoch": 1.07276590094277, + "grad_norm": 1.1139945732367915, + "learning_rate": 9.62906543823467e-06, + "loss": 0.2256058305501938, + "step": 4040 + }, + { + "epoch": 1.0730314699243129, + "grad_norm": 1.283214941862177, + "learning_rate": 9.624677730438344e-06, + "loss": 0.2577894330024719, + "step": 4041 + }, + { + "epoch": 1.0732970389058558, + "grad_norm": 1.0911199623079508, + 
"learning_rate": 9.620290094999723e-06, + "loss": 0.23520560562610626, + "step": 4042 + }, + { + "epoch": 1.0735626078873988, + "grad_norm": 1.1791405346126818, + "learning_rate": 9.615902532764695e-06, + "loss": 0.2472849190235138, + "step": 4043 + }, + { + "epoch": 1.0738281768689417, + "grad_norm": 1.2195787110249676, + "learning_rate": 9.611515044579128e-06, + "loss": 0.25053414702415466, + "step": 4044 + }, + { + "epoch": 1.0740937458504847, + "grad_norm": 1.1090102650773974, + "learning_rate": 9.607127631288879e-06, + "loss": 0.24229007959365845, + "step": 4045 + }, + { + "epoch": 1.0743593148320276, + "grad_norm": 1.4628298980675831, + "learning_rate": 9.602740293739786e-06, + "loss": 0.2793073058128357, + "step": 4046 + }, + { + "epoch": 1.0746248838135706, + "grad_norm": 1.225079236387791, + "learning_rate": 9.598353032777682e-06, + "loss": 0.24547399580478668, + "step": 4047 + }, + { + "epoch": 1.0748904527951135, + "grad_norm": 1.1980997957436126, + "learning_rate": 9.593965849248378e-06, + "loss": 0.2776937186717987, + "step": 4048 + }, + { + "epoch": 1.0751560217766565, + "grad_norm": 1.0781858695117066, + "learning_rate": 9.589578743997668e-06, + "loss": 0.22677727043628693, + "step": 4049 + }, + { + "epoch": 1.0754215907581994, + "grad_norm": 1.4867723677136682, + "learning_rate": 9.585191717871336e-06, + "loss": 0.23254704475402832, + "step": 4050 + }, + { + "epoch": 1.0756871597397424, + "grad_norm": 1.3243435003953368, + "learning_rate": 9.580804771715148e-06, + "loss": 0.2899828255176544, + "step": 4051 + }, + { + "epoch": 1.0759527287212853, + "grad_norm": 1.1397018772236696, + "learning_rate": 9.576417906374856e-06, + "loss": 0.24632850289344788, + "step": 4052 + }, + { + "epoch": 1.0762182977028283, + "grad_norm": 1.2322214200527608, + "learning_rate": 9.572031122696196e-06, + "loss": 0.2661561369895935, + "step": 4053 + }, + { + "epoch": 1.0764838666843712, + "grad_norm": 1.1394013200357536, + "learning_rate": 9.567644421524889e-06, + "loss": 
0.22364279627799988, + "step": 4054 + }, + { + "epoch": 1.0767494356659142, + "grad_norm": 1.5026366502842776, + "learning_rate": 9.563257803706635e-06, + "loss": 0.26748427748680115, + "step": 4055 + }, + { + "epoch": 1.0770150046474571, + "grad_norm": 1.1794922225625246, + "learning_rate": 9.55887127008713e-06, + "loss": 0.22851283848285675, + "step": 4056 + }, + { + "epoch": 1.077280573629, + "grad_norm": 1.1340260741391435, + "learning_rate": 9.554484821512037e-06, + "loss": 0.2456260323524475, + "step": 4057 + }, + { + "epoch": 1.077546142610543, + "grad_norm": 1.2884657617459025, + "learning_rate": 9.55009845882702e-06, + "loss": 0.2556169629096985, + "step": 4058 + }, + { + "epoch": 1.077811711592086, + "grad_norm": 1.274618544457263, + "learning_rate": 9.545712182877714e-06, + "loss": 0.280727744102478, + "step": 4059 + }, + { + "epoch": 1.078077280573629, + "grad_norm": 1.1205087247319334, + "learning_rate": 9.54132599450974e-06, + "loss": 0.25315386056900024, + "step": 4060 + }, + { + "epoch": 1.078342849555172, + "grad_norm": 1.1990539773915618, + "learning_rate": 9.536939894568704e-06, + "loss": 0.21985477209091187, + "step": 4061 + }, + { + "epoch": 1.0786084185367149, + "grad_norm": 1.1575613416248978, + "learning_rate": 9.532553883900196e-06, + "loss": 0.24329043924808502, + "step": 4062 + }, + { + "epoch": 1.0788739875182578, + "grad_norm": 1.173950465827748, + "learning_rate": 9.528167963349786e-06, + "loss": 0.2362256497144699, + "step": 4063 + }, + { + "epoch": 1.0791395564998008, + "grad_norm": 1.1458704347110154, + "learning_rate": 9.523782133763027e-06, + "loss": 0.23685476183891296, + "step": 4064 + }, + { + "epoch": 1.0794051254813437, + "grad_norm": 1.2383774104342302, + "learning_rate": 9.519396395985456e-06, + "loss": 0.26232481002807617, + "step": 4065 + }, + { + "epoch": 1.0796706944628867, + "grad_norm": 1.2768574792534622, + "learning_rate": 9.515010750862594e-06, + "loss": 0.25196313858032227, + "step": 4066 + }, + { + "epoch": 
1.0799362634444296, + "grad_norm": 1.082792256362845, + "learning_rate": 9.510625199239939e-06, + "loss": 0.22520464658737183, + "step": 4067 + }, + { + "epoch": 1.0802018324259726, + "grad_norm": 1.190229461562689, + "learning_rate": 9.506239741962971e-06, + "loss": 0.27422505617141724, + "step": 4068 + }, + { + "epoch": 1.0804674014075155, + "grad_norm": 1.3120430811123187, + "learning_rate": 9.50185437987716e-06, + "loss": 0.2646682560443878, + "step": 4069 + }, + { + "epoch": 1.0807329703890585, + "grad_norm": 1.3425819541318131, + "learning_rate": 9.497469113827949e-06, + "loss": 0.2661365866661072, + "step": 4070 + }, + { + "epoch": 1.0809985393706014, + "grad_norm": 1.1101351469883673, + "learning_rate": 9.493083944660766e-06, + "loss": 0.23156839609146118, + "step": 4071 + }, + { + "epoch": 1.0812641083521444, + "grad_norm": 1.1805541153651362, + "learning_rate": 9.488698873221021e-06, + "loss": 0.25353243947029114, + "step": 4072 + }, + { + "epoch": 1.0815296773336873, + "grad_norm": 1.2862671823918606, + "learning_rate": 9.484313900354099e-06, + "loss": 0.27488404512405396, + "step": 4073 + }, + { + "epoch": 1.0817952463152305, + "grad_norm": 1.4041005997261422, + "learning_rate": 9.479929026905378e-06, + "loss": 0.2580753564834595, + "step": 4074 + }, + { + "epoch": 1.0820608152967734, + "grad_norm": 1.1405056260482733, + "learning_rate": 9.475544253720206e-06, + "loss": 0.2425471544265747, + "step": 4075 + }, + { + "epoch": 1.0823263842783164, + "grad_norm": 1.2040355319488043, + "learning_rate": 9.471159581643918e-06, + "loss": 0.25268295407295227, + "step": 4076 + }, + { + "epoch": 1.0825919532598594, + "grad_norm": 1.1573228524057126, + "learning_rate": 9.466775011521825e-06, + "loss": 0.2683602571487427, + "step": 4077 + }, + { + "epoch": 1.0828575222414023, + "grad_norm": 1.1300610618916742, + "learning_rate": 9.462390544199221e-06, + "loss": 0.24945034086704254, + "step": 4078 + }, + { + "epoch": 1.0831230912229453, + "grad_norm": 
1.1698494765527112, + "learning_rate": 9.458006180521379e-06, + "loss": 0.21784156560897827, + "step": 4079 + }, + { + "epoch": 1.0833886602044882, + "grad_norm": 1.136268907040887, + "learning_rate": 9.453621921333554e-06, + "loss": 0.22704020142555237, + "step": 4080 + }, + { + "epoch": 1.0836542291860312, + "grad_norm": 1.1373990713388034, + "learning_rate": 9.449237767480979e-06, + "loss": 0.2532106637954712, + "step": 4081 + }, + { + "epoch": 1.0839197981675741, + "grad_norm": 1.1568862012297532, + "learning_rate": 9.444853719808864e-06, + "loss": 0.27809134125709534, + "step": 4082 + }, + { + "epoch": 1.084185367149117, + "grad_norm": 1.2102387789201872, + "learning_rate": 9.440469779162407e-06, + "loss": 0.25704264640808105, + "step": 4083 + }, + { + "epoch": 1.08445093613066, + "grad_norm": 1.1827141084910668, + "learning_rate": 9.436085946386778e-06, + "loss": 0.2656276226043701, + "step": 4084 + }, + { + "epoch": 1.084716505112203, + "grad_norm": 1.256991317445651, + "learning_rate": 9.431702222327126e-06, + "loss": 0.277826726436615, + "step": 4085 + }, + { + "epoch": 1.084982074093746, + "grad_norm": 1.2975495041461134, + "learning_rate": 9.427318607828584e-06, + "loss": 0.24656976759433746, + "step": 4086 + }, + { + "epoch": 1.0852476430752889, + "grad_norm": 1.1974770836803283, + "learning_rate": 9.42293510373626e-06, + "loss": 0.2498110830783844, + "step": 4087 + }, + { + "epoch": 1.0855132120568318, + "grad_norm": 1.1492935678310237, + "learning_rate": 9.418551710895243e-06, + "loss": 0.24574093520641327, + "step": 4088 + }, + { + "epoch": 1.0857787810383748, + "grad_norm": 1.2274895872775384, + "learning_rate": 9.414168430150601e-06, + "loss": 0.25271761417388916, + "step": 4089 + }, + { + "epoch": 1.0860443500199177, + "grad_norm": 1.1759358027679858, + "learning_rate": 9.409785262347373e-06, + "loss": 0.29269370436668396, + "step": 4090 + }, + { + "epoch": 1.0863099190014607, + "grad_norm": 1.1247973273146177, + "learning_rate": 
9.405402208330581e-06, + "loss": 0.244449645280838, + "step": 4091 + }, + { + "epoch": 1.0865754879830036, + "grad_norm": 1.186787867713906, + "learning_rate": 9.401019268945237e-06, + "loss": 0.23785406351089478, + "step": 4092 + }, + { + "epoch": 1.0868410569645466, + "grad_norm": 1.1479686632621091, + "learning_rate": 9.39663644503631e-06, + "loss": 0.2493479996919632, + "step": 4093 + }, + { + "epoch": 1.0871066259460895, + "grad_norm": 1.1474347559215512, + "learning_rate": 9.392253737448764e-06, + "loss": 0.23758000135421753, + "step": 4094 + }, + { + "epoch": 1.0873721949276325, + "grad_norm": 1.0946885138749496, + "learning_rate": 9.387871147027528e-06, + "loss": 0.22560475766658783, + "step": 4095 + }, + { + "epoch": 1.0876377639091754, + "grad_norm": 1.1552533162715968, + "learning_rate": 9.383488674617515e-06, + "loss": 0.2558273673057556, + "step": 4096 + }, + { + "epoch": 1.0879033328907184, + "grad_norm": 1.2619180705972233, + "learning_rate": 9.379106321063618e-06, + "loss": 0.2822023034095764, + "step": 4097 + }, + { + "epoch": 1.0881689018722613, + "grad_norm": 1.2076346653444254, + "learning_rate": 9.374724087210698e-06, + "loss": 0.2596978545188904, + "step": 4098 + }, + { + "epoch": 1.0884344708538043, + "grad_norm": 1.6785014002913365, + "learning_rate": 9.370341973903597e-06, + "loss": 0.25353628396987915, + "step": 4099 + }, + { + "epoch": 1.0887000398353472, + "grad_norm": 1.2184499887942242, + "learning_rate": 9.365959981987135e-06, + "loss": 0.2547294497489929, + "step": 4100 + }, + { + "epoch": 1.0889656088168902, + "grad_norm": 1.40658558629773, + "learning_rate": 9.361578112306115e-06, + "loss": 0.2688470780849457, + "step": 4101 + }, + { + "epoch": 1.0892311777984331, + "grad_norm": 1.207208011814592, + "learning_rate": 9.357196365705303e-06, + "loss": 0.25772029161453247, + "step": 4102 + }, + { + "epoch": 1.089496746779976, + "grad_norm": 1.3552039168974384, + "learning_rate": 9.352814743029454e-06, + "loss": 0.2875550091266632, + 
"step": 4103 + }, + { + "epoch": 1.089762315761519, + "grad_norm": 1.4164869081453233, + "learning_rate": 9.34843324512329e-06, + "loss": 0.23085735738277435, + "step": 4104 + }, + { + "epoch": 1.090027884743062, + "grad_norm": 1.2013725541896922, + "learning_rate": 9.34405187283151e-06, + "loss": 0.2607901096343994, + "step": 4105 + }, + { + "epoch": 1.090293453724605, + "grad_norm": 1.1738523720935938, + "learning_rate": 9.339670626998791e-06, + "loss": 0.26165345311164856, + "step": 4106 + }, + { + "epoch": 1.090559022706148, + "grad_norm": 1.1931234826270498, + "learning_rate": 9.335289508469789e-06, + "loss": 0.27884238958358765, + "step": 4107 + }, + { + "epoch": 1.0908245916876909, + "grad_norm": 1.283025870689831, + "learning_rate": 9.33090851808913e-06, + "loss": 0.2689289152622223, + "step": 4108 + }, + { + "epoch": 1.0910901606692338, + "grad_norm": 1.2574326426613287, + "learning_rate": 9.326527656701414e-06, + "loss": 0.2633207440376282, + "step": 4109 + }, + { + "epoch": 1.0913557296507768, + "grad_norm": 1.1611202948336292, + "learning_rate": 9.322146925151226e-06, + "loss": 0.26001888513565063, + "step": 4110 + }, + { + "epoch": 1.0916212986323197, + "grad_norm": 1.1436383156785508, + "learning_rate": 9.31776632428312e-06, + "loss": 0.2739099860191345, + "step": 4111 + }, + { + "epoch": 1.0918868676138627, + "grad_norm": 1.1080458686771364, + "learning_rate": 9.313385854941616e-06, + "loss": 0.24885550141334534, + "step": 4112 + }, + { + "epoch": 1.0921524365954056, + "grad_norm": 1.1643870148920956, + "learning_rate": 9.309005517971222e-06, + "loss": 0.2609873414039612, + "step": 4113 + }, + { + "epoch": 1.0924180055769486, + "grad_norm": 1.427636157796487, + "learning_rate": 9.304625314216415e-06, + "loss": 0.28853538632392883, + "step": 4114 + }, + { + "epoch": 1.0926835745584915, + "grad_norm": 1.072833070391428, + "learning_rate": 9.300245244521647e-06, + "loss": 0.2629924714565277, + "step": 4115 + }, + { + "epoch": 1.0929491435400345, + 
"grad_norm": 1.1804644749067619, + "learning_rate": 9.295865309731342e-06, + "loss": 0.2687820494174957, + "step": 4116 + }, + { + "epoch": 1.0932147125215774, + "grad_norm": 1.0831905202820669, + "learning_rate": 9.2914855106899e-06, + "loss": 0.2293676733970642, + "step": 4117 + }, + { + "epoch": 1.0934802815031204, + "grad_norm": 1.1645005992728827, + "learning_rate": 9.287105848241694e-06, + "loss": 0.25261443853378296, + "step": 4118 + }, + { + "epoch": 1.0937458504846633, + "grad_norm": 1.1209341991417805, + "learning_rate": 9.282726323231077e-06, + "loss": 0.26238197088241577, + "step": 4119 + }, + { + "epoch": 1.0940114194662063, + "grad_norm": 1.1230838898563178, + "learning_rate": 9.278346936502364e-06, + "loss": 0.25718310475349426, + "step": 4120 + }, + { + "epoch": 1.0942769884477492, + "grad_norm": 1.1872711264618019, + "learning_rate": 9.273967688899849e-06, + "loss": 0.23810459673404694, + "step": 4121 + }, + { + "epoch": 1.0945425574292922, + "grad_norm": 1.0680734314830214, + "learning_rate": 9.269588581267804e-06, + "loss": 0.2197081446647644, + "step": 4122 + }, + { + "epoch": 1.0948081264108351, + "grad_norm": 1.1043223190124707, + "learning_rate": 9.265209614450463e-06, + "loss": 0.2429335117340088, + "step": 4123 + }, + { + "epoch": 1.095073695392378, + "grad_norm": 1.1380552272436657, + "learning_rate": 9.260830789292043e-06, + "loss": 0.23028087615966797, + "step": 4124 + }, + { + "epoch": 1.095339264373921, + "grad_norm": 1.2203393500716264, + "learning_rate": 9.25645210663673e-06, + "loss": 0.2783699035644531, + "step": 4125 + }, + { + "epoch": 1.095604833355464, + "grad_norm": 1.1686978964802806, + "learning_rate": 9.25207356732868e-06, + "loss": 0.25055867433547974, + "step": 4126 + }, + { + "epoch": 1.095870402337007, + "grad_norm": 1.2313132067115398, + "learning_rate": 9.247695172212026e-06, + "loss": 0.28629350662231445, + "step": 4127 + }, + { + "epoch": 1.09613597131855, + "grad_norm": 1.2403423880097748, + "learning_rate": 
9.24331692213087e-06, + "loss": 0.2626604735851288, + "step": 4128 + }, + { + "epoch": 1.0964015403000928, + "grad_norm": 1.2478078302425437, + "learning_rate": 9.238938817929288e-06, + "loss": 0.237881600856781, + "step": 4129 + }, + { + "epoch": 1.0966671092816358, + "grad_norm": 1.144955023428898, + "learning_rate": 9.234560860451325e-06, + "loss": 0.2602109909057617, + "step": 4130 + }, + { + "epoch": 1.0969326782631788, + "grad_norm": 1.1775071297104545, + "learning_rate": 9.230183050541001e-06, + "loss": 0.2721475064754486, + "step": 4131 + }, + { + "epoch": 1.0971982472447217, + "grad_norm": 1.7664052681173497, + "learning_rate": 9.225805389042307e-06, + "loss": 0.25844910740852356, + "step": 4132 + }, + { + "epoch": 1.0974638162262647, + "grad_norm": 1.1612334633259545, + "learning_rate": 9.221427876799201e-06, + "loss": 0.26671040058135986, + "step": 4133 + }, + { + "epoch": 1.0977293852078078, + "grad_norm": 1.3116748641368057, + "learning_rate": 9.21705051465562e-06, + "loss": 0.2610115706920624, + "step": 4134 + }, + { + "epoch": 1.0979949541893508, + "grad_norm": 1.1348320206960383, + "learning_rate": 9.212673303455464e-06, + "loss": 0.2518802881240845, + "step": 4135 + }, + { + "epoch": 1.0982605231708937, + "grad_norm": 1.2313324732863455, + "learning_rate": 9.20829624404261e-06, + "loss": 0.28600364923477173, + "step": 4136 + }, + { + "epoch": 1.0985260921524367, + "grad_norm": 1.0787729379648288, + "learning_rate": 9.203919337260903e-06, + "loss": 0.2649504840373993, + "step": 4137 + }, + { + "epoch": 1.0987916611339796, + "grad_norm": 1.0717018301402161, + "learning_rate": 9.199542583954159e-06, + "loss": 0.22613298892974854, + "step": 4138 + }, + { + "epoch": 1.0990572301155226, + "grad_norm": 1.1049408193201318, + "learning_rate": 9.195165984966163e-06, + "loss": 0.22546961903572083, + "step": 4139 + }, + { + "epoch": 1.0993227990970655, + "grad_norm": 1.1132579479037434, + "learning_rate": 9.190789541140675e-06, + "loss": 0.20618169009685516, + 
"step": 4140 + }, + { + "epoch": 1.0995883680786085, + "grad_norm": 1.1910818165933836, + "learning_rate": 9.18641325332142e-06, + "loss": 0.2434382289648056, + "step": 4141 + }, + { + "epoch": 1.0998539370601514, + "grad_norm": 1.0160349259469954, + "learning_rate": 9.182037122352092e-06, + "loss": 0.19114840030670166, + "step": 4142 + }, + { + "epoch": 1.1001195060416944, + "grad_norm": 1.371175220167047, + "learning_rate": 9.17766114907636e-06, + "loss": 0.2793614864349365, + "step": 4143 + }, + { + "epoch": 1.1003850750232373, + "grad_norm": 1.3230746818872392, + "learning_rate": 9.173285334337863e-06, + "loss": 0.2908466160297394, + "step": 4144 + }, + { + "epoch": 1.1006506440047803, + "grad_norm": 1.1707475106499343, + "learning_rate": 9.168909678980199e-06, + "loss": 0.260933518409729, + "step": 4145 + }, + { + "epoch": 1.1009162129863233, + "grad_norm": 1.170079737982666, + "learning_rate": 9.16453418384695e-06, + "loss": 0.2819761037826538, + "step": 4146 + }, + { + "epoch": 1.1011817819678662, + "grad_norm": 1.251357168283767, + "learning_rate": 9.160158849781657e-06, + "loss": 0.25290411710739136, + "step": 4147 + }, + { + "epoch": 1.1014473509494092, + "grad_norm": 1.0782378998536035, + "learning_rate": 9.155783677627831e-06, + "loss": 0.21255841851234436, + "step": 4148 + }, + { + "epoch": 1.101712919930952, + "grad_norm": 0.9808101112826028, + "learning_rate": 9.151408668228958e-06, + "loss": 0.20631751418113708, + "step": 4149 + }, + { + "epoch": 1.101978488912495, + "grad_norm": 1.0273447794760797, + "learning_rate": 9.147033822428484e-06, + "loss": 0.20976273715496063, + "step": 4150 + }, + { + "epoch": 1.102244057894038, + "grad_norm": 1.0193138467531315, + "learning_rate": 9.142659141069828e-06, + "loss": 0.21464477479457855, + "step": 4151 + }, + { + "epoch": 1.102509626875581, + "grad_norm": 1.182770191723374, + "learning_rate": 9.13828462499638e-06, + "loss": 0.2262338101863861, + "step": 4152 + }, + { + "epoch": 1.102775195857124, + 
"grad_norm": 1.2057409707570275, + "learning_rate": 9.133910275051493e-06, + "loss": 0.26331469416618347, + "step": 4153 + }, + { + "epoch": 1.1030407648386669, + "grad_norm": 1.1729382721759571, + "learning_rate": 9.129536092078488e-06, + "loss": 0.26280921697616577, + "step": 4154 + }, + { + "epoch": 1.1033063338202098, + "grad_norm": 1.1474203361843618, + "learning_rate": 9.12516207692066e-06, + "loss": 0.2527182698249817, + "step": 4155 + }, + { + "epoch": 1.1035719028017528, + "grad_norm": 1.114868090084267, + "learning_rate": 9.120788230421267e-06, + "loss": 0.21416455507278442, + "step": 4156 + }, + { + "epoch": 1.1038374717832957, + "grad_norm": 1.149698502937602, + "learning_rate": 9.116414553423535e-06, + "loss": 0.25882014632225037, + "step": 4157 + }, + { + "epoch": 1.1041030407648387, + "grad_norm": 1.1615644224212993, + "learning_rate": 9.112041046770653e-06, + "loss": 0.20510248839855194, + "step": 4158 + }, + { + "epoch": 1.1043686097463816, + "grad_norm": 1.372282887646487, + "learning_rate": 9.107667711305786e-06, + "loss": 0.2348058819770813, + "step": 4159 + }, + { + "epoch": 1.1046341787279246, + "grad_norm": 1.2389958643414019, + "learning_rate": 9.10329454787206e-06, + "loss": 0.24561384320259094, + "step": 4160 + }, + { + "epoch": 1.1048997477094675, + "grad_norm": 1.133562757165387, + "learning_rate": 9.098921557312573e-06, + "loss": 0.23025226593017578, + "step": 4161 + }, + { + "epoch": 1.1051653166910105, + "grad_norm": 1.2483870007074676, + "learning_rate": 9.094548740470375e-06, + "loss": 0.2724589705467224, + "step": 4162 + }, + { + "epoch": 1.1054308856725534, + "grad_norm": 1.2319217483915181, + "learning_rate": 9.090176098188504e-06, + "loss": 0.25196704268455505, + "step": 4163 + }, + { + "epoch": 1.1056964546540964, + "grad_norm": 1.0723466269314343, + "learning_rate": 9.085803631309953e-06, + "loss": 0.22673696279525757, + "step": 4164 + }, + { + "epoch": 1.1059620236356393, + "grad_norm": 1.3129015386402236, + "learning_rate": 
9.081431340677679e-06, + "loss": 0.23913519084453583, + "step": 4165 + }, + { + "epoch": 1.1062275926171823, + "grad_norm": 1.3859005835374885, + "learning_rate": 9.07705922713461e-06, + "loss": 0.2723861336708069, + "step": 4166 + }, + { + "epoch": 1.1064931615987252, + "grad_norm": 1.15651219284811, + "learning_rate": 9.072687291523636e-06, + "loss": 0.262167364358902, + "step": 4167 + }, + { + "epoch": 1.1067587305802682, + "grad_norm": 1.4186208937810438, + "learning_rate": 9.068315534687615e-06, + "loss": 0.2394658625125885, + "step": 4168 + }, + { + "epoch": 1.1070242995618111, + "grad_norm": 1.116555661084851, + "learning_rate": 9.063943957469373e-06, + "loss": 0.2547619938850403, + "step": 4169 + }, + { + "epoch": 1.107289868543354, + "grad_norm": 1.1242129377429575, + "learning_rate": 9.059572560711697e-06, + "loss": 0.24057570099830627, + "step": 4170 + }, + { + "epoch": 1.107555437524897, + "grad_norm": 1.057297781351654, + "learning_rate": 9.055201345257331e-06, + "loss": 0.21729445457458496, + "step": 4171 + }, + { + "epoch": 1.10782100650644, + "grad_norm": 1.2310508574302907, + "learning_rate": 9.05083031194901e-06, + "loss": 0.26590001583099365, + "step": 4172 + }, + { + "epoch": 1.108086575487983, + "grad_norm": 1.2932563576951384, + "learning_rate": 9.04645946162941e-06, + "loss": 0.26114848256111145, + "step": 4173 + }, + { + "epoch": 1.108352144469526, + "grad_norm": 1.1776684059902396, + "learning_rate": 9.04208879514118e-06, + "loss": 0.2255469262599945, + "step": 4174 + }, + { + "epoch": 1.1086177134510689, + "grad_norm": 1.1791871226781019, + "learning_rate": 9.037718313326932e-06, + "loss": 0.2597671151161194, + "step": 4175 + }, + { + "epoch": 1.1088832824326118, + "grad_norm": 1.1140795273935102, + "learning_rate": 9.033348017029247e-06, + "loss": 0.24820469319820404, + "step": 4176 + }, + { + "epoch": 1.1091488514141548, + "grad_norm": 1.2459789693741423, + "learning_rate": 9.028977907090661e-06, + "loss": 0.23886600136756897, + "step": 
4177 + }, + { + "epoch": 1.1094144203956977, + "grad_norm": 1.091274384086243, + "learning_rate": 9.024607984353682e-06, + "loss": 0.24204152822494507, + "step": 4178 + }, + { + "epoch": 1.1096799893772407, + "grad_norm": 1.0934112812518066, + "learning_rate": 9.02023824966078e-06, + "loss": 0.23246638476848602, + "step": 4179 + }, + { + "epoch": 1.1099455583587836, + "grad_norm": 1.124332043141092, + "learning_rate": 9.015868703854386e-06, + "loss": 0.25057342648506165, + "step": 4180 + }, + { + "epoch": 1.1102111273403266, + "grad_norm": 1.117105393632997, + "learning_rate": 9.011499347776902e-06, + "loss": 0.2316257357597351, + "step": 4181 + }, + { + "epoch": 1.1104766963218695, + "grad_norm": 1.4294765240232425, + "learning_rate": 9.007130182270685e-06, + "loss": 0.24824783205986023, + "step": 4182 + }, + { + "epoch": 1.1107422653034125, + "grad_norm": 1.1667528236187257, + "learning_rate": 9.002761208178059e-06, + "loss": 0.25174480676651, + "step": 4183 + }, + { + "epoch": 1.1110078342849554, + "grad_norm": 1.0615254217045484, + "learning_rate": 8.998392426341313e-06, + "loss": 0.22364717721939087, + "step": 4184 + }, + { + "epoch": 1.1112734032664984, + "grad_norm": 1.0478203412338092, + "learning_rate": 8.994023837602694e-06, + "loss": 0.2205432504415512, + "step": 4185 + }, + { + "epoch": 1.1115389722480415, + "grad_norm": 1.4181125559874541, + "learning_rate": 8.989655442804413e-06, + "loss": 0.23303675651550293, + "step": 4186 + }, + { + "epoch": 1.1118045412295845, + "grad_norm": 1.2558407878646785, + "learning_rate": 8.985287242788646e-06, + "loss": 0.3003222644329071, + "step": 4187 + }, + { + "epoch": 1.1120701102111274, + "grad_norm": 1.146183553652687, + "learning_rate": 8.980919238397532e-06, + "loss": 0.2734413146972656, + "step": 4188 + }, + { + "epoch": 1.1123356791926704, + "grad_norm": 1.200748942223162, + "learning_rate": 8.976551430473166e-06, + "loss": 0.24086692929267883, + "step": 4189 + }, + { + "epoch": 1.1126012481742134, + 
"grad_norm": 1.2277073829430902, + "learning_rate": 8.972183819857618e-06, + "loss": 0.2531188130378723, + "step": 4190 + }, + { + "epoch": 1.1128668171557563, + "grad_norm": 1.1067327267341682, + "learning_rate": 8.96781640739291e-06, + "loss": 0.25059640407562256, + "step": 4191 + }, + { + "epoch": 1.1131323861372993, + "grad_norm": 1.1987793097859372, + "learning_rate": 8.963449193921023e-06, + "loss": 0.22427335381507874, + "step": 4192 + }, + { + "epoch": 1.1133979551188422, + "grad_norm": 1.1842662472837817, + "learning_rate": 8.959082180283906e-06, + "loss": 0.28835898637771606, + "step": 4193 + }, + { + "epoch": 1.1136635241003852, + "grad_norm": 1.1161865281550452, + "learning_rate": 8.954715367323468e-06, + "loss": 0.23919034004211426, + "step": 4194 + }, + { + "epoch": 1.1139290930819281, + "grad_norm": 1.186821665962327, + "learning_rate": 8.950348755881578e-06, + "loss": 0.24583986401557922, + "step": 4195 + }, + { + "epoch": 1.114194662063471, + "grad_norm": 1.2519292440490923, + "learning_rate": 8.94598234680007e-06, + "loss": 0.23869696259498596, + "step": 4196 + }, + { + "epoch": 1.114460231045014, + "grad_norm": 1.1662462204488522, + "learning_rate": 8.941616140920734e-06, + "loss": 0.2672434449195862, + "step": 4197 + }, + { + "epoch": 1.114725800026557, + "grad_norm": 1.2253961517889995, + "learning_rate": 8.937250139085322e-06, + "loss": 0.2660336494445801, + "step": 4198 + }, + { + "epoch": 1.1149913690081, + "grad_norm": 1.1608224464613695, + "learning_rate": 8.932884342135552e-06, + "loss": 0.26461780071258545, + "step": 4199 + }, + { + "epoch": 1.1152569379896429, + "grad_norm": 1.1632580978978435, + "learning_rate": 8.928518750913094e-06, + "loss": 0.22947481274604797, + "step": 4200 + }, + { + "epoch": 1.1155225069711858, + "grad_norm": 1.116659758904741, + "learning_rate": 8.924153366259584e-06, + "loss": 0.22715970873832703, + "step": 4201 + }, + { + "epoch": 1.1157880759527288, + "grad_norm": 1.3785482068816968, + "learning_rate": 
8.919788189016618e-06, + "loss": 0.2994215190410614, + "step": 4202 + }, + { + "epoch": 1.1160536449342717, + "grad_norm": 1.158412598714371, + "learning_rate": 8.915423220025747e-06, + "loss": 0.2290656566619873, + "step": 4203 + }, + { + "epoch": 1.1163192139158147, + "grad_norm": 1.093685203516635, + "learning_rate": 8.911058460128489e-06, + "loss": 0.22284844517707825, + "step": 4204 + }, + { + "epoch": 1.1165847828973576, + "grad_norm": 1.0534371355750514, + "learning_rate": 8.906693910166316e-06, + "loss": 0.2095392495393753, + "step": 4205 + }, + { + "epoch": 1.1168503518789006, + "grad_norm": 1.197609739800315, + "learning_rate": 8.902329570980665e-06, + "loss": 0.25098133087158203, + "step": 4206 + }, + { + "epoch": 1.1171159208604435, + "grad_norm": 1.1630125842119448, + "learning_rate": 8.897965443412923e-06, + "loss": 0.24768148362636566, + "step": 4207 + }, + { + "epoch": 1.1173814898419865, + "grad_norm": 1.1213395777051767, + "learning_rate": 8.89360152830445e-06, + "loss": 0.22255480289459229, + "step": 4208 + }, + { + "epoch": 1.1176470588235294, + "grad_norm": 1.2306365389400118, + "learning_rate": 8.889237826496551e-06, + "loss": 0.23721200227737427, + "step": 4209 + }, + { + "epoch": 1.1179126278050724, + "grad_norm": 1.1422779685655824, + "learning_rate": 8.8848743388305e-06, + "loss": 0.25002530217170715, + "step": 4210 + }, + { + "epoch": 1.1181781967866153, + "grad_norm": 1.2862841308153614, + "learning_rate": 8.880511066147524e-06, + "loss": 0.27188029885292053, + "step": 4211 + }, + { + "epoch": 1.1184437657681583, + "grad_norm": 1.1517061730387759, + "learning_rate": 8.876148009288813e-06, + "loss": 0.23056066036224365, + "step": 4212 + }, + { + "epoch": 1.1187093347497012, + "grad_norm": 1.172676602980077, + "learning_rate": 8.87178516909551e-06, + "loss": 0.2336079478263855, + "step": 4213 + }, + { + "epoch": 1.1189749037312442, + "grad_norm": 1.1868473876345316, + "learning_rate": 8.86742254640872e-06, + "loss": 0.27449533343315125, + 
"step": 4214 + }, + { + "epoch": 1.1192404727127871, + "grad_norm": 1.1500112066365369, + "learning_rate": 8.863060142069508e-06, + "loss": 0.24714893102645874, + "step": 4215 + }, + { + "epoch": 1.11950604169433, + "grad_norm": 1.072070573678295, + "learning_rate": 8.858697956918886e-06, + "loss": 0.2155439257621765, + "step": 4216 + }, + { + "epoch": 1.119771610675873, + "grad_norm": 1.1798452175680678, + "learning_rate": 8.854335991797842e-06, + "loss": 0.23189155757427216, + "step": 4217 + }, + { + "epoch": 1.120037179657416, + "grad_norm": 1.0773206236657924, + "learning_rate": 8.849974247547307e-06, + "loss": 0.23413527011871338, + "step": 4218 + }, + { + "epoch": 1.120302748638959, + "grad_norm": 1.1991513784988423, + "learning_rate": 8.845612725008173e-06, + "loss": 0.2569039463996887, + "step": 4219 + }, + { + "epoch": 1.120568317620502, + "grad_norm": 1.1795807532964264, + "learning_rate": 8.84125142502129e-06, + "loss": 0.2699541449546814, + "step": 4220 + }, + { + "epoch": 1.1208338866020449, + "grad_norm": 1.1092727759218166, + "learning_rate": 8.836890348427468e-06, + "loss": 0.27172449231147766, + "step": 4221 + }, + { + "epoch": 1.1210994555835878, + "grad_norm": 1.2315684717645485, + "learning_rate": 8.83252949606747e-06, + "loss": 0.2839444875717163, + "step": 4222 + }, + { + "epoch": 1.1213650245651308, + "grad_norm": 1.1676850588618106, + "learning_rate": 8.828168868782013e-06, + "loss": 0.22782178223133087, + "step": 4223 + }, + { + "epoch": 1.1216305935466737, + "grad_norm": 1.132889704492098, + "learning_rate": 8.82380846741178e-06, + "loss": 0.2567726671695709, + "step": 4224 + }, + { + "epoch": 1.1218961625282167, + "grad_norm": 1.1872540675130212, + "learning_rate": 8.8194482927974e-06, + "loss": 0.25879523158073425, + "step": 4225 + }, + { + "epoch": 1.1221617315097596, + "grad_norm": 1.0193477801534692, + "learning_rate": 8.815088345779466e-06, + "loss": 0.22109058499336243, + "step": 4226 + }, + { + "epoch": 1.1224273004913026, + 
"grad_norm": 1.1414592493281657, + "learning_rate": 8.810728627198526e-06, + "loss": 0.23615925014019012, + "step": 4227 + }, + { + "epoch": 1.1226928694728455, + "grad_norm": 1.160290266155045, + "learning_rate": 8.806369137895081e-06, + "loss": 0.2751353085041046, + "step": 4228 + }, + { + "epoch": 1.1229584384543885, + "grad_norm": 1.2566953981709197, + "learning_rate": 8.802009878709587e-06, + "loss": 0.2361963391304016, + "step": 4229 + }, + { + "epoch": 1.1232240074359314, + "grad_norm": 1.186723455251228, + "learning_rate": 8.79765085048246e-06, + "loss": 0.22435930371284485, + "step": 4230 + }, + { + "epoch": 1.1234895764174744, + "grad_norm": 1.1759467333820823, + "learning_rate": 8.79329205405407e-06, + "loss": 0.2355855256319046, + "step": 4231 + }, + { + "epoch": 1.1237551453990173, + "grad_norm": 1.1450490838951077, + "learning_rate": 8.78893349026474e-06, + "loss": 0.24127572774887085, + "step": 4232 + }, + { + "epoch": 1.1240207143805603, + "grad_norm": 1.222656849347683, + "learning_rate": 8.784575159954748e-06, + "loss": 0.2677989602088928, + "step": 4233 + }, + { + "epoch": 1.1242862833621032, + "grad_norm": 1.109384474337522, + "learning_rate": 8.78021706396433e-06, + "loss": 0.2283135950565338, + "step": 4234 + }, + { + "epoch": 1.1245518523436462, + "grad_norm": 1.1669732456316693, + "learning_rate": 8.775859203133678e-06, + "loss": 0.2686103582382202, + "step": 4235 + }, + { + "epoch": 1.1248174213251891, + "grad_norm": 1.3869789172842044, + "learning_rate": 8.771501578302934e-06, + "loss": 0.2638726234436035, + "step": 4236 + }, + { + "epoch": 1.125082990306732, + "grad_norm": 1.0752600847920544, + "learning_rate": 8.767144190312196e-06, + "loss": 0.2517441511154175, + "step": 4237 + }, + { + "epoch": 1.125348559288275, + "grad_norm": 1.1903096570499558, + "learning_rate": 8.762787040001518e-06, + "loss": 0.2593642771244049, + "step": 4238 + }, + { + "epoch": 1.125614128269818, + "grad_norm": 1.123653942868709, + "learning_rate": 
8.758430128210908e-06, + "loss": 0.23758336901664734, + "step": 4239 + }, + { + "epoch": 1.125879697251361, + "grad_norm": 1.182033088729647, + "learning_rate": 8.754073455780327e-06, + "loss": 0.2557980716228485, + "step": 4240 + }, + { + "epoch": 1.126145266232904, + "grad_norm": 1.1182311632466304, + "learning_rate": 8.74971702354969e-06, + "loss": 0.2484067678451538, + "step": 4241 + }, + { + "epoch": 1.1264108352144468, + "grad_norm": 1.121886097833982, + "learning_rate": 8.745360832358864e-06, + "loss": 0.23103098571300507, + "step": 4242 + }, + { + "epoch": 1.1266764041959898, + "grad_norm": 1.1856800379472048, + "learning_rate": 8.741004883047667e-06, + "loss": 0.2630731463432312, + "step": 4243 + }, + { + "epoch": 1.1269419731775328, + "grad_norm": 1.1814851216743405, + "learning_rate": 8.736649176455885e-06, + "loss": 0.2413114309310913, + "step": 4244 + }, + { + "epoch": 1.1272075421590757, + "grad_norm": 1.1465608986560651, + "learning_rate": 8.732293713423243e-06, + "loss": 0.22463169693946838, + "step": 4245 + }, + { + "epoch": 1.1274731111406187, + "grad_norm": 1.1943136125759177, + "learning_rate": 8.727938494789421e-06, + "loss": 0.23641429841518402, + "step": 4246 + }, + { + "epoch": 1.1277386801221616, + "grad_norm": 1.399290186521162, + "learning_rate": 8.723583521394054e-06, + "loss": 0.2547767162322998, + "step": 4247 + }, + { + "epoch": 1.1280042491037048, + "grad_norm": 1.1274578262359225, + "learning_rate": 8.719228794076733e-06, + "loss": 0.25753074884414673, + "step": 4248 + }, + { + "epoch": 1.1282698180852477, + "grad_norm": 1.2581544322188265, + "learning_rate": 8.714874313676992e-06, + "loss": 0.30602240562438965, + "step": 4249 + }, + { + "epoch": 1.1285353870667907, + "grad_norm": 1.3693509289176364, + "learning_rate": 8.710520081034328e-06, + "loss": 0.28336623311042786, + "step": 4250 + }, + { + "epoch": 1.1288009560483336, + "grad_norm": 1.179198933472593, + "learning_rate": 8.706166096988185e-06, + "loss": 0.24065867066383362, + 
"step": 4251 + }, + { + "epoch": 1.1290665250298766, + "grad_norm": 1.1350442144429624, + "learning_rate": 8.701812362377954e-06, + "loss": 0.25674968957901, + "step": 4252 + }, + { + "epoch": 1.1293320940114195, + "grad_norm": 1.0526431620404462, + "learning_rate": 8.697458878042992e-06, + "loss": 0.21502923965454102, + "step": 4253 + }, + { + "epoch": 1.1295976629929625, + "grad_norm": 1.199807552125115, + "learning_rate": 8.693105644822598e-06, + "loss": 0.26848286390304565, + "step": 4254 + }, + { + "epoch": 1.1298632319745054, + "grad_norm": 1.1632395937948599, + "learning_rate": 8.688752663556022e-06, + "loss": 0.24283824861049652, + "step": 4255 + }, + { + "epoch": 1.1301288009560484, + "grad_norm": 1.231861138079484, + "learning_rate": 8.684399935082468e-06, + "loss": 0.2511506974697113, + "step": 4256 + }, + { + "epoch": 1.1303943699375913, + "grad_norm": 1.1293067099587706, + "learning_rate": 8.68004746024109e-06, + "loss": 0.23932483792304993, + "step": 4257 + }, + { + "epoch": 1.1306599389191343, + "grad_norm": 1.229437521917496, + "learning_rate": 8.675695239870993e-06, + "loss": 0.30030694603919983, + "step": 4258 + }, + { + "epoch": 1.1309255079006773, + "grad_norm": 1.1154596754627621, + "learning_rate": 8.671343274811238e-06, + "loss": 0.24699059128761292, + "step": 4259 + }, + { + "epoch": 1.1311910768822202, + "grad_norm": 1.1288414782501015, + "learning_rate": 8.666991565900827e-06, + "loss": 0.26828041672706604, + "step": 4260 + }, + { + "epoch": 1.1314566458637632, + "grad_norm": 1.0765132569205758, + "learning_rate": 8.662640113978717e-06, + "loss": 0.2372082769870758, + "step": 4261 + }, + { + "epoch": 1.131722214845306, + "grad_norm": 1.2100447285144145, + "learning_rate": 8.658288919883824e-06, + "loss": 0.26367881894111633, + "step": 4262 + }, + { + "epoch": 1.131987783826849, + "grad_norm": 1.1035052537421275, + "learning_rate": 8.653937984455007e-06, + "loss": 0.2287222146987915, + "step": 4263 + }, + { + "epoch": 1.132253352808392, + 
"grad_norm": 1.1417963040520365, + "learning_rate": 8.649587308531067e-06, + "loss": 0.244521826505661, + "step": 4264 + }, + { + "epoch": 1.132518921789935, + "grad_norm": 1.2243689126496846, + "learning_rate": 8.64523689295077e-06, + "loss": 0.26912257075309753, + "step": 4265 + }, + { + "epoch": 1.132784490771478, + "grad_norm": 1.2384832947619873, + "learning_rate": 8.64088673855282e-06, + "loss": 0.23002780973911285, + "step": 4266 + }, + { + "epoch": 1.1330500597530209, + "grad_norm": 1.253742603342847, + "learning_rate": 8.636536846175878e-06, + "loss": 0.2561958432197571, + "step": 4267 + }, + { + "epoch": 1.1333156287345638, + "grad_norm": 1.2156026453092519, + "learning_rate": 8.63218721665855e-06, + "loss": 0.25553008913993835, + "step": 4268 + }, + { + "epoch": 1.1335811977161068, + "grad_norm": 1.1992385112791626, + "learning_rate": 8.627837850839398e-06, + "loss": 0.1992083340883255, + "step": 4269 + }, + { + "epoch": 1.1338467666976497, + "grad_norm": 1.3643398602160783, + "learning_rate": 8.62348874955692e-06, + "loss": 0.23075388371944427, + "step": 4270 + }, + { + "epoch": 1.1341123356791927, + "grad_norm": 1.1072751580070286, + "learning_rate": 8.619139913649582e-06, + "loss": 0.23691913485527039, + "step": 4271 + }, + { + "epoch": 1.1343779046607356, + "grad_norm": 1.2656689209279672, + "learning_rate": 8.61479134395578e-06, + "loss": 0.2536017894744873, + "step": 4272 + }, + { + "epoch": 1.1346434736422786, + "grad_norm": 1.2870409796681632, + "learning_rate": 8.61044304131387e-06, + "loss": 0.3014161288738251, + "step": 4273 + }, + { + "epoch": 1.1349090426238215, + "grad_norm": 1.1669055614665604, + "learning_rate": 8.606095006562156e-06, + "loss": 0.26333582401275635, + "step": 4274 + }, + { + "epoch": 1.1351746116053645, + "grad_norm": 1.2370251285176135, + "learning_rate": 8.601747240538883e-06, + "loss": 0.23796264827251434, + "step": 4275 + }, + { + "epoch": 1.1354401805869074, + "grad_norm": 1.1989417705813543, + "learning_rate": 
8.597399744082251e-06, + "loss": 0.23737141489982605, + "step": 4276 + }, + { + "epoch": 1.1357057495684504, + "grad_norm": 1.1281376384049915, + "learning_rate": 8.593052518030407e-06, + "loss": 0.21073032915592194, + "step": 4277 + }, + { + "epoch": 1.1359713185499933, + "grad_norm": 1.2935455290015059, + "learning_rate": 8.588705563221444e-06, + "loss": 0.2597163915634155, + "step": 4278 + }, + { + "epoch": 1.1362368875315363, + "grad_norm": 1.137636804234172, + "learning_rate": 8.584358880493402e-06, + "loss": 0.24541154503822327, + "step": 4279 + }, + { + "epoch": 1.1365024565130792, + "grad_norm": 1.1331800338594176, + "learning_rate": 8.580012470684273e-06, + "loss": 0.19294027984142303, + "step": 4280 + }, + { + "epoch": 1.1367680254946222, + "grad_norm": 1.2387583554091215, + "learning_rate": 8.575666334631994e-06, + "loss": 0.26909738779067993, + "step": 4281 + }, + { + "epoch": 1.1370335944761651, + "grad_norm": 1.2850664046416893, + "learning_rate": 8.571320473174444e-06, + "loss": 0.2550502121448517, + "step": 4282 + }, + { + "epoch": 1.137299163457708, + "grad_norm": 1.138070930000495, + "learning_rate": 8.566974887149461e-06, + "loss": 0.2256634682416916, + "step": 4283 + }, + { + "epoch": 1.137564732439251, + "grad_norm": 1.3289753418379673, + "learning_rate": 8.562629577394817e-06, + "loss": 0.26154983043670654, + "step": 4284 + }, + { + "epoch": 1.137830301420794, + "grad_norm": 1.2426566834274124, + "learning_rate": 8.558284544748239e-06, + "loss": 0.24685145914554596, + "step": 4285 + }, + { + "epoch": 1.138095870402337, + "grad_norm": 1.177162412641928, + "learning_rate": 8.553939790047396e-06, + "loss": 0.2584421932697296, + "step": 4286 + }, + { + "epoch": 1.13836143938388, + "grad_norm": 1.2486541463378953, + "learning_rate": 8.549595314129907e-06, + "loss": 0.24582788348197937, + "step": 4287 + }, + { + "epoch": 1.1386270083654229, + "grad_norm": 1.1978925998644077, + "learning_rate": 8.545251117833334e-06, + "loss": 0.26023977994918823, + 
"step": 4288 + }, + { + "epoch": 1.1388925773469658, + "grad_norm": 1.2566090334130535, + "learning_rate": 8.54090720199519e-06, + "loss": 0.25575515627861023, + "step": 4289 + }, + { + "epoch": 1.1391581463285088, + "grad_norm": 1.2234599227483165, + "learning_rate": 8.53656356745293e-06, + "loss": 0.2784460783004761, + "step": 4290 + }, + { + "epoch": 1.1394237153100517, + "grad_norm": 1.11922615590049, + "learning_rate": 8.532220215043953e-06, + "loss": 0.24723297357559204, + "step": 4291 + }, + { + "epoch": 1.1396892842915947, + "grad_norm": 1.1960822646368614, + "learning_rate": 8.52787714560561e-06, + "loss": 0.24694418907165527, + "step": 4292 + }, + { + "epoch": 1.1399548532731376, + "grad_norm": 1.2073723964066632, + "learning_rate": 8.52353435997519e-06, + "loss": 0.19976040720939636, + "step": 4293 + }, + { + "epoch": 1.1402204222546806, + "grad_norm": 1.0875644999756633, + "learning_rate": 8.519191858989932e-06, + "loss": 0.21742458641529083, + "step": 4294 + }, + { + "epoch": 1.1404859912362235, + "grad_norm": 1.2040315384402727, + "learning_rate": 8.514849643487018e-06, + "loss": 0.26382917165756226, + "step": 4295 + }, + { + "epoch": 1.1407515602177665, + "grad_norm": 1.3073789721234685, + "learning_rate": 8.510507714303577e-06, + "loss": 0.30778488516807556, + "step": 4296 + }, + { + "epoch": 1.1410171291993096, + "grad_norm": 1.0727267660957265, + "learning_rate": 8.506166072276681e-06, + "loss": 0.20894449949264526, + "step": 4297 + }, + { + "epoch": 1.1412826981808526, + "grad_norm": 1.2119089915252295, + "learning_rate": 8.50182471824335e-06, + "loss": 0.2389567494392395, + "step": 4298 + }, + { + "epoch": 1.1415482671623955, + "grad_norm": 1.0286533711803312, + "learning_rate": 8.497483653040545e-06, + "loss": 0.20531126856803894, + "step": 4299 + }, + { + "epoch": 1.1418138361439385, + "grad_norm": 1.2153067733576255, + "learning_rate": 8.49314287750517e-06, + "loss": 0.2577363848686218, + "step": 4300 + }, + { + "epoch": 1.1420794051254815, + 
"grad_norm": 1.211343687077752, + "learning_rate": 8.488802392474076e-06, + "loss": 0.24225997924804688, + "step": 4301 + }, + { + "epoch": 1.1423449741070244, + "grad_norm": 1.2698570110354703, + "learning_rate": 8.484462198784058e-06, + "loss": 0.26494917273521423, + "step": 4302 + }, + { + "epoch": 1.1426105430885674, + "grad_norm": 1.2988704892129896, + "learning_rate": 8.480122297271855e-06, + "loss": 0.24903994798660278, + "step": 4303 + }, + { + "epoch": 1.1428761120701103, + "grad_norm": 1.1681075442122268, + "learning_rate": 8.475782688774147e-06, + "loss": 0.25291907787323, + "step": 4304 + }, + { + "epoch": 1.1431416810516533, + "grad_norm": 1.1301459507046017, + "learning_rate": 8.47144337412756e-06, + "loss": 0.22958475351333618, + "step": 4305 + }, + { + "epoch": 1.1434072500331962, + "grad_norm": 1.175766015682232, + "learning_rate": 8.46710435416866e-06, + "loss": 0.2305452972650528, + "step": 4306 + }, + { + "epoch": 1.1436728190147392, + "grad_norm": 1.2105790475425935, + "learning_rate": 8.462765629733965e-06, + "loss": 0.25028055906295776, + "step": 4307 + }, + { + "epoch": 1.1439383879962821, + "grad_norm": 1.2809924485725674, + "learning_rate": 8.458427201659926e-06, + "loss": 0.24873222410678864, + "step": 4308 + }, + { + "epoch": 1.144203956977825, + "grad_norm": 1.2345010944986379, + "learning_rate": 8.454089070782943e-06, + "loss": 0.23396535217761993, + "step": 4309 + }, + { + "epoch": 1.144469525959368, + "grad_norm": 1.1955062282547588, + "learning_rate": 8.449751237939354e-06, + "loss": 0.27120494842529297, + "step": 4310 + }, + { + "epoch": 1.144735094940911, + "grad_norm": 1.182924840045628, + "learning_rate": 8.445413703965441e-06, + "loss": 0.2734759449958801, + "step": 4311 + }, + { + "epoch": 1.145000663922454, + "grad_norm": 1.1584309667252248, + "learning_rate": 8.441076469697434e-06, + "loss": 0.25353512167930603, + "step": 4312 + }, + { + "epoch": 1.1452662329039969, + "grad_norm": 1.1913513856414861, + "learning_rate": 
8.436739535971497e-06, + "loss": 0.23851020634174347, + "step": 4313 + }, + { + "epoch": 1.1455318018855398, + "grad_norm": 1.2006838398252668, + "learning_rate": 8.432402903623741e-06, + "loss": 0.26320093870162964, + "step": 4314 + }, + { + "epoch": 1.1457973708670828, + "grad_norm": 1.1065666799118796, + "learning_rate": 8.428066573490211e-06, + "loss": 0.23859955370426178, + "step": 4315 + }, + { + "epoch": 1.1460629398486257, + "grad_norm": 1.197716796975668, + "learning_rate": 8.423730546406911e-06, + "loss": 0.2636772096157074, + "step": 4316 + }, + { + "epoch": 1.1463285088301687, + "grad_norm": 1.2459962038175347, + "learning_rate": 8.419394823209773e-06, + "loss": 0.2656415104866028, + "step": 4317 + }, + { + "epoch": 1.1465940778117116, + "grad_norm": 1.2225993542972535, + "learning_rate": 8.41505940473467e-06, + "loss": 0.2872830033302307, + "step": 4318 + }, + { + "epoch": 1.1468596467932546, + "grad_norm": 1.4653362839323858, + "learning_rate": 8.410724291817422e-06, + "loss": 0.229783833026886, + "step": 4319 + }, + { + "epoch": 1.1471252157747975, + "grad_norm": 4.273944826146497, + "learning_rate": 8.406389485293786e-06, + "loss": 0.24418675899505615, + "step": 4320 + }, + { + "epoch": 1.1473907847563405, + "grad_norm": 1.2385236183806463, + "learning_rate": 8.402054985999464e-06, + "loss": 0.2535584270954132, + "step": 4321 + }, + { + "epoch": 1.1476563537378834, + "grad_norm": 1.2116145926695832, + "learning_rate": 8.397720794770093e-06, + "loss": 0.23207828402519226, + "step": 4322 + }, + { + "epoch": 1.1479219227194264, + "grad_norm": 1.8129143471218838, + "learning_rate": 8.393386912441257e-06, + "loss": 0.27990391850471497, + "step": 4323 + }, + { + "epoch": 1.1481874917009693, + "grad_norm": 1.059877272327032, + "learning_rate": 8.38905333984847e-06, + "loss": 0.2098318189382553, + "step": 4324 + }, + { + "epoch": 1.1484530606825123, + "grad_norm": 1.1462464609840002, + "learning_rate": 8.384720077827204e-06, + "loss": 0.25303804874420166, + 
"step": 4325 + }, + { + "epoch": 1.1487186296640552, + "grad_norm": 1.0794728099252306, + "learning_rate": 8.380387127212858e-06, + "loss": 0.23481838405132294, + "step": 4326 + }, + { + "epoch": 1.1489841986455982, + "grad_norm": 1.1782142095551065, + "learning_rate": 8.376054488840771e-06, + "loss": 0.24842356145381927, + "step": 4327 + }, + { + "epoch": 1.1492497676271411, + "grad_norm": 1.136832039914945, + "learning_rate": 8.37172216354623e-06, + "loss": 0.23927366733551025, + "step": 4328 + }, + { + "epoch": 1.149515336608684, + "grad_norm": 1.1577812724546028, + "learning_rate": 8.367390152164448e-06, + "loss": 0.23836453258991241, + "step": 4329 + }, + { + "epoch": 1.149780905590227, + "grad_norm": 1.2492179140984832, + "learning_rate": 8.36305845553059e-06, + "loss": 0.2562161982059479, + "step": 4330 + }, + { + "epoch": 1.15004647457177, + "grad_norm": 1.120151700121908, + "learning_rate": 8.358727074479755e-06, + "loss": 0.21255920827388763, + "step": 4331 + }, + { + "epoch": 1.150312043553313, + "grad_norm": 1.1011600870179878, + "learning_rate": 8.354396009846985e-06, + "loss": 0.24200043082237244, + "step": 4332 + }, + { + "epoch": 1.150577612534856, + "grad_norm": 1.1644551235897023, + "learning_rate": 8.35006526246725e-06, + "loss": 0.23582379519939423, + "step": 4333 + }, + { + "epoch": 1.1508431815163989, + "grad_norm": 1.093546349726341, + "learning_rate": 8.34573483317548e-06, + "loss": 0.21554499864578247, + "step": 4334 + }, + { + "epoch": 1.1511087504979418, + "grad_norm": 1.2460346716976907, + "learning_rate": 8.341404722806525e-06, + "loss": 0.2789759039878845, + "step": 4335 + }, + { + "epoch": 1.1513743194794848, + "grad_norm": 1.212813860768853, + "learning_rate": 8.337074932195175e-06, + "loss": 0.24677832424640656, + "step": 4336 + }, + { + "epoch": 1.1516398884610277, + "grad_norm": 1.2351497128261646, + "learning_rate": 8.332745462176166e-06, + "loss": 0.28122392296791077, + "step": 4337 + }, + { + "epoch": 1.1519054574425707, + 
"grad_norm": 1.2447069177647443, + "learning_rate": 8.328416313584169e-06, + "loss": 0.23219403624534607, + "step": 4338 + }, + { + "epoch": 1.1521710264241136, + "grad_norm": 1.1258797089625292, + "learning_rate": 8.324087487253792e-06, + "loss": 0.19928379356861115, + "step": 4339 + }, + { + "epoch": 1.1524365954056566, + "grad_norm": 1.2737910298174706, + "learning_rate": 8.31975898401958e-06, + "loss": 0.27730467915534973, + "step": 4340 + }, + { + "epoch": 1.1527021643871995, + "grad_norm": 1.3906235348842741, + "learning_rate": 8.315430804716022e-06, + "loss": 0.25462737679481506, + "step": 4341 + }, + { + "epoch": 1.1529677333687425, + "grad_norm": 1.1703737499238527, + "learning_rate": 8.311102950177533e-06, + "loss": 0.2363007366657257, + "step": 4342 + }, + { + "epoch": 1.1532333023502854, + "grad_norm": 1.2498285131266695, + "learning_rate": 8.306775421238482e-06, + "loss": 0.2648352384567261, + "step": 4343 + }, + { + "epoch": 1.1534988713318284, + "grad_norm": 1.394847110607811, + "learning_rate": 8.302448218733158e-06, + "loss": 0.25645309686660767, + "step": 4344 + }, + { + "epoch": 1.1537644403133713, + "grad_norm": 1.2178564426244172, + "learning_rate": 8.298121343495797e-06, + "loss": 0.22962522506713867, + "step": 4345 + }, + { + "epoch": 1.1540300092949143, + "grad_norm": 1.132403649349265, + "learning_rate": 8.293794796360569e-06, + "loss": 0.21269623935222626, + "step": 4346 + }, + { + "epoch": 1.1542955782764572, + "grad_norm": 1.1646919704485588, + "learning_rate": 8.289468578161581e-06, + "loss": 0.2518436014652252, + "step": 4347 + }, + { + "epoch": 1.1545611472580002, + "grad_norm": 1.193830808481187, + "learning_rate": 8.285142689732877e-06, + "loss": 0.2318439483642578, + "step": 4348 + }, + { + "epoch": 1.1548267162395431, + "grad_norm": 1.0953821300718658, + "learning_rate": 8.280817131908438e-06, + "loss": 0.2278512567281723, + "step": 4349 + }, + { + "epoch": 1.155092285221086, + "grad_norm": 1.3446091578493078, + "learning_rate": 
8.27649190552218e-06, + "loss": 0.2521114945411682, + "step": 4350 + }, + { + "epoch": 1.155357854202629, + "grad_norm": 1.1722019112748296, + "learning_rate": 8.272167011407955e-06, + "loss": 0.2565760016441345, + "step": 4351 + }, + { + "epoch": 1.155623423184172, + "grad_norm": 1.3209067321897832, + "learning_rate": 8.267842450399552e-06, + "loss": 0.2603546679019928, + "step": 4352 + }, + { + "epoch": 1.155888992165715, + "grad_norm": 1.1697050726438265, + "learning_rate": 8.263518223330698e-06, + "loss": 0.2175855189561844, + "step": 4353 + }, + { + "epoch": 1.156154561147258, + "grad_norm": 1.1937135661774867, + "learning_rate": 8.25919433103505e-06, + "loss": 0.24521774053573608, + "step": 4354 + }, + { + "epoch": 1.1564201301288008, + "grad_norm": 1.3267445452853517, + "learning_rate": 8.254870774346203e-06, + "loss": 0.29673823714256287, + "step": 4355 + }, + { + "epoch": 1.1566856991103438, + "grad_norm": 1.260162624950344, + "learning_rate": 8.25054755409769e-06, + "loss": 0.26994144916534424, + "step": 4356 + }, + { + "epoch": 1.1569512680918868, + "grad_norm": 1.1578908727655277, + "learning_rate": 8.246224671122974e-06, + "loss": 0.2545935809612274, + "step": 4357 + }, + { + "epoch": 1.1572168370734297, + "grad_norm": 1.1469888258961152, + "learning_rate": 8.241902126255458e-06, + "loss": 0.23589034378528595, + "step": 4358 + }, + { + "epoch": 1.1574824060549727, + "grad_norm": 1.229284708155894, + "learning_rate": 8.237579920328478e-06, + "loss": 0.2617190480232239, + "step": 4359 + }, + { + "epoch": 1.1577479750365158, + "grad_norm": 1.2741716320060574, + "learning_rate": 8.233258054175302e-06, + "loss": 0.3092418313026428, + "step": 4360 + }, + { + "epoch": 1.1580135440180588, + "grad_norm": 1.1377305602079475, + "learning_rate": 8.228936528629138e-06, + "loss": 0.22873908281326294, + "step": 4361 + }, + { + "epoch": 1.1582791129996017, + "grad_norm": 1.0592847205754, + "learning_rate": 8.224615344523123e-06, + "loss": 0.22549089789390564, + 
"step": 4362 + }, + { + "epoch": 1.1585446819811447, + "grad_norm": 1.0288617285826194, + "learning_rate": 8.22029450269033e-06, + "loss": 0.19141459465026855, + "step": 4363 + }, + { + "epoch": 1.1588102509626876, + "grad_norm": 1.1679333849265336, + "learning_rate": 8.21597400396377e-06, + "loss": 0.24277547001838684, + "step": 4364 + }, + { + "epoch": 1.1590758199442306, + "grad_norm": 1.1463053400858605, + "learning_rate": 8.21165384917638e-06, + "loss": 0.2429513931274414, + "step": 4365 + }, + { + "epoch": 1.1593413889257735, + "grad_norm": 1.0775583631999657, + "learning_rate": 8.207334039161035e-06, + "loss": 0.24710172414779663, + "step": 4366 + }, + { + "epoch": 1.1596069579073165, + "grad_norm": 1.1226530732908067, + "learning_rate": 8.203014574750546e-06, + "loss": 0.2553783357143402, + "step": 4367 + }, + { + "epoch": 1.1598725268888594, + "grad_norm": 1.1664625510577165, + "learning_rate": 8.198695456777653e-06, + "loss": 0.2558436095714569, + "step": 4368 + }, + { + "epoch": 1.1601380958704024, + "grad_norm": 1.093371491828669, + "learning_rate": 8.19437668607503e-06, + "loss": 0.20780377089977264, + "step": 4369 + }, + { + "epoch": 1.1604036648519453, + "grad_norm": 1.0184271240235683, + "learning_rate": 8.190058263475288e-06, + "loss": 0.22397254407405853, + "step": 4370 + }, + { + "epoch": 1.1606692338334883, + "grad_norm": 1.1123966470918765, + "learning_rate": 8.185740189810967e-06, + "loss": 0.2763773798942566, + "step": 4371 + }, + { + "epoch": 1.1609348028150313, + "grad_norm": 1.234569017856286, + "learning_rate": 8.181422465914541e-06, + "loss": 0.2801940441131592, + "step": 4372 + }, + { + "epoch": 1.1612003717965742, + "grad_norm": 1.3078225086374202, + "learning_rate": 8.177105092618413e-06, + "loss": 0.20949441194534302, + "step": 4373 + }, + { + "epoch": 1.1614659407781172, + "grad_norm": 1.020800458401727, + "learning_rate": 8.172788070754927e-06, + "loss": 0.24503354728221893, + "step": 4374 + }, + { + "epoch": 1.16173150975966, + 
"grad_norm": 1.212252624187319, + "learning_rate": 8.16847140115635e-06, + "loss": 0.256147563457489, + "step": 4375 + }, + { + "epoch": 1.161997078741203, + "grad_norm": 1.079933692504349, + "learning_rate": 8.164155084654886e-06, + "loss": 0.2178848683834076, + "step": 4376 + }, + { + "epoch": 1.162262647722746, + "grad_norm": 1.0121292441974634, + "learning_rate": 8.159839122082668e-06, + "loss": 0.22624582052230835, + "step": 4377 + }, + { + "epoch": 1.162528216704289, + "grad_norm": 1.0294597777179986, + "learning_rate": 8.155523514271764e-06, + "loss": 0.2184191346168518, + "step": 4378 + }, + { + "epoch": 1.162793785685832, + "grad_norm": 1.2825595051682412, + "learning_rate": 8.151208262054175e-06, + "loss": 0.2623840868473053, + "step": 4379 + }, + { + "epoch": 1.1630593546673749, + "grad_norm": 1.2529929341607686, + "learning_rate": 8.14689336626183e-06, + "loss": 0.27181199193000793, + "step": 4380 + }, + { + "epoch": 1.1633249236489178, + "grad_norm": 1.282994089786083, + "learning_rate": 8.142578827726587e-06, + "loss": 0.2791554629802704, + "step": 4381 + }, + { + "epoch": 1.1635904926304608, + "grad_norm": 1.221608581014812, + "learning_rate": 8.13826464728024e-06, + "loss": 0.2466641068458557, + "step": 4382 + }, + { + "epoch": 1.1638560616120037, + "grad_norm": 0.9724735599541757, + "learning_rate": 8.133950825754511e-06, + "loss": 0.1951724737882614, + "step": 4383 + }, + { + "epoch": 1.1641216305935467, + "grad_norm": 1.2462068833977051, + "learning_rate": 8.129637363981056e-06, + "loss": 0.2520062029361725, + "step": 4384 + }, + { + "epoch": 1.1643871995750896, + "grad_norm": 1.230128345167748, + "learning_rate": 8.12532426279146e-06, + "loss": 0.24101334810256958, + "step": 4385 + }, + { + "epoch": 1.1646527685566326, + "grad_norm": 1.244671245504639, + "learning_rate": 8.121011523017235e-06, + "loss": 0.2741190791130066, + "step": 4386 + }, + { + "epoch": 1.1649183375381755, + "grad_norm": 1.1570746383559662, + "learning_rate": 
8.116699145489822e-06, + "loss": 0.2575281858444214, + "step": 4387 + }, + { + "epoch": 1.1651839065197185, + "grad_norm": 1.157233381368316, + "learning_rate": 8.112387131040608e-06, + "loss": 0.2557298243045807, + "step": 4388 + }, + { + "epoch": 1.1654494755012614, + "grad_norm": 1.2560692108341776, + "learning_rate": 8.108075480500892e-06, + "loss": 0.27485036849975586, + "step": 4389 + }, + { + "epoch": 1.1657150444828044, + "grad_norm": 1.2517544472207511, + "learning_rate": 8.103764194701909e-06, + "loss": 0.26458340883255005, + "step": 4390 + }, + { + "epoch": 1.1659806134643473, + "grad_norm": 1.2310585386329624, + "learning_rate": 8.099453274474827e-06, + "loss": 0.2281840592622757, + "step": 4391 + }, + { + "epoch": 1.1662461824458903, + "grad_norm": 1.2367230880082285, + "learning_rate": 8.095142720650739e-06, + "loss": 0.24956555664539337, + "step": 4392 + }, + { + "epoch": 1.1665117514274332, + "grad_norm": 1.109202461245095, + "learning_rate": 8.090832534060671e-06, + "loss": 0.22619420289993286, + "step": 4393 + }, + { + "epoch": 1.1667773204089762, + "grad_norm": 1.2922206575995636, + "learning_rate": 8.086522715535571e-06, + "loss": 0.2780688405036926, + "step": 4394 + }, + { + "epoch": 1.1670428893905191, + "grad_norm": 1.2699378735794575, + "learning_rate": 8.082213265906323e-06, + "loss": 0.2600886821746826, + "step": 4395 + }, + { + "epoch": 1.167308458372062, + "grad_norm": 1.244234758234162, + "learning_rate": 8.077904186003736e-06, + "loss": 0.25049078464508057, + "step": 4396 + }, + { + "epoch": 1.167574027353605, + "grad_norm": 1.2327544821473595, + "learning_rate": 8.073595476658558e-06, + "loss": 0.27745798230171204, + "step": 4397 + }, + { + "epoch": 1.167839596335148, + "grad_norm": 1.1682547274263488, + "learning_rate": 8.069287138701452e-06, + "loss": 0.2191929668188095, + "step": 4398 + }, + { + "epoch": 1.168105165316691, + "grad_norm": 1.297306908163856, + "learning_rate": 8.064979172963014e-06, + "loss": 0.24307313561439514, + 
"step": 4399 + }, + { + "epoch": 1.168370734298234, + "grad_norm": 1.1837345133145987, + "learning_rate": 8.060671580273772e-06, + "loss": 0.23036238551139832, + "step": 4400 + }, + { + "epoch": 1.1686363032797769, + "grad_norm": 1.096627050675377, + "learning_rate": 8.056364361464176e-06, + "loss": 0.2394433617591858, + "step": 4401 + }, + { + "epoch": 1.1689018722613198, + "grad_norm": 1.183557399538609, + "learning_rate": 8.052057517364608e-06, + "loss": 0.24099211394786835, + "step": 4402 + }, + { + "epoch": 1.1691674412428628, + "grad_norm": 1.1293667282926971, + "learning_rate": 8.047751048805376e-06, + "loss": 0.22036173939704895, + "step": 4403 + }, + { + "epoch": 1.1694330102244057, + "grad_norm": 1.185484128157471, + "learning_rate": 8.043444956616717e-06, + "loss": 0.22400429844856262, + "step": 4404 + }, + { + "epoch": 1.1696985792059487, + "grad_norm": 1.0594769241160498, + "learning_rate": 8.039139241628792e-06, + "loss": 0.21649131178855896, + "step": 4405 + }, + { + "epoch": 1.1699641481874916, + "grad_norm": 1.150957898906185, + "learning_rate": 8.034833904671698e-06, + "loss": 0.23412205278873444, + "step": 4406 + }, + { + "epoch": 1.1702297171690346, + "grad_norm": 1.2025485392569255, + "learning_rate": 8.030528946575453e-06, + "loss": 0.23822304606437683, + "step": 4407 + }, + { + "epoch": 1.1704952861505775, + "grad_norm": 1.2929661052617345, + "learning_rate": 8.026224368169998e-06, + "loss": 0.29250186681747437, + "step": 4408 + }, + { + "epoch": 1.1707608551321207, + "grad_norm": 1.4098437716027425, + "learning_rate": 8.021920170285205e-06, + "loss": 0.26794207096099854, + "step": 4409 + }, + { + "epoch": 1.1710264241136636, + "grad_norm": 1.2469013694849018, + "learning_rate": 8.017616353750874e-06, + "loss": 0.2573787271976471, + "step": 4410 + }, + { + "epoch": 1.1712919930952066, + "grad_norm": 1.1835378975512396, + "learning_rate": 8.01331291939673e-06, + "loss": 0.2744356691837311, + "step": 4411 + }, + { + "epoch": 1.1715575620767495, 
+ "grad_norm": 1.4542599881672131, + "learning_rate": 8.009009868052424e-06, + "loss": 0.2582886815071106, + "step": 4412 + }, + { + "epoch": 1.1718231310582925, + "grad_norm": 1.1766031171819216, + "learning_rate": 8.004707200547534e-06, + "loss": 0.2553568482398987, + "step": 4413 + }, + { + "epoch": 1.1720887000398355, + "grad_norm": 1.144579662849428, + "learning_rate": 8.00040491771156e-06, + "loss": 0.2670289874076843, + "step": 4414 + }, + { + "epoch": 1.1723542690213784, + "grad_norm": 1.1520006084984327, + "learning_rate": 7.99610302037394e-06, + "loss": 0.215460866689682, + "step": 4415 + }, + { + "epoch": 1.1726198380029214, + "grad_norm": 1.2764670908026035, + "learning_rate": 7.991801509364023e-06, + "loss": 0.26481571793556213, + "step": 4416 + }, + { + "epoch": 1.1728854069844643, + "grad_norm": 1.0239999030663398, + "learning_rate": 7.98750038551109e-06, + "loss": 0.2060776650905609, + "step": 4417 + }, + { + "epoch": 1.1731509759660073, + "grad_norm": 1.147707044406535, + "learning_rate": 7.983199649644349e-06, + "loss": 0.2401561588048935, + "step": 4418 + }, + { + "epoch": 1.1734165449475502, + "grad_norm": 1.3064882111410037, + "learning_rate": 7.978899302592927e-06, + "loss": 0.2545842230319977, + "step": 4419 + }, + { + "epoch": 1.1736821139290932, + "grad_norm": 1.199445262296627, + "learning_rate": 7.974599345185884e-06, + "loss": 0.29925093054771423, + "step": 4420 + }, + { + "epoch": 1.1739476829106361, + "grad_norm": 1.7583031900565322, + "learning_rate": 7.9702997782522e-06, + "loss": 0.23944757878780365, + "step": 4421 + }, + { + "epoch": 1.174213251892179, + "grad_norm": 1.057746400765015, + "learning_rate": 7.96600060262078e-06, + "loss": 0.23745761811733246, + "step": 4422 + }, + { + "epoch": 1.174478820873722, + "grad_norm": 1.1164780002442092, + "learning_rate": 7.961701819120453e-06, + "loss": 0.22170330584049225, + "step": 4423 + }, + { + "epoch": 1.174744389855265, + "grad_norm": 1.2607094160663312, + "learning_rate": 
7.95740342857998e-06, + "loss": 0.2645890712738037, + "step": 4424 + }, + { + "epoch": 1.175009958836808, + "grad_norm": 1.2171129338535713, + "learning_rate": 7.953105431828032e-06, + "loss": 0.25232207775115967, + "step": 4425 + }, + { + "epoch": 1.1752755278183509, + "grad_norm": 1.20503293579659, + "learning_rate": 7.948807829693219e-06, + "loss": 0.2656644880771637, + "step": 4426 + }, + { + "epoch": 1.1755410967998938, + "grad_norm": 1.069230366230624, + "learning_rate": 7.944510623004063e-06, + "loss": 0.25290653109550476, + "step": 4427 + }, + { + "epoch": 1.1758066657814368, + "grad_norm": 1.1825821036814732, + "learning_rate": 7.940213812589018e-06, + "loss": 0.27464741468429565, + "step": 4428 + }, + { + "epoch": 1.1760722347629797, + "grad_norm": 1.4910942744639428, + "learning_rate": 7.935917399276455e-06, + "loss": 0.2562064528465271, + "step": 4429 + }, + { + "epoch": 1.1763378037445227, + "grad_norm": 1.2720371671465533, + "learning_rate": 7.931621383894676e-06, + "loss": 0.267793208360672, + "step": 4430 + }, + { + "epoch": 1.1766033727260656, + "grad_norm": 1.1490167098873316, + "learning_rate": 7.9273257672719e-06, + "loss": 0.23651085793972015, + "step": 4431 + }, + { + "epoch": 1.1768689417076086, + "grad_norm": 1.0804412076412697, + "learning_rate": 7.923030550236267e-06, + "loss": 0.23691008985042572, + "step": 4432 + }, + { + "epoch": 1.1771345106891515, + "grad_norm": 1.1540873295746452, + "learning_rate": 7.918735733615852e-06, + "loss": 0.24495704472064972, + "step": 4433 + }, + { + "epoch": 1.1774000796706945, + "grad_norm": 1.4423069413713672, + "learning_rate": 7.91444131823864e-06, + "loss": 0.25423017144203186, + "step": 4434 + }, + { + "epoch": 1.1776656486522374, + "grad_norm": 1.1113893983435537, + "learning_rate": 7.910147304932548e-06, + "loss": 0.22870390117168427, + "step": 4435 + }, + { + "epoch": 1.1779312176337804, + "grad_norm": 1.0473620824498977, + "learning_rate": 7.905853694525405e-06, + "loss": 0.23037508130073547, + 
"step": 4436 + }, + { + "epoch": 1.1781967866153233, + "grad_norm": 1.2886040363623328, + "learning_rate": 7.901560487844973e-06, + "loss": 0.31184864044189453, + "step": 4437 + }, + { + "epoch": 1.1784623555968663, + "grad_norm": 1.302197101799982, + "learning_rate": 7.89726768571893e-06, + "loss": 0.24140426516532898, + "step": 4438 + }, + { + "epoch": 1.1787279245784092, + "grad_norm": 1.2134032336682008, + "learning_rate": 7.892975288974877e-06, + "loss": 0.25602301955223083, + "step": 4439 + }, + { + "epoch": 1.1789934935599522, + "grad_norm": 1.1868063067331378, + "learning_rate": 7.888683298440339e-06, + "loss": 0.2717514932155609, + "step": 4440 + }, + { + "epoch": 1.1792590625414952, + "grad_norm": 1.1670818939848298, + "learning_rate": 7.884391714942757e-06, + "loss": 0.252475380897522, + "step": 4441 + }, + { + "epoch": 1.179524631523038, + "grad_norm": 1.161546405047816, + "learning_rate": 7.880100539309506e-06, + "loss": 0.24777942895889282, + "step": 4442 + }, + { + "epoch": 1.179790200504581, + "grad_norm": 1.194146333188245, + "learning_rate": 7.875809772367867e-06, + "loss": 0.25111010670661926, + "step": 4443 + }, + { + "epoch": 1.180055769486124, + "grad_norm": 1.163412583383914, + "learning_rate": 7.87151941494505e-06, + "loss": 0.26183217763900757, + "step": 4444 + }, + { + "epoch": 1.180321338467667, + "grad_norm": 1.2974065116766642, + "learning_rate": 7.867229467868189e-06, + "loss": 0.27538490295410156, + "step": 4445 + }, + { + "epoch": 1.18058690744921, + "grad_norm": 1.078206017492716, + "learning_rate": 7.862939931964333e-06, + "loss": 0.2192106693983078, + "step": 4446 + }, + { + "epoch": 1.1808524764307529, + "grad_norm": 1.2415747879020278, + "learning_rate": 7.858650808060453e-06, + "loss": 0.26506057381629944, + "step": 4447 + }, + { + "epoch": 1.1811180454122958, + "grad_norm": 1.103375758703505, + "learning_rate": 7.854362096983443e-06, + "loss": 0.2345719337463379, + "step": 4448 + }, + { + "epoch": 1.1813836143938388, + 
"grad_norm": 1.1651284585435833, + "learning_rate": 7.850073799560114e-06, + "loss": 0.21404311060905457, + "step": 4449 + }, + { + "epoch": 1.1816491833753817, + "grad_norm": 1.1572235550991925, + "learning_rate": 7.8457859166172e-06, + "loss": 0.24332138895988464, + "step": 4450 + }, + { + "epoch": 1.1819147523569247, + "grad_norm": 1.1687901862394692, + "learning_rate": 7.841498448981354e-06, + "loss": 0.25025150179862976, + "step": 4451 + }, + { + "epoch": 1.1821803213384676, + "grad_norm": 1.167419454587793, + "learning_rate": 7.837211397479152e-06, + "loss": 0.21918940544128418, + "step": 4452 + }, + { + "epoch": 1.1824458903200106, + "grad_norm": 1.1517463754639392, + "learning_rate": 7.832924762937083e-06, + "loss": 0.24976079165935516, + "step": 4453 + }, + { + "epoch": 1.1827114593015535, + "grad_norm": 1.1165052000707918, + "learning_rate": 7.828638546181565e-06, + "loss": 0.21146243810653687, + "step": 4454 + }, + { + "epoch": 1.1829770282830965, + "grad_norm": 1.1110608449393633, + "learning_rate": 7.824352748038924e-06, + "loss": 0.22921445965766907, + "step": 4455 + }, + { + "epoch": 1.1832425972646394, + "grad_norm": 1.1833669908026252, + "learning_rate": 7.820067369335413e-06, + "loss": 0.24401478469371796, + "step": 4456 + }, + { + "epoch": 1.1835081662461824, + "grad_norm": 1.2543977272663969, + "learning_rate": 7.815782410897209e-06, + "loss": 0.2717207074165344, + "step": 4457 + }, + { + "epoch": 1.1837737352277253, + "grad_norm": 1.0934075655453726, + "learning_rate": 7.81149787355039e-06, + "loss": 0.20752058923244476, + "step": 4458 + }, + { + "epoch": 1.1840393042092683, + "grad_norm": 1.3448722481333402, + "learning_rate": 7.807213758120965e-06, + "loss": 0.31095850467681885, + "step": 4459 + }, + { + "epoch": 1.1843048731908112, + "grad_norm": 1.1769654791590503, + "learning_rate": 7.802930065434874e-06, + "loss": 0.23761102557182312, + "step": 4460 + }, + { + "epoch": 1.1845704421723542, + "grad_norm": 1.3225327364557968, + 
"learning_rate": 7.798646796317952e-06, + "loss": 0.2509460151195526, + "step": 4461 + }, + { + "epoch": 1.1848360111538971, + "grad_norm": 1.472525937697874, + "learning_rate": 7.794363951595966e-06, + "loss": 0.25903213024139404, + "step": 4462 + }, + { + "epoch": 1.18510158013544, + "grad_norm": 1.1904413554334654, + "learning_rate": 7.790081532094596e-06, + "loss": 0.23304736614227295, + "step": 4463 + }, + { + "epoch": 1.185367149116983, + "grad_norm": 1.311875765456408, + "learning_rate": 7.785799538639445e-06, + "loss": 0.28707265853881836, + "step": 4464 + }, + { + "epoch": 1.185632718098526, + "grad_norm": 1.0202920254712324, + "learning_rate": 7.781517972056028e-06, + "loss": 0.20282745361328125, + "step": 4465 + }, + { + "epoch": 1.185898287080069, + "grad_norm": 1.2606153791729335, + "learning_rate": 7.777236833169782e-06, + "loss": 0.24056631326675415, + "step": 4466 + }, + { + "epoch": 1.186163856061612, + "grad_norm": 1.4946194524955894, + "learning_rate": 7.772956122806058e-06, + "loss": 0.2677255868911743, + "step": 4467 + }, + { + "epoch": 1.1864294250431549, + "grad_norm": 1.2681064192856966, + "learning_rate": 7.768675841790124e-06, + "loss": 0.22032876312732697, + "step": 4468 + }, + { + "epoch": 1.1866949940246978, + "grad_norm": 1.3138325978828467, + "learning_rate": 7.764395990947177e-06, + "loss": 0.2980336546897888, + "step": 4469 + }, + { + "epoch": 1.1869605630062408, + "grad_norm": 1.2624280680532078, + "learning_rate": 7.760116571102314e-06, + "loss": 0.2562638521194458, + "step": 4470 + }, + { + "epoch": 1.1872261319877837, + "grad_norm": 1.2207997545500016, + "learning_rate": 7.755837583080561e-06, + "loss": 0.262576699256897, + "step": 4471 + }, + { + "epoch": 1.1874917009693267, + "grad_norm": 1.2672893771429377, + "learning_rate": 7.751559027706858e-06, + "loss": 0.2654029130935669, + "step": 4472 + }, + { + "epoch": 1.1877572699508698, + "grad_norm": 1.2996444615622489, + "learning_rate": 7.747280905806051e-06, + "loss": 
0.2946662902832031, + "step": 4473 + }, + { + "epoch": 1.1880228389324128, + "grad_norm": 1.193974235945654, + "learning_rate": 7.743003218202921e-06, + "loss": 0.25140905380249023, + "step": 4474 + }, + { + "epoch": 1.1882884079139557, + "grad_norm": 1.2240016583398612, + "learning_rate": 7.738725965722149e-06, + "loss": 0.2601654529571533, + "step": 4475 + }, + { + "epoch": 1.1885539768954987, + "grad_norm": 1.9675422662507516, + "learning_rate": 7.73444914918834e-06, + "loss": 0.2639954090118408, + "step": 4476 + }, + { + "epoch": 1.1888195458770416, + "grad_norm": 1.174151986382161, + "learning_rate": 7.730172769426014e-06, + "loss": 0.23391291499137878, + "step": 4477 + }, + { + "epoch": 1.1890851148585846, + "grad_norm": 2.254589386622623, + "learning_rate": 7.725896827259613e-06, + "loss": 0.2912144958972931, + "step": 4478 + }, + { + "epoch": 1.1893506838401275, + "grad_norm": 1.0905445077469016, + "learning_rate": 7.72162132351348e-06, + "loss": 0.23867549002170563, + "step": 4479 + }, + { + "epoch": 1.1896162528216705, + "grad_norm": 1.1124853975848743, + "learning_rate": 7.717346259011888e-06, + "loss": 0.22434742748737335, + "step": 4480 + }, + { + "epoch": 1.1898818218032134, + "grad_norm": 1.2440839352544732, + "learning_rate": 7.713071634579017e-06, + "loss": 0.2504398822784424, + "step": 4481 + }, + { + "epoch": 1.1901473907847564, + "grad_norm": 1.1759629506533034, + "learning_rate": 7.70879745103896e-06, + "loss": 0.24887195229530334, + "step": 4482 + }, + { + "epoch": 1.1904129597662994, + "grad_norm": 1.2603454999195398, + "learning_rate": 7.704523709215732e-06, + "loss": 0.2730141580104828, + "step": 4483 + }, + { + "epoch": 1.1906785287478423, + "grad_norm": 1.2285382464481551, + "learning_rate": 7.70025040993326e-06, + "loss": 0.22197315096855164, + "step": 4484 + }, + { + "epoch": 1.1909440977293853, + "grad_norm": 1.2004564929121084, + "learning_rate": 7.695977554015387e-06, + "loss": 0.2852731943130493, + "step": 4485 + }, + { + "epoch": 
1.1912096667109282, + "grad_norm": 1.2815387200597224, + "learning_rate": 7.691705142285863e-06, + "loss": 0.2577238976955414, + "step": 4486 + }, + { + "epoch": 1.1914752356924712, + "grad_norm": 1.066499567502605, + "learning_rate": 7.68743317556837e-06, + "loss": 0.23510503768920898, + "step": 4487 + }, + { + "epoch": 1.191740804674014, + "grad_norm": 1.557745891642732, + "learning_rate": 7.683161654686486e-06, + "loss": 0.2553985118865967, + "step": 4488 + }, + { + "epoch": 1.192006373655557, + "grad_norm": 1.1965147913981737, + "learning_rate": 7.67889058046371e-06, + "loss": 0.2778642475605011, + "step": 4489 + }, + { + "epoch": 1.1922719426371, + "grad_norm": 1.1622951487110165, + "learning_rate": 7.674619953723455e-06, + "loss": 0.24740618467330933, + "step": 4490 + }, + { + "epoch": 1.192537511618643, + "grad_norm": 1.1598996003550786, + "learning_rate": 7.670349775289047e-06, + "loss": 0.2453901171684265, + "step": 4491 + }, + { + "epoch": 1.192803080600186, + "grad_norm": 1.1444233008842855, + "learning_rate": 7.666080045983726e-06, + "loss": 0.2336064875125885, + "step": 4492 + }, + { + "epoch": 1.1930686495817289, + "grad_norm": 1.18047841753512, + "learning_rate": 7.661810766630648e-06, + "loss": 0.2375800907611847, + "step": 4493 + }, + { + "epoch": 1.1933342185632718, + "grad_norm": 1.1241813274405275, + "learning_rate": 7.657541938052876e-06, + "loss": 0.21272733807563782, + "step": 4494 + }, + { + "epoch": 1.1935997875448148, + "grad_norm": 1.1531042348696576, + "learning_rate": 7.65327356107339e-06, + "loss": 0.26597708463668823, + "step": 4495 + }, + { + "epoch": 1.1938653565263577, + "grad_norm": 1.1715955143508257, + "learning_rate": 7.649005636515088e-06, + "loss": 0.267806738615036, + "step": 4496 + }, + { + "epoch": 1.1941309255079007, + "grad_norm": 1.1812545197713797, + "learning_rate": 7.64473816520077e-06, + "loss": 0.2260194569826126, + "step": 4497 + }, + { + "epoch": 1.1943964944894436, + "grad_norm": 1.298416110387325, + 
"learning_rate": 7.640471147953157e-06, + "loss": 0.24523532390594482, + "step": 4498 + }, + { + "epoch": 1.1946620634709866, + "grad_norm": 1.1020194586485352, + "learning_rate": 7.636204585594879e-06, + "loss": 0.23230910301208496, + "step": 4499 + }, + { + "epoch": 1.1949276324525295, + "grad_norm": 1.1141631171804318, + "learning_rate": 7.631938478948478e-06, + "loss": 0.23322705924510956, + "step": 4500 + }, + { + "epoch": 1.1951932014340725, + "grad_norm": 1.3011711597097497, + "learning_rate": 7.6276728288364086e-06, + "loss": 0.25614386796951294, + "step": 4501 + }, + { + "epoch": 1.1954587704156154, + "grad_norm": 1.2188058731839337, + "learning_rate": 7.62340763608104e-06, + "loss": 0.22921821475028992, + "step": 4502 + }, + { + "epoch": 1.1957243393971584, + "grad_norm": 1.1538976889459698, + "learning_rate": 7.619142901504649e-06, + "loss": 0.25528913736343384, + "step": 4503 + }, + { + "epoch": 1.1959899083787013, + "grad_norm": 1.1730292690453887, + "learning_rate": 7.614878625929425e-06, + "loss": 0.2528502643108368, + "step": 4504 + }, + { + "epoch": 1.1962554773602443, + "grad_norm": 1.2636827238002009, + "learning_rate": 7.610614810177474e-06, + "loss": 0.2519027590751648, + "step": 4505 + }, + { + "epoch": 1.1965210463417872, + "grad_norm": 1.3563109831905724, + "learning_rate": 7.606351455070808e-06, + "loss": 0.2895655333995819, + "step": 4506 + }, + { + "epoch": 1.1967866153233302, + "grad_norm": 1.2317858842714817, + "learning_rate": 7.6020885614313515e-06, + "loss": 0.24588793516159058, + "step": 4507 + }, + { + "epoch": 1.1970521843048731, + "grad_norm": 1.3148149004868621, + "learning_rate": 7.597826130080938e-06, + "loss": 0.2996830940246582, + "step": 4508 + }, + { + "epoch": 1.197317753286416, + "grad_norm": 1.2289139982746875, + "learning_rate": 7.593564161841318e-06, + "loss": 0.2654343247413635, + "step": 4509 + }, + { + "epoch": 1.197583322267959, + "grad_norm": 1.2104660234722762, + "learning_rate": 7.589302657534144e-06, + "loss": 
0.24949109554290771, + "step": 4510 + }, + { + "epoch": 1.197848891249502, + "grad_norm": 1.1785955409512114, + "learning_rate": 7.5850416179809886e-06, + "loss": 0.23205731809139252, + "step": 4511 + }, + { + "epoch": 1.198114460231045, + "grad_norm": 3.351023225066079, + "learning_rate": 7.580781044003324e-06, + "loss": 0.232904314994812, + "step": 4512 + }, + { + "epoch": 1.198380029212588, + "grad_norm": 1.0569352775404934, + "learning_rate": 7.576520936422542e-06, + "loss": 0.25071364641189575, + "step": 4513 + }, + { + "epoch": 1.1986455981941309, + "grad_norm": 1.3613643273685416, + "learning_rate": 7.572261296059944e-06, + "loss": 0.2574467658996582, + "step": 4514 + }, + { + "epoch": 1.1989111671756738, + "grad_norm": 1.1866331959407248, + "learning_rate": 7.568002123736735e-06, + "loss": 0.23134055733680725, + "step": 4515 + }, + { + "epoch": 1.1991767361572168, + "grad_norm": 1.093870770411857, + "learning_rate": 7.5637434202740334e-06, + "loss": 0.22163332998752594, + "step": 4516 + }, + { + "epoch": 1.1994423051387597, + "grad_norm": 1.182308432196374, + "learning_rate": 7.559485186492868e-06, + "loss": 0.2665749788284302, + "step": 4517 + }, + { + "epoch": 1.1997078741203027, + "grad_norm": 1.0758759053634162, + "learning_rate": 7.555227423214174e-06, + "loss": 0.2237103432416916, + "step": 4518 + }, + { + "epoch": 1.1999734431018456, + "grad_norm": 1.2216323349035507, + "learning_rate": 7.550970131258801e-06, + "loss": 0.23287461698055267, + "step": 4519 + }, + { + "epoch": 1.2002390120833886, + "grad_norm": 1.1237156855078405, + "learning_rate": 7.5467133114475025e-06, + "loss": 0.2296323925256729, + "step": 4520 + }, + { + "epoch": 1.2005045810649315, + "grad_norm": 1.0900498705064874, + "learning_rate": 7.542456964600944e-06, + "loss": 0.21358339488506317, + "step": 4521 + }, + { + "epoch": 1.2007701500464747, + "grad_norm": 1.2516498821908515, + "learning_rate": 7.5382010915396954e-06, + "loss": 0.2355872094631195, + "step": 4522 + }, + { + 
"epoch": 1.2010357190280176, + "grad_norm": 1.2039029354448443, + "learning_rate": 7.5339456930842455e-06, + "loss": 0.25397661328315735, + "step": 4523 + }, + { + "epoch": 1.2013012880095606, + "grad_norm": 1.1762399479435963, + "learning_rate": 7.52969077005498e-06, + "loss": 0.26658257842063904, + "step": 4524 + }, + { + "epoch": 1.2015668569911035, + "grad_norm": 1.1889790145170218, + "learning_rate": 7.525436323272201e-06, + "loss": 0.27207136154174805, + "step": 4525 + }, + { + "epoch": 1.2018324259726465, + "grad_norm": 1.1867510172835751, + "learning_rate": 7.521182353556114e-06, + "loss": 0.25889313220977783, + "step": 4526 + }, + { + "epoch": 1.2020979949541895, + "grad_norm": 1.3095753328357655, + "learning_rate": 7.516928861726834e-06, + "loss": 0.272185742855072, + "step": 4527 + }, + { + "epoch": 1.2023635639357324, + "grad_norm": 1.156226984644319, + "learning_rate": 7.512675848604385e-06, + "loss": 0.25371503829956055, + "step": 4528 + }, + { + "epoch": 1.2026291329172754, + "grad_norm": 1.2028831911106082, + "learning_rate": 7.5084233150086964e-06, + "loss": 0.2554902732372284, + "step": 4529 + }, + { + "epoch": 1.2028947018988183, + "grad_norm": 1.1714528701705076, + "learning_rate": 7.50417126175961e-06, + "loss": 0.22007369995117188, + "step": 4530 + }, + { + "epoch": 1.2031602708803613, + "grad_norm": 1.2057968317835202, + "learning_rate": 7.499919689676861e-06, + "loss": 0.27492445707321167, + "step": 4531 + }, + { + "epoch": 1.2034258398619042, + "grad_norm": 1.1229280499713745, + "learning_rate": 7.4956685995801144e-06, + "loss": 0.2321021854877472, + "step": 4532 + }, + { + "epoch": 1.2036914088434472, + "grad_norm": 1.1735641467762012, + "learning_rate": 7.491417992288927e-06, + "loss": 0.25410759449005127, + "step": 4533 + }, + { + "epoch": 1.2039569778249901, + "grad_norm": 1.0638924164212193, + "learning_rate": 7.487167868622765e-06, + "loss": 0.2080576866865158, + "step": 4534 + }, + { + "epoch": 1.204222546806533, + "grad_norm": 
1.115815492341061, + "learning_rate": 7.482918229401001e-06, + "loss": 0.2333327978849411, + "step": 4535 + }, + { + "epoch": 1.204488115788076, + "grad_norm": 1.1999209092526242, + "learning_rate": 7.478669075442917e-06, + "loss": 0.23160479962825775, + "step": 4536 + }, + { + "epoch": 1.204753684769619, + "grad_norm": 1.2136747509439494, + "learning_rate": 7.474420407567699e-06, + "loss": 0.2627696394920349, + "step": 4537 + }, + { + "epoch": 1.205019253751162, + "grad_norm": 1.0694648198090266, + "learning_rate": 7.470172226594441e-06, + "loss": 0.18656940758228302, + "step": 4538 + }, + { + "epoch": 1.2052848227327049, + "grad_norm": 1.2245138263513848, + "learning_rate": 7.465924533342139e-06, + "loss": 0.2749083340167999, + "step": 4539 + }, + { + "epoch": 1.2055503917142478, + "grad_norm": 1.3944907322006155, + "learning_rate": 7.461677328629696e-06, + "loss": 0.27484387159347534, + "step": 4540 + }, + { + "epoch": 1.2058159606957908, + "grad_norm": 1.254197138569937, + "learning_rate": 7.457430613275934e-06, + "loss": 0.26357588171958923, + "step": 4541 + }, + { + "epoch": 1.2060815296773337, + "grad_norm": 1.2004336778554112, + "learning_rate": 7.453184388099559e-06, + "loss": 0.23495343327522278, + "step": 4542 + }, + { + "epoch": 1.2063470986588767, + "grad_norm": 1.2123259782755003, + "learning_rate": 7.4489386539192e-06, + "loss": 0.253970205783844, + "step": 4543 + }, + { + "epoch": 1.2066126676404196, + "grad_norm": 1.1523820852778563, + "learning_rate": 7.444693411553383e-06, + "loss": 0.24919062852859497, + "step": 4544 + }, + { + "epoch": 1.2068782366219626, + "grad_norm": 1.2181666045865969, + "learning_rate": 7.440448661820536e-06, + "loss": 0.24373450875282288, + "step": 4545 + }, + { + "epoch": 1.2071438056035055, + "grad_norm": 1.3762501451890354, + "learning_rate": 7.436204405539002e-06, + "loss": 0.24739482998847961, + "step": 4546 + }, + { + "epoch": 1.2074093745850485, + "grad_norm": 1.2982074074943253, + "learning_rate": 
7.4319606435270195e-06, + "loss": 0.27041494846343994, + "step": 4547 + }, + { + "epoch": 1.2076749435665914, + "grad_norm": 1.1359942984852744, + "learning_rate": 7.427717376602739e-06, + "loss": 0.23243938386440277, + "step": 4548 + }, + { + "epoch": 1.2079405125481344, + "grad_norm": 1.3118758722508392, + "learning_rate": 7.423474605584206e-06, + "loss": 0.2346343696117401, + "step": 4549 + }, + { + "epoch": 1.2082060815296773, + "grad_norm": 1.1819354183035133, + "learning_rate": 7.419232331289385e-06, + "loss": 0.2587367296218872, + "step": 4550 + }, + { + "epoch": 1.2084716505112203, + "grad_norm": 1.195922174249915, + "learning_rate": 7.414990554536134e-06, + "loss": 0.2552938461303711, + "step": 4551 + }, + { + "epoch": 1.2087372194927632, + "grad_norm": 1.2688216449772127, + "learning_rate": 7.410749276142221e-06, + "loss": 0.2693648040294647, + "step": 4552 + }, + { + "epoch": 1.2090027884743062, + "grad_norm": 1.1997939452425357, + "learning_rate": 7.406508496925307e-06, + "loss": 0.21543294191360474, + "step": 4553 + }, + { + "epoch": 1.2092683574558492, + "grad_norm": 1.2385892147047024, + "learning_rate": 7.402268217702966e-06, + "loss": 0.2913009524345398, + "step": 4554 + }, + { + "epoch": 1.209533926437392, + "grad_norm": 1.0671356100150298, + "learning_rate": 7.398028439292675e-06, + "loss": 0.23279520869255066, + "step": 4555 + }, + { + "epoch": 1.209799495418935, + "grad_norm": 1.0946575444558022, + "learning_rate": 7.393789162511815e-06, + "loss": 0.25086939334869385, + "step": 4556 + }, + { + "epoch": 1.210065064400478, + "grad_norm": 1.0964890001200192, + "learning_rate": 7.389550388177662e-06, + "loss": 0.21704714000225067, + "step": 4557 + }, + { + "epoch": 1.210330633382021, + "grad_norm": 1.126699331966135, + "learning_rate": 7.3853121171074115e-06, + "loss": 0.230219304561615, + "step": 4558 + }, + { + "epoch": 1.210596202363564, + "grad_norm": 1.1809668678269754, + "learning_rate": 7.381074350118149e-06, + "loss": 0.26073017716407776, + 
"step": 4559 + }, + { + "epoch": 1.2108617713451069, + "grad_norm": 1.2065072762311946, + "learning_rate": 7.376837088026863e-06, + "loss": 0.25186216831207275, + "step": 4560 + }, + { + "epoch": 1.2111273403266498, + "grad_norm": 1.3978877577958326, + "learning_rate": 7.372600331650449e-06, + "loss": 0.28719040751457214, + "step": 4561 + }, + { + "epoch": 1.2113929093081928, + "grad_norm": 1.16073083909203, + "learning_rate": 7.368364081805704e-06, + "loss": 0.23972755670547485, + "step": 4562 + }, + { + "epoch": 1.2116584782897357, + "grad_norm": 1.096919114864748, + "learning_rate": 7.364128339309326e-06, + "loss": 0.23053769767284393, + "step": 4563 + }, + { + "epoch": 1.2119240472712787, + "grad_norm": 1.2910615683085556, + "learning_rate": 7.359893104977917e-06, + "loss": 0.25124189257621765, + "step": 4564 + }, + { + "epoch": 1.2121896162528216, + "grad_norm": 1.1863697592423188, + "learning_rate": 7.355658379627981e-06, + "loss": 0.2243686318397522, + "step": 4565 + }, + { + "epoch": 1.2124551852343646, + "grad_norm": 1.244591161752608, + "learning_rate": 7.3514241640759175e-06, + "loss": 0.26047343015670776, + "step": 4566 + }, + { + "epoch": 1.2127207542159075, + "grad_norm": 1.1775978450301259, + "learning_rate": 7.3471904591380434e-06, + "loss": 0.23603469133377075, + "step": 4567 + }, + { + "epoch": 1.2129863231974505, + "grad_norm": 1.2261707581126196, + "learning_rate": 7.342957265630561e-06, + "loss": 0.31320711970329285, + "step": 4568 + }, + { + "epoch": 1.2132518921789934, + "grad_norm": 1.22464158648852, + "learning_rate": 7.338724584369581e-06, + "loss": 0.22159788012504578, + "step": 4569 + }, + { + "epoch": 1.2135174611605364, + "grad_norm": 1.1206153371836056, + "learning_rate": 7.334492416171114e-06, + "loss": 0.21992239356040955, + "step": 4570 + }, + { + "epoch": 1.2137830301420793, + "grad_norm": 1.3229661253734524, + "learning_rate": 7.330260761851071e-06, + "loss": 0.20708827674388885, + "step": 4571 + }, + { + "epoch": 
1.2140485991236223, + "grad_norm": 1.1899658624900848, + "learning_rate": 7.326029622225269e-06, + "loss": 0.2846507132053375, + "step": 4572 + }, + { + "epoch": 1.2143141681051652, + "grad_norm": 1.2218224134688922, + "learning_rate": 7.321798998109417e-06, + "loss": 0.24903801083564758, + "step": 4573 + }, + { + "epoch": 1.2145797370867082, + "grad_norm": 1.1817295734811926, + "learning_rate": 7.317568890319134e-06, + "loss": 0.23426681756973267, + "step": 4574 + }, + { + "epoch": 1.2148453060682511, + "grad_norm": 1.1685993771040228, + "learning_rate": 7.31333929966993e-06, + "loss": 0.2374490350484848, + "step": 4575 + }, + { + "epoch": 1.215110875049794, + "grad_norm": 1.13335327598736, + "learning_rate": 7.309110226977223e-06, + "loss": 0.24035832285881042, + "step": 4576 + }, + { + "epoch": 1.215376444031337, + "grad_norm": 1.2837405582571324, + "learning_rate": 7.30488167305633e-06, + "loss": 0.21872258186340332, + "step": 4577 + }, + { + "epoch": 1.21564201301288, + "grad_norm": 1.3425258296129825, + "learning_rate": 7.300653638722463e-06, + "loss": 0.2940255403518677, + "step": 4578 + }, + { + "epoch": 1.215907581994423, + "grad_norm": 1.1158795437619367, + "learning_rate": 7.29642612479074e-06, + "loss": 0.20970892906188965, + "step": 4579 + }, + { + "epoch": 1.216173150975966, + "grad_norm": 1.1571301789790744, + "learning_rate": 7.292199132076175e-06, + "loss": 0.21217449009418488, + "step": 4580 + }, + { + "epoch": 1.2164387199575089, + "grad_norm": 1.2448503896532135, + "learning_rate": 7.28797266139368e-06, + "loss": 0.2463359832763672, + "step": 4581 + }, + { + "epoch": 1.2167042889390518, + "grad_norm": 1.132320428820701, + "learning_rate": 7.283746713558071e-06, + "loss": 0.21921415627002716, + "step": 4582 + }, + { + "epoch": 1.2169698579205948, + "grad_norm": 1.2437376760058587, + "learning_rate": 7.279521289384059e-06, + "loss": 0.2412380576133728, + "step": 4583 + }, + { + "epoch": 1.2172354269021377, + "grad_norm": 1.180878934188553, + 
"learning_rate": 7.275296389686258e-06, + "loss": 0.2558564245700836, + "step": 4584 + }, + { + "epoch": 1.2175009958836809, + "grad_norm": 1.2566060880081307, + "learning_rate": 7.271072015279179e-06, + "loss": 0.2548869848251343, + "step": 4585 + }, + { + "epoch": 1.2177665648652238, + "grad_norm": 1.4407566508510072, + "learning_rate": 7.2668481669772304e-06, + "loss": 0.22183407843112946, + "step": 4586 + }, + { + "epoch": 1.2180321338467668, + "grad_norm": 1.20165829214997, + "learning_rate": 7.262624845594721e-06, + "loss": 0.24722473323345184, + "step": 4587 + }, + { + "epoch": 1.2182977028283097, + "grad_norm": 1.190564524584547, + "learning_rate": 7.258402051945858e-06, + "loss": 0.2678988575935364, + "step": 4588 + }, + { + "epoch": 1.2185632718098527, + "grad_norm": 1.187777405395345, + "learning_rate": 7.2541797868447435e-06, + "loss": 0.2116469144821167, + "step": 4589 + }, + { + "epoch": 1.2188288407913956, + "grad_norm": 1.2500071795758152, + "learning_rate": 7.249958051105383e-06, + "loss": 0.23897933959960938, + "step": 4590 + }, + { + "epoch": 1.2190944097729386, + "grad_norm": 1.2473885744661077, + "learning_rate": 7.245736845541676e-06, + "loss": 0.25434061884880066, + "step": 4591 + }, + { + "epoch": 1.2193599787544815, + "grad_norm": 1.2108382272450464, + "learning_rate": 7.2415161709674235e-06, + "loss": 0.2602628469467163, + "step": 4592 + }, + { + "epoch": 1.2196255477360245, + "grad_norm": 3.1633443202169764, + "learning_rate": 7.2372960281963165e-06, + "loss": 0.2519065737724304, + "step": 4593 + }, + { + "epoch": 1.2198911167175674, + "grad_norm": 1.550903602515833, + "learning_rate": 7.233076418041954e-06, + "loss": 0.24404102563858032, + "step": 4594 + }, + { + "epoch": 1.2201566856991104, + "grad_norm": 1.1561711817096534, + "learning_rate": 7.228857341317825e-06, + "loss": 0.23633979260921478, + "step": 4595 + }, + { + "epoch": 1.2204222546806534, + "grad_norm": 1.2128002082313463, + "learning_rate": 7.224638798837319e-06, + "loss": 
0.2513781189918518, + "step": 4596 + }, + { + "epoch": 1.2206878236621963, + "grad_norm": 1.2409533600026899, + "learning_rate": 7.220420791413721e-06, + "loss": 0.23270189762115479, + "step": 4597 + }, + { + "epoch": 1.2209533926437393, + "grad_norm": 1.2503409564498669, + "learning_rate": 7.21620331986021e-06, + "loss": 0.2770010530948639, + "step": 4598 + }, + { + "epoch": 1.2212189616252822, + "grad_norm": 1.1284522462719728, + "learning_rate": 7.2119863849898684e-06, + "loss": 0.2312745451927185, + "step": 4599 + }, + { + "epoch": 1.2214845306068252, + "grad_norm": 1.2725314186948387, + "learning_rate": 7.20776998761567e-06, + "loss": 0.231276735663414, + "step": 4600 + }, + { + "epoch": 1.221750099588368, + "grad_norm": 1.1715742737590393, + "learning_rate": 7.203554128550486e-06, + "loss": 0.24927708506584167, + "step": 4601 + }, + { + "epoch": 1.222015668569911, + "grad_norm": 1.1138441718661785, + "learning_rate": 7.199338808607084e-06, + "loss": 0.23033373057842255, + "step": 4602 + }, + { + "epoch": 1.222281237551454, + "grad_norm": 1.2545098885673684, + "learning_rate": 7.195124028598131e-06, + "loss": 0.24003425240516663, + "step": 4603 + }, + { + "epoch": 1.222546806532997, + "grad_norm": 1.1872708193619057, + "learning_rate": 7.190909789336185e-06, + "loss": 0.22648809850215912, + "step": 4604 + }, + { + "epoch": 1.22281237551454, + "grad_norm": 1.2511860493227276, + "learning_rate": 7.1866960916337006e-06, + "loss": 0.2605816125869751, + "step": 4605 + }, + { + "epoch": 1.2230779444960829, + "grad_norm": 1.1424629632361756, + "learning_rate": 7.1824829363030305e-06, + "loss": 0.21549202501773834, + "step": 4606 + }, + { + "epoch": 1.2233435134776258, + "grad_norm": 1.1532084986944064, + "learning_rate": 7.17827032415642e-06, + "loss": 0.23113220930099487, + "step": 4607 + }, + { + "epoch": 1.2236090824591688, + "grad_norm": 1.1649312720163907, + "learning_rate": 7.174058256006012e-06, + "loss": 0.22736643254756927, + "step": 4608 + }, + { + "epoch": 
1.2238746514407117, + "grad_norm": 1.172011833362534, + "learning_rate": 7.169846732663845e-06, + "loss": 0.2686663866043091, + "step": 4609 + }, + { + "epoch": 1.2241402204222547, + "grad_norm": 1.1555217624379808, + "learning_rate": 7.1656357549418485e-06, + "loss": 0.1980462670326233, + "step": 4610 + }, + { + "epoch": 1.2244057894037976, + "grad_norm": 1.2401629806715768, + "learning_rate": 7.161425323651846e-06, + "loss": 0.22997641563415527, + "step": 4611 + }, + { + "epoch": 1.2246713583853406, + "grad_norm": 1.3367939845671126, + "learning_rate": 7.157215439605567e-06, + "loss": 0.28781357407569885, + "step": 4612 + }, + { + "epoch": 1.2249369273668835, + "grad_norm": 1.2895382897388425, + "learning_rate": 7.153006103614624e-06, + "loss": 0.22558270394802094, + "step": 4613 + }, + { + "epoch": 1.2252024963484265, + "grad_norm": 1.1860196927831441, + "learning_rate": 7.148797316490527e-06, + "loss": 0.2435922622680664, + "step": 4614 + }, + { + "epoch": 1.2254680653299694, + "grad_norm": 1.2828543438888096, + "learning_rate": 7.14458907904468e-06, + "loss": 0.27840936183929443, + "step": 4615 + }, + { + "epoch": 1.2257336343115124, + "grad_norm": 1.2350405670943831, + "learning_rate": 7.1403813920883825e-06, + "loss": 0.2775651812553406, + "step": 4616 + }, + { + "epoch": 1.2259992032930553, + "grad_norm": 1.2738452228129284, + "learning_rate": 7.136174256432828e-06, + "loss": 0.2430988848209381, + "step": 4617 + }, + { + "epoch": 1.2262647722745983, + "grad_norm": 1.0618083363199646, + "learning_rate": 7.131967672889101e-06, + "loss": 0.2018759697675705, + "step": 4618 + }, + { + "epoch": 1.2265303412561412, + "grad_norm": 1.2320094058432127, + "learning_rate": 7.127761642268179e-06, + "loss": 0.25314825773239136, + "step": 4619 + }, + { + "epoch": 1.2267959102376842, + "grad_norm": 1.409693024729639, + "learning_rate": 7.123556165380935e-06, + "loss": 0.2542746365070343, + "step": 4620 + }, + { + "epoch": 1.2270614792192271, + "grad_norm": 
1.2571649384815597, + "learning_rate": 7.119351243038142e-06, + "loss": 0.2912300229072571, + "step": 4621 + }, + { + "epoch": 1.22732704820077, + "grad_norm": 1.3877507856901592, + "learning_rate": 7.115146876050454e-06, + "loss": 0.26893284916877747, + "step": 4622 + }, + { + "epoch": 1.227592617182313, + "grad_norm": 1.3833428208823224, + "learning_rate": 7.110943065228425e-06, + "loss": 0.2711215317249298, + "step": 4623 + }, + { + "epoch": 1.227858186163856, + "grad_norm": 1.346165350849743, + "learning_rate": 7.106739811382501e-06, + "loss": 0.25530266761779785, + "step": 4624 + }, + { + "epoch": 1.228123755145399, + "grad_norm": 1.268299981159743, + "learning_rate": 7.102537115323018e-06, + "loss": 0.2547178864479065, + "step": 4625 + }, + { + "epoch": 1.228389324126942, + "grad_norm": 1.5802606545447795, + "learning_rate": 7.0983349778602064e-06, + "loss": 0.27973634004592896, + "step": 4626 + }, + { + "epoch": 1.2286548931084849, + "grad_norm": 1.205257873334912, + "learning_rate": 7.0941333998041884e-06, + "loss": 0.24066339433193207, + "step": 4627 + }, + { + "epoch": 1.2289204620900278, + "grad_norm": 1.1798307734371165, + "learning_rate": 7.0899323819649816e-06, + "loss": 0.24305742979049683, + "step": 4628 + }, + { + "epoch": 1.2291860310715708, + "grad_norm": 1.163221794708842, + "learning_rate": 7.085731925152484e-06, + "loss": 0.22478783130645752, + "step": 4629 + }, + { + "epoch": 1.2294516000531137, + "grad_norm": 1.1812808698189172, + "learning_rate": 7.081532030176506e-06, + "loss": 0.24995659291744232, + "step": 4630 + }, + { + "epoch": 1.2297171690346567, + "grad_norm": 1.1575900439946216, + "learning_rate": 7.077332697846733e-06, + "loss": 0.2579454183578491, + "step": 4631 + }, + { + "epoch": 1.2299827380161996, + "grad_norm": 1.2378373931288529, + "learning_rate": 7.073133928972745e-06, + "loss": 0.2513299286365509, + "step": 4632 + }, + { + "epoch": 1.2302483069977426, + "grad_norm": 1.0751310135047412, + "learning_rate": 
7.068935724364016e-06, + "loss": 0.23344315588474274, + "step": 4633 + }, + { + "epoch": 1.2305138759792857, + "grad_norm": 1.1882346043976466, + "learning_rate": 7.064738084829912e-06, + "loss": 0.26750341057777405, + "step": 4634 + }, + { + "epoch": 1.2307794449608287, + "grad_norm": 1.1622882344241228, + "learning_rate": 7.0605410111796855e-06, + "loss": 0.22424373030662537, + "step": 4635 + }, + { + "epoch": 1.2310450139423716, + "grad_norm": 1.0711348851881108, + "learning_rate": 7.056344504222485e-06, + "loss": 0.24261844158172607, + "step": 4636 + }, + { + "epoch": 1.2313105829239146, + "grad_norm": 1.1382788327638453, + "learning_rate": 7.052148564767347e-06, + "loss": 0.22273704409599304, + "step": 4637 + }, + { + "epoch": 1.2315761519054576, + "grad_norm": 1.217398110209698, + "learning_rate": 7.047953193623195e-06, + "loss": 0.23726603388786316, + "step": 4638 + }, + { + "epoch": 1.2318417208870005, + "grad_norm": 1.1961933626954258, + "learning_rate": 7.043758391598856e-06, + "loss": 0.2612340748310089, + "step": 4639 + }, + { + "epoch": 1.2321072898685435, + "grad_norm": 1.3828917417203295, + "learning_rate": 7.039564159503034e-06, + "loss": 0.25722867250442505, + "step": 4640 + }, + { + "epoch": 1.2323728588500864, + "grad_norm": 1.2106898963951274, + "learning_rate": 7.035370498144325e-06, + "loss": 0.25940731167793274, + "step": 4641 + }, + { + "epoch": 1.2326384278316294, + "grad_norm": 1.1431229158704634, + "learning_rate": 7.03117740833122e-06, + "loss": 0.2328685224056244, + "step": 4642 + }, + { + "epoch": 1.2329039968131723, + "grad_norm": 1.360549509974518, + "learning_rate": 7.0269848908720965e-06, + "loss": 0.3019352853298187, + "step": 4643 + }, + { + "epoch": 1.2331695657947153, + "grad_norm": 1.370123584713732, + "learning_rate": 7.022792946575222e-06, + "loss": 0.2665002942085266, + "step": 4644 + }, + { + "epoch": 1.2334351347762582, + "grad_norm": 1.2172549009924116, + "learning_rate": 7.018601576248755e-06, + "loss": 
0.2425101399421692, + "step": 4645 + }, + { + "epoch": 1.2337007037578012, + "grad_norm": 1.2088470091841177, + "learning_rate": 7.014410780700743e-06, + "loss": 0.23319771885871887, + "step": 4646 + }, + { + "epoch": 1.2339662727393441, + "grad_norm": 1.1714631765087196, + "learning_rate": 7.010220560739116e-06, + "loss": 0.23033195734024048, + "step": 4647 + }, + { + "epoch": 1.234231841720887, + "grad_norm": 1.211199620492339, + "learning_rate": 7.006030917171707e-06, + "loss": 0.24682006239891052, + "step": 4648 + }, + { + "epoch": 1.23449741070243, + "grad_norm": 1.2881207045369418, + "learning_rate": 7.001841850806228e-06, + "loss": 0.25566285848617554, + "step": 4649 + }, + { + "epoch": 1.234762979683973, + "grad_norm": 1.32329780476303, + "learning_rate": 6.9976533624502784e-06, + "loss": 0.2791779339313507, + "step": 4650 + }, + { + "epoch": 1.235028548665516, + "grad_norm": 1.3093366388831746, + "learning_rate": 6.993465452911352e-06, + "loss": 0.25597846508026123, + "step": 4651 + }, + { + "epoch": 1.2352941176470589, + "grad_norm": 1.197170425293823, + "learning_rate": 6.9892781229968275e-06, + "loss": 0.24034728109836578, + "step": 4652 + }, + { + "epoch": 1.2355596866286018, + "grad_norm": 1.2583607623295634, + "learning_rate": 6.985091373513972e-06, + "loss": 0.2209509015083313, + "step": 4653 + }, + { + "epoch": 1.2358252556101448, + "grad_norm": 1.298261075070858, + "learning_rate": 6.980905205269942e-06, + "loss": 0.29106947779655457, + "step": 4654 + }, + { + "epoch": 1.2360908245916877, + "grad_norm": 1.226505577270481, + "learning_rate": 6.976719619071782e-06, + "loss": 0.24014753103256226, + "step": 4655 + }, + { + "epoch": 1.2363563935732307, + "grad_norm": 1.2297022971330018, + "learning_rate": 6.972534615726422e-06, + "loss": 0.27135470509529114, + "step": 4656 + }, + { + "epoch": 1.2366219625547736, + "grad_norm": 1.2219120714336154, + "learning_rate": 6.968350196040683e-06, + "loss": 0.23386257886886597, + "step": 4657 + }, + { + "epoch": 
1.2368875315363166, + "grad_norm": 1.1452987159774544, + "learning_rate": 6.964166360821271e-06, + "loss": 0.23119661211967468, + "step": 4658 + }, + { + "epoch": 1.2371531005178595, + "grad_norm": 1.1767967288021879, + "learning_rate": 6.959983110874782e-06, + "loss": 0.2399922013282776, + "step": 4659 + }, + { + "epoch": 1.2374186694994025, + "grad_norm": 1.0521231856668218, + "learning_rate": 6.9558004470076944e-06, + "loss": 0.18323534727096558, + "step": 4660 + }, + { + "epoch": 1.2376842384809454, + "grad_norm": 1.1985431375912965, + "learning_rate": 6.951618370026378e-06, + "loss": 0.25683268904685974, + "step": 4661 + }, + { + "epoch": 1.2379498074624884, + "grad_norm": 1.307367140627743, + "learning_rate": 6.947436880737089e-06, + "loss": 0.2861499786376953, + "step": 4662 + }, + { + "epoch": 1.2382153764440313, + "grad_norm": 1.3831407282476516, + "learning_rate": 6.943255979945965e-06, + "loss": 0.28021398186683655, + "step": 4663 + }, + { + "epoch": 1.2384809454255743, + "grad_norm": 1.2940713851528283, + "learning_rate": 6.939075668459039e-06, + "loss": 0.2739776074886322, + "step": 4664 + }, + { + "epoch": 1.2387465144071172, + "grad_norm": 1.3433235944815516, + "learning_rate": 6.934895947082221e-06, + "loss": 0.26015231013298035, + "step": 4665 + }, + { + "epoch": 1.2390120833886602, + "grad_norm": 1.3230400884249285, + "learning_rate": 6.930716816621317e-06, + "loss": 0.2572113871574402, + "step": 4666 + }, + { + "epoch": 1.2392776523702032, + "grad_norm": 1.266134559335497, + "learning_rate": 6.926538277882012e-06, + "loss": 0.24094708263874054, + "step": 4667 + }, + { + "epoch": 1.239543221351746, + "grad_norm": 1.1175335748548278, + "learning_rate": 6.92236033166988e-06, + "loss": 0.22803835570812225, + "step": 4668 + }, + { + "epoch": 1.239808790333289, + "grad_norm": 1.1198379137737728, + "learning_rate": 6.9181829787903774e-06, + "loss": 0.23672322928905487, + "step": 4669 + }, + { + "epoch": 1.240074359314832, + "grad_norm": 
1.3356297624894082, + "learning_rate": 6.91400622004885e-06, + "loss": 0.2568579912185669, + "step": 4670 + }, + { + "epoch": 1.240339928296375, + "grad_norm": 1.1768710116388783, + "learning_rate": 6.909830056250527e-06, + "loss": 0.25267845392227173, + "step": 4671 + }, + { + "epoch": 1.240605497277918, + "grad_norm": 1.2702969549109802, + "learning_rate": 6.905654488200524e-06, + "loss": 0.30336999893188477, + "step": 4672 + }, + { + "epoch": 1.2408710662594609, + "grad_norm": 1.17710991443045, + "learning_rate": 6.901479516703842e-06, + "loss": 0.2741299867630005, + "step": 4673 + }, + { + "epoch": 1.2411366352410038, + "grad_norm": 1.276658372251755, + "learning_rate": 6.897305142565363e-06, + "loss": 0.2896823585033417, + "step": 4674 + }, + { + "epoch": 1.2414022042225468, + "grad_norm": 1.2718591233587666, + "learning_rate": 6.8931313665898625e-06, + "loss": 0.23102329671382904, + "step": 4675 + }, + { + "epoch": 1.2416677732040897, + "grad_norm": 1.3209479857777737, + "learning_rate": 6.8889581895819915e-06, + "loss": 0.2600775361061096, + "step": 4676 + }, + { + "epoch": 1.2419333421856327, + "grad_norm": 1.1932453661715805, + "learning_rate": 6.884785612346291e-06, + "loss": 0.23589132726192474, + "step": 4677 + }, + { + "epoch": 1.2421989111671756, + "grad_norm": 1.155454248544126, + "learning_rate": 6.880613635687184e-06, + "loss": 0.24419361352920532, + "step": 4678 + }, + { + "epoch": 1.2424644801487186, + "grad_norm": 1.1323309321599895, + "learning_rate": 6.876442260408977e-06, + "loss": 0.23267227411270142, + "step": 4679 + }, + { + "epoch": 1.2427300491302615, + "grad_norm": 1.2244929254620942, + "learning_rate": 6.8722714873158635e-06, + "loss": 0.2507064938545227, + "step": 4680 + }, + { + "epoch": 1.2429956181118045, + "grad_norm": 1.2079227486812785, + "learning_rate": 6.868101317211922e-06, + "loss": 0.2529929280281067, + "step": 4681 + }, + { + "epoch": 1.2432611870933474, + "grad_norm": 1.1627205371245832, + "learning_rate": 
6.863931750901107e-06, + "loss": 0.23255379498004913, + "step": 4682 + }, + { + "epoch": 1.2435267560748904, + "grad_norm": 1.1997195000446994, + "learning_rate": 6.859762789187259e-06, + "loss": 0.22757332026958466, + "step": 4683 + }, + { + "epoch": 1.2437923250564333, + "grad_norm": 1.2115398233652928, + "learning_rate": 6.8555944328741145e-06, + "loss": 0.2578364312648773, + "step": 4684 + }, + { + "epoch": 1.2440578940379763, + "grad_norm": 1.1854445431935166, + "learning_rate": 6.851426682765278e-06, + "loss": 0.27568408846855164, + "step": 4685 + }, + { + "epoch": 1.2443234630195192, + "grad_norm": 1.19754548578965, + "learning_rate": 6.847259539664244e-06, + "loss": 0.25595831871032715, + "step": 4686 + }, + { + "epoch": 1.2445890320010622, + "grad_norm": 1.1807617266458326, + "learning_rate": 6.843093004374386e-06, + "loss": 0.2195426970720291, + "step": 4687 + }, + { + "epoch": 1.2448546009826051, + "grad_norm": 1.1623631531241645, + "learning_rate": 6.838927077698967e-06, + "loss": 0.23247741162776947, + "step": 4688 + }, + { + "epoch": 1.245120169964148, + "grad_norm": 1.2953467781322094, + "learning_rate": 6.834761760441127e-06, + "loss": 0.26149916648864746, + "step": 4689 + }, + { + "epoch": 1.245385738945691, + "grad_norm": 1.1310243964126157, + "learning_rate": 6.830597053403885e-06, + "loss": 0.2521447241306305, + "step": 4690 + }, + { + "epoch": 1.245651307927234, + "grad_norm": 1.1803812700297758, + "learning_rate": 6.826432957390155e-06, + "loss": 0.23401981592178345, + "step": 4691 + }, + { + "epoch": 1.245916876908777, + "grad_norm": 1.3114713754211442, + "learning_rate": 6.822269473202714e-06, + "loss": 0.25341230630874634, + "step": 4692 + }, + { + "epoch": 1.24618244589032, + "grad_norm": 1.2025537581570156, + "learning_rate": 6.818106601644248e-06, + "loss": 0.2513907551765442, + "step": 4693 + }, + { + "epoch": 1.2464480148718629, + "grad_norm": 1.2263403478965602, + "learning_rate": 6.8139443435173005e-06, + "loss": 0.2682073414325714, 
+ "step": 4694 + }, + { + "epoch": 1.2467135838534058, + "grad_norm": 1.1801313342439474, + "learning_rate": 6.809782699624308e-06, + "loss": 0.22726872563362122, + "step": 4695 + }, + { + "epoch": 1.2469791528349488, + "grad_norm": 1.3004812874511507, + "learning_rate": 6.805621670767588e-06, + "loss": 0.24184030294418335, + "step": 4696 + }, + { + "epoch": 1.247244721816492, + "grad_norm": 1.0395051535883466, + "learning_rate": 6.801461257749334e-06, + "loss": 0.203639417886734, + "step": 4697 + }, + { + "epoch": 1.2475102907980349, + "grad_norm": 1.1786557175840897, + "learning_rate": 6.797301461371626e-06, + "loss": 0.2170606106519699, + "step": 4698 + }, + { + "epoch": 1.2477758597795778, + "grad_norm": 1.1231113548110434, + "learning_rate": 6.7931422824364245e-06, + "loss": 0.2225056290626526, + "step": 4699 + }, + { + "epoch": 1.2480414287611208, + "grad_norm": 1.1702414518259399, + "learning_rate": 6.788983721745569e-06, + "loss": 0.2388974130153656, + "step": 4700 + }, + { + "epoch": 1.2483069977426637, + "grad_norm": 1.14649445863332, + "learning_rate": 6.784825780100776e-06, + "loss": 0.2291644811630249, + "step": 4701 + }, + { + "epoch": 1.2485725667242067, + "grad_norm": 1.3474164807852358, + "learning_rate": 6.7806684583036595e-06, + "loss": 0.23793739080429077, + "step": 4702 + }, + { + "epoch": 1.2488381357057496, + "grad_norm": 1.2839354787463726, + "learning_rate": 6.776511757155695e-06, + "loss": 0.2756902277469635, + "step": 4703 + }, + { + "epoch": 1.2491037046872926, + "grad_norm": 1.3039866822855, + "learning_rate": 6.772355677458249e-06, + "loss": 0.25046268105506897, + "step": 4704 + }, + { + "epoch": 1.2493692736688355, + "grad_norm": 1.3053078100109528, + "learning_rate": 6.7682002200125575e-06, + "loss": 0.238486647605896, + "step": 4705 + }, + { + "epoch": 1.2496348426503785, + "grad_norm": 1.1855651210182463, + "learning_rate": 6.764045385619751e-06, + "loss": 0.2366628348827362, + "step": 4706 + }, + { + "epoch": 1.2499004116319214, + 
"grad_norm": 1.21176387977239, + "learning_rate": 6.759891175080827e-06, + "loss": 0.24825221300125122, + "step": 4707 + }, + { + "epoch": 1.2501659806134644, + "grad_norm": 1.2922207381934139, + "learning_rate": 6.755737589196673e-06, + "loss": 0.2304186224937439, + "step": 4708 + }, + { + "epoch": 1.2504315495950074, + "grad_norm": 1.200468035859197, + "learning_rate": 6.7515846287680476e-06, + "loss": 0.2824471592903137, + "step": 4709 + }, + { + "epoch": 1.2506971185765503, + "grad_norm": 1.1994302764371214, + "learning_rate": 6.747432294595591e-06, + "loss": 0.23130697011947632, + "step": 4710 + }, + { + "epoch": 1.2509626875580933, + "grad_norm": 1.3183641444794993, + "learning_rate": 6.7432805874798334e-06, + "loss": 0.28371602296829224, + "step": 4711 + }, + { + "epoch": 1.2512282565396362, + "grad_norm": 1.1529924861272876, + "learning_rate": 6.739129508221167e-06, + "loss": 0.23452092707157135, + "step": 4712 + }, + { + "epoch": 1.2514938255211792, + "grad_norm": 1.245806995398341, + "learning_rate": 6.734979057619873e-06, + "loss": 0.22486859560012817, + "step": 4713 + }, + { + "epoch": 1.2517593945027221, + "grad_norm": 1.3481589110906722, + "learning_rate": 6.730829236476111e-06, + "loss": 0.2818532884120941, + "step": 4714 + }, + { + "epoch": 1.252024963484265, + "grad_norm": 1.172531442878329, + "learning_rate": 6.7266800455899125e-06, + "loss": 0.2060810923576355, + "step": 4715 + }, + { + "epoch": 1.252290532465808, + "grad_norm": 1.2183128764116598, + "learning_rate": 6.722531485761199e-06, + "loss": 0.2183244377374649, + "step": 4716 + }, + { + "epoch": 1.252556101447351, + "grad_norm": 1.2596677279915016, + "learning_rate": 6.71838355778976e-06, + "loss": 0.24757327139377594, + "step": 4717 + }, + { + "epoch": 1.252821670428894, + "grad_norm": 1.3267776765958388, + "learning_rate": 6.714236262475268e-06, + "loss": 0.3058333396911621, + "step": 4718 + }, + { + "epoch": 1.2530872394104369, + "grad_norm": 1.1893155452841293, + "learning_rate": 
6.71008960061727e-06, + "loss": 0.24095620214939117, + "step": 4719 + }, + { + "epoch": 1.2533528083919798, + "grad_norm": 1.3050165159615794, + "learning_rate": 6.705943573015199e-06, + "loss": 0.25614839792251587, + "step": 4720 + }, + { + "epoch": 1.2536183773735228, + "grad_norm": 1.2537185610498753, + "learning_rate": 6.701798180468356e-06, + "loss": 0.22295254468917847, + "step": 4721 + }, + { + "epoch": 1.2538839463550657, + "grad_norm": 1.1724661677534984, + "learning_rate": 6.697653423775926e-06, + "loss": 0.24783796072006226, + "step": 4722 + }, + { + "epoch": 1.2541495153366087, + "grad_norm": 1.5676339911360846, + "learning_rate": 6.693509303736969e-06, + "loss": 0.19702200591564178, + "step": 4723 + }, + { + "epoch": 1.2544150843181516, + "grad_norm": 1.2713976115459882, + "learning_rate": 6.689365821150421e-06, + "loss": 0.2539074122905731, + "step": 4724 + }, + { + "epoch": 1.2546806532996946, + "grad_norm": 1.2015875463338734, + "learning_rate": 6.6852229768150976e-06, + "loss": 0.2480372041463852, + "step": 4725 + }, + { + "epoch": 1.2549462222812375, + "grad_norm": 1.1742876462412417, + "learning_rate": 6.68108077152969e-06, + "loss": 0.2231048047542572, + "step": 4726 + }, + { + "epoch": 1.2552117912627805, + "grad_norm": 1.1571308721577904, + "learning_rate": 6.676939206092766e-06, + "loss": 0.260783851146698, + "step": 4727 + }, + { + "epoch": 1.2554773602443234, + "grad_norm": 1.2569537102203152, + "learning_rate": 6.67279828130277e-06, + "loss": 0.24069254100322723, + "step": 4728 + }, + { + "epoch": 1.2557429292258664, + "grad_norm": 1.1732343490674524, + "learning_rate": 6.668657997958027e-06, + "loss": 0.2578867971897125, + "step": 4729 + }, + { + "epoch": 1.2560084982074093, + "grad_norm": 1.102080552368197, + "learning_rate": 6.664518356856732e-06, + "loss": 0.20724457502365112, + "step": 4730 + }, + { + "epoch": 1.2562740671889523, + "grad_norm": 1.1527224778451435, + "learning_rate": 6.6603793587969586e-06, + "loss": 
0.23107580840587616, + "step": 4731 + }, + { + "epoch": 1.2565396361704952, + "grad_norm": 1.123633807819834, + "learning_rate": 6.656241004576659e-06, + "loss": 0.2481832504272461, + "step": 4732 + }, + { + "epoch": 1.2568052051520382, + "grad_norm": 1.1353422900728998, + "learning_rate": 6.652103294993657e-06, + "loss": 0.2219698578119278, + "step": 4733 + }, + { + "epoch": 1.2570707741335811, + "grad_norm": 1.1538807443087884, + "learning_rate": 6.647966230845655e-06, + "loss": 0.2245863974094391, + "step": 4734 + }, + { + "epoch": 1.257336343115124, + "grad_norm": 1.1991392114731283, + "learning_rate": 6.643829812930231e-06, + "loss": 0.2086387574672699, + "step": 4735 + }, + { + "epoch": 1.257601912096667, + "grad_norm": 1.1702949625685939, + "learning_rate": 6.6396940420448355e-06, + "loss": 0.23484499752521515, + "step": 4736 + }, + { + "epoch": 1.25786748107821, + "grad_norm": 1.1449620939429583, + "learning_rate": 6.635558918986797e-06, + "loss": 0.22011062502861023, + "step": 4737 + }, + { + "epoch": 1.258133050059753, + "grad_norm": 1.240312422577115, + "learning_rate": 6.631424444553319e-06, + "loss": 0.2426830381155014, + "step": 4738 + }, + { + "epoch": 1.258398619041296, + "grad_norm": 1.2472398676845469, + "learning_rate": 6.627290619541481e-06, + "loss": 0.2702174484729767, + "step": 4739 + }, + { + "epoch": 1.2586641880228389, + "grad_norm": 1.4005529994015682, + "learning_rate": 6.623157444748234e-06, + "loss": 0.26594820618629456, + "step": 4740 + }, + { + "epoch": 1.2589297570043818, + "grad_norm": 1.2550785934224764, + "learning_rate": 6.619024920970405e-06, + "loss": 0.2546013593673706, + "step": 4741 + }, + { + "epoch": 1.2591953259859248, + "grad_norm": 1.425429985784882, + "learning_rate": 6.614893049004696e-06, + "loss": 0.27207985520362854, + "step": 4742 + }, + { + "epoch": 1.259460894967468, + "grad_norm": 1.4445692953489113, + "learning_rate": 6.610761829647685e-06, + "loss": 0.2640937566757202, + "step": 4743 + }, + { + "epoch": 
1.2597264639490109, + "grad_norm": 1.4095791296432063, + "learning_rate": 6.60663126369582e-06, + "loss": 0.2890278697013855, + "step": 4744 + }, + { + "epoch": 1.2599920329305538, + "grad_norm": 1.1225606468440805, + "learning_rate": 6.602501351945425e-06, + "loss": 0.24610492587089539, + "step": 4745 + }, + { + "epoch": 1.2602576019120968, + "grad_norm": 1.5273064552741338, + "learning_rate": 6.598372095192699e-06, + "loss": 0.24946746230125427, + "step": 4746 + }, + { + "epoch": 1.2605231708936397, + "grad_norm": 1.0546449518544165, + "learning_rate": 6.594243494233717e-06, + "loss": 0.2369944453239441, + "step": 4747 + }, + { + "epoch": 1.2607887398751827, + "grad_norm": 1.180556169492091, + "learning_rate": 6.590115549864421e-06, + "loss": 0.20980143547058105, + "step": 4748 + }, + { + "epoch": 1.2610543088567256, + "grad_norm": 1.1524244978042124, + "learning_rate": 6.5859882628806315e-06, + "loss": 0.22930344939231873, + "step": 4749 + }, + { + "epoch": 1.2613198778382686, + "grad_norm": 1.1353386909454481, + "learning_rate": 6.5818616340780405e-06, + "loss": 0.22352416813373566, + "step": 4750 + }, + { + "epoch": 1.2615854468198116, + "grad_norm": 1.0615225488277533, + "learning_rate": 6.577735664252214e-06, + "loss": 0.2049327939748764, + "step": 4751 + }, + { + "epoch": 1.2618510158013545, + "grad_norm": 1.3420243952278277, + "learning_rate": 6.573610354198587e-06, + "loss": 0.21858355402946472, + "step": 4752 + }, + { + "epoch": 1.2621165847828975, + "grad_norm": 1.1248247337478985, + "learning_rate": 6.5694857047124786e-06, + "loss": 0.225118950009346, + "step": 4753 + }, + { + "epoch": 1.2623821537644404, + "grad_norm": 1.1623337764465298, + "learning_rate": 6.565361716589063e-06, + "loss": 0.25780409574508667, + "step": 4754 + }, + { + "epoch": 1.2626477227459834, + "grad_norm": 1.1580907073042885, + "learning_rate": 6.5612383906233964e-06, + "loss": 0.23507939279079437, + "step": 4755 + }, + { + "epoch": 1.2629132917275263, + "grad_norm": 
1.1733914893757196, + "learning_rate": 6.557115727610417e-06, + "loss": 0.27884477376937866, + "step": 4756 + }, + { + "epoch": 1.2631788607090693, + "grad_norm": 1.145599873702901, + "learning_rate": 6.552993728344921e-06, + "loss": 0.2564120888710022, + "step": 4757 + }, + { + "epoch": 1.2634444296906122, + "grad_norm": 1.3139857622357067, + "learning_rate": 6.548872393621578e-06, + "loss": 0.259651243686676, + "step": 4758 + }, + { + "epoch": 1.2637099986721552, + "grad_norm": 1.2930462493551071, + "learning_rate": 6.544751724234937e-06, + "loss": 0.23473814129829407, + "step": 4759 + }, + { + "epoch": 1.2639755676536981, + "grad_norm": 1.4411652435541018, + "learning_rate": 6.540631720979411e-06, + "loss": 0.2447129189968109, + "step": 4760 + }, + { + "epoch": 1.264241136635241, + "grad_norm": 1.1968236723875711, + "learning_rate": 6.536512384649294e-06, + "loss": 0.22695237398147583, + "step": 4761 + }, + { + "epoch": 1.264506705616784, + "grad_norm": 1.117214929215876, + "learning_rate": 6.532393716038738e-06, + "loss": 0.24303656816482544, + "step": 4762 + }, + { + "epoch": 1.264772274598327, + "grad_norm": 1.2106972269991043, + "learning_rate": 6.528275715941776e-06, + "loss": 0.23911908268928528, + "step": 4763 + }, + { + "epoch": 1.26503784357987, + "grad_norm": 1.0480584899589354, + "learning_rate": 6.524158385152309e-06, + "loss": 0.19766747951507568, + "step": 4764 + }, + { + "epoch": 1.2653034125614129, + "grad_norm": 1.390914844473808, + "learning_rate": 6.520041724464114e-06, + "loss": 0.24074134230613708, + "step": 4765 + }, + { + "epoch": 1.2655689815429558, + "grad_norm": 1.3379815630375766, + "learning_rate": 6.515925734670834e-06, + "loss": 0.27557867765426636, + "step": 4766 + }, + { + "epoch": 1.2658345505244988, + "grad_norm": 1.3286252957995823, + "learning_rate": 6.511810416565979e-06, + "loss": 0.24387787282466888, + "step": 4767 + }, + { + "epoch": 1.2661001195060417, + "grad_norm": 1.4234035593814256, + "learning_rate": 
6.507695770942939e-06, + "loss": 0.27863091230392456, + "step": 4768 + }, + { + "epoch": 1.2663656884875847, + "grad_norm": 1.1364646133588507, + "learning_rate": 6.503581798594965e-06, + "loss": 0.23589591681957245, + "step": 4769 + }, + { + "epoch": 1.2666312574691276, + "grad_norm": 1.1932509985997282, + "learning_rate": 6.499468500315185e-06, + "loss": 0.22869807481765747, + "step": 4770 + }, + { + "epoch": 1.2668968264506706, + "grad_norm": 1.2498634762148577, + "learning_rate": 6.495355876896592e-06, + "loss": 0.2351568192243576, + "step": 4771 + }, + { + "epoch": 1.2671623954322135, + "grad_norm": 1.1271253337210285, + "learning_rate": 6.491243929132052e-06, + "loss": 0.2291228175163269, + "step": 4772 + }, + { + "epoch": 1.2674279644137565, + "grad_norm": 1.2013953219342957, + "learning_rate": 6.487132657814297e-06, + "loss": 0.23203743994235992, + "step": 4773 + }, + { + "epoch": 1.2676935333952994, + "grad_norm": 1.0887907712326863, + "learning_rate": 6.483022063735938e-06, + "loss": 0.22035656869411469, + "step": 4774 + }, + { + "epoch": 1.2679591023768424, + "grad_norm": 1.1270651148723736, + "learning_rate": 6.478912147689448e-06, + "loss": 0.21576716005802155, + "step": 4775 + }, + { + "epoch": 1.2682246713583853, + "grad_norm": 1.3174966546949713, + "learning_rate": 6.474802910467171e-06, + "loss": 0.27764660120010376, + "step": 4776 + }, + { + "epoch": 1.2684902403399283, + "grad_norm": 1.2418434137314485, + "learning_rate": 6.4706943528613135e-06, + "loss": 0.23715822398662567, + "step": 4777 + }, + { + "epoch": 1.2687558093214713, + "grad_norm": 1.1794293567561218, + "learning_rate": 6.4665864756639606e-06, + "loss": 0.27764302492141724, + "step": 4778 + }, + { + "epoch": 1.2690213783030142, + "grad_norm": 1.2157630211554828, + "learning_rate": 6.4624792796670624e-06, + "loss": 0.21634885668754578, + "step": 4779 + }, + { + "epoch": 1.2692869472845572, + "grad_norm": 1.2217447541656432, + "learning_rate": 6.458372765662438e-06, + "loss": 
0.27262234687805176, + "step": 4780 + }, + { + "epoch": 1.2695525162661, + "grad_norm": 1.1716437260315133, + "learning_rate": 6.454266934441775e-06, + "loss": 0.2219458371400833, + "step": 4781 + }, + { + "epoch": 1.269818085247643, + "grad_norm": 1.2515340549821425, + "learning_rate": 6.450161786796625e-06, + "loss": 0.22181497514247894, + "step": 4782 + }, + { + "epoch": 1.270083654229186, + "grad_norm": 1.1858127036353512, + "learning_rate": 6.446057323518422e-06, + "loss": 0.22642338275909424, + "step": 4783 + }, + { + "epoch": 1.270349223210729, + "grad_norm": 1.2243357553110101, + "learning_rate": 6.441953545398451e-06, + "loss": 0.239711195230484, + "step": 4784 + }, + { + "epoch": 1.270614792192272, + "grad_norm": 1.29507599792429, + "learning_rate": 6.437850453227872e-06, + "loss": 0.2422255128622055, + "step": 4785 + }, + { + "epoch": 1.2708803611738149, + "grad_norm": 1.3013507424737665, + "learning_rate": 6.433748047797715e-06, + "loss": 0.23184439539909363, + "step": 4786 + }, + { + "epoch": 1.2711459301553578, + "grad_norm": 1.3032581886502261, + "learning_rate": 6.429646329898873e-06, + "loss": 0.2737428843975067, + "step": 4787 + }, + { + "epoch": 1.2714114991369008, + "grad_norm": 1.2565288812855064, + "learning_rate": 6.4255453003221115e-06, + "loss": 0.23565897345542908, + "step": 4788 + }, + { + "epoch": 1.2716770681184437, + "grad_norm": 1.3665497750328797, + "learning_rate": 6.421444959858059e-06, + "loss": 0.24349254369735718, + "step": 4789 + }, + { + "epoch": 1.2719426370999867, + "grad_norm": 1.2050219186384792, + "learning_rate": 6.4173453092972115e-06, + "loss": 0.2637769281864166, + "step": 4790 + }, + { + "epoch": 1.2722082060815296, + "grad_norm": 1.0381858832581394, + "learning_rate": 6.413246349429934e-06, + "loss": 0.21420228481292725, + "step": 4791 + }, + { + "epoch": 1.2724737750630726, + "grad_norm": 1.1333618917642097, + "learning_rate": 6.409148081046461e-06, + "loss": 0.25270405411720276, + "step": 4792 + }, + { + "epoch": 
1.2727393440446155, + "grad_norm": 1.270676964933882, + "learning_rate": 6.405050504936887e-06, + "loss": 0.2710546851158142, + "step": 4793 + }, + { + "epoch": 1.2730049130261585, + "grad_norm": 1.1608891040490155, + "learning_rate": 6.400953621891178e-06, + "loss": 0.2388489842414856, + "step": 4794 + }, + { + "epoch": 1.2732704820077014, + "grad_norm": 1.1600463634666516, + "learning_rate": 6.396857432699164e-06, + "loss": 0.24581485986709595, + "step": 4795 + }, + { + "epoch": 1.2735360509892444, + "grad_norm": 1.18464881130754, + "learning_rate": 6.3927619381505404e-06, + "loss": 0.24219104647636414, + "step": 4796 + }, + { + "epoch": 1.2738016199707873, + "grad_norm": 1.0878857914267965, + "learning_rate": 6.388667139034873e-06, + "loss": 0.22722014784812927, + "step": 4797 + }, + { + "epoch": 1.2740671889523303, + "grad_norm": 1.275017638940232, + "learning_rate": 6.384573036141589e-06, + "loss": 0.25177234411239624, + "step": 4798 + }, + { + "epoch": 1.2743327579338732, + "grad_norm": 1.2824350948041237, + "learning_rate": 6.380479630259983e-06, + "loss": 0.2291412651538849, + "step": 4799 + }, + { + "epoch": 1.2745983269154162, + "grad_norm": 1.3215047708165757, + "learning_rate": 6.376386922179216e-06, + "loss": 0.2528606951236725, + "step": 4800 + }, + { + "epoch": 1.2748638958969591, + "grad_norm": 1.11001311385955, + "learning_rate": 6.372294912688315e-06, + "loss": 0.21383032202720642, + "step": 4801 + }, + { + "epoch": 1.275129464878502, + "grad_norm": 1.2162134010863295, + "learning_rate": 6.368203602576168e-06, + "loss": 0.2538087069988251, + "step": 4802 + }, + { + "epoch": 1.275395033860045, + "grad_norm": 1.2127822206191197, + "learning_rate": 6.364112992631537e-06, + "loss": 0.24437417089939117, + "step": 4803 + }, + { + "epoch": 1.275660602841588, + "grad_norm": 1.1678428848154245, + "learning_rate": 6.360023083643036e-06, + "loss": 0.2347753942012787, + "step": 4804 + }, + { + "epoch": 1.275926171823131, + "grad_norm": 1.226812886332051, + 
"learning_rate": 6.3559338763991576e-06, + "loss": 0.271645188331604, + "step": 4805 + }, + { + "epoch": 1.276191740804674, + "grad_norm": 1.2088165730060163, + "learning_rate": 6.35184537168825e-06, + "loss": 0.2465275228023529, + "step": 4806 + }, + { + "epoch": 1.2764573097862169, + "grad_norm": 1.216147524532817, + "learning_rate": 6.347757570298527e-06, + "loss": 0.26494044065475464, + "step": 4807 + }, + { + "epoch": 1.2767228787677598, + "grad_norm": 3.360286997098956, + "learning_rate": 6.343670473018071e-06, + "loss": 0.28292080760002136, + "step": 4808 + }, + { + "epoch": 1.2769884477493028, + "grad_norm": 1.2160142828428218, + "learning_rate": 6.339584080634824e-06, + "loss": 0.2525850534439087, + "step": 4809 + }, + { + "epoch": 1.2772540167308457, + "grad_norm": 1.224576908350391, + "learning_rate": 6.335498393936597e-06, + "loss": 0.22056345641613007, + "step": 4810 + }, + { + "epoch": 1.2775195857123887, + "grad_norm": 1.1603347806824698, + "learning_rate": 6.331413413711061e-06, + "loss": 0.23081058263778687, + "step": 4811 + }, + { + "epoch": 1.2777851546939316, + "grad_norm": 1.2309265633693007, + "learning_rate": 6.327329140745751e-06, + "loss": 0.2722470760345459, + "step": 4812 + }, + { + "epoch": 1.2780507236754748, + "grad_norm": 1.2598117885787161, + "learning_rate": 6.32324557582807e-06, + "loss": 0.24454641342163086, + "step": 4813 + }, + { + "epoch": 1.2783162926570177, + "grad_norm": 1.2713820573097572, + "learning_rate": 6.319162719745277e-06, + "loss": 0.21884413063526154, + "step": 4814 + }, + { + "epoch": 1.2785818616385607, + "grad_norm": 1.276590514388197, + "learning_rate": 6.3150805732845e-06, + "loss": 0.2737545669078827, + "step": 4815 + }, + { + "epoch": 1.2788474306201036, + "grad_norm": 1.1747258996206047, + "learning_rate": 6.31099913723273e-06, + "loss": 0.2478230595588684, + "step": 4816 + }, + { + "epoch": 1.2791129996016466, + "grad_norm": 1.2461752717378811, + "learning_rate": 6.306918412376817e-06, + "loss": 
0.2508094310760498, + "step": 4817 + }, + { + "epoch": 1.2793785685831895, + "grad_norm": 1.267840547546021, + "learning_rate": 6.302838399503477e-06, + "loss": 0.24666383862495422, + "step": 4818 + }, + { + "epoch": 1.2796441375647325, + "grad_norm": 1.176059099377582, + "learning_rate": 6.298759099399292e-06, + "loss": 0.27833491563796997, + "step": 4819 + }, + { + "epoch": 1.2799097065462754, + "grad_norm": 1.1948595147219725, + "learning_rate": 6.294680512850699e-06, + "loss": 0.23092475533485413, + "step": 4820 + }, + { + "epoch": 1.2801752755278184, + "grad_norm": 1.1935160504644853, + "learning_rate": 6.290602640644005e-06, + "loss": 0.2714667022228241, + "step": 4821 + }, + { + "epoch": 1.2804408445093614, + "grad_norm": 1.1769422055863235, + "learning_rate": 6.286525483565373e-06, + "loss": 0.23292411863803864, + "step": 4822 + }, + { + "epoch": 1.2807064134909043, + "grad_norm": 1.1322856806053188, + "learning_rate": 6.282449042400831e-06, + "loss": 0.23809143900871277, + "step": 4823 + }, + { + "epoch": 1.2809719824724473, + "grad_norm": 1.0235534573008647, + "learning_rate": 6.278373317936269e-06, + "loss": 0.22593267261981964, + "step": 4824 + }, + { + "epoch": 1.2812375514539902, + "grad_norm": 1.2491300300411192, + "learning_rate": 6.274298310957439e-06, + "loss": 0.26024624705314636, + "step": 4825 + }, + { + "epoch": 1.2815031204355332, + "grad_norm": 1.138185007529017, + "learning_rate": 6.270224022249957e-06, + "loss": 0.22418126463890076, + "step": 4826 + }, + { + "epoch": 1.2817686894170761, + "grad_norm": 1.2374650134400174, + "learning_rate": 6.266150452599288e-06, + "loss": 0.26452577114105225, + "step": 4827 + }, + { + "epoch": 1.282034258398619, + "grad_norm": 1.2453587043668277, + "learning_rate": 6.262077602790779e-06, + "loss": 0.24412381649017334, + "step": 4828 + }, + { + "epoch": 1.282299827380162, + "grad_norm": 1.1670875672055734, + "learning_rate": 6.258005473609623e-06, + "loss": 0.22476118803024292, + "step": 4829 + }, + { + 
"epoch": 1.282565396361705, + "grad_norm": 1.1744502576491334, + "learning_rate": 6.25393406584088e-06, + "loss": 0.2208547294139862, + "step": 4830 + }, + { + "epoch": 1.282830965343248, + "grad_norm": 1.340282271944368, + "learning_rate": 6.249863380269467e-06, + "loss": 0.2903650999069214, + "step": 4831 + }, + { + "epoch": 1.2830965343247909, + "grad_norm": 1.2018727401561922, + "learning_rate": 6.245793417680168e-06, + "loss": 0.24413639307022095, + "step": 4832 + }, + { + "epoch": 1.2833621033063338, + "grad_norm": 1.162422850806728, + "learning_rate": 6.241724178857621e-06, + "loss": 0.2193944752216339, + "step": 4833 + }, + { + "epoch": 1.2836276722878768, + "grad_norm": 1.2159517583191957, + "learning_rate": 6.237655664586326e-06, + "loss": 0.22847513854503632, + "step": 4834 + }, + { + "epoch": 1.2838932412694197, + "grad_norm": 1.4211501406512423, + "learning_rate": 6.233587875650648e-06, + "loss": 0.269639253616333, + "step": 4835 + }, + { + "epoch": 1.2841588102509627, + "grad_norm": 1.3153478129856002, + "learning_rate": 6.229520812834801e-06, + "loss": 0.26329392194747925, + "step": 4836 + }, + { + "epoch": 1.2844243792325056, + "grad_norm": 1.0811891602166492, + "learning_rate": 6.225454476922877e-06, + "loss": 0.18800514936447144, + "step": 4837 + }, + { + "epoch": 1.2846899482140486, + "grad_norm": 1.2987987933289529, + "learning_rate": 6.2213888686988125e-06, + "loss": 0.2617965340614319, + "step": 4838 + }, + { + "epoch": 1.2849555171955915, + "grad_norm": 1.2029687476094635, + "learning_rate": 6.217323988946411e-06, + "loss": 0.22468717396259308, + "step": 4839 + }, + { + "epoch": 1.2852210861771345, + "grad_norm": 1.2126923104659393, + "learning_rate": 6.213259838449333e-06, + "loss": 0.22465646266937256, + "step": 4840 + }, + { + "epoch": 1.2854866551586774, + "grad_norm": 1.243457795287806, + "learning_rate": 6.209196417991096e-06, + "loss": 0.2655075490474701, + "step": 4841 + }, + { + "epoch": 1.2857522241402204, + "grad_norm": 
1.2818071805394324, + "learning_rate": 6.205133728355081e-06, + "loss": 0.25313282012939453, + "step": 4842 + }, + { + "epoch": 1.2860177931217633, + "grad_norm": 1.2136879668034726, + "learning_rate": 6.201071770324527e-06, + "loss": 0.23176322877407074, + "step": 4843 + }, + { + "epoch": 1.2862833621033063, + "grad_norm": 1.3628911983979357, + "learning_rate": 6.197010544682531e-06, + "loss": 0.27396953105926514, + "step": 4844 + }, + { + "epoch": 1.2865489310848492, + "grad_norm": 1.2333432651370633, + "learning_rate": 6.192950052212046e-06, + "loss": 0.24966171383857727, + "step": 4845 + }, + { + "epoch": 1.2868145000663922, + "grad_norm": 1.184789059228899, + "learning_rate": 6.188890293695895e-06, + "loss": 0.23290866613388062, + "step": 4846 + }, + { + "epoch": 1.2870800690479351, + "grad_norm": 1.2080105834836115, + "learning_rate": 6.184831269916749e-06, + "loss": 0.2368975132703781, + "step": 4847 + }, + { + "epoch": 1.287345638029478, + "grad_norm": 1.35199057217418, + "learning_rate": 6.180772981657139e-06, + "loss": 0.25305312871932983, + "step": 4848 + }, + { + "epoch": 1.287611207011021, + "grad_norm": 1.1825950927599171, + "learning_rate": 6.176715429699452e-06, + "loss": 0.22752982378005981, + "step": 4849 + }, + { + "epoch": 1.287876775992564, + "grad_norm": 1.152582857494987, + "learning_rate": 6.1726586148259395e-06, + "loss": 0.22426503896713257, + "step": 4850 + }, + { + "epoch": 1.288142344974107, + "grad_norm": 1.2203273234703247, + "learning_rate": 6.168602537818706e-06, + "loss": 0.21261993050575256, + "step": 4851 + }, + { + "epoch": 1.28840791395565, + "grad_norm": 1.1907151660933317, + "learning_rate": 6.1645471994597185e-06, + "loss": 0.237461656332016, + "step": 4852 + }, + { + "epoch": 1.2886734829371929, + "grad_norm": 1.113120156932308, + "learning_rate": 6.160492600530794e-06, + "loss": 0.1926390826702118, + "step": 4853 + }, + { + "epoch": 1.2889390519187358, + "grad_norm": 1.6824005161064397, + "learning_rate": 
6.156438741813608e-06, + "loss": 0.22673740983009338, + "step": 4854 + }, + { + "epoch": 1.289204620900279, + "grad_norm": 1.1453361708789405, + "learning_rate": 6.15238562408971e-06, + "loss": 0.22148582339286804, + "step": 4855 + }, + { + "epoch": 1.289470189881822, + "grad_norm": 1.3581323367394031, + "learning_rate": 6.148333248140483e-06, + "loss": 0.28319716453552246, + "step": 4856 + }, + { + "epoch": 1.289735758863365, + "grad_norm": 1.4367360633574449, + "learning_rate": 6.14428161474718e-06, + "loss": 0.23505647480487823, + "step": 4857 + }, + { + "epoch": 1.2900013278449078, + "grad_norm": 1.2052965186154045, + "learning_rate": 6.140230724690908e-06, + "loss": 0.24323523044586182, + "step": 4858 + }, + { + "epoch": 1.2902668968264508, + "grad_norm": 1.2357784405363281, + "learning_rate": 6.136180578752629e-06, + "loss": 0.22818386554718018, + "step": 4859 + }, + { + "epoch": 1.2905324658079937, + "grad_norm": 1.2670464740614045, + "learning_rate": 6.132131177713165e-06, + "loss": 0.24285198748111725, + "step": 4860 + }, + { + "epoch": 1.2907980347895367, + "grad_norm": 1.1369753370104339, + "learning_rate": 6.128082522353194e-06, + "loss": 0.24115213751792908, + "step": 4861 + }, + { + "epoch": 1.2910636037710796, + "grad_norm": 1.2213111344560537, + "learning_rate": 6.124034613453247e-06, + "loss": 0.21564510464668274, + "step": 4862 + }, + { + "epoch": 1.2913291727526226, + "grad_norm": 1.299973209896211, + "learning_rate": 6.119987451793711e-06, + "loss": 0.2329743504524231, + "step": 4863 + }, + { + "epoch": 1.2915947417341656, + "grad_norm": 1.2218786239106318, + "learning_rate": 6.115941038154835e-06, + "loss": 0.2161208689212799, + "step": 4864 + }, + { + "epoch": 1.2918603107157085, + "grad_norm": 1.2078035628631776, + "learning_rate": 6.111895373316721e-06, + "loss": 0.22765520215034485, + "step": 4865 + }, + { + "epoch": 1.2921258796972515, + "grad_norm": 1.2199257873933993, + "learning_rate": 6.107850458059322e-06, + "loss": 
0.25506818294525146, + "step": 4866 + }, + { + "epoch": 1.2923914486787944, + "grad_norm": 1.2014544077782259, + "learning_rate": 6.1038062931624505e-06, + "loss": 0.22543852031230927, + "step": 4867 + }, + { + "epoch": 1.2926570176603374, + "grad_norm": 1.282222410309602, + "learning_rate": 6.099762879405776e-06, + "loss": 0.24295030534267426, + "step": 4868 + }, + { + "epoch": 1.2929225866418803, + "grad_norm": 1.2221545432256802, + "learning_rate": 6.095720217568819e-06, + "loss": 0.2385009229183197, + "step": 4869 + }, + { + "epoch": 1.2931881556234233, + "grad_norm": 1.119514297375773, + "learning_rate": 6.091678308430956e-06, + "loss": 0.21410472691059113, + "step": 4870 + }, + { + "epoch": 1.2934537246049662, + "grad_norm": 1.299309717988783, + "learning_rate": 6.087637152771422e-06, + "loss": 0.25934773683547974, + "step": 4871 + }, + { + "epoch": 1.2937192935865092, + "grad_norm": 1.1783576597419445, + "learning_rate": 6.0835967513693e-06, + "loss": 0.24584373831748962, + "step": 4872 + }, + { + "epoch": 1.2939848625680521, + "grad_norm": 1.3413866916188153, + "learning_rate": 6.079557105003537e-06, + "loss": 0.2403055876493454, + "step": 4873 + }, + { + "epoch": 1.294250431549595, + "grad_norm": 1.2348806886655737, + "learning_rate": 6.075518214452927e-06, + "loss": 0.23861736059188843, + "step": 4874 + }, + { + "epoch": 1.294516000531138, + "grad_norm": 1.2099712971645404, + "learning_rate": 6.071480080496119e-06, + "loss": 0.21356427669525146, + "step": 4875 + }, + { + "epoch": 1.294781569512681, + "grad_norm": 1.314183683224707, + "learning_rate": 6.067442703911621e-06, + "loss": 0.2835869789123535, + "step": 4876 + }, + { + "epoch": 1.295047138494224, + "grad_norm": 1.1868362719294436, + "learning_rate": 6.063406085477788e-06, + "loss": 0.24233242869377136, + "step": 4877 + }, + { + "epoch": 1.2953127074757669, + "grad_norm": 1.2596980829406919, + "learning_rate": 6.059370225972834e-06, + "loss": 0.24986369907855988, + "step": 4878 + }, + { + "epoch": 
1.2955782764573098, + "grad_norm": 1.2583930460503605, + "learning_rate": 6.055335126174826e-06, + "loss": 0.2445756494998932, + "step": 4879 + }, + { + "epoch": 1.2958438454388528, + "grad_norm": 1.0635663336037695, + "learning_rate": 6.0513007868616825e-06, + "loss": 0.21331898868083954, + "step": 4880 + }, + { + "epoch": 1.2961094144203957, + "grad_norm": 1.1578193819974294, + "learning_rate": 6.047267208811174e-06, + "loss": 0.2782329320907593, + "step": 4881 + }, + { + "epoch": 1.2963749834019387, + "grad_norm": 2.326385436360766, + "learning_rate": 6.043234392800932e-06, + "loss": 0.20866765081882477, + "step": 4882 + }, + { + "epoch": 1.2966405523834816, + "grad_norm": 1.3211750202424803, + "learning_rate": 6.039202339608432e-06, + "loss": 0.2517815828323364, + "step": 4883 + }, + { + "epoch": 1.2969061213650246, + "grad_norm": 1.283845753322191, + "learning_rate": 6.03517105001101e-06, + "loss": 0.2617926597595215, + "step": 4884 + }, + { + "epoch": 1.2971716903465675, + "grad_norm": 1.3255504140080887, + "learning_rate": 6.0311405247858465e-06, + "loss": 0.24753305315971375, + "step": 4885 + }, + { + "epoch": 1.2974372593281105, + "grad_norm": 1.1805849927447047, + "learning_rate": 6.027110764709982e-06, + "loss": 0.19791719317436218, + "step": 4886 + }, + { + "epoch": 1.2977028283096534, + "grad_norm": 1.236398594932959, + "learning_rate": 6.023081770560307e-06, + "loss": 0.243608757853508, + "step": 4887 + }, + { + "epoch": 1.2979683972911964, + "grad_norm": 1.3652744342035896, + "learning_rate": 6.019053543113564e-06, + "loss": 0.20469853281974792, + "step": 4888 + }, + { + "epoch": 1.2982339662727393, + "grad_norm": 1.4682720215540639, + "learning_rate": 6.015026083146345e-06, + "loss": 0.25613903999328613, + "step": 4889 + }, + { + "epoch": 1.2984995352542823, + "grad_norm": 1.236223607561111, + "learning_rate": 6.010999391435097e-06, + "loss": 0.23349006474018097, + "step": 4890 + }, + { + "epoch": 1.2987651042358253, + "grad_norm": 
1.1137410591057113, + "learning_rate": 6.006973468756124e-06, + "loss": 0.23646268248558044, + "step": 4891 + }, + { + "epoch": 1.2990306732173682, + "grad_norm": 1.2845979720118916, + "learning_rate": 6.002948315885572e-06, + "loss": 0.2371794581413269, + "step": 4892 + }, + { + "epoch": 1.2992962421989112, + "grad_norm": 1.1150236044260142, + "learning_rate": 5.998923933599443e-06, + "loss": 0.23791949450969696, + "step": 4893 + }, + { + "epoch": 1.299561811180454, + "grad_norm": 1.2865838186648229, + "learning_rate": 5.994900322673593e-06, + "loss": 0.26923009753227234, + "step": 4894 + }, + { + "epoch": 1.299827380161997, + "grad_norm": 1.2724647699376699, + "learning_rate": 5.990877483883723e-06, + "loss": 0.20164884626865387, + "step": 4895 + }, + { + "epoch": 1.30009294914354, + "grad_norm": 1.1263986142938482, + "learning_rate": 5.986855418005393e-06, + "loss": 0.22345462441444397, + "step": 4896 + }, + { + "epoch": 1.300358518125083, + "grad_norm": 1.2936789930425872, + "learning_rate": 5.982834125814007e-06, + "loss": 0.26678675413131714, + "step": 4897 + }, + { + "epoch": 1.300624087106626, + "grad_norm": 1.3112472329084983, + "learning_rate": 5.978813608084825e-06, + "loss": 0.24674496054649353, + "step": 4898 + }, + { + "epoch": 1.3008896560881689, + "grad_norm": 1.3746634467420622, + "learning_rate": 5.974793865592947e-06, + "loss": 0.2804900109767914, + "step": 4899 + }, + { + "epoch": 1.3011552250697118, + "grad_norm": 1.3113866221822363, + "learning_rate": 5.970774899113345e-06, + "loss": 0.2413155734539032, + "step": 4900 + }, + { + "epoch": 1.3014207940512548, + "grad_norm": 1.139036608300987, + "learning_rate": 5.96675670942082e-06, + "loss": 0.21217301487922668, + "step": 4901 + }, + { + "epoch": 1.3016863630327977, + "grad_norm": 1.2012277530250777, + "learning_rate": 5.962739297290035e-06, + "loss": 0.23362940549850464, + "step": 4902 + }, + { + "epoch": 1.3019519320143407, + "grad_norm": 1.251148135143295, + "learning_rate": 
5.958722663495499e-06, + "loss": 0.2669242322444916, + "step": 4903 + }, + { + "epoch": 1.3022175009958836, + "grad_norm": 1.2365395348631665, + "learning_rate": 5.95470680881157e-06, + "loss": 0.2234608232975006, + "step": 4904 + }, + { + "epoch": 1.3024830699774266, + "grad_norm": 1.2441781101215288, + "learning_rate": 5.95069173401246e-06, + "loss": 0.25150394439697266, + "step": 4905 + }, + { + "epoch": 1.3027486389589695, + "grad_norm": 1.127228294882686, + "learning_rate": 5.9466774398722264e-06, + "loss": 0.2408430427312851, + "step": 4906 + }, + { + "epoch": 1.3030142079405125, + "grad_norm": 1.1200862415380408, + "learning_rate": 5.942663927164776e-06, + "loss": 0.2197013795375824, + "step": 4907 + }, + { + "epoch": 1.3032797769220554, + "grad_norm": 1.1474317141184802, + "learning_rate": 5.938651196663865e-06, + "loss": 0.2224964201450348, + "step": 4908 + }, + { + "epoch": 1.3035453459035984, + "grad_norm": 1.313380369558454, + "learning_rate": 5.934639249143108e-06, + "loss": 0.26466232538223267, + "step": 4909 + }, + { + "epoch": 1.3038109148851413, + "grad_norm": 1.2910852400248352, + "learning_rate": 5.930628085375958e-06, + "loss": 0.257996141910553, + "step": 4910 + }, + { + "epoch": 1.3040764838666843, + "grad_norm": 1.2056479933898356, + "learning_rate": 5.92661770613572e-06, + "loss": 0.21995162963867188, + "step": 4911 + }, + { + "epoch": 1.3043420528482272, + "grad_norm": 1.3003100511120855, + "learning_rate": 5.922608112195546e-06, + "loss": 0.26007258892059326, + "step": 4912 + }, + { + "epoch": 1.3046076218297702, + "grad_norm": 1.2951583817832037, + "learning_rate": 5.918599304328442e-06, + "loss": 0.25168827176094055, + "step": 4913 + }, + { + "epoch": 1.3048731908113131, + "grad_norm": 1.1932184000685677, + "learning_rate": 5.9145912833072535e-06, + "loss": 0.24686852097511292, + "step": 4914 + }, + { + "epoch": 1.305138759792856, + "grad_norm": 1.1951264683753895, + "learning_rate": 5.910584049904684e-06, + "loss": 0.247032031416893, + 
"step": 4915 + }, + { + "epoch": 1.305404328774399, + "grad_norm": 1.1517786776797445, + "learning_rate": 5.906577604893278e-06, + "loss": 0.21644674241542816, + "step": 4916 + }, + { + "epoch": 1.305669897755942, + "grad_norm": 1.3685662184124912, + "learning_rate": 5.9025719490454304e-06, + "loss": 0.28093478083610535, + "step": 4917 + }, + { + "epoch": 1.305935466737485, + "grad_norm": 1.2246452754262638, + "learning_rate": 5.898567083133389e-06, + "loss": 0.23731757700443268, + "step": 4918 + }, + { + "epoch": 1.306201035719028, + "grad_norm": 1.1125400405938466, + "learning_rate": 5.894563007929243e-06, + "loss": 0.20725491642951965, + "step": 4919 + }, + { + "epoch": 1.3064666047005709, + "grad_norm": 1.3186749566879576, + "learning_rate": 5.89055972420493e-06, + "loss": 0.2509433329105377, + "step": 4920 + }, + { + "epoch": 1.3067321736821138, + "grad_norm": 1.2793911736037649, + "learning_rate": 5.886557232732235e-06, + "loss": 0.2611580491065979, + "step": 4921 + }, + { + "epoch": 1.3069977426636568, + "grad_norm": 1.1754660821918204, + "learning_rate": 5.882555534282792e-06, + "loss": 0.20567595958709717, + "step": 4922 + }, + { + "epoch": 1.3072633116451997, + "grad_norm": 1.2179299933591687, + "learning_rate": 5.878554629628081e-06, + "loss": 0.22851137816905975, + "step": 4923 + }, + { + "epoch": 1.3075288806267427, + "grad_norm": 1.2283350051517878, + "learning_rate": 5.874554519539431e-06, + "loss": 0.24295902252197266, + "step": 4924 + }, + { + "epoch": 1.3077944496082856, + "grad_norm": 1.4565590371796837, + "learning_rate": 5.870555204788013e-06, + "loss": 0.29564642906188965, + "step": 4925 + }, + { + "epoch": 1.3080600185898288, + "grad_norm": 1.1906652754397118, + "learning_rate": 5.8665566861448465e-06, + "loss": 0.2399739921092987, + "step": 4926 + }, + { + "epoch": 1.3083255875713717, + "grad_norm": 1.2056826487968673, + "learning_rate": 5.862558964380806e-06, + "loss": 0.23882555961608887, + "step": 4927 + }, + { + "epoch": 
1.3085911565529147, + "grad_norm": 1.2167231777259742, + "learning_rate": 5.858562040266599e-06, + "loss": 0.2510842978954315, + "step": 4928 + }, + { + "epoch": 1.3088567255344576, + "grad_norm": 1.3760419048772665, + "learning_rate": 5.854565914572787e-06, + "loss": 0.257358193397522, + "step": 4929 + }, + { + "epoch": 1.3091222945160006, + "grad_norm": 1.1144476904886809, + "learning_rate": 5.850570588069775e-06, + "loss": 0.23228219151496887, + "step": 4930 + }, + { + "epoch": 1.3093878634975435, + "grad_norm": 1.2711888334314898, + "learning_rate": 5.846576061527818e-06, + "loss": 0.2234456092119217, + "step": 4931 + }, + { + "epoch": 1.3096534324790865, + "grad_norm": 1.1978737759145446, + "learning_rate": 5.842582335717009e-06, + "loss": 0.2273438423871994, + "step": 4932 + }, + { + "epoch": 1.3099190014606295, + "grad_norm": 1.2382395020505186, + "learning_rate": 5.838589411407294e-06, + "loss": 0.2423306405544281, + "step": 4933 + }, + { + "epoch": 1.3101845704421724, + "grad_norm": 1.2388376015521172, + "learning_rate": 5.834597289368463e-06, + "loss": 0.266438364982605, + "step": 4934 + }, + { + "epoch": 1.3104501394237154, + "grad_norm": 1.2553012161793193, + "learning_rate": 5.830605970370142e-06, + "loss": 0.2469342052936554, + "step": 4935 + }, + { + "epoch": 1.3107157084052583, + "grad_norm": 1.2077087937137967, + "learning_rate": 5.8266154551818225e-06, + "loss": 0.2834509611129761, + "step": 4936 + }, + { + "epoch": 1.3109812773868013, + "grad_norm": 1.3037377411135151, + "learning_rate": 5.822625744572821e-06, + "loss": 0.2615162134170532, + "step": 4937 + }, + { + "epoch": 1.3112468463683442, + "grad_norm": 1.1529903033018742, + "learning_rate": 5.818636839312309e-06, + "loss": 0.2247931957244873, + "step": 4938 + }, + { + "epoch": 1.3115124153498872, + "grad_norm": 1.162136486746663, + "learning_rate": 5.814648740169299e-06, + "loss": 0.23759335279464722, + "step": 4939 + }, + { + "epoch": 1.3117779843314301, + "grad_norm": 1.2647326324758852, 
+ "learning_rate": 5.8106614479126515e-06, + "loss": 0.23381784558296204, + "step": 4940 + }, + { + "epoch": 1.312043553312973, + "grad_norm": 1.2132087226777075, + "learning_rate": 5.8066749633110675e-06, + "loss": 0.2671264410018921, + "step": 4941 + }, + { + "epoch": 1.312309122294516, + "grad_norm": 1.09997395594631, + "learning_rate": 5.8026892871330944e-06, + "loss": 0.226065531373024, + "step": 4942 + }, + { + "epoch": 1.312574691276059, + "grad_norm": 1.3057172624305828, + "learning_rate": 5.798704420147124e-06, + "loss": 0.2654735743999481, + "step": 4943 + }, + { + "epoch": 1.312840260257602, + "grad_norm": 1.2538641402604982, + "learning_rate": 5.794720363121389e-06, + "loss": 0.23757833242416382, + "step": 4944 + }, + { + "epoch": 1.3131058292391449, + "grad_norm": 1.2131030914710175, + "learning_rate": 5.790737116823975e-06, + "loss": 0.2561591565608978, + "step": 4945 + }, + { + "epoch": 1.3133713982206878, + "grad_norm": 1.1698592689009908, + "learning_rate": 5.7867546820227995e-06, + "loss": 0.22105304896831512, + "step": 4946 + }, + { + "epoch": 1.3136369672022308, + "grad_norm": 1.190016500907537, + "learning_rate": 5.7827730594856325e-06, + "loss": 0.2485857605934143, + "step": 4947 + }, + { + "epoch": 1.3139025361837737, + "grad_norm": 1.2087719424455774, + "learning_rate": 5.7787922499800804e-06, + "loss": 0.21256676316261292, + "step": 4948 + }, + { + "epoch": 1.3141681051653167, + "grad_norm": 1.2561271472593831, + "learning_rate": 5.774812254273604e-06, + "loss": 0.2700715661048889, + "step": 4949 + }, + { + "epoch": 1.3144336741468596, + "grad_norm": 1.072264118800501, + "learning_rate": 5.770833073133488e-06, + "loss": 0.22239381074905396, + "step": 4950 + }, + { + "epoch": 1.3146992431284026, + "grad_norm": 1.2811464089131772, + "learning_rate": 5.766854707326878e-06, + "loss": 0.22973249852657318, + "step": 4951 + }, + { + "epoch": 1.3149648121099455, + "grad_norm": 1.3904264621036453, + "learning_rate": 5.762877157620751e-06, + "loss": 
0.27923673391342163, + "step": 4952 + }, + { + "epoch": 1.3152303810914885, + "grad_norm": 1.1321859486950596, + "learning_rate": 5.758900424781939e-06, + "loss": 0.23142218589782715, + "step": 4953 + }, + { + "epoch": 1.3154959500730314, + "grad_norm": 1.2732500147617782, + "learning_rate": 5.754924509577107e-06, + "loss": 0.23697996139526367, + "step": 4954 + }, + { + "epoch": 1.3157615190545744, + "grad_norm": 1.2838523265227373, + "learning_rate": 5.750949412772764e-06, + "loss": 0.27600961923599243, + "step": 4955 + }, + { + "epoch": 1.3160270880361173, + "grad_norm": 1.1644607269636458, + "learning_rate": 5.74697513513526e-06, + "loss": 0.2300705760717392, + "step": 4956 + }, + { + "epoch": 1.3162926570176603, + "grad_norm": 1.2927833273456342, + "learning_rate": 5.743001677430791e-06, + "loss": 0.2771111726760864, + "step": 4957 + }, + { + "epoch": 1.3165582259992032, + "grad_norm": 1.2582954956741819, + "learning_rate": 5.739029040425391e-06, + "loss": 0.2195657342672348, + "step": 4958 + }, + { + "epoch": 1.3168237949807462, + "grad_norm": 1.3450534906440017, + "learning_rate": 5.735057224884939e-06, + "loss": 0.2877159118652344, + "step": 4959 + }, + { + "epoch": 1.3170893639622892, + "grad_norm": 1.2211564124942835, + "learning_rate": 5.731086231575154e-06, + "loss": 0.264115571975708, + "step": 4960 + }, + { + "epoch": 1.317354932943832, + "grad_norm": 1.1286607753384608, + "learning_rate": 5.727116061261593e-06, + "loss": 0.22574637830257416, + "step": 4961 + }, + { + "epoch": 1.317620501925375, + "grad_norm": 1.3177978069758023, + "learning_rate": 5.723146714709664e-06, + "loss": 0.26063698530197144, + "step": 4962 + }, + { + "epoch": 1.317886070906918, + "grad_norm": 1.2211473527893268, + "learning_rate": 5.719178192684611e-06, + "loss": 0.26272428035736084, + "step": 4963 + }, + { + "epoch": 1.318151639888461, + "grad_norm": 1.257373941755789, + "learning_rate": 5.715210495951513e-06, + "loss": 0.27188578248023987, + "step": 4964 + }, + { + "epoch": 
1.318417208870004, + "grad_norm": 1.2786927551317604, + "learning_rate": 5.711243625275296e-06, + "loss": 0.26374363899230957, + "step": 4965 + }, + { + "epoch": 1.3186827778515469, + "grad_norm": 1.2469422291735242, + "learning_rate": 5.7072775814207275e-06, + "loss": 0.24819093942642212, + "step": 4966 + }, + { + "epoch": 1.3189483468330898, + "grad_norm": 1.3834225319345155, + "learning_rate": 5.703312365152412e-06, + "loss": 0.24387019872665405, + "step": 4967 + }, + { + "epoch": 1.319213915814633, + "grad_norm": 1.2919715806670669, + "learning_rate": 5.699347977234799e-06, + "loss": 0.2198091745376587, + "step": 4968 + }, + { + "epoch": 1.319479484796176, + "grad_norm": 1.3500197578827224, + "learning_rate": 5.695384418432174e-06, + "loss": 0.24349649250507355, + "step": 4969 + }, + { + "epoch": 1.319745053777719, + "grad_norm": 1.238323956307032, + "learning_rate": 5.691421689508661e-06, + "loss": 0.2330506294965744, + "step": 4970 + }, + { + "epoch": 1.3200106227592618, + "grad_norm": 1.2015417123740977, + "learning_rate": 5.687459791228234e-06, + "loss": 0.22821848094463348, + "step": 4971 + }, + { + "epoch": 1.3202761917408048, + "grad_norm": 1.1813366864368284, + "learning_rate": 5.683498724354699e-06, + "loss": 0.2342798113822937, + "step": 4972 + }, + { + "epoch": 1.3205417607223477, + "grad_norm": 1.0659168750954966, + "learning_rate": 5.679538489651702e-06, + "loss": 0.19689922034740448, + "step": 4973 + }, + { + "epoch": 1.3208073297038907, + "grad_norm": 1.1808385090527131, + "learning_rate": 5.675579087882727e-06, + "loss": 0.23910056054592133, + "step": 4974 + }, + { + "epoch": 1.3210728986854336, + "grad_norm": 1.381638431012013, + "learning_rate": 5.671620519811105e-06, + "loss": 0.25725993514060974, + "step": 4975 + }, + { + "epoch": 1.3213384676669766, + "grad_norm": 1.3528699347449313, + "learning_rate": 5.667662786199997e-06, + "loss": 0.3030434250831604, + "step": 4976 + }, + { + "epoch": 1.3216040366485196, + "grad_norm": 
1.1182092617897728, + "learning_rate": 5.6637058878124075e-06, + "loss": 0.223737433552742, + "step": 4977 + }, + { + "epoch": 1.3218696056300625, + "grad_norm": 1.07766141822832, + "learning_rate": 5.659749825411183e-06, + "loss": 0.21480265259742737, + "step": 4978 + }, + { + "epoch": 1.3221351746116055, + "grad_norm": 1.2398269968997129, + "learning_rate": 5.655794599759001e-06, + "loss": 0.23288744688034058, + "step": 4979 + }, + { + "epoch": 1.3224007435931484, + "grad_norm": 1.3344080514533678, + "learning_rate": 5.651840211618387e-06, + "loss": 0.23701068758964539, + "step": 4980 + }, + { + "epoch": 1.3226663125746914, + "grad_norm": 1.2102834630940547, + "learning_rate": 5.647886661751698e-06, + "loss": 0.22164157032966614, + "step": 4981 + }, + { + "epoch": 1.3229318815562343, + "grad_norm": 1.2096538262244674, + "learning_rate": 5.643933950921132e-06, + "loss": 0.23426607251167297, + "step": 4982 + }, + { + "epoch": 1.3231974505377773, + "grad_norm": 1.1880047089826309, + "learning_rate": 5.6399820798887266e-06, + "loss": 0.2567834258079529, + "step": 4983 + }, + { + "epoch": 1.3234630195193202, + "grad_norm": 1.3013809826248692, + "learning_rate": 5.6360310494163525e-06, + "loss": 0.2713038921356201, + "step": 4984 + }, + { + "epoch": 1.3237285885008632, + "grad_norm": 1.2908080991459006, + "learning_rate": 5.632080860265725e-06, + "loss": 0.2548249661922455, + "step": 4985 + }, + { + "epoch": 1.3239941574824061, + "grad_norm": 1.3471244082770852, + "learning_rate": 5.628131513198392e-06, + "loss": 0.2442832589149475, + "step": 4986 + }, + { + "epoch": 1.324259726463949, + "grad_norm": 1.3063670062134878, + "learning_rate": 5.6241830089757435e-06, + "loss": 0.24654853343963623, + "step": 4987 + }, + { + "epoch": 1.324525295445492, + "grad_norm": 1.2792033582455469, + "learning_rate": 5.620235348358997e-06, + "loss": 0.2802797853946686, + "step": 4988 + }, + { + "epoch": 1.324790864427035, + "grad_norm": 1.0588655062771883, + "learning_rate": 
5.616288532109225e-06, + "loss": 0.18801404535770416, + "step": 4989 + }, + { + "epoch": 1.325056433408578, + "grad_norm": 1.2235746865490262, + "learning_rate": 5.6123425609873235e-06, + "loss": 0.2685382068157196, + "step": 4990 + }, + { + "epoch": 1.3253220023901209, + "grad_norm": 1.1873888072876837, + "learning_rate": 5.608397435754029e-06, + "loss": 0.23479774594306946, + "step": 4991 + }, + { + "epoch": 1.3255875713716638, + "grad_norm": 1.2164455244711625, + "learning_rate": 5.604453157169914e-06, + "loss": 0.24198031425476074, + "step": 4992 + }, + { + "epoch": 1.3258531403532068, + "grad_norm": 1.3448749532595476, + "learning_rate": 5.60050972599539e-06, + "loss": 0.25523462891578674, + "step": 4993 + }, + { + "epoch": 1.3261187093347497, + "grad_norm": 1.1695382845281797, + "learning_rate": 5.596567142990703e-06, + "loss": 0.23196743428707123, + "step": 4994 + }, + { + "epoch": 1.3263842783162927, + "grad_norm": 1.3145586744837223, + "learning_rate": 5.592625408915939e-06, + "loss": 0.29365748167037964, + "step": 4995 + }, + { + "epoch": 1.3266498472978356, + "grad_norm": 1.1946134760289593, + "learning_rate": 5.588684524531014e-06, + "loss": 0.24509185552597046, + "step": 4996 + }, + { + "epoch": 1.3269154162793786, + "grad_norm": 1.3358300509723116, + "learning_rate": 5.584744490595687e-06, + "loss": 0.27032390236854553, + "step": 4997 + }, + { + "epoch": 1.3271809852609215, + "grad_norm": 1.1645416268641489, + "learning_rate": 5.580805307869549e-06, + "loss": 0.24401508271694183, + "step": 4998 + }, + { + "epoch": 1.3274465542424645, + "grad_norm": 1.1506901325018217, + "learning_rate": 5.576866977112028e-06, + "loss": 0.2216658741235733, + "step": 4999 + }, + { + "epoch": 1.3277121232240074, + "grad_norm": 1.1830944265124126, + "learning_rate": 5.5729294990823875e-06, + "loss": 0.24545373022556305, + "step": 5000 + }, + { + "epoch": 1.3279776922055504, + "grad_norm": 1.377548009409137, + "learning_rate": 5.568992874539728e-06, + "loss": 
0.260816752910614, + "step": 5001 + }, + { + "epoch": 1.3282432611870933, + "grad_norm": 1.1392730403811622, + "learning_rate": 5.565057104242984e-06, + "loss": 0.1850551962852478, + "step": 5002 + }, + { + "epoch": 1.3285088301686363, + "grad_norm": 2.1232949408605624, + "learning_rate": 5.561122188950923e-06, + "loss": 0.26854407787323, + "step": 5003 + }, + { + "epoch": 1.3287743991501793, + "grad_norm": 1.1591208934359583, + "learning_rate": 5.557188129422153e-06, + "loss": 0.24294906854629517, + "step": 5004 + }, + { + "epoch": 1.3290399681317222, + "grad_norm": 1.1880501452095942, + "learning_rate": 5.553254926415114e-06, + "loss": 0.2533603310585022, + "step": 5005 + }, + { + "epoch": 1.3293055371132652, + "grad_norm": 1.1756183262516449, + "learning_rate": 5.549322580688077e-06, + "loss": 0.2082313448190689, + "step": 5006 + }, + { + "epoch": 1.329571106094808, + "grad_norm": 1.1602290025540025, + "learning_rate": 5.545391092999158e-06, + "loss": 0.24265842139720917, + "step": 5007 + }, + { + "epoch": 1.329836675076351, + "grad_norm": 1.2321490774961563, + "learning_rate": 5.541460464106301e-06, + "loss": 0.2483578324317932, + "step": 5008 + }, + { + "epoch": 1.330102244057894, + "grad_norm": 1.2798509363454456, + "learning_rate": 5.537530694767281e-06, + "loss": 0.2769540548324585, + "step": 5009 + }, + { + "epoch": 1.330367813039437, + "grad_norm": 1.1781048091325885, + "learning_rate": 5.533601785739714e-06, + "loss": 0.2132025957107544, + "step": 5010 + }, + { + "epoch": 1.33063338202098, + "grad_norm": 1.2726887496075767, + "learning_rate": 5.529673737781047e-06, + "loss": 0.25223806500434875, + "step": 5011 + }, + { + "epoch": 1.3308989510025229, + "grad_norm": 1.13329365262538, + "learning_rate": 5.52574655164856e-06, + "loss": 0.22631296515464783, + "step": 5012 + }, + { + "epoch": 1.3311645199840658, + "grad_norm": 1.1821255064699665, + "learning_rate": 5.5218202280993725e-06, + "loss": 0.23756693303585052, + "step": 5013 + }, + { + "epoch": 
1.3314300889656088, + "grad_norm": 1.2775335630974591, + "learning_rate": 5.517894767890427e-06, + "loss": 0.24746376276016235, + "step": 5014 + }, + { + "epoch": 1.3316956579471517, + "grad_norm": 1.105165815318004, + "learning_rate": 5.513970171778504e-06, + "loss": 0.21463070809841156, + "step": 5015 + }, + { + "epoch": 1.3319612269286947, + "grad_norm": 1.2090979668871258, + "learning_rate": 5.510046440520228e-06, + "loss": 0.21256107091903687, + "step": 5016 + }, + { + "epoch": 1.3322267959102376, + "grad_norm": 1.1963664670778913, + "learning_rate": 5.506123574872044e-06, + "loss": 0.25800254940986633, + "step": 5017 + }, + { + "epoch": 1.3324923648917806, + "grad_norm": 1.2726257558813519, + "learning_rate": 5.502201575590236e-06, + "loss": 0.2421891689300537, + "step": 5018 + }, + { + "epoch": 1.3327579338733235, + "grad_norm": 1.3181283061442692, + "learning_rate": 5.498280443430917e-06, + "loss": 0.24375903606414795, + "step": 5019 + }, + { + "epoch": 1.3330235028548665, + "grad_norm": 1.2419078132332353, + "learning_rate": 5.494360179150033e-06, + "loss": 0.22173303365707397, + "step": 5020 + }, + { + "epoch": 1.3332890718364094, + "grad_norm": 1.1754676882141941, + "learning_rate": 5.49044078350337e-06, + "loss": 0.24005022644996643, + "step": 5021 + }, + { + "epoch": 1.3335546408179524, + "grad_norm": 1.194558748352182, + "learning_rate": 5.486522257246538e-06, + "loss": 0.2600201964378357, + "step": 5022 + }, + { + "epoch": 1.3338202097994953, + "grad_norm": 1.2112657273591712, + "learning_rate": 5.482604601134984e-06, + "loss": 0.22889836132526398, + "step": 5023 + }, + { + "epoch": 1.3340857787810383, + "grad_norm": 1.151722502872684, + "learning_rate": 5.478687815923981e-06, + "loss": 0.25045812129974365, + "step": 5024 + }, + { + "epoch": 1.3343513477625812, + "grad_norm": 1.2499612320902753, + "learning_rate": 5.474771902368646e-06, + "loss": 0.24649837613105774, + "step": 5025 + }, + { + "epoch": 1.3346169167441242, + "grad_norm": 
1.1975824340507155, + "learning_rate": 5.470856861223919e-06, + "loss": 0.23994389176368713, + "step": 5026 + }, + { + "epoch": 1.3348824857256671, + "grad_norm": 1.2488470912807048, + "learning_rate": 5.466942693244572e-06, + "loss": 0.24381600320339203, + "step": 5027 + }, + { + "epoch": 1.33514805470721, + "grad_norm": 1.1770895947351019, + "learning_rate": 5.463029399185217e-06, + "loss": 0.22110486030578613, + "step": 5028 + }, + { + "epoch": 1.335413623688753, + "grad_norm": 1.2878634690011452, + "learning_rate": 5.459116979800281e-06, + "loss": 0.25733259320259094, + "step": 5029 + }, + { + "epoch": 1.335679192670296, + "grad_norm": 1.2598918710105835, + "learning_rate": 5.4552054358440355e-06, + "loss": 0.22853803634643555, + "step": 5030 + }, + { + "epoch": 1.335944761651839, + "grad_norm": 1.3118793520277159, + "learning_rate": 5.451294768070581e-06, + "loss": 0.27503639459609985, + "step": 5031 + }, + { + "epoch": 1.336210330633382, + "grad_norm": 1.2721314541046291, + "learning_rate": 5.447384977233849e-06, + "loss": 0.27931997179985046, + "step": 5032 + }, + { + "epoch": 1.3364758996149249, + "grad_norm": 1.2287817779118972, + "learning_rate": 5.443476064087596e-06, + "loss": 0.2477954626083374, + "step": 5033 + }, + { + "epoch": 1.3367414685964678, + "grad_norm": 1.2204002745504476, + "learning_rate": 5.439568029385422e-06, + "loss": 0.2195623219013214, + "step": 5034 + }, + { + "epoch": 1.3370070375780108, + "grad_norm": 1.230653492520276, + "learning_rate": 5.435660873880747e-06, + "loss": 0.22160238027572632, + "step": 5035 + }, + { + "epoch": 1.3372726065595537, + "grad_norm": 1.6764380815480615, + "learning_rate": 5.4317545983268235e-06, + "loss": 0.24107405543327332, + "step": 5036 + }, + { + "epoch": 1.3375381755410967, + "grad_norm": 1.2985203082435115, + "learning_rate": 5.427849203476738e-06, + "loss": 0.2480086386203766, + "step": 5037 + }, + { + "epoch": 1.3378037445226398, + "grad_norm": 1.2654518356324462, + "learning_rate": 
5.4239446900834005e-06, + "loss": 0.22476691007614136, + "step": 5038 + }, + { + "epoch": 1.3380693135041828, + "grad_norm": 1.217906592075979, + "learning_rate": 5.420041058899559e-06, + "loss": 0.23685473203659058, + "step": 5039 + }, + { + "epoch": 1.3383348824857257, + "grad_norm": 1.215790635675812, + "learning_rate": 5.416138310677784e-06, + "loss": 0.27753746509552, + "step": 5040 + }, + { + "epoch": 1.3386004514672687, + "grad_norm": 1.2682075315501737, + "learning_rate": 5.412236446170482e-06, + "loss": 0.22446027398109436, + "step": 5041 + }, + { + "epoch": 1.3388660204488116, + "grad_norm": 1.2214424011593596, + "learning_rate": 5.4083354661298816e-06, + "loss": 0.2535285949707031, + "step": 5042 + }, + { + "epoch": 1.3391315894303546, + "grad_norm": 1.2982364680013232, + "learning_rate": 5.4044353713080565e-06, + "loss": 0.2412964254617691, + "step": 5043 + }, + { + "epoch": 1.3393971584118975, + "grad_norm": 1.3092797704576777, + "learning_rate": 5.4005361624568895e-06, + "loss": 0.23863038420677185, + "step": 5044 + }, + { + "epoch": 1.3396627273934405, + "grad_norm": 1.159506578977356, + "learning_rate": 5.396637840328105e-06, + "loss": 0.22741727530956268, + "step": 5045 + }, + { + "epoch": 1.3399282963749835, + "grad_norm": 1.285452356277395, + "learning_rate": 5.392740405673251e-06, + "loss": 0.2497379630804062, + "step": 5046 + }, + { + "epoch": 1.3401938653565264, + "grad_norm": 1.2401289485061215, + "learning_rate": 5.388843859243712e-06, + "loss": 0.19558298587799072, + "step": 5047 + }, + { + "epoch": 1.3404594343380694, + "grad_norm": 1.2074615239750155, + "learning_rate": 5.3849482017906914e-06, + "loss": 0.2266748994588852, + "step": 5048 + }, + { + "epoch": 1.3407250033196123, + "grad_norm": 1.2657162316868396, + "learning_rate": 5.381053434065229e-06, + "loss": 0.2410028576850891, + "step": 5049 + }, + { + "epoch": 1.3409905723011553, + "grad_norm": 1.301692886719208, + "learning_rate": 5.37715955681819e-06, + "loss": 
0.23965512216091156, + "step": 5050 + }, + { + "epoch": 1.3412561412826982, + "grad_norm": 1.1756365557449155, + "learning_rate": 5.373266570800262e-06, + "loss": 0.22440138459205627, + "step": 5051 + }, + { + "epoch": 1.3415217102642412, + "grad_norm": 1.2562473271519534, + "learning_rate": 5.369374476761975e-06, + "loss": 0.2509710192680359, + "step": 5052 + }, + { + "epoch": 1.3417872792457841, + "grad_norm": 1.3381440207626536, + "learning_rate": 5.365483275453677e-06, + "loss": 0.26555800437927246, + "step": 5053 + }, + { + "epoch": 1.342052848227327, + "grad_norm": 1.2240809600669689, + "learning_rate": 5.361592967625544e-06, + "loss": 0.23089733719825745, + "step": 5054 + }, + { + "epoch": 1.34231841720887, + "grad_norm": 1.1178692263054482, + "learning_rate": 5.357703554027582e-06, + "loss": 0.2040700763463974, + "step": 5055 + }, + { + "epoch": 1.342583986190413, + "grad_norm": 1.309704975193781, + "learning_rate": 5.353815035409624e-06, + "loss": 0.23539039492607117, + "step": 5056 + }, + { + "epoch": 1.342849555171956, + "grad_norm": 1.7065922202358847, + "learning_rate": 5.3499274125213294e-06, + "loss": 0.2190464437007904, + "step": 5057 + }, + { + "epoch": 1.3431151241534989, + "grad_norm": 1.1478595499251703, + "learning_rate": 5.346040686112189e-06, + "loss": 0.21557429432868958, + "step": 5058 + }, + { + "epoch": 1.3433806931350418, + "grad_norm": 1.1934269644730748, + "learning_rate": 5.342154856931515e-06, + "loss": 0.24398267269134521, + "step": 5059 + }, + { + "epoch": 1.3436462621165848, + "grad_norm": 1.1089059625649784, + "learning_rate": 5.338269925728451e-06, + "loss": 0.21652038395404816, + "step": 5060 + }, + { + "epoch": 1.3439118310981277, + "grad_norm": 1.1937531358219302, + "learning_rate": 5.334385893251966e-06, + "loss": 0.2031325101852417, + "step": 5061 + }, + { + "epoch": 1.3441774000796707, + "grad_norm": 1.1621991357090053, + "learning_rate": 5.330502760250853e-06, + "loss": 0.2484835982322693, + "step": 5062 + }, + { + 
"epoch": 1.3444429690612136, + "grad_norm": 1.2657742595884374, + "learning_rate": 5.326620527473737e-06, + "loss": 0.23698699474334717, + "step": 5063 + }, + { + "epoch": 1.3447085380427566, + "grad_norm": 1.2000433743668328, + "learning_rate": 5.322739195669065e-06, + "loss": 0.23928484320640564, + "step": 5064 + }, + { + "epoch": 1.3449741070242995, + "grad_norm": 1.1828146199314795, + "learning_rate": 5.318858765585115e-06, + "loss": 0.22679512202739716, + "step": 5065 + }, + { + "epoch": 1.3452396760058425, + "grad_norm": 1.2334385564497414, + "learning_rate": 5.314979237969984e-06, + "loss": 0.2115025818347931, + "step": 5066 + }, + { + "epoch": 1.3455052449873854, + "grad_norm": 1.261129899382787, + "learning_rate": 5.311100613571603e-06, + "loss": 0.2441834807395935, + "step": 5067 + }, + { + "epoch": 1.3457708139689284, + "grad_norm": 1.2722125718860966, + "learning_rate": 5.307222893137722e-06, + "loss": 0.2549205720424652, + "step": 5068 + }, + { + "epoch": 1.3460363829504713, + "grad_norm": 1.179054242584843, + "learning_rate": 5.3033460774159185e-06, + "loss": 0.24652990698814392, + "step": 5069 + }, + { + "epoch": 1.3463019519320143, + "grad_norm": 1.2062419936470874, + "learning_rate": 5.299470167153602e-06, + "loss": 0.2403775006532669, + "step": 5070 + }, + { + "epoch": 1.3465675209135572, + "grad_norm": 1.1208895570259512, + "learning_rate": 5.295595163097999e-06, + "loss": 0.2215663194656372, + "step": 5071 + }, + { + "epoch": 1.3468330898951002, + "grad_norm": 1.2914937229567889, + "learning_rate": 5.291721065996167e-06, + "loss": 0.2567424774169922, + "step": 5072 + }, + { + "epoch": 1.3470986588766432, + "grad_norm": 1.0608079556396839, + "learning_rate": 5.287847876594984e-06, + "loss": 0.21162359416484833, + "step": 5073 + }, + { + "epoch": 1.347364227858186, + "grad_norm": 1.221049341797181, + "learning_rate": 5.283975595641155e-06, + "loss": 0.21851085126399994, + "step": 5074 + }, + { + "epoch": 1.347629796839729, + "grad_norm": 
1.2935501467753354, + "learning_rate": 5.280104223881212e-06, + "loss": 0.2491171509027481, + "step": 5075 + }, + { + "epoch": 1.347895365821272, + "grad_norm": 1.2921255335421646, + "learning_rate": 5.276233762061507e-06, + "loss": 0.22467780113220215, + "step": 5076 + }, + { + "epoch": 1.348160934802815, + "grad_norm": 1.159790816626821, + "learning_rate": 5.272364210928223e-06, + "loss": 0.24531611800193787, + "step": 5077 + }, + { + "epoch": 1.348426503784358, + "grad_norm": 1.2178282841242851, + "learning_rate": 5.268495571227361e-06, + "loss": 0.2582520544528961, + "step": 5078 + }, + { + "epoch": 1.3486920727659009, + "grad_norm": 1.2175282778251775, + "learning_rate": 5.264627843704749e-06, + "loss": 0.21180811524391174, + "step": 5079 + }, + { + "epoch": 1.348957641747444, + "grad_norm": 1.2942378328530906, + "learning_rate": 5.2607610291060406e-06, + "loss": 0.27026671171188354, + "step": 5080 + }, + { + "epoch": 1.349223210728987, + "grad_norm": 1.1721525183169563, + "learning_rate": 5.256895128176712e-06, + "loss": 0.22954419255256653, + "step": 5081 + }, + { + "epoch": 1.34948877971053, + "grad_norm": 1.3561853541918854, + "learning_rate": 5.253030141662063e-06, + "loss": 0.24064484238624573, + "step": 5082 + }, + { + "epoch": 1.349754348692073, + "grad_norm": 1.1245550279116328, + "learning_rate": 5.249166070307218e-06, + "loss": 0.1981196105480194, + "step": 5083 + }, + { + "epoch": 1.3500199176736158, + "grad_norm": 1.0881909699390468, + "learning_rate": 5.2453029148571226e-06, + "loss": 0.19882233440876007, + "step": 5084 + }, + { + "epoch": 1.3502854866551588, + "grad_norm": 1.2123536275051694, + "learning_rate": 5.24144067605655e-06, + "loss": 0.2409907579421997, + "step": 5085 + }, + { + "epoch": 1.3505510556367017, + "grad_norm": 1.2197874501412473, + "learning_rate": 5.237579354650092e-06, + "loss": 0.2205093652009964, + "step": 5086 + }, + { + "epoch": 1.3508166246182447, + "grad_norm": 1.4716074796051495, + "learning_rate": 
5.233718951382163e-06, + "loss": 0.2283058911561966, + "step": 5087 + }, + { + "epoch": 1.3510821935997877, + "grad_norm": 1.2561007307780203, + "learning_rate": 5.229859466997012e-06, + "loss": 0.25584474205970764, + "step": 5088 + }, + { + "epoch": 1.3513477625813306, + "grad_norm": 1.1491167817661179, + "learning_rate": 5.226000902238696e-06, + "loss": 0.22516845166683197, + "step": 5089 + }, + { + "epoch": 1.3516133315628736, + "grad_norm": 1.2604818786719383, + "learning_rate": 5.222143257851102e-06, + "loss": 0.23440764844417572, + "step": 5090 + }, + { + "epoch": 1.3518789005444165, + "grad_norm": 1.2156754572685655, + "learning_rate": 5.218286534577938e-06, + "loss": 0.25858962535858154, + "step": 5091 + }, + { + "epoch": 1.3521444695259595, + "grad_norm": 1.1425154357949754, + "learning_rate": 5.214430733162736e-06, + "loss": 0.20676326751708984, + "step": 5092 + }, + { + "epoch": 1.3524100385075024, + "grad_norm": 1.1266241214136956, + "learning_rate": 5.210575854348853e-06, + "loss": 0.21892425417900085, + "step": 5093 + }, + { + "epoch": 1.3526756074890454, + "grad_norm": 1.2379350388596377, + "learning_rate": 5.206721898879454e-06, + "loss": 0.2538335919380188, + "step": 5094 + }, + { + "epoch": 1.3529411764705883, + "grad_norm": 1.2059035716196298, + "learning_rate": 5.202868867497542e-06, + "loss": 0.24750448763370514, + "step": 5095 + }, + { + "epoch": 1.3532067454521313, + "grad_norm": 1.2602608504342458, + "learning_rate": 5.199016760945931e-06, + "loss": 0.2569364011287689, + "step": 5096 + }, + { + "epoch": 1.3534723144336742, + "grad_norm": 0.9860855220263709, + "learning_rate": 5.19516557996727e-06, + "loss": 0.16788914799690247, + "step": 5097 + }, + { + "epoch": 1.3537378834152172, + "grad_norm": 1.0020852845957948, + "learning_rate": 5.191315325304018e-06, + "loss": 0.19006651639938354, + "step": 5098 + }, + { + "epoch": 1.3540034523967601, + "grad_norm": 1.187896658740898, + "learning_rate": 5.1874659976984575e-06, + "loss": 
0.23474551737308502, + "step": 5099 + }, + { + "epoch": 1.354269021378303, + "grad_norm": 1.2829971661643687, + "learning_rate": 5.183617597892694e-06, + "loss": 0.26601099967956543, + "step": 5100 + }, + { + "epoch": 1.354534590359846, + "grad_norm": 1.1758855450162613, + "learning_rate": 5.179770126628654e-06, + "loss": 0.24207550287246704, + "step": 5101 + }, + { + "epoch": 1.354800159341389, + "grad_norm": 1.2535446057143411, + "learning_rate": 5.175923584648083e-06, + "loss": 0.2538307309150696, + "step": 5102 + }, + { + "epoch": 1.355065728322932, + "grad_norm": 1.1865818667829109, + "learning_rate": 5.172077972692553e-06, + "loss": 0.23073242604732513, + "step": 5103 + }, + { + "epoch": 1.3553312973044749, + "grad_norm": 1.348848385270533, + "learning_rate": 5.168233291503448e-06, + "loss": 0.2634595036506653, + "step": 5104 + }, + { + "epoch": 1.3555968662860178, + "grad_norm": 1.225057907199874, + "learning_rate": 5.1643895418219744e-06, + "loss": 0.23282350599765778, + "step": 5105 + }, + { + "epoch": 1.3558624352675608, + "grad_norm": 1.333152685269679, + "learning_rate": 5.160546724389172e-06, + "loss": 0.2543700933456421, + "step": 5106 + }, + { + "epoch": 1.3561280042491037, + "grad_norm": 1.1449256417555271, + "learning_rate": 5.1567048399458855e-06, + "loss": 0.2005772739648819, + "step": 5107 + }, + { + "epoch": 1.3563935732306467, + "grad_norm": 1.2429630346358373, + "learning_rate": 5.152863889232787e-06, + "loss": 0.2367073893547058, + "step": 5108 + }, + { + "epoch": 1.3566591422121896, + "grad_norm": 1.2839253544945022, + "learning_rate": 5.14902387299036e-06, + "loss": 0.25600770115852356, + "step": 5109 + }, + { + "epoch": 1.3569247111937326, + "grad_norm": 1.198566513294344, + "learning_rate": 5.145184791958918e-06, + "loss": 0.21678754687309265, + "step": 5110 + }, + { + "epoch": 1.3571902801752755, + "grad_norm": 1.3894724787206996, + "learning_rate": 5.141346646878591e-06, + "loss": 0.265438973903656, + "step": 5111 + }, + { + "epoch": 
1.3574558491568185, + "grad_norm": 1.1239736089383028, + "learning_rate": 5.13750943848933e-06, + "loss": 0.24246999621391296, + "step": 5112 + }, + { + "epoch": 1.3577214181383614, + "grad_norm": 1.299396280421792, + "learning_rate": 5.133673167530899e-06, + "loss": 0.25401771068573, + "step": 5113 + }, + { + "epoch": 1.3579869871199044, + "grad_norm": 1.2329813534125698, + "learning_rate": 5.129837834742885e-06, + "loss": 0.2698017656803131, + "step": 5114 + }, + { + "epoch": 1.3582525561014474, + "grad_norm": 1.2787210937788358, + "learning_rate": 5.126003440864703e-06, + "loss": 0.27006995677948, + "step": 5115 + }, + { + "epoch": 1.3585181250829903, + "grad_norm": 1.2695682196385796, + "learning_rate": 5.122169986635575e-06, + "loss": 0.2370866984128952, + "step": 5116 + }, + { + "epoch": 1.3587836940645333, + "grad_norm": 1.3031561376922138, + "learning_rate": 5.1183374727945425e-06, + "loss": 0.24017807841300964, + "step": 5117 + }, + { + "epoch": 1.3590492630460762, + "grad_norm": 1.1487956614446662, + "learning_rate": 5.114505900080473e-06, + "loss": 0.21664533019065857, + "step": 5118 + }, + { + "epoch": 1.3593148320276192, + "grad_norm": 4.246209132455192, + "learning_rate": 5.110675269232046e-06, + "loss": 0.24561598896980286, + "step": 5119 + }, + { + "epoch": 1.359580401009162, + "grad_norm": 1.3902415348604562, + "learning_rate": 5.106845580987763e-06, + "loss": 0.26678937673568726, + "step": 5120 + }, + { + "epoch": 1.359845969990705, + "grad_norm": 1.354168350096278, + "learning_rate": 5.103016836085943e-06, + "loss": 0.21919070184230804, + "step": 5121 + }, + { + "epoch": 1.360111538972248, + "grad_norm": 1.3057665036353723, + "learning_rate": 5.099189035264722e-06, + "loss": 0.24887943267822266, + "step": 5122 + }, + { + "epoch": 1.360377107953791, + "grad_norm": 1.2017875007060346, + "learning_rate": 5.0953621792620556e-06, + "loss": 0.23597784340381622, + "step": 5123 + }, + { + "epoch": 1.360642676935334, + "grad_norm": 1.2098630506546966, + 
"learning_rate": 5.091536268815717e-06, + "loss": 0.21265193819999695, + "step": 5124 + }, + { + "epoch": 1.3609082459168769, + "grad_norm": 1.3606980074054404, + "learning_rate": 5.0877113046632945e-06, + "loss": 0.29837465286254883, + "step": 5125 + }, + { + "epoch": 1.3611738148984198, + "grad_norm": 1.1915793844006848, + "learning_rate": 5.0838872875421975e-06, + "loss": 0.2324269413948059, + "step": 5126 + }, + { + "epoch": 1.3614393838799628, + "grad_norm": 1.0970197687294143, + "learning_rate": 5.080064218189652e-06, + "loss": 0.19149541854858398, + "step": 5127 + }, + { + "epoch": 1.3617049528615057, + "grad_norm": 1.1710303609542994, + "learning_rate": 5.0762420973427e-06, + "loss": 0.247644305229187, + "step": 5128 + }, + { + "epoch": 1.3619705218430487, + "grad_norm": 1.1403838601028529, + "learning_rate": 5.0724209257382006e-06, + "loss": 0.2272202968597412, + "step": 5129 + }, + { + "epoch": 1.3622360908245916, + "grad_norm": 1.2012952880900256, + "learning_rate": 5.068600704112832e-06, + "loss": 0.25735989212989807, + "step": 5130 + }, + { + "epoch": 1.3625016598061346, + "grad_norm": 1.1771555574179005, + "learning_rate": 5.064781433203086e-06, + "loss": 0.19970473647117615, + "step": 5131 + }, + { + "epoch": 1.3627672287876775, + "grad_norm": 1.2156620394191346, + "learning_rate": 5.060963113745272e-06, + "loss": 0.24289372563362122, + "step": 5132 + }, + { + "epoch": 1.3630327977692205, + "grad_norm": 1.2352988713677027, + "learning_rate": 5.0571457464755226e-06, + "loss": 0.2757350504398346, + "step": 5133 + }, + { + "epoch": 1.3632983667507634, + "grad_norm": 1.2115447809386193, + "learning_rate": 5.053329332129777e-06, + "loss": 0.24552851915359497, + "step": 5134 + }, + { + "epoch": 1.3635639357323064, + "grad_norm": 1.1546263092618338, + "learning_rate": 5.049513871443797e-06, + "loss": 0.22152797877788544, + "step": 5135 + }, + { + "epoch": 1.3638295047138493, + "grad_norm": 1.2567398712194906, + "learning_rate": 5.045699365153155e-06, + 
"loss": 0.27098602056503296, + "step": 5136 + }, + { + "epoch": 1.3640950736953923, + "grad_norm": 1.201852433475055, + "learning_rate": 5.041885813993246e-06, + "loss": 0.21275216341018677, + "step": 5137 + }, + { + "epoch": 1.3643606426769352, + "grad_norm": 1.3326670101473788, + "learning_rate": 5.038073218699275e-06, + "loss": 0.2510162591934204, + "step": 5138 + }, + { + "epoch": 1.3646262116584782, + "grad_norm": 1.2702563681918038, + "learning_rate": 5.034261580006269e-06, + "loss": 0.23203429579734802, + "step": 5139 + }, + { + "epoch": 1.3648917806400211, + "grad_norm": 1.137285489869793, + "learning_rate": 5.030450898649064e-06, + "loss": 0.22178995609283447, + "step": 5140 + }, + { + "epoch": 1.365157349621564, + "grad_norm": 1.2415754400243457, + "learning_rate": 5.026641175362316e-06, + "loss": 0.2567412257194519, + "step": 5141 + }, + { + "epoch": 1.365422918603107, + "grad_norm": 1.232487080143156, + "learning_rate": 5.022832410880494e-06, + "loss": 0.21939827501773834, + "step": 5142 + }, + { + "epoch": 1.36568848758465, + "grad_norm": 1.4733425270104286, + "learning_rate": 5.019024605937882e-06, + "loss": 0.2325637936592102, + "step": 5143 + }, + { + "epoch": 1.365954056566193, + "grad_norm": 1.266575596941496, + "learning_rate": 5.015217761268582e-06, + "loss": 0.2416393756866455, + "step": 5144 + }, + { + "epoch": 1.366219625547736, + "grad_norm": 1.289260413423763, + "learning_rate": 5.011411877606507e-06, + "loss": 0.2439568042755127, + "step": 5145 + }, + { + "epoch": 1.3664851945292789, + "grad_norm": 1.1439689034996021, + "learning_rate": 5.007606955685387e-06, + "loss": 0.2495957612991333, + "step": 5146 + }, + { + "epoch": 1.3667507635108218, + "grad_norm": 1.1937127912858143, + "learning_rate": 5.003802996238766e-06, + "loss": 0.23415328562259674, + "step": 5147 + }, + { + "epoch": 1.3670163324923648, + "grad_norm": 1.26410321081345, + "learning_rate": 5.000000000000003e-06, + "loss": 0.2637922465801239, + "step": 5148 + }, + { + "epoch": 
1.3672819014739077, + "grad_norm": 1.243307173830296, + "learning_rate": 4.9961979677022696e-06, + "loss": 0.2319526970386505, + "step": 5149 + }, + { + "epoch": 1.3675474704554509, + "grad_norm": 1.2115383829826751, + "learning_rate": 4.992396900078551e-06, + "loss": 0.2338445484638214, + "step": 5150 + }, + { + "epoch": 1.3678130394369938, + "grad_norm": 1.1683439299091893, + "learning_rate": 4.988596797861654e-06, + "loss": 0.19041961431503296, + "step": 5151 + }, + { + "epoch": 1.3680786084185368, + "grad_norm": 1.233073404450011, + "learning_rate": 4.984797661784191e-06, + "loss": 0.2698138952255249, + "step": 5152 + }, + { + "epoch": 1.3683441774000797, + "grad_norm": 1.2592426315358647, + "learning_rate": 4.980999492578588e-06, + "loss": 0.2208167165517807, + "step": 5153 + }, + { + "epoch": 1.3686097463816227, + "grad_norm": 1.1935159953807641, + "learning_rate": 4.9772022909770915e-06, + "loss": 0.2515152096748352, + "step": 5154 + }, + { + "epoch": 1.3688753153631656, + "grad_norm": 1.3110804278343313, + "learning_rate": 4.973406057711755e-06, + "loss": 0.2393365204334259, + "step": 5155 + }, + { + "epoch": 1.3691408843447086, + "grad_norm": 1.302037077529998, + "learning_rate": 4.969610793514446e-06, + "loss": 0.24546492099761963, + "step": 5156 + }, + { + "epoch": 1.3694064533262515, + "grad_norm": 1.5300417364025873, + "learning_rate": 4.965816499116849e-06, + "loss": 0.252412348985672, + "step": 5157 + }, + { + "epoch": 1.3696720223077945, + "grad_norm": 1.1552882128683561, + "learning_rate": 4.962023175250461e-06, + "loss": 0.22654281556606293, + "step": 5158 + }, + { + "epoch": 1.3699375912893375, + "grad_norm": 1.2873880265204376, + "learning_rate": 4.958230822646581e-06, + "loss": 0.2542813718318939, + "step": 5159 + }, + { + "epoch": 1.3702031602708804, + "grad_norm": 1.2851879635778218, + "learning_rate": 4.9544394420363395e-06, + "loss": 0.25376224517822266, + "step": 5160 + }, + { + "epoch": 1.3704687292524234, + "grad_norm": 
1.252574665809313, + "learning_rate": 4.950649034150666e-06, + "loss": 0.21911674737930298, + "step": 5161 + }, + { + "epoch": 1.3707342982339663, + "grad_norm": 1.3527776455922371, + "learning_rate": 4.946859599720308e-06, + "loss": 0.2805126905441284, + "step": 5162 + }, + { + "epoch": 1.3709998672155093, + "grad_norm": 1.1716388954292443, + "learning_rate": 4.943071139475824e-06, + "loss": 0.2189590483903885, + "step": 5163 + }, + { + "epoch": 1.3712654361970522, + "grad_norm": 1.2218109142926636, + "learning_rate": 4.939283654147582e-06, + "loss": 0.21837599575519562, + "step": 5164 + }, + { + "epoch": 1.3715310051785952, + "grad_norm": 1.2779646624690562, + "learning_rate": 4.935497144465766e-06, + "loss": 0.25090983510017395, + "step": 5165 + }, + { + "epoch": 1.3717965741601381, + "grad_norm": 1.1988734011828608, + "learning_rate": 4.93171161116037e-06, + "loss": 0.22028754651546478, + "step": 5166 + }, + { + "epoch": 1.372062143141681, + "grad_norm": 1.1554753760684375, + "learning_rate": 4.927927054961201e-06, + "loss": 0.20097196102142334, + "step": 5167 + }, + { + "epoch": 1.372327712123224, + "grad_norm": 1.209557738779129, + "learning_rate": 4.924143476597872e-06, + "loss": 0.230082705616951, + "step": 5168 + }, + { + "epoch": 1.372593281104767, + "grad_norm": 1.1549715219295726, + "learning_rate": 4.920360876799821e-06, + "loss": 0.23701804876327515, + "step": 5169 + }, + { + "epoch": 1.37285885008631, + "grad_norm": 1.2740998730652584, + "learning_rate": 4.9165792562962834e-06, + "loss": 0.22357231378555298, + "step": 5170 + }, + { + "epoch": 1.3731244190678529, + "grad_norm": 1.2042473616661704, + "learning_rate": 4.912798615816312e-06, + "loss": 0.2533026337623596, + "step": 5171 + }, + { + "epoch": 1.3733899880493958, + "grad_norm": 1.3342025781776312, + "learning_rate": 4.90901895608877e-06, + "loss": 0.24878138303756714, + "step": 5172 + }, + { + "epoch": 1.3736555570309388, + "grad_norm": 1.5415419516618216, + "learning_rate": 
4.905240277842335e-06, + "loss": 0.22641420364379883, + "step": 5173 + }, + { + "epoch": 1.3739211260124817, + "grad_norm": 1.2916997982097302, + "learning_rate": 4.901462581805483e-06, + "loss": 0.24495793879032135, + "step": 5174 + }, + { + "epoch": 1.3741866949940247, + "grad_norm": 1.3531795848957913, + "learning_rate": 4.897685868706512e-06, + "loss": 0.2688868045806885, + "step": 5175 + }, + { + "epoch": 1.3744522639755676, + "grad_norm": 1.2828126418821555, + "learning_rate": 4.893910139273531e-06, + "loss": 0.25796642899513245, + "step": 5176 + }, + { + "epoch": 1.3747178329571106, + "grad_norm": 1.4091718050104127, + "learning_rate": 4.890135394234451e-06, + "loss": 0.27557405829429626, + "step": 5177 + }, + { + "epoch": 1.3749834019386535, + "grad_norm": 1.620605499986823, + "learning_rate": 4.886361634317004e-06, + "loss": 0.23553809523582458, + "step": 5178 + }, + { + "epoch": 1.3752489709201965, + "grad_norm": 1.2608742989736732, + "learning_rate": 4.882588860248725e-06, + "loss": 0.2454400360584259, + "step": 5179 + }, + { + "epoch": 1.3755145399017394, + "grad_norm": 1.1743865548501493, + "learning_rate": 4.878817072756959e-06, + "loss": 0.19460657238960266, + "step": 5180 + }, + { + "epoch": 1.3757801088832824, + "grad_norm": 1.2528300475452, + "learning_rate": 4.875046272568863e-06, + "loss": 0.24833449721336365, + "step": 5181 + }, + { + "epoch": 1.3760456778648253, + "grad_norm": 1.3263672125712147, + "learning_rate": 4.871276460411403e-06, + "loss": 0.2774161994457245, + "step": 5182 + }, + { + "epoch": 1.3763112468463683, + "grad_norm": 2.6268834337513667, + "learning_rate": 4.867507637011353e-06, + "loss": 0.2277964949607849, + "step": 5183 + }, + { + "epoch": 1.3765768158279112, + "grad_norm": 1.8924198767245841, + "learning_rate": 4.863739803095299e-06, + "loss": 0.2176733911037445, + "step": 5184 + }, + { + "epoch": 1.3768423848094542, + "grad_norm": 1.3153810073025014, + "learning_rate": 4.859972959389634e-06, + "loss": 
0.23529113829135895, + "step": 5185 + }, + { + "epoch": 1.3771079537909972, + "grad_norm": 1.3909544444662505, + "learning_rate": 4.856207106620557e-06, + "loss": 0.2646695077419281, + "step": 5186 + }, + { + "epoch": 1.37737352277254, + "grad_norm": 1.2095108180861869, + "learning_rate": 4.852442245514093e-06, + "loss": 0.23179873824119568, + "step": 5187 + }, + { + "epoch": 1.377639091754083, + "grad_norm": 1.1084014698771758, + "learning_rate": 4.84867837679605e-06, + "loss": 0.2127494066953659, + "step": 5188 + }, + { + "epoch": 1.377904660735626, + "grad_norm": 1.2275201950569183, + "learning_rate": 4.844915501192062e-06, + "loss": 0.2204679548740387, + "step": 5189 + }, + { + "epoch": 1.378170229717169, + "grad_norm": 1.2078653060668294, + "learning_rate": 4.841153619427567e-06, + "loss": 0.20271794497966766, + "step": 5190 + }, + { + "epoch": 1.378435798698712, + "grad_norm": 1.4269963155687142, + "learning_rate": 4.837392732227811e-06, + "loss": 0.2785792052745819, + "step": 5191 + }, + { + "epoch": 1.3787013676802549, + "grad_norm": 1.2501319487764966, + "learning_rate": 4.8336328403178486e-06, + "loss": 0.24904468655586243, + "step": 5192 + }, + { + "epoch": 1.378966936661798, + "grad_norm": 1.1230965332904321, + "learning_rate": 4.829873944422544e-06, + "loss": 0.20045346021652222, + "step": 5193 + }, + { + "epoch": 1.379232505643341, + "grad_norm": 1.1339816903135191, + "learning_rate": 4.826116045266565e-06, + "loss": 0.21814313530921936, + "step": 5194 + }, + { + "epoch": 1.379498074624884, + "grad_norm": 1.236126479276255, + "learning_rate": 4.82235914357439e-06, + "loss": 0.2408592253923416, + "step": 5195 + }, + { + "epoch": 1.379763643606427, + "grad_norm": 1.1229995433845732, + "learning_rate": 4.818603240070311e-06, + "loss": 0.21453416347503662, + "step": 5196 + }, + { + "epoch": 1.3800292125879698, + "grad_norm": 1.2915687788203387, + "learning_rate": 4.814848335478418e-06, + "loss": 0.2578599154949188, + "step": 5197 + }, + { + "epoch": 
1.3802947815695128, + "grad_norm": 1.0696662022967476, + "learning_rate": 4.811094430522613e-06, + "loss": 0.1980094015598297, + "step": 5198 + }, + { + "epoch": 1.3805603505510557, + "grad_norm": 1.202740960535961, + "learning_rate": 4.807341525926604e-06, + "loss": 0.24620960652828217, + "step": 5199 + }, + { + "epoch": 1.3808259195325987, + "grad_norm": 1.2486655803425535, + "learning_rate": 4.803589622413908e-06, + "loss": 0.23525282740592957, + "step": 5200 + }, + { + "epoch": 1.3810914885141417, + "grad_norm": 1.1657735912575689, + "learning_rate": 4.799838720707847e-06, + "loss": 0.2277744859457016, + "step": 5201 + }, + { + "epoch": 1.3813570574956846, + "grad_norm": 1.2927728942283212, + "learning_rate": 4.796088821531549e-06, + "loss": 0.2727074921131134, + "step": 5202 + }, + { + "epoch": 1.3816226264772276, + "grad_norm": 1.2370931993726209, + "learning_rate": 4.7923399256079525e-06, + "loss": 0.21686753630638123, + "step": 5203 + }, + { + "epoch": 1.3818881954587705, + "grad_norm": 1.2572583885252075, + "learning_rate": 4.788592033659799e-06, + "loss": 0.2841380834579468, + "step": 5204 + }, + { + "epoch": 1.3821537644403135, + "grad_norm": 1.1157272204593003, + "learning_rate": 4.78484514640964e-06, + "loss": 0.24577853083610535, + "step": 5205 + }, + { + "epoch": 1.3824193334218564, + "grad_norm": 1.2077705032221964, + "learning_rate": 4.7810992645798285e-06, + "loss": 0.22289782762527466, + "step": 5206 + }, + { + "epoch": 1.3826849024033994, + "grad_norm": 1.1476107334002954, + "learning_rate": 4.7773543888925274e-06, + "loss": 0.2223999947309494, + "step": 5207 + }, + { + "epoch": 1.3829504713849423, + "grad_norm": 1.2183085137487102, + "learning_rate": 4.773610520069706e-06, + "loss": 0.23938870429992676, + "step": 5208 + }, + { + "epoch": 1.3832160403664853, + "grad_norm": 1.219370193725879, + "learning_rate": 4.769867658833136e-06, + "loss": 0.260856568813324, + "step": 5209 + }, + { + "epoch": 1.3834816093480282, + "grad_norm": 
1.2333269697463725, + "learning_rate": 4.766125805904398e-06, + "loss": 0.23602089285850525, + "step": 5210 + }, + { + "epoch": 1.3837471783295712, + "grad_norm": 1.156747833138865, + "learning_rate": 4.762384962004877e-06, + "loss": 0.22543978691101074, + "step": 5211 + }, + { + "epoch": 1.3840127473111141, + "grad_norm": 1.3639051201807257, + "learning_rate": 4.758645127855763e-06, + "loss": 0.2432224452495575, + "step": 5212 + }, + { + "epoch": 1.384278316292657, + "grad_norm": 1.3947016936895973, + "learning_rate": 4.754906304178049e-06, + "loss": 0.22764597833156586, + "step": 5213 + }, + { + "epoch": 1.3845438852742, + "grad_norm": 1.2064067504011344, + "learning_rate": 4.751168491692541e-06, + "loss": 0.22503387928009033, + "step": 5214 + }, + { + "epoch": 1.384809454255743, + "grad_norm": 1.1066861130484609, + "learning_rate": 4.747431691119846e-06, + "loss": 0.21889932453632355, + "step": 5215 + }, + { + "epoch": 1.385075023237286, + "grad_norm": 1.3903278318809302, + "learning_rate": 4.743695903180372e-06, + "loss": 0.2695825695991516, + "step": 5216 + }, + { + "epoch": 1.3853405922188289, + "grad_norm": 1.2921759622470506, + "learning_rate": 4.739961128594336e-06, + "loss": 0.265118271112442, + "step": 5217 + }, + { + "epoch": 1.3856061612003718, + "grad_norm": 1.1349207398090602, + "learning_rate": 4.736227368081757e-06, + "loss": 0.2050788253545761, + "step": 5218 + }, + { + "epoch": 1.3858717301819148, + "grad_norm": 1.23951121142384, + "learning_rate": 4.7324946223624625e-06, + "loss": 0.274588406085968, + "step": 5219 + }, + { + "epoch": 1.3861372991634577, + "grad_norm": 1.209560473571303, + "learning_rate": 4.728762892156079e-06, + "loss": 0.2242514044046402, + "step": 5220 + }, + { + "epoch": 1.3864028681450007, + "grad_norm": 1.1337174836883812, + "learning_rate": 4.725032178182042e-06, + "loss": 0.19989261031150818, + "step": 5221 + }, + { + "epoch": 1.3866684371265436, + "grad_norm": 1.1989339880554155, + "learning_rate": 
4.721302481159588e-06, + "loss": 0.24409207701683044, + "step": 5222 + }, + { + "epoch": 1.3869340061080866, + "grad_norm": 1.2425140627800753, + "learning_rate": 4.71757380180776e-06, + "loss": 0.25146353244781494, + "step": 5223 + }, + { + "epoch": 1.3871995750896295, + "grad_norm": 1.245669068902739, + "learning_rate": 4.713846140845401e-06, + "loss": 0.23076622188091278, + "step": 5224 + }, + { + "epoch": 1.3874651440711725, + "grad_norm": 1.1122357580396618, + "learning_rate": 4.7101194989911635e-06, + "loss": 0.2159188687801361, + "step": 5225 + }, + { + "epoch": 1.3877307130527154, + "grad_norm": 1.433039209205417, + "learning_rate": 4.706393876963497e-06, + "loss": 0.24891307950019836, + "step": 5226 + }, + { + "epoch": 1.3879962820342584, + "grad_norm": 1.2167285098476437, + "learning_rate": 4.702669275480659e-06, + "loss": 0.26254773139953613, + "step": 5227 + }, + { + "epoch": 1.3882618510158014, + "grad_norm": 1.0872799599118763, + "learning_rate": 4.698945695260709e-06, + "loss": 0.19589121639728546, + "step": 5228 + }, + { + "epoch": 1.3885274199973443, + "grad_norm": 1.273899860234835, + "learning_rate": 4.695223137021509e-06, + "loss": 0.23796147108078003, + "step": 5229 + }, + { + "epoch": 1.3887929889788873, + "grad_norm": 1.1566738109261303, + "learning_rate": 4.6915016014807235e-06, + "loss": 0.21211156249046326, + "step": 5230 + }, + { + "epoch": 1.3890585579604302, + "grad_norm": 1.1477189909918881, + "learning_rate": 4.687781089355817e-06, + "loss": 0.22418555617332458, + "step": 5231 + }, + { + "epoch": 1.3893241269419732, + "grad_norm": 1.1999712861158167, + "learning_rate": 4.68406160136407e-06, + "loss": 0.24140511453151703, + "step": 5232 + }, + { + "epoch": 1.389589695923516, + "grad_norm": 1.3515422291949701, + "learning_rate": 4.68034313822255e-06, + "loss": 0.2863473892211914, + "step": 5233 + }, + { + "epoch": 1.389855264905059, + "grad_norm": 1.1002404477789451, + "learning_rate": 4.676625700648133e-06, + "loss": 
0.21283546090126038, + "step": 5234 + }, + { + "epoch": 1.390120833886602, + "grad_norm": 1.311958297113244, + "learning_rate": 4.672909289357498e-06, + "loss": 0.2701990008354187, + "step": 5235 + }, + { + "epoch": 1.390386402868145, + "grad_norm": 1.1672674472381515, + "learning_rate": 4.669193905067124e-06, + "loss": 0.23807264864444733, + "step": 5236 + }, + { + "epoch": 1.390651971849688, + "grad_norm": 1.3282268361230456, + "learning_rate": 4.665479548493298e-06, + "loss": 0.22204206883907318, + "step": 5237 + }, + { + "epoch": 1.3909175408312309, + "grad_norm": 1.2590492281878678, + "learning_rate": 4.661766220352098e-06, + "loss": 0.22389569878578186, + "step": 5238 + }, + { + "epoch": 1.3911831098127738, + "grad_norm": 1.2844920522393721, + "learning_rate": 4.65805392135941e-06, + "loss": 0.23752997815608978, + "step": 5239 + }, + { + "epoch": 1.3914486787943168, + "grad_norm": 1.8677910056359206, + "learning_rate": 4.654342652230921e-06, + "loss": 0.24055880308151245, + "step": 5240 + }, + { + "epoch": 1.3917142477758597, + "grad_norm": 1.2030621240735913, + "learning_rate": 4.6506324136821255e-06, + "loss": 0.22136151790618896, + "step": 5241 + }, + { + "epoch": 1.3919798167574027, + "grad_norm": 1.299031121789001, + "learning_rate": 4.646923206428311e-06, + "loss": 0.2616429924964905, + "step": 5242 + }, + { + "epoch": 1.3922453857389456, + "grad_norm": 1.218734267375269, + "learning_rate": 4.643215031184569e-06, + "loss": 0.24827662110328674, + "step": 5243 + }, + { + "epoch": 1.3925109547204886, + "grad_norm": 1.3223478407487963, + "learning_rate": 4.639507888665792e-06, + "loss": 0.21999669075012207, + "step": 5244 + }, + { + "epoch": 1.3927765237020315, + "grad_norm": 1.3241857590600639, + "learning_rate": 4.6358017795866715e-06, + "loss": 0.24511300027370453, + "step": 5245 + }, + { + "epoch": 1.3930420926835745, + "grad_norm": 1.2459535025826622, + "learning_rate": 4.632096704661704e-06, + "loss": 0.2410753220319748, + "step": 5246 + }, + { + 
"epoch": 1.3933076616651174, + "grad_norm": 1.157173292152249, + "learning_rate": 4.628392664605184e-06, + "loss": 0.2160021960735321, + "step": 5247 + }, + { + "epoch": 1.3935732306466604, + "grad_norm": 1.2204303717623475, + "learning_rate": 4.624689660131204e-06, + "loss": 0.22672782838344574, + "step": 5248 + }, + { + "epoch": 1.3938387996282033, + "grad_norm": 1.3056904555347544, + "learning_rate": 4.620987691953659e-06, + "loss": 0.25474926829338074, + "step": 5249 + }, + { + "epoch": 1.3941043686097463, + "grad_norm": 1.3078938706976893, + "learning_rate": 4.617286760786252e-06, + "loss": 0.2449323832988739, + "step": 5250 + }, + { + "epoch": 1.3943699375912892, + "grad_norm": 1.4350253205296164, + "learning_rate": 4.613586867342473e-06, + "loss": 0.23727643489837646, + "step": 5251 + }, + { + "epoch": 1.3946355065728322, + "grad_norm": 1.492440797106639, + "learning_rate": 4.609888012335624e-06, + "loss": 0.23727962374687195, + "step": 5252 + }, + { + "epoch": 1.3949010755543751, + "grad_norm": 1.1595482332609377, + "learning_rate": 4.60619019647879e-06, + "loss": 0.21957805752754211, + "step": 5253 + }, + { + "epoch": 1.395166644535918, + "grad_norm": 1.1972608851584254, + "learning_rate": 4.6024934204848745e-06, + "loss": 0.24184471368789673, + "step": 5254 + }, + { + "epoch": 1.395432213517461, + "grad_norm": 1.2654091836286674, + "learning_rate": 4.598797685066568e-06, + "loss": 0.239216148853302, + "step": 5255 + }, + { + "epoch": 1.395697782499004, + "grad_norm": 1.1503034311319646, + "learning_rate": 4.595102990936367e-06, + "loss": 0.17741018533706665, + "step": 5256 + }, + { + "epoch": 1.395963351480547, + "grad_norm": 1.2669115039567294, + "learning_rate": 4.591409338806566e-06, + "loss": 0.26139867305755615, + "step": 5257 + }, + { + "epoch": 1.39622892046209, + "grad_norm": 1.1295627244433792, + "learning_rate": 4.587716729389251e-06, + "loss": 0.23689255118370056, + "step": 5258 + }, + { + "epoch": 1.3964944894436329, + "grad_norm": 
1.3449494333614898, + "learning_rate": 4.584025163396323e-06, + "loss": 0.22679267823696136, + "step": 5259 + }, + { + "epoch": 1.3967600584251758, + "grad_norm": 1.4665032620533849, + "learning_rate": 4.580334641539467e-06, + "loss": 0.2743435204029083, + "step": 5260 + }, + { + "epoch": 1.3970256274067188, + "grad_norm": 1.166091966014122, + "learning_rate": 4.5766451645301735e-06, + "loss": 0.22738990187644958, + "step": 5261 + }, + { + "epoch": 1.3972911963882617, + "grad_norm": 1.2398512539901747, + "learning_rate": 4.57295673307973e-06, + "loss": 0.24826082587242126, + "step": 5262 + }, + { + "epoch": 1.3975567653698049, + "grad_norm": 1.2172880570038314, + "learning_rate": 4.569269347899222e-06, + "loss": 0.23121042549610138, + "step": 5263 + }, + { + "epoch": 1.3978223343513478, + "grad_norm": 2.1881918032824443, + "learning_rate": 4.5655830096995345e-06, + "loss": 0.21382957696914673, + "step": 5264 + }, + { + "epoch": 1.3980879033328908, + "grad_norm": 1.6700623666107715, + "learning_rate": 4.561897719191349e-06, + "loss": 0.24439184367656708, + "step": 5265 + }, + { + "epoch": 1.3983534723144337, + "grad_norm": 1.1734120938371422, + "learning_rate": 4.558213477085148e-06, + "loss": 0.2106003314256668, + "step": 5266 + }, + { + "epoch": 1.3986190412959767, + "grad_norm": 1.568387486793487, + "learning_rate": 4.554530284091209e-06, + "loss": 0.3073291480541229, + "step": 5267 + }, + { + "epoch": 1.3988846102775196, + "grad_norm": 1.226744359266016, + "learning_rate": 4.550848140919606e-06, + "loss": 0.2448226660490036, + "step": 5268 + }, + { + "epoch": 1.3991501792590626, + "grad_norm": 1.4434974870419186, + "learning_rate": 4.5471670482802165e-06, + "loss": 0.25378671288490295, + "step": 5269 + }, + { + "epoch": 1.3994157482406056, + "grad_norm": 1.243366792714921, + "learning_rate": 4.5434870068827086e-06, + "loss": 0.2735089659690857, + "step": 5270 + }, + { + "epoch": 1.3996813172221485, + "grad_norm": 1.3983115308066707, + "learning_rate": 
4.539808017436552e-06, + "loss": 0.2530548870563507, + "step": 5271 + }, + { + "epoch": 1.3999468862036915, + "grad_norm": 1.2566722493021396, + "learning_rate": 4.536130080651015e-06, + "loss": 0.23692254722118378, + "step": 5272 + }, + { + "epoch": 1.4002124551852344, + "grad_norm": 1.257120121799197, + "learning_rate": 4.532453197235155e-06, + "loss": 0.24554882943630219, + "step": 5273 + }, + { + "epoch": 1.4004780241667774, + "grad_norm": 1.2106096425654094, + "learning_rate": 4.528777367897837e-06, + "loss": 0.20152084529399872, + "step": 5274 + }, + { + "epoch": 1.4007435931483203, + "grad_norm": 1.207683737630722, + "learning_rate": 4.525102593347714e-06, + "loss": 0.20908965170383453, + "step": 5275 + }, + { + "epoch": 1.4010091621298633, + "grad_norm": 1.2398706056963738, + "learning_rate": 4.521428874293238e-06, + "loss": 0.23158209025859833, + "step": 5276 + }, + { + "epoch": 1.4012747311114062, + "grad_norm": 1.2494835342931663, + "learning_rate": 4.517756211442664e-06, + "loss": 0.2483675330877304, + "step": 5277 + }, + { + "epoch": 1.4015403000929492, + "grad_norm": 1.1662936164598174, + "learning_rate": 4.514084605504035e-06, + "loss": 0.23435397446155548, + "step": 5278 + }, + { + "epoch": 1.4018058690744921, + "grad_norm": 1.242534131664269, + "learning_rate": 4.510414057185195e-06, + "loss": 0.2605316936969757, + "step": 5279 + }, + { + "epoch": 1.402071438056035, + "grad_norm": 1.148911142729499, + "learning_rate": 4.506744567193782e-06, + "loss": 0.2279929518699646, + "step": 5280 + }, + { + "epoch": 1.402337007037578, + "grad_norm": 1.1849060379752767, + "learning_rate": 4.503076136237228e-06, + "loss": 0.23011639714241028, + "step": 5281 + }, + { + "epoch": 1.402602576019121, + "grad_norm": 1.1735153050753564, + "learning_rate": 4.499408765022765e-06, + "loss": 0.213611900806427, + "step": 5282 + }, + { + "epoch": 1.402868145000664, + "grad_norm": 1.3225078215525052, + "learning_rate": 4.495742454257418e-06, + "loss": 0.25555503368377686, + 
"step": 5283 + }, + { + "epoch": 1.4031337139822069, + "grad_norm": 1.331030123703595, + "learning_rate": 4.4920772046480095e-06, + "loss": 0.2694614827632904, + "step": 5284 + }, + { + "epoch": 1.4033992829637498, + "grad_norm": 1.3958578164403037, + "learning_rate": 4.4884130169011565e-06, + "loss": 0.2160607874393463, + "step": 5285 + }, + { + "epoch": 1.4036648519452928, + "grad_norm": 1.4996515147203022, + "learning_rate": 4.48474989172327e-06, + "loss": 0.2556128203868866, + "step": 5286 + }, + { + "epoch": 1.4039304209268357, + "grad_norm": 1.2506403611380352, + "learning_rate": 4.481087829820558e-06, + "loss": 0.2251313328742981, + "step": 5287 + }, + { + "epoch": 1.4041959899083787, + "grad_norm": 1.380992563161254, + "learning_rate": 4.477426831899024e-06, + "loss": 0.26856666803359985, + "step": 5288 + }, + { + "epoch": 1.4044615588899216, + "grad_norm": 1.2429158128712894, + "learning_rate": 4.473766898664464e-06, + "loss": 0.25573840737342834, + "step": 5289 + }, + { + "epoch": 1.4047271278714646, + "grad_norm": 1.2559748496125192, + "learning_rate": 4.4701080308224685e-06, + "loss": 0.26519301533699036, + "step": 5290 + }, + { + "epoch": 1.4049926968530075, + "grad_norm": 1.5959863642176566, + "learning_rate": 4.466450229078427e-06, + "loss": 0.2329619824886322, + "step": 5291 + }, + { + "epoch": 1.4052582658345505, + "grad_norm": 1.208485124140325, + "learning_rate": 4.4627934941375185e-06, + "loss": 0.2243901491165161, + "step": 5292 + }, + { + "epoch": 1.4055238348160934, + "grad_norm": 1.2042065274178317, + "learning_rate": 4.45913782670472e-06, + "loss": 0.22516998648643494, + "step": 5293 + }, + { + "epoch": 1.4057894037976364, + "grad_norm": 1.2427926273641645, + "learning_rate": 4.455483227484796e-06, + "loss": 0.25573113560676575, + "step": 5294 + }, + { + "epoch": 1.4060549727791793, + "grad_norm": 1.3935629686917204, + "learning_rate": 4.451829697182317e-06, + "loss": 0.2568536698818207, + "step": 5295 + }, + { + "epoch": 
1.4063205417607223, + "grad_norm": 1.293797792298673, + "learning_rate": 4.448177236501638e-06, + "loss": 0.24510663747787476, + "step": 5296 + }, + { + "epoch": 1.4065861107422652, + "grad_norm": 1.3445763390180965, + "learning_rate": 4.444525846146911e-06, + "loss": 0.24890470504760742, + "step": 5297 + }, + { + "epoch": 1.4068516797238082, + "grad_norm": 1.3096169257052843, + "learning_rate": 4.440875526822081e-06, + "loss": 0.21442994475364685, + "step": 5298 + }, + { + "epoch": 1.4071172487053512, + "grad_norm": 1.2628911672392604, + "learning_rate": 4.437226279230884e-06, + "loss": 0.24281370639801025, + "step": 5299 + }, + { + "epoch": 1.407382817686894, + "grad_norm": 1.2336479145010515, + "learning_rate": 4.433578104076853e-06, + "loss": 0.19542500376701355, + "step": 5300 + }, + { + "epoch": 1.407648386668437, + "grad_norm": 1.256359230599367, + "learning_rate": 4.429931002063315e-06, + "loss": 0.22688990831375122, + "step": 5301 + }, + { + "epoch": 1.40791395564998, + "grad_norm": 1.3692436485711592, + "learning_rate": 4.42628497389339e-06, + "loss": 0.2520858347415924, + "step": 5302 + }, + { + "epoch": 1.408179524631523, + "grad_norm": 1.1723697651028326, + "learning_rate": 4.42264002026998e-06, + "loss": 0.237991064786911, + "step": 5303 + }, + { + "epoch": 1.408445093613066, + "grad_norm": 1.1277997255078087, + "learning_rate": 4.418996141895797e-06, + "loss": 0.20164436101913452, + "step": 5304 + }, + { + "epoch": 1.408710662594609, + "grad_norm": 1.2657361694815492, + "learning_rate": 4.415353339473338e-06, + "loss": 0.24009189009666443, + "step": 5305 + }, + { + "epoch": 1.408976231576152, + "grad_norm": 1.138145945953283, + "learning_rate": 4.411711613704889e-06, + "loss": 0.23170322179794312, + "step": 5306 + }, + { + "epoch": 1.409241800557695, + "grad_norm": 1.2244077415708243, + "learning_rate": 4.408070965292534e-06, + "loss": 0.2280617356300354, + "step": 5307 + }, + { + "epoch": 1.409507369539238, + "grad_norm": 1.2724409466040383, + 
"learning_rate": 4.404431394938145e-06, + "loss": 0.21982887387275696, + "step": 5308 + }, + { + "epoch": 1.409772938520781, + "grad_norm": 1.265647410959733, + "learning_rate": 4.40079290334339e-06, + "loss": 0.25295430421829224, + "step": 5309 + }, + { + "epoch": 1.4100385075023238, + "grad_norm": 1.1099961782761754, + "learning_rate": 4.397155491209727e-06, + "loss": 0.20109041035175323, + "step": 5310 + }, + { + "epoch": 1.4103040764838668, + "grad_norm": 1.3436616824827443, + "learning_rate": 4.393519159238405e-06, + "loss": 0.2487715482711792, + "step": 5311 + }, + { + "epoch": 1.4105696454654097, + "grad_norm": 1.1475311486694626, + "learning_rate": 4.389883908130465e-06, + "loss": 0.2031790167093277, + "step": 5312 + }, + { + "epoch": 1.4108352144469527, + "grad_norm": 1.277969729475343, + "learning_rate": 4.386249738586744e-06, + "loss": 0.23029211163520813, + "step": 5313 + }, + { + "epoch": 1.4111007834284957, + "grad_norm": 1.2100830863469687, + "learning_rate": 4.382616651307866e-06, + "loss": 0.23080995678901672, + "step": 5314 + }, + { + "epoch": 1.4113663524100386, + "grad_norm": 1.2376227742095711, + "learning_rate": 4.378984646994248e-06, + "loss": 0.2450534999370575, + "step": 5315 + }, + { + "epoch": 1.4116319213915816, + "grad_norm": 1.266655148641824, + "learning_rate": 4.375353726346094e-06, + "loss": 0.24349799752235413, + "step": 5316 + }, + { + "epoch": 1.4118974903731245, + "grad_norm": 1.2696628766548714, + "learning_rate": 4.371723890063411e-06, + "loss": 0.2431599199771881, + "step": 5317 + }, + { + "epoch": 1.4121630593546675, + "grad_norm": 1.3688178233929764, + "learning_rate": 4.368095138845978e-06, + "loss": 0.2051251232624054, + "step": 5318 + }, + { + "epoch": 1.4124286283362104, + "grad_norm": 1.1726447102511934, + "learning_rate": 4.36446747339338e-06, + "loss": 0.21346575021743774, + "step": 5319 + }, + { + "epoch": 1.4126941973177534, + "grad_norm": 1.2726406383058895, + "learning_rate": 4.360840894404989e-06, + "loss": 
0.22193217277526855, + "step": 5320 + }, + { + "epoch": 1.4129597662992963, + "grad_norm": 1.2762131056761095, + "learning_rate": 4.357215402579961e-06, + "loss": 0.2112501859664917, + "step": 5321 + }, + { + "epoch": 1.4132253352808393, + "grad_norm": 1.1864412536946314, + "learning_rate": 4.3535909986172565e-06, + "loss": 0.2648766040802002, + "step": 5322 + }, + { + "epoch": 1.4134909042623822, + "grad_norm": 1.1533413783243194, + "learning_rate": 4.349967683215614e-06, + "loss": 0.22139690816402435, + "step": 5323 + }, + { + "epoch": 1.4137564732439252, + "grad_norm": 1.0259028802936685, + "learning_rate": 4.346345457073568e-06, + "loss": 0.21558481454849243, + "step": 5324 + }, + { + "epoch": 1.4140220422254681, + "grad_norm": 1.2763949378052617, + "learning_rate": 4.342724320889438e-06, + "loss": 0.2013886272907257, + "step": 5325 + }, + { + "epoch": 1.414287611207011, + "grad_norm": 1.2216640015824227, + "learning_rate": 4.3391042753613375e-06, + "loss": 0.2428729385137558, + "step": 5326 + }, + { + "epoch": 1.414553180188554, + "grad_norm": 1.2385329501903242, + "learning_rate": 4.3354853211871696e-06, + "loss": 0.20930354297161102, + "step": 5327 + }, + { + "epoch": 1.414818749170097, + "grad_norm": 1.1373474530618315, + "learning_rate": 4.331867459064623e-06, + "loss": 0.18988853693008423, + "step": 5328 + }, + { + "epoch": 1.41508431815164, + "grad_norm": 1.2833653393491664, + "learning_rate": 4.328250689691182e-06, + "loss": 0.24618801474571228, + "step": 5329 + }, + { + "epoch": 1.4153498871331829, + "grad_norm": 1.2635824567099267, + "learning_rate": 4.324635013764113e-06, + "loss": 0.23857265710830688, + "step": 5330 + }, + { + "epoch": 1.4156154561147258, + "grad_norm": 1.3200622076177175, + "learning_rate": 4.321020431980483e-06, + "loss": 0.21869014203548431, + "step": 5331 + }, + { + "epoch": 1.4158810250962688, + "grad_norm": 1.2317649692424293, + "learning_rate": 4.317406945037138e-06, + "loss": 0.2508969008922577, + "step": 5332 + }, + { + 
"epoch": 1.4161465940778117, + "grad_norm": 1.2114692744130235, + "learning_rate": 4.313794553630711e-06, + "loss": 0.2406233549118042, + "step": 5333 + }, + { + "epoch": 1.4164121630593547, + "grad_norm": 1.3314396378070763, + "learning_rate": 4.310183258457632e-06, + "loss": 0.2376224398612976, + "step": 5334 + }, + { + "epoch": 1.4166777320408976, + "grad_norm": 1.4802475566731417, + "learning_rate": 4.306573060214115e-06, + "loss": 0.2818688750267029, + "step": 5335 + }, + { + "epoch": 1.4169433010224406, + "grad_norm": 1.2248721858463099, + "learning_rate": 4.302963959596165e-06, + "loss": 0.2279777228832245, + "step": 5336 + }, + { + "epoch": 1.4172088700039835, + "grad_norm": 1.3681495314955672, + "learning_rate": 4.299355957299573e-06, + "loss": 0.2652052640914917, + "step": 5337 + }, + { + "epoch": 1.4174744389855265, + "grad_norm": 1.2814638931564002, + "learning_rate": 4.2957490540199185e-06, + "loss": 0.24415750801563263, + "step": 5338 + }, + { + "epoch": 1.4177400079670694, + "grad_norm": 1.2028147011593575, + "learning_rate": 4.292143250452569e-06, + "loss": 0.2318287044763565, + "step": 5339 + }, + { + "epoch": 1.4180055769486124, + "grad_norm": 1.1621443407054215, + "learning_rate": 4.288538547292685e-06, + "loss": 0.19914361834526062, + "step": 5340 + }, + { + "epoch": 1.4182711459301554, + "grad_norm": 1.2533818722517012, + "learning_rate": 4.2849349452352095e-06, + "loss": 0.22550678253173828, + "step": 5341 + }, + { + "epoch": 1.4185367149116983, + "grad_norm": 1.3481328868952585, + "learning_rate": 4.281332444974874e-06, + "loss": 0.25001436471939087, + "step": 5342 + }, + { + "epoch": 1.4188022838932413, + "grad_norm": 1.2557895781680242, + "learning_rate": 4.277731047206197e-06, + "loss": 0.24873407185077667, + "step": 5343 + }, + { + "epoch": 1.4190678528747842, + "grad_norm": 1.2532145662207181, + "learning_rate": 4.274130752623487e-06, + "loss": 0.25732600688934326, + "step": 5344 + }, + { + "epoch": 1.4193334218563272, + "grad_norm": 
1.1956499236331526, + "learning_rate": 4.270531561920836e-06, + "loss": 0.1894054263830185, + "step": 5345 + }, + { + "epoch": 1.4195989908378701, + "grad_norm": 1.2861805940078326, + "learning_rate": 4.2669334757921284e-06, + "loss": 0.2632025480270386, + "step": 5346 + }, + { + "epoch": 1.419864559819413, + "grad_norm": 1.1223708980675566, + "learning_rate": 4.2633364949310315e-06, + "loss": 0.22106415033340454, + "step": 5347 + }, + { + "epoch": 1.420130128800956, + "grad_norm": 1.2191554963858982, + "learning_rate": 4.259740620031e-06, + "loss": 0.2246699184179306, + "step": 5348 + }, + { + "epoch": 1.420395697782499, + "grad_norm": 1.2377251567235985, + "learning_rate": 4.256145851785277e-06, + "loss": 0.2335890382528305, + "step": 5349 + }, + { + "epoch": 1.420661266764042, + "grad_norm": 1.3200881727026734, + "learning_rate": 4.252552190886892e-06, + "loss": 0.25485220551490784, + "step": 5350 + }, + { + "epoch": 1.4209268357455849, + "grad_norm": 1.406483107573335, + "learning_rate": 4.248959638028659e-06, + "loss": 0.26234719157218933, + "step": 5351 + }, + { + "epoch": 1.4211924047271278, + "grad_norm": 1.1946878328095272, + "learning_rate": 4.245368193903181e-06, + "loss": 0.22083795070648193, + "step": 5352 + }, + { + "epoch": 1.4214579737086708, + "grad_norm": 1.288602079194267, + "learning_rate": 4.241777859202846e-06, + "loss": 0.1886332929134369, + "step": 5353 + }, + { + "epoch": 1.4217235426902137, + "grad_norm": 1.506700165302322, + "learning_rate": 4.238188634619826e-06, + "loss": 0.26154160499572754, + "step": 5354 + }, + { + "epoch": 1.4219891116717567, + "grad_norm": 1.1472960297751262, + "learning_rate": 4.234600520846085e-06, + "loss": 0.24761158227920532, + "step": 5355 + }, + { + "epoch": 1.4222546806532996, + "grad_norm": 1.154393443673505, + "learning_rate": 4.2310135185733625e-06, + "loss": 0.20936736464500427, + "step": 5356 + }, + { + "epoch": 1.4225202496348426, + "grad_norm": 1.15600424022186, + "learning_rate": 
4.227427628493198e-06, + "loss": 0.2173127979040146, + "step": 5357 + }, + { + "epoch": 1.4227858186163855, + "grad_norm": 1.217414245555098, + "learning_rate": 4.223842851296907e-06, + "loss": 0.2598559260368347, + "step": 5358 + }, + { + "epoch": 1.4230513875979285, + "grad_norm": 1.224021391863692, + "learning_rate": 4.22025918767559e-06, + "loss": 0.23701196908950806, + "step": 5359 + }, + { + "epoch": 1.4233169565794714, + "grad_norm": 1.2134140712383175, + "learning_rate": 4.216676638320135e-06, + "loss": 0.26052403450012207, + "step": 5360 + }, + { + "epoch": 1.4235825255610144, + "grad_norm": 1.2465682642545985, + "learning_rate": 4.213095203921217e-06, + "loss": 0.2464584857225418, + "step": 5361 + }, + { + "epoch": 1.4238480945425573, + "grad_norm": 1.2646547527576821, + "learning_rate": 4.209514885169294e-06, + "loss": 0.25889426469802856, + "step": 5362 + }, + { + "epoch": 1.4241136635241003, + "grad_norm": 1.2990812156107416, + "learning_rate": 4.2059356827546076e-06, + "loss": 0.26529380679130554, + "step": 5363 + }, + { + "epoch": 1.4243792325056432, + "grad_norm": 1.1509506747022789, + "learning_rate": 4.202357597367187e-06, + "loss": 0.2284630388021469, + "step": 5364 + }, + { + "epoch": 1.4246448014871862, + "grad_norm": 1.1509689814009059, + "learning_rate": 4.198780629696845e-06, + "loss": 0.2361873984336853, + "step": 5365 + }, + { + "epoch": 1.4249103704687291, + "grad_norm": 1.2489364054166838, + "learning_rate": 4.195204780433179e-06, + "loss": 0.2473624348640442, + "step": 5366 + }, + { + "epoch": 1.425175939450272, + "grad_norm": 1.2584581044476912, + "learning_rate": 4.19163005026557e-06, + "loss": 0.24852773547172546, + "step": 5367 + }, + { + "epoch": 1.425441508431815, + "grad_norm": 1.413523972125062, + "learning_rate": 4.188056439883183e-06, + "loss": 0.28409647941589355, + "step": 5368 + }, + { + "epoch": 1.425707077413358, + "grad_norm": 1.2672381227374172, + "learning_rate": 4.18448394997497e-06, + "loss": 0.2500985562801361, + 
"step": 5369 + }, + { + "epoch": 1.425972646394901, + "grad_norm": 1.2421534737421158, + "learning_rate": 4.1809125812296635e-06, + "loss": 0.23475977778434753, + "step": 5370 + }, + { + "epoch": 1.426238215376444, + "grad_norm": 1.3107626948919207, + "learning_rate": 4.177342334335782e-06, + "loss": 0.22925345599651337, + "step": 5371 + }, + { + "epoch": 1.4265037843579869, + "grad_norm": 1.1701714137905739, + "learning_rate": 4.173773209981627e-06, + "loss": 0.24463894963264465, + "step": 5372 + }, + { + "epoch": 1.4267693533395298, + "grad_norm": 1.2600839330793319, + "learning_rate": 4.170205208855281e-06, + "loss": 0.2451590746641159, + "step": 5373 + }, + { + "epoch": 1.4270349223210728, + "grad_norm": 1.192456234510782, + "learning_rate": 4.166638331644613e-06, + "loss": 0.21078437566757202, + "step": 5374 + }, + { + "epoch": 1.427300491302616, + "grad_norm": 1.1548728286132999, + "learning_rate": 4.163072579037279e-06, + "loss": 0.21466529369354248, + "step": 5375 + }, + { + "epoch": 1.4275660602841589, + "grad_norm": 1.3327200015078104, + "learning_rate": 4.159507951720713e-06, + "loss": 0.20103147625923157, + "step": 5376 + }, + { + "epoch": 1.4278316292657018, + "grad_norm": 1.2634022835060015, + "learning_rate": 4.15594445038213e-06, + "loss": 0.2618871331214905, + "step": 5377 + }, + { + "epoch": 1.4280971982472448, + "grad_norm": 1.314150540124243, + "learning_rate": 4.152382075708534e-06, + "loss": 0.2496388852596283, + "step": 5378 + }, + { + "epoch": 1.4283627672287877, + "grad_norm": 1.2776066314767451, + "learning_rate": 4.148820828386707e-06, + "loss": 0.2663899064064026, + "step": 5379 + }, + { + "epoch": 1.4286283362103307, + "grad_norm": 1.223751737565641, + "learning_rate": 4.145260709103216e-06, + "loss": 0.23617541790008545, + "step": 5380 + }, + { + "epoch": 1.4288939051918736, + "grad_norm": 1.2184450229688006, + "learning_rate": 4.141701718544411e-06, + "loss": 0.200006365776062, + "step": 5381 + }, + { + "epoch": 1.4291594741734166, + 
"grad_norm": 1.2899877428495155, + "learning_rate": 4.138143857396425e-06, + "loss": 0.22707203030586243, + "step": 5382 + }, + { + "epoch": 1.4294250431549596, + "grad_norm": 1.210998695531734, + "learning_rate": 4.134587126345162e-06, + "loss": 0.23903624713420868, + "step": 5383 + }, + { + "epoch": 1.4296906121365025, + "grad_norm": 1.56990305006701, + "learning_rate": 4.131031526076329e-06, + "loss": 0.2308908998966217, + "step": 5384 + }, + { + "epoch": 1.4299561811180455, + "grad_norm": 1.2125776866133393, + "learning_rate": 4.127477057275398e-06, + "loss": 0.18762601912021637, + "step": 5385 + }, + { + "epoch": 1.4302217500995884, + "grad_norm": 1.3670823879917342, + "learning_rate": 4.123923720627633e-06, + "loss": 0.281406044960022, + "step": 5386 + }, + { + "epoch": 1.4304873190811314, + "grad_norm": 1.24677960623226, + "learning_rate": 4.120371516818071e-06, + "loss": 0.24858589470386505, + "step": 5387 + }, + { + "epoch": 1.4307528880626743, + "grad_norm": 1.2017896897650255, + "learning_rate": 4.116820446531538e-06, + "loss": 0.22179371118545532, + "step": 5388 + }, + { + "epoch": 1.4310184570442173, + "grad_norm": 1.1523445225939053, + "learning_rate": 4.113270510452636e-06, + "loss": 0.22086869180202484, + "step": 5389 + }, + { + "epoch": 1.4312840260257602, + "grad_norm": 1.295626323300653, + "learning_rate": 4.109721709265753e-06, + "loss": 0.231503427028656, + "step": 5390 + }, + { + "epoch": 1.4315495950073032, + "grad_norm": 1.31237620612278, + "learning_rate": 4.106174043655054e-06, + "loss": 0.255252867937088, + "step": 5391 + }, + { + "epoch": 1.4318151639888461, + "grad_norm": 1.2773394357808008, + "learning_rate": 4.1026275143044854e-06, + "loss": 0.23336587846279144, + "step": 5392 + }, + { + "epoch": 1.432080732970389, + "grad_norm": 1.3267952754600625, + "learning_rate": 4.099082121897783e-06, + "loss": 0.2468583881855011, + "step": 5393 + }, + { + "epoch": 1.432346301951932, + "grad_norm": 1.2137255679394872, + "learning_rate": 
4.095537867118452e-06, + "loss": 0.21211153268814087, + "step": 5394 + }, + { + "epoch": 1.432611870933475, + "grad_norm": 1.2552061461264346, + "learning_rate": 4.091994750649783e-06, + "loss": 0.23173204064369202, + "step": 5395 + }, + { + "epoch": 1.432877439915018, + "grad_norm": 1.2420339991667666, + "learning_rate": 4.088452773174853e-06, + "loss": 0.2606658935546875, + "step": 5396 + }, + { + "epoch": 1.4331430088965609, + "grad_norm": 1.2141954954044303, + "learning_rate": 4.084911935376502e-06, + "loss": 0.21198314428329468, + "step": 5397 + }, + { + "epoch": 1.4334085778781038, + "grad_norm": 1.273859413406427, + "learning_rate": 4.08137223793737e-06, + "loss": 0.216193288564682, + "step": 5398 + }, + { + "epoch": 1.4336741468596468, + "grad_norm": 1.3862686522767422, + "learning_rate": 4.077833681539866e-06, + "loss": 0.27767330408096313, + "step": 5399 + }, + { + "epoch": 1.4339397158411897, + "grad_norm": 1.193043888736233, + "learning_rate": 4.0742962668661826e-06, + "loss": 0.21584349870681763, + "step": 5400 + }, + { + "epoch": 1.4342052848227327, + "grad_norm": 1.2801175216615184, + "learning_rate": 4.070759994598288e-06, + "loss": 0.220070481300354, + "step": 5401 + }, + { + "epoch": 1.4344708538042756, + "grad_norm": 1.4276288870785, + "learning_rate": 4.067224865417941e-06, + "loss": 0.26035353541374207, + "step": 5402 + }, + { + "epoch": 1.4347364227858186, + "grad_norm": 1.1784144309393945, + "learning_rate": 4.063690880006671e-06, + "loss": 0.23704876005649567, + "step": 5403 + }, + { + "epoch": 1.4350019917673615, + "grad_norm": 1.2793709287846655, + "learning_rate": 4.060158039045785e-06, + "loss": 0.2345760464668274, + "step": 5404 + }, + { + "epoch": 1.4352675607489045, + "grad_norm": 1.2583985201804126, + "learning_rate": 4.056626343216377e-06, + "loss": 0.21307331323623657, + "step": 5405 + }, + { + "epoch": 1.4355331297304474, + "grad_norm": 1.2401804894465362, + "learning_rate": 4.053095793199313e-06, + "loss": 0.22029465436935425, + 
"step": 5406 + }, + { + "epoch": 1.4357986987119904, + "grad_norm": 1.3865770800537958, + "learning_rate": 4.049566389675244e-06, + "loss": 0.23419252038002014, + "step": 5407 + }, + { + "epoch": 1.4360642676935333, + "grad_norm": 1.2114754283066453, + "learning_rate": 4.046038133324595e-06, + "loss": 0.21648669242858887, + "step": 5408 + }, + { + "epoch": 1.4363298366750763, + "grad_norm": 1.3682353450989566, + "learning_rate": 4.042511024827573e-06, + "loss": 0.2343464195728302, + "step": 5409 + }, + { + "epoch": 1.4365954056566193, + "grad_norm": 1.28417678054491, + "learning_rate": 4.0389850648641615e-06, + "loss": 0.20108605921268463, + "step": 5410 + }, + { + "epoch": 1.4368609746381622, + "grad_norm": 1.2806759093192033, + "learning_rate": 4.0354602541141315e-06, + "loss": 0.21885806322097778, + "step": 5411 + }, + { + "epoch": 1.4371265436197052, + "grad_norm": 1.276580988371958, + "learning_rate": 4.031936593257017e-06, + "loss": 0.2382376492023468, + "step": 5412 + }, + { + "epoch": 1.437392112601248, + "grad_norm": 1.1333519329501958, + "learning_rate": 4.028414082972141e-06, + "loss": 0.21434128284454346, + "step": 5413 + }, + { + "epoch": 1.437657681582791, + "grad_norm": 1.2161992893188567, + "learning_rate": 4.024892723938601e-06, + "loss": 0.2345191240310669, + "step": 5414 + }, + { + "epoch": 1.437923250564334, + "grad_norm": 1.309666461481554, + "learning_rate": 4.021372516835273e-06, + "loss": 0.2478899210691452, + "step": 5415 + }, + { + "epoch": 1.438188819545877, + "grad_norm": 1.2593045594203824, + "learning_rate": 4.017853462340813e-06, + "loss": 0.21356827020645142, + "step": 5416 + }, + { + "epoch": 1.4384543885274201, + "grad_norm": 1.3891493537034765, + "learning_rate": 4.014335561133652e-06, + "loss": 0.26329827308654785, + "step": 5417 + }, + { + "epoch": 1.438719957508963, + "grad_norm": 1.3689872343615141, + "learning_rate": 4.010818813892e-06, + "loss": 0.25880998373031616, + "step": 5418 + }, + { + "epoch": 1.438985526490506, + 
"grad_norm": 1.2738388972586026, + "learning_rate": 4.007303221293844e-06, + "loss": 0.22749441862106323, + "step": 5419 + }, + { + "epoch": 1.439251095472049, + "grad_norm": 1.2267331489472144, + "learning_rate": 4.00378878401695e-06, + "loss": 0.2242615520954132, + "step": 5420 + }, + { + "epoch": 1.439516664453592, + "grad_norm": 1.168704950265394, + "learning_rate": 4.000275502738862e-06, + "loss": 0.19751839339733124, + "step": 5421 + }, + { + "epoch": 1.439782233435135, + "grad_norm": 1.4000090999513362, + "learning_rate": 3.996763378136895e-06, + "loss": 0.27319905161857605, + "step": 5422 + }, + { + "epoch": 1.4400478024166778, + "grad_norm": 1.1483039760635705, + "learning_rate": 3.993252410888149e-06, + "loss": 0.21676769852638245, + "step": 5423 + }, + { + "epoch": 1.4403133713982208, + "grad_norm": 1.222649759682682, + "learning_rate": 3.989742601669494e-06, + "loss": 0.22788718342781067, + "step": 5424 + }, + { + "epoch": 1.4405789403797638, + "grad_norm": 1.1800102666876688, + "learning_rate": 3.986233951157581e-06, + "loss": 0.23224875330924988, + "step": 5425 + }, + { + "epoch": 1.4408445093613067, + "grad_norm": 1.3242271211713557, + "learning_rate": 3.982726460028836e-06, + "loss": 0.23625247180461884, + "step": 5426 + }, + { + "epoch": 1.4411100783428497, + "grad_norm": 1.237043381628487, + "learning_rate": 3.979220128959463e-06, + "loss": 0.2092093527317047, + "step": 5427 + }, + { + "epoch": 1.4413756473243926, + "grad_norm": 1.164989095324882, + "learning_rate": 3.975714958625442e-06, + "loss": 0.22196070849895477, + "step": 5428 + }, + { + "epoch": 1.4416412163059356, + "grad_norm": 1.248575755705502, + "learning_rate": 3.972210949702525e-06, + "loss": 0.21276375651359558, + "step": 5429 + }, + { + "epoch": 1.4419067852874785, + "grad_norm": 1.2714203744447936, + "learning_rate": 3.968708102866247e-06, + "loss": 0.22150103747844696, + "step": 5430 + }, + { + "epoch": 1.4421723542690215, + "grad_norm": 1.2519929176778726, + "learning_rate": 
3.965206418791914e-06, + "loss": 0.24529573321342468, + "step": 5431 + }, + { + "epoch": 1.4424379232505644, + "grad_norm": 1.3331662749929607, + "learning_rate": 3.961705898154609e-06, + "loss": 0.24349135160446167, + "step": 5432 + }, + { + "epoch": 1.4427034922321074, + "grad_norm": 1.3094668545917496, + "learning_rate": 3.9582065416291926e-06, + "loss": 0.23481428623199463, + "step": 5433 + }, + { + "epoch": 1.4429690612136503, + "grad_norm": 1.2664431166747565, + "learning_rate": 3.954708349890299e-06, + "loss": 0.2366936057806015, + "step": 5434 + }, + { + "epoch": 1.4432346301951933, + "grad_norm": 1.2699903819491114, + "learning_rate": 3.951211323612336e-06, + "loss": 0.24792322516441345, + "step": 5435 + }, + { + "epoch": 1.4435001991767362, + "grad_norm": 1.1943208090894295, + "learning_rate": 3.947715463469493e-06, + "loss": 0.22601652145385742, + "step": 5436 + }, + { + "epoch": 1.4437657681582792, + "grad_norm": 1.1333130191791405, + "learning_rate": 3.9442207701357235e-06, + "loss": 0.19603165984153748, + "step": 5437 + }, + { + "epoch": 1.4440313371398221, + "grad_norm": 1.26512939224431, + "learning_rate": 3.940727244284772e-06, + "loss": 0.22619353234767914, + "step": 5438 + }, + { + "epoch": 1.444296906121365, + "grad_norm": 1.3207139711857465, + "learning_rate": 3.937234886590146e-06, + "loss": 0.24836638569831848, + "step": 5439 + }, + { + "epoch": 1.444562475102908, + "grad_norm": 1.2114237797025103, + "learning_rate": 3.933743697725129e-06, + "loss": 0.21585768461227417, + "step": 5440 + }, + { + "epoch": 1.444828044084451, + "grad_norm": 1.2037953387653635, + "learning_rate": 3.930253678362784e-06, + "loss": 0.20876167714595795, + "step": 5441 + }, + { + "epoch": 1.445093613065994, + "grad_norm": 1.2825218153573943, + "learning_rate": 3.926764829175943e-06, + "loss": 0.24337999522686005, + "step": 5442 + }, + { + "epoch": 1.4453591820475369, + "grad_norm": 1.2238662957767994, + "learning_rate": 3.9232771508372155e-06, + "loss": 
0.2511219084262848, + "step": 5443 + }, + { + "epoch": 1.4456247510290798, + "grad_norm": 1.2796769482653771, + "learning_rate": 3.919790644018986e-06, + "loss": 0.26257213950157166, + "step": 5444 + }, + { + "epoch": 1.4458903200106228, + "grad_norm": 1.3570371082898334, + "learning_rate": 3.91630530939341e-06, + "loss": 0.2720959782600403, + "step": 5445 + }, + { + "epoch": 1.4461558889921657, + "grad_norm": 1.2897968589877258, + "learning_rate": 3.912821147632421e-06, + "loss": 0.23849177360534668, + "step": 5446 + }, + { + "epoch": 1.4464214579737087, + "grad_norm": 1.2539273982781811, + "learning_rate": 3.909338159407722e-06, + "loss": 0.2366214245557785, + "step": 5447 + }, + { + "epoch": 1.4466870269552516, + "grad_norm": 1.21348130376658, + "learning_rate": 3.905856345390793e-06, + "loss": 0.21905584633350372, + "step": 5448 + }, + { + "epoch": 1.4469525959367946, + "grad_norm": 1.3001423574977207, + "learning_rate": 3.902375706252887e-06, + "loss": 0.23964065313339233, + "step": 5449 + }, + { + "epoch": 1.4472181649183375, + "grad_norm": 1.2161208716702177, + "learning_rate": 3.89889624266503e-06, + "loss": 0.22246500849723816, + "step": 5450 + }, + { + "epoch": 1.4474837338998805, + "grad_norm": 1.2845367508241097, + "learning_rate": 3.895417955298022e-06, + "loss": 0.22980710864067078, + "step": 5451 + }, + { + "epoch": 1.4477493028814234, + "grad_norm": 1.4690832477509688, + "learning_rate": 3.8919408448224346e-06, + "loss": 0.21276253461837769, + "step": 5452 + }, + { + "epoch": 1.4480148718629664, + "grad_norm": 1.3515036942552143, + "learning_rate": 3.888464911908616e-06, + "loss": 0.23925542831420898, + "step": 5453 + }, + { + "epoch": 1.4482804408445094, + "grad_norm": 1.1871457723177183, + "learning_rate": 3.884990157226683e-06, + "loss": 0.21528369188308716, + "step": 5454 + }, + { + "epoch": 1.4485460098260523, + "grad_norm": 1.2673056278722348, + "learning_rate": 3.8815165814465235e-06, + "loss": 0.24563542008399963, + "step": 5455 + }, + { + 
"epoch": 1.4488115788075953, + "grad_norm": 1.2561210989748839, + "learning_rate": 3.87804418523781e-06, + "loss": 0.2721150517463684, + "step": 5456 + }, + { + "epoch": 1.4490771477891382, + "grad_norm": 1.3721328159682122, + "learning_rate": 3.874572969269976e-06, + "loss": 0.23716527223587036, + "step": 5457 + }, + { + "epoch": 1.4493427167706812, + "grad_norm": 1.5185790933002854, + "learning_rate": 3.871102934212231e-06, + "loss": 0.2182254046201706, + "step": 5458 + }, + { + "epoch": 1.4496082857522241, + "grad_norm": 1.233204842662738, + "learning_rate": 3.867634080733557e-06, + "loss": 0.2179020643234253, + "step": 5459 + }, + { + "epoch": 1.449873854733767, + "grad_norm": 1.2633976965193632, + "learning_rate": 3.864166409502706e-06, + "loss": 0.22901684045791626, + "step": 5460 + }, + { + "epoch": 1.45013942371531, + "grad_norm": 1.209132482684757, + "learning_rate": 3.860699921188211e-06, + "loss": 0.2287352979183197, + "step": 5461 + }, + { + "epoch": 1.450404992696853, + "grad_norm": 1.214494370780124, + "learning_rate": 3.85723461645836e-06, + "loss": 0.2448873668909073, + "step": 5462 + }, + { + "epoch": 1.450670561678396, + "grad_norm": 1.323933009108344, + "learning_rate": 3.85377049598123e-06, + "loss": 0.2693510055541992, + "step": 5463 + }, + { + "epoch": 1.4509361306599389, + "grad_norm": 1.1826355120377283, + "learning_rate": 3.8503075604246554e-06, + "loss": 0.25414884090423584, + "step": 5464 + }, + { + "epoch": 1.4512016996414818, + "grad_norm": 1.3400776704302024, + "learning_rate": 3.846845810456258e-06, + "loss": 0.27798837423324585, + "step": 5465 + }, + { + "epoch": 1.4514672686230248, + "grad_norm": 1.3109571985733361, + "learning_rate": 3.8433852467434175e-06, + "loss": 0.23348593711853027, + "step": 5466 + }, + { + "epoch": 1.4517328376045677, + "grad_norm": 1.148921292979252, + "learning_rate": 3.839925869953292e-06, + "loss": 0.20993635058403015, + "step": 5467 + }, + { + "epoch": 1.4519984065861107, + "grad_norm": 
1.1967150813107374, + "learning_rate": 3.836467680752808e-06, + "loss": 0.225263774394989, + "step": 5468 + }, + { + "epoch": 1.4522639755676536, + "grad_norm": 4.549069881323283, + "learning_rate": 3.833010679808662e-06, + "loss": 0.2481595277786255, + "step": 5469 + }, + { + "epoch": 1.4525295445491966, + "grad_norm": 1.098861894900169, + "learning_rate": 3.829554867787324e-06, + "loss": 0.20755310356616974, + "step": 5470 + }, + { + "epoch": 1.4527951135307395, + "grad_norm": 1.3031978879220207, + "learning_rate": 3.826100245355034e-06, + "loss": 0.22124455869197845, + "step": 5471 + }, + { + "epoch": 1.4530606825122825, + "grad_norm": 1.1779333046553406, + "learning_rate": 3.822646813177803e-06, + "loss": 0.23461398482322693, + "step": 5472 + }, + { + "epoch": 1.4533262514938254, + "grad_norm": 1.123494857736561, + "learning_rate": 3.819194571921407e-06, + "loss": 0.22890526056289673, + "step": 5473 + }, + { + "epoch": 1.4535918204753684, + "grad_norm": 1.1163449125196687, + "learning_rate": 3.815743522251406e-06, + "loss": 0.23236533999443054, + "step": 5474 + }, + { + "epoch": 1.4538573894569113, + "grad_norm": 1.204733497516731, + "learning_rate": 3.8122936648331164e-06, + "loss": 0.2192365825176239, + "step": 5475 + }, + { + "epoch": 1.4541229584384543, + "grad_norm": 1.3061324350348682, + "learning_rate": 3.8088450003316346e-06, + "loss": 0.23970162868499756, + "step": 5476 + }, + { + "epoch": 1.4543885274199972, + "grad_norm": 1.256131451943752, + "learning_rate": 3.8053975294118163e-06, + "loss": 0.24270984530448914, + "step": 5477 + }, + { + "epoch": 1.4546540964015402, + "grad_norm": 1.1616491435133687, + "learning_rate": 3.801951252738295e-06, + "loss": 0.22228944301605225, + "step": 5478 + }, + { + "epoch": 1.4549196653830831, + "grad_norm": 1.2998939083384287, + "learning_rate": 3.7985061709754735e-06, + "loss": 0.25029584765434265, + "step": 5479 + }, + { + "epoch": 1.455185234364626, + "grad_norm": 1.1546196330858232, + "learning_rate": 
3.795062284787522e-06, + "loss": 0.23831725120544434, + "step": 5480 + }, + { + "epoch": 1.455450803346169, + "grad_norm": 1.2698177511587796, + "learning_rate": 3.7916195948383817e-06, + "loss": 0.2571605145931244, + "step": 5481 + }, + { + "epoch": 1.455716372327712, + "grad_norm": 1.4321109332673951, + "learning_rate": 3.7881781017917586e-06, + "loss": 0.2660857141017914, + "step": 5482 + }, + { + "epoch": 1.455981941309255, + "grad_norm": 1.3406733437493707, + "learning_rate": 3.7847378063111394e-06, + "loss": 0.2468302845954895, + "step": 5483 + }, + { + "epoch": 1.456247510290798, + "grad_norm": 1.363296358111954, + "learning_rate": 3.7812987090597696e-06, + "loss": 0.2559482753276825, + "step": 5484 + }, + { + "epoch": 1.4565130792723409, + "grad_norm": 1.2144737578388247, + "learning_rate": 3.7778608107006654e-06, + "loss": 0.24484393000602722, + "step": 5485 + }, + { + "epoch": 1.4567786482538838, + "grad_norm": 1.1782087302857855, + "learning_rate": 3.774424111896614e-06, + "loss": 0.2376541644334793, + "step": 5486 + }, + { + "epoch": 1.4570442172354268, + "grad_norm": 1.1748479481028287, + "learning_rate": 3.770988613310169e-06, + "loss": 0.22265875339508057, + "step": 5487 + }, + { + "epoch": 1.45730978621697, + "grad_norm": 1.2316185421612622, + "learning_rate": 3.7675543156036555e-06, + "loss": 0.2511552572250366, + "step": 5488 + }, + { + "epoch": 1.457575355198513, + "grad_norm": 1.2601957381413438, + "learning_rate": 3.764121219439165e-06, + "loss": 0.2412843108177185, + "step": 5489 + }, + { + "epoch": 1.4578409241800558, + "grad_norm": 1.2622123015546969, + "learning_rate": 3.760689325478559e-06, + "loss": 0.26342809200286865, + "step": 5490 + }, + { + "epoch": 1.4581064931615988, + "grad_norm": 1.2994089172948287, + "learning_rate": 3.7572586343834638e-06, + "loss": 0.23315641283988953, + "step": 5491 + }, + { + "epoch": 1.4583720621431417, + "grad_norm": 1.0927170518216454, + "learning_rate": 3.753829146815279e-06, + "loss": 
0.24148929119110107, + "step": 5492 + }, + { + "epoch": 1.4586376311246847, + "grad_norm": 1.363697618202234, + "learning_rate": 3.750400863435166e-06, + "loss": 0.22838115692138672, + "step": 5493 + }, + { + "epoch": 1.4589032001062276, + "grad_norm": 1.2083898158968958, + "learning_rate": 3.746973784904061e-06, + "loss": 0.21669608354568481, + "step": 5494 + }, + { + "epoch": 1.4591687690877706, + "grad_norm": 1.4819576271076944, + "learning_rate": 3.743547911882662e-06, + "loss": 0.25619322061538696, + "step": 5495 + }, + { + "epoch": 1.4594343380693136, + "grad_norm": 1.2058542987095502, + "learning_rate": 3.7401232450314384e-06, + "loss": 0.23629480600357056, + "step": 5496 + }, + { + "epoch": 1.4596999070508565, + "grad_norm": 1.189438722154431, + "learning_rate": 3.7366997850106245e-06, + "loss": 0.21799582242965698, + "step": 5497 + }, + { + "epoch": 1.4599654760323995, + "grad_norm": 1.372571579127378, + "learning_rate": 3.733277532480223e-06, + "loss": 0.2582590579986572, + "step": 5498 + }, + { + "epoch": 1.4602310450139424, + "grad_norm": 1.1675281771435806, + "learning_rate": 3.729856488100003e-06, + "loss": 0.23641736805438995, + "step": 5499 + }, + { + "epoch": 1.4604966139954854, + "grad_norm": 1.3024331747300109, + "learning_rate": 3.7264366525295e-06, + "loss": 0.24150417745113373, + "step": 5500 + }, + { + "epoch": 1.4607621829770283, + "grad_norm": 1.2012687985267718, + "learning_rate": 3.7230180264280245e-06, + "loss": 0.2474009394645691, + "step": 5501 + }, + { + "epoch": 1.4610277519585713, + "grad_norm": 1.3411668359609863, + "learning_rate": 3.7196006104546435e-06, + "loss": 0.269604355096817, + "step": 5502 + }, + { + "epoch": 1.4612933209401142, + "grad_norm": 1.3014753471077654, + "learning_rate": 3.716184405268194e-06, + "loss": 0.24324679374694824, + "step": 5503 + }, + { + "epoch": 1.4615588899216572, + "grad_norm": 1.1306865007600708, + "learning_rate": 3.7127694115272805e-06, + "loss": 0.2249709963798523, + "step": 5504 + }, + { + 
"epoch": 1.4618244589032001, + "grad_norm": 1.2915165646779034, + "learning_rate": 3.7093556298902734e-06, + "loss": 0.2560918629169464, + "step": 5505 + }, + { + "epoch": 1.462090027884743, + "grad_norm": 1.154084739271703, + "learning_rate": 3.705943061015309e-06, + "loss": 0.22693020105361938, + "step": 5506 + }, + { + "epoch": 1.462355596866286, + "grad_norm": 1.2640727525169442, + "learning_rate": 3.702531705560292e-06, + "loss": 0.2617371678352356, + "step": 5507 + }, + { + "epoch": 1.462621165847829, + "grad_norm": 1.2561844307954502, + "learning_rate": 3.6991215641828903e-06, + "loss": 0.2314397394657135, + "step": 5508 + }, + { + "epoch": 1.462886734829372, + "grad_norm": 1.1063207547372251, + "learning_rate": 3.6957126375405383e-06, + "loss": 0.23186162114143372, + "step": 5509 + }, + { + "epoch": 1.4631523038109149, + "grad_norm": 1.2602306615156422, + "learning_rate": 3.6923049262904375e-06, + "loss": 0.21775083243846893, + "step": 5510 + }, + { + "epoch": 1.4634178727924578, + "grad_norm": 1.2619669881473867, + "learning_rate": 3.688898431089556e-06, + "loss": 0.24707889556884766, + "step": 5511 + }, + { + "epoch": 1.4636834417740008, + "grad_norm": 1.0923805026421214, + "learning_rate": 3.6854931525946237e-06, + "loss": 0.1941150575876236, + "step": 5512 + }, + { + "epoch": 1.4639490107555437, + "grad_norm": 1.0123090946182933, + "learning_rate": 3.6820890914621376e-06, + "loss": 0.17808857560157776, + "step": 5513 + }, + { + "epoch": 1.4642145797370867, + "grad_norm": 1.2139965705715394, + "learning_rate": 3.678686248348363e-06, + "loss": 0.2150077074766159, + "step": 5514 + }, + { + "epoch": 1.4644801487186296, + "grad_norm": 1.4267562521267494, + "learning_rate": 3.6752846239093276e-06, + "loss": 0.2605292797088623, + "step": 5515 + }, + { + "epoch": 1.4647457177001726, + "grad_norm": 1.202920213288267, + "learning_rate": 3.671884218800822e-06, + "loss": 0.22481867671012878, + "step": 5516 + }, + { + "epoch": 1.4650112866817155, + "grad_norm": 
5.588780783186036, + "learning_rate": 3.668485033678406e-06, + "loss": 0.24453294277191162, + "step": 5517 + }, + { + "epoch": 1.4652768556632585, + "grad_norm": 1.379432138271627, + "learning_rate": 3.6650870691973996e-06, + "loss": 0.2672286033630371, + "step": 5518 + }, + { + "epoch": 1.4655424246448014, + "grad_norm": 1.2625747265975353, + "learning_rate": 3.661690326012897e-06, + "loss": 0.2514987587928772, + "step": 5519 + }, + { + "epoch": 1.4658079936263444, + "grad_norm": 1.3337549906693908, + "learning_rate": 3.6582948047797438e-06, + "loss": 0.25671514868736267, + "step": 5520 + }, + { + "epoch": 1.4660735626078873, + "grad_norm": 1.3535247420304835, + "learning_rate": 3.654900506152561e-06, + "loss": 0.25485602021217346, + "step": 5521 + }, + { + "epoch": 1.4663391315894303, + "grad_norm": 1.1813027271086827, + "learning_rate": 3.6515074307857257e-06, + "loss": 0.23556292057037354, + "step": 5522 + }, + { + "epoch": 1.4666047005709733, + "grad_norm": 1.15604598759747, + "learning_rate": 3.6481155793333855e-06, + "loss": 0.23347696661949158, + "step": 5523 + }, + { + "epoch": 1.4668702695525162, + "grad_norm": 1.218328581124676, + "learning_rate": 3.6447249524494466e-06, + "loss": 0.2405884712934494, + "step": 5524 + }, + { + "epoch": 1.4671358385340592, + "grad_norm": 1.2423110513745568, + "learning_rate": 3.6413355507875845e-06, + "loss": 0.23668336868286133, + "step": 5525 + }, + { + "epoch": 1.467401407515602, + "grad_norm": 1.207526661238473, + "learning_rate": 3.6379473750012375e-06, + "loss": 0.25534945726394653, + "step": 5526 + }, + { + "epoch": 1.467666976497145, + "grad_norm": 1.267472887202726, + "learning_rate": 3.634560425743596e-06, + "loss": 0.22227410972118378, + "step": 5527 + }, + { + "epoch": 1.467932545478688, + "grad_norm": 1.4853214348875312, + "learning_rate": 3.631174703667636e-06, + "loss": 0.23395927250385284, + "step": 5528 + }, + { + "epoch": 1.468198114460231, + "grad_norm": 1.2396534638298151, + "learning_rate": 
3.6277902094260785e-06, + "loss": 0.23419208824634552, + "step": 5529 + }, + { + "epoch": 1.4684636834417741, + "grad_norm": 1.3441597355302621, + "learning_rate": 3.6244069436714158e-06, + "loss": 0.22185654938220978, + "step": 5530 + }, + { + "epoch": 1.468729252423317, + "grad_norm": 1.2489989202798994, + "learning_rate": 3.621024907055901e-06, + "loss": 0.2705134153366089, + "step": 5531 + }, + { + "epoch": 1.46899482140486, + "grad_norm": 1.23195362246657, + "learning_rate": 3.617644100231551e-06, + "loss": 0.23426109552383423, + "step": 5532 + }, + { + "epoch": 1.469260390386403, + "grad_norm": 1.2477206941188708, + "learning_rate": 3.6142645238501462e-06, + "loss": 0.25527146458625793, + "step": 5533 + }, + { + "epoch": 1.469525959367946, + "grad_norm": 1.1030456616341389, + "learning_rate": 3.610886178563228e-06, + "loss": 0.1882668435573578, + "step": 5534 + }, + { + "epoch": 1.469791528349489, + "grad_norm": 1.2622509171219458, + "learning_rate": 3.607509065022101e-06, + "loss": 0.24060532450675964, + "step": 5535 + }, + { + "epoch": 1.4700570973310318, + "grad_norm": 1.2245038712856335, + "learning_rate": 3.6041331838778325e-06, + "loss": 0.23555803298950195, + "step": 5536 + }, + { + "epoch": 1.4703226663125748, + "grad_norm": 1.2192798079575136, + "learning_rate": 3.6007585357812557e-06, + "loss": 0.23126551508903503, + "step": 5537 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 1.139497037450913, + "learning_rate": 3.597385121382961e-06, + "loss": 0.24203836917877197, + "step": 5538 + }, + { + "epoch": 1.4708538042756607, + "grad_norm": 1.2467383616518404, + "learning_rate": 3.5940129413333046e-06, + "loss": 0.239767923951149, + "step": 5539 + }, + { + "epoch": 1.4711193732572037, + "grad_norm": 1.158137574546163, + "learning_rate": 3.5906419962824002e-06, + "loss": 0.24732957780361176, + "step": 5540 + }, + { + "epoch": 1.4713849422387466, + "grad_norm": 1.2722296085836442, + "learning_rate": 3.587272286880131e-06, + "loss": 
0.2296421229839325, + "step": 5541 + }, + { + "epoch": 1.4716505112202896, + "grad_norm": 1.2453973567418024, + "learning_rate": 3.583903813776132e-06, + "loss": 0.2339775711297989, + "step": 5542 + }, + { + "epoch": 1.4719160802018325, + "grad_norm": 1.194940832073201, + "learning_rate": 3.5805365776198052e-06, + "loss": 0.230351984500885, + "step": 5543 + }, + { + "epoch": 1.4721816491833755, + "grad_norm": 1.2792126719917591, + "learning_rate": 3.5771705790603163e-06, + "loss": 0.2501414716243744, + "step": 5544 + }, + { + "epoch": 1.4724472181649184, + "grad_norm": 1.2327284472179139, + "learning_rate": 3.5738058187465864e-06, + "loss": 0.23387153446674347, + "step": 5545 + }, + { + "epoch": 1.4727127871464614, + "grad_norm": 1.2921618045206031, + "learning_rate": 3.570442297327307e-06, + "loss": 0.23874594271183014, + "step": 5546 + }, + { + "epoch": 1.4729783561280043, + "grad_norm": 1.2841826918754735, + "learning_rate": 3.5670800154509245e-06, + "loss": 0.21867451071739197, + "step": 5547 + }, + { + "epoch": 1.4732439251095473, + "grad_norm": 1.2937830650411482, + "learning_rate": 3.563718973765644e-06, + "loss": 0.24124100804328918, + "step": 5548 + }, + { + "epoch": 1.4735094940910902, + "grad_norm": 1.2156419794246578, + "learning_rate": 3.5603591729194377e-06, + "loss": 0.22185327112674713, + "step": 5549 + }, + { + "epoch": 1.4737750630726332, + "grad_norm": 1.1571779294098303, + "learning_rate": 3.5570006135600345e-06, + "loss": 0.21193793416023254, + "step": 5550 + }, + { + "epoch": 1.4740406320541761, + "grad_norm": 1.3939617841899903, + "learning_rate": 3.553643296334924e-06, + "loss": 0.2615143656730652, + "step": 5551 + }, + { + "epoch": 1.474306201035719, + "grad_norm": 1.1936451275051074, + "learning_rate": 3.5502872218913597e-06, + "loss": 0.24937541782855988, + "step": 5552 + }, + { + "epoch": 1.474571770017262, + "grad_norm": 1.0736225386439564, + "learning_rate": 3.5469323908763507e-06, + "loss": 0.22849224507808685, + "step": 5553 + }, + { 
+ "epoch": 1.474837338998805, + "grad_norm": 1.6488166459783042, + "learning_rate": 3.5435788039366657e-06, + "loss": 0.2209717333316803, + "step": 5554 + }, + { + "epoch": 1.475102907980348, + "grad_norm": 1.2992665215674652, + "learning_rate": 3.5402264617188453e-06, + "loss": 0.2529235780239105, + "step": 5555 + }, + { + "epoch": 1.4753684769618909, + "grad_norm": 1.2133685762997675, + "learning_rate": 3.536875364869181e-06, + "loss": 0.2045450657606125, + "step": 5556 + }, + { + "epoch": 1.4756340459434338, + "grad_norm": 1.0591536248970717, + "learning_rate": 3.5335255140337167e-06, + "loss": 0.1973644196987152, + "step": 5557 + }, + { + "epoch": 1.4758996149249768, + "grad_norm": 1.3059187006673687, + "learning_rate": 3.5301769098582685e-06, + "loss": 0.27417299151420593, + "step": 5558 + }, + { + "epoch": 1.4761651839065197, + "grad_norm": 1.2500382678843112, + "learning_rate": 3.5268295529884077e-06, + "loss": 0.24541756510734558, + "step": 5559 + }, + { + "epoch": 1.4764307528880627, + "grad_norm": 1.4461383875060436, + "learning_rate": 3.5234834440694655e-06, + "loss": 0.25785958766937256, + "step": 5560 + }, + { + "epoch": 1.4766963218696056, + "grad_norm": 1.1676448271023605, + "learning_rate": 3.5201385837465307e-06, + "loss": 0.21099212765693665, + "step": 5561 + }, + { + "epoch": 1.4769618908511486, + "grad_norm": 1.1787333048605453, + "learning_rate": 3.5167949726644545e-06, + "loss": 0.26023173332214355, + "step": 5562 + }, + { + "epoch": 1.4772274598326915, + "grad_norm": 1.6670162101301063, + "learning_rate": 3.5134526114678426e-06, + "loss": 0.22882963716983795, + "step": 5563 + }, + { + "epoch": 1.4774930288142345, + "grad_norm": 1.312450944331431, + "learning_rate": 3.5101115008010677e-06, + "loss": 0.21987251937389374, + "step": 5564 + }, + { + "epoch": 1.4777585977957775, + "grad_norm": 1.163985983495263, + "learning_rate": 3.506771641308255e-06, + "loss": 0.2169610857963562, + "step": 5565 + }, + { + "epoch": 1.4780241667773204, + 
"grad_norm": 4.440133890295746, + "learning_rate": 3.50343303363329e-06, + "loss": 0.22723034024238586, + "step": 5566 + }, + { + "epoch": 1.4782897357588634, + "grad_norm": 1.2392064660120468, + "learning_rate": 3.5000956784198157e-06, + "loss": 0.23738276958465576, + "step": 5567 + }, + { + "epoch": 1.4785553047404063, + "grad_norm": 1.1818266174210303, + "learning_rate": 3.496759576311235e-06, + "loss": 0.19922251999378204, + "step": 5568 + }, + { + "epoch": 1.4788208737219493, + "grad_norm": 1.294067668946831, + "learning_rate": 3.4934247279507092e-06, + "loss": 0.22529268264770508, + "step": 5569 + }, + { + "epoch": 1.4790864427034922, + "grad_norm": 1.3551359298814187, + "learning_rate": 3.4900911339811583e-06, + "loss": 0.26758015155792236, + "step": 5570 + }, + { + "epoch": 1.4793520116850352, + "grad_norm": 1.2627897957153122, + "learning_rate": 3.48675879504526e-06, + "loss": 0.24752648174762726, + "step": 5571 + }, + { + "epoch": 1.4796175806665781, + "grad_norm": 1.3085621441307098, + "learning_rate": 3.483427711785449e-06, + "loss": 0.25337618589401245, + "step": 5572 + }, + { + "epoch": 1.479883149648121, + "grad_norm": 1.3543288061594618, + "learning_rate": 3.480097884843919e-06, + "loss": 0.24504786729812622, + "step": 5573 + }, + { + "epoch": 1.480148718629664, + "grad_norm": 1.1750849317955903, + "learning_rate": 3.4767693148626223e-06, + "loss": 0.21255145967006683, + "step": 5574 + }, + { + "epoch": 1.480414287611207, + "grad_norm": 1.2853041773936769, + "learning_rate": 3.473442002483267e-06, + "loss": 0.2501891553401947, + "step": 5575 + }, + { + "epoch": 1.48067985659275, + "grad_norm": 1.195974425335747, + "learning_rate": 3.4701159483473202e-06, + "loss": 0.25276634097099304, + "step": 5576 + }, + { + "epoch": 1.4809454255742929, + "grad_norm": 1.427206116406706, + "learning_rate": 3.4667911530960052e-06, + "loss": 0.2760567367076874, + "step": 5577 + }, + { + "epoch": 1.4812109945558358, + "grad_norm": 1.2442739080424003, + 
"learning_rate": 3.463467617370305e-06, + "loss": 0.22686481475830078, + "step": 5578 + }, + { + "epoch": 1.4814765635373788, + "grad_norm": 1.2374194002920247, + "learning_rate": 3.4601453418109554e-06, + "loss": 0.23262599110603333, + "step": 5579 + }, + { + "epoch": 1.4817421325189217, + "grad_norm": 1.2263890428702933, + "learning_rate": 3.4568243270584545e-06, + "loss": 0.22231365740299225, + "step": 5580 + }, + { + "epoch": 1.4820077015004647, + "grad_norm": 1.2193067799394695, + "learning_rate": 3.4535045737530504e-06, + "loss": 0.22237855195999146, + "step": 5581 + }, + { + "epoch": 1.4822732704820076, + "grad_norm": 1.208437884817879, + "learning_rate": 3.4501860825347587e-06, + "loss": 0.2260412871837616, + "step": 5582 + }, + { + "epoch": 1.4825388394635506, + "grad_norm": 1.3488909026023506, + "learning_rate": 3.4468688540433425e-06, + "loss": 0.2133496105670929, + "step": 5583 + }, + { + "epoch": 1.4828044084450935, + "grad_norm": 1.231358912436915, + "learning_rate": 3.4435528889183245e-06, + "loss": 0.24750375747680664, + "step": 5584 + }, + { + "epoch": 1.4830699774266365, + "grad_norm": 1.2053641188090713, + "learning_rate": 3.440238187798983e-06, + "loss": 0.23673412203788757, + "step": 5585 + }, + { + "epoch": 1.4833355464081794, + "grad_norm": 1.312048381493266, + "learning_rate": 3.436924751324354e-06, + "loss": 0.2505243420600891, + "step": 5586 + }, + { + "epoch": 1.4836011153897224, + "grad_norm": 1.2769153596955758, + "learning_rate": 3.433612580133229e-06, + "loss": 0.276151180267334, + "step": 5587 + }, + { + "epoch": 1.4838666843712653, + "grad_norm": 1.0245497892529305, + "learning_rate": 3.430301674864154e-06, + "loss": 0.1756816953420639, + "step": 5588 + }, + { + "epoch": 1.4841322533528083, + "grad_norm": 1.2667973514811224, + "learning_rate": 3.4269920361554342e-06, + "loss": 0.25901898741722107, + "step": 5589 + }, + { + "epoch": 1.4843978223343512, + "grad_norm": 1.2034260428652863, + "learning_rate": 3.4236836646451286e-06, + 
"loss": 0.21196085214614868, + "step": 5590 + }, + { + "epoch": 1.4846633913158942, + "grad_norm": 1.2887221468811698, + "learning_rate": 3.4203765609710525e-06, + "loss": 0.24153128266334534, + "step": 5591 + }, + { + "epoch": 1.4849289602974372, + "grad_norm": 1.2285562462634616, + "learning_rate": 3.4170707257707757e-06, + "loss": 0.25715887546539307, + "step": 5592 + }, + { + "epoch": 1.48519452927898, + "grad_norm": 1.430212837200284, + "learning_rate": 3.413766159681624e-06, + "loss": 0.2920379042625427, + "step": 5593 + }, + { + "epoch": 1.485460098260523, + "grad_norm": 1.2173970332611068, + "learning_rate": 3.41046286334068e-06, + "loss": 0.22127456963062286, + "step": 5594 + }, + { + "epoch": 1.485725667242066, + "grad_norm": 1.2534339617557788, + "learning_rate": 3.4071608373847786e-06, + "loss": 0.23103584349155426, + "step": 5595 + }, + { + "epoch": 1.485991236223609, + "grad_norm": 1.2999427041349472, + "learning_rate": 3.403860082450513e-06, + "loss": 0.29068222641944885, + "step": 5596 + }, + { + "epoch": 1.486256805205152, + "grad_norm": 1.2532608064541852, + "learning_rate": 3.4005605991742296e-06, + "loss": 0.23703888058662415, + "step": 5597 + }, + { + "epoch": 1.4865223741866949, + "grad_norm": 1.4039489349034764, + "learning_rate": 3.3972623881920296e-06, + "loss": 0.23348261415958405, + "step": 5598 + }, + { + "epoch": 1.4867879431682378, + "grad_norm": 1.1603139615742908, + "learning_rate": 3.3939654501397645e-06, + "loss": 0.24733223021030426, + "step": 5599 + }, + { + "epoch": 1.487053512149781, + "grad_norm": 1.1220204153088178, + "learning_rate": 3.3906697856530548e-06, + "loss": 0.22576835751533508, + "step": 5600 + }, + { + "epoch": 1.487319081131324, + "grad_norm": 1.1809335952834177, + "learning_rate": 3.3873753953672593e-06, + "loss": 0.20863527059555054, + "step": 5601 + }, + { + "epoch": 1.487584650112867, + "grad_norm": 1.1823379745083873, + "learning_rate": 3.384082279917499e-06, + "loss": 0.2299712598323822, + "step": 5602 + }, 
+ { + "epoch": 1.4878502190944098, + "grad_norm": 1.1858521746021262, + "learning_rate": 3.380790439938648e-06, + "loss": 0.23058944940567017, + "step": 5603 + }, + { + "epoch": 1.4881157880759528, + "grad_norm": 1.1304663814123712, + "learning_rate": 3.3774998760653344e-06, + "loss": 0.20307201147079468, + "step": 5604 + }, + { + "epoch": 1.4883813570574957, + "grad_norm": 1.112411027996001, + "learning_rate": 3.3742105889319388e-06, + "loss": 0.2296266108751297, + "step": 5605 + }, + { + "epoch": 1.4886469260390387, + "grad_norm": 1.3206442060716181, + "learning_rate": 3.370922579172601e-06, + "loss": 0.22702309489250183, + "step": 5606 + }, + { + "epoch": 1.4889124950205816, + "grad_norm": 1.4590848907033545, + "learning_rate": 3.3676358474212035e-06, + "loss": 0.30432331562042236, + "step": 5607 + }, + { + "epoch": 1.4891780640021246, + "grad_norm": 1.201356120373459, + "learning_rate": 3.3643503943113907e-06, + "loss": 0.2488052248954773, + "step": 5608 + }, + { + "epoch": 1.4894436329836676, + "grad_norm": 1.2096846483257637, + "learning_rate": 3.361066220476564e-06, + "loss": 0.2221754938364029, + "step": 5609 + }, + { + "epoch": 1.4897092019652105, + "grad_norm": 1.289556223007011, + "learning_rate": 3.3577833265498728e-06, + "loss": 0.2547761797904968, + "step": 5610 + }, + { + "epoch": 1.4899747709467535, + "grad_norm": 1.3306628367975963, + "learning_rate": 3.3545017131642164e-06, + "loss": 0.21811938285827637, + "step": 5611 + }, + { + "epoch": 1.4902403399282964, + "grad_norm": 1.4022029015386877, + "learning_rate": 3.3512213809522554e-06, + "loss": 0.30436158180236816, + "step": 5612 + }, + { + "epoch": 1.4905059089098394, + "grad_norm": 1.2224150283856856, + "learning_rate": 3.3479423305463953e-06, + "loss": 0.2053622156381607, + "step": 5613 + }, + { + "epoch": 1.4907714778913823, + "grad_norm": 1.3026832238379669, + "learning_rate": 3.344664562578801e-06, + "loss": 0.2017601728439331, + "step": 5614 + }, + { + "epoch": 1.4910370468729253, + 
"grad_norm": 1.2856046275416113, + "learning_rate": 3.341388077681387e-06, + "loss": 0.23668046295642853, + "step": 5615 + }, + { + "epoch": 1.4913026158544682, + "grad_norm": 1.1460002150937032, + "learning_rate": 3.338112876485821e-06, + "loss": 0.20016951858997345, + "step": 5616 + }, + { + "epoch": 1.4915681848360112, + "grad_norm": 1.3606548245166536, + "learning_rate": 3.3348389596235177e-06, + "loss": 0.25477850437164307, + "step": 5617 + }, + { + "epoch": 1.4918337538175541, + "grad_norm": 1.2758175160721472, + "learning_rate": 3.3315663277256594e-06, + "loss": 0.24063366651535034, + "step": 5618 + }, + { + "epoch": 1.492099322799097, + "grad_norm": 1.2737128535751616, + "learning_rate": 3.328294981423165e-06, + "loss": 0.23443251848220825, + "step": 5619 + }, + { + "epoch": 1.49236489178064, + "grad_norm": 1.1580169148577781, + "learning_rate": 3.325024921346717e-06, + "loss": 0.21191264688968658, + "step": 5620 + }, + { + "epoch": 1.492630460762183, + "grad_norm": 1.213323558189925, + "learning_rate": 3.3217561481267367e-06, + "loss": 0.22062326967716217, + "step": 5621 + }, + { + "epoch": 1.492896029743726, + "grad_norm": 1.1757529457487401, + "learning_rate": 3.318488662393409e-06, + "loss": 0.2235480695962906, + "step": 5622 + }, + { + "epoch": 1.4931615987252689, + "grad_norm": 1.2611472240425432, + "learning_rate": 3.315222464776665e-06, + "loss": 0.26665517687797546, + "step": 5623 + }, + { + "epoch": 1.4934271677068118, + "grad_norm": 1.270220596773442, + "learning_rate": 3.3119575559061902e-06, + "loss": 0.24300602078437805, + "step": 5624 + }, + { + "epoch": 1.4936927366883548, + "grad_norm": 1.2622444254847978, + "learning_rate": 3.308693936411421e-06, + "loss": 0.25441884994506836, + "step": 5625 + }, + { + "epoch": 1.4939583056698977, + "grad_norm": 1.2781695234171213, + "learning_rate": 3.3054316069215407e-06, + "loss": 0.23236152529716492, + "step": 5626 + }, + { + "epoch": 1.4942238746514407, + "grad_norm": 1.2299113342509724, + 
"learning_rate": 3.3021705680654946e-06, + "loss": 0.24535568058490753, + "step": 5627 + }, + { + "epoch": 1.4944894436329836, + "grad_norm": 1.3635919919461823, + "learning_rate": 3.29891082047197e-06, + "loss": 0.2542986273765564, + "step": 5628 + }, + { + "epoch": 1.4947550126145266, + "grad_norm": 1.3442816383357798, + "learning_rate": 3.295652364769407e-06, + "loss": 0.26490268111228943, + "step": 5629 + }, + { + "epoch": 1.4950205815960695, + "grad_norm": 1.2455944135633985, + "learning_rate": 3.292395201585997e-06, + "loss": 0.25576913356781006, + "step": 5630 + }, + { + "epoch": 1.4952861505776125, + "grad_norm": 1.321982811797117, + "learning_rate": 3.2891393315496846e-06, + "loss": 0.2930823266506195, + "step": 5631 + }, + { + "epoch": 1.4955517195591554, + "grad_norm": 1.3029577245101889, + "learning_rate": 3.285884755288161e-06, + "loss": 0.2426074892282486, + "step": 5632 + }, + { + "epoch": 1.4958172885406984, + "grad_norm": 1.1912484566122454, + "learning_rate": 3.2826314734288713e-06, + "loss": 0.24090878665447235, + "step": 5633 + }, + { + "epoch": 1.4960828575222413, + "grad_norm": 1.291391881665867, + "learning_rate": 3.2793794865990092e-06, + "loss": 0.26155173778533936, + "step": 5634 + }, + { + "epoch": 1.4963484265037843, + "grad_norm": 1.2581171617638447, + "learning_rate": 3.2761287954255195e-06, + "loss": 0.2594009041786194, + "step": 5635 + }, + { + "epoch": 1.4966139954853273, + "grad_norm": 1.248912763921314, + "learning_rate": 3.2728794005350972e-06, + "loss": 0.24434763193130493, + "step": 5636 + }, + { + "epoch": 1.4968795644668702, + "grad_norm": 1.3459414061970596, + "learning_rate": 3.269631302554188e-06, + "loss": 0.2622208297252655, + "step": 5637 + }, + { + "epoch": 1.4971451334484132, + "grad_norm": 1.2222057610309294, + "learning_rate": 3.266384502108987e-06, + "loss": 0.18913154304027557, + "step": 5638 + }, + { + "epoch": 1.497410702429956, + "grad_norm": 1.260519406868159, + "learning_rate": 3.263138999825437e-06, + 
"loss": 0.2610907554626465, + "step": 5639 + }, + { + "epoch": 1.497676271411499, + "grad_norm": 1.2585537664404678, + "learning_rate": 3.2598947963292337e-06, + "loss": 0.25841569900512695, + "step": 5640 + }, + { + "epoch": 1.497941840393042, + "grad_norm": 1.1680179490188496, + "learning_rate": 3.256651892245822e-06, + "loss": 0.2066381573677063, + "step": 5641 + }, + { + "epoch": 1.4982074093745852, + "grad_norm": 1.1877407935219242, + "learning_rate": 3.253410288200396e-06, + "loss": 0.23956719040870667, + "step": 5642 + }, + { + "epoch": 1.4984729783561281, + "grad_norm": 1.1996406642135662, + "learning_rate": 3.250169984817897e-06, + "loss": 0.23999394476413727, + "step": 5643 + }, + { + "epoch": 1.498738547337671, + "grad_norm": 1.4056134439986134, + "learning_rate": 3.2469309827230156e-06, + "loss": 0.24273940920829773, + "step": 5644 + }, + { + "epoch": 1.499004116319214, + "grad_norm": 1.193555704549332, + "learning_rate": 3.2436932825401977e-06, + "loss": 0.2212621569633484, + "step": 5645 + }, + { + "epoch": 1.499269685300757, + "grad_norm": 1.293874995027958, + "learning_rate": 3.2404568848936325e-06, + "loss": 0.2487148940563202, + "step": 5646 + }, + { + "epoch": 1.4995352542823, + "grad_norm": 1.2610121684030642, + "learning_rate": 3.237221790407259e-06, + "loss": 0.29314422607421875, + "step": 5647 + }, + { + "epoch": 1.499800823263843, + "grad_norm": 1.1765702458871505, + "learning_rate": 3.233987999704763e-06, + "loss": 0.22727417945861816, + "step": 5648 + }, + { + "epoch": 1.5000663922453858, + "grad_norm": 1.1578089091098656, + "learning_rate": 3.230755513409585e-06, + "loss": 0.18877442181110382, + "step": 5649 + }, + { + "epoch": 1.5003319612269288, + "grad_norm": 1.2855274132536632, + "learning_rate": 3.2275243321449068e-06, + "loss": 0.2504552900791168, + "step": 5650 + }, + { + "epoch": 1.5005975302084718, + "grad_norm": 1.1905373910388852, + "learning_rate": 3.224294456533663e-06, + "loss": 0.23579174280166626, + "step": 5651 + }, + { + 
"epoch": 1.5008630991900147, + "grad_norm": 1.3692203179408873, + "learning_rate": 3.221065887198537e-06, + "loss": 0.29236793518066406, + "step": 5652 + }, + { + "epoch": 1.5011286681715577, + "grad_norm": 1.3245217175369617, + "learning_rate": 3.2178386247619577e-06, + "loss": 0.2735568881034851, + "step": 5653 + }, + { + "epoch": 1.5013942371531006, + "grad_norm": 1.240462888838021, + "learning_rate": 3.214612669846103e-06, + "loss": 0.2391616702079773, + "step": 5654 + }, + { + "epoch": 1.5016598061346436, + "grad_norm": 1.3766117264936455, + "learning_rate": 3.2113880230729e-06, + "loss": 0.24532485008239746, + "step": 5655 + }, + { + "epoch": 1.5019253751161865, + "grad_norm": 1.3310069624279295, + "learning_rate": 3.2081646850640215e-06, + "loss": 0.2605767250061035, + "step": 5656 + }, + { + "epoch": 1.5021909440977295, + "grad_norm": 1.2109489933208193, + "learning_rate": 3.2049426564408893e-06, + "loss": 0.2651350200176239, + "step": 5657 + }, + { + "epoch": 1.5024565130792724, + "grad_norm": 1.3305800775425032, + "learning_rate": 3.2017219378246734e-06, + "loss": 0.2719389498233795, + "step": 5658 + }, + { + "epoch": 1.5027220820608154, + "grad_norm": 1.2359239723239188, + "learning_rate": 3.198502529836288e-06, + "loss": 0.23077815771102905, + "step": 5659 + }, + { + "epoch": 1.5029876510423583, + "grad_norm": 1.0838054114896152, + "learning_rate": 3.1952844330964007e-06, + "loss": 0.21954959630966187, + "step": 5660 + }, + { + "epoch": 1.5032532200239013, + "grad_norm": 1.3480229773492907, + "learning_rate": 3.1920676482254186e-06, + "loss": 0.28229185938835144, + "step": 5661 + }, + { + "epoch": 1.5035187890054442, + "grad_norm": 1.2587796771658648, + "learning_rate": 3.1888521758435e-06, + "loss": 0.24612295627593994, + "step": 5662 + }, + { + "epoch": 1.5037843579869872, + "grad_norm": 1.2649379995915024, + "learning_rate": 3.185638016570555e-06, + "loss": 0.24191413819789886, + "step": 5663 + }, + { + "epoch": 1.5040499269685301, + "grad_norm": 
1.225446339219085, + "learning_rate": 3.1824251710262323e-06, + "loss": 0.2427935004234314, + "step": 5664 + }, + { + "epoch": 1.504315495950073, + "grad_norm": 1.2595635392757376, + "learning_rate": 3.17921363982993e-06, + "loss": 0.2600318193435669, + "step": 5665 + }, + { + "epoch": 1.504581064931616, + "grad_norm": 1.2817020254494476, + "learning_rate": 3.1760034236007954e-06, + "loss": 0.25215205550193787, + "step": 5666 + }, + { + "epoch": 1.504846633913159, + "grad_norm": 1.2568573714231897, + "learning_rate": 3.1727945229577183e-06, + "loss": 0.24460548162460327, + "step": 5667 + }, + { + "epoch": 1.505112202894702, + "grad_norm": 1.2881955251422392, + "learning_rate": 3.169586938519338e-06, + "loss": 0.2812577486038208, + "step": 5668 + }, + { + "epoch": 1.5053777718762449, + "grad_norm": 1.1272225605105841, + "learning_rate": 3.166380670904039e-06, + "loss": 0.23297616839408875, + "step": 5669 + }, + { + "epoch": 1.5056433408577878, + "grad_norm": 1.1954331932042688, + "learning_rate": 3.163175720729954e-06, + "loss": 0.21659572422504425, + "step": 5670 + }, + { + "epoch": 1.5059089098393308, + "grad_norm": 1.2142230208725098, + "learning_rate": 3.1599720886149508e-06, + "loss": 0.22246181964874268, + "step": 5671 + }, + { + "epoch": 1.5061744788208737, + "grad_norm": 1.132636194795227, + "learning_rate": 3.1567697751766624e-06, + "loss": 0.20020918548107147, + "step": 5672 + }, + { + "epoch": 1.5064400478024167, + "grad_norm": 1.363041735701654, + "learning_rate": 3.1535687810324523e-06, + "loss": 0.25693628191947937, + "step": 5673 + }, + { + "epoch": 1.5067056167839596, + "grad_norm": 1.5250673507385644, + "learning_rate": 3.150369106799436e-06, + "loss": 0.21841923892498016, + "step": 5674 + }, + { + "epoch": 1.5069711857655026, + "grad_norm": 1.1710254495806258, + "learning_rate": 3.1471707530944707e-06, + "loss": 0.18131780624389648, + "step": 5675 + }, + { + "epoch": 1.5072367547470455, + "grad_norm": 1.180596749481675, + "learning_rate": 
3.143973720534164e-06, + "loss": 0.22510449588298798, + "step": 5676 + }, + { + "epoch": 1.5075023237285885, + "grad_norm": 1.3952546557365002, + "learning_rate": 3.1407780097348627e-06, + "loss": 0.23721462488174438, + "step": 5677 + }, + { + "epoch": 1.5077678927101315, + "grad_norm": 1.2200574848273704, + "learning_rate": 3.1375836213126653e-06, + "loss": 0.24281899631023407, + "step": 5678 + }, + { + "epoch": 1.5080334616916744, + "grad_norm": 1.3211068465604292, + "learning_rate": 3.134390555883412e-06, + "loss": 0.23910081386566162, + "step": 5679 + }, + { + "epoch": 1.5082990306732174, + "grad_norm": 1.357027881520108, + "learning_rate": 3.1311988140626825e-06, + "loss": 0.2635132670402527, + "step": 5680 + }, + { + "epoch": 1.5085645996547603, + "grad_norm": 1.239638674575543, + "learning_rate": 3.1280083964658147e-06, + "loss": 0.24802634119987488, + "step": 5681 + }, + { + "epoch": 1.5088301686363033, + "grad_norm": 1.3861680174510138, + "learning_rate": 3.1248193037078823e-06, + "loss": 0.24081437289714813, + "step": 5682 + }, + { + "epoch": 1.5090957376178462, + "grad_norm": 1.2124748227090532, + "learning_rate": 3.121631536403701e-06, + "loss": 0.19550001621246338, + "step": 5683 + }, + { + "epoch": 1.5093613065993892, + "grad_norm": 1.309177755877421, + "learning_rate": 3.118445095167837e-06, + "loss": 0.2397807538509369, + "step": 5684 + }, + { + "epoch": 1.5096268755809321, + "grad_norm": 1.2243819490197418, + "learning_rate": 3.115259980614602e-06, + "loss": 0.2185651659965515, + "step": 5685 + }, + { + "epoch": 1.509892444562475, + "grad_norm": 1.2555724014592389, + "learning_rate": 3.1120761933580414e-06, + "loss": 0.22214055061340332, + "step": 5686 + }, + { + "epoch": 1.510158013544018, + "grad_norm": 1.4127254863789025, + "learning_rate": 3.108893734011955e-06, + "loss": 0.23971091210842133, + "step": 5687 + }, + { + "epoch": 1.510423582525561, + "grad_norm": 1.3331222718828735, + "learning_rate": 3.1057126031898843e-06, + "loss": 
0.26458197832107544, + "step": 5688 + }, + { + "epoch": 1.510689151507104, + "grad_norm": 1.3487790050882777, + "learning_rate": 3.1025328015051093e-06, + "loss": 0.23730339109897614, + "step": 5689 + }, + { + "epoch": 1.5109547204886469, + "grad_norm": 1.2964784198979393, + "learning_rate": 3.0993543295706653e-06, + "loss": 0.21981677412986755, + "step": 5690 + }, + { + "epoch": 1.5112202894701898, + "grad_norm": 1.1812817656913812, + "learning_rate": 3.0961771879993206e-06, + "loss": 0.21984878182411194, + "step": 5691 + }, + { + "epoch": 1.5114858584517328, + "grad_norm": 1.2732802047873515, + "learning_rate": 3.093001377403592e-06, + "loss": 0.23086440563201904, + "step": 5692 + }, + { + "epoch": 1.5117514274332757, + "grad_norm": 2.3681680891314953, + "learning_rate": 3.0898268983957368e-06, + "loss": 0.2355024814605713, + "step": 5693 + }, + { + "epoch": 1.5120169964148187, + "grad_norm": 1.3061363772251866, + "learning_rate": 3.0866537515877584e-06, + "loss": 0.21210229396820068, + "step": 5694 + }, + { + "epoch": 1.5122825653963616, + "grad_norm": 1.3436771657394675, + "learning_rate": 3.0834819375914003e-06, + "loss": 0.2387622594833374, + "step": 5695 + }, + { + "epoch": 1.5125481343779046, + "grad_norm": 1.3482258979232278, + "learning_rate": 3.0803114570181527e-06, + "loss": 0.23822402954101562, + "step": 5696 + }, + { + "epoch": 1.5128137033594475, + "grad_norm": 1.3248058910768958, + "learning_rate": 3.0771423104792454e-06, + "loss": 0.26844173669815063, + "step": 5697 + }, + { + "epoch": 1.5130792723409905, + "grad_norm": 1.2131778927640824, + "learning_rate": 3.07397449858565e-06, + "loss": 0.23288767039775848, + "step": 5698 + }, + { + "epoch": 1.5133448413225334, + "grad_norm": 1.2716046597052009, + "learning_rate": 3.0708080219480896e-06, + "loss": 0.23273086547851562, + "step": 5699 + }, + { + "epoch": 1.5136104103040764, + "grad_norm": 1.4240236624695346, + "learning_rate": 3.067642881177023e-06, + "loss": 0.2505509555339813, + "step": 5700 + 
}, + { + "epoch": 1.5138759792856193, + "grad_norm": 1.1441752919653974, + "learning_rate": 3.0644790768826473e-06, + "loss": 0.22801508009433746, + "step": 5701 + }, + { + "epoch": 1.5141415482671623, + "grad_norm": 1.1462347465841034, + "learning_rate": 3.061316609674908e-06, + "loss": 0.2110593169927597, + "step": 5702 + }, + { + "epoch": 1.5144071172487052, + "grad_norm": 1.2145033288630525, + "learning_rate": 3.0581554801634927e-06, + "loss": 0.22201795876026154, + "step": 5703 + }, + { + "epoch": 1.5146726862302482, + "grad_norm": 1.2993896506173446, + "learning_rate": 3.054995688957829e-06, + "loss": 0.23104460537433624, + "step": 5704 + }, + { + "epoch": 1.5149382552117912, + "grad_norm": 1.5590161841107484, + "learning_rate": 3.0518372366670877e-06, + "loss": 0.23373261094093323, + "step": 5705 + }, + { + "epoch": 1.515203824193334, + "grad_norm": 1.368121139637646, + "learning_rate": 3.0486801239001806e-06, + "loss": 0.2404957264661789, + "step": 5706 + }, + { + "epoch": 1.515469393174877, + "grad_norm": 1.2346548477581518, + "learning_rate": 3.0455243512657606e-06, + "loss": 0.23209382593631744, + "step": 5707 + }, + { + "epoch": 1.51573496215642, + "grad_norm": 1.156984368318911, + "learning_rate": 3.042369919372228e-06, + "loss": 0.218237042427063, + "step": 5708 + }, + { + "epoch": 1.516000531137963, + "grad_norm": 12.380411974697722, + "learning_rate": 3.039216828827717e-06, + "loss": 0.25025027990341187, + "step": 5709 + }, + { + "epoch": 1.516266100119506, + "grad_norm": 1.3454644235463973, + "learning_rate": 3.036065080240106e-06, + "loss": 0.24729448556900024, + "step": 5710 + }, + { + "epoch": 1.5165316691010489, + "grad_norm": 1.246980236713752, + "learning_rate": 3.032914674217017e-06, + "loss": 0.23614796996116638, + "step": 5711 + }, + { + "epoch": 1.5167972380825918, + "grad_norm": 1.1947534591327391, + "learning_rate": 3.029765611365808e-06, + "loss": 0.2313452661037445, + "step": 5712 + }, + { + "epoch": 1.5170628070641348, + "grad_norm": 
1.2169352172923076, + "learning_rate": 3.0266178922935842e-06, + "loss": 0.22152003645896912, + "step": 5713 + }, + { + "epoch": 1.5173283760456777, + "grad_norm": 1.3132034423317465, + "learning_rate": 3.0234715176071874e-06, + "loss": 0.25942179560661316, + "step": 5714 + }, + { + "epoch": 1.5175939450272207, + "grad_norm": 1.213532583392701, + "learning_rate": 3.0203264879132e-06, + "loss": 0.25030237436294556, + "step": 5715 + }, + { + "epoch": 1.5178595140087636, + "grad_norm": 1.212709044397772, + "learning_rate": 3.0171828038179497e-06, + "loss": 0.2025807797908783, + "step": 5716 + }, + { + "epoch": 1.5181250829903066, + "grad_norm": 1.3035190960753136, + "learning_rate": 3.014040465927499e-06, + "loss": 0.20455190539360046, + "step": 5717 + }, + { + "epoch": 1.5183906519718495, + "grad_norm": 1.2171025232725439, + "learning_rate": 3.010899474847655e-06, + "loss": 0.24197113513946533, + "step": 5718 + }, + { + "epoch": 1.5186562209533925, + "grad_norm": 1.243656057613246, + "learning_rate": 3.007759831183964e-06, + "loss": 0.22290384769439697, + "step": 5719 + }, + { + "epoch": 1.5189217899349357, + "grad_norm": 1.133911078511842, + "learning_rate": 3.0046215355417117e-06, + "loss": 0.23087520897388458, + "step": 5720 + }, + { + "epoch": 1.5191873589164786, + "grad_norm": 1.3329430419316783, + "learning_rate": 3.0014845885259236e-06, + "loss": 0.24425405263900757, + "step": 5721 + }, + { + "epoch": 1.5194529278980216, + "grad_norm": 1.310265396817766, + "learning_rate": 2.9983489907413675e-06, + "loss": 0.24888862669467926, + "step": 5722 + }, + { + "epoch": 1.5197184968795645, + "grad_norm": 1.3023172954247402, + "learning_rate": 2.9952147427925493e-06, + "loss": 0.23556756973266602, + "step": 5723 + }, + { + "epoch": 1.5199840658611075, + "grad_norm": 1.3924872169111115, + "learning_rate": 2.992081845283715e-06, + "loss": 0.2532619833946228, + "step": 5724 + }, + { + "epoch": 1.5202496348426504, + "grad_norm": 1.3351422936737996, + "learning_rate": 
2.988950298818848e-06, + "loss": 0.2574974000453949, + "step": 5725 + }, + { + "epoch": 1.5205152038241934, + "grad_norm": 1.1244851887087242, + "learning_rate": 2.9858201040016775e-06, + "loss": 0.21997734904289246, + "step": 5726 + }, + { + "epoch": 1.5207807728057363, + "grad_norm": 1.3952335702566243, + "learning_rate": 2.982691261435666e-06, + "loss": 0.2174127697944641, + "step": 5727 + }, + { + "epoch": 1.5210463417872793, + "grad_norm": 1.4277294646697747, + "learning_rate": 2.979563771724019e-06, + "loss": 0.22455093264579773, + "step": 5728 + }, + { + "epoch": 1.5213119107688222, + "grad_norm": 1.2606427849530746, + "learning_rate": 2.976437635469678e-06, + "loss": 0.270727276802063, + "step": 5729 + }, + { + "epoch": 1.5215774797503652, + "grad_norm": 1.1901052998095392, + "learning_rate": 2.9733128532753254e-06, + "loss": 0.2233714610338211, + "step": 5730 + }, + { + "epoch": 1.5218430487319081, + "grad_norm": 1.364720864117707, + "learning_rate": 2.970189425743383e-06, + "loss": 0.23599566519260406, + "step": 5731 + }, + { + "epoch": 1.522108617713451, + "grad_norm": 1.2707197493270106, + "learning_rate": 2.967067353476011e-06, + "loss": 0.23598654568195343, + "step": 5732 + }, + { + "epoch": 1.522374186694994, + "grad_norm": 1.1793549120144597, + "learning_rate": 2.963946637075107e-06, + "loss": 0.205197274684906, + "step": 5733 + }, + { + "epoch": 1.522639755676537, + "grad_norm": 1.1887492971446227, + "learning_rate": 2.9608272771423073e-06, + "loss": 0.23581506311893463, + "step": 5734 + }, + { + "epoch": 1.52290532465808, + "grad_norm": 1.2937911951812968, + "learning_rate": 2.9577092742789915e-06, + "loss": 0.2088197022676468, + "step": 5735 + }, + { + "epoch": 1.5231708936396229, + "grad_norm": 1.2943182118738674, + "learning_rate": 2.95459262908627e-06, + "loss": 0.22607067227363586, + "step": 5736 + }, + { + "epoch": 1.5234364626211658, + "grad_norm": 1.1748118237242067, + "learning_rate": 2.951477342164998e-06, + "loss": 0.22242344915866852, 
+ "step": 5737 + }, + { + "epoch": 1.5237020316027088, + "grad_norm": 1.3280405020263697, + "learning_rate": 2.9483634141157636e-06, + "loss": 0.25626271963119507, + "step": 5738 + }, + { + "epoch": 1.5239676005842517, + "grad_norm": 1.2212084732536523, + "learning_rate": 2.9452508455388975e-06, + "loss": 0.2241421341896057, + "step": 5739 + }, + { + "epoch": 1.5242331695657947, + "grad_norm": 1.5088982481303157, + "learning_rate": 2.9421396370344648e-06, + "loss": 0.2191103994846344, + "step": 5740 + }, + { + "epoch": 1.5244987385473376, + "grad_norm": 1.2411878451658047, + "learning_rate": 2.9390297892022703e-06, + "loss": 0.26252660155296326, + "step": 5741 + }, + { + "epoch": 1.5247643075288806, + "grad_norm": 1.3964551352557335, + "learning_rate": 2.9359213026418567e-06, + "loss": 0.21522507071495056, + "step": 5742 + }, + { + "epoch": 1.5250298765104235, + "grad_norm": 1.0905013771622027, + "learning_rate": 2.932814177952499e-06, + "loss": 0.20159044861793518, + "step": 5743 + }, + { + "epoch": 1.5252954454919665, + "grad_norm": 1.138416177249403, + "learning_rate": 2.929708415733221e-06, + "loss": 0.22679558396339417, + "step": 5744 + }, + { + "epoch": 1.5255610144735094, + "grad_norm": 1.199157018703913, + "learning_rate": 2.926604016582776e-06, + "loss": 0.2315664291381836, + "step": 5745 + }, + { + "epoch": 1.5258265834550524, + "grad_norm": 1.2568252329386058, + "learning_rate": 2.923500981099652e-06, + "loss": 0.229634091258049, + "step": 5746 + }, + { + "epoch": 1.5260921524365954, + "grad_norm": 1.2179751735416722, + "learning_rate": 2.9203993098820793e-06, + "loss": 0.20657674968242645, + "step": 5747 + }, + { + "epoch": 1.5263577214181385, + "grad_norm": 1.2447733239425043, + "learning_rate": 2.9172990035280237e-06, + "loss": 0.2306358814239502, + "step": 5748 + }, + { + "epoch": 1.5266232903996815, + "grad_norm": 1.2950411042959078, + "learning_rate": 2.9142000626351875e-06, + "loss": 0.2608031928539276, + "step": 5749 + }, + { + "epoch": 
1.5268888593812244, + "grad_norm": 1.337100599856471, + "learning_rate": 2.911102487801013e-06, + "loss": 0.24675670266151428, + "step": 5750 + }, + { + "epoch": 1.5271544283627674, + "grad_norm": 1.3568337572597398, + "learning_rate": 2.908006279622667e-06, + "loss": 0.22544966638088226, + "step": 5751 + }, + { + "epoch": 1.5274199973443103, + "grad_norm": 1.3214418017258782, + "learning_rate": 2.904911438697071e-06, + "loss": 0.2328556478023529, + "step": 5752 + }, + { + "epoch": 1.5276855663258533, + "grad_norm": 1.25396823790717, + "learning_rate": 2.901817965620871e-06, + "loss": 0.2316005825996399, + "step": 5753 + }, + { + "epoch": 1.5279511353073962, + "grad_norm": 1.2976508240318196, + "learning_rate": 2.8987258609904522e-06, + "loss": 0.2332756370306015, + "step": 5754 + }, + { + "epoch": 1.5282167042889392, + "grad_norm": 1.3432276903845415, + "learning_rate": 2.8956351254019355e-06, + "loss": 0.24855142831802368, + "step": 5755 + }, + { + "epoch": 1.5284822732704821, + "grad_norm": 1.2138875439685706, + "learning_rate": 2.8925457594511775e-06, + "loss": 0.18745368719100952, + "step": 5756 + }, + { + "epoch": 1.528747842252025, + "grad_norm": 1.877743895818308, + "learning_rate": 2.889457763733774e-06, + "loss": 0.22402942180633545, + "step": 5757 + }, + { + "epoch": 1.529013411233568, + "grad_norm": 1.292567134146249, + "learning_rate": 2.886371138845051e-06, + "loss": 0.2156108319759369, + "step": 5758 + }, + { + "epoch": 1.529278980215111, + "grad_norm": 1.2848231417758293, + "learning_rate": 2.883285885380076e-06, + "loss": 0.22866520285606384, + "step": 5759 + }, + { + "epoch": 1.529544549196654, + "grad_norm": 1.2907471990668473, + "learning_rate": 2.880202003933645e-06, + "loss": 0.2486938238143921, + "step": 5760 + }, + { + "epoch": 1.529810118178197, + "grad_norm": 1.34098643692872, + "learning_rate": 2.877119495100301e-06, + "loss": 0.2565295696258545, + "step": 5761 + }, + { + "epoch": 1.5300756871597399, + "grad_norm": 1.1480290388256142, + 
"learning_rate": 2.8740383594743116e-06, + "loss": 0.21510455012321472, + "step": 5762 + }, + { + "epoch": 1.5303412561412828, + "grad_norm": 1.266250058472157, + "learning_rate": 2.8709585976496825e-06, + "loss": 0.2122025489807129, + "step": 5763 + }, + { + "epoch": 1.5306068251228258, + "grad_norm": 1.3017513152107745, + "learning_rate": 2.8678802102201575e-06, + "loss": 0.24274399876594543, + "step": 5764 + }, + { + "epoch": 1.5308723941043687, + "grad_norm": 1.4573413266326471, + "learning_rate": 2.864803197779216e-06, + "loss": 0.22325341403484344, + "step": 5765 + }, + { + "epoch": 1.5311379630859117, + "grad_norm": 1.3303976558080437, + "learning_rate": 2.8617275609200625e-06, + "loss": 0.25205284357070923, + "step": 5766 + }, + { + "epoch": 1.5314035320674546, + "grad_norm": 1.2638986714524767, + "learning_rate": 2.8586533002356465e-06, + "loss": 0.2047557830810547, + "step": 5767 + }, + { + "epoch": 1.5316691010489976, + "grad_norm": 1.2195584514594966, + "learning_rate": 2.8555804163186508e-06, + "loss": 0.2166992425918579, + "step": 5768 + }, + { + "epoch": 1.5319346700305405, + "grad_norm": 1.2333416807696795, + "learning_rate": 2.8525089097614867e-06, + "loss": 0.26253193616867065, + "step": 5769 + }, + { + "epoch": 1.5322002390120835, + "grad_norm": 1.2030637435961495, + "learning_rate": 2.8494387811563108e-06, + "loss": 0.23307687044143677, + "step": 5770 + }, + { + "epoch": 1.5324658079936264, + "grad_norm": 1.2191481171426857, + "learning_rate": 2.8463700310950047e-06, + "loss": 0.22128549218177795, + "step": 5771 + }, + { + "epoch": 1.5327313769751694, + "grad_norm": 1.272136705974986, + "learning_rate": 2.8433026601691883e-06, + "loss": 0.21966281533241272, + "step": 5772 + }, + { + "epoch": 1.5329969459567123, + "grad_norm": 1.341088625881783, + "learning_rate": 2.840236668970213e-06, + "loss": 0.22869305312633514, + "step": 5773 + }, + { + "epoch": 1.5332625149382553, + "grad_norm": 1.2257027323986465, + "learning_rate": 2.837172058089167e-06, 
+ "loss": 0.21431279182434082, + "step": 5774 + }, + { + "epoch": 1.5335280839197982, + "grad_norm": 1.3512853622822856, + "learning_rate": 2.8341088281168693e-06, + "loss": 0.24610282480716705, + "step": 5775 + }, + { + "epoch": 1.5337936529013412, + "grad_norm": 1.3400303957635655, + "learning_rate": 2.8310469796438767e-06, + "loss": 0.24414925277233124, + "step": 5776 + }, + { + "epoch": 1.5340592218828841, + "grad_norm": 1.3597459613858938, + "learning_rate": 2.8279865132604766e-06, + "loss": 0.2330513596534729, + "step": 5777 + }, + { + "epoch": 1.534324790864427, + "grad_norm": 1.2551411616890042, + "learning_rate": 2.8249274295566863e-06, + "loss": 0.23048308491706848, + "step": 5778 + }, + { + "epoch": 1.53459035984597, + "grad_norm": 1.2566974883874766, + "learning_rate": 2.821869729122273e-06, + "loss": 0.2411375492811203, + "step": 5779 + }, + { + "epoch": 1.534855928827513, + "grad_norm": 1.384873838300398, + "learning_rate": 2.818813412546715e-06, + "loss": 0.22985543310642242, + "step": 5780 + }, + { + "epoch": 1.535121497809056, + "grad_norm": 1.320574666083159, + "learning_rate": 2.815758480419235e-06, + "loss": 0.20867247879505157, + "step": 5781 + }, + { + "epoch": 1.5353870667905989, + "grad_norm": 2.0414068761810182, + "learning_rate": 2.8127049333287913e-06, + "loss": 0.26378586888313293, + "step": 5782 + }, + { + "epoch": 1.5356526357721418, + "grad_norm": 1.552041032509997, + "learning_rate": 2.8096527718640687e-06, + "loss": 0.2690306305885315, + "step": 5783 + }, + { + "epoch": 1.5359182047536848, + "grad_norm": 1.1602606034579108, + "learning_rate": 2.8066019966134907e-06, + "loss": 0.22226165235042572, + "step": 5784 + }, + { + "epoch": 1.5361837737352277, + "grad_norm": 1.2201060637055436, + "learning_rate": 2.803552608165209e-06, + "loss": 0.23370322585105896, + "step": 5785 + }, + { + "epoch": 1.5364493427167707, + "grad_norm": 1.3067141176486328, + "learning_rate": 2.8005046071071107e-06, + "loss": 0.26137909293174744, + "step": 5786 
+ }, + { + "epoch": 1.5367149116983136, + "grad_norm": 1.3588127622676833, + "learning_rate": 2.7974579940268096e-06, + "loss": 0.22630617022514343, + "step": 5787 + }, + { + "epoch": 1.5369804806798566, + "grad_norm": 1.2356618590652273, + "learning_rate": 2.7944127695116663e-06, + "loss": 0.22641140222549438, + "step": 5788 + }, + { + "epoch": 1.5372460496613995, + "grad_norm": 1.266648551925957, + "learning_rate": 2.791368934148757e-06, + "loss": 0.19647541642189026, + "step": 5789 + }, + { + "epoch": 1.5375116186429425, + "grad_norm": 1.212906210017999, + "learning_rate": 2.788326488524901e-06, + "loss": 0.22399532794952393, + "step": 5790 + }, + { + "epoch": 1.5377771876244855, + "grad_norm": 1.2862970389756843, + "learning_rate": 2.7852854332266434e-06, + "loss": 0.22549685835838318, + "step": 5791 + }, + { + "epoch": 1.5380427566060284, + "grad_norm": 1.168406987557996, + "learning_rate": 2.7822457688402637e-06, + "loss": 0.2129821628332138, + "step": 5792 + }, + { + "epoch": 1.5383083255875714, + "grad_norm": 1.2301298306170827, + "learning_rate": 2.7792074959517755e-06, + "loss": 0.25330638885498047, + "step": 5793 + }, + { + "epoch": 1.5385738945691143, + "grad_norm": 1.3148661968254225, + "learning_rate": 2.7761706151469204e-06, + "loss": 0.2413945198059082, + "step": 5794 + }, + { + "epoch": 1.5388394635506573, + "grad_norm": 1.2551515744231165, + "learning_rate": 2.773135127011174e-06, + "loss": 0.21930523216724396, + "step": 5795 + }, + { + "epoch": 1.5391050325322002, + "grad_norm": 1.2506577052831476, + "learning_rate": 2.7701010321297416e-06, + "loss": 0.25499141216278076, + "step": 5796 + }, + { + "epoch": 1.5393706015137432, + "grad_norm": 1.1567311669751301, + "learning_rate": 2.7670683310875613e-06, + "loss": 0.19475680589675903, + "step": 5797 + }, + { + "epoch": 1.5396361704952861, + "grad_norm": 1.3159422945276043, + "learning_rate": 2.7640370244693026e-06, + "loss": 0.22155825793743134, + "step": 5798 + }, + { + "epoch": 1.539901739476829, 
+ "grad_norm": 1.1818601031709017, + "learning_rate": 2.761007112859365e-06, + "loss": 0.2146138846874237, + "step": 5799 + }, + { + "epoch": 1.540167308458372, + "grad_norm": 1.146035478957987, + "learning_rate": 2.7579785968418804e-06, + "loss": 0.22698411345481873, + "step": 5800 + }, + { + "epoch": 1.540432877439915, + "grad_norm": 1.2904710642906891, + "learning_rate": 2.75495147700071e-06, + "loss": 0.23889532685279846, + "step": 5801 + }, + { + "epoch": 1.540698446421458, + "grad_norm": 1.2353012354195356, + "learning_rate": 2.7519257539194488e-06, + "loss": 0.2514609694480896, + "step": 5802 + }, + { + "epoch": 1.5409640154030009, + "grad_norm": 1.2405153867334813, + "learning_rate": 2.7489014281814185e-06, + "loss": 0.22332100570201874, + "step": 5803 + }, + { + "epoch": 1.5412295843845438, + "grad_norm": 1.1768236369414826, + "learning_rate": 2.745878500369673e-06, + "loss": 0.21316683292388916, + "step": 5804 + }, + { + "epoch": 1.5414951533660868, + "grad_norm": 1.2446325297163028, + "learning_rate": 2.742856971066996e-06, + "loss": 0.2228018194437027, + "step": 5805 + }, + { + "epoch": 1.5417607223476297, + "grad_norm": 1.3243067869686356, + "learning_rate": 2.7398368408559084e-06, + "loss": 0.22217239439487457, + "step": 5806 + }, + { + "epoch": 1.5420262913291727, + "grad_norm": 1.331116794742511, + "learning_rate": 2.736818110318652e-06, + "loss": 0.21147233247756958, + "step": 5807 + }, + { + "epoch": 1.5422918603107156, + "grad_norm": 1.2851526092309566, + "learning_rate": 2.7338007800372024e-06, + "loss": 0.23844698071479797, + "step": 5808 + }, + { + "epoch": 1.5425574292922586, + "grad_norm": 1.3238454632326748, + "learning_rate": 2.7307848505932653e-06, + "loss": 0.2361423820257187, + "step": 5809 + }, + { + "epoch": 1.5428229982738015, + "grad_norm": 1.1977956377916248, + "learning_rate": 2.727770322568277e-06, + "loss": 0.21585656702518463, + "step": 5810 + }, + { + "epoch": 1.5430885672553445, + "grad_norm": 1.172295737533699, + 
"learning_rate": 2.724757196543403e-06, + "loss": 0.233969584107399, + "step": 5811 + }, + { + "epoch": 1.5433541362368874, + "grad_norm": 1.3309852612756656, + "learning_rate": 2.7217454730995363e-06, + "loss": 0.25040164589881897, + "step": 5812 + }, + { + "epoch": 1.5436197052184304, + "grad_norm": 1.5198455877328005, + "learning_rate": 2.7187351528173046e-06, + "loss": 0.25848713517189026, + "step": 5813 + }, + { + "epoch": 1.5438852741999733, + "grad_norm": 1.409976572144199, + "learning_rate": 2.715726236277061e-06, + "loss": 0.22255051136016846, + "step": 5814 + }, + { + "epoch": 1.5441508431815163, + "grad_norm": 1.1799889920310853, + "learning_rate": 2.7127187240588883e-06, + "loss": 0.1882694661617279, + "step": 5815 + }, + { + "epoch": 1.5444164121630592, + "grad_norm": 1.178741445510241, + "learning_rate": 2.7097126167426002e-06, + "loss": 0.20070400834083557, + "step": 5816 + }, + { + "epoch": 1.5446819811446022, + "grad_norm": 1.2959554460073714, + "learning_rate": 2.706707914907739e-06, + "loss": 0.25316092371940613, + "step": 5817 + }, + { + "epoch": 1.5449475501261452, + "grad_norm": 1.334925654094324, + "learning_rate": 2.703704619133576e-06, + "loss": 0.24665585160255432, + "step": 5818 + }, + { + "epoch": 1.545213119107688, + "grad_norm": 1.290703779819622, + "learning_rate": 2.7007027299991095e-06, + "loss": 0.24172846972942352, + "step": 5819 + }, + { + "epoch": 1.545478688089231, + "grad_norm": 1.2781945872260183, + "learning_rate": 2.6977022480830708e-06, + "loss": 0.2405129075050354, + "step": 5820 + }, + { + "epoch": 1.545744257070774, + "grad_norm": 1.075296946307477, + "learning_rate": 2.694703173963914e-06, + "loss": 0.19716276228427887, + "step": 5821 + }, + { + "epoch": 1.546009826052317, + "grad_norm": 1.1434881656258093, + "learning_rate": 2.6917055082198284e-06, + "loss": 0.20343703031539917, + "step": 5822 + }, + { + "epoch": 1.54627539503386, + "grad_norm": 1.5985849963050902, + "learning_rate": 2.688709251428725e-06, + "loss": 
0.24382619559764862, + "step": 5823 + }, + { + "epoch": 1.5465409640154029, + "grad_norm": 1.7314575476063523, + "learning_rate": 2.6857144041682514e-06, + "loss": 0.2962399423122406, + "step": 5824 + }, + { + "epoch": 1.5468065329969458, + "grad_norm": 1.2699118659079873, + "learning_rate": 2.6827209670157774e-06, + "loss": 0.24034687876701355, + "step": 5825 + }, + { + "epoch": 1.5470721019784888, + "grad_norm": 1.3757632125147359, + "learning_rate": 2.6797289405484016e-06, + "loss": 0.2575085163116455, + "step": 5826 + }, + { + "epoch": 1.5473376709600317, + "grad_norm": 1.556424910652697, + "learning_rate": 2.6767383253429515e-06, + "loss": 0.2586629092693329, + "step": 5827 + }, + { + "epoch": 1.5476032399415747, + "grad_norm": 1.096117045688234, + "learning_rate": 2.6737491219759815e-06, + "loss": 0.18447624146938324, + "step": 5828 + }, + { + "epoch": 1.5478688089231176, + "grad_norm": 1.3930188378643134, + "learning_rate": 2.670761331023779e-06, + "loss": 0.244853213429451, + "step": 5829 + }, + { + "epoch": 1.5481343779046606, + "grad_norm": 1.3163693020327074, + "learning_rate": 2.66777495306235e-06, + "loss": 0.24641919136047363, + "step": 5830 + }, + { + "epoch": 1.5483999468862035, + "grad_norm": 1.4086337954424433, + "learning_rate": 2.6647899886674323e-06, + "loss": 0.2364550232887268, + "step": 5831 + }, + { + "epoch": 1.5486655158677467, + "grad_norm": 1.1695450852938096, + "learning_rate": 2.6618064384144925e-06, + "loss": 0.17760278284549713, + "step": 5832 + }, + { + "epoch": 1.5489310848492897, + "grad_norm": 1.1988872335295608, + "learning_rate": 2.6588243028787274e-06, + "loss": 0.18571510910987854, + "step": 5833 + }, + { + "epoch": 1.5491966538308326, + "grad_norm": 1.2537289047953852, + "learning_rate": 2.655843582635057e-06, + "loss": 0.23693162202835083, + "step": 5834 + }, + { + "epoch": 1.5494622228123756, + "grad_norm": 1.3552352092705502, + "learning_rate": 2.652864278258126e-06, + "loss": 0.26481011509895325, + "step": 5835 + }, + { 
+ "epoch": 1.5497277917939185, + "grad_norm": 1.4182429828127188, + "learning_rate": 2.6498863903223115e-06, + "loss": 0.23405003547668457, + "step": 5836 + }, + { + "epoch": 1.5499933607754615, + "grad_norm": 2.5576796684815686, + "learning_rate": 2.6469099194017144e-06, + "loss": 0.20662814378738403, + "step": 5837 + }, + { + "epoch": 1.5502589297570044, + "grad_norm": 1.3124069479853646, + "learning_rate": 2.6439348660701634e-06, + "loss": 0.2722313404083252, + "step": 5838 + }, + { + "epoch": 1.5505244987385474, + "grad_norm": 1.3906100112719377, + "learning_rate": 2.6409612309012134e-06, + "loss": 0.2288864552974701, + "step": 5839 + }, + { + "epoch": 1.5507900677200903, + "grad_norm": 1.322570753297788, + "learning_rate": 2.6379890144681464e-06, + "loss": 0.2286190539598465, + "step": 5840 + }, + { + "epoch": 1.5510556367016333, + "grad_norm": 1.2231420705695173, + "learning_rate": 2.6350182173439666e-06, + "loss": 0.22478938102722168, + "step": 5841 + }, + { + "epoch": 1.5513212056831762, + "grad_norm": 1.415848841276022, + "learning_rate": 2.6320488401014166e-06, + "loss": 0.2520615756511688, + "step": 5842 + }, + { + "epoch": 1.5515867746647192, + "grad_norm": 1.3741284890856262, + "learning_rate": 2.629080883312952e-06, + "loss": 0.2121289074420929, + "step": 5843 + }, + { + "epoch": 1.5518523436462621, + "grad_norm": 1.3092311759839703, + "learning_rate": 2.6261143475507656e-06, + "loss": 0.2252352237701416, + "step": 5844 + }, + { + "epoch": 1.552117912627805, + "grad_norm": 1.191285245143269, + "learning_rate": 2.6231492333867626e-06, + "loss": 0.21188892424106598, + "step": 5845 + }, + { + "epoch": 1.552383481609348, + "grad_norm": 1.1276138403597054, + "learning_rate": 2.6201855413925857e-06, + "loss": 0.21534699201583862, + "step": 5846 + }, + { + "epoch": 1.552649050590891, + "grad_norm": 1.2849885490704696, + "learning_rate": 2.6172232721395998e-06, + "loss": 0.21781614422798157, + "step": 5847 + }, + { + "epoch": 1.552914619572434, + "grad_norm": 
1.3317886914724781, + "learning_rate": 2.6142624261988947e-06, + "loss": 0.2476508915424347, + "step": 5848 + }, + { + "epoch": 1.5531801885539769, + "grad_norm": 1.3439658215829489, + "learning_rate": 2.611303004141287e-06, + "loss": 0.2692151665687561, + "step": 5849 + }, + { + "epoch": 1.5534457575355198, + "grad_norm": 1.2839746536411722, + "learning_rate": 2.6083450065373163e-06, + "loss": 0.24868687987327576, + "step": 5850 + }, + { + "epoch": 1.5537113265170628, + "grad_norm": 1.2704813852574235, + "learning_rate": 2.6053884339572543e-06, + "loss": 0.24215853214263916, + "step": 5851 + }, + { + "epoch": 1.5539768954986057, + "grad_norm": 1.2100819665594098, + "learning_rate": 2.602433286971091e-06, + "loss": 0.2157444804906845, + "step": 5852 + }, + { + "epoch": 1.5542424644801487, + "grad_norm": 1.369237575424674, + "learning_rate": 2.599479566148544e-06, + "loss": 0.22152379155158997, + "step": 5853 + }, + { + "epoch": 1.5545080334616916, + "grad_norm": 1.1930490692336162, + "learning_rate": 2.596527272059055e-06, + "loss": 0.2278299182653427, + "step": 5854 + }, + { + "epoch": 1.5547736024432346, + "grad_norm": 1.406485645097326, + "learning_rate": 2.593576405271793e-06, + "loss": 0.23183950781822205, + "step": 5855 + }, + { + "epoch": 1.5550391714247775, + "grad_norm": 1.209726796816396, + "learning_rate": 2.5906269663556484e-06, + "loss": 0.22167566418647766, + "step": 5856 + }, + { + "epoch": 1.5553047404063205, + "grad_norm": 1.1790986825354977, + "learning_rate": 2.5876789558792403e-06, + "loss": 0.24111366271972656, + "step": 5857 + }, + { + "epoch": 1.5555703093878634, + "grad_norm": 1.1706391072024214, + "learning_rate": 2.5847323744109087e-06, + "loss": 0.2090388983488083, + "step": 5858 + }, + { + "epoch": 1.5558358783694064, + "grad_norm": 1.2588154614837785, + "learning_rate": 2.58178722251872e-06, + "loss": 0.2087189108133316, + "step": 5859 + }, + { + "epoch": 1.5561014473509496, + "grad_norm": 1.300626487965864, + "learning_rate": 
2.578843500770465e-06, + "loss": 0.2277342677116394, + "step": 5860 + }, + { + "epoch": 1.5563670163324925, + "grad_norm": 1.3517116904487896, + "learning_rate": 2.57590120973366e-06, + "loss": 0.2204241305589676, + "step": 5861 + }, + { + "epoch": 1.5566325853140355, + "grad_norm": 1.213807933631201, + "learning_rate": 2.5729603499755416e-06, + "loss": 0.2138606607913971, + "step": 5862 + }, + { + "epoch": 1.5568981542955784, + "grad_norm": 1.4669648743657906, + "learning_rate": 2.5700209220630733e-06, + "loss": 0.21257862448692322, + "step": 5863 + }, + { + "epoch": 1.5571637232771214, + "grad_norm": 1.2314998246120414, + "learning_rate": 2.5670829265629437e-06, + "loss": 0.20991909503936768, + "step": 5864 + }, + { + "epoch": 1.5574292922586643, + "grad_norm": 1.294980658460416, + "learning_rate": 2.5641463640415633e-06, + "loss": 0.23745422065258026, + "step": 5865 + }, + { + "epoch": 1.5576948612402073, + "grad_norm": 1.2425796180120088, + "learning_rate": 2.561211235065065e-06, + "loss": 0.21482989192008972, + "step": 5866 + }, + { + "epoch": 1.5579604302217502, + "grad_norm": 1.008120888370748, + "learning_rate": 2.558277540199309e-06, + "loss": 0.17866572737693787, + "step": 5867 + }, + { + "epoch": 1.5582259992032932, + "grad_norm": 1.2966262005019353, + "learning_rate": 2.555345280009872e-06, + "loss": 0.223822683095932, + "step": 5868 + }, + { + "epoch": 1.5584915681848361, + "grad_norm": 1.339606961190666, + "learning_rate": 2.552414455062068e-06, + "loss": 0.2293519228696823, + "step": 5869 + }, + { + "epoch": 1.558757137166379, + "grad_norm": 1.3023504432012787, + "learning_rate": 2.5494850659209203e-06, + "loss": 0.2556726038455963, + "step": 5870 + }, + { + "epoch": 1.559022706147922, + "grad_norm": 1.255574464472328, + "learning_rate": 2.546557113151181e-06, + "loss": 0.26891303062438965, + "step": 5871 + }, + { + "epoch": 1.559288275129465, + "grad_norm": 1.1754509839553133, + "learning_rate": 2.5436305973173257e-06, + "loss": 0.19510813057422638, 
+ "step": 5872 + }, + { + "epoch": 1.559553844111008, + "grad_norm": 1.2819966401856495, + "learning_rate": 2.5407055189835518e-06, + "loss": 0.22906547784805298, + "step": 5873 + }, + { + "epoch": 1.559819413092551, + "grad_norm": 1.3121165067922245, + "learning_rate": 2.5377818787137788e-06, + "loss": 0.25452786684036255, + "step": 5874 + }, + { + "epoch": 1.5600849820740939, + "grad_norm": 1.2743199898597464, + "learning_rate": 2.5348596770716503e-06, + "loss": 0.205597922205925, + "step": 5875 + }, + { + "epoch": 1.5603505510556368, + "grad_norm": 1.3020148941868286, + "learning_rate": 2.5319389146205344e-06, + "loss": 0.24009352922439575, + "step": 5876 + }, + { + "epoch": 1.5606161200371798, + "grad_norm": 1.433983972963341, + "learning_rate": 2.5290195919235173e-06, + "loss": 0.23381268978118896, + "step": 5877 + }, + { + "epoch": 1.5608816890187227, + "grad_norm": 1.1554092234943296, + "learning_rate": 2.52610170954341e-06, + "loss": 0.2267276644706726, + "step": 5878 + }, + { + "epoch": 1.5611472580002657, + "grad_norm": 1.2742422977156036, + "learning_rate": 2.5231852680427482e-06, + "loss": 0.24330289661884308, + "step": 5879 + }, + { + "epoch": 1.5614128269818086, + "grad_norm": 1.2802855767249914, + "learning_rate": 2.5202702679837852e-06, + "loss": 0.24877145886421204, + "step": 5880 + }, + { + "epoch": 1.5616783959633516, + "grad_norm": 1.1377670913842177, + "learning_rate": 2.5173567099285e-06, + "loss": 0.20410388708114624, + "step": 5881 + }, + { + "epoch": 1.5619439649448945, + "grad_norm": 1.2268765869469427, + "learning_rate": 2.514444594438591e-06, + "loss": 0.21524877846240997, + "step": 5882 + }, + { + "epoch": 1.5622095339264375, + "grad_norm": 1.1986269244208958, + "learning_rate": 2.5115339220754796e-06, + "loss": 0.18785043060779572, + "step": 5883 + }, + { + "epoch": 1.5624751029079804, + "grad_norm": 1.3539528047627718, + "learning_rate": 2.5086246934003113e-06, + "loss": 0.21200208365917206, + "step": 5884 + }, + { + "epoch": 
1.5627406718895234, + "grad_norm": 1.6373531833898813, + "learning_rate": 2.5057169089739485e-06, + "loss": 0.20752021670341492, + "step": 5885 + }, + { + "epoch": 1.5630062408710663, + "grad_norm": 1.1717071963534185, + "learning_rate": 2.502810569356976e-06, + "loss": 0.21395736932754517, + "step": 5886 + }, + { + "epoch": 1.5632718098526093, + "grad_norm": 1.2664848714228343, + "learning_rate": 2.499905675109707e-06, + "loss": 0.26949262619018555, + "step": 5887 + }, + { + "epoch": 1.5635373788341522, + "grad_norm": 1.5283985889023297, + "learning_rate": 2.497002226792169e-06, + "loss": 0.2309839278459549, + "step": 5888 + }, + { + "epoch": 1.5638029478156952, + "grad_norm": 1.2596143819163301, + "learning_rate": 2.4941002249641123e-06, + "loss": 0.24415400624275208, + "step": 5889 + }, + { + "epoch": 1.5640685167972381, + "grad_norm": 1.3074402223027564, + "learning_rate": 2.4911996701850083e-06, + "loss": 0.23493322730064392, + "step": 5890 + }, + { + "epoch": 1.564334085778781, + "grad_norm": 1.260748243658743, + "learning_rate": 2.488300563014049e-06, + "loss": 0.23824438452720642, + "step": 5891 + }, + { + "epoch": 1.564599654760324, + "grad_norm": 1.2534870916273309, + "learning_rate": 2.4854029040101503e-06, + "loss": 0.2523414194583893, + "step": 5892 + }, + { + "epoch": 1.564865223741867, + "grad_norm": 1.2879106186872462, + "learning_rate": 2.482506693731944e-06, + "loss": 0.21360887587070465, + "step": 5893 + }, + { + "epoch": 1.56513079272341, + "grad_norm": 1.1951820042572139, + "learning_rate": 2.47961193273779e-06, + "loss": 0.21182934939861298, + "step": 5894 + }, + { + "epoch": 1.5653963617049529, + "grad_norm": 1.4293886797193323, + "learning_rate": 2.4767186215857542e-06, + "loss": 0.23104771971702576, + "step": 5895 + }, + { + "epoch": 1.5656619306864958, + "grad_norm": 1.2606491547398977, + "learning_rate": 2.473826760833643e-06, + "loss": 0.22297397255897522, + "step": 5896 + }, + { + "epoch": 1.5659274996680388, + "grad_norm": 
1.176802218612286, + "learning_rate": 2.4709363510389684e-06, + "loss": 0.21597865223884583, + "step": 5897 + }, + { + "epoch": 1.5661930686495817, + "grad_norm": 1.4303555951561693, + "learning_rate": 2.468047392758969e-06, + "loss": 0.27620527148246765, + "step": 5898 + }, + { + "epoch": 1.5664586376311247, + "grad_norm": 1.373809252877093, + "learning_rate": 2.465159886550601e-06, + "loss": 0.25262463092803955, + "step": 5899 + }, + { + "epoch": 1.5667242066126676, + "grad_norm": 1.376719462816966, + "learning_rate": 2.462273832970542e-06, + "loss": 0.2729034125804901, + "step": 5900 + }, + { + "epoch": 1.5669897755942106, + "grad_norm": 1.3637563490895455, + "learning_rate": 2.459389232575188e-06, + "loss": 0.2313854992389679, + "step": 5901 + }, + { + "epoch": 1.5672553445757536, + "grad_norm": 1.3202318144066494, + "learning_rate": 2.456506085920658e-06, + "loss": 0.22513791918754578, + "step": 5902 + }, + { + "epoch": 1.5675209135572965, + "grad_norm": 1.3152362934287614, + "learning_rate": 2.4536243935627856e-06, + "loss": 0.2658824026584625, + "step": 5903 + }, + { + "epoch": 1.5677864825388395, + "grad_norm": 1.1721087348112986, + "learning_rate": 2.4507441560571275e-06, + "loss": 0.21781010925769806, + "step": 5904 + }, + { + "epoch": 1.5680520515203824, + "grad_norm": 1.3393030222309363, + "learning_rate": 2.4478653739589632e-06, + "loss": 0.21047937870025635, + "step": 5905 + }, + { + "epoch": 1.5683176205019254, + "grad_norm": 1.2196979825563006, + "learning_rate": 2.4449880478232858e-06, + "loss": 0.21674057841300964, + "step": 5906 + }, + { + "epoch": 1.5685831894834683, + "grad_norm": 1.200112520021674, + "learning_rate": 2.44211217820481e-06, + "loss": 0.22062627971172333, + "step": 5907 + }, + { + "epoch": 1.5688487584650113, + "grad_norm": 1.3158234051142574, + "learning_rate": 2.439237765657968e-06, + "loss": 0.22440886497497559, + "step": 5908 + }, + { + "epoch": 1.5691143274465542, + "grad_norm": 1.129873307165861, + "learning_rate": 
2.4363648107369175e-06, + "loss": 0.21888123452663422, + "step": 5909 + }, + { + "epoch": 1.5693798964280972, + "grad_norm": 1.2586007199788052, + "learning_rate": 2.433493313995524e-06, + "loss": 0.23104462027549744, + "step": 5910 + }, + { + "epoch": 1.5696454654096401, + "grad_norm": 1.427902558182486, + "learning_rate": 2.4306232759873803e-06, + "loss": 0.23032237589359283, + "step": 5911 + }, + { + "epoch": 1.569911034391183, + "grad_norm": 1.3780752776280365, + "learning_rate": 2.4277546972657974e-06, + "loss": 0.2588527202606201, + "step": 5912 + }, + { + "epoch": 1.570176603372726, + "grad_norm": 1.4647042397629928, + "learning_rate": 2.424887578383799e-06, + "loss": 0.2845698893070221, + "step": 5913 + }, + { + "epoch": 1.570442172354269, + "grad_norm": 1.338246310760916, + "learning_rate": 2.4220219198941384e-06, + "loss": 0.23010894656181335, + "step": 5914 + }, + { + "epoch": 1.570707741335812, + "grad_norm": 1.3783426416349442, + "learning_rate": 2.419157722349278e-06, + "loss": 0.2623594403266907, + "step": 5915 + }, + { + "epoch": 1.5709733103173549, + "grad_norm": 1.2349976574308903, + "learning_rate": 2.416294986301401e-06, + "loss": 0.2107153981924057, + "step": 5916 + }, + { + "epoch": 1.5712388792988978, + "grad_norm": 1.3633626366853218, + "learning_rate": 2.413433712302409e-06, + "loss": 0.2115003615617752, + "step": 5917 + }, + { + "epoch": 1.5715044482804408, + "grad_norm": 1.3738602333573011, + "learning_rate": 2.410573900903921e-06, + "loss": 0.22406762838363647, + "step": 5918 + }, + { + "epoch": 1.5717700172619837, + "grad_norm": 1.3017270649216575, + "learning_rate": 2.407715552657277e-06, + "loss": 0.24878525733947754, + "step": 5919 + }, + { + "epoch": 1.5720355862435267, + "grad_norm": 1.5003273963811, + "learning_rate": 2.404858668113532e-06, + "loss": 0.24546805024147034, + "step": 5920 + }, + { + "epoch": 1.5723011552250696, + "grad_norm": 1.5650848412040055, + "learning_rate": 2.402003247823459e-06, + "loss": 0.23430263996124268, 
+ "step": 5921 + }, + { + "epoch": 1.5725667242066126, + "grad_norm": 1.3939131226044492, + "learning_rate": 2.399149292337547e-06, + "loss": 0.26935267448425293, + "step": 5922 + }, + { + "epoch": 1.5728322931881555, + "grad_norm": 1.1554138984093538, + "learning_rate": 2.3962968022060097e-06, + "loss": 0.21104472875595093, + "step": 5923 + }, + { + "epoch": 1.5730978621696985, + "grad_norm": 1.147816084956367, + "learning_rate": 2.3934457779787755e-06, + "loss": 0.17162750661373138, + "step": 5924 + }, + { + "epoch": 1.5733634311512414, + "grad_norm": 1.2036391990293953, + "learning_rate": 2.390596220205481e-06, + "loss": 0.22233474254608154, + "step": 5925 + }, + { + "epoch": 1.5736290001327844, + "grad_norm": 1.456348691360017, + "learning_rate": 2.387748129435491e-06, + "loss": 0.2326992005109787, + "step": 5926 + }, + { + "epoch": 1.5738945691143273, + "grad_norm": 1.2656294085970974, + "learning_rate": 2.3849015062178835e-06, + "loss": 0.245779350399971, + "step": 5927 + }, + { + "epoch": 1.5741601380958703, + "grad_norm": 1.2198185109849795, + "learning_rate": 2.382056351101454e-06, + "loss": 0.24269379675388336, + "step": 5928 + }, + { + "epoch": 1.5744257070774133, + "grad_norm": 1.2241918308854736, + "learning_rate": 2.3792126646347138e-06, + "loss": 0.23644019663333893, + "step": 5929 + }, + { + "epoch": 1.5746912760589562, + "grad_norm": 1.2680435600362268, + "learning_rate": 2.376370447365893e-06, + "loss": 0.254330575466156, + "step": 5930 + }, + { + "epoch": 1.5749568450404992, + "grad_norm": 1.4146409212378834, + "learning_rate": 2.373529699842936e-06, + "loss": 0.2728506922721863, + "step": 5931 + }, + { + "epoch": 1.575222414022042, + "grad_norm": 1.3627178065769006, + "learning_rate": 2.3706904226135087e-06, + "loss": 0.23671439290046692, + "step": 5932 + }, + { + "epoch": 1.575487983003585, + "grad_norm": 1.409873356618632, + "learning_rate": 2.367852616224989e-06, + "loss": 0.24205748736858368, + "step": 5933 + }, + { + "epoch": 
1.575753551985128, + "grad_norm": 1.2728197754861583, + "learning_rate": 2.3650162812244725e-06, + "loss": 0.1915436089038849, + "step": 5934 + }, + { + "epoch": 1.576019120966671, + "grad_norm": 1.2091326643578577, + "learning_rate": 2.3621814181587697e-06, + "loss": 0.23453299701213837, + "step": 5935 + }, + { + "epoch": 1.576284689948214, + "grad_norm": 1.3060415308267561, + "learning_rate": 2.3593480275744106e-06, + "loss": 0.24066327512264252, + "step": 5936 + }, + { + "epoch": 1.5765502589297569, + "grad_norm": 1.246429396187596, + "learning_rate": 2.356516110017639e-06, + "loss": 0.22510530054569244, + "step": 5937 + }, + { + "epoch": 1.5768158279112998, + "grad_norm": 1.2889494549478113, + "learning_rate": 2.3536856660344144e-06, + "loss": 0.22967353463172913, + "step": 5938 + }, + { + "epoch": 1.5770813968928428, + "grad_norm": 1.2404139099674472, + "learning_rate": 2.3508566961704127e-06, + "loss": 0.2299107313156128, + "step": 5939 + }, + { + "epoch": 1.5773469658743857, + "grad_norm": 1.2560783974284127, + "learning_rate": 2.3480292009710282e-06, + "loss": 0.23418918251991272, + "step": 5940 + }, + { + "epoch": 1.5776125348559287, + "grad_norm": 1.2857056044544095, + "learning_rate": 2.3452031809813657e-06, + "loss": 0.26528510451316833, + "step": 5941 + }, + { + "epoch": 1.5778781038374716, + "grad_norm": 1.1247059842406957, + "learning_rate": 2.342378636746251e-06, + "loss": 0.21878717839717865, + "step": 5942 + }, + { + "epoch": 1.5781436728190146, + "grad_norm": 1.1637472196421235, + "learning_rate": 2.339555568810221e-06, + "loss": 0.19697530567646027, + "step": 5943 + }, + { + "epoch": 1.5784092418005577, + "grad_norm": 1.3422665805434115, + "learning_rate": 2.3367339777175313e-06, + "loss": 0.24812257289886475, + "step": 5944 + }, + { + "epoch": 1.5786748107821007, + "grad_norm": 1.3285793357341238, + "learning_rate": 2.3339138640121504e-06, + "loss": 0.27651745080947876, + "step": 5945 + }, + { + "epoch": 1.5789403797636437, + "grad_norm": 
1.308131821171991, + "learning_rate": 2.3310952282377643e-06, + "loss": 0.2651634216308594, + "step": 5946 + }, + { + "epoch": 1.5792059487451866, + "grad_norm": 1.3163549633798883, + "learning_rate": 2.328278070937772e-06, + "loss": 0.23799028992652893, + "step": 5947 + }, + { + "epoch": 1.5794715177267296, + "grad_norm": 1.4229706240812914, + "learning_rate": 2.3254623926552867e-06, + "loss": 0.2528802752494812, + "step": 5948 + }, + { + "epoch": 1.5797370867082725, + "grad_norm": 1.2071666314804592, + "learning_rate": 2.322648193933137e-06, + "loss": 0.23819346725940704, + "step": 5949 + }, + { + "epoch": 1.5800026556898155, + "grad_norm": 1.2694222057013376, + "learning_rate": 2.319835475313873e-06, + "loss": 0.2510845959186554, + "step": 5950 + }, + { + "epoch": 1.5802682246713584, + "grad_norm": 1.0731141255180743, + "learning_rate": 2.31702423733975e-06, + "loss": 0.20156612992286682, + "step": 5951 + }, + { + "epoch": 1.5805337936529014, + "grad_norm": 1.320010192923148, + "learning_rate": 2.3142144805527413e-06, + "loss": 0.23375174403190613, + "step": 5952 + }, + { + "epoch": 1.5807993626344443, + "grad_norm": 1.187058092026163, + "learning_rate": 2.311406205494535e-06, + "loss": 0.2378280758857727, + "step": 5953 + }, + { + "epoch": 1.5810649316159873, + "grad_norm": 1.4550533599389408, + "learning_rate": 2.308599412706535e-06, + "loss": 0.2087683081626892, + "step": 5954 + }, + { + "epoch": 1.5813305005975302, + "grad_norm": 1.2856302099767283, + "learning_rate": 2.3057941027298557e-06, + "loss": 0.2228693962097168, + "step": 5955 + }, + { + "epoch": 1.5815960695790732, + "grad_norm": 1.4738789364963756, + "learning_rate": 2.302990276105329e-06, + "loss": 0.22694727778434753, + "step": 5956 + }, + { + "epoch": 1.5818616385606161, + "grad_norm": 1.2486840544551192, + "learning_rate": 2.300187933373499e-06, + "loss": 0.22996942698955536, + "step": 5957 + }, + { + "epoch": 1.582127207542159, + "grad_norm": 1.331719034245123, + "learning_rate": 
2.2973870750746253e-06, + "loss": 0.2440253496170044, + "step": 5958 + }, + { + "epoch": 1.582392776523702, + "grad_norm": 1.3266637203740035, + "learning_rate": 2.2945877017486782e-06, + "loss": 0.2507309019565582, + "step": 5959 + }, + { + "epoch": 1.582658345505245, + "grad_norm": 2.8683041985739677, + "learning_rate": 2.2917898139353467e-06, + "loss": 0.24790918827056885, + "step": 5960 + }, + { + "epoch": 1.582923914486788, + "grad_norm": 1.4168604850261965, + "learning_rate": 2.2889934121740287e-06, + "loss": 0.22106975317001343, + "step": 5961 + }, + { + "epoch": 1.5831894834683309, + "grad_norm": 1.5726662217531726, + "learning_rate": 2.2861984970038385e-06, + "loss": 0.2410939633846283, + "step": 5962 + }, + { + "epoch": 1.5834550524498738, + "grad_norm": 1.1559016560001114, + "learning_rate": 2.283405068963601e-06, + "loss": 0.22821484506130219, + "step": 5963 + }, + { + "epoch": 1.5837206214314168, + "grad_norm": 1.2324685594628142, + "learning_rate": 2.2806131285918588e-06, + "loss": 0.21425281465053558, + "step": 5964 + }, + { + "epoch": 1.5839861904129597, + "grad_norm": 1.2434376170807215, + "learning_rate": 2.277822676426863e-06, + "loss": 0.22428902983665466, + "step": 5965 + }, + { + "epoch": 1.5842517593945027, + "grad_norm": 1.4592375031786005, + "learning_rate": 2.27503371300658e-06, + "loss": 0.2986769676208496, + "step": 5966 + }, + { + "epoch": 1.5845173283760456, + "grad_norm": 1.4384957681975041, + "learning_rate": 2.272246238868687e-06, + "loss": 0.24697065353393555, + "step": 5967 + }, + { + "epoch": 1.5847828973575886, + "grad_norm": 1.3175254870878064, + "learning_rate": 2.269460254550583e-06, + "loss": 0.23725461959838867, + "step": 5968 + }, + { + "epoch": 1.5850484663391315, + "grad_norm": 1.5010497616053564, + "learning_rate": 2.2666757605893664e-06, + "loss": 0.2661248445510864, + "step": 5969 + }, + { + "epoch": 1.5853140353206745, + "grad_norm": 1.2390278830143426, + "learning_rate": 2.263892757521858e-06, + "loss": 
0.23328733444213867, + "step": 5970 + }, + { + "epoch": 1.5855796043022174, + "grad_norm": 1.2547818797647754, + "learning_rate": 2.2611112458845873e-06, + "loss": 0.22886580228805542, + "step": 5971 + }, + { + "epoch": 1.5858451732837606, + "grad_norm": 1.1882681583888588, + "learning_rate": 2.2583312262137966e-06, + "loss": 0.25051698088645935, + "step": 5972 + }, + { + "epoch": 1.5861107422653036, + "grad_norm": 1.2988472953319592, + "learning_rate": 2.2555526990454413e-06, + "loss": 0.2400815784931183, + "step": 5973 + }, + { + "epoch": 1.5863763112468465, + "grad_norm": 1.1598677166947555, + "learning_rate": 2.2527756649151912e-06, + "loss": 0.2212347537279129, + "step": 5974 + }, + { + "epoch": 1.5866418802283895, + "grad_norm": 1.355013417523964, + "learning_rate": 2.2500001243584204e-06, + "loss": 0.3002026379108429, + "step": 5975 + }, + { + "epoch": 1.5869074492099324, + "grad_norm": 1.1899701199057289, + "learning_rate": 2.2472260779102185e-06, + "loss": 0.19813531637191772, + "step": 5976 + }, + { + "epoch": 1.5871730181914754, + "grad_norm": 1.2404972223723234, + "learning_rate": 2.2444535261053968e-06, + "loss": 0.2233983874320984, + "step": 5977 + }, + { + "epoch": 1.5874385871730183, + "grad_norm": 1.417840431772693, + "learning_rate": 2.2416824694784676e-06, + "loss": 0.26059988141059875, + "step": 5978 + }, + { + "epoch": 1.5877041561545613, + "grad_norm": 1.2961846276739968, + "learning_rate": 2.2389129085636573e-06, + "loss": 0.23058606684207916, + "step": 5979 + }, + { + "epoch": 1.5879697251361042, + "grad_norm": 1.3397298592095879, + "learning_rate": 2.236144843894904e-06, + "loss": 0.2414383739233017, + "step": 5980 + }, + { + "epoch": 1.5882352941176472, + "grad_norm": 1.2013757541083616, + "learning_rate": 2.23337827600586e-06, + "loss": 0.21688291430473328, + "step": 5981 + }, + { + "epoch": 1.5885008630991901, + "grad_norm": 1.2977536190104755, + "learning_rate": 2.2306132054298847e-06, + "loss": 0.24297408759593964, + "step": 5982 + }, 
+ { + "epoch": 1.588766432080733, + "grad_norm": 1.449081017944755, + "learning_rate": 2.227849632700052e-06, + "loss": 0.2655821442604065, + "step": 5983 + }, + { + "epoch": 1.589032001062276, + "grad_norm": 1.2305338711146763, + "learning_rate": 2.225087558349146e-06, + "loss": 0.20545080304145813, + "step": 5984 + }, + { + "epoch": 1.589297570043819, + "grad_norm": 1.470607418959754, + "learning_rate": 2.2223269829096593e-06, + "loss": 0.24151475727558136, + "step": 5985 + }, + { + "epoch": 1.589563139025362, + "grad_norm": 1.2194062039730535, + "learning_rate": 2.2195679069138043e-06, + "loss": 0.2294519543647766, + "step": 5986 + }, + { + "epoch": 1.589828708006905, + "grad_norm": 1.3319096935394759, + "learning_rate": 2.2168103308934953e-06, + "loss": 0.2041824758052826, + "step": 5987 + }, + { + "epoch": 1.5900942769884479, + "grad_norm": 1.181577384258167, + "learning_rate": 2.21405425538036e-06, + "loss": 0.1856188029050827, + "step": 5988 + }, + { + "epoch": 1.5903598459699908, + "grad_norm": 1.2644853901124522, + "learning_rate": 2.2112996809057395e-06, + "loss": 0.24337685108184814, + "step": 5989 + }, + { + "epoch": 1.5906254149515338, + "grad_norm": 1.1714048449744126, + "learning_rate": 2.20854660800068e-06, + "loss": 0.2201787382364273, + "step": 5990 + }, + { + "epoch": 1.5908909839330767, + "grad_norm": 1.322531300676563, + "learning_rate": 2.2057950371959427e-06, + "loss": 0.23505619168281555, + "step": 5991 + }, + { + "epoch": 1.5911565529146197, + "grad_norm": 1.4085526679551708, + "learning_rate": 2.203044969021997e-06, + "loss": 0.19528049230575562, + "step": 5992 + }, + { + "epoch": 1.5914221218961626, + "grad_norm": 1.2299879902160842, + "learning_rate": 2.2002964040090256e-06, + "loss": 0.22281290590763092, + "step": 5993 + }, + { + "epoch": 1.5916876908777056, + "grad_norm": 1.310771483519368, + "learning_rate": 2.1975493426869155e-06, + "loss": 0.19606761634349823, + "step": 5994 + }, + { + "epoch": 1.5919532598592485, + "grad_norm": 
1.2570005315725017, + "learning_rate": 2.1948037855852733e-06, + "loss": 0.22559323906898499, + "step": 5995 + }, + { + "epoch": 1.5922188288407915, + "grad_norm": 1.2326545276620708, + "learning_rate": 2.192059733233408e-06, + "loss": 0.20417393743991852, + "step": 5996 + }, + { + "epoch": 1.5924843978223344, + "grad_norm": 1.351064737074131, + "learning_rate": 2.18931718616034e-06, + "loss": 0.2579960525035858, + "step": 5997 + }, + { + "epoch": 1.5927499668038774, + "grad_norm": 1.2980140620122547, + "learning_rate": 2.1865761448948e-06, + "loss": 0.23339781165122986, + "step": 5998 + }, + { + "epoch": 1.5930155357854203, + "grad_norm": 1.2588476812522966, + "learning_rate": 2.1838366099652274e-06, + "loss": 0.2368197739124298, + "step": 5999 + }, + { + "epoch": 1.5932811047669633, + "grad_norm": 1.2980274155826699, + "learning_rate": 2.1810985818997743e-06, + "loss": 0.2225847840309143, + "step": 6000 + }, + { + "epoch": 1.5935466737485062, + "grad_norm": 1.3094945647641514, + "learning_rate": 2.1783620612263e-06, + "loss": 0.2426701784133911, + "step": 6001 + }, + { + "epoch": 1.5938122427300492, + "grad_norm": 1.284834767608695, + "learning_rate": 2.175627048472372e-06, + "loss": 0.23647268116474152, + "step": 6002 + }, + { + "epoch": 1.5940778117115921, + "grad_norm": 1.2525920428706867, + "learning_rate": 2.1728935441652687e-06, + "loss": 0.22843337059020996, + "step": 6003 + }, + { + "epoch": 1.594343380693135, + "grad_norm": 1.1786632019087344, + "learning_rate": 2.1701615488319785e-06, + "loss": 0.21524465084075928, + "step": 6004 + }, + { + "epoch": 1.594608949674678, + "grad_norm": 1.225831889373155, + "learning_rate": 2.167431062999197e-06, + "loss": 0.2160830795764923, + "step": 6005 + }, + { + "epoch": 1.594874518656221, + "grad_norm": 1.238709201727011, + "learning_rate": 2.1647020871933288e-06, + "loss": 0.2321595996618271, + "step": 6006 + }, + { + "epoch": 1.595140087637764, + "grad_norm": 1.164283210992047, + "learning_rate": 
2.1619746219404916e-06, + "loss": 0.21255026757717133, + "step": 6007 + }, + { + "epoch": 1.5954056566193069, + "grad_norm": 1.3822319128280973, + "learning_rate": 2.1592486677665047e-06, + "loss": 0.22851255536079407, + "step": 6008 + }, + { + "epoch": 1.5956712256008498, + "grad_norm": 1.3982384304626327, + "learning_rate": 2.1565242251969022e-06, + "loss": 0.23844364285469055, + "step": 6009 + }, + { + "epoch": 1.5959367945823928, + "grad_norm": 1.3184134341650149, + "learning_rate": 2.153801294756924e-06, + "loss": 0.2592385411262512, + "step": 6010 + }, + { + "epoch": 1.5962023635639357, + "grad_norm": 1.221300094567036, + "learning_rate": 2.151079876971519e-06, + "loss": 0.22163718938827515, + "step": 6011 + }, + { + "epoch": 1.5964679325454787, + "grad_norm": 1.1840952132259899, + "learning_rate": 2.1483599723653415e-06, + "loss": 0.1960998773574829, + "step": 6012 + }, + { + "epoch": 1.5967335015270216, + "grad_norm": 1.1732770789502442, + "learning_rate": 2.145641581462762e-06, + "loss": 0.20811150968074799, + "step": 6013 + }, + { + "epoch": 1.5969990705085646, + "grad_norm": 1.2065470685478314, + "learning_rate": 2.1429247047878534e-06, + "loss": 0.23184621334075928, + "step": 6014 + }, + { + "epoch": 1.5972646394901076, + "grad_norm": 1.3338850940720004, + "learning_rate": 2.1402093428643942e-06, + "loss": 0.22043758630752563, + "step": 6015 + }, + { + "epoch": 1.5975302084716505, + "grad_norm": 1.1736165993383876, + "learning_rate": 2.137495496215878e-06, + "loss": 0.18621152639389038, + "step": 6016 + }, + { + "epoch": 1.5977957774531935, + "grad_norm": 1.332636421894691, + "learning_rate": 2.1347831653654995e-06, + "loss": 0.2422473132610321, + "step": 6017 + }, + { + "epoch": 1.5980613464347364, + "grad_norm": 1.5933227500597664, + "learning_rate": 2.132072350836164e-06, + "loss": 0.2147202491760254, + "step": 6018 + }, + { + "epoch": 1.5983269154162794, + "grad_norm": 1.5455916288717333, + "learning_rate": 2.1293630531504873e-06, + "loss": 
0.23091933131217957, + "step": 6019 + }, + { + "epoch": 1.5985924843978223, + "grad_norm": 1.290869089573798, + "learning_rate": 2.1266552728307876e-06, + "loss": 0.220037579536438, + "step": 6020 + }, + { + "epoch": 1.5988580533793653, + "grad_norm": 1.3343924424387823, + "learning_rate": 2.1239490103990946e-06, + "loss": 0.25520551204681396, + "step": 6021 + }, + { + "epoch": 1.5991236223609082, + "grad_norm": 1.412222062207012, + "learning_rate": 2.1212442663771427e-06, + "loss": 0.23216915130615234, + "step": 6022 + }, + { + "epoch": 1.5993891913424512, + "grad_norm": 1.381515312381825, + "learning_rate": 2.118541041286374e-06, + "loss": 0.22098806500434875, + "step": 6023 + }, + { + "epoch": 1.5996547603239941, + "grad_norm": 1.4609594644715316, + "learning_rate": 2.11583933564794e-06, + "loss": 0.261300265789032, + "step": 6024 + }, + { + "epoch": 1.599920329305537, + "grad_norm": 1.2095539498781858, + "learning_rate": 2.113139149982698e-06, + "loss": 0.20427154004573822, + "step": 6025 + }, + { + "epoch": 1.60018589828708, + "grad_norm": 1.2158101663646808, + "learning_rate": 2.110440484811209e-06, + "loss": 0.20700547099113464, + "step": 6026 + }, + { + "epoch": 1.600451467268623, + "grad_norm": 1.4331467444820847, + "learning_rate": 2.1077433406537475e-06, + "loss": 0.2789752185344696, + "step": 6027 + }, + { + "epoch": 1.600717036250166, + "grad_norm": 1.2991321976135584, + "learning_rate": 2.1050477180302885e-06, + "loss": 0.2205841988325119, + "step": 6028 + }, + { + "epoch": 1.6009826052317089, + "grad_norm": 1.3197920849647402, + "learning_rate": 2.1023536174605184e-06, + "loss": 0.24921822547912598, + "step": 6029 + }, + { + "epoch": 1.6012481742132518, + "grad_norm": 2.014197229906981, + "learning_rate": 2.0996610394638228e-06, + "loss": 0.2516329288482666, + "step": 6030 + }, + { + "epoch": 1.6015137431947948, + "grad_norm": 1.2656936665142342, + "learning_rate": 2.096969984559306e-06, + "loss": 0.21832503378391266, + "step": 6031 + }, + { + 
"epoch": 1.6017793121763377, + "grad_norm": 1.530808592055088, + "learning_rate": 2.094280453265769e-06, + "loss": 0.2499273419380188, + "step": 6032 + }, + { + "epoch": 1.6020448811578807, + "grad_norm": 1.167125195859278, + "learning_rate": 2.09159244610172e-06, + "loss": 0.21701282262802124, + "step": 6033 + }, + { + "epoch": 1.6023104501394236, + "grad_norm": 1.2536801575307182, + "learning_rate": 2.0889059635853783e-06, + "loss": 0.24446213245391846, + "step": 6034 + }, + { + "epoch": 1.6025760191209666, + "grad_norm": 1.412317581200794, + "learning_rate": 2.0862210062346622e-06, + "loss": 0.27299973368644714, + "step": 6035 + }, + { + "epoch": 1.6028415881025095, + "grad_norm": 1.320945278338079, + "learning_rate": 2.0835375745672027e-06, + "loss": 0.2384832501411438, + "step": 6036 + }, + { + "epoch": 1.6031071570840525, + "grad_norm": 1.340788170535406, + "learning_rate": 2.0808556691003335e-06, + "loss": 0.2563338875770569, + "step": 6037 + }, + { + "epoch": 1.6033727260655954, + "grad_norm": 1.5240284764155023, + "learning_rate": 2.0781752903510954e-06, + "loss": 0.29148975014686584, + "step": 6038 + }, + { + "epoch": 1.6036382950471384, + "grad_norm": 1.1673304070468655, + "learning_rate": 2.0754964388362264e-06, + "loss": 0.24276503920555115, + "step": 6039 + }, + { + "epoch": 1.6039038640286813, + "grad_norm": 1.2629655044665746, + "learning_rate": 2.0728191150721866e-06, + "loss": 0.1863931119441986, + "step": 6040 + }, + { + "epoch": 1.6041694330102243, + "grad_norm": 1.1731073698012655, + "learning_rate": 2.0701433195751286e-06, + "loss": 0.21270868182182312, + "step": 6041 + }, + { + "epoch": 1.6044350019917673, + "grad_norm": 1.2780583308550695, + "learning_rate": 2.0674690528609155e-06, + "loss": 0.21542516350746155, + "step": 6042 + }, + { + "epoch": 1.6047005709733102, + "grad_norm": 1.256432235067539, + "learning_rate": 2.0647963154451124e-06, + "loss": 0.23099860548973083, + "step": 6043 + }, + { + "epoch": 1.6049661399548532, + "grad_norm": 
1.1769565332020941, + "learning_rate": 2.062125107842993e-06, + "loss": 0.22757291793823242, + "step": 6044 + }, + { + "epoch": 1.605231708936396, + "grad_norm": 1.317404807729369, + "learning_rate": 2.0594554305695346e-06, + "loss": 0.2370409518480301, + "step": 6045 + }, + { + "epoch": 1.605497277917939, + "grad_norm": 1.1803781252235817, + "learning_rate": 2.0567872841394186e-06, + "loss": 0.21620309352874756, + "step": 6046 + }, + { + "epoch": 1.605762846899482, + "grad_norm": 1.2191738819977833, + "learning_rate": 2.0541206690670324e-06, + "loss": 0.22821158170700073, + "step": 6047 + }, + { + "epoch": 1.606028415881025, + "grad_norm": 1.385940331470305, + "learning_rate": 2.0514555858664663e-06, + "loss": 0.24930253624916077, + "step": 6048 + }, + { + "epoch": 1.606293984862568, + "grad_norm": 1.3966922562239508, + "learning_rate": 2.048792035051521e-06, + "loss": 0.2491561770439148, + "step": 6049 + }, + { + "epoch": 1.6065595538441109, + "grad_norm": 1.3037697337655914, + "learning_rate": 2.046130017135697e-06, + "loss": 0.20652002096176147, + "step": 6050 + }, + { + "epoch": 1.6068251228256538, + "grad_norm": 1.1970911046995705, + "learning_rate": 2.0434695326321975e-06, + "loss": 0.25670793652534485, + "step": 6051 + }, + { + "epoch": 1.6070906918071968, + "grad_norm": 1.2469219040368793, + "learning_rate": 2.0408105820539328e-06, + "loss": 0.2328418493270874, + "step": 6052 + }, + { + "epoch": 1.6073562607887397, + "grad_norm": 1.2657559287734064, + "learning_rate": 2.0381531659135213e-06, + "loss": 0.20811162889003754, + "step": 6053 + }, + { + "epoch": 1.6076218297702827, + "grad_norm": 1.2637409014709644, + "learning_rate": 2.0354972847232756e-06, + "loss": 0.24068522453308105, + "step": 6054 + }, + { + "epoch": 1.6078873987518256, + "grad_norm": 1.3537388998191249, + "learning_rate": 2.032842938995221e-06, + "loss": 0.2519197463989258, + "step": 6055 + }, + { + "epoch": 1.6081529677333686, + "grad_norm": 1.349413355425799, + "learning_rate": 
2.030190129241083e-06, + "loss": 0.2293267697095871, + "step": 6056 + }, + { + "epoch": 1.6084185367149118, + "grad_norm": 1.8474927483406436, + "learning_rate": 2.027538855972291e-06, + "loss": 0.22398510575294495, + "step": 6057 + }, + { + "epoch": 1.6086841056964547, + "grad_norm": 1.4186878733418118, + "learning_rate": 2.0248891196999833e-06, + "loss": 0.23074102401733398, + "step": 6058 + }, + { + "epoch": 1.6089496746779977, + "grad_norm": 1.352152679115686, + "learning_rate": 2.0222409209349957e-06, + "loss": 0.2618173658847809, + "step": 6059 + }, + { + "epoch": 1.6092152436595406, + "grad_norm": 1.2898742263880296, + "learning_rate": 2.0195942601878703e-06, + "loss": 0.25361114740371704, + "step": 6060 + }, + { + "epoch": 1.6094808126410836, + "grad_norm": 1.2270527625039152, + "learning_rate": 2.016949137968851e-06, + "loss": 0.2276519238948822, + "step": 6061 + }, + { + "epoch": 1.6097463816226265, + "grad_norm": 1.3155356069823825, + "learning_rate": 2.0143055547878863e-06, + "loss": 0.20834363996982574, + "step": 6062 + }, + { + "epoch": 1.6100119506041695, + "grad_norm": 1.348708703656222, + "learning_rate": 2.011663511154628e-06, + "loss": 0.2579394578933716, + "step": 6063 + }, + { + "epoch": 1.6102775195857124, + "grad_norm": 1.2574503425710122, + "learning_rate": 2.009023007578431e-06, + "loss": 0.22118912637233734, + "step": 6064 + }, + { + "epoch": 1.6105430885672554, + "grad_norm": 1.1631210187007555, + "learning_rate": 2.0063840445683537e-06, + "loss": 0.1881515383720398, + "step": 6065 + }, + { + "epoch": 1.6108086575487983, + "grad_norm": 1.2884662240297928, + "learning_rate": 2.003746622633155e-06, + "loss": 0.2270805984735489, + "step": 6066 + }, + { + "epoch": 1.6110742265303413, + "grad_norm": 1.4261065534360056, + "learning_rate": 2.0011107422813013e-06, + "loss": 0.26356351375579834, + "step": 6067 + }, + { + "epoch": 1.6113397955118842, + "grad_norm": 1.2506363457624738, + "learning_rate": 1.9984764040209615e-06, + "loss": 
0.22937676310539246, + "step": 6068 + }, + { + "epoch": 1.6116053644934272, + "grad_norm": 1.329188800311282, + "learning_rate": 1.99584360836e-06, + "loss": 0.25062739849090576, + "step": 6069 + }, + { + "epoch": 1.6118709334749701, + "grad_norm": 1.1593663351806502, + "learning_rate": 1.993212355805989e-06, + "loss": 0.2031324952840805, + "step": 6070 + }, + { + "epoch": 1.612136502456513, + "grad_norm": 1.3722085699931008, + "learning_rate": 1.990582646866206e-06, + "loss": 0.25769656896591187, + "step": 6071 + }, + { + "epoch": 1.612402071438056, + "grad_norm": 1.3184109520906713, + "learning_rate": 1.987954482047626e-06, + "loss": 0.23856252431869507, + "step": 6072 + }, + { + "epoch": 1.612667640419599, + "grad_norm": 1.3452730145342116, + "learning_rate": 1.9853278618569284e-06, + "loss": 0.2336723804473877, + "step": 6073 + }, + { + "epoch": 1.612933209401142, + "grad_norm": 1.3427497614935235, + "learning_rate": 1.9827027868004942e-06, + "loss": 0.22327622771263123, + "step": 6074 + }, + { + "epoch": 1.6131987783826849, + "grad_norm": 1.302817235652594, + "learning_rate": 1.980079257384405e-06, + "loss": 0.26695019006729126, + "step": 6075 + }, + { + "epoch": 1.6134643473642278, + "grad_norm": 1.174792834468628, + "learning_rate": 1.9774572741144514e-06, + "loss": 0.2467387616634369, + "step": 6076 + }, + { + "epoch": 1.6137299163457708, + "grad_norm": 1.3974546997540778, + "learning_rate": 1.9748368374961193e-06, + "loss": 0.25473737716674805, + "step": 6077 + }, + { + "epoch": 1.6139954853273137, + "grad_norm": 1.295354894556923, + "learning_rate": 1.972217948034596e-06, + "loss": 0.25508594512939453, + "step": 6078 + }, + { + "epoch": 1.6142610543088567, + "grad_norm": 1.2627621502033493, + "learning_rate": 1.969600606234774e-06, + "loss": 0.23020131886005402, + "step": 6079 + }, + { + "epoch": 1.6145266232903996, + "grad_norm": 1.2036992831321345, + "learning_rate": 1.9669848126012447e-06, + "loss": 0.249805748462677, + "step": 6080 + }, + { + "epoch": 
1.6147921922719426, + "grad_norm": 1.2304217597704168, + "learning_rate": 1.964370567638303e-06, + "loss": 0.2377707064151764, + "step": 6081 + }, + { + "epoch": 1.6150577612534855, + "grad_norm": 1.3812388616949685, + "learning_rate": 1.9617578718499452e-06, + "loss": 0.28656789660453796, + "step": 6082 + }, + { + "epoch": 1.6153233302350285, + "grad_norm": 1.3083477730508752, + "learning_rate": 1.9591467257398668e-06, + "loss": 0.22079989314079285, + "step": 6083 + }, + { + "epoch": 1.6155888992165715, + "grad_norm": 1.048982897357468, + "learning_rate": 1.9565371298114666e-06, + "loss": 0.1993042230606079, + "step": 6084 + }, + { + "epoch": 1.6158544681981146, + "grad_norm": 1.1837758778278344, + "learning_rate": 1.9539290845678438e-06, + "loss": 0.20818357169628143, + "step": 6085 + }, + { + "epoch": 1.6161200371796576, + "grad_norm": 1.2192677831294998, + "learning_rate": 1.9513225905117996e-06, + "loss": 0.20531761646270752, + "step": 6086 + }, + { + "epoch": 1.6163856061612005, + "grad_norm": 1.2499003349392819, + "learning_rate": 1.948717648145834e-06, + "loss": 0.23414376378059387, + "step": 6087 + }, + { + "epoch": 1.6166511751427435, + "grad_norm": 1.2073482694002922, + "learning_rate": 1.9461142579721493e-06, + "loss": 0.2025471031665802, + "step": 6088 + }, + { + "epoch": 1.6169167441242864, + "grad_norm": 1.4729414889087271, + "learning_rate": 1.943512420492649e-06, + "loss": 0.19130446016788483, + "step": 6089 + }, + { + "epoch": 1.6171823131058294, + "grad_norm": 1.1947055473554775, + "learning_rate": 1.940912136208938e-06, + "loss": 0.21637848019599915, + "step": 6090 + }, + { + "epoch": 1.6174478820873723, + "grad_norm": 1.301401884532825, + "learning_rate": 1.9383134056223176e-06, + "loss": 0.26844075322151184, + "step": 6091 + }, + { + "epoch": 1.6177134510689153, + "grad_norm": 1.1755891449306313, + "learning_rate": 1.935716229233794e-06, + "loss": 0.19573305547237396, + "step": 6092 + }, + { + "epoch": 1.6179790200504582, + "grad_norm": 
1.2705214543802177, + "learning_rate": 1.93312060754407e-06, + "loss": 0.22705954313278198, + "step": 6093 + }, + { + "epoch": 1.6182445890320012, + "grad_norm": 1.279170245457384, + "learning_rate": 1.9305265410535545e-06, + "loss": 0.2505400478839874, + "step": 6094 + }, + { + "epoch": 1.6185101580135441, + "grad_norm": 1.2108711177458409, + "learning_rate": 1.927934030262353e-06, + "loss": 0.2328193187713623, + "step": 6095 + }, + { + "epoch": 1.618775726995087, + "grad_norm": 1.2588974628750198, + "learning_rate": 1.9253430756702674e-06, + "loss": 0.23876577615737915, + "step": 6096 + }, + { + "epoch": 1.61904129597663, + "grad_norm": 1.3685755624123837, + "learning_rate": 1.9227536777768063e-06, + "loss": 0.2390732318162918, + "step": 6097 + }, + { + "epoch": 1.619306864958173, + "grad_norm": 1.3858306009370809, + "learning_rate": 1.9201658370811736e-06, + "loss": 0.25231993198394775, + "step": 6098 + }, + { + "epoch": 1.619572433939716, + "grad_norm": 1.2520374949609627, + "learning_rate": 1.917579554082274e-06, + "loss": 0.21527352929115295, + "step": 6099 + }, + { + "epoch": 1.619838002921259, + "grad_norm": 1.2236250632687489, + "learning_rate": 1.9149948292787133e-06, + "loss": 0.21394580602645874, + "step": 6100 + }, + { + "epoch": 1.6201035719028019, + "grad_norm": 1.3465338603905943, + "learning_rate": 1.912411663168796e-06, + "loss": 0.26093196868896484, + "step": 6101 + }, + { + "epoch": 1.6203691408843448, + "grad_norm": 1.3518497357465815, + "learning_rate": 1.9098300562505266e-06, + "loss": 0.2631412744522095, + "step": 6102 + }, + { + "epoch": 1.6206347098658878, + "grad_norm": 1.3007944720423297, + "learning_rate": 1.9072500090216073e-06, + "loss": 0.270250141620636, + "step": 6103 + }, + { + "epoch": 1.6209002788474307, + "grad_norm": 1.3385737712068424, + "learning_rate": 1.9046715219794397e-06, + "loss": 0.22944031655788422, + "step": 6104 + }, + { + "epoch": 1.6211658478289737, + "grad_norm": 1.2125488505372424, + "learning_rate": 
1.902094595621129e-06, + "loss": 0.24429070949554443, + "step": 6105 + }, + { + "epoch": 1.6214314168105166, + "grad_norm": 1.2581532570405378, + "learning_rate": 1.8995192304434729e-06, + "loss": 0.25656238198280334, + "step": 6106 + }, + { + "epoch": 1.6216969857920596, + "grad_norm": 1.3466122688772229, + "learning_rate": 1.8969454269429743e-06, + "loss": 0.2575233280658722, + "step": 6107 + }, + { + "epoch": 1.6219625547736025, + "grad_norm": 1.245984919504028, + "learning_rate": 1.8943731856158299e-06, + "loss": 0.24881063401699066, + "step": 6108 + }, + { + "epoch": 1.6222281237551455, + "grad_norm": 1.2845731125917577, + "learning_rate": 1.8918025069579382e-06, + "loss": 0.23353847861289978, + "step": 6109 + }, + { + "epoch": 1.6224936927366884, + "grad_norm": 1.2505489106727152, + "learning_rate": 1.8892333914648953e-06, + "loss": 0.21085457503795624, + "step": 6110 + }, + { + "epoch": 1.6227592617182314, + "grad_norm": 1.4134001131082032, + "learning_rate": 1.8866658396319947e-06, + "loss": 0.28600943088531494, + "step": 6111 + }, + { + "epoch": 1.6230248306997743, + "grad_norm": 1.1689838110439057, + "learning_rate": 1.8840998519542352e-06, + "loss": 0.22580507397651672, + "step": 6112 + }, + { + "epoch": 1.6232903996813173, + "grad_norm": 1.212526750953587, + "learning_rate": 1.8815354289263066e-06, + "loss": 0.19310800731182098, + "step": 6113 + }, + { + "epoch": 1.6235559686628602, + "grad_norm": 1.3020905454433194, + "learning_rate": 1.8789725710425988e-06, + "loss": 0.21633204817771912, + "step": 6114 + }, + { + "epoch": 1.6238215376444032, + "grad_norm": 1.4315370828946672, + "learning_rate": 1.8764112787972e-06, + "loss": 0.21346023678779602, + "step": 6115 + }, + { + "epoch": 1.6240871066259461, + "grad_norm": 1.21392020481053, + "learning_rate": 1.8738515526838986e-06, + "loss": 0.21206694841384888, + "step": 6116 + }, + { + "epoch": 1.624352675607489, + "grad_norm": 1.3197096686410696, + "learning_rate": 1.8712933931961773e-06, + "loss": 
0.2135339230298996, + "step": 6117 + }, + { + "epoch": 1.624618244589032, + "grad_norm": 1.2484635869956482, + "learning_rate": 1.8687368008272243e-06, + "loss": 0.2168758660554886, + "step": 6118 + }, + { + "epoch": 1.624883813570575, + "grad_norm": 1.1804251189525716, + "learning_rate": 1.866181776069914e-06, + "loss": 0.20825617015361786, + "step": 6119 + }, + { + "epoch": 1.625149382552118, + "grad_norm": 1.291082575518304, + "learning_rate": 1.863628319416826e-06, + "loss": 0.25367867946624756, + "step": 6120 + }, + { + "epoch": 1.625414951533661, + "grad_norm": 1.3053498393136334, + "learning_rate": 1.8610764313602404e-06, + "loss": 0.21604284644126892, + "step": 6121 + }, + { + "epoch": 1.6256805205152038, + "grad_norm": 1.2871138327885168, + "learning_rate": 1.8585261123921283e-06, + "loss": 0.2324865758419037, + "step": 6122 + }, + { + "epoch": 1.6259460894967468, + "grad_norm": 1.2467444217539543, + "learning_rate": 1.8559773630041632e-06, + "loss": 0.2077629417181015, + "step": 6123 + }, + { + "epoch": 1.6262116584782897, + "grad_norm": 1.1704936500874914, + "learning_rate": 1.8534301836877122e-06, + "loss": 0.19919469952583313, + "step": 6124 + }, + { + "epoch": 1.6264772274598327, + "grad_norm": 1.1998850682672693, + "learning_rate": 1.8508845749338412e-06, + "loss": 0.21069160103797913, + "step": 6125 + }, + { + "epoch": 1.6267427964413756, + "grad_norm": 1.218804714337499, + "learning_rate": 1.8483405372333152e-06, + "loss": 0.2286640703678131, + "step": 6126 + }, + { + "epoch": 1.6270083654229186, + "grad_norm": 1.33630910648056, + "learning_rate": 1.8457980710765932e-06, + "loss": 0.2430541068315506, + "step": 6127 + }, + { + "epoch": 1.6272739344044616, + "grad_norm": 1.3713498598627625, + "learning_rate": 1.8432571769538344e-06, + "loss": 0.21875709295272827, + "step": 6128 + }, + { + "epoch": 1.6275395033860045, + "grad_norm": 1.4416966555618131, + "learning_rate": 1.8407178553548876e-06, + "loss": 0.22591018676757812, + "step": 6129 + }, + { + 
"epoch": 1.6278050723675475, + "grad_norm": 1.362917465597037, + "learning_rate": 1.8381801067693129e-06, + "loss": 0.25429075956344604, + "step": 6130 + }, + { + "epoch": 1.6280706413490904, + "grad_norm": 1.31452454626215, + "learning_rate": 1.8356439316863528e-06, + "loss": 0.2437858283519745, + "step": 6131 + }, + { + "epoch": 1.6283362103306334, + "grad_norm": 1.2489983792436092, + "learning_rate": 1.8331093305949532e-06, + "loss": 0.24196262657642365, + "step": 6132 + }, + { + "epoch": 1.6286017793121763, + "grad_norm": 1.3756170241894088, + "learning_rate": 1.8305763039837576e-06, + "loss": 0.25779271125793457, + "step": 6133 + }, + { + "epoch": 1.6288673482937193, + "grad_norm": 1.223955710903011, + "learning_rate": 1.8280448523410987e-06, + "loss": 0.23418015241622925, + "step": 6134 + }, + { + "epoch": 1.6291329172752622, + "grad_norm": 1.3748973147827792, + "learning_rate": 1.8255149761550128e-06, + "loss": 0.2670775353908539, + "step": 6135 + }, + { + "epoch": 1.6293984862568052, + "grad_norm": 1.423176544673552, + "learning_rate": 1.822986675913231e-06, + "loss": 0.29342639446258545, + "step": 6136 + }, + { + "epoch": 1.6296640552383481, + "grad_norm": 1.244422511511833, + "learning_rate": 1.8204599521031785e-06, + "loss": 0.22768062353134155, + "step": 6137 + }, + { + "epoch": 1.629929624219891, + "grad_norm": 1.6355607569945512, + "learning_rate": 1.817934805211976e-06, + "loss": 0.23938167095184326, + "step": 6138 + }, + { + "epoch": 1.630195193201434, + "grad_norm": 1.311916117620117, + "learning_rate": 1.8154112357264474e-06, + "loss": 0.1982264518737793, + "step": 6139 + }, + { + "epoch": 1.630460762182977, + "grad_norm": 1.3026965235969699, + "learning_rate": 1.8128892441331047e-06, + "loss": 0.23591312766075134, + "step": 6140 + }, + { + "epoch": 1.63072633116452, + "grad_norm": 1.259123916156089, + "learning_rate": 1.8103688309181567e-06, + "loss": 0.20317673683166504, + "step": 6141 + }, + { + "epoch": 1.6309919001460629, + "grad_norm": 
1.2846300858550195, + "learning_rate": 1.8078499965675112e-06, + "loss": 0.233676478266716, + "step": 6142 + }, + { + "epoch": 1.6312574691276058, + "grad_norm": 1.3296785293607047, + "learning_rate": 1.8053327415667688e-06, + "loss": 0.22850775718688965, + "step": 6143 + }, + { + "epoch": 1.6315230381091488, + "grad_norm": 1.2850656633806874, + "learning_rate": 1.8028170664012268e-06, + "loss": 0.2603572607040405, + "step": 6144 + }, + { + "epoch": 1.6317886070906917, + "grad_norm": 1.3208849168125785, + "learning_rate": 1.8003029715558773e-06, + "loss": 0.27881523966789246, + "step": 6145 + }, + { + "epoch": 1.6320541760722347, + "grad_norm": 1.225668329292659, + "learning_rate": 1.797790457515406e-06, + "loss": 0.21744176745414734, + "step": 6146 + }, + { + "epoch": 1.6323197450537776, + "grad_norm": 1.2220588910103882, + "learning_rate": 1.7952795247642008e-06, + "loss": 0.20449542999267578, + "step": 6147 + }, + { + "epoch": 1.6325853140353206, + "grad_norm": 1.3015735321136237, + "learning_rate": 1.7927701737863402e-06, + "loss": 0.25641053915023804, + "step": 6148 + }, + { + "epoch": 1.6328508830168635, + "grad_norm": 1.294201240106412, + "learning_rate": 1.7902624050655914e-06, + "loss": 0.23583751916885376, + "step": 6149 + }, + { + "epoch": 1.6331164519984065, + "grad_norm": 1.4310897316272893, + "learning_rate": 1.787756219085427e-06, + "loss": 0.2709866762161255, + "step": 6150 + }, + { + "epoch": 1.6333820209799494, + "grad_norm": 1.2536554341378991, + "learning_rate": 1.785251616329009e-06, + "loss": 0.233103945851326, + "step": 6151 + }, + { + "epoch": 1.6336475899614924, + "grad_norm": 1.2660813048243769, + "learning_rate": 1.7827485972791957e-06, + "loss": 0.2665184438228607, + "step": 6152 + }, + { + "epoch": 1.6339131589430353, + "grad_norm": 1.2551185732946457, + "learning_rate": 1.7802471624185392e-06, + "loss": 0.20934605598449707, + "step": 6153 + }, + { + "epoch": 1.6341787279245783, + "grad_norm": 1.2179362426676639, + "learning_rate": 
1.7777473122292866e-06, + "loss": 0.2102464735507965, + "step": 6154 + }, + { + "epoch": 1.6344442969061213, + "grad_norm": 1.2289784110367914, + "learning_rate": 1.7752490471933769e-06, + "loss": 0.22889986634254456, + "step": 6155 + }, + { + "epoch": 1.6347098658876642, + "grad_norm": 1.3627659705359922, + "learning_rate": 1.772752367792452e-06, + "loss": 0.2261584997177124, + "step": 6156 + }, + { + "epoch": 1.6349754348692072, + "grad_norm": 1.2186249427048736, + "learning_rate": 1.7702572745078395e-06, + "loss": 0.21456710994243622, + "step": 6157 + }, + { + "epoch": 1.63524100385075, + "grad_norm": 1.1535452073956258, + "learning_rate": 1.7677637678205627e-06, + "loss": 0.22762097418308258, + "step": 6158 + }, + { + "epoch": 1.635506572832293, + "grad_norm": 1.306484526102534, + "learning_rate": 1.7652718482113417e-06, + "loss": 0.24772633612155914, + "step": 6159 + }, + { + "epoch": 1.635772141813836, + "grad_norm": 1.3290630048425123, + "learning_rate": 1.7627815161605887e-06, + "loss": 0.22980757057666779, + "step": 6160 + }, + { + "epoch": 1.636037710795379, + "grad_norm": 1.1593602123779645, + "learning_rate": 1.760292772148411e-06, + "loss": 0.19560125470161438, + "step": 6161 + }, + { + "epoch": 1.636303279776922, + "grad_norm": 1.388673809129743, + "learning_rate": 1.7578056166546086e-06, + "loss": 0.23733064532279968, + "step": 6162 + }, + { + "epoch": 1.6365688487584649, + "grad_norm": 1.2026681813349183, + "learning_rate": 1.7553200501586743e-06, + "loss": 0.21064560115337372, + "step": 6163 + }, + { + "epoch": 1.6368344177400078, + "grad_norm": 1.3444341606502546, + "learning_rate": 1.7528360731397986e-06, + "loss": 0.26709994673728943, + "step": 6164 + }, + { + "epoch": 1.6370999867215508, + "grad_norm": 1.2755110888757868, + "learning_rate": 1.750353686076861e-06, + "loss": 0.26555943489074707, + "step": 6165 + }, + { + "epoch": 1.6373655557030937, + "grad_norm": 1.3299250322981557, + "learning_rate": 1.7478728894484375e-06, + "loss": 
0.24480760097503662, + "step": 6166 + }, + { + "epoch": 1.6376311246846367, + "grad_norm": 1.2560095314061934, + "learning_rate": 1.7453936837327967e-06, + "loss": 0.2170884907245636, + "step": 6167 + }, + { + "epoch": 1.6378966936661796, + "grad_norm": 1.340756013397369, + "learning_rate": 1.7429160694078983e-06, + "loss": 0.24728982150554657, + "step": 6168 + }, + { + "epoch": 1.6381622626477228, + "grad_norm": 1.1911402182063675, + "learning_rate": 1.7404400469513994e-06, + "loss": 0.20886945724487305, + "step": 6169 + }, + { + "epoch": 1.6384278316292658, + "grad_norm": 1.2150445755778985, + "learning_rate": 1.7379656168406467e-06, + "loss": 0.1892474740743637, + "step": 6170 + }, + { + "epoch": 1.6386934006108087, + "grad_norm": 1.3004801024505461, + "learning_rate": 1.7354927795526821e-06, + "loss": 0.24953782558441162, + "step": 6171 + }, + { + "epoch": 1.6389589695923517, + "grad_norm": 1.2292705802712374, + "learning_rate": 1.7330215355642377e-06, + "loss": 0.2311600148677826, + "step": 6172 + }, + { + "epoch": 1.6392245385738946, + "grad_norm": 1.2596864005467026, + "learning_rate": 1.73055188535174e-06, + "loss": 0.24018675088882446, + "step": 6173 + }, + { + "epoch": 1.6394901075554376, + "grad_norm": 1.3394449685829455, + "learning_rate": 1.7280838293913116e-06, + "loss": 0.22607022523880005, + "step": 6174 + }, + { + "epoch": 1.6397556765369805, + "grad_norm": 1.2860534255043978, + "learning_rate": 1.7256173681587619e-06, + "loss": 0.23725482821464539, + "step": 6175 + }, + { + "epoch": 1.6400212455185235, + "grad_norm": 1.2500709715234832, + "learning_rate": 1.723152502129597e-06, + "loss": 0.241235613822937, + "step": 6176 + }, + { + "epoch": 1.6402868145000664, + "grad_norm": 1.2070755501863832, + "learning_rate": 1.7206892317790136e-06, + "loss": 0.2150690108537674, + "step": 6177 + }, + { + "epoch": 1.6405523834816094, + "grad_norm": 1.2557873581014805, + "learning_rate": 1.7182275575819007e-06, + "loss": 0.22133421897888184, + "step": 6178 + }, 
+ { + "epoch": 1.6408179524631523, + "grad_norm": 1.1297884729403, + "learning_rate": 1.7157674800128399e-06, + "loss": 0.1937463879585266, + "step": 6179 + }, + { + "epoch": 1.6410835214446953, + "grad_norm": 1.0851305240668396, + "learning_rate": 1.7133089995461062e-06, + "loss": 0.18938027322292328, + "step": 6180 + }, + { + "epoch": 1.6413490904262382, + "grad_norm": 1.2621430482402598, + "learning_rate": 1.7108521166556646e-06, + "loss": 0.23577997088432312, + "step": 6181 + }, + { + "epoch": 1.6416146594077812, + "grad_norm": 1.2915526813468403, + "learning_rate": 1.7083968318151734e-06, + "loss": 0.2712448537349701, + "step": 6182 + }, + { + "epoch": 1.6418802283893241, + "grad_norm": 1.276409938985324, + "learning_rate": 1.7059431454979825e-06, + "loss": 0.24242255091667175, + "step": 6183 + }, + { + "epoch": 1.642145797370867, + "grad_norm": 1.3152058895449834, + "learning_rate": 1.7034910581771347e-06, + "loss": 0.22521010041236877, + "step": 6184 + }, + { + "epoch": 1.64241136635241, + "grad_norm": 1.3840145244958133, + "learning_rate": 1.7010405703253618e-06, + "loss": 0.22026273608207703, + "step": 6185 + }, + { + "epoch": 1.642676935333953, + "grad_norm": 1.458737402535225, + "learning_rate": 1.6985916824150894e-06, + "loss": 0.22726528346538544, + "step": 6186 + }, + { + "epoch": 1.642942504315496, + "grad_norm": 1.3396783040947258, + "learning_rate": 1.6961443949184353e-06, + "loss": 0.25172409415245056, + "step": 6187 + }, + { + "epoch": 1.6432080732970389, + "grad_norm": 1.1393591185728944, + "learning_rate": 1.6936987083072065e-06, + "loss": 0.21173113584518433, + "step": 6188 + }, + { + "epoch": 1.6434736422785818, + "grad_norm": 1.3589729407555038, + "learning_rate": 1.6912546230529036e-06, + "loss": 0.22596749663352966, + "step": 6189 + }, + { + "epoch": 1.6437392112601248, + "grad_norm": 1.3604263454917045, + "learning_rate": 1.6888121396267166e-06, + "loss": 0.2749077081680298, + "step": 6190 + }, + { + "epoch": 1.6440047802416677, + 
"grad_norm": 2.5555069132462283, + "learning_rate": 1.6863712584995252e-06, + "loss": 0.22150780260562897, + "step": 6191 + }, + { + "epoch": 1.6442703492232107, + "grad_norm": 1.2838243253096144, + "learning_rate": 1.6839319801419073e-06, + "loss": 0.23437368869781494, + "step": 6192 + }, + { + "epoch": 1.6445359182047536, + "grad_norm": 1.3069256977628543, + "learning_rate": 1.681494305024125e-06, + "loss": 0.22949008643627167, + "step": 6193 + }, + { + "epoch": 1.6448014871862966, + "grad_norm": 1.2956112975441718, + "learning_rate": 1.6790582336161332e-06, + "loss": 0.24147525429725647, + "step": 6194 + }, + { + "epoch": 1.6450670561678395, + "grad_norm": 1.180082798545332, + "learning_rate": 1.6766237663875773e-06, + "loss": 0.2001456618309021, + "step": 6195 + }, + { + "epoch": 1.6453326251493825, + "grad_norm": 1.2710753216206616, + "learning_rate": 1.674190903807794e-06, + "loss": 0.17668186128139496, + "step": 6196 + }, + { + "epoch": 1.6455981941309257, + "grad_norm": 1.369840319031622, + "learning_rate": 1.6717596463458107e-06, + "loss": 0.24585255980491638, + "step": 6197 + }, + { + "epoch": 1.6458637631124686, + "grad_norm": 1.2328642285488454, + "learning_rate": 1.6693299944703479e-06, + "loss": 0.2234572172164917, + "step": 6198 + }, + { + "epoch": 1.6461293320940116, + "grad_norm": 1.2369910191993496, + "learning_rate": 1.6669019486498083e-06, + "loss": 0.2007240653038025, + "step": 6199 + }, + { + "epoch": 1.6463949010755545, + "grad_norm": 1.317383450933259, + "learning_rate": 1.6644755093522913e-06, + "loss": 0.21926215291023254, + "step": 6200 + }, + { + "epoch": 1.6466604700570975, + "grad_norm": 1.3404302006039666, + "learning_rate": 1.662050677045589e-06, + "loss": 0.24797898530960083, + "step": 6201 + }, + { + "epoch": 1.6469260390386404, + "grad_norm": 1.285343354391859, + "learning_rate": 1.65962745219718e-06, + "loss": 0.22087037563323975, + "step": 6202 + }, + { + "epoch": 1.6471916080201834, + "grad_norm": 1.2765781805195457, + 
"learning_rate": 1.6572058352742327e-06, + "loss": 0.23073960840702057, + "step": 6203 + }, + { + "epoch": 1.6474571770017263, + "grad_norm": 1.3644493807061109, + "learning_rate": 1.6547858267436056e-06, + "loss": 0.2430298924446106, + "step": 6204 + }, + { + "epoch": 1.6477227459832693, + "grad_norm": 1.286198443262182, + "learning_rate": 1.6523674270718493e-06, + "loss": 0.23337247967720032, + "step": 6205 + }, + { + "epoch": 1.6479883149648122, + "grad_norm": 1.2144238817830517, + "learning_rate": 1.6499506367252016e-06, + "loss": 0.22141093015670776, + "step": 6206 + }, + { + "epoch": 1.6482538839463552, + "grad_norm": 1.280282959866893, + "learning_rate": 1.647535456169591e-06, + "loss": 0.23247988522052765, + "step": 6207 + }, + { + "epoch": 1.6485194529278981, + "grad_norm": 1.3728921390628253, + "learning_rate": 1.6451218858706374e-06, + "loss": 0.2659391760826111, + "step": 6208 + }, + { + "epoch": 1.648785021909441, + "grad_norm": 1.2534645715863684, + "learning_rate": 1.642709926293644e-06, + "loss": 0.2154998630285263, + "step": 6209 + }, + { + "epoch": 1.649050590890984, + "grad_norm": 1.322825591754104, + "learning_rate": 1.6402995779036146e-06, + "loss": 0.20363599061965942, + "step": 6210 + }, + { + "epoch": 1.649316159872527, + "grad_norm": 1.3775669953664806, + "learning_rate": 1.6378908411652328e-06, + "loss": 0.23388779163360596, + "step": 6211 + }, + { + "epoch": 1.64958172885407, + "grad_norm": 1.205059730534318, + "learning_rate": 1.6354837165428772e-06, + "loss": 0.20465341210365295, + "step": 6212 + }, + { + "epoch": 1.649847297835613, + "grad_norm": 1.2409004364034002, + "learning_rate": 1.6330782045006088e-06, + "loss": 0.2233584225177765, + "step": 6213 + }, + { + "epoch": 1.6501128668171559, + "grad_norm": 1.313264623251788, + "learning_rate": 1.6306743055021834e-06, + "loss": 0.2880077064037323, + "step": 6214 + }, + { + "epoch": 1.6503784357986988, + "grad_norm": 1.2769524753658168, + "learning_rate": 1.6282720200110458e-06, + 
"loss": 0.23332230746746063, + "step": 6215 + }, + { + "epoch": 1.6506440047802418, + "grad_norm": 1.2682336609825682, + "learning_rate": 1.6258713484903266e-06, + "loss": 0.22191204130649567, + "step": 6216 + }, + { + "epoch": 1.6509095737617847, + "grad_norm": 1.2899982671052521, + "learning_rate": 1.6234722914028478e-06, + "loss": 0.2403659224510193, + "step": 6217 + }, + { + "epoch": 1.6511751427433277, + "grad_norm": 1.2823746538865957, + "learning_rate": 1.6210748492111161e-06, + "loss": 0.2230256348848343, + "step": 6218 + }, + { + "epoch": 1.6514407117248706, + "grad_norm": 1.233703409456991, + "learning_rate": 1.6186790223773375e-06, + "loss": 0.2086302787065506, + "step": 6219 + }, + { + "epoch": 1.6517062807064136, + "grad_norm": 1.2696219439991872, + "learning_rate": 1.6162848113633934e-06, + "loss": 0.22336703538894653, + "step": 6220 + }, + { + "epoch": 1.6519718496879565, + "grad_norm": 1.2026474951561137, + "learning_rate": 1.6138922166308613e-06, + "loss": 0.2354746013879776, + "step": 6221 + }, + { + "epoch": 1.6522374186694995, + "grad_norm": 1.212799588563382, + "learning_rate": 1.6115012386410045e-06, + "loss": 0.23983564972877502, + "step": 6222 + }, + { + "epoch": 1.6525029876510424, + "grad_norm": 1.3394195242071623, + "learning_rate": 1.6091118778547765e-06, + "loss": 0.25468897819519043, + "step": 6223 + }, + { + "epoch": 1.6527685566325854, + "grad_norm": 1.2085737685975797, + "learning_rate": 1.6067241347328166e-06, + "loss": 0.2225346863269806, + "step": 6224 + }, + { + "epoch": 1.6530341256141283, + "grad_norm": 1.4474708027397767, + "learning_rate": 1.6043380097354543e-06, + "loss": 0.28801992535591125, + "step": 6225 + }, + { + "epoch": 1.6532996945956713, + "grad_norm": 1.1308003259460488, + "learning_rate": 1.6019535033227063e-06, + "loss": 0.1869816929101944, + "step": 6226 + }, + { + "epoch": 1.6535652635772142, + "grad_norm": 1.3022141110443597, + "learning_rate": 1.5995706159542768e-06, + "loss": 0.2569049894809723, + "step": 
6227 + }, + { + "epoch": 1.6538308325587572, + "grad_norm": 1.2689496619282572, + "learning_rate": 1.5971893480895583e-06, + "loss": 0.19138488173484802, + "step": 6228 + }, + { + "epoch": 1.6540964015403001, + "grad_norm": 1.2583553251304942, + "learning_rate": 1.5948097001876318e-06, + "loss": 0.23107777535915375, + "step": 6229 + }, + { + "epoch": 1.654361970521843, + "grad_norm": 1.4140324563807463, + "learning_rate": 1.5924316727072652e-06, + "loss": 0.21682313084602356, + "step": 6230 + }, + { + "epoch": 1.654627539503386, + "grad_norm": 1.6445896965406597, + "learning_rate": 1.5900552661069135e-06, + "loss": 0.27629974484443665, + "step": 6231 + }, + { + "epoch": 1.654893108484929, + "grad_norm": 1.2060133562172235, + "learning_rate": 1.587680480844721e-06, + "loss": 0.21919876337051392, + "step": 6232 + }, + { + "epoch": 1.655158677466472, + "grad_norm": 1.4827934801999716, + "learning_rate": 1.5853073173785183e-06, + "loss": 0.2556184232234955, + "step": 6233 + }, + { + "epoch": 1.655424246448015, + "grad_norm": 1.1362954303327644, + "learning_rate": 1.5829357761658214e-06, + "loss": 0.1904449462890625, + "step": 6234 + }, + { + "epoch": 1.6556898154295578, + "grad_norm": 1.2410374365127181, + "learning_rate": 1.5805658576638372e-06, + "loss": 0.1991434246301651, + "step": 6235 + }, + { + "epoch": 1.6559553844111008, + "grad_norm": 1.4428347821081515, + "learning_rate": 1.5781975623294554e-06, + "loss": 0.2609177231788635, + "step": 6236 + }, + { + "epoch": 1.6562209533926437, + "grad_norm": 1.276051044481299, + "learning_rate": 1.575830890619261e-06, + "loss": 0.2481592893600464, + "step": 6237 + }, + { + "epoch": 1.6564865223741867, + "grad_norm": 1.2930470444266673, + "learning_rate": 1.5734658429895156e-06, + "loss": 0.23855090141296387, + "step": 6238 + }, + { + "epoch": 1.6567520913557297, + "grad_norm": 1.326739898505445, + "learning_rate": 1.5711024198961745e-06, + "loss": 0.2480623573064804, + "step": 6239 + }, + { + "epoch": 1.6570176603372726, + 
"grad_norm": 1.4145385747738486, + "learning_rate": 1.5687406217948775e-06, + "loss": 0.2504739463329315, + "step": 6240 + }, + { + "epoch": 1.6572832293188156, + "grad_norm": 1.1843269954841462, + "learning_rate": 1.5663804491409506e-06, + "loss": 0.2068580538034439, + "step": 6241 + }, + { + "epoch": 1.6575487983003585, + "grad_norm": 1.45151426190796, + "learning_rate": 1.5640219023894077e-06, + "loss": 0.2448163628578186, + "step": 6242 + }, + { + "epoch": 1.6578143672819015, + "grad_norm": 1.3391765527579818, + "learning_rate": 1.5616649819949492e-06, + "loss": 0.2514716386795044, + "step": 6243 + }, + { + "epoch": 1.6580799362634444, + "grad_norm": 1.1884099966156902, + "learning_rate": 1.559309688411962e-06, + "loss": 0.2067629098892212, + "step": 6244 + }, + { + "epoch": 1.6583455052449874, + "grad_norm": 1.2042735442206352, + "learning_rate": 1.5569560220945168e-06, + "loss": 0.22909750044345856, + "step": 6245 + }, + { + "epoch": 1.6586110742265303, + "grad_norm": 1.4646403481954997, + "learning_rate": 1.5546039834963745e-06, + "loss": 0.203629732131958, + "step": 6246 + }, + { + "epoch": 1.6588766432080733, + "grad_norm": 1.2050936311763847, + "learning_rate": 1.552253573070981e-06, + "loss": 0.21919086575508118, + "step": 6247 + }, + { + "epoch": 1.6591422121896162, + "grad_norm": 1.4379501702554756, + "learning_rate": 1.549904791271466e-06, + "loss": 0.2535661458969116, + "step": 6248 + }, + { + "epoch": 1.6594077811711592, + "grad_norm": 1.2609582047884877, + "learning_rate": 1.5475576385506475e-06, + "loss": 0.224460631608963, + "step": 6249 + }, + { + "epoch": 1.6596733501527021, + "grad_norm": 1.2625738742925756, + "learning_rate": 1.5452121153610288e-06, + "loss": 0.21925818920135498, + "step": 6250 + }, + { + "epoch": 1.659938919134245, + "grad_norm": 1.2787763694898493, + "learning_rate": 1.5428682221547997e-06, + "loss": 0.2100696563720703, + "step": 6251 + }, + { + "epoch": 1.660204488115788, + "grad_norm": 1.3484219674096825, + 
"learning_rate": 1.540525959383834e-06, + "loss": 0.25982293486595154, + "step": 6252 + }, + { + "epoch": 1.660470057097331, + "grad_norm": 1.2527966644905648, + "learning_rate": 1.538185327499694e-06, + "loss": 0.23615162074565887, + "step": 6253 + }, + { + "epoch": 1.660735626078874, + "grad_norm": 1.2738910414784854, + "learning_rate": 1.5358463269536218e-06, + "loss": 0.2454022467136383, + "step": 6254 + }, + { + "epoch": 1.6610011950604169, + "grad_norm": 1.3825181535789863, + "learning_rate": 1.5335089581965556e-06, + "loss": 0.2330605536699295, + "step": 6255 + }, + { + "epoch": 1.6612667640419598, + "grad_norm": 1.2169082012465264, + "learning_rate": 1.5311732216791087e-06, + "loss": 0.23193006217479706, + "step": 6256 + }, + { + "epoch": 1.6615323330235028, + "grad_norm": 1.2690481284418431, + "learning_rate": 1.5288391178515838e-06, + "loss": 0.23254770040512085, + "step": 6257 + }, + { + "epoch": 1.6617979020050457, + "grad_norm": 1.2246821396199268, + "learning_rate": 1.5265066471639701e-06, + "loss": 0.23240572214126587, + "step": 6258 + }, + { + "epoch": 1.6620634709865887, + "grad_norm": 1.3414134094293932, + "learning_rate": 1.5241758100659386e-06, + "loss": 0.2765730619430542, + "step": 6259 + }, + { + "epoch": 1.6623290399681316, + "grad_norm": 1.2956291225041994, + "learning_rate": 1.5218466070068472e-06, + "loss": 0.26366496086120605, + "step": 6260 + }, + { + "epoch": 1.6625946089496746, + "grad_norm": 1.240730160583952, + "learning_rate": 1.5195190384357405e-06, + "loss": 0.22322653234004974, + "step": 6261 + }, + { + "epoch": 1.6628601779312175, + "grad_norm": 1.2433877123660553, + "learning_rate": 1.5171931048013466e-06, + "loss": 0.24144116044044495, + "step": 6262 + }, + { + "epoch": 1.6631257469127605, + "grad_norm": 1.3783130308299147, + "learning_rate": 1.5148688065520734e-06, + "loss": 0.24559618532657623, + "step": 6263 + }, + { + "epoch": 1.6633913158943034, + "grad_norm": 1.3258590224160887, + "learning_rate": 
1.5125461441360223e-06, + "loss": 0.24337056279182434, + "step": 6264 + }, + { + "epoch": 1.6636568848758464, + "grad_norm": 1.3292875380649603, + "learning_rate": 1.5102251180009752e-06, + "loss": 0.2733612358570099, + "step": 6265 + }, + { + "epoch": 1.6639224538573893, + "grad_norm": 1.2329811544038785, + "learning_rate": 1.5079057285943976e-06, + "loss": 0.2116459757089615, + "step": 6266 + }, + { + "epoch": 1.6641880228389323, + "grad_norm": 1.2335642813115397, + "learning_rate": 1.5055879763634407e-06, + "loss": 0.21221664547920227, + "step": 6267 + }, + { + "epoch": 1.6644535918204753, + "grad_norm": 1.2500150658336624, + "learning_rate": 1.503271861754939e-06, + "loss": 0.21166589856147766, + "step": 6268 + }, + { + "epoch": 1.6647191608020182, + "grad_norm": 1.5113123418333367, + "learning_rate": 1.5009573852154136e-06, + "loss": 0.2652161121368408, + "step": 6269 + }, + { + "epoch": 1.6649847297835612, + "grad_norm": 1.262834880378694, + "learning_rate": 1.4986445471910672e-06, + "loss": 0.22142267227172852, + "step": 6270 + }, + { + "epoch": 1.665250298765104, + "grad_norm": 1.4442965183949772, + "learning_rate": 1.4963333481277874e-06, + "loss": 0.2307332456111908, + "step": 6271 + }, + { + "epoch": 1.665515867746647, + "grad_norm": 1.411326986781179, + "learning_rate": 1.494023788471144e-06, + "loss": 0.2669411897659302, + "step": 6272 + }, + { + "epoch": 1.66578143672819, + "grad_norm": 1.2823998109594834, + "learning_rate": 1.4917158686663992e-06, + "loss": 0.2468804121017456, + "step": 6273 + }, + { + "epoch": 1.666047005709733, + "grad_norm": 1.2639666166307362, + "learning_rate": 1.4894095891584882e-06, + "loss": 0.24152463674545288, + "step": 6274 + }, + { + "epoch": 1.666312574691276, + "grad_norm": 1.098201760932299, + "learning_rate": 1.4871049503920353e-06, + "loss": 0.1966545283794403, + "step": 6275 + }, + { + "epoch": 1.6665781436728189, + "grad_norm": 1.2773845282560163, + "learning_rate": 1.4848019528113477e-06, + "loss": 
0.24772626161575317, + "step": 6276 + }, + { + "epoch": 1.6668437126543618, + "grad_norm": 1.3731672204722256, + "learning_rate": 1.4825005968604189e-06, + "loss": 0.22138851881027222, + "step": 6277 + }, + { + "epoch": 1.6671092816359048, + "grad_norm": 1.2245583238686863, + "learning_rate": 1.4802008829829172e-06, + "loss": 0.24345465004444122, + "step": 6278 + }, + { + "epoch": 1.6673748506174477, + "grad_norm": 1.3209828849983516, + "learning_rate": 1.477902811622205e-06, + "loss": 0.22862716019153595, + "step": 6279 + }, + { + "epoch": 1.6676404195989907, + "grad_norm": 1.2914770883474422, + "learning_rate": 1.4756063832213207e-06, + "loss": 0.2763083577156067, + "step": 6280 + }, + { + "epoch": 1.6679059885805336, + "grad_norm": 1.3142139937070516, + "learning_rate": 1.4733115982229885e-06, + "loss": 0.24631357192993164, + "step": 6281 + }, + { + "epoch": 1.6681715575620768, + "grad_norm": 1.322429969576976, + "learning_rate": 1.4710184570696184e-06, + "loss": 0.22650030255317688, + "step": 6282 + }, + { + "epoch": 1.6684371265436198, + "grad_norm": 1.3243342318873437, + "learning_rate": 1.4687269602033006e-06, + "loss": 0.2455909103155136, + "step": 6283 + }, + { + "epoch": 1.6687026955251627, + "grad_norm": 1.3711517369784783, + "learning_rate": 1.4664371080658079e-06, + "loss": 0.25625506043434143, + "step": 6284 + }, + { + "epoch": 1.6689682645067057, + "grad_norm": 1.1450036681372322, + "learning_rate": 1.4641489010985954e-06, + "loss": 0.22178369760513306, + "step": 6285 + }, + { + "epoch": 1.6692338334882486, + "grad_norm": 1.2644620602089436, + "learning_rate": 1.4618623397428055e-06, + "loss": 0.23936234414577484, + "step": 6286 + }, + { + "epoch": 1.6694994024697916, + "grad_norm": 1.2667144776178243, + "learning_rate": 1.459577424439258e-06, + "loss": 0.21629829704761505, + "step": 6287 + }, + { + "epoch": 1.6697649714513345, + "grad_norm": 1.3486786043134158, + "learning_rate": 1.457294155628457e-06, + "loss": 0.238427072763443, + "step": 6288 + 
}, + { + "epoch": 1.6700305404328775, + "grad_norm": 1.412674472973442, + "learning_rate": 1.4550125337505926e-06, + "loss": 0.23168250918388367, + "step": 6289 + }, + { + "epoch": 1.6702961094144204, + "grad_norm": 1.3185872633193214, + "learning_rate": 1.45273255924553e-06, + "loss": 0.25518402457237244, + "step": 6290 + }, + { + "epoch": 1.6705616783959634, + "grad_norm": 1.2092220747685465, + "learning_rate": 1.450454232552826e-06, + "loss": 0.2488553822040558, + "step": 6291 + }, + { + "epoch": 1.6708272473775063, + "grad_norm": 1.4309048190710245, + "learning_rate": 1.448177554111716e-06, + "loss": 0.2684085965156555, + "step": 6292 + }, + { + "epoch": 1.6710928163590493, + "grad_norm": 1.3645105519242562, + "learning_rate": 1.4459025243611124e-06, + "loss": 0.24627447128295898, + "step": 6293 + }, + { + "epoch": 1.6713583853405922, + "grad_norm": 1.2960987120962004, + "learning_rate": 1.4436291437396156e-06, + "loss": 0.24725376069545746, + "step": 6294 + }, + { + "epoch": 1.6716239543221352, + "grad_norm": 1.2752333210419433, + "learning_rate": 1.4413574126855067e-06, + "loss": 0.23488914966583252, + "step": 6295 + }, + { + "epoch": 1.6718895233036781, + "grad_norm": 1.2385365684534737, + "learning_rate": 1.4390873316367492e-06, + "loss": 0.2031177133321762, + "step": 6296 + }, + { + "epoch": 1.672155092285221, + "grad_norm": 1.265889760948498, + "learning_rate": 1.4368189010309874e-06, + "loss": 0.25378018617630005, + "step": 6297 + }, + { + "epoch": 1.672420661266764, + "grad_norm": 1.2443137764428682, + "learning_rate": 1.434552121305548e-06, + "loss": 0.21305282413959503, + "step": 6298 + }, + { + "epoch": 1.672686230248307, + "grad_norm": 1.1925787762252436, + "learning_rate": 1.432286992897437e-06, + "loss": 0.20908987522125244, + "step": 6299 + }, + { + "epoch": 1.67295179922985, + "grad_norm": 1.2228377563088515, + "learning_rate": 1.4300235162433496e-06, + "loss": 0.21945340931415558, + "step": 6300 + }, + { + "epoch": 1.6732173682113929, + 
"grad_norm": 1.3659267409445854, + "learning_rate": 1.4277616917796544e-06, + "loss": 0.22096669673919678, + "step": 6301 + }, + { + "epoch": 1.6734829371929358, + "grad_norm": 1.2773291306452106, + "learning_rate": 1.425501519942406e-06, + "loss": 0.2233850657939911, + "step": 6302 + }, + { + "epoch": 1.6737485061744788, + "grad_norm": 1.2672720076411363, + "learning_rate": 1.423243001167337e-06, + "loss": 0.21432995796203613, + "step": 6303 + }, + { + "epoch": 1.6740140751560217, + "grad_norm": 1.3864014459258447, + "learning_rate": 1.4209861358898636e-06, + "loss": 0.2649557590484619, + "step": 6304 + }, + { + "epoch": 1.6742796441375647, + "grad_norm": 1.2642836811067808, + "learning_rate": 1.418730924545083e-06, + "loss": 0.24918347597122192, + "step": 6305 + }, + { + "epoch": 1.6745452131191076, + "grad_norm": 1.3089175693989048, + "learning_rate": 1.4164773675677745e-06, + "loss": 0.24121029675006866, + "step": 6306 + }, + { + "epoch": 1.6748107821006506, + "grad_norm": 1.2569762960026158, + "learning_rate": 1.4142254653923949e-06, + "loss": 0.24401789903640747, + "step": 6307 + }, + { + "epoch": 1.6750763510821935, + "grad_norm": 1.3272546708188746, + "learning_rate": 1.4119752184530867e-06, + "loss": 0.2374853938817978, + "step": 6308 + }, + { + "epoch": 1.6753419200637365, + "grad_norm": 1.2973848864698938, + "learning_rate": 1.4097266271836695e-06, + "loss": 0.2351088970899582, + "step": 6309 + }, + { + "epoch": 1.6756074890452797, + "grad_norm": 1.301417674196528, + "learning_rate": 1.407479692017647e-06, + "loss": 0.19560754299163818, + "step": 6310 + }, + { + "epoch": 1.6758730580268226, + "grad_norm": 1.390250023674765, + "learning_rate": 1.405234413388199e-06, + "loss": 0.24124252796173096, + "step": 6311 + }, + { + "epoch": 1.6761386270083656, + "grad_norm": 1.3742469305206364, + "learning_rate": 1.4029907917281903e-06, + "loss": 0.2208215445280075, + "step": 6312 + }, + { + "epoch": 1.6764041959899085, + "grad_norm": 1.2125662977366807, + 
"learning_rate": 1.4007488274701653e-06, + "loss": 0.23888292908668518, + "step": 6313 + }, + { + "epoch": 1.6766697649714515, + "grad_norm": 1.2936432356109655, + "learning_rate": 1.3985085210463479e-06, + "loss": 0.24079063534736633, + "step": 6314 + }, + { + "epoch": 1.6769353339529944, + "grad_norm": 1.2011852751375642, + "learning_rate": 1.3962698728886414e-06, + "loss": 0.18975606560707092, + "step": 6315 + }, + { + "epoch": 1.6772009029345374, + "grad_norm": 1.322599968285396, + "learning_rate": 1.3940328834286333e-06, + "loss": 0.201214998960495, + "step": 6316 + }, + { + "epoch": 1.6774664719160803, + "grad_norm": 1.2090909210103018, + "learning_rate": 1.3917975530975836e-06, + "loss": 0.20079322159290314, + "step": 6317 + }, + { + "epoch": 1.6777320408976233, + "grad_norm": 1.2732868066143843, + "learning_rate": 1.3895638823264447e-06, + "loss": 0.23593586683273315, + "step": 6318 + }, + { + "epoch": 1.6779976098791662, + "grad_norm": 1.3931846809533017, + "learning_rate": 1.3873318715458383e-06, + "loss": 0.26574259996414185, + "step": 6319 + }, + { + "epoch": 1.6782631788607092, + "grad_norm": 1.252943610173436, + "learning_rate": 1.3851015211860696e-06, + "loss": 0.20573323965072632, + "step": 6320 + }, + { + "epoch": 1.6785287478422521, + "grad_norm": 1.4484920974875073, + "learning_rate": 1.3828728316771244e-06, + "loss": 0.25610506534576416, + "step": 6321 + }, + { + "epoch": 1.678794316823795, + "grad_norm": 1.330338299337135, + "learning_rate": 1.380645803448668e-06, + "loss": 0.2138693630695343, + "step": 6322 + }, + { + "epoch": 1.679059885805338, + "grad_norm": 1.1479105398064924, + "learning_rate": 1.3784204369300447e-06, + "loss": 0.21522866189479828, + "step": 6323 + }, + { + "epoch": 1.679325454786881, + "grad_norm": 1.441538971613898, + "learning_rate": 1.376196732550279e-06, + "loss": 0.25622743368148804, + "step": 6324 + }, + { + "epoch": 1.679591023768424, + "grad_norm": 1.354050705773023, + "learning_rate": 1.3739746907380757e-06, + 
"loss": 0.18025386333465576, + "step": 6325 + }, + { + "epoch": 1.679856592749967, + "grad_norm": 1.1665775097977176, + "learning_rate": 1.3717543119218168e-06, + "loss": 0.18785078823566437, + "step": 6326 + }, + { + "epoch": 1.6801221617315099, + "grad_norm": 1.3771154706722653, + "learning_rate": 1.3695355965295653e-06, + "loss": 0.24682481586933136, + "step": 6327 + }, + { + "epoch": 1.6803877307130528, + "grad_norm": 1.2994385931646761, + "learning_rate": 1.3673185449890647e-06, + "loss": 0.2193487137556076, + "step": 6328 + }, + { + "epoch": 1.6806532996945958, + "grad_norm": 1.2960131024456552, + "learning_rate": 1.3651031577277351e-06, + "loss": 0.24963265657424927, + "step": 6329 + }, + { + "epoch": 1.6809188686761387, + "grad_norm": 1.2714587333981215, + "learning_rate": 1.3628894351726785e-06, + "loss": 0.21473057568073273, + "step": 6330 + }, + { + "epoch": 1.6811844376576817, + "grad_norm": 1.4508064568072063, + "learning_rate": 1.3606773777506731e-06, + "loss": 0.2539534866809845, + "step": 6331 + }, + { + "epoch": 1.6814500066392246, + "grad_norm": 1.5049767699399101, + "learning_rate": 1.3584669858881771e-06, + "loss": 0.2671799659729004, + "step": 6332 + }, + { + "epoch": 1.6817155756207676, + "grad_norm": 1.211295376852026, + "learning_rate": 1.3562582600113295e-06, + "loss": 0.24291013181209564, + "step": 6333 + }, + { + "epoch": 1.6819811446023105, + "grad_norm": 1.3672105989135315, + "learning_rate": 1.354051200545946e-06, + "loss": 0.24249233305454254, + "step": 6334 + }, + { + "epoch": 1.6822467135838535, + "grad_norm": 1.2855842039831968, + "learning_rate": 1.351845807917519e-06, + "loss": 0.21647261083126068, + "step": 6335 + }, + { + "epoch": 1.6825122825653964, + "grad_norm": 1.2764605035604815, + "learning_rate": 1.349642082551227e-06, + "loss": 0.2348332703113556, + "step": 6336 + }, + { + "epoch": 1.6827778515469394, + "grad_norm": 1.3049495455341118, + "learning_rate": 1.34744002487192e-06, + "loss": 0.22503259778022766, + "step": 
6337 + }, + { + "epoch": 1.6830434205284823, + "grad_norm": 1.3236190891705721, + "learning_rate": 1.3452396353041286e-06, + "loss": 0.2397763580083847, + "step": 6338 + }, + { + "epoch": 1.6833089895100253, + "grad_norm": 1.156426557066381, + "learning_rate": 1.3430409142720624e-06, + "loss": 0.23345956206321716, + "step": 6339 + }, + { + "epoch": 1.6835745584915682, + "grad_norm": 1.1932341696009043, + "learning_rate": 1.3408438621996088e-06, + "loss": 0.19660598039627075, + "step": 6340 + }, + { + "epoch": 1.6838401274731112, + "grad_norm": 1.262928020262074, + "learning_rate": 1.3386484795103327e-06, + "loss": 0.19148695468902588, + "step": 6341 + }, + { + "epoch": 1.6841056964546541, + "grad_norm": 1.2112774084067142, + "learning_rate": 1.3364547666274819e-06, + "loss": 0.2078169733285904, + "step": 6342 + }, + { + "epoch": 1.684371265436197, + "grad_norm": 1.3703852622718744, + "learning_rate": 1.3342627239739715e-06, + "loss": 0.23122575879096985, + "step": 6343 + }, + { + "epoch": 1.68463683441774, + "grad_norm": 1.350523705417422, + "learning_rate": 1.3320723519724032e-06, + "loss": 0.2744083106517792, + "step": 6344 + }, + { + "epoch": 1.684902403399283, + "grad_norm": 1.3462449472678248, + "learning_rate": 1.3298836510450597e-06, + "loss": 0.26361098885536194, + "step": 6345 + }, + { + "epoch": 1.685167972380826, + "grad_norm": 1.2550654654863131, + "learning_rate": 1.3276966216138932e-06, + "loss": 0.21833205223083496, + "step": 6346 + }, + { + "epoch": 1.685433541362369, + "grad_norm": 1.306325021058624, + "learning_rate": 1.3255112641005374e-06, + "loss": 0.22075100243091583, + "step": 6347 + }, + { + "epoch": 1.6856991103439118, + "grad_norm": 1.4286786068270776, + "learning_rate": 1.3233275789263034e-06, + "loss": 0.24352343380451202, + "step": 6348 + }, + { + "epoch": 1.6859646793254548, + "grad_norm": 1.5476580340833483, + "learning_rate": 1.3211455665121808e-06, + "loss": 0.2331303060054779, + "step": 6349 + }, + { + "epoch": 1.6862302483069977, 
+ "grad_norm": 1.398559395598541, + "learning_rate": 1.3189652272788356e-06, + "loss": 0.2511689066886902, + "step": 6350 + }, + { + "epoch": 1.6864958172885407, + "grad_norm": 1.1704691076383393, + "learning_rate": 1.3167865616466113e-06, + "loss": 0.18535873293876648, + "step": 6351 + }, + { + "epoch": 1.6867613862700837, + "grad_norm": 1.3097469055952822, + "learning_rate": 1.3146095700355289e-06, + "loss": 0.23924914002418518, + "step": 6352 + }, + { + "epoch": 1.6870269552516266, + "grad_norm": 1.1591649275755667, + "learning_rate": 1.3124342528652845e-06, + "loss": 0.19710025191307068, + "step": 6353 + }, + { + "epoch": 1.6872925242331696, + "grad_norm": 1.393629731020981, + "learning_rate": 1.3102606105552585e-06, + "loss": 0.21439281105995178, + "step": 6354 + }, + { + "epoch": 1.6875580932147125, + "grad_norm": 1.3051512833867451, + "learning_rate": 1.3080886435245e-06, + "loss": 0.2647722363471985, + "step": 6355 + }, + { + "epoch": 1.6878236621962555, + "grad_norm": 2.6038516980586355, + "learning_rate": 1.3059183521917396e-06, + "loss": 0.2202019840478897, + "step": 6356 + }, + { + "epoch": 1.6880892311777984, + "grad_norm": 1.3022104210295473, + "learning_rate": 1.3037497369753871e-06, + "loss": 0.25833001732826233, + "step": 6357 + }, + { + "epoch": 1.6883548001593414, + "grad_norm": 1.1906464618269579, + "learning_rate": 1.3015827982935192e-06, + "loss": 0.19984321296215057, + "step": 6358 + }, + { + "epoch": 1.6886203691408843, + "grad_norm": 1.3347301103088016, + "learning_rate": 1.2994175365638996e-06, + "loss": 0.2190552055835724, + "step": 6359 + }, + { + "epoch": 1.6888859381224273, + "grad_norm": 1.265894337049371, + "learning_rate": 1.2972539522039652e-06, + "loss": 0.26262593269348145, + "step": 6360 + }, + { + "epoch": 1.6891515071039702, + "grad_norm": 1.285416913994909, + "learning_rate": 1.2950920456308292e-06, + "loss": 0.2665651738643646, + "step": 6361 + }, + { + "epoch": 1.6894170760855132, + "grad_norm": 1.213162722605336, + 
"learning_rate": 1.2929318172612803e-06, + "loss": 0.22369208931922913, + "step": 6362 + }, + { + "epoch": 1.6896826450670561, + "grad_norm": 1.2234073567984471, + "learning_rate": 1.2907732675117878e-06, + "loss": 0.21063543856143951, + "step": 6363 + }, + { + "epoch": 1.689948214048599, + "grad_norm": 1.3608426715056905, + "learning_rate": 1.2886163967984944e-06, + "loss": 0.2303045690059662, + "step": 6364 + }, + { + "epoch": 1.690213783030142, + "grad_norm": 1.1473656525455074, + "learning_rate": 1.2864612055372182e-06, + "loss": 0.20185884833335876, + "step": 6365 + }, + { + "epoch": 1.690479352011685, + "grad_norm": 1.2673026097919315, + "learning_rate": 1.284307694143455e-06, + "loss": 0.22900527715682983, + "step": 6366 + }, + { + "epoch": 1.690744920993228, + "grad_norm": 1.2373147270640896, + "learning_rate": 1.282155863032377e-06, + "loss": 0.21405862271785736, + "step": 6367 + }, + { + "epoch": 1.6910104899747709, + "grad_norm": 1.3139606008654157, + "learning_rate": 1.2800057126188304e-06, + "loss": 0.26143258810043335, + "step": 6368 + }, + { + "epoch": 1.6912760589563138, + "grad_norm": 1.319330305112879, + "learning_rate": 1.2778572433173397e-06, + "loss": 0.24437926709651947, + "step": 6369 + }, + { + "epoch": 1.6915416279378568, + "grad_norm": 1.1954155676954614, + "learning_rate": 1.275710455542104e-06, + "loss": 0.24862337112426758, + "step": 6370 + }, + { + "epoch": 1.6918071969193997, + "grad_norm": 1.2264107157331223, + "learning_rate": 1.2735653497069978e-06, + "loss": 0.2146604359149933, + "step": 6371 + }, + { + "epoch": 1.6920727659009427, + "grad_norm": 1.3217815480091177, + "learning_rate": 1.2714219262255777e-06, + "loss": 0.2525256872177124, + "step": 6372 + }, + { + "epoch": 1.6923383348824856, + "grad_norm": 1.289957068010404, + "learning_rate": 1.2692801855110638e-06, + "loss": 0.23462912440299988, + "step": 6373 + }, + { + "epoch": 1.6926039038640286, + "grad_norm": 1.3468375801476438, + "learning_rate": 1.2671401279763595e-06, + 
"loss": 0.21551170945167542, + "step": 6374 + }, + { + "epoch": 1.6928694728455715, + "grad_norm": 1.4457180200872415, + "learning_rate": 1.2650017540340454e-06, + "loss": 0.24094407260417938, + "step": 6375 + }, + { + "epoch": 1.6931350418271145, + "grad_norm": 1.2168123169553724, + "learning_rate": 1.2628650640963736e-06, + "loss": 0.23101133108139038, + "step": 6376 + }, + { + "epoch": 1.6934006108086574, + "grad_norm": 1.4830646801660192, + "learning_rate": 1.2607300585752724e-06, + "loss": 0.2513899803161621, + "step": 6377 + }, + { + "epoch": 1.6936661797902004, + "grad_norm": 1.417144859782869, + "learning_rate": 1.258596737882345e-06, + "loss": 0.2490600198507309, + "step": 6378 + }, + { + "epoch": 1.6939317487717434, + "grad_norm": 1.3403225341914131, + "learning_rate": 1.256465102428872e-06, + "loss": 0.25767675042152405, + "step": 6379 + }, + { + "epoch": 1.6941973177532863, + "grad_norm": 1.2775246675329248, + "learning_rate": 1.254335152625804e-06, + "loss": 0.2231348305940628, + "step": 6380 + }, + { + "epoch": 1.6944628867348293, + "grad_norm": 1.4410136520558763, + "learning_rate": 1.2522068888837758e-06, + "loss": 0.25873979926109314, + "step": 6381 + }, + { + "epoch": 1.6947284557163722, + "grad_norm": 1.4111151195923193, + "learning_rate": 1.2500803116130887e-06, + "loss": 0.2848423421382904, + "step": 6382 + }, + { + "epoch": 1.6949940246979152, + "grad_norm": 1.1110125207312456, + "learning_rate": 1.247955421223721e-06, + "loss": 0.21343804895877838, + "step": 6383 + }, + { + "epoch": 1.695259593679458, + "grad_norm": 1.3025436504976033, + "learning_rate": 1.245832218125328e-06, + "loss": 0.23080062866210938, + "step": 6384 + }, + { + "epoch": 1.695525162661001, + "grad_norm": 1.3020267493975237, + "learning_rate": 1.2437107027272376e-06, + "loss": 0.2397225797176361, + "step": 6385 + }, + { + "epoch": 1.695790731642544, + "grad_norm": 1.3120966348534624, + "learning_rate": 1.2415908754384532e-06, + "loss": 0.22798654437065125, + "step": 6386 + 
}, + { + "epoch": 1.696056300624087, + "grad_norm": 1.3399304326822938, + "learning_rate": 1.2394727366676518e-06, + "loss": 0.2534061074256897, + "step": 6387 + }, + { + "epoch": 1.69632186960563, + "grad_norm": 1.2269756633197797, + "learning_rate": 1.2373562868231858e-06, + "loss": 0.2127036452293396, + "step": 6388 + }, + { + "epoch": 1.6965874385871729, + "grad_norm": 1.341525895521795, + "learning_rate": 1.2352415263130813e-06, + "loss": 0.22341205179691315, + "step": 6389 + }, + { + "epoch": 1.6968530075687158, + "grad_norm": 1.316572711467383, + "learning_rate": 1.2331284555450406e-06, + "loss": 0.2435426563024521, + "step": 6390 + }, + { + "epoch": 1.6971185765502588, + "grad_norm": 1.3203864338710647, + "learning_rate": 1.2310170749264383e-06, + "loss": 0.24652531743049622, + "step": 6391 + }, + { + "epoch": 1.6973841455318017, + "grad_norm": 1.251250109623578, + "learning_rate": 1.228907384864323e-06, + "loss": 0.24172671139240265, + "step": 6392 + }, + { + "epoch": 1.6976497145133447, + "grad_norm": 1.293405881850453, + "learning_rate": 1.2267993857654182e-06, + "loss": 0.21534420549869537, + "step": 6393 + }, + { + "epoch": 1.6979152834948879, + "grad_norm": 2.1259133697182575, + "learning_rate": 1.2246930780361221e-06, + "loss": 0.2617778182029724, + "step": 6394 + }, + { + "epoch": 1.6981808524764308, + "grad_norm": 1.1793022391098469, + "learning_rate": 1.2225884620825046e-06, + "loss": 0.20388583838939667, + "step": 6395 + }, + { + "epoch": 1.6984464214579738, + "grad_norm": 1.289033320527503, + "learning_rate": 1.220485538310312e-06, + "loss": 0.23714327812194824, + "step": 6396 + }, + { + "epoch": 1.6987119904395167, + "grad_norm": 1.3592785135687544, + "learning_rate": 1.2183843071249634e-06, + "loss": 0.2495463341474533, + "step": 6397 + }, + { + "epoch": 1.6989775594210597, + "grad_norm": 1.2730498991215184, + "learning_rate": 1.2162847689315483e-06, + "loss": 0.2419012188911438, + "step": 6398 + }, + { + "epoch": 1.6992431284026026, + 
"grad_norm": 1.2226640861076554, + "learning_rate": 1.214186924134838e-06, + "loss": 0.23392438888549805, + "step": 6399 + }, + { + "epoch": 1.6995086973841456, + "grad_norm": 1.3210458214149883, + "learning_rate": 1.2120907731392695e-06, + "loss": 0.22855526208877563, + "step": 6400 + }, + { + "epoch": 1.6997742663656885, + "grad_norm": 1.2152782326664608, + "learning_rate": 1.2099963163489558e-06, + "loss": 0.22393949329853058, + "step": 6401 + }, + { + "epoch": 1.7000398353472315, + "grad_norm": 1.3855673404796554, + "learning_rate": 1.2079035541676832e-06, + "loss": 0.2539960741996765, + "step": 6402 + }, + { + "epoch": 1.7003054043287744, + "grad_norm": 1.3330270743987416, + "learning_rate": 1.2058124869989129e-06, + "loss": 0.23716852068901062, + "step": 6403 + }, + { + "epoch": 1.7005709733103174, + "grad_norm": 1.347782549245642, + "learning_rate": 1.2037231152457773e-06, + "loss": 0.24658545851707458, + "step": 6404 + }, + { + "epoch": 1.7008365422918603, + "grad_norm": 1.2494300647338343, + "learning_rate": 1.201635439311083e-06, + "loss": 0.2316630333662033, + "step": 6405 + }, + { + "epoch": 1.7011021112734033, + "grad_norm": 1.0834142572483991, + "learning_rate": 1.1995494595973089e-06, + "loss": 0.20434345304965973, + "step": 6406 + }, + { + "epoch": 1.7013676802549462, + "grad_norm": 1.3445140884275912, + "learning_rate": 1.197465176506607e-06, + "loss": 0.2585931420326233, + "step": 6407 + }, + { + "epoch": 1.7016332492364892, + "grad_norm": 1.2567668360829787, + "learning_rate": 1.1953825904408033e-06, + "loss": 0.23007069528102875, + "step": 6408 + }, + { + "epoch": 1.7018988182180321, + "grad_norm": 1.2770978609777501, + "learning_rate": 1.1933017018013948e-06, + "loss": 0.21822810173034668, + "step": 6409 + }, + { + "epoch": 1.702164387199575, + "grad_norm": 1.2875752799081717, + "learning_rate": 1.1912225109895526e-06, + "loss": 0.241228848695755, + "step": 6410 + }, + { + "epoch": 1.702429956181118, + "grad_norm": 1.3509759956774154, + 
"learning_rate": 1.1891450184061203e-06, + "loss": 0.28803908824920654, + "step": 6411 + }, + { + "epoch": 1.702695525162661, + "grad_norm": 1.3018941028318989, + "learning_rate": 1.1870692244516147e-06, + "loss": 0.2387516349554062, + "step": 6412 + }, + { + "epoch": 1.702961094144204, + "grad_norm": 1.2538051398244094, + "learning_rate": 1.1849951295262242e-06, + "loss": 0.19774140417575836, + "step": 6413 + }, + { + "epoch": 1.7032266631257469, + "grad_norm": 1.269953409174644, + "learning_rate": 1.1829227340298088e-06, + "loss": 0.22842247784137726, + "step": 6414 + }, + { + "epoch": 1.7034922321072898, + "grad_norm": 1.1987695898844528, + "learning_rate": 1.1808520383619015e-06, + "loss": 0.21994739770889282, + "step": 6415 + }, + { + "epoch": 1.7037578010888328, + "grad_norm": 1.2719096074486522, + "learning_rate": 1.1787830429217084e-06, + "loss": 0.22328051924705505, + "step": 6416 + }, + { + "epoch": 1.7040233700703757, + "grad_norm": 1.3583279531737376, + "learning_rate": 1.1767157481081092e-06, + "loss": 0.26704326272010803, + "step": 6417 + }, + { + "epoch": 1.7042889390519187, + "grad_norm": 1.2796404749500392, + "learning_rate": 1.174650154319653e-06, + "loss": 0.2148481160402298, + "step": 6418 + }, + { + "epoch": 1.7045545080334616, + "grad_norm": 1.1912742761204351, + "learning_rate": 1.1725862619545625e-06, + "loss": 0.21731218695640564, + "step": 6419 + }, + { + "epoch": 1.7048200770150046, + "grad_norm": 1.3502505047017879, + "learning_rate": 1.1705240714107301e-06, + "loss": 0.20832043886184692, + "step": 6420 + }, + { + "epoch": 1.7050856459965475, + "grad_norm": 1.2922565511595965, + "learning_rate": 1.1684635830857249e-06, + "loss": 0.21739046275615692, + "step": 6421 + }, + { + "epoch": 1.7053512149780907, + "grad_norm": 1.3041232291639149, + "learning_rate": 1.1664047973767811e-06, + "loss": 0.23972246050834656, + "step": 6422 + }, + { + "epoch": 1.7056167839596337, + "grad_norm": 1.2420174603299015, + "learning_rate": 
1.1643477146808092e-06, + "loss": 0.2471289187669754, + "step": 6423 + }, + { + "epoch": 1.7058823529411766, + "grad_norm": 1.2148999014811244, + "learning_rate": 1.1622923353943916e-06, + "loss": 0.2014283537864685, + "step": 6424 + }, + { + "epoch": 1.7061479219227196, + "grad_norm": 1.1799937956162947, + "learning_rate": 1.1602386599137782e-06, + "loss": 0.21680915355682373, + "step": 6425 + }, + { + "epoch": 1.7064134909042625, + "grad_norm": 1.2221660563202492, + "learning_rate": 1.158186688634898e-06, + "loss": 0.2101205736398697, + "step": 6426 + }, + { + "epoch": 1.7066790598858055, + "grad_norm": 1.2879683442276364, + "learning_rate": 1.1561364219533444e-06, + "loss": 0.22114071249961853, + "step": 6427 + }, + { + "epoch": 1.7069446288673484, + "grad_norm": 1.2910925736026095, + "learning_rate": 1.1540878602643858e-06, + "loss": 0.20608706772327423, + "step": 6428 + }, + { + "epoch": 1.7072101978488914, + "grad_norm": 1.2486066037383718, + "learning_rate": 1.1520410039629593e-06, + "loss": 0.2247905433177948, + "step": 6429 + }, + { + "epoch": 1.7074757668304343, + "grad_norm": 1.1718742986299986, + "learning_rate": 1.1499958534436751e-06, + "loss": 0.22623226046562195, + "step": 6430 + }, + { + "epoch": 1.7077413358119773, + "grad_norm": 1.2776253558863635, + "learning_rate": 1.1479524091008142e-06, + "loss": 0.2063906192779541, + "step": 6431 + }, + { + "epoch": 1.7080069047935202, + "grad_norm": 1.4035125322254989, + "learning_rate": 1.1459106713283286e-06, + "loss": 0.2787795960903168, + "step": 6432 + }, + { + "epoch": 1.7082724737750632, + "grad_norm": 1.2096674582385407, + "learning_rate": 1.1438706405198419e-06, + "loss": 0.23090440034866333, + "step": 6433 + }, + { + "epoch": 1.7085380427566061, + "grad_norm": 1.288319877687408, + "learning_rate": 1.141832317068645e-06, + "loss": 0.23690670728683472, + "step": 6434 + }, + { + "epoch": 1.708803611738149, + "grad_norm": 1.2499926164056985, + "learning_rate": 1.1397957013677064e-06, + "loss": 
0.209202378988266, + "step": 6435 + }, + { + "epoch": 1.709069180719692, + "grad_norm": 1.2311768368116, + "learning_rate": 1.1377607938096635e-06, + "loss": 0.22541575133800507, + "step": 6436 + }, + { + "epoch": 1.709334749701235, + "grad_norm": 1.3505125458173146, + "learning_rate": 1.1357275947868162e-06, + "loss": 0.2460884153842926, + "step": 6437 + }, + { + "epoch": 1.709600318682778, + "grad_norm": 1.195327574575731, + "learning_rate": 1.1336961046911443e-06, + "loss": 0.21967202425003052, + "step": 6438 + }, + { + "epoch": 1.709865887664321, + "grad_norm": 1.346022527152768, + "learning_rate": 1.1316663239142954e-06, + "loss": 0.23619329929351807, + "step": 6439 + }, + { + "epoch": 1.7101314566458639, + "grad_norm": 1.3033234842407981, + "learning_rate": 1.129638252847587e-06, + "loss": 0.24563436210155487, + "step": 6440 + }, + { + "epoch": 1.7103970256274068, + "grad_norm": 1.3840933006905622, + "learning_rate": 1.1276118918820068e-06, + "loss": 0.25508859753608704, + "step": 6441 + }, + { + "epoch": 1.7106625946089498, + "grad_norm": 1.3406379279103604, + "learning_rate": 1.1255872414082136e-06, + "loss": 0.24761545658111572, + "step": 6442 + }, + { + "epoch": 1.7109281635904927, + "grad_norm": 4.632018568484065, + "learning_rate": 1.1235643018165344e-06, + "loss": 0.2355962097644806, + "step": 6443 + }, + { + "epoch": 1.7111937325720357, + "grad_norm": 1.3274457548497118, + "learning_rate": 1.1215430734969723e-06, + "loss": 0.2534273862838745, + "step": 6444 + }, + { + "epoch": 1.7114593015535786, + "grad_norm": 1.2846712625276346, + "learning_rate": 1.1195235568391938e-06, + "loss": 0.2756424844264984, + "step": 6445 + }, + { + "epoch": 1.7117248705351216, + "grad_norm": 1.2126020570228762, + "learning_rate": 1.1175057522325383e-06, + "loss": 0.2198309451341629, + "step": 6446 + }, + { + "epoch": 1.7119904395166645, + "grad_norm": 1.2343738377988847, + "learning_rate": 1.1154896600660136e-06, + "loss": 0.21767666935920715, + "step": 6447 + }, + { + 
"epoch": 1.7122560084982075, + "grad_norm": 1.4965895030859304, + "learning_rate": 1.1134752807283e-06, + "loss": 0.2679128348827362, + "step": 6448 + }, + { + "epoch": 1.7125215774797504, + "grad_norm": 1.292131622576057, + "learning_rate": 1.1114626146077457e-06, + "loss": 0.2268792986869812, + "step": 6449 + }, + { + "epoch": 1.7127871464612934, + "grad_norm": 1.224637524783582, + "learning_rate": 1.109451662092369e-06, + "loss": 0.21585378050804138, + "step": 6450 + }, + { + "epoch": 1.7130527154428363, + "grad_norm": 1.3157463227820392, + "learning_rate": 1.1074424235698567e-06, + "loss": 0.2258647382259369, + "step": 6451 + }, + { + "epoch": 1.7133182844243793, + "grad_norm": 1.3742268123946286, + "learning_rate": 1.1054348994275677e-06, + "loss": 0.2456682175397873, + "step": 6452 + }, + { + "epoch": 1.7135838534059222, + "grad_norm": 1.4853732102975625, + "learning_rate": 1.1034290900525279e-06, + "loss": 0.22897745668888092, + "step": 6453 + }, + { + "epoch": 1.7138494223874652, + "grad_norm": 1.133114987282755, + "learning_rate": 1.101424995831435e-06, + "loss": 0.1910650134086609, + "step": 6454 + }, + { + "epoch": 1.7141149913690081, + "grad_norm": 1.2728981818199352, + "learning_rate": 1.0994226171506529e-06, + "loss": 0.2519158720970154, + "step": 6455 + }, + { + "epoch": 1.714380560350551, + "grad_norm": 1.259309948081026, + "learning_rate": 1.0974219543962184e-06, + "loss": 0.24191951751708984, + "step": 6456 + }, + { + "epoch": 1.714646129332094, + "grad_norm": 1.3159238719963862, + "learning_rate": 1.0954230079538352e-06, + "loss": 0.2560814619064331, + "step": 6457 + }, + { + "epoch": 1.714911698313637, + "grad_norm": 1.2640782659289207, + "learning_rate": 1.0934257782088763e-06, + "loss": 0.22969035804271698, + "step": 6458 + }, + { + "epoch": 1.71517726729518, + "grad_norm": 1.3584917562872394, + "learning_rate": 1.0914302655463837e-06, + "loss": 0.26114046573638916, + "step": 6459 + }, + { + "epoch": 1.715442836276723, + "grad_norm": 
1.2235177756044688, + "learning_rate": 1.0894364703510685e-06, + "loss": 0.21457752585411072, + "step": 6460 + }, + { + "epoch": 1.7157084052582658, + "grad_norm": 1.164559577491723, + "learning_rate": 1.0874443930073098e-06, + "loss": 0.19998760521411896, + "step": 6461 + }, + { + "epoch": 1.7159739742398088, + "grad_norm": 1.2278101157674874, + "learning_rate": 1.0854540338991615e-06, + "loss": 0.2379671037197113, + "step": 6462 + }, + { + "epoch": 1.7162395432213517, + "grad_norm": 1.3827652808641404, + "learning_rate": 1.0834653934103367e-06, + "loss": 0.2236609309911728, + "step": 6463 + }, + { + "epoch": 1.7165051122028947, + "grad_norm": 1.2673726734268553, + "learning_rate": 1.0814784719242234e-06, + "loss": 0.22507379949092865, + "step": 6464 + }, + { + "epoch": 1.7167706811844377, + "grad_norm": 1.3174434539455087, + "learning_rate": 1.079493269823877e-06, + "loss": 0.22138816118240356, + "step": 6465 + }, + { + "epoch": 1.7170362501659806, + "grad_norm": 1.3880746036316538, + "learning_rate": 1.0775097874920204e-06, + "loss": 0.227338969707489, + "step": 6466 + }, + { + "epoch": 1.7173018191475236, + "grad_norm": 1.2588670866885754, + "learning_rate": 1.0755280253110466e-06, + "loss": 0.23694375157356262, + "step": 6467 + }, + { + "epoch": 1.7175673881290665, + "grad_norm": 1.365387614603678, + "learning_rate": 1.0735479836630136e-06, + "loss": 0.26219409704208374, + "step": 6468 + }, + { + "epoch": 1.7178329571106095, + "grad_norm": 1.20539748496599, + "learning_rate": 1.0715696629296524e-06, + "loss": 0.22215887904167175, + "step": 6469 + }, + { + "epoch": 1.7180985260921524, + "grad_norm": 1.3543481839639284, + "learning_rate": 1.0695930634923602e-06, + "loss": 0.25434768199920654, + "step": 6470 + }, + { + "epoch": 1.7183640950736954, + "grad_norm": 1.1809119822759757, + "learning_rate": 1.0676181857321998e-06, + "loss": 0.2092076987028122, + "step": 6471 + }, + { + "epoch": 1.7186296640552383, + "grad_norm": 1.330663320526799, + "learning_rate": 
1.0656450300299048e-06, + "loss": 0.2710237503051758, + "step": 6472 + }, + { + "epoch": 1.7188952330367813, + "grad_norm": 1.2715188060789504, + "learning_rate": 1.0636735967658785e-06, + "loss": 0.2533886432647705, + "step": 6473 + }, + { + "epoch": 1.7191608020183242, + "grad_norm": 1.2174102707049457, + "learning_rate": 1.0617038863201878e-06, + "loss": 0.2545754909515381, + "step": 6474 + }, + { + "epoch": 1.7194263709998672, + "grad_norm": 1.2560655592374788, + "learning_rate": 1.0597358990725703e-06, + "loss": 0.26010993123054504, + "step": 6475 + }, + { + "epoch": 1.7196919399814101, + "grad_norm": 1.2632076366916114, + "learning_rate": 1.0577696354024314e-06, + "loss": 0.22529907524585724, + "step": 6476 + }, + { + "epoch": 1.719957508962953, + "grad_norm": 1.157260113755536, + "learning_rate": 1.0558050956888433e-06, + "loss": 0.1897469311952591, + "step": 6477 + }, + { + "epoch": 1.720223077944496, + "grad_norm": 1.31651804495616, + "learning_rate": 1.0538422803105441e-06, + "loss": 0.24663670361042023, + "step": 6478 + }, + { + "epoch": 1.720488646926039, + "grad_norm": 1.343902959790046, + "learning_rate": 1.0518811896459423e-06, + "loss": 0.2462892383337021, + "step": 6479 + }, + { + "epoch": 1.720754215907582, + "grad_norm": 1.117431347891292, + "learning_rate": 1.0499218240731157e-06, + "loss": 0.18652144074440002, + "step": 6480 + }, + { + "epoch": 1.7210197848891249, + "grad_norm": 1.2234103731079693, + "learning_rate": 1.0479641839698052e-06, + "loss": 0.24614468216896057, + "step": 6481 + }, + { + "epoch": 1.7212853538706678, + "grad_norm": 1.2632894895468527, + "learning_rate": 1.046008269713421e-06, + "loss": 0.27925312519073486, + "step": 6482 + }, + { + "epoch": 1.7215509228522108, + "grad_norm": 1.3426272887839532, + "learning_rate": 1.0440540816810395e-06, + "loss": 0.2626710832118988, + "step": 6483 + }, + { + "epoch": 1.7218164918337537, + "grad_norm": 1.2982212521269376, + "learning_rate": 1.042101620249405e-06, + "loss": 
0.23039895296096802, + "step": 6484 + }, + { + "epoch": 1.7220820608152967, + "grad_norm": 1.2564768074123291, + "learning_rate": 1.0401508857949295e-06, + "loss": 0.19559775292873383, + "step": 6485 + }, + { + "epoch": 1.7223476297968396, + "grad_norm": 1.222035384596064, + "learning_rate": 1.0382018786936943e-06, + "loss": 0.24982990324497223, + "step": 6486 + }, + { + "epoch": 1.7226131987783826, + "grad_norm": 1.356827120814655, + "learning_rate": 1.0362545993214402e-06, + "loss": 0.26212313771247864, + "step": 6487 + }, + { + "epoch": 1.7228787677599255, + "grad_norm": 1.2583181328160484, + "learning_rate": 1.0343090480535788e-06, + "loss": 0.22827446460723877, + "step": 6488 + }, + { + "epoch": 1.7231443367414685, + "grad_norm": 1.3650470156220376, + "learning_rate": 1.032365225265196e-06, + "loss": 0.2710435390472412, + "step": 6489 + }, + { + "epoch": 1.7234099057230114, + "grad_norm": 1.560435811081079, + "learning_rate": 1.030423131331033e-06, + "loss": 0.25116702914237976, + "step": 6490 + }, + { + "epoch": 1.7236754747045544, + "grad_norm": 1.2598369270207033, + "learning_rate": 1.0284827666255048e-06, + "loss": 0.1980481743812561, + "step": 6491 + }, + { + "epoch": 1.7239410436860974, + "grad_norm": 1.3159445178277585, + "learning_rate": 1.0265441315226898e-06, + "loss": 0.2777971625328064, + "step": 6492 + }, + { + "epoch": 1.7242066126676403, + "grad_norm": 1.3290253215924488, + "learning_rate": 1.0246072263963336e-06, + "loss": 0.23041702806949615, + "step": 6493 + }, + { + "epoch": 1.7244721816491833, + "grad_norm": 1.2761862568921072, + "learning_rate": 1.0226720516198495e-06, + "loss": 0.21428728103637695, + "step": 6494 + }, + { + "epoch": 1.7247377506307262, + "grad_norm": 1.2965072992275601, + "learning_rate": 1.020738607566316e-06, + "loss": 0.22577518224716187, + "step": 6495 + }, + { + "epoch": 1.7250033196122692, + "grad_norm": 1.2489154030372867, + "learning_rate": 1.0188068946084783e-06, + "loss": 0.21080979704856873, + "step": 6496 + }, 
+ { + "epoch": 1.7252688885938121, + "grad_norm": 1.1941107816051266, + "learning_rate": 1.0168769131187472e-06, + "loss": 0.21232858300209045, + "step": 6497 + }, + { + "epoch": 1.725534457575355, + "grad_norm": 1.3035016990745079, + "learning_rate": 1.0149486634692019e-06, + "loss": 0.25525614619255066, + "step": 6498 + }, + { + "epoch": 1.725800026556898, + "grad_norm": 1.2742578592858531, + "learning_rate": 1.0130221460315858e-06, + "loss": 0.26291778683662415, + "step": 6499 + }, + { + "epoch": 1.726065595538441, + "grad_norm": 1.1747703502148148, + "learning_rate": 1.011097361177308e-06, + "loss": 0.21314382553100586, + "step": 6500 + }, + { + "epoch": 1.726331164519984, + "grad_norm": 1.3027182735878766, + "learning_rate": 1.0091743092774474e-06, + "loss": 0.2106419950723648, + "step": 6501 + }, + { + "epoch": 1.7265967335015269, + "grad_norm": 1.2753206037657139, + "learning_rate": 1.0072529907027407e-06, + "loss": 0.22456032037734985, + "step": 6502 + }, + { + "epoch": 1.7268623024830698, + "grad_norm": 2.1059170179774807, + "learning_rate": 1.0053334058235975e-06, + "loss": 0.2301097959280014, + "step": 6503 + }, + { + "epoch": 1.7271278714646128, + "grad_norm": 1.4062353485935484, + "learning_rate": 1.0034155550100922e-06, + "loss": 0.21207617223262787, + "step": 6504 + }, + { + "epoch": 1.7273934404461557, + "grad_norm": 1.3379977808716934, + "learning_rate": 1.0014994386319621e-06, + "loss": 0.24378664791584015, + "step": 6505 + }, + { + "epoch": 1.727659009427699, + "grad_norm": 1.402146752515372, + "learning_rate": 9.995850570586107e-07, + "loss": 0.24914023280143738, + "step": 6506 + }, + { + "epoch": 1.7279245784092419, + "grad_norm": 1.2949159811476645, + "learning_rate": 9.976724106591128e-07, + "loss": 0.23235921561717987, + "step": 6507 + }, + { + "epoch": 1.7281901473907848, + "grad_norm": 1.295455173430887, + "learning_rate": 9.957614998022015e-07, + "loss": 0.22441455721855164, + "step": 6508 + }, + { + "epoch": 1.7284557163723278, + 
"grad_norm": 1.4195770964317103, + "learning_rate": 9.93852324856278e-07, + "loss": 0.2559920847415924, + "step": 6509 + }, + { + "epoch": 1.7287212853538707, + "grad_norm": 1.2106097617539484, + "learning_rate": 9.919448861894088e-07, + "loss": 0.21378321945667267, + "step": 6510 + }, + { + "epoch": 1.7289868543354137, + "grad_norm": 1.223247289196822, + "learning_rate": 9.900391841693247e-07, + "loss": 0.23622627556324005, + "step": 6511 + }, + { + "epoch": 1.7292524233169566, + "grad_norm": 1.2354266119490807, + "learning_rate": 9.88135219163424e-07, + "loss": 0.217013418674469, + "step": 6512 + }, + { + "epoch": 1.7295179922984996, + "grad_norm": 1.342902376475473, + "learning_rate": 9.862329915387669e-07, + "loss": 0.2221517264842987, + "step": 6513 + }, + { + "epoch": 1.7297835612800425, + "grad_norm": 1.3136496001371853, + "learning_rate": 9.84332501662083e-07, + "loss": 0.24377144873142242, + "step": 6514 + }, + { + "epoch": 1.7300491302615855, + "grad_norm": 1.2574348774674273, + "learning_rate": 9.824337498997593e-07, + "loss": 0.23368799686431885, + "step": 6515 + }, + { + "epoch": 1.7303146992431284, + "grad_norm": 1.1949944292188206, + "learning_rate": 9.805367366178608e-07, + "loss": 0.23061680793762207, + "step": 6516 + }, + { + "epoch": 1.7305802682246714, + "grad_norm": 1.2715048223769598, + "learning_rate": 9.78641462182104e-07, + "loss": 0.24157950282096863, + "step": 6517 + }, + { + "epoch": 1.7308458372062143, + "grad_norm": 1.3248165077712177, + "learning_rate": 9.76747926957875e-07, + "loss": 0.2122395783662796, + "step": 6518 + }, + { + "epoch": 1.7311114061877573, + "grad_norm": 1.320024810941134, + "learning_rate": 9.748561313102266e-07, + "loss": 0.2351134717464447, + "step": 6519 + }, + { + "epoch": 1.7313769751693002, + "grad_norm": 1.2421546716744003, + "learning_rate": 9.729660756038738e-07, + "loss": 0.22462692856788635, + "step": 6520 + }, + { + "epoch": 1.7316425441508432, + "grad_norm": 1.191887437920794, + "learning_rate": 
9.710777602031985e-07, + "loss": 0.2140806019306183, + "step": 6521 + }, + { + "epoch": 1.7319081131323861, + "grad_norm": 1.1138928252794336, + "learning_rate": 9.691911854722447e-07, + "loss": 0.22256694734096527, + "step": 6522 + }, + { + "epoch": 1.732173682113929, + "grad_norm": 1.3703383963226383, + "learning_rate": 9.673063517747216e-07, + "loss": 0.26044604182243347, + "step": 6523 + }, + { + "epoch": 1.732439251095472, + "grad_norm": 1.2598416492801234, + "learning_rate": 9.65423259474001e-07, + "loss": 0.22553196549415588, + "step": 6524 + }, + { + "epoch": 1.732704820077015, + "grad_norm": 1.351471142700479, + "learning_rate": 9.635419089331255e-07, + "loss": 0.2240113914012909, + "step": 6525 + }, + { + "epoch": 1.732970389058558, + "grad_norm": 1.1814437793767476, + "learning_rate": 9.616623005147952e-07, + "loss": 0.2239987701177597, + "step": 6526 + }, + { + "epoch": 1.7332359580401009, + "grad_norm": 1.3385972692968178, + "learning_rate": 9.597844345813746e-07, + "loss": 0.2779507040977478, + "step": 6527 + }, + { + "epoch": 1.7335015270216438, + "grad_norm": 1.24243402144453, + "learning_rate": 9.57908311494896e-07, + "loss": 0.20211297273635864, + "step": 6528 + }, + { + "epoch": 1.7337670960031868, + "grad_norm": 1.3764658259437736, + "learning_rate": 9.560339316170542e-07, + "loss": 0.2552817165851593, + "step": 6529 + }, + { + "epoch": 1.7340326649847297, + "grad_norm": 1.2797541334315956, + "learning_rate": 9.54161295309206e-07, + "loss": 0.248790442943573, + "step": 6530 + }, + { + "epoch": 1.7342982339662727, + "grad_norm": 1.2952054804389268, + "learning_rate": 9.522904029323754e-07, + "loss": 0.22865381836891174, + "step": 6531 + }, + { + "epoch": 1.7345638029478156, + "grad_norm": 1.2248102039230788, + "learning_rate": 9.504212548472458e-07, + "loss": 0.212583988904953, + "step": 6532 + }, + { + "epoch": 1.7348293719293586, + "grad_norm": 1.3834113478738954, + "learning_rate": 9.48553851414169e-07, + "loss": 0.24632221460342407, + "step": 
6533 + }, + { + "epoch": 1.7350949409109018, + "grad_norm": 1.2843254083507383, + "learning_rate": 9.466881929931582e-07, + "loss": 0.2264299988746643, + "step": 6534 + }, + { + "epoch": 1.7353605098924447, + "grad_norm": 1.1969400150248917, + "learning_rate": 9.4482427994389e-07, + "loss": 0.21560585498809814, + "step": 6535 + }, + { + "epoch": 1.7356260788739877, + "grad_norm": 1.2133784097522973, + "learning_rate": 9.429621126257038e-07, + "loss": 0.24358224868774414, + "step": 6536 + }, + { + "epoch": 1.7358916478555306, + "grad_norm": 1.2714225965713206, + "learning_rate": 9.411016913976045e-07, + "loss": 0.23307816684246063, + "step": 6537 + }, + { + "epoch": 1.7361572168370736, + "grad_norm": 1.3040669928143356, + "learning_rate": 9.392430166182597e-07, + "loss": 0.28001490235328674, + "step": 6538 + }, + { + "epoch": 1.7364227858186165, + "grad_norm": 1.271471324412232, + "learning_rate": 9.373860886459996e-07, + "loss": 0.22544093430042267, + "step": 6539 + }, + { + "epoch": 1.7366883548001595, + "grad_norm": 1.196472605989987, + "learning_rate": 9.355309078388186e-07, + "loss": 0.2066478282213211, + "step": 6540 + }, + { + "epoch": 1.7369539237817024, + "grad_norm": 1.3162468805281542, + "learning_rate": 9.336774745543697e-07, + "loss": 0.21185964345932007, + "step": 6541 + }, + { + "epoch": 1.7372194927632454, + "grad_norm": 1.2806137892507987, + "learning_rate": 9.318257891499793e-07, + "loss": 0.2337890863418579, + "step": 6542 + }, + { + "epoch": 1.7374850617447883, + "grad_norm": 1.3468215205180822, + "learning_rate": 9.299758519826274e-07, + "loss": 0.2430594563484192, + "step": 6543 + }, + { + "epoch": 1.7377506307263313, + "grad_norm": 1.4072339591675835, + "learning_rate": 9.281276634089609e-07, + "loss": 0.24799269437789917, + "step": 6544 + }, + { + "epoch": 1.7380161997078742, + "grad_norm": 1.3533264573117185, + "learning_rate": 9.26281223785287e-07, + "loss": 0.24756166338920593, + "step": 6545 + }, + { + "epoch": 1.7382817686894172, + 
"grad_norm": 1.281195516970091, + "learning_rate": 9.244365334675787e-07, + "loss": 0.23465190827846527, + "step": 6546 + }, + { + "epoch": 1.7385473376709601, + "grad_norm": 1.22953964144765, + "learning_rate": 9.225935928114716e-07, + "loss": 0.2039640098810196, + "step": 6547 + }, + { + "epoch": 1.738812906652503, + "grad_norm": 1.3426382286400422, + "learning_rate": 9.207524021722602e-07, + "loss": 0.22304412722587585, + "step": 6548 + }, + { + "epoch": 1.739078475634046, + "grad_norm": 1.2253196898929546, + "learning_rate": 9.189129619049064e-07, + "loss": 0.19985908269882202, + "step": 6549 + }, + { + "epoch": 1.739344044615589, + "grad_norm": 1.3354963919439176, + "learning_rate": 9.17075272364032e-07, + "loss": 0.2335432469844818, + "step": 6550 + }, + { + "epoch": 1.739609613597132, + "grad_norm": 1.6822196536181961, + "learning_rate": 9.152393339039223e-07, + "loss": 0.2313593327999115, + "step": 6551 + }, + { + "epoch": 1.739875182578675, + "grad_norm": 1.310977344619443, + "learning_rate": 9.134051468785243e-07, + "loss": 0.2320600152015686, + "step": 6552 + }, + { + "epoch": 1.7401407515602179, + "grad_norm": 1.0942022372096942, + "learning_rate": 9.115727116414475e-07, + "loss": 0.1870848387479782, + "step": 6553 + }, + { + "epoch": 1.7404063205417608, + "grad_norm": 1.340037469005655, + "learning_rate": 9.097420285459635e-07, + "loss": 0.22922812402248383, + "step": 6554 + }, + { + "epoch": 1.7406718895233038, + "grad_norm": 1.3705243227438364, + "learning_rate": 9.079130979450068e-07, + "loss": 0.2505050301551819, + "step": 6555 + }, + { + "epoch": 1.7409374585048467, + "grad_norm": 1.3187608464438627, + "learning_rate": 9.060859201911732e-07, + "loss": 0.20445439219474792, + "step": 6556 + }, + { + "epoch": 1.7412030274863897, + "grad_norm": 1.1489822386745985, + "learning_rate": 9.042604956367218e-07, + "loss": 0.22338441014289856, + "step": 6557 + }, + { + "epoch": 1.7414685964679326, + "grad_norm": 1.2900464387857213, + "learning_rate": 
9.024368246335735e-07, + "loss": 0.24923941493034363, + "step": 6558 + }, + { + "epoch": 1.7417341654494756, + "grad_norm": 1.3383952744906746, + "learning_rate": 9.006149075333071e-07, + "loss": 0.22842931747436523, + "step": 6559 + }, + { + "epoch": 1.7419997344310185, + "grad_norm": 1.391145524863548, + "learning_rate": 8.987947446871703e-07, + "loss": 0.22451579570770264, + "step": 6560 + }, + { + "epoch": 1.7422653034125615, + "grad_norm": 1.3218089225892669, + "learning_rate": 8.969763364460682e-07, + "loss": 0.2521047592163086, + "step": 6561 + }, + { + "epoch": 1.7425308723941044, + "grad_norm": 1.1675892500249985, + "learning_rate": 8.951596831605691e-07, + "loss": 0.25001099705696106, + "step": 6562 + }, + { + "epoch": 1.7427964413756474, + "grad_norm": 1.175521207104519, + "learning_rate": 8.933447851809007e-07, + "loss": 0.19592508673667908, + "step": 6563 + }, + { + "epoch": 1.7430620103571903, + "grad_norm": 1.399887131584603, + "learning_rate": 8.915316428569554e-07, + "loss": 0.2785179018974304, + "step": 6564 + }, + { + "epoch": 1.7433275793387333, + "grad_norm": 1.1688351316361159, + "learning_rate": 8.897202565382845e-07, + "loss": 0.20700594782829285, + "step": 6565 + }, + { + "epoch": 1.7435931483202762, + "grad_norm": 1.2225569857896341, + "learning_rate": 8.879106265741044e-07, + "loss": 0.253167062997818, + "step": 6566 + }, + { + "epoch": 1.7438587173018192, + "grad_norm": 1.4278912909015264, + "learning_rate": 8.861027533132859e-07, + "loss": 0.27672937512397766, + "step": 6567 + }, + { + "epoch": 1.7441242862833621, + "grad_norm": 1.3136368448280313, + "learning_rate": 8.842966371043671e-07, + "loss": 0.23050950467586517, + "step": 6568 + }, + { + "epoch": 1.744389855264905, + "grad_norm": 1.2790658189865058, + "learning_rate": 8.824922782955481e-07, + "loss": 0.23529425263404846, + "step": 6569 + }, + { + "epoch": 1.744655424246448, + "grad_norm": 1.2887213562899031, + "learning_rate": 8.806896772346873e-07, + "loss": 
0.21803250908851624, + "step": 6570 + }, + { + "epoch": 1.744920993227991, + "grad_norm": 1.3669961004756481, + "learning_rate": 8.788888342693047e-07, + "loss": 0.24237293004989624, + "step": 6571 + }, + { + "epoch": 1.745186562209534, + "grad_norm": 1.1957319745445254, + "learning_rate": 8.770897497465803e-07, + "loss": 0.2008107602596283, + "step": 6572 + }, + { + "epoch": 1.745452131191077, + "grad_norm": 1.2693790937709173, + "learning_rate": 8.752924240133587e-07, + "loss": 0.23106279969215393, + "step": 6573 + }, + { + "epoch": 1.7457177001726198, + "grad_norm": 1.377716829660982, + "learning_rate": 8.734968574161406e-07, + "loss": 0.23726215958595276, + "step": 6574 + }, + { + "epoch": 1.7459832691541628, + "grad_norm": 1.211024095215965, + "learning_rate": 8.717030503010915e-07, + "loss": 0.26349812746047974, + "step": 6575 + }, + { + "epoch": 1.7462488381357057, + "grad_norm": 1.2871963140003055, + "learning_rate": 8.699110030140367e-07, + "loss": 0.23226451873779297, + "step": 6576 + }, + { + "epoch": 1.7465144071172487, + "grad_norm": 1.3173524718115384, + "learning_rate": 8.68120715900459e-07, + "loss": 0.22188402712345123, + "step": 6577 + }, + { + "epoch": 1.7467799760987917, + "grad_norm": 1.2367242455559135, + "learning_rate": 8.663321893055087e-07, + "loss": 0.21238234639167786, + "step": 6578 + }, + { + "epoch": 1.7470455450803346, + "grad_norm": 1.3423960800972676, + "learning_rate": 8.645454235739903e-07, + "loss": 0.2700675427913666, + "step": 6579 + }, + { + "epoch": 1.7473111140618776, + "grad_norm": 1.2737029023524005, + "learning_rate": 8.627604190503714e-07, + "loss": 0.24463894963264465, + "step": 6580 + }, + { + "epoch": 1.7475766830434205, + "grad_norm": 1.2537801110870739, + "learning_rate": 8.609771760787822e-07, + "loss": 0.23429079353809357, + "step": 6581 + }, + { + "epoch": 1.7478422520249635, + "grad_norm": 1.342775712878445, + "learning_rate": 8.591956950030067e-07, + "loss": 0.21767663955688477, + "step": 6582 + }, + { + 
"epoch": 1.7481078210065064, + "grad_norm": 1.3390334282971272, + "learning_rate": 8.574159761664957e-07, + "loss": 0.2499813735485077, + "step": 6583 + }, + { + "epoch": 1.7483733899880494, + "grad_norm": 1.471955255689367, + "learning_rate": 8.556380199123582e-07, + "loss": 0.28065958619117737, + "step": 6584 + }, + { + "epoch": 1.7486389589695923, + "grad_norm": 1.3012440070718, + "learning_rate": 8.538618265833621e-07, + "loss": 0.2166985273361206, + "step": 6585 + }, + { + "epoch": 1.7489045279511353, + "grad_norm": 1.2228700023368582, + "learning_rate": 8.520873965219356e-07, + "loss": 0.22835782170295715, + "step": 6586 + }, + { + "epoch": 1.7491700969326782, + "grad_norm": 1.2209097376008975, + "learning_rate": 8.503147300701709e-07, + "loss": 0.23575961589813232, + "step": 6587 + }, + { + "epoch": 1.7494356659142212, + "grad_norm": 1.1275514661567778, + "learning_rate": 8.485438275698154e-07, + "loss": 0.183369442820549, + "step": 6588 + }, + { + "epoch": 1.7497012348957641, + "grad_norm": 1.519810508178025, + "learning_rate": 8.467746893622786e-07, + "loss": 0.2731352746486664, + "step": 6589 + }, + { + "epoch": 1.749966803877307, + "grad_norm": 1.2913957246056922, + "learning_rate": 8.450073157886296e-07, + "loss": 0.20177578926086426, + "step": 6590 + }, + { + "epoch": 1.75023237285885, + "grad_norm": 1.2742798574628598, + "learning_rate": 8.432417071895982e-07, + "loss": 0.21672385931015015, + "step": 6591 + }, + { + "epoch": 1.750497941840393, + "grad_norm": 1.370933216008306, + "learning_rate": 8.414778639055699e-07, + "loss": 0.2503831386566162, + "step": 6592 + }, + { + "epoch": 1.750763510821936, + "grad_norm": 1.2884133202144494, + "learning_rate": 8.397157862765959e-07, + "loss": 0.2427521049976349, + "step": 6593 + }, + { + "epoch": 1.7510290798034789, + "grad_norm": 1.3424141731181953, + "learning_rate": 8.379554746423824e-07, + "loss": 0.23128533363342285, + "step": 6594 + }, + { + "epoch": 1.7512946487850218, + "grad_norm": 
1.2353999110478557, + "learning_rate": 8.361969293422967e-07, + "loss": 0.2470957189798355, + "step": 6595 + }, + { + "epoch": 1.7515602177665648, + "grad_norm": 1.3335789710762707, + "learning_rate": 8.344401507153665e-07, + "loss": 0.29447510838508606, + "step": 6596 + }, + { + "epoch": 1.7518257867481077, + "grad_norm": 1.197223419032368, + "learning_rate": 8.326851391002777e-07, + "loss": 0.21585828065872192, + "step": 6597 + }, + { + "epoch": 1.7520913557296507, + "grad_norm": 1.2653558688292899, + "learning_rate": 8.30931894835375e-07, + "loss": 0.24081121385097504, + "step": 6598 + }, + { + "epoch": 1.7523569247111936, + "grad_norm": 1.3408805119391818, + "learning_rate": 8.291804182586638e-07, + "loss": 0.23052063584327698, + "step": 6599 + }, + { + "epoch": 1.7526224936927366, + "grad_norm": 1.2126901970374089, + "learning_rate": 8.274307097078093e-07, + "loss": 0.19008183479309082, + "step": 6600 + }, + { + "epoch": 1.7528880626742795, + "grad_norm": 1.3285441470167585, + "learning_rate": 8.25682769520132e-07, + "loss": 0.2632960379123688, + "step": 6601 + }, + { + "epoch": 1.7531536316558225, + "grad_norm": 1.4350439941988302, + "learning_rate": 8.239365980326175e-07, + "loss": 0.25958624482154846, + "step": 6602 + }, + { + "epoch": 1.7534192006373654, + "grad_norm": 1.304275360361708, + "learning_rate": 8.221921955819035e-07, + "loss": 0.22370605170726776, + "step": 6603 + }, + { + "epoch": 1.7536847696189084, + "grad_norm": 1.2385957043075924, + "learning_rate": 8.204495625042919e-07, + "loss": 0.22018703818321228, + "step": 6604 + }, + { + "epoch": 1.7539503386004514, + "grad_norm": 1.3626754196729718, + "learning_rate": 8.187086991357418e-07, + "loss": 0.26802191138267517, + "step": 6605 + }, + { + "epoch": 1.7542159075819943, + "grad_norm": 1.5313825040978437, + "learning_rate": 8.169696058118725e-07, + "loss": 0.21560518443584442, + "step": 6606 + }, + { + "epoch": 1.7544814765635373, + "grad_norm": 1.270508998157205, + "learning_rate": 
8.152322828679593e-07, + "loss": 0.23222430050373077, + "step": 6607 + }, + { + "epoch": 1.7547470455450802, + "grad_norm": 1.1542994886817455, + "learning_rate": 8.134967306389374e-07, + "loss": 0.17638427019119263, + "step": 6608 + }, + { + "epoch": 1.7550126145266232, + "grad_norm": 1.3257823658984844, + "learning_rate": 8.117629494594015e-07, + "loss": 0.21539513766765594, + "step": 6609 + }, + { + "epoch": 1.7552781835081661, + "grad_norm": 1.3431199934216977, + "learning_rate": 8.100309396636031e-07, + "loss": 0.2265736162662506, + "step": 6610 + }, + { + "epoch": 1.755543752489709, + "grad_norm": 1.3478032961337874, + "learning_rate": 8.083007015854549e-07, + "loss": 0.2688787281513214, + "step": 6611 + }, + { + "epoch": 1.755809321471252, + "grad_norm": 1.3027271078273857, + "learning_rate": 8.065722355585249e-07, + "loss": 0.19756367802619934, + "step": 6612 + }, + { + "epoch": 1.756074890452795, + "grad_norm": 1.3749986253881121, + "learning_rate": 8.048455419160405e-07, + "loss": 0.19934290647506714, + "step": 6613 + }, + { + "epoch": 1.756340459434338, + "grad_norm": 1.5756000064179743, + "learning_rate": 8.031206209908904e-07, + "loss": 0.2523588538169861, + "step": 6614 + }, + { + "epoch": 1.7566060284158809, + "grad_norm": 1.2988900493114706, + "learning_rate": 8.01397473115616e-07, + "loss": 0.22825747728347778, + "step": 6615 + }, + { + "epoch": 1.7568715973974238, + "grad_norm": 1.3238944187902402, + "learning_rate": 7.996760986224228e-07, + "loss": 0.24525251984596252, + "step": 6616 + }, + { + "epoch": 1.7571371663789668, + "grad_norm": 1.366323962207031, + "learning_rate": 7.979564978431687e-07, + "loss": 0.21883559226989746, + "step": 6617 + }, + { + "epoch": 1.7574027353605097, + "grad_norm": 1.5827948860142422, + "learning_rate": 7.96238671109374e-07, + "loss": 0.2642098069190979, + "step": 6618 + }, + { + "epoch": 1.757668304342053, + "grad_norm": 1.3345016667633411, + "learning_rate": 7.945226187522159e-07, + "loss": 0.24094998836517334, + 
"step": 6619 + }, + { + "epoch": 1.7579338733235959, + "grad_norm": 1.2243450261876818, + "learning_rate": 7.928083411025278e-07, + "loss": 0.2225762903690338, + "step": 6620 + }, + { + "epoch": 1.7581994423051388, + "grad_norm": 1.2991544127435968, + "learning_rate": 7.910958384908041e-07, + "loss": 0.26722851395606995, + "step": 6621 + }, + { + "epoch": 1.7584650112866818, + "grad_norm": 1.3206157533666447, + "learning_rate": 7.893851112471907e-07, + "loss": 0.2176910787820816, + "step": 6622 + }, + { + "epoch": 1.7587305802682247, + "grad_norm": 1.3618122023344794, + "learning_rate": 7.876761597015003e-07, + "loss": 0.20261354744434357, + "step": 6623 + }, + { + "epoch": 1.7589961492497677, + "grad_norm": 1.1728416456458601, + "learning_rate": 7.859689841831975e-07, + "loss": 0.23314467072486877, + "step": 6624 + }, + { + "epoch": 1.7592617182313106, + "grad_norm": 1.3115277523344588, + "learning_rate": 7.842635850214054e-07, + "loss": 0.19854989647865295, + "step": 6625 + }, + { + "epoch": 1.7595272872128536, + "grad_norm": 1.2614486006783794, + "learning_rate": 7.825599625449043e-07, + "loss": 0.2422565519809723, + "step": 6626 + }, + { + "epoch": 1.7597928561943965, + "grad_norm": 1.342773057026848, + "learning_rate": 7.808581170821328e-07, + "loss": 0.27029529213905334, + "step": 6627 + }, + { + "epoch": 1.7600584251759395, + "grad_norm": 1.1918292148332001, + "learning_rate": 7.791580489611872e-07, + "loss": 0.23596832156181335, + "step": 6628 + }, + { + "epoch": 1.7603239941574824, + "grad_norm": 1.2062344481848934, + "learning_rate": 7.774597585098198e-07, + "loss": 0.218271404504776, + "step": 6629 + }, + { + "epoch": 1.7605895631390254, + "grad_norm": 1.3762692469809215, + "learning_rate": 7.75763246055441e-07, + "loss": 0.2551255226135254, + "step": 6630 + }, + { + "epoch": 1.7608551321205683, + "grad_norm": 1.3049962391533094, + "learning_rate": 7.740685119251179e-07, + "loss": 0.24410653114318848, + "step": 6631 + }, + { + "epoch": 
1.7611207011021113, + "grad_norm": 1.2577276419448338, + "learning_rate": 7.723755564455771e-07, + "loss": 0.23044872283935547, + "step": 6632 + }, + { + "epoch": 1.7613862700836542, + "grad_norm": 1.334208934461724, + "learning_rate": 7.706843799431985e-07, + "loss": 0.24569427967071533, + "step": 6633 + }, + { + "epoch": 1.7616518390651972, + "grad_norm": 1.1605227177029394, + "learning_rate": 7.689949827440224e-07, + "loss": 0.200277179479599, + "step": 6634 + }, + { + "epoch": 1.7619174080467401, + "grad_norm": 1.1742759165978003, + "learning_rate": 7.673073651737428e-07, + "loss": 0.19217821955680847, + "step": 6635 + }, + { + "epoch": 1.762182977028283, + "grad_norm": 1.281151649074766, + "learning_rate": 7.656215275577151e-07, + "loss": 0.227005273103714, + "step": 6636 + }, + { + "epoch": 1.762448546009826, + "grad_norm": 1.2211778988331632, + "learning_rate": 7.639374702209468e-07, + "loss": 0.21359863877296448, + "step": 6637 + }, + { + "epoch": 1.762714114991369, + "grad_norm": 1.267969218396632, + "learning_rate": 7.62255193488105e-07, + "loss": 0.24056711792945862, + "step": 6638 + }, + { + "epoch": 1.762979683972912, + "grad_norm": 1.28035138481303, + "learning_rate": 7.605746976835127e-07, + "loss": 0.20897413790225983, + "step": 6639 + }, + { + "epoch": 1.763245252954455, + "grad_norm": 1.2567764889990254, + "learning_rate": 7.588959831311493e-07, + "loss": 0.20395967364311218, + "step": 6640 + }, + { + "epoch": 1.7635108219359978, + "grad_norm": 1.4827108993688454, + "learning_rate": 7.572190501546517e-07, + "loss": 0.2334095984697342, + "step": 6641 + }, + { + "epoch": 1.7637763909175408, + "grad_norm": 1.3358734576215814, + "learning_rate": 7.555438990773134e-07, + "loss": 0.23892858624458313, + "step": 6642 + }, + { + "epoch": 1.7640419598990837, + "grad_norm": 1.3063666339869877, + "learning_rate": 7.538705302220839e-07, + "loss": 0.23515449464321136, + "step": 6643 + }, + { + "epoch": 1.7643075288806267, + "grad_norm": 1.1919354046726482, + 
"learning_rate": 7.521989439115674e-07, + "loss": 0.19728611409664154, + "step": 6644 + }, + { + "epoch": 1.7645730978621696, + "grad_norm": 1.2609989060636697, + "learning_rate": 7.505291404680281e-07, + "loss": 0.22277355194091797, + "step": 6645 + }, + { + "epoch": 1.7648386668437126, + "grad_norm": 1.2129119488866849, + "learning_rate": 7.488611202133822e-07, + "loss": 0.24117602407932281, + "step": 6646 + }, + { + "epoch": 1.7651042358252558, + "grad_norm": 1.3643314179100876, + "learning_rate": 7.471948834692045e-07, + "loss": 0.24675750732421875, + "step": 6647 + }, + { + "epoch": 1.7653698048067987, + "grad_norm": 1.3261352525807495, + "learning_rate": 7.455304305567279e-07, + "loss": 0.2413899004459381, + "step": 6648 + }, + { + "epoch": 1.7656353737883417, + "grad_norm": 1.3357210816225529, + "learning_rate": 7.438677617968348e-07, + "loss": 0.22125428915023804, + "step": 6649 + }, + { + "epoch": 1.7659009427698846, + "grad_norm": 1.2099689083776513, + "learning_rate": 7.422068775100732e-07, + "loss": 0.205051988363266, + "step": 6650 + }, + { + "epoch": 1.7661665117514276, + "grad_norm": 1.2734255069971199, + "learning_rate": 7.405477780166415e-07, + "loss": 0.23711715638637543, + "step": 6651 + }, + { + "epoch": 1.7664320807329705, + "grad_norm": 1.4063590395204508, + "learning_rate": 7.388904636363914e-07, + "loss": 0.2591046988964081, + "step": 6652 + }, + { + "epoch": 1.7666976497145135, + "grad_norm": 1.4323150626725398, + "learning_rate": 7.372349346888363e-07, + "loss": 0.24837243556976318, + "step": 6653 + }, + { + "epoch": 1.7669632186960564, + "grad_norm": 1.1492996795155954, + "learning_rate": 7.35581191493141e-07, + "loss": 0.20910412073135376, + "step": 6654 + }, + { + "epoch": 1.7672287876775994, + "grad_norm": 1.113119722429438, + "learning_rate": 7.339292343681282e-07, + "loss": 0.2056204229593277, + "step": 6655 + }, + { + "epoch": 1.7674943566591423, + "grad_norm": 1.2927092177897141, + "learning_rate": 7.322790636322764e-07, + "loss": 
0.2496742308139801, + "step": 6656 + }, + { + "epoch": 1.7677599256406853, + "grad_norm": 1.3571185149739835, + "learning_rate": 7.306306796037188e-07, + "loss": 0.24432921409606934, + "step": 6657 + }, + { + "epoch": 1.7680254946222282, + "grad_norm": 1.3006085174415165, + "learning_rate": 7.289840826002414e-07, + "loss": 0.2492775321006775, + "step": 6658 + }, + { + "epoch": 1.7682910636037712, + "grad_norm": 1.3256617876861967, + "learning_rate": 7.273392729392936e-07, + "loss": 0.22673827409744263, + "step": 6659 + }, + { + "epoch": 1.7685566325853141, + "grad_norm": 1.3730978211523115, + "learning_rate": 7.25696250937975e-07, + "loss": 0.2225622981786728, + "step": 6660 + }, + { + "epoch": 1.768822201566857, + "grad_norm": 1.2296766172450786, + "learning_rate": 7.240550169130378e-07, + "loss": 0.24896883964538574, + "step": 6661 + }, + { + "epoch": 1.7690877705484, + "grad_norm": 1.2103035123370711, + "learning_rate": 7.224155711808923e-07, + "loss": 0.2395302951335907, + "step": 6662 + }, + { + "epoch": 1.769353339529943, + "grad_norm": 1.2658162555194572, + "learning_rate": 7.207779140576066e-07, + "loss": 0.2255886197090149, + "step": 6663 + }, + { + "epoch": 1.769618908511486, + "grad_norm": 1.2518907529925698, + "learning_rate": 7.191420458589005e-07, + "loss": 0.24029678106307983, + "step": 6664 + }, + { + "epoch": 1.769884477493029, + "grad_norm": 1.1016484922093457, + "learning_rate": 7.175079669001506e-07, + "loss": 0.19399142265319824, + "step": 6665 + }, + { + "epoch": 1.7701500464745719, + "grad_norm": 1.2291425924678119, + "learning_rate": 7.158756774963882e-07, + "loss": 0.24569162726402283, + "step": 6666 + }, + { + "epoch": 1.7704156154561148, + "grad_norm": 1.2180012837263907, + "learning_rate": 7.142451779622971e-07, + "loss": 0.2484329342842102, + "step": 6667 + }, + { + "epoch": 1.7706811844376578, + "grad_norm": 1.2505833357389051, + "learning_rate": 7.126164686122216e-07, + "loss": 0.24423512816429138, + "step": 6668 + }, + { + "epoch": 
1.7709467534192007, + "grad_norm": 1.1277554918017485, + "learning_rate": 7.109895497601571e-07, + "loss": 0.20146678388118744, + "step": 6669 + }, + { + "epoch": 1.7712123224007437, + "grad_norm": 1.2945002187740315, + "learning_rate": 7.093644217197526e-07, + "loss": 0.23329001665115356, + "step": 6670 + }, + { + "epoch": 1.7714778913822866, + "grad_norm": 1.1689758736288713, + "learning_rate": 7.077410848043165e-07, + "loss": 0.2290019690990448, + "step": 6671 + }, + { + "epoch": 1.7717434603638296, + "grad_norm": 1.2744441159542537, + "learning_rate": 7.061195393268061e-07, + "loss": 0.2329377382993698, + "step": 6672 + }, + { + "epoch": 1.7720090293453725, + "grad_norm": 1.1430677052322078, + "learning_rate": 7.04499785599837e-07, + "loss": 0.21513575315475464, + "step": 6673 + }, + { + "epoch": 1.7722745983269155, + "grad_norm": 1.1659646021132744, + "learning_rate": 7.028818239356794e-07, + "loss": 0.19022463262081146, + "step": 6674 + }, + { + "epoch": 1.7725401673084584, + "grad_norm": 1.2837523861206293, + "learning_rate": 7.012656546462571e-07, + "loss": 0.2097887396812439, + "step": 6675 + }, + { + "epoch": 1.7728057362900014, + "grad_norm": 1.3991640357566577, + "learning_rate": 6.996512780431486e-07, + "loss": 0.2559792101383209, + "step": 6676 + }, + { + "epoch": 1.7730713052715443, + "grad_norm": 1.3219531410357084, + "learning_rate": 6.980386944375849e-07, + "loss": 0.24624274671077728, + "step": 6677 + }, + { + "epoch": 1.7733368742530873, + "grad_norm": 1.2405076465604956, + "learning_rate": 6.964279041404553e-07, + "loss": 0.22904372215270996, + "step": 6678 + }, + { + "epoch": 1.7736024432346302, + "grad_norm": 1.216707646052236, + "learning_rate": 6.948189074623002e-07, + "loss": 0.20808623731136322, + "step": 6679 + }, + { + "epoch": 1.7738680122161732, + "grad_norm": 1.229477200185015, + "learning_rate": 6.932117047133158e-07, + "loss": 0.1931435763835907, + "step": 6680 + }, + { + "epoch": 1.7741335811977161, + "grad_norm": 
1.2962984681963328, + "learning_rate": 6.91606296203351e-07, + "loss": 0.22938531637191772, + "step": 6681 + }, + { + "epoch": 1.774399150179259, + "grad_norm": 1.2921857742770726, + "learning_rate": 6.900026822419103e-07, + "loss": 0.240365132689476, + "step": 6682 + }, + { + "epoch": 1.774664719160802, + "grad_norm": 1.3560359754116593, + "learning_rate": 6.8840086313815e-07, + "loss": 0.26665499806404114, + "step": 6683 + }, + { + "epoch": 1.774930288142345, + "grad_norm": 1.1827095382370005, + "learning_rate": 6.86800839200884e-07, + "loss": 0.19775834679603577, + "step": 6684 + }, + { + "epoch": 1.775195857123888, + "grad_norm": 1.2698613362606737, + "learning_rate": 6.852026107385756e-07, + "loss": 0.20334021747112274, + "step": 6685 + }, + { + "epoch": 1.775461426105431, + "grad_norm": 1.1845529296493982, + "learning_rate": 6.836061780593484e-07, + "loss": 0.20670340955257416, + "step": 6686 + }, + { + "epoch": 1.7757269950869738, + "grad_norm": 1.2940248868651125, + "learning_rate": 6.820115414709727e-07, + "loss": 0.2033209353685379, + "step": 6687 + }, + { + "epoch": 1.7759925640685168, + "grad_norm": 1.101442360403221, + "learning_rate": 6.804187012808761e-07, + "loss": 0.23827815055847168, + "step": 6688 + }, + { + "epoch": 1.7762581330500598, + "grad_norm": 1.200357834005043, + "learning_rate": 6.788276577961394e-07, + "loss": 0.2054731547832489, + "step": 6689 + }, + { + "epoch": 1.7765237020316027, + "grad_norm": 1.3006753644657554, + "learning_rate": 6.772384113234987e-07, + "loss": 0.25553691387176514, + "step": 6690 + }, + { + "epoch": 1.7767892710131457, + "grad_norm": 1.2800516387465457, + "learning_rate": 6.756509621693385e-07, + "loss": 0.23650874197483063, + "step": 6691 + }, + { + "epoch": 1.7770548399946886, + "grad_norm": 1.2987358367196533, + "learning_rate": 6.740653106397033e-07, + "loss": 0.2353624701499939, + "step": 6692 + }, + { + "epoch": 1.7773204089762316, + "grad_norm": 1.3578478166739052, + "learning_rate": 
6.724814570402871e-07, + "loss": 0.26034629344940186, + "step": 6693 + }, + { + "epoch": 1.7775859779577745, + "grad_norm": 1.2070636800070726, + "learning_rate": 6.70899401676438e-07, + "loss": 0.2272130399942398, + "step": 6694 + }, + { + "epoch": 1.7778515469393175, + "grad_norm": 1.353295285146214, + "learning_rate": 6.693191448531589e-07, + "loss": 0.27940404415130615, + "step": 6695 + }, + { + "epoch": 1.7781171159208604, + "grad_norm": 1.2726244327901954, + "learning_rate": 6.677406868751013e-07, + "loss": 0.22997702658176422, + "step": 6696 + }, + { + "epoch": 1.7783826849024034, + "grad_norm": 1.2569026906720413, + "learning_rate": 6.661640280465775e-07, + "loss": 0.22918452322483063, + "step": 6697 + }, + { + "epoch": 1.7786482538839463, + "grad_norm": 1.2456580683228033, + "learning_rate": 6.645891686715456e-07, + "loss": 0.18456090986728668, + "step": 6698 + }, + { + "epoch": 1.7789138228654893, + "grad_norm": 1.3290472252808803, + "learning_rate": 6.630161090536214e-07, + "loss": 0.23256534337997437, + "step": 6699 + }, + { + "epoch": 1.7791793918470322, + "grad_norm": 1.2224316750050632, + "learning_rate": 6.614448494960713e-07, + "loss": 0.21171879768371582, + "step": 6700 + }, + { + "epoch": 1.7794449608285752, + "grad_norm": 1.201224789246079, + "learning_rate": 6.598753903018163e-07, + "loss": 0.21382400393486023, + "step": 6701 + }, + { + "epoch": 1.7797105298101181, + "grad_norm": 1.2240177347792593, + "learning_rate": 6.583077317734299e-07, + "loss": 0.22954748570919037, + "step": 6702 + }, + { + "epoch": 1.779976098791661, + "grad_norm": 1.519530195710278, + "learning_rate": 6.56741874213136e-07, + "loss": 0.25691086053848267, + "step": 6703 + }, + { + "epoch": 1.780241667773204, + "grad_norm": 1.4662002194098382, + "learning_rate": 6.551778179228174e-07, + "loss": 0.23413901031017303, + "step": 6704 + }, + { + "epoch": 1.780507236754747, + "grad_norm": 1.2775019242293946, + "learning_rate": 6.536155632040031e-07, + "loss": 0.2493733912706375, 
+ "step": 6705 + }, + { + "epoch": 1.78077280573629, + "grad_norm": 1.2512747936457356, + "learning_rate": 6.520551103578776e-07, + "loss": 0.26094138622283936, + "step": 6706 + }, + { + "epoch": 1.7810383747178329, + "grad_norm": 1.3016608765448805, + "learning_rate": 6.504964596852781e-07, + "loss": 0.23509518802165985, + "step": 6707 + }, + { + "epoch": 1.7813039436993758, + "grad_norm": 1.4726929969063267, + "learning_rate": 6.489396114866942e-07, + "loss": 0.2471122294664383, + "step": 6708 + }, + { + "epoch": 1.7815695126809188, + "grad_norm": 1.3034668854019054, + "learning_rate": 6.47384566062268e-07, + "loss": 0.2363303005695343, + "step": 6709 + }, + { + "epoch": 1.7818350816624617, + "grad_norm": 1.1801501968168786, + "learning_rate": 6.458313237117953e-07, + "loss": 0.18868233263492584, + "step": 6710 + }, + { + "epoch": 1.7821006506440047, + "grad_norm": 1.3437880175802723, + "learning_rate": 6.442798847347187e-07, + "loss": 0.23380546271800995, + "step": 6711 + }, + { + "epoch": 1.7823662196255476, + "grad_norm": 1.471740030592424, + "learning_rate": 6.42730249430139e-07, + "loss": 0.24112167954444885, + "step": 6712 + }, + { + "epoch": 1.7826317886070906, + "grad_norm": 1.2664184946697812, + "learning_rate": 6.411824180968096e-07, + "loss": 0.2397521436214447, + "step": 6713 + }, + { + "epoch": 1.7828973575886335, + "grad_norm": 1.309174308390434, + "learning_rate": 6.396363910331338e-07, + "loss": 0.23775406181812286, + "step": 6714 + }, + { + "epoch": 1.7831629265701765, + "grad_norm": 1.4327166340451307, + "learning_rate": 6.380921685371655e-07, + "loss": 0.23278602957725525, + "step": 6715 + }, + { + "epoch": 1.7834284955517195, + "grad_norm": 1.1135605228940266, + "learning_rate": 6.365497509066143e-07, + "loss": 0.20028996467590332, + "step": 6716 + }, + { + "epoch": 1.7836940645332624, + "grad_norm": 1.146963533940078, + "learning_rate": 6.35009138438839e-07, + "loss": 0.20862875878810883, + "step": 6717 + }, + { + "epoch": 1.7839596335148054, 
+ "grad_norm": 1.3257848293601993, + "learning_rate": 6.334703314308521e-07, + "loss": 0.23522542417049408, + "step": 6718 + }, + { + "epoch": 1.7842252024963483, + "grad_norm": 1.2172150430538355, + "learning_rate": 6.319333301793173e-07, + "loss": 0.24633824825286865, + "step": 6719 + }, + { + "epoch": 1.7844907714778913, + "grad_norm": 1.3131451310460658, + "learning_rate": 6.30398134980551e-07, + "loss": 0.22141410410404205, + "step": 6720 + }, + { + "epoch": 1.7847563404594342, + "grad_norm": 1.3593079444355614, + "learning_rate": 6.288647461305186e-07, + "loss": 0.23313754796981812, + "step": 6721 + }, + { + "epoch": 1.7850219094409772, + "grad_norm": 1.2751593889081192, + "learning_rate": 6.273331639248414e-07, + "loss": 0.22015389800071716, + "step": 6722 + }, + { + "epoch": 1.7852874784225201, + "grad_norm": 1.2716859790694561, + "learning_rate": 6.258033886587911e-07, + "loss": 0.21154522895812988, + "step": 6723 + }, + { + "epoch": 1.785553047404063, + "grad_norm": 1.3319130935282857, + "learning_rate": 6.242754206272883e-07, + "loss": 0.2320503294467926, + "step": 6724 + }, + { + "epoch": 1.785818616385606, + "grad_norm": 1.2016740259413836, + "learning_rate": 6.227492601249097e-07, + "loss": 0.21778921782970428, + "step": 6725 + }, + { + "epoch": 1.786084185367149, + "grad_norm": 1.2321504813505204, + "learning_rate": 6.212249074458776e-07, + "loss": 0.2368871569633484, + "step": 6726 + }, + { + "epoch": 1.786349754348692, + "grad_norm": 1.5195368545073897, + "learning_rate": 6.197023628840704e-07, + "loss": 0.27269479632377625, + "step": 6727 + }, + { + "epoch": 1.7866153233302349, + "grad_norm": 1.2744130185555103, + "learning_rate": 6.181816267330177e-07, + "loss": 0.2414151132106781, + "step": 6728 + }, + { + "epoch": 1.7868808923117778, + "grad_norm": 1.1197825562175172, + "learning_rate": 6.166626992858993e-07, + "loss": 0.2156972736120224, + "step": 6729 + }, + { + "epoch": 1.7871464612933208, + "grad_norm": 1.2748992996552195, + 
"learning_rate": 6.151455808355455e-07, + "loss": 0.2510441541671753, + "step": 6730 + }, + { + "epoch": 1.787412030274864, + "grad_norm": 1.2924509412618195, + "learning_rate": 6.136302716744402e-07, + "loss": 0.20290088653564453, + "step": 6731 + }, + { + "epoch": 1.787677599256407, + "grad_norm": 1.3705736121123597, + "learning_rate": 6.121167720947174e-07, + "loss": 0.25088101625442505, + "step": 6732 + }, + { + "epoch": 1.7879431682379499, + "grad_norm": 1.3723338572382136, + "learning_rate": 6.106050823881604e-07, + "loss": 0.2566376328468323, + "step": 6733 + }, + { + "epoch": 1.7882087372194928, + "grad_norm": 1.1043772478174716, + "learning_rate": 6.09095202846206e-07, + "loss": 0.1882714033126831, + "step": 6734 + }, + { + "epoch": 1.7884743062010358, + "grad_norm": 1.2323780172305254, + "learning_rate": 6.075871337599404e-07, + "loss": 0.18705856800079346, + "step": 6735 + }, + { + "epoch": 1.7887398751825787, + "grad_norm": 1.1976910574931858, + "learning_rate": 6.060808754201031e-07, + "loss": 0.24756133556365967, + "step": 6736 + }, + { + "epoch": 1.7890054441641217, + "grad_norm": 1.3197777974144425, + "learning_rate": 6.045764281170818e-07, + "loss": 0.2537599205970764, + "step": 6737 + }, + { + "epoch": 1.7892710131456646, + "grad_norm": 1.330362234255321, + "learning_rate": 6.030737921409169e-07, + "loss": 0.22049202024936676, + "step": 6738 + }, + { + "epoch": 1.7895365821272076, + "grad_norm": 1.1222347914068396, + "learning_rate": 6.015729677812965e-07, + "loss": 0.20820394158363342, + "step": 6739 + }, + { + "epoch": 1.7898021511087505, + "grad_norm": 1.3153590716408405, + "learning_rate": 6.00073955327567e-07, + "loss": 0.2339879721403122, + "step": 6740 + }, + { + "epoch": 1.7900677200902935, + "grad_norm": 1.2483259153993207, + "learning_rate": 5.98576755068715e-07, + "loss": 0.22082161903381348, + "step": 6741 + }, + { + "epoch": 1.7903332890718364, + "grad_norm": 1.28162605766883, + "learning_rate": 5.97081367293385e-07, + "loss": 
0.21883058547973633, + "step": 6742 + }, + { + "epoch": 1.7905988580533794, + "grad_norm": 1.1591166092235485, + "learning_rate": 5.955877922898712e-07, + "loss": 0.214680016040802, + "step": 6743 + }, + { + "epoch": 1.7908644270349223, + "grad_norm": 1.37628370977899, + "learning_rate": 5.940960303461152e-07, + "loss": 0.24533744156360626, + "step": 6744 + }, + { + "epoch": 1.7911299960164653, + "grad_norm": 1.3046535737377691, + "learning_rate": 5.926060817497137e-07, + "loss": 0.19857585430145264, + "step": 6745 + }, + { + "epoch": 1.7913955649980082, + "grad_norm": 1.4468975368000232, + "learning_rate": 5.911179467879081e-07, + "loss": 0.27493876218795776, + "step": 6746 + }, + { + "epoch": 1.7916611339795512, + "grad_norm": 1.1490145590407708, + "learning_rate": 5.896316257475954e-07, + "loss": 0.20560544729232788, + "step": 6747 + }, + { + "epoch": 1.7919267029610941, + "grad_norm": 1.2213631424870741, + "learning_rate": 5.881471189153199e-07, + "loss": 0.23559418320655823, + "step": 6748 + }, + { + "epoch": 1.792192271942637, + "grad_norm": 1.3144055462601232, + "learning_rate": 5.866644265772769e-07, + "loss": 0.23055103421211243, + "step": 6749 + }, + { + "epoch": 1.79245784092418, + "grad_norm": 1.4747052812755685, + "learning_rate": 5.851835490193136e-07, + "loss": 0.2780724763870239, + "step": 6750 + }, + { + "epoch": 1.792723409905723, + "grad_norm": 1.2354333862915858, + "learning_rate": 5.837044865269248e-07, + "loss": 0.20216618478298187, + "step": 6751 + }, + { + "epoch": 1.792988978887266, + "grad_norm": 1.308066661539038, + "learning_rate": 5.822272393852557e-07, + "loss": 0.2289930284023285, + "step": 6752 + }, + { + "epoch": 1.793254547868809, + "grad_norm": 1.2952454297764495, + "learning_rate": 5.80751807879103e-07, + "loss": 0.2028929740190506, + "step": 6753 + }, + { + "epoch": 1.7935201168503518, + "grad_norm": 1.2960791997009702, + "learning_rate": 5.792781922929114e-07, + "loss": 0.1964842826128006, + "step": 6754 + }, + { + "epoch": 
1.7937856858318948, + "grad_norm": 1.4512315838061285, + "learning_rate": 5.77806392910778e-07, + "loss": 0.2617039084434509, + "step": 6755 + }, + { + "epoch": 1.7940512548134377, + "grad_norm": 1.325466585449178, + "learning_rate": 5.76336410016447e-07, + "loss": 0.2582395374774933, + "step": 6756 + }, + { + "epoch": 1.7943168237949807, + "grad_norm": 1.2587701407069858, + "learning_rate": 5.74868243893314e-07, + "loss": 0.23379334807395935, + "step": 6757 + }, + { + "epoch": 1.7945823927765236, + "grad_norm": 1.2979435124807637, + "learning_rate": 5.734018948244247e-07, + "loss": 0.2376977801322937, + "step": 6758 + }, + { + "epoch": 1.7948479617580668, + "grad_norm": 1.414785341098569, + "learning_rate": 5.719373630924741e-07, + "loss": 0.21816037595272064, + "step": 6759 + }, + { + "epoch": 1.7951135307396098, + "grad_norm": 1.1404163081963787, + "learning_rate": 5.704746489798063e-07, + "loss": 0.22156387567520142, + "step": 6760 + }, + { + "epoch": 1.7953790997211527, + "grad_norm": 1.195358056085369, + "learning_rate": 5.690137527684147e-07, + "loss": 0.20818129181861877, + "step": 6761 + }, + { + "epoch": 1.7956446687026957, + "grad_norm": 1.1501993150491747, + "learning_rate": 5.67554674739944e-07, + "loss": 0.18672943115234375, + "step": 6762 + }, + { + "epoch": 1.7959102376842386, + "grad_norm": 1.2143392515173568, + "learning_rate": 5.66097415175686e-07, + "loss": 0.2023036777973175, + "step": 6763 + }, + { + "epoch": 1.7961758066657816, + "grad_norm": 1.3551091626165586, + "learning_rate": 5.646419743565845e-07, + "loss": 0.24798424541950226, + "step": 6764 + }, + { + "epoch": 1.7964413756473245, + "grad_norm": 1.2034553304236573, + "learning_rate": 5.631883525632297e-07, + "loss": 0.1885790377855301, + "step": 6765 + }, + { + "epoch": 1.7967069446288675, + "grad_norm": 1.3693229184747842, + "learning_rate": 5.617365500758631e-07, + "loss": 0.24120381474494934, + "step": 6766 + }, + { + "epoch": 1.7969725136104104, + "grad_norm": 1.2063823939207, + 
"learning_rate": 5.602865671743763e-07, + "loss": 0.24238690733909607, + "step": 6767 + }, + { + "epoch": 1.7972380825919534, + "grad_norm": 1.2611645650605894, + "learning_rate": 5.588384041383089e-07, + "loss": 0.22928190231323242, + "step": 6768 + }, + { + "epoch": 1.7975036515734963, + "grad_norm": 1.3148280979127052, + "learning_rate": 5.573920612468486e-07, + "loss": 0.2464730143547058, + "step": 6769 + }, + { + "epoch": 1.7977692205550393, + "grad_norm": 1.149985298163883, + "learning_rate": 5.559475387788348e-07, + "loss": 0.2167670875787735, + "step": 6770 + }, + { + "epoch": 1.7980347895365822, + "grad_norm": 1.3365719233561757, + "learning_rate": 5.545048370127526e-07, + "loss": 0.24080663919448853, + "step": 6771 + }, + { + "epoch": 1.7983003585181252, + "grad_norm": 1.3571891328346308, + "learning_rate": 5.530639562267382e-07, + "loss": 0.25481417775154114, + "step": 6772 + }, + { + "epoch": 1.7985659274996681, + "grad_norm": 1.3525822075957274, + "learning_rate": 5.51624896698576e-07, + "loss": 0.23328909277915955, + "step": 6773 + }, + { + "epoch": 1.798831496481211, + "grad_norm": 1.136424514008492, + "learning_rate": 5.50187658705702e-07, + "loss": 0.18779747188091278, + "step": 6774 + }, + { + "epoch": 1.799097065462754, + "grad_norm": 1.3089016035676113, + "learning_rate": 5.487522425251968e-07, + "loss": 0.24840545654296875, + "step": 6775 + }, + { + "epoch": 1.799362634444297, + "grad_norm": 1.4658187281761286, + "learning_rate": 5.473186484337911e-07, + "loss": 0.2559642791748047, + "step": 6776 + }, + { + "epoch": 1.79962820342584, + "grad_norm": 1.3714243263968933, + "learning_rate": 5.458868767078673e-07, + "loss": 0.2005981206893921, + "step": 6777 + }, + { + "epoch": 1.799893772407383, + "grad_norm": 1.4085177100377464, + "learning_rate": 5.444569276234523e-07, + "loss": 0.2480883002281189, + "step": 6778 + }, + { + "epoch": 1.8001593413889259, + "grad_norm": 1.2203856732153913, + "learning_rate": 5.430288014562235e-07, + "loss": 
0.23043295741081238, + "step": 6779 + }, + { + "epoch": 1.8004249103704688, + "grad_norm": 1.4245462518797845, + "learning_rate": 5.416024984815072e-07, + "loss": 0.22702521085739136, + "step": 6780 + }, + { + "epoch": 1.8006904793520118, + "grad_norm": 1.153610007644359, + "learning_rate": 5.401780189742789e-07, + "loss": 0.19955751299858093, + "step": 6781 + }, + { + "epoch": 1.8009560483335547, + "grad_norm": 1.2560139759300732, + "learning_rate": 5.387553632091591e-07, + "loss": 0.19743162393569946, + "step": 6782 + }, + { + "epoch": 1.8012216173150977, + "grad_norm": 1.3072968250539403, + "learning_rate": 5.373345314604206e-07, + "loss": 0.2262525111436844, + "step": 6783 + }, + { + "epoch": 1.8014871862966406, + "grad_norm": 1.2987858405959638, + "learning_rate": 5.359155240019809e-07, + "loss": 0.249632328748703, + "step": 6784 + }, + { + "epoch": 1.8017527552781836, + "grad_norm": 1.1804135507002813, + "learning_rate": 5.344983411074111e-07, + "loss": 0.19300231337547302, + "step": 6785 + }, + { + "epoch": 1.8020183242597265, + "grad_norm": 1.293291337799575, + "learning_rate": 5.330829830499263e-07, + "loss": 0.22256134450435638, + "step": 6786 + }, + { + "epoch": 1.8022838932412695, + "grad_norm": 1.283065855572867, + "learning_rate": 5.316694501023911e-07, + "loss": 0.2666356563568115, + "step": 6787 + }, + { + "epoch": 1.8025494622228124, + "grad_norm": 1.239663996945653, + "learning_rate": 5.302577425373156e-07, + "loss": 0.223050057888031, + "step": 6788 + }, + { + "epoch": 1.8028150312043554, + "grad_norm": 1.3011452698852823, + "learning_rate": 5.288478606268632e-07, + "loss": 0.2298094481229782, + "step": 6789 + }, + { + "epoch": 1.8030806001858983, + "grad_norm": 1.4761708863150307, + "learning_rate": 5.27439804642843e-07, + "loss": 0.23596417903900146, + "step": 6790 + }, + { + "epoch": 1.8033461691674413, + "grad_norm": 1.226229776793909, + "learning_rate": 5.26033574856708e-07, + "loss": 0.19501623511314392, + "step": 6791 + }, + { + "epoch": 
1.8036117381489842, + "grad_norm": 1.2825838070785722, + "learning_rate": 5.246291715395657e-07, + "loss": 0.23518472909927368, + "step": 6792 + }, + { + "epoch": 1.8038773071305272, + "grad_norm": 1.1820374841237484, + "learning_rate": 5.232265949621651e-07, + "loss": 0.2251899093389511, + "step": 6793 + }, + { + "epoch": 1.8041428761120701, + "grad_norm": 1.1527654541489951, + "learning_rate": 5.218258453949099e-07, + "loss": 0.1764119267463684, + "step": 6794 + }, + { + "epoch": 1.804408445093613, + "grad_norm": 1.2895741356204065, + "learning_rate": 5.204269231078484e-07, + "loss": 0.20768773555755615, + "step": 6795 + }, + { + "epoch": 1.804674014075156, + "grad_norm": 1.3841780370828203, + "learning_rate": 5.19029828370674e-07, + "loss": 0.2115546613931656, + "step": 6796 + }, + { + "epoch": 1.804939583056699, + "grad_norm": 1.315680847185169, + "learning_rate": 5.176345614527312e-07, + "loss": 0.2465972602367401, + "step": 6797 + }, + { + "epoch": 1.805205152038242, + "grad_norm": 1.379203464130328, + "learning_rate": 5.162411226230102e-07, + "loss": 0.2359803020954132, + "step": 6798 + }, + { + "epoch": 1.805470721019785, + "grad_norm": 1.4106819634653143, + "learning_rate": 5.148495121501506e-07, + "loss": 0.27518990635871887, + "step": 6799 + }, + { + "epoch": 1.8057362900013278, + "grad_norm": 1.3653410113402416, + "learning_rate": 5.134597303024391e-07, + "loss": 0.23914849758148193, + "step": 6800 + }, + { + "epoch": 1.8060018589828708, + "grad_norm": 1.256847668479307, + "learning_rate": 5.120717773478068e-07, + "loss": 0.21771098673343658, + "step": 6801 + }, + { + "epoch": 1.8062674279644138, + "grad_norm": 1.2716100664289411, + "learning_rate": 5.106856535538363e-07, + "loss": 0.235421285033226, + "step": 6802 + }, + { + "epoch": 1.8065329969459567, + "grad_norm": 1.4167241401735549, + "learning_rate": 5.093013591877561e-07, + "loss": 0.23973548412322998, + "step": 6803 + }, + { + "epoch": 1.8067985659274997, + "grad_norm": 1.484886222602596, + 
"learning_rate": 5.079188945164426e-07, + "loss": 0.24059349298477173, + "step": 6804 + }, + { + "epoch": 1.8070641349090426, + "grad_norm": 1.3840991454067133, + "learning_rate": 5.065382598064161e-07, + "loss": 0.25188207626342773, + "step": 6805 + }, + { + "epoch": 1.8073297038905856, + "grad_norm": 1.1866308474402574, + "learning_rate": 5.051594553238482e-07, + "loss": 0.20124536752700806, + "step": 6806 + }, + { + "epoch": 1.8075952728721285, + "grad_norm": 1.2234769875088154, + "learning_rate": 5.037824813345571e-07, + "loss": 0.2059330940246582, + "step": 6807 + }, + { + "epoch": 1.8078608418536715, + "grad_norm": 1.2468279665046458, + "learning_rate": 5.024073381040052e-07, + "loss": 0.2122621238231659, + "step": 6808 + }, + { + "epoch": 1.8081264108352144, + "grad_norm": 1.2203093249465347, + "learning_rate": 5.010340258973046e-07, + "loss": 0.20064303278923035, + "step": 6809 + }, + { + "epoch": 1.8083919798167574, + "grad_norm": 1.3685187895509534, + "learning_rate": 4.996625449792147e-07, + "loss": 0.24773281812667847, + "step": 6810 + }, + { + "epoch": 1.8086575487983003, + "grad_norm": 1.149837064877599, + "learning_rate": 4.982928956141375e-07, + "loss": 0.2111661732196808, + "step": 6811 + }, + { + "epoch": 1.8089231177798433, + "grad_norm": 1.2721912706796665, + "learning_rate": 4.969250780661306e-07, + "loss": 0.24823394417762756, + "step": 6812 + }, + { + "epoch": 1.8091886867613862, + "grad_norm": 1.410632443971984, + "learning_rate": 4.955590925988896e-07, + "loss": 0.24726605415344238, + "step": 6813 + }, + { + "epoch": 1.8094542557429292, + "grad_norm": 1.3112520269484638, + "learning_rate": 4.941949394757605e-07, + "loss": 0.2269962728023529, + "step": 6814 + }, + { + "epoch": 1.8097198247244721, + "grad_norm": 1.311172380903373, + "learning_rate": 4.928326189597377e-07, + "loss": 0.2336469292640686, + "step": 6815 + }, + { + "epoch": 1.809985393706015, + "grad_norm": 1.3372206959113173, + "learning_rate": 4.914721313134585e-07, + "loss": 
0.24872124195098877, + "step": 6816 + }, + { + "epoch": 1.810250962687558, + "grad_norm": 1.3116570930981006, + "learning_rate": 4.901134767992099e-07, + "loss": 0.2484157383441925, + "step": 6817 + }, + { + "epoch": 1.810516531669101, + "grad_norm": 1.5234901533359522, + "learning_rate": 4.887566556789247e-07, + "loss": 0.24683158099651337, + "step": 6818 + }, + { + "epoch": 1.810782100650644, + "grad_norm": 1.1959899225802055, + "learning_rate": 4.874016682141802e-07, + "loss": 0.18717995285987854, + "step": 6819 + }, + { + "epoch": 1.8110476696321869, + "grad_norm": 1.2862771000886628, + "learning_rate": 4.860485146662053e-07, + "loss": 0.2220807671546936, + "step": 6820 + }, + { + "epoch": 1.8113132386137298, + "grad_norm": 1.196369102162481, + "learning_rate": 4.84697195295869e-07, + "loss": 0.2178400307893753, + "step": 6821 + }, + { + "epoch": 1.8115788075952728, + "grad_norm": 1.2250082051849178, + "learning_rate": 4.833477103636908e-07, + "loss": 0.2056645154953003, + "step": 6822 + }, + { + "epoch": 1.8118443765768157, + "grad_norm": 1.1729075702986809, + "learning_rate": 4.820000601298358e-07, + "loss": 0.21441905200481415, + "step": 6823 + }, + { + "epoch": 1.8121099455583587, + "grad_norm": 1.4445497728186703, + "learning_rate": 4.806542448541151e-07, + "loss": 0.17688237130641937, + "step": 6824 + }, + { + "epoch": 1.8123755145399016, + "grad_norm": 1.3216659704658935, + "learning_rate": 4.793102647959847e-07, + "loss": 0.22405505180358887, + "step": 6825 + }, + { + "epoch": 1.8126410835214446, + "grad_norm": 1.4226735460298432, + "learning_rate": 4.779681202145503e-07, + "loss": 0.21617908775806427, + "step": 6826 + }, + { + "epoch": 1.8129066525029875, + "grad_norm": 1.3284639992790963, + "learning_rate": 4.766278113685596e-07, + "loss": 0.23570871353149414, + "step": 6827 + }, + { + "epoch": 1.8131722214845305, + "grad_norm": 1.222373726415007, + "learning_rate": 4.7528933851641036e-07, + "loss": 0.23806743323802948, + "step": 6828 + }, + { + 
"epoch": 1.8134377904660735, + "grad_norm": 1.3312930220149763, + "learning_rate": 4.739527019161405e-07, + "loss": 0.24859179556369781, + "step": 6829 + }, + { + "epoch": 1.8137033594476164, + "grad_norm": 1.2143252342774762, + "learning_rate": 4.726179018254418e-07, + "loss": 0.21314260363578796, + "step": 6830 + }, + { + "epoch": 1.8139689284291594, + "grad_norm": 1.272910058647325, + "learning_rate": 4.7128493850164715e-07, + "loss": 0.25290659070014954, + "step": 6831 + }, + { + "epoch": 1.8142344974107023, + "grad_norm": 1.1800117497978073, + "learning_rate": 4.699538122017355e-07, + "loss": 0.22606703639030457, + "step": 6832 + }, + { + "epoch": 1.8145000663922453, + "grad_norm": 1.3037958158309495, + "learning_rate": 4.6862452318233275e-07, + "loss": 0.23973071575164795, + "step": 6833 + }, + { + "epoch": 1.8147656353737882, + "grad_norm": 1.2341358358957555, + "learning_rate": 4.672970716997094e-07, + "loss": 0.2225341498851776, + "step": 6834 + }, + { + "epoch": 1.8150312043553312, + "grad_norm": 1.441833447404081, + "learning_rate": 4.6597145800978183e-07, + "loss": 0.19153356552124023, + "step": 6835 + }, + { + "epoch": 1.8152967733368741, + "grad_norm": 1.2010339801105188, + "learning_rate": 4.646476823681145e-07, + "loss": 0.19694843888282776, + "step": 6836 + }, + { + "epoch": 1.815562342318417, + "grad_norm": 1.2719437537675773, + "learning_rate": 4.6332574502991554e-07, + "loss": 0.2353869527578354, + "step": 6837 + }, + { + "epoch": 1.81582791129996, + "grad_norm": 1.3504470280928214, + "learning_rate": 4.6200564625003775e-07, + "loss": 0.20919787883758545, + "step": 6838 + }, + { + "epoch": 1.816093480281503, + "grad_norm": 1.1775336742921327, + "learning_rate": 4.6068738628298193e-07, + "loss": 0.18352919816970825, + "step": 6839 + }, + { + "epoch": 1.816359049263046, + "grad_norm": 1.3571378213568392, + "learning_rate": 4.5937096538289147e-07, + "loss": 0.24711212515830994, + "step": 6840 + }, + { + "epoch": 1.8166246182445889, + "grad_norm": 
1.2216287617055834, + "learning_rate": 4.580563838035579e-07, + "loss": 0.2350531816482544, + "step": 6841 + }, + { + "epoch": 1.8168901872261318, + "grad_norm": 1.3731447849726235, + "learning_rate": 4.5674364179841614e-07, + "loss": 0.26124465465545654, + "step": 6842 + }, + { + "epoch": 1.8171557562076748, + "grad_norm": 1.3819435677197398, + "learning_rate": 4.5543273962054934e-07, + "loss": 0.2110440880060196, + "step": 6843 + }, + { + "epoch": 1.817421325189218, + "grad_norm": 1.425540844923539, + "learning_rate": 4.5412367752268094e-07, + "loss": 0.2409415990114212, + "step": 6844 + }, + { + "epoch": 1.817686894170761, + "grad_norm": 1.2827549712815094, + "learning_rate": 4.528164557571857e-07, + "loss": 0.2280777543783188, + "step": 6845 + }, + { + "epoch": 1.8179524631523039, + "grad_norm": 1.111661347066374, + "learning_rate": 4.515110745760787e-07, + "loss": 0.201339989900589, + "step": 6846 + }, + { + "epoch": 1.8182180321338468, + "grad_norm": 1.2576623337538495, + "learning_rate": 4.5020753423102083e-07, + "loss": 0.22910752892494202, + "step": 6847 + }, + { + "epoch": 1.8184836011153898, + "grad_norm": 1.2835742527474332, + "learning_rate": 4.4890583497332327e-07, + "loss": 0.21736779808998108, + "step": 6848 + }, + { + "epoch": 1.8187491700969327, + "grad_norm": 1.282796826855034, + "learning_rate": 4.476059770539354e-07, + "loss": 0.20898449420928955, + "step": 6849 + }, + { + "epoch": 1.8190147390784757, + "grad_norm": 1.2514312774528749, + "learning_rate": 4.463079607234555e-07, + "loss": 0.22159051895141602, + "step": 6850 + }, + { + "epoch": 1.8192803080600186, + "grad_norm": 1.290667660986327, + "learning_rate": 4.450117862321246e-07, + "loss": 0.24081172049045563, + "step": 6851 + }, + { + "epoch": 1.8195458770415616, + "grad_norm": 1.2092663587603776, + "learning_rate": 4.4371745382983164e-07, + "loss": 0.17856758832931519, + "step": 6852 + }, + { + "epoch": 1.8198114460231045, + "grad_norm": 1.2002967167521004, + "learning_rate": 
4.424249637661071e-07, + "loss": 0.20796868205070496, + "step": 6853 + }, + { + "epoch": 1.8200770150046475, + "grad_norm": 1.5683273026632796, + "learning_rate": 4.4113431629013046e-07, + "loss": 0.24277149140834808, + "step": 6854 + }, + { + "epoch": 1.8203425839861904, + "grad_norm": 1.1767967505464594, + "learning_rate": 4.3984551165071944e-07, + "loss": 0.19315838813781738, + "step": 6855 + }, + { + "epoch": 1.8206081529677334, + "grad_norm": 1.2457379727303777, + "learning_rate": 4.3855855009634075e-07, + "loss": 0.20789340138435364, + "step": 6856 + }, + { + "epoch": 1.8208737219492763, + "grad_norm": 1.4246348317049922, + "learning_rate": 4.372734318751082e-07, + "loss": 0.2871186137199402, + "step": 6857 + }, + { + "epoch": 1.8211392909308193, + "grad_norm": 1.3878283876849893, + "learning_rate": 4.359901572347758e-07, + "loss": 0.2419736236333847, + "step": 6858 + }, + { + "epoch": 1.8214048599123622, + "grad_norm": 1.3237602075469659, + "learning_rate": 4.3470872642274455e-07, + "loss": 0.2190292328596115, + "step": 6859 + }, + { + "epoch": 1.8216704288939052, + "grad_norm": 1.3879953178475168, + "learning_rate": 4.3342913968605903e-07, + "loss": 0.2654367685317993, + "step": 6860 + }, + { + "epoch": 1.8219359978754481, + "grad_norm": 1.3362249609314758, + "learning_rate": 4.321513972714075e-07, + "loss": 0.2536984086036682, + "step": 6861 + }, + { + "epoch": 1.822201566856991, + "grad_norm": 1.3804156416489965, + "learning_rate": 4.308754994251252e-07, + "loss": 0.260431170463562, + "step": 6862 + }, + { + "epoch": 1.822467135838534, + "grad_norm": 1.1376782237723586, + "learning_rate": 4.2960144639318855e-07, + "loss": 0.19348303973674774, + "step": 6863 + }, + { + "epoch": 1.822732704820077, + "grad_norm": 1.3505211109720399, + "learning_rate": 4.283292384212201e-07, + "loss": 0.2284386157989502, + "step": 6864 + }, + { + "epoch": 1.82299827380162, + "grad_norm": 1.2449697035186624, + "learning_rate": 4.270588757544869e-07, + "loss": 
0.23439526557922363, + "step": 6865 + }, + { + "epoch": 1.823263842783163, + "grad_norm": 1.247098399621602, + "learning_rate": 4.2579035863790086e-07, + "loss": 0.2123441994190216, + "step": 6866 + }, + { + "epoch": 1.8235294117647058, + "grad_norm": 1.251423525262008, + "learning_rate": 4.245236873160163e-07, + "loss": 0.24568180739879608, + "step": 6867 + }, + { + "epoch": 1.8237949807462488, + "grad_norm": 1.4504253184377665, + "learning_rate": 4.232588620330325e-07, + "loss": 0.24078285694122314, + "step": 6868 + }, + { + "epoch": 1.8240605497277917, + "grad_norm": 1.157509101798501, + "learning_rate": 4.2199588303279414e-07, + "loss": 0.2003621608018875, + "step": 6869 + }, + { + "epoch": 1.8243261187093347, + "grad_norm": 1.3049050095763572, + "learning_rate": 4.2073475055878664e-07, + "loss": 0.21201889216899872, + "step": 6870 + }, + { + "epoch": 1.8245916876908777, + "grad_norm": 1.429124542908126, + "learning_rate": 4.1947546485414215e-07, + "loss": 0.23175427317619324, + "step": 6871 + }, + { + "epoch": 1.8248572566724208, + "grad_norm": 1.3101487536079581, + "learning_rate": 4.182180261616364e-07, + "loss": 0.2391383945941925, + "step": 6872 + }, + { + "epoch": 1.8251228256539638, + "grad_norm": 1.341869026992186, + "learning_rate": 4.169624347236878e-07, + "loss": 0.23120146989822388, + "step": 6873 + }, + { + "epoch": 1.8253883946355067, + "grad_norm": 1.1699948636498165, + "learning_rate": 4.157086907823604e-07, + "loss": 0.22541432082653046, + "step": 6874 + }, + { + "epoch": 1.8256539636170497, + "grad_norm": 1.3354293669412138, + "learning_rate": 4.1445679457936094e-07, + "loss": 0.25613510608673096, + "step": 6875 + }, + { + "epoch": 1.8259195325985926, + "grad_norm": 1.191861909098097, + "learning_rate": 4.1320674635604186e-07, + "loss": 0.21002547442913055, + "step": 6876 + }, + { + "epoch": 1.8261851015801356, + "grad_norm": 1.230870532242656, + "learning_rate": 4.119585463533959e-07, + "loss": 0.2593066692352295, + "step": 6877 + }, + { + 
"epoch": 1.8264506705616785, + "grad_norm": 1.4772106156087776, + "learning_rate": 4.1071219481206184e-07, + "loss": 0.23771531879901886, + "step": 6878 + }, + { + "epoch": 1.8267162395432215, + "grad_norm": 1.3106459571340912, + "learning_rate": 4.094676919723206e-07, + "loss": 0.2069541960954666, + "step": 6879 + }, + { + "epoch": 1.8269818085247644, + "grad_norm": 1.2065450512433227, + "learning_rate": 4.082250380740993e-07, + "loss": 0.21314311027526855, + "step": 6880 + }, + { + "epoch": 1.8272473775063074, + "grad_norm": 1.2723957233809677, + "learning_rate": 4.069842333569662e-07, + "loss": 0.198696106672287, + "step": 6881 + }, + { + "epoch": 1.8275129464878503, + "grad_norm": 1.2365636263350124, + "learning_rate": 4.057452780601334e-07, + "loss": 0.22771228849887848, + "step": 6882 + }, + { + "epoch": 1.8277785154693933, + "grad_norm": 1.3935711018120034, + "learning_rate": 4.045081724224564e-07, + "loss": 0.24176150560379028, + "step": 6883 + }, + { + "epoch": 1.8280440844509362, + "grad_norm": 1.1711714123320747, + "learning_rate": 4.0327291668243785e-07, + "loss": 0.18257084488868713, + "step": 6884 + }, + { + "epoch": 1.8283096534324792, + "grad_norm": 1.7740145369201021, + "learning_rate": 4.02039511078216e-07, + "loss": 0.2317531704902649, + "step": 6885 + }, + { + "epoch": 1.8285752224140222, + "grad_norm": 1.237685133468282, + "learning_rate": 4.008079558475797e-07, + "loss": 0.22523516416549683, + "step": 6886 + }, + { + "epoch": 1.828840791395565, + "grad_norm": 1.338469580607285, + "learning_rate": 3.995782512279578e-07, + "loss": 0.22351330518722534, + "step": 6887 + }, + { + "epoch": 1.829106360377108, + "grad_norm": 1.3272231861758204, + "learning_rate": 3.983503974564229e-07, + "loss": 0.22151902318000793, + "step": 6888 + }, + { + "epoch": 1.829371929358651, + "grad_norm": 1.2483501881623744, + "learning_rate": 3.971243947696901e-07, + "loss": 0.20800583064556122, + "step": 6889 + }, + { + "epoch": 1.829637498340194, + "grad_norm": 
1.189419989304772, + "learning_rate": 3.959002434041181e-07, + "loss": 0.21332690119743347, + "step": 6890 + }, + { + "epoch": 1.829903067321737, + "grad_norm": 1.3040750377284556, + "learning_rate": 3.946779435957093e-07, + "loss": 0.2561502456665039, + "step": 6891 + }, + { + "epoch": 1.8301686363032799, + "grad_norm": 1.2150229659643972, + "learning_rate": 3.934574955801074e-07, + "loss": 0.23636910319328308, + "step": 6892 + }, + { + "epoch": 1.8304342052848228, + "grad_norm": 1.303931878967275, + "learning_rate": 3.922388995926041e-07, + "loss": 0.26683998107910156, + "step": 6893 + }, + { + "epoch": 1.8306997742663658, + "grad_norm": 1.319570373744726, + "learning_rate": 3.910221558681271e-07, + "loss": 0.2779492735862732, + "step": 6894 + }, + { + "epoch": 1.8309653432479087, + "grad_norm": 1.473106593059021, + "learning_rate": 3.8980726464125095e-07, + "loss": 0.20174488425254822, + "step": 6895 + }, + { + "epoch": 1.8312309122294517, + "grad_norm": 1.3128034885814306, + "learning_rate": 3.885942261461928e-07, + "loss": 0.21486055850982666, + "step": 6896 + }, + { + "epoch": 1.8314964812109946, + "grad_norm": 1.2201269476427121, + "learning_rate": 3.8738304061681107e-07, + "loss": 0.25637733936309814, + "step": 6897 + }, + { + "epoch": 1.8317620501925376, + "grad_norm": 1.3661274524986262, + "learning_rate": 3.8617370828661014e-07, + "loss": 0.2518364489078522, + "step": 6898 + }, + { + "epoch": 1.8320276191740805, + "grad_norm": 1.2902396654446358, + "learning_rate": 3.849662293887324e-07, + "loss": 0.25752246379852295, + "step": 6899 + }, + { + "epoch": 1.8322931881556235, + "grad_norm": 1.1514833439027936, + "learning_rate": 3.8376060415596826e-07, + "loss": 0.20891718566417694, + "step": 6900 + }, + { + "epoch": 1.8325587571371664, + "grad_norm": 1.378720679176223, + "learning_rate": 3.825568328207452e-07, + "loss": 0.20491960644721985, + "step": 6901 + }, + { + "epoch": 1.8328243261187094, + "grad_norm": 1.2540067790590503, + "learning_rate": 
3.813549156151386e-07, + "loss": 0.22183339297771454, + "step": 6902 + }, + { + "epoch": 1.8330898951002523, + "grad_norm": 1.3321077338345055, + "learning_rate": 3.801548527708621e-07, + "loss": 0.2476987987756729, + "step": 6903 + }, + { + "epoch": 1.8333554640817953, + "grad_norm": 1.470629998110282, + "learning_rate": 3.7895664451927493e-07, + "loss": 0.26486238837242126, + "step": 6904 + }, + { + "epoch": 1.8336210330633382, + "grad_norm": 1.2524745099106778, + "learning_rate": 3.777602910913769e-07, + "loss": 0.25922873616218567, + "step": 6905 + }, + { + "epoch": 1.8338866020448812, + "grad_norm": 1.317563058388092, + "learning_rate": 3.7656579271781127e-07, + "loss": 0.22682476043701172, + "step": 6906 + }, + { + "epoch": 1.8341521710264241, + "grad_norm": 1.2391277284536568, + "learning_rate": 3.753731496288626e-07, + "loss": 0.20371592044830322, + "step": 6907 + }, + { + "epoch": 1.834417740007967, + "grad_norm": 1.2444383452097851, + "learning_rate": 3.7418236205445826e-07, + "loss": 0.23857446014881134, + "step": 6908 + }, + { + "epoch": 1.83468330898951, + "grad_norm": 2.6487436557467645, + "learning_rate": 3.729934302241689e-07, + "loss": 0.27119290828704834, + "step": 6909 + }, + { + "epoch": 1.834948877971053, + "grad_norm": 1.254159773595776, + "learning_rate": 3.7180635436720567e-07, + "loss": 0.2354927361011505, + "step": 6910 + }, + { + "epoch": 1.835214446952596, + "grad_norm": 1.301136184663389, + "learning_rate": 3.706211347124233e-07, + "loss": 0.26378512382507324, + "step": 6911 + }, + { + "epoch": 1.835480015934139, + "grad_norm": 1.3296098934003593, + "learning_rate": 3.6943777148831907e-07, + "loss": 0.20725026726722717, + "step": 6912 + }, + { + "epoch": 1.8357455849156818, + "grad_norm": 1.2212362377090786, + "learning_rate": 3.682562649230304e-07, + "loss": 0.2049856185913086, + "step": 6913 + }, + { + "epoch": 1.8360111538972248, + "grad_norm": 1.2555620791922353, + "learning_rate": 3.6707661524433833e-07, + "loss": 
0.19303423166275024, + "step": 6914 + }, + { + "epoch": 1.8362767228787678, + "grad_norm": 1.2395332139010746, + "learning_rate": 3.6589882267966445e-07, + "loss": 0.21510104835033417, + "step": 6915 + }, + { + "epoch": 1.8365422918603107, + "grad_norm": 1.1669418633603965, + "learning_rate": 3.6472288745607376e-07, + "loss": 0.1933138072490692, + "step": 6916 + }, + { + "epoch": 1.8368078608418537, + "grad_norm": 1.112367559966563, + "learning_rate": 3.6354880980027373e-07, + "loss": 0.2015206664800644, + "step": 6917 + }, + { + "epoch": 1.8370734298233966, + "grad_norm": 1.2823070307410491, + "learning_rate": 3.6237658993861114e-07, + "loss": 0.20550866425037384, + "step": 6918 + }, + { + "epoch": 1.8373389988049396, + "grad_norm": 1.3067689335737758, + "learning_rate": 3.612062280970763e-07, + "loss": 0.221620112657547, + "step": 6919 + }, + { + "epoch": 1.8376045677864825, + "grad_norm": 1.3556317520839982, + "learning_rate": 3.6003772450130315e-07, + "loss": 0.23098941147327423, + "step": 6920 + }, + { + "epoch": 1.8378701367680255, + "grad_norm": 1.147765516964157, + "learning_rate": 3.588710793765626e-07, + "loss": 0.2119837999343872, + "step": 6921 + }, + { + "epoch": 1.8381357057495684, + "grad_norm": 1.3802709807389941, + "learning_rate": 3.5770629294777146e-07, + "loss": 0.24879229068756104, + "step": 6922 + }, + { + "epoch": 1.8384012747311114, + "grad_norm": 1.3060365647669372, + "learning_rate": 3.565433654394879e-07, + "loss": 0.18895789980888367, + "step": 6923 + }, + { + "epoch": 1.8386668437126543, + "grad_norm": 1.2553378569117732, + "learning_rate": 3.55382297075908e-07, + "loss": 0.23148275911808014, + "step": 6924 + }, + { + "epoch": 1.8389324126941973, + "grad_norm": 1.212120061404488, + "learning_rate": 3.542230880808739e-07, + "loss": 0.20919913053512573, + "step": 6925 + }, + { + "epoch": 1.8391979816757402, + "grad_norm": 1.4703495422250146, + "learning_rate": 3.53065738677868e-07, + "loss": 0.22832845151424408, + "step": 6926 + }, + { + 
"epoch": 1.8394635506572832, + "grad_norm": 1.2792392305491092, + "learning_rate": 3.519102490900117e-07, + "loss": 0.25866004824638367, + "step": 6927 + }, + { + "epoch": 1.8397291196388261, + "grad_norm": 1.4425441758777668, + "learning_rate": 3.507566195400691e-07, + "loss": 0.23372048139572144, + "step": 6928 + }, + { + "epoch": 1.839994688620369, + "grad_norm": 1.3100572186568338, + "learning_rate": 3.496048502504501e-07, + "loss": 0.2516997158527374, + "step": 6929 + }, + { + "epoch": 1.840260257601912, + "grad_norm": 1.3352189279547024, + "learning_rate": 3.4845494144320036e-07, + "loss": 0.21170508861541748, + "step": 6930 + }, + { + "epoch": 1.840525826583455, + "grad_norm": 1.3970465930645521, + "learning_rate": 3.473068933400081e-07, + "loss": 0.2642953395843506, + "step": 6931 + }, + { + "epoch": 1.840791395564998, + "grad_norm": 1.2429277065520816, + "learning_rate": 3.461607061622041e-07, + "loss": 0.2294994294643402, + "step": 6932 + }, + { + "epoch": 1.8410569645465409, + "grad_norm": 1.3898674163561502, + "learning_rate": 3.450163801307582e-07, + "loss": 0.2554621696472168, + "step": 6933 + }, + { + "epoch": 1.8413225335280838, + "grad_norm": 1.5251200097904765, + "learning_rate": 3.4387391546628733e-07, + "loss": 0.2291295826435089, + "step": 6934 + }, + { + "epoch": 1.8415881025096268, + "grad_norm": 1.2253918775229307, + "learning_rate": 3.4273331238903974e-07, + "loss": 0.1996842920780182, + "step": 6935 + }, + { + "epoch": 1.8418536714911697, + "grad_norm": 1.3974356568527164, + "learning_rate": 3.415945711189128e-07, + "loss": 0.248038187623024, + "step": 6936 + }, + { + "epoch": 1.8421192404727127, + "grad_norm": 1.4224083213114915, + "learning_rate": 3.4045769187544096e-07, + "loss": 0.232235848903656, + "step": 6937 + }, + { + "epoch": 1.8423848094542556, + "grad_norm": 1.2811247103872994, + "learning_rate": 3.3932267487780333e-07, + "loss": 0.2526085376739502, + "step": 6938 + }, + { + "epoch": 1.8426503784357986, + "grad_norm": 
1.324059920588895, + "learning_rate": 3.381895203448182e-07, + "loss": 0.22401389479637146, + "step": 6939 + }, + { + "epoch": 1.8429159474173415, + "grad_norm": 1.2904044842651823, + "learning_rate": 3.3705822849494195e-07, + "loss": 0.2509264647960663, + "step": 6940 + }, + { + "epoch": 1.8431815163988845, + "grad_norm": 1.2502849304352568, + "learning_rate": 3.3592879954627564e-07, + "loss": 0.2451169192790985, + "step": 6941 + }, + { + "epoch": 1.8434470853804275, + "grad_norm": 1.2774613485778883, + "learning_rate": 3.3480123371655957e-07, + "loss": 0.2361738532781601, + "step": 6942 + }, + { + "epoch": 1.8437126543619704, + "grad_norm": 1.1823675774441849, + "learning_rate": 3.3367553122317544e-07, + "loss": 0.22336295247077942, + "step": 6943 + }, + { + "epoch": 1.8439782233435134, + "grad_norm": 1.4218109729535482, + "learning_rate": 3.325516922831451e-07, + "loss": 0.22287659347057343, + "step": 6944 + }, + { + "epoch": 1.8442437923250563, + "grad_norm": 1.2819242467045069, + "learning_rate": 3.3142971711312975e-07, + "loss": 0.21845945715904236, + "step": 6945 + }, + { + "epoch": 1.8445093613065993, + "grad_norm": 1.2822597279006254, + "learning_rate": 3.303096059294364e-07, + "loss": 0.2650350332260132, + "step": 6946 + }, + { + "epoch": 1.8447749302881422, + "grad_norm": 1.346661503925149, + "learning_rate": 3.291913589480078e-07, + "loss": 0.21282124519348145, + "step": 6947 + }, + { + "epoch": 1.8450404992696852, + "grad_norm": 1.1254422779054267, + "learning_rate": 3.280749763844293e-07, + "loss": 0.17899346351623535, + "step": 6948 + }, + { + "epoch": 1.8453060682512281, + "grad_norm": 1.3295675928838626, + "learning_rate": 3.269604584539254e-07, + "loss": 0.23462103307247162, + "step": 6949 + }, + { + "epoch": 1.845571637232771, + "grad_norm": 1.2573990354862534, + "learning_rate": 3.2584780537136206e-07, + "loss": 0.20188388228416443, + "step": 6950 + }, + { + "epoch": 1.845837206214314, + "grad_norm": 1.3823133322277716, + "learning_rate": 
3.247370173512443e-07, + "loss": 0.2760109305381775, + "step": 6951 + }, + { + "epoch": 1.846102775195857, + "grad_norm": 1.1542508493730164, + "learning_rate": 3.236280946077219e-07, + "loss": 0.20977352559566498, + "step": 6952 + }, + { + "epoch": 1.8463683441774, + "grad_norm": 1.299549634983184, + "learning_rate": 3.225210373545806e-07, + "loss": 0.26468873023986816, + "step": 6953 + }, + { + "epoch": 1.8466339131589429, + "grad_norm": 1.287524526318513, + "learning_rate": 3.214158458052463e-07, + "loss": 0.2362184375524521, + "step": 6954 + }, + { + "epoch": 1.8468994821404858, + "grad_norm": 1.29131597308928, + "learning_rate": 3.2031252017278966e-07, + "loss": 0.21406327188014984, + "step": 6955 + }, + { + "epoch": 1.847165051122029, + "grad_norm": 1.4794600314925854, + "learning_rate": 3.1921106066991835e-07, + "loss": 0.2698758840560913, + "step": 6956 + }, + { + "epoch": 1.847430620103572, + "grad_norm": 1.3029413719135112, + "learning_rate": 3.1811146750898025e-07, + "loss": 0.22954389452934265, + "step": 6957 + }, + { + "epoch": 1.847696189085115, + "grad_norm": 1.149631756175727, + "learning_rate": 3.170137409019636e-07, + "loss": 0.23005755245685577, + "step": 6958 + }, + { + "epoch": 1.8479617580666579, + "grad_norm": 1.270561680049171, + "learning_rate": 3.159178810604968e-07, + "loss": 0.22408893704414368, + "step": 6959 + }, + { + "epoch": 1.8482273270482008, + "grad_norm": 1.1761716687553918, + "learning_rate": 3.14823888195851e-07, + "loss": 0.1983698308467865, + "step": 6960 + }, + { + "epoch": 1.8484928960297438, + "grad_norm": 1.387251984339494, + "learning_rate": 3.137317625189329e-07, + "loss": 0.24643054604530334, + "step": 6961 + }, + { + "epoch": 1.8487584650112867, + "grad_norm": 1.3612119090250128, + "learning_rate": 3.1264150424029083e-07, + "loss": 0.274917870759964, + "step": 6962 + }, + { + "epoch": 1.8490240339928297, + "grad_norm": 1.2836957141365997, + "learning_rate": 3.115531135701155e-07, + "loss": 0.2129468023777008, + 
"step": 6963 + }, + { + "epoch": 1.8492896029743726, + "grad_norm": 1.3421884287788837, + "learning_rate": 3.1046659071823695e-07, + "loss": 0.24127928912639618, + "step": 6964 + }, + { + "epoch": 1.8495551719559156, + "grad_norm": 1.2737231627436634, + "learning_rate": 3.093819358941208e-07, + "loss": 0.2528054416179657, + "step": 6965 + }, + { + "epoch": 1.8498207409374585, + "grad_norm": 1.253824703575336, + "learning_rate": 3.0829914930687767e-07, + "loss": 0.23623798787593842, + "step": 6966 + }, + { + "epoch": 1.8500863099190015, + "grad_norm": 1.231408637511902, + "learning_rate": 3.0721823116525497e-07, + "loss": 0.20241659879684448, + "step": 6967 + }, + { + "epoch": 1.8503518789005444, + "grad_norm": 1.264350645442844, + "learning_rate": 3.0613918167764156e-07, + "loss": 0.24365916848182678, + "step": 6968 + }, + { + "epoch": 1.8506174478820874, + "grad_norm": 1.311846273217192, + "learning_rate": 3.0506200105206554e-07, + "loss": 0.2550637722015381, + "step": 6969 + }, + { + "epoch": 1.8508830168636303, + "grad_norm": 1.1438212130974086, + "learning_rate": 3.0398668949619515e-07, + "loss": 0.21531938016414642, + "step": 6970 + }, + { + "epoch": 1.8511485858451733, + "grad_norm": 1.3468646282560623, + "learning_rate": 3.029132472173368e-07, + "loss": 0.22749900817871094, + "step": 6971 + }, + { + "epoch": 1.8514141548267162, + "grad_norm": 1.186404759445675, + "learning_rate": 3.018416744224373e-07, + "loss": 0.1826775223016739, + "step": 6972 + }, + { + "epoch": 1.8516797238082592, + "grad_norm": 1.1782373460713542, + "learning_rate": 3.0077197131808344e-07, + "loss": 0.21982814371585846, + "step": 6973 + }, + { + "epoch": 1.8519452927898021, + "grad_norm": 1.2874557997839566, + "learning_rate": 2.997041381105026e-07, + "loss": 0.23515473306179047, + "step": 6974 + }, + { + "epoch": 1.852210861771345, + "grad_norm": 1.2184369208885015, + "learning_rate": 2.9863817500556e-07, + "loss": 0.19620616734027863, + "step": 6975 + }, + { + "epoch": 
1.852476430752888, + "grad_norm": 1.208715706835639, + "learning_rate": 2.975740822087603e-07, + "loss": 0.22158116102218628, + "step": 6976 + }, + { + "epoch": 1.852741999734431, + "grad_norm": 1.5176127203291871, + "learning_rate": 2.96511859925247e-07, + "loss": 0.23082244396209717, + "step": 6977 + }, + { + "epoch": 1.853007568715974, + "grad_norm": 1.286088700644728, + "learning_rate": 2.954515083598064e-07, + "loss": 0.22743141651153564, + "step": 6978 + }, + { + "epoch": 1.853273137697517, + "grad_norm": 1.3437900472909596, + "learning_rate": 2.943930277168594e-07, + "loss": 0.2329188883304596, + "step": 6979 + }, + { + "epoch": 1.8535387066790598, + "grad_norm": 1.1892741095151198, + "learning_rate": 2.9333641820047055e-07, + "loss": 0.20360302925109863, + "step": 6980 + }, + { + "epoch": 1.8538042756606028, + "grad_norm": 1.1771915113483071, + "learning_rate": 2.922816800143402e-07, + "loss": 0.1903664767742157, + "step": 6981 + }, + { + "epoch": 1.8540698446421457, + "grad_norm": 1.2252145672801615, + "learning_rate": 2.912288133618102e-07, + "loss": 0.2247854322195053, + "step": 6982 + }, + { + "epoch": 1.8543354136236887, + "grad_norm": 1.305215823982529, + "learning_rate": 2.9017781844586035e-07, + "loss": 0.22693192958831787, + "step": 6983 + }, + { + "epoch": 1.8546009826052319, + "grad_norm": 1.3213552294005186, + "learning_rate": 2.891286954691108e-07, + "loss": 0.23769894242286682, + "step": 6984 + }, + { + "epoch": 1.8548665515867748, + "grad_norm": 1.267542763443237, + "learning_rate": 2.880814446338198e-07, + "loss": 0.23251450061798096, + "step": 6985 + }, + { + "epoch": 1.8551321205683178, + "grad_norm": 1.3253334264213772, + "learning_rate": 2.870360661418847e-07, + "loss": 0.20828741788864136, + "step": 6986 + }, + { + "epoch": 1.8553976895498607, + "grad_norm": 1.2448815733296377, + "learning_rate": 2.859925601948421e-07, + "loss": 0.2324519008398056, + "step": 6987 + }, + { + "epoch": 1.8556632585314037, + "grad_norm": 1.2799176737952995, 
+ "learning_rate": 2.8495092699386774e-07, + "loss": 0.2166297733783722, + "step": 6988 + }, + { + "epoch": 1.8559288275129466, + "grad_norm": 1.416567928880924, + "learning_rate": 2.839111667397765e-07, + "loss": 0.2760158181190491, + "step": 6989 + }, + { + "epoch": 1.8561943964944896, + "grad_norm": 1.1117414218952344, + "learning_rate": 2.8287327963302025e-07, + "loss": 0.2263752520084381, + "step": 6990 + }, + { + "epoch": 1.8564599654760325, + "grad_norm": 1.328135206527719, + "learning_rate": 2.8183726587369455e-07, + "loss": 0.2490656077861786, + "step": 6991 + }, + { + "epoch": 1.8567255344575755, + "grad_norm": 1.4860885268210424, + "learning_rate": 2.808031256615285e-07, + "loss": 0.22495508193969727, + "step": 6992 + }, + { + "epoch": 1.8569911034391184, + "grad_norm": 1.297235121122649, + "learning_rate": 2.7977085919589253e-07, + "loss": 0.2671046853065491, + "step": 6993 + }, + { + "epoch": 1.8572566724206614, + "grad_norm": 1.2050300397617886, + "learning_rate": 2.7874046667579535e-07, + "loss": 0.19782954454421997, + "step": 6994 + }, + { + "epoch": 1.8575222414022043, + "grad_norm": 1.3009259795352104, + "learning_rate": 2.777119482998847e-07, + "loss": 0.24458879232406616, + "step": 6995 + }, + { + "epoch": 1.8577878103837473, + "grad_norm": 1.203325902936209, + "learning_rate": 2.7668530426644637e-07, + "loss": 0.23476794362068176, + "step": 6996 + }, + { + "epoch": 1.8580533793652902, + "grad_norm": 1.3828799415147273, + "learning_rate": 2.7566053477340535e-07, + "loss": 0.2318287342786789, + "step": 6997 + }, + { + "epoch": 1.8583189483468332, + "grad_norm": 1.1075382213650395, + "learning_rate": 2.746376400183259e-07, + "loss": 0.21341973543167114, + "step": 6998 + }, + { + "epoch": 1.8585845173283762, + "grad_norm": 1.3634634009375282, + "learning_rate": 2.7361662019840916e-07, + "loss": 0.25269803404808044, + "step": 6999 + }, + { + "epoch": 1.858850086309919, + "grad_norm": 1.2242004376785176, + "learning_rate": 2.7259747551049653e-07, + 
"loss": 0.24590039253234863, + "step": 7000 + }, + { + "epoch": 1.859115655291462, + "grad_norm": 1.2116643717780577, + "learning_rate": 2.715802061510664e-07, + "loss": 0.19907096028327942, + "step": 7001 + }, + { + "epoch": 1.859381224273005, + "grad_norm": 1.319285786592131, + "learning_rate": 2.705648123162363e-07, + "loss": 0.24304917454719543, + "step": 7002 + }, + { + "epoch": 1.859646793254548, + "grad_norm": 1.3884525546157216, + "learning_rate": 2.6955129420176193e-07, + "loss": 0.24846915900707245, + "step": 7003 + }, + { + "epoch": 1.859912362236091, + "grad_norm": 1.365283429552511, + "learning_rate": 2.685396520030381e-07, + "loss": 0.21709200739860535, + "step": 7004 + }, + { + "epoch": 1.8601779312176339, + "grad_norm": 1.3687506828870908, + "learning_rate": 2.675298859150977e-07, + "loss": 0.28031325340270996, + "step": 7005 + }, + { + "epoch": 1.8604435001991768, + "grad_norm": 1.1527129171653896, + "learning_rate": 2.6652199613261155e-07, + "loss": 0.20367707312107086, + "step": 7006 + }, + { + "epoch": 1.8607090691807198, + "grad_norm": 1.1875101722790007, + "learning_rate": 2.6551598284988877e-07, + "loss": 0.20737403631210327, + "step": 7007 + }, + { + "epoch": 1.8609746381622627, + "grad_norm": 1.3375926225189751, + "learning_rate": 2.6451184626087646e-07, + "loss": 0.2504046559333801, + "step": 7008 + }, + { + "epoch": 1.8612402071438057, + "grad_norm": 1.3403751507501938, + "learning_rate": 2.635095865591608e-07, + "loss": 0.26347339153289795, + "step": 7009 + }, + { + "epoch": 1.8615057761253486, + "grad_norm": 1.1832867553985462, + "learning_rate": 2.625092039379662e-07, + "loss": 0.2347220480442047, + "step": 7010 + }, + { + "epoch": 1.8617713451068916, + "grad_norm": 1.2487098903864389, + "learning_rate": 2.6151069859015386e-07, + "loss": 0.23565630614757538, + "step": 7011 + }, + { + "epoch": 1.8620369140884345, + "grad_norm": 1.2377624004623402, + "learning_rate": 2.605140707082243e-07, + "loss": 0.21462437510490417, + "step": 7012 + 
}, + { + "epoch": 1.8623024830699775, + "grad_norm": 1.2992774401284823, + "learning_rate": 2.595193204843149e-07, + "loss": 0.24224728345870972, + "step": 7013 + }, + { + "epoch": 1.8625680520515204, + "grad_norm": 1.3531530893390702, + "learning_rate": 2.5852644811020344e-07, + "loss": 0.24200880527496338, + "step": 7014 + }, + { + "epoch": 1.8628336210330634, + "grad_norm": 1.2331149203562455, + "learning_rate": 2.5753545377730227e-07, + "loss": 0.23315191268920898, + "step": 7015 + }, + { + "epoch": 1.8630991900146063, + "grad_norm": 1.4360061023192454, + "learning_rate": 2.56546337676663e-07, + "loss": 0.31112274527549744, + "step": 7016 + }, + { + "epoch": 1.8633647589961493, + "grad_norm": 1.1775380155652753, + "learning_rate": 2.555590999989754e-07, + "loss": 0.2291945070028305, + "step": 7017 + }, + { + "epoch": 1.8636303279776922, + "grad_norm": 1.3248749602779475, + "learning_rate": 2.5457374093457057e-07, + "loss": 0.2324746549129486, + "step": 7018 + }, + { + "epoch": 1.8638958969592352, + "grad_norm": 1.3333311590100283, + "learning_rate": 2.5359026067341086e-07, + "loss": 0.2585206627845764, + "step": 7019 + }, + { + "epoch": 1.8641614659407781, + "grad_norm": 1.254813387894953, + "learning_rate": 2.5260865940510027e-07, + "loss": 0.22986871004104614, + "step": 7020 + }, + { + "epoch": 1.864427034922321, + "grad_norm": 1.3302473304174876, + "learning_rate": 2.5162893731888074e-07, + "loss": 0.22615428268909454, + "step": 7021 + }, + { + "epoch": 1.864692603903864, + "grad_norm": 1.2311139475810073, + "learning_rate": 2.5065109460363113e-07, + "loss": 0.21324753761291504, + "step": 7022 + }, + { + "epoch": 1.864958172885407, + "grad_norm": 1.2499721276179248, + "learning_rate": 2.4967513144786736e-07, + "loss": 0.2247733324766159, + "step": 7023 + }, + { + "epoch": 1.86522374186695, + "grad_norm": 1.198842298043478, + "learning_rate": 2.4870104803974336e-07, + "loss": 0.22080597281455994, + "step": 7024 + }, + { + "epoch": 1.865489310848493, + 
"grad_norm": 1.3721040923851937, + "learning_rate": 2.4772884456705224e-07, + "loss": 0.23669888079166412, + "step": 7025 + }, + { + "epoch": 1.8657548798300359, + "grad_norm": 1.2946969495879501, + "learning_rate": 2.4675852121722075e-07, + "loss": 0.2320847064256668, + "step": 7026 + }, + { + "epoch": 1.8660204488115788, + "grad_norm": 1.374404266409337, + "learning_rate": 2.4579007817731925e-07, + "loss": 0.2595662474632263, + "step": 7027 + }, + { + "epoch": 1.8662860177931218, + "grad_norm": 1.2351512812852723, + "learning_rate": 2.4482351563405174e-07, + "loss": 0.22152045369148254, + "step": 7028 + }, + { + "epoch": 1.8665515867746647, + "grad_norm": 1.270416082371449, + "learning_rate": 2.4385883377375683e-07, + "loss": 0.2391948401927948, + "step": 7029 + }, + { + "epoch": 1.8668171557562077, + "grad_norm": 1.3234796115140017, + "learning_rate": 2.428960327824159e-07, + "loss": 0.23117749392986298, + "step": 7030 + }, + { + "epoch": 1.8670827247377506, + "grad_norm": 1.313106749776766, + "learning_rate": 2.41935112845646e-07, + "loss": 0.24019500613212585, + "step": 7031 + }, + { + "epoch": 1.8673482937192936, + "grad_norm": 1.253088890729472, + "learning_rate": 2.4097607414869995e-07, + "loss": 0.19560202956199646, + "step": 7032 + }, + { + "epoch": 1.8676138627008365, + "grad_norm": 1.3625686769003584, + "learning_rate": 2.4001891687647103e-07, + "loss": 0.23110055923461914, + "step": 7033 + }, + { + "epoch": 1.8678794316823795, + "grad_norm": 1.3388200482229684, + "learning_rate": 2.39063641213485e-07, + "loss": 0.2214709371328354, + "step": 7034 + }, + { + "epoch": 1.8681450006639224, + "grad_norm": 1.2700799842548796, + "learning_rate": 2.381102473439101e-07, + "loss": 0.22123369574546814, + "step": 7035 + }, + { + "epoch": 1.8684105696454654, + "grad_norm": 1.4629863869289934, + "learning_rate": 2.371587354515481e-07, + "loss": 0.23984813690185547, + "step": 7036 + }, + { + "epoch": 1.8686761386270083, + "grad_norm": 1.4496870886295976, + 
"learning_rate": 2.3620910571984124e-07, + "loss": 0.26089030504226685, + "step": 7037 + }, + { + "epoch": 1.8689417076085513, + "grad_norm": 1.2076380290124689, + "learning_rate": 2.3526135833186527e-07, + "loss": 0.2344229370355606, + "step": 7038 + }, + { + "epoch": 1.8692072765900942, + "grad_norm": 1.290620691312973, + "learning_rate": 2.34315493470334e-07, + "loss": 0.24499498307704926, + "step": 7039 + }, + { + "epoch": 1.8694728455716372, + "grad_norm": 1.2975050166282813, + "learning_rate": 2.333715113176005e-07, + "loss": 0.21971477568149567, + "step": 7040 + }, + { + "epoch": 1.8697384145531801, + "grad_norm": 1.2659856510175163, + "learning_rate": 2.3242941205565362e-07, + "loss": 0.2594453990459442, + "step": 7041 + }, + { + "epoch": 1.870003983534723, + "grad_norm": 1.3125676617059407, + "learning_rate": 2.3148919586611806e-07, + "loss": 0.24689960479736328, + "step": 7042 + }, + { + "epoch": 1.870269552516266, + "grad_norm": 1.2165345453138858, + "learning_rate": 2.3055086293025665e-07, + "loss": 0.19972509145736694, + "step": 7043 + }, + { + "epoch": 1.870535121497809, + "grad_norm": 1.2460782677559714, + "learning_rate": 2.2961441342896795e-07, + "loss": 0.2139236032962799, + "step": 7044 + }, + { + "epoch": 1.870800690479352, + "grad_norm": 1.196552292185578, + "learning_rate": 2.286798475427898e-07, + "loss": 0.2251984179019928, + "step": 7045 + }, + { + "epoch": 1.8710662594608949, + "grad_norm": 1.2395291577625112, + "learning_rate": 2.277471654518959e-07, + "loss": 0.24517378211021423, + "step": 7046 + }, + { + "epoch": 1.8713318284424378, + "grad_norm": 1.3048847468612028, + "learning_rate": 2.2681636733609457e-07, + "loss": 0.19115275144577026, + "step": 7047 + }, + { + "epoch": 1.8715973974239808, + "grad_norm": 1.2997607659373802, + "learning_rate": 2.2588745337483454e-07, + "loss": 0.26092633605003357, + "step": 7048 + }, + { + "epoch": 1.8718629664055237, + "grad_norm": 1.2646212726473884, + "learning_rate": 2.2496042374719807e-07, + 
"loss": 0.18862302601337433, + "step": 7049 + }, + { + "epoch": 1.8721285353870667, + "grad_norm": 1.1602330038245767, + "learning_rate": 2.2403527863190554e-07, + "loss": 0.20728996396064758, + "step": 7050 + }, + { + "epoch": 1.8723941043686096, + "grad_norm": 1.236025812615254, + "learning_rate": 2.231120182073143e-07, + "loss": 0.24244122207164764, + "step": 7051 + }, + { + "epoch": 1.8726596733501526, + "grad_norm": 1.205655043915546, + "learning_rate": 2.2219064265141866e-07, + "loss": 0.18956953287124634, + "step": 7052 + }, + { + "epoch": 1.8729252423316956, + "grad_norm": 1.1159089015267554, + "learning_rate": 2.2127115214184868e-07, + "loss": 0.19873176515102386, + "step": 7053 + }, + { + "epoch": 1.8731908113132385, + "grad_norm": 1.2896839736015335, + "learning_rate": 2.203535468558704e-07, + "loss": 0.23717360198497772, + "step": 7054 + }, + { + "epoch": 1.8734563802947815, + "grad_norm": 1.3203924338573048, + "learning_rate": 2.1943782697038896e-07, + "loss": 0.24051904678344727, + "step": 7055 + }, + { + "epoch": 1.8737219492763244, + "grad_norm": 1.3193670550613668, + "learning_rate": 2.1852399266194312e-07, + "loss": 0.23541691899299622, + "step": 7056 + }, + { + "epoch": 1.8739875182578674, + "grad_norm": 1.3395958296451687, + "learning_rate": 2.1761204410671088e-07, + "loss": 0.22566163539886475, + "step": 7057 + }, + { + "epoch": 1.8742530872394103, + "grad_norm": 1.297432294479727, + "learning_rate": 2.167019814805027e-07, + "loss": 0.25771743059158325, + "step": 7058 + }, + { + "epoch": 1.8745186562209533, + "grad_norm": 1.1482951648622821, + "learning_rate": 2.1579380495876934e-07, + "loss": 0.22624637186527252, + "step": 7059 + }, + { + "epoch": 1.8747842252024962, + "grad_norm": 1.3036126318267591, + "learning_rate": 2.148875147165963e-07, + "loss": 0.24671627581119537, + "step": 7060 + }, + { + "epoch": 1.8750497941840392, + "grad_norm": 1.1983704285109544, + "learning_rate": 2.1398311092870605e-07, + "loss": 0.21607278287410736, + "step": 
7061 + }, + { + "epoch": 1.8753153631655821, + "grad_norm": 1.1102939736369823, + "learning_rate": 2.1308059376945689e-07, + "loss": 0.1960655301809311, + "step": 7062 + }, + { + "epoch": 1.875580932147125, + "grad_norm": 1.2816228458436618, + "learning_rate": 2.1217996341284297e-07, + "loss": 0.22005721926689148, + "step": 7063 + }, + { + "epoch": 1.875846501128668, + "grad_norm": 1.2746284533707484, + "learning_rate": 2.1128122003249541e-07, + "loss": 0.21442776918411255, + "step": 7064 + }, + { + "epoch": 1.876112070110211, + "grad_norm": 1.1849768238897622, + "learning_rate": 2.1038436380168114e-07, + "loss": 0.23126785457134247, + "step": 7065 + }, + { + "epoch": 1.876377639091754, + "grad_norm": 1.4246070766583077, + "learning_rate": 2.094893948933041e-07, + "loss": 0.24286629259586334, + "step": 7066 + }, + { + "epoch": 1.8766432080732969, + "grad_norm": 1.3706445020134141, + "learning_rate": 2.0859631347990406e-07, + "loss": 0.25771957635879517, + "step": 7067 + }, + { + "epoch": 1.87690877705484, + "grad_norm": 1.1754559873110961, + "learning_rate": 2.0770511973365436e-07, + "loss": 0.19837790727615356, + "step": 7068 + }, + { + "epoch": 1.877174346036383, + "grad_norm": 1.2372359407501599, + "learning_rate": 2.0681581382636984e-07, + "loss": 0.21209359169006348, + "step": 7069 + }, + { + "epoch": 1.877439915017926, + "grad_norm": 1.9178204608286211, + "learning_rate": 2.0592839592949554e-07, + "loss": 0.26641422510147095, + "step": 7070 + }, + { + "epoch": 1.877705483999469, + "grad_norm": 1.3604176831947503, + "learning_rate": 2.050428662141146e-07, + "loss": 0.21609601378440857, + "step": 7071 + }, + { + "epoch": 1.8779710529810119, + "grad_norm": 1.2861845280896875, + "learning_rate": 2.0415922485095051e-07, + "loss": 0.23642000555992126, + "step": 7072 + }, + { + "epoch": 1.8782366219625548, + "grad_norm": 1.3854568667341272, + "learning_rate": 2.0327747201035587e-07, + "loss": 0.24564675986766815, + "step": 7073 + }, + { + "epoch": 
1.8785021909440978, + "grad_norm": 1.229212126818568, + "learning_rate": 2.0239760786232355e-07, + "loss": 0.20001479983329773, + "step": 7074 + }, + { + "epoch": 1.8787677599256407, + "grad_norm": 1.2817747323253132, + "learning_rate": 2.015196325764801e-07, + "loss": 0.2590208649635315, + "step": 7075 + }, + { + "epoch": 1.8790333289071837, + "grad_norm": 1.2462050168824985, + "learning_rate": 2.0064354632208904e-07, + "loss": 0.23298504948616028, + "step": 7076 + }, + { + "epoch": 1.8792988978887266, + "grad_norm": 1.2573573484068483, + "learning_rate": 1.997693492680497e-07, + "loss": 0.22409996390342712, + "step": 7077 + }, + { + "epoch": 1.8795644668702696, + "grad_norm": 1.410723892029772, + "learning_rate": 1.9889704158289724e-07, + "loss": 0.27316784858703613, + "step": 7078 + }, + { + "epoch": 1.8798300358518125, + "grad_norm": 1.2924796650338854, + "learning_rate": 1.980266234348016e-07, + "loss": 0.2271946519613266, + "step": 7079 + }, + { + "epoch": 1.8800956048333555, + "grad_norm": 1.2438429761767338, + "learning_rate": 1.9715809499156858e-07, + "loss": 0.20887964963912964, + "step": 7080 + }, + { + "epoch": 1.8803611738148984, + "grad_norm": 1.2112268618082698, + "learning_rate": 1.9629145642064197e-07, + "loss": 0.23468685150146484, + "step": 7081 + }, + { + "epoch": 1.8806267427964414, + "grad_norm": 1.308865144497765, + "learning_rate": 1.9542670788909813e-07, + "loss": 0.21624556183815002, + "step": 7082 + }, + { + "epoch": 1.8808923117779843, + "grad_norm": 1.1751415989571612, + "learning_rate": 1.9456384956365149e-07, + "loss": 0.22328166663646698, + "step": 7083 + }, + { + "epoch": 1.8811578807595273, + "grad_norm": 1.3508603820961609, + "learning_rate": 1.93702881610649e-07, + "loss": 0.2526431381702423, + "step": 7084 + }, + { + "epoch": 1.8814234497410702, + "grad_norm": 1.3562256445660688, + "learning_rate": 1.9284380419607784e-07, + "loss": 0.23668771982192993, + "step": 7085 + }, + { + "epoch": 1.8816890187226132, + "grad_norm": 
1.2668189225170288, + "learning_rate": 1.9198661748555557e-07, + "loss": 0.24710845947265625, + "step": 7086 + }, + { + "epoch": 1.8819545877041561, + "grad_norm": 1.4047256701053605, + "learning_rate": 1.911313216443389e-07, + "loss": 0.22696900367736816, + "step": 7087 + }, + { + "epoch": 1.882220156685699, + "grad_norm": 1.3717447863189725, + "learning_rate": 1.9027791683731922e-07, + "loss": 0.21652163565158844, + "step": 7088 + }, + { + "epoch": 1.882485725667242, + "grad_norm": 1.3189608691767827, + "learning_rate": 1.894264032290205e-07, + "loss": 0.2166716307401657, + "step": 7089 + }, + { + "epoch": 1.882751294648785, + "grad_norm": 1.3746931913110367, + "learning_rate": 1.8857678098360698e-07, + "loss": 0.26200050115585327, + "step": 7090 + }, + { + "epoch": 1.883016863630328, + "grad_norm": 1.2945644704190118, + "learning_rate": 1.8772905026487654e-07, + "loss": 0.2292764037847519, + "step": 7091 + }, + { + "epoch": 1.883282432611871, + "grad_norm": 1.3106590918741248, + "learning_rate": 1.8688321123625842e-07, + "loss": 0.23893016576766968, + "step": 7092 + }, + { + "epoch": 1.8835480015934138, + "grad_norm": 1.2241030970764724, + "learning_rate": 1.860392640608244e-07, + "loss": 0.2509230673313141, + "step": 7093 + }, + { + "epoch": 1.8838135705749568, + "grad_norm": 1.2218686374923997, + "learning_rate": 1.8519720890127434e-07, + "loss": 0.24156486988067627, + "step": 7094 + }, + { + "epoch": 1.8840791395564997, + "grad_norm": 1.2859122561460798, + "learning_rate": 1.843570459199462e-07, + "loss": 0.2120019942522049, + "step": 7095 + }, + { + "epoch": 1.884344708538043, + "grad_norm": 1.6579646138710773, + "learning_rate": 1.835187752788159e-07, + "loss": 0.23400259017944336, + "step": 7096 + }, + { + "epoch": 1.8846102775195859, + "grad_norm": 1.281132346942695, + "learning_rate": 1.8268239713949087e-07, + "loss": 0.20913103222846985, + "step": 7097 + }, + { + "epoch": 1.8848758465011288, + "grad_norm": 1.3381319381686223, + "learning_rate": 
1.8184791166321546e-07, + "loss": 0.24468877911567688, + "step": 7098 + }, + { + "epoch": 1.8851414154826718, + "grad_norm": 1.236616212709848, + "learning_rate": 1.8101531901086767e-07, + "loss": 0.2038918137550354, + "step": 7099 + }, + { + "epoch": 1.8854069844642147, + "grad_norm": 1.3201086548941574, + "learning_rate": 1.8018461934296239e-07, + "loss": 0.24191413819789886, + "step": 7100 + }, + { + "epoch": 1.8856725534457577, + "grad_norm": 1.277539269643606, + "learning_rate": 1.793558128196493e-07, + "loss": 0.24394474923610687, + "step": 7101 + }, + { + "epoch": 1.8859381224273006, + "grad_norm": 1.1561225023553612, + "learning_rate": 1.7852889960071063e-07, + "loss": 0.22630709409713745, + "step": 7102 + }, + { + "epoch": 1.8862036914088436, + "grad_norm": 1.5472360212555962, + "learning_rate": 1.7770387984556768e-07, + "loss": 0.23936980962753296, + "step": 7103 + }, + { + "epoch": 1.8864692603903865, + "grad_norm": 1.275471897769737, + "learning_rate": 1.768807537132733e-07, + "loss": 0.24808618426322937, + "step": 7104 + }, + { + "epoch": 1.8867348293719295, + "grad_norm": 1.273035999339445, + "learning_rate": 1.7605952136251603e-07, + "loss": 0.23934635519981384, + "step": 7105 + }, + { + "epoch": 1.8870003983534724, + "grad_norm": 1.189686791776393, + "learning_rate": 1.7524018295162148e-07, + "loss": 0.22107656300067902, + "step": 7106 + }, + { + "epoch": 1.8872659673350154, + "grad_norm": 1.3496800848037154, + "learning_rate": 1.7442273863854553e-07, + "loss": 0.23253028094768524, + "step": 7107 + }, + { + "epoch": 1.8875315363165583, + "grad_norm": 1.3028365552765204, + "learning_rate": 1.7360718858088542e-07, + "loss": 0.2501102387905121, + "step": 7108 + }, + { + "epoch": 1.8877971052981013, + "grad_norm": 1.4057988238229884, + "learning_rate": 1.7279353293586765e-07, + "loss": 0.25537967681884766, + "step": 7109 + }, + { + "epoch": 1.8880626742796442, + "grad_norm": 2.7876746143917033, + "learning_rate": 1.7198177186035447e-07, + "loss": 
0.25701045989990234, + "step": 7110 + }, + { + "epoch": 1.8883282432611872, + "grad_norm": 1.1447271563365653, + "learning_rate": 1.7117190551084628e-07, + "loss": 0.2109440565109253, + "step": 7111 + }, + { + "epoch": 1.8885938122427302, + "grad_norm": 1.2454061070152636, + "learning_rate": 1.7036393404347373e-07, + "loss": 0.22767721116542816, + "step": 7112 + }, + { + "epoch": 1.888859381224273, + "grad_norm": 1.1572937395529788, + "learning_rate": 1.6955785761400444e-07, + "loss": 0.1976814568042755, + "step": 7113 + }, + { + "epoch": 1.889124950205816, + "grad_norm": 1.1727224852039306, + "learning_rate": 1.687536763778419e-07, + "loss": 0.21109873056411743, + "step": 7114 + }, + { + "epoch": 1.889390519187359, + "grad_norm": 1.1916227822459606, + "learning_rate": 1.6795139049002095e-07, + "loss": 0.2165786623954773, + "step": 7115 + }, + { + "epoch": 1.889656088168902, + "grad_norm": 1.2917556149315792, + "learning_rate": 1.6715100010521347e-07, + "loss": 0.23962441086769104, + "step": 7116 + }, + { + "epoch": 1.889921657150445, + "grad_norm": 1.2423009900583697, + "learning_rate": 1.6635250537772596e-07, + "loss": 0.23351140320301056, + "step": 7117 + }, + { + "epoch": 1.8901872261319879, + "grad_norm": 1.3034348272306633, + "learning_rate": 1.6555590646149866e-07, + "loss": 0.19999945163726807, + "step": 7118 + }, + { + "epoch": 1.8904527951135308, + "grad_norm": 1.432201467842623, + "learning_rate": 1.647612035101054e-07, + "loss": 0.27142196893692017, + "step": 7119 + }, + { + "epoch": 1.8907183640950738, + "grad_norm": 1.2861780172834696, + "learning_rate": 1.6396839667675691e-07, + "loss": 0.21525685489177704, + "step": 7120 + }, + { + "epoch": 1.8909839330766167, + "grad_norm": 3.2062699859400396, + "learning_rate": 1.631774861142965e-07, + "loss": 0.24305005371570587, + "step": 7121 + }, + { + "epoch": 1.8912495020581597, + "grad_norm": 1.2019998279555377, + "learning_rate": 1.6238847197520113e-07, + "loss": 0.23202842473983765, + "step": 7122 + }, + 
{ + "epoch": 1.8915150710397026, + "grad_norm": 1.4409003412080332, + "learning_rate": 1.6160135441158576e-07, + "loss": 0.24373790621757507, + "step": 7123 + }, + { + "epoch": 1.8917806400212456, + "grad_norm": 1.2360359431057044, + "learning_rate": 1.6081613357519565e-07, + "loss": 0.22774222493171692, + "step": 7124 + }, + { + "epoch": 1.8920462090027885, + "grad_norm": 1.2064368847282083, + "learning_rate": 1.6003280961741196e-07, + "loss": 0.20660057663917542, + "step": 7125 + }, + { + "epoch": 1.8923117779843315, + "grad_norm": 1.3070998228758686, + "learning_rate": 1.5925138268925166e-07, + "loss": 0.23578912019729614, + "step": 7126 + }, + { + "epoch": 1.8925773469658744, + "grad_norm": 1.2737250152668298, + "learning_rate": 1.5847185294136313e-07, + "loss": 0.20852091908454895, + "step": 7127 + }, + { + "epoch": 1.8928429159474174, + "grad_norm": 1.1465883719364975, + "learning_rate": 1.5769422052403172e-07, + "loss": 0.17455898225307465, + "step": 7128 + }, + { + "epoch": 1.8931084849289603, + "grad_norm": 1.5036497092390075, + "learning_rate": 1.5691848558717638e-07, + "loss": 0.29552748799324036, + "step": 7129 + }, + { + "epoch": 1.8933740539105033, + "grad_norm": 1.3009458238394367, + "learning_rate": 1.5614464828034746e-07, + "loss": 0.22972649335861206, + "step": 7130 + }, + { + "epoch": 1.8936396228920462, + "grad_norm": 1.2296689152648304, + "learning_rate": 1.5537270875273348e-07, + "loss": 0.2134108692407608, + "step": 7131 + }, + { + "epoch": 1.8939051918735892, + "grad_norm": 1.4119584533896288, + "learning_rate": 1.546026671531542e-07, + "loss": 0.24145451188087463, + "step": 7132 + }, + { + "epoch": 1.8941707608551321, + "grad_norm": 1.355860353407812, + "learning_rate": 1.5383452363006534e-07, + "loss": 0.2323920726776123, + "step": 7133 + }, + { + "epoch": 1.894436329836675, + "grad_norm": 1.197617700552455, + "learning_rate": 1.5306827833155403e-07, + "loss": 0.20091015100479126, + "step": 7134 + }, + { + "epoch": 1.894701898818218, + 
"grad_norm": 1.370489911603159, + "learning_rate": 1.523039314053465e-07, + "loss": 0.2451317310333252, + "step": 7135 + }, + { + "epoch": 1.894967467799761, + "grad_norm": 1.2946538259097045, + "learning_rate": 1.5154148299879822e-07, + "loss": 0.22744594514369965, + "step": 7136 + }, + { + "epoch": 1.895233036781304, + "grad_norm": 1.2046527835430252, + "learning_rate": 1.5078093325889943e-07, + "loss": 0.2460673749446869, + "step": 7137 + }, + { + "epoch": 1.895498605762847, + "grad_norm": 1.4172423595206858, + "learning_rate": 1.5002228233227722e-07, + "loss": 0.2524537444114685, + "step": 7138 + }, + { + "epoch": 1.8957641747443899, + "grad_norm": 1.1840127480017744, + "learning_rate": 1.4926553036518798e-07, + "loss": 0.2056279480457306, + "step": 7139 + }, + { + "epoch": 1.8960297437259328, + "grad_norm": 1.2144930845419581, + "learning_rate": 1.485106775035261e-07, + "loss": 0.2656184732913971, + "step": 7140 + }, + { + "epoch": 1.8962953127074758, + "grad_norm": 1.1903286988332102, + "learning_rate": 1.477577238928185e-07, + "loss": 0.2190116047859192, + "step": 7141 + }, + { + "epoch": 1.8965608816890187, + "grad_norm": 1.206151177902952, + "learning_rate": 1.4700666967822574e-07, + "loss": 0.22984017431735992, + "step": 7142 + }, + { + "epoch": 1.8968264506705617, + "grad_norm": 1.1949819121682481, + "learning_rate": 1.462575150045409e-07, + "loss": 0.17947378754615784, + "step": 7143 + }, + { + "epoch": 1.8970920196521046, + "grad_norm": 1.2649423314993642, + "learning_rate": 1.4551026001619395e-07, + "loss": 0.24965715408325195, + "step": 7144 + }, + { + "epoch": 1.8973575886336476, + "grad_norm": 1.236302993447548, + "learning_rate": 1.4476490485724526e-07, + "loss": 0.2337307333946228, + "step": 7145 + }, + { + "epoch": 1.8976231576151905, + "grad_norm": 1.2205039464348546, + "learning_rate": 1.4402144967139098e-07, + "loss": 0.22668538987636566, + "step": 7146 + }, + { + "epoch": 1.8978887265967335, + "grad_norm": 1.350785859399433, + 
"learning_rate": 1.4327989460196091e-07, + "loss": 0.21934781968593597, + "step": 7147 + }, + { + "epoch": 1.8981542955782764, + "grad_norm": 1.2212959594670445, + "learning_rate": 1.4254023979191844e-07, + "loss": 0.1957930624485016, + "step": 7148 + }, + { + "epoch": 1.8984198645598194, + "grad_norm": 1.1724780894008597, + "learning_rate": 1.4180248538385956e-07, + "loss": 0.22351369261741638, + "step": 7149 + }, + { + "epoch": 1.8986854335413623, + "grad_norm": 1.3930947329130605, + "learning_rate": 1.4106663152001487e-07, + "loss": 0.2603265047073364, + "step": 7150 + }, + { + "epoch": 1.8989510025229053, + "grad_norm": 1.260479860356455, + "learning_rate": 1.4033267834224873e-07, + "loss": 0.2566663324832916, + "step": 7151 + }, + { + "epoch": 1.8992165715044482, + "grad_norm": 1.2799319314175146, + "learning_rate": 1.3960062599205682e-07, + "loss": 0.23130206763744354, + "step": 7152 + }, + { + "epoch": 1.8994821404859912, + "grad_norm": 1.1757231252562024, + "learning_rate": 1.3887047461057179e-07, + "loss": 0.17946425080299377, + "step": 7153 + }, + { + "epoch": 1.8997477094675341, + "grad_norm": 1.2434099546308155, + "learning_rate": 1.3814222433855884e-07, + "loss": 0.23946328461170197, + "step": 7154 + }, + { + "epoch": 1.900013278449077, + "grad_norm": 1.2249367291717066, + "learning_rate": 1.3741587531641566e-07, + "loss": 0.21002715826034546, + "step": 7155 + }, + { + "epoch": 1.90027884743062, + "grad_norm": 1.3062374823275615, + "learning_rate": 1.3669142768417242e-07, + "loss": 0.2121986746788025, + "step": 7156 + }, + { + "epoch": 1.900544416412163, + "grad_norm": 1.373871289837254, + "learning_rate": 1.3596888158149525e-07, + "loss": 0.26400670409202576, + "step": 7157 + }, + { + "epoch": 1.900809985393706, + "grad_norm": 1.1813353744292436, + "learning_rate": 1.3524823714768375e-07, + "loss": 0.18764406442642212, + "step": 7158 + }, + { + "epoch": 1.9010755543752489, + "grad_norm": 1.415975931925435, + "learning_rate": 1.3452949452166686e-07, + 
"loss": 0.2550342381000519, + "step": 7159 + }, + { + "epoch": 1.9013411233567918, + "grad_norm": 1.304366194966887, + "learning_rate": 1.3381265384201035e-07, + "loss": 0.23188576102256775, + "step": 7160 + }, + { + "epoch": 1.9016066923383348, + "grad_norm": 1.2473914592639561, + "learning_rate": 1.3309771524691372e-07, + "loss": 0.23124513030052185, + "step": 7161 + }, + { + "epoch": 1.9018722613198777, + "grad_norm": 1.2056745011797427, + "learning_rate": 1.323846788742078e-07, + "loss": 0.19941067695617676, + "step": 7162 + }, + { + "epoch": 1.9021378303014207, + "grad_norm": 1.4624998875104938, + "learning_rate": 1.316735448613593e-07, + "loss": 0.22510412335395813, + "step": 7163 + }, + { + "epoch": 1.9024033992829636, + "grad_norm": 1.2448961229015743, + "learning_rate": 1.309643133454641e-07, + "loss": 0.19102326035499573, + "step": 7164 + }, + { + "epoch": 1.9026689682645066, + "grad_norm": 1.2307397875458914, + "learning_rate": 1.3025698446325618e-07, + "loss": 0.20826731622219086, + "step": 7165 + }, + { + "epoch": 1.9029345372460496, + "grad_norm": 1.3483240422328144, + "learning_rate": 1.2955155835109757e-07, + "loss": 0.23238909244537354, + "step": 7166 + }, + { + "epoch": 1.9032001062275925, + "grad_norm": 1.4338552298496805, + "learning_rate": 1.2884803514498833e-07, + "loss": 0.2635011374950409, + "step": 7167 + }, + { + "epoch": 1.9034656752091355, + "grad_norm": 1.1745725675637841, + "learning_rate": 1.281464149805578e-07, + "loss": 0.2073322981595993, + "step": 7168 + }, + { + "epoch": 1.9037312441906784, + "grad_norm": 1.2344038568124596, + "learning_rate": 1.274466979930711e-07, + "loss": 0.22091326117515564, + "step": 7169 + }, + { + "epoch": 1.9039968131722214, + "grad_norm": 1.114689842836081, + "learning_rate": 1.2674888431742472e-07, + "loss": 0.18613001704216003, + "step": 7170 + }, + { + "epoch": 1.9042623821537643, + "grad_norm": 1.2788383965135535, + "learning_rate": 1.2605297408814887e-07, + "loss": 0.2165849655866623, + "step": 
7171 + }, + { + "epoch": 1.9045279511353073, + "grad_norm": 1.294203512401496, + "learning_rate": 1.2535896743940844e-07, + "loss": 0.21317794919013977, + "step": 7172 + }, + { + "epoch": 1.9047935201168502, + "grad_norm": 1.47127212987638, + "learning_rate": 1.2466686450499866e-07, + "loss": 0.25221073627471924, + "step": 7173 + }, + { + "epoch": 1.9050590890983932, + "grad_norm": 1.2647474973058104, + "learning_rate": 1.239766654183472e-07, + "loss": 0.21598559617996216, + "step": 7174 + }, + { + "epoch": 1.9053246580799361, + "grad_norm": 1.2635227030316536, + "learning_rate": 1.232883703125187e-07, + "loss": 0.2284495085477829, + "step": 7175 + }, + { + "epoch": 1.905590227061479, + "grad_norm": 1.1825527167306378, + "learning_rate": 1.2260197932020713e-07, + "loss": 0.21899332106113434, + "step": 7176 + }, + { + "epoch": 1.905855796043022, + "grad_norm": 1.3588902485974734, + "learning_rate": 1.2191749257374097e-07, + "loss": 0.2633277177810669, + "step": 7177 + }, + { + "epoch": 1.906121365024565, + "grad_norm": 1.2643904365611611, + "learning_rate": 1.2123491020508137e-07, + "loss": 0.2330140471458435, + "step": 7178 + }, + { + "epoch": 1.906386934006108, + "grad_norm": 1.2757939155257039, + "learning_rate": 1.2055423234582087e-07, + "loss": 0.21859750151634216, + "step": 7179 + }, + { + "epoch": 1.9066525029876509, + "grad_norm": 1.3985563606047093, + "learning_rate": 1.198754591271878e-07, + "loss": 0.252164363861084, + "step": 7180 + }, + { + "epoch": 1.906918071969194, + "grad_norm": 1.4365501399575176, + "learning_rate": 1.191985906800408e-07, + "loss": 0.24968160688877106, + "step": 7181 + }, + { + "epoch": 1.907183640950737, + "grad_norm": 1.199067091736319, + "learning_rate": 1.185236271348722e-07, + "loss": 0.2083423137664795, + "step": 7182 + }, + { + "epoch": 1.90744920993228, + "grad_norm": 1.258208503364781, + "learning_rate": 1.1785056862180789e-07, + "loss": 0.2468394935131073, + "step": 7183 + }, + { + "epoch": 1.907714778913823, + 
"grad_norm": 1.2908738922715033, + "learning_rate": 1.1717941527060405e-07, + "loss": 0.22417521476745605, + "step": 7184 + }, + { + "epoch": 1.9079803478953659, + "grad_norm": 1.2789853859840312, + "learning_rate": 1.1651016721065167e-07, + "loss": 0.2411842793226242, + "step": 7185 + }, + { + "epoch": 1.9082459168769088, + "grad_norm": 1.311967953603668, + "learning_rate": 1.1584282457097417e-07, + "loss": 0.24650761485099792, + "step": 7186 + }, + { + "epoch": 1.9085114858584518, + "grad_norm": 1.3305923315328496, + "learning_rate": 1.1517738748022755e-07, + "loss": 0.22433717548847198, + "step": 7187 + }, + { + "epoch": 1.9087770548399947, + "grad_norm": 1.2666444248015347, + "learning_rate": 1.145138560667003e-07, + "loss": 0.20867910981178284, + "step": 7188 + }, + { + "epoch": 1.9090426238215377, + "grad_norm": 1.2511449541105855, + "learning_rate": 1.138522304583134e-07, + "loss": 0.21889618039131165, + "step": 7189 + }, + { + "epoch": 1.9093081928030806, + "grad_norm": 1.113107479716362, + "learning_rate": 1.1319251078261928e-07, + "loss": 0.19350749254226685, + "step": 7190 + }, + { + "epoch": 1.9095737617846236, + "grad_norm": 1.183265546980091, + "learning_rate": 1.125346971668051e-07, + "loss": 0.19123657047748566, + "step": 7191 + }, + { + "epoch": 1.9098393307661665, + "grad_norm": 1.2653223306994201, + "learning_rate": 1.118787897376905e-07, + "loss": 0.21433782577514648, + "step": 7192 + }, + { + "epoch": 1.9101048997477095, + "grad_norm": 1.474925382041675, + "learning_rate": 1.1122478862172437e-07, + "loss": 0.2521187663078308, + "step": 7193 + }, + { + "epoch": 1.9103704687292524, + "grad_norm": 1.2835872924926361, + "learning_rate": 1.1057269394499248e-07, + "loss": 0.2141486555337906, + "step": 7194 + }, + { + "epoch": 1.9106360377107954, + "grad_norm": 1.271472683987379, + "learning_rate": 1.0992250583320985e-07, + "loss": 0.22960343956947327, + "step": 7195 + }, + { + "epoch": 1.9109016066923383, + "grad_norm": 1.3433609684783299, + 
"learning_rate": 1.092742244117262e-07, + "loss": 0.21809744834899902, + "step": 7196 + }, + { + "epoch": 1.9111671756738813, + "grad_norm": 1.248347973820862, + "learning_rate": 1.0862784980552044e-07, + "loss": 0.22418212890625, + "step": 7197 + }, + { + "epoch": 1.9114327446554242, + "grad_norm": 1.2504701200893746, + "learning_rate": 1.0798338213920845e-07, + "loss": 0.22050701081752777, + "step": 7198 + }, + { + "epoch": 1.9116983136369672, + "grad_norm": 1.206849931438756, + "learning_rate": 1.0734082153703418e-07, + "loss": 0.23200345039367676, + "step": 7199 + }, + { + "epoch": 1.9119638826185101, + "grad_norm": 1.1102825382626649, + "learning_rate": 1.0670016812287631e-07, + "loss": 0.18366631865501404, + "step": 7200 + }, + { + "epoch": 1.912229451600053, + "grad_norm": 1.2844567521026582, + "learning_rate": 1.0606142202024605e-07, + "loss": 0.24362193048000336, + "step": 7201 + }, + { + "epoch": 1.912495020581596, + "grad_norm": 1.2822631921528913, + "learning_rate": 1.0542458335228601e-07, + "loss": 0.2216200977563858, + "step": 7202 + }, + { + "epoch": 1.912760589563139, + "grad_norm": 1.0921875359661608, + "learning_rate": 1.0478965224176907e-07, + "loss": 0.20216065645217896, + "step": 7203 + }, + { + "epoch": 1.913026158544682, + "grad_norm": 1.254966671592246, + "learning_rate": 1.041566288111051e-07, + "loss": 0.22054359316825867, + "step": 7204 + }, + { + "epoch": 1.913291727526225, + "grad_norm": 1.3532366246655447, + "learning_rate": 1.0352551318233206e-07, + "loss": 0.21569015085697174, + "step": 7205 + }, + { + "epoch": 1.9135572965077678, + "grad_norm": 1.2826756039782425, + "learning_rate": 1.028963054771226e-07, + "loss": 0.22967267036437988, + "step": 7206 + }, + { + "epoch": 1.9138228654893108, + "grad_norm": 1.3494789006319945, + "learning_rate": 1.0226900581677968e-07, + "loss": 0.2422460913658142, + "step": 7207 + }, + { + "epoch": 1.9140884344708538, + "grad_norm": 1.3606228589652338, + "learning_rate": 1.0164361432223879e-07, + 
"loss": 0.25891292095184326, + "step": 7208 + }, + { + "epoch": 1.914354003452397, + "grad_norm": 1.3570561855059022, + "learning_rate": 1.0102013111406905e-07, + "loss": 0.26915764808654785, + "step": 7209 + }, + { + "epoch": 1.9146195724339399, + "grad_norm": 1.3889996377213247, + "learning_rate": 1.0039855631247097e-07, + "loss": 0.2268485426902771, + "step": 7210 + }, + { + "epoch": 1.9148851414154828, + "grad_norm": 1.254622691077732, + "learning_rate": 9.977889003727647e-08, + "loss": 0.22551512718200684, + "step": 7211 + }, + { + "epoch": 1.9151507103970258, + "grad_norm": 1.233084698895248, + "learning_rate": 9.91611324079489e-08, + "loss": 0.24224743247032166, + "step": 7212 + }, + { + "epoch": 1.9154162793785687, + "grad_norm": 1.2426176239380708, + "learning_rate": 9.854528354358517e-08, + "loss": 0.19550879299640656, + "step": 7213 + }, + { + "epoch": 1.9156818483601117, + "grad_norm": 1.3449782320604147, + "learning_rate": 9.793134356291478e-08, + "loss": 0.24986523389816284, + "step": 7214 + }, + { + "epoch": 1.9159474173416546, + "grad_norm": 1.3340583070384961, + "learning_rate": 9.731931258429638e-08, + "loss": 0.2565170228481293, + "step": 7215 + }, + { + "epoch": 1.9162129863231976, + "grad_norm": 1.185156912642083, + "learning_rate": 9.670919072572449e-08, + "loss": 0.2166958749294281, + "step": 7216 + }, + { + "epoch": 1.9164785553047405, + "grad_norm": 1.2903999319183896, + "learning_rate": 9.610097810482166e-08, + "loss": 0.2002115249633789, + "step": 7217 + }, + { + "epoch": 1.9167441242862835, + "grad_norm": 1.1589813054229285, + "learning_rate": 9.549467483884412e-08, + "loss": 0.209486186504364, + "step": 7218 + }, + { + "epoch": 1.9170096932678264, + "grad_norm": 1.2748483155423624, + "learning_rate": 9.489028104468056e-08, + "loss": 0.22061321139335632, + "step": 7219 + }, + { + "epoch": 1.9172752622493694, + "grad_norm": 1.3916500275624957, + "learning_rate": 9.428779683885114e-08, + "loss": 0.21880047023296356, + "step": 7220 + }, + { 
+ "epoch": 1.9175408312309123, + "grad_norm": 1.174801358834737, + "learning_rate": 9.368722233750849e-08, + "loss": 0.22674325108528137, + "step": 7221 + }, + { + "epoch": 1.9178064002124553, + "grad_norm": 1.2877078963500264, + "learning_rate": 9.308855765643332e-08, + "loss": 0.22100718319416046, + "step": 7222 + }, + { + "epoch": 1.9180719691939982, + "grad_norm": 1.3291196619762962, + "learning_rate": 9.249180291104553e-08, + "loss": 0.23105254769325256, + "step": 7223 + }, + { + "epoch": 1.9183375381755412, + "grad_norm": 1.2897395451200044, + "learning_rate": 9.189695821638755e-08, + "loss": 0.22483405470848083, + "step": 7224 + }, + { + "epoch": 1.9186031071570842, + "grad_norm": 1.0701399001286365, + "learning_rate": 9.130402368714208e-08, + "loss": 0.1939004510641098, + "step": 7225 + }, + { + "epoch": 1.918868676138627, + "grad_norm": 1.2349263677236755, + "learning_rate": 9.071299943761769e-08, + "loss": 0.21722440421581268, + "step": 7226 + }, + { + "epoch": 1.91913424512017, + "grad_norm": 1.2911544131515666, + "learning_rate": 9.012388558175877e-08, + "loss": 0.24213966727256775, + "step": 7227 + }, + { + "epoch": 1.919399814101713, + "grad_norm": 1.2266941536480729, + "learning_rate": 8.953668223313783e-08, + "loss": 0.2305546998977661, + "step": 7228 + }, + { + "epoch": 1.919665383083256, + "grad_norm": 1.3932840646040938, + "learning_rate": 8.895138950496207e-08, + "loss": 0.2678033709526062, + "step": 7229 + }, + { + "epoch": 1.919930952064799, + "grad_norm": 1.2449965535251106, + "learning_rate": 8.836800751006791e-08, + "loss": 0.2491014301776886, + "step": 7230 + }, + { + "epoch": 1.9201965210463419, + "grad_norm": 1.2551836576043742, + "learning_rate": 8.778653636092537e-08, + "loss": 0.21837326884269714, + "step": 7231 + }, + { + "epoch": 1.9204620900278848, + "grad_norm": 1.2745391136427304, + "learning_rate": 8.72069761696348e-08, + "loss": 0.24149999022483826, + "step": 7232 + }, + { + "epoch": 1.9207276590094278, + "grad_norm": 
1.3444140835580012, + "learning_rate": 8.662932704792793e-08, + "loss": 0.2124684453010559, + "step": 7233 + }, + { + "epoch": 1.9209932279909707, + "grad_norm": 1.3660213009765734, + "learning_rate": 8.60535891071712e-08, + "loss": 0.2452150285243988, + "step": 7234 + }, + { + "epoch": 1.9212587969725137, + "grad_norm": 1.2005299446152509, + "learning_rate": 8.547976245835698e-08, + "loss": 0.23598846793174744, + "step": 7235 + }, + { + "epoch": 1.9215243659540566, + "grad_norm": 1.3152974069295431, + "learning_rate": 8.490784721211454e-08, + "loss": 0.2105225920677185, + "step": 7236 + }, + { + "epoch": 1.9217899349355996, + "grad_norm": 1.4424977304862223, + "learning_rate": 8.433784347870122e-08, + "loss": 0.2585388720035553, + "step": 7237 + }, + { + "epoch": 1.9220555039171425, + "grad_norm": 1.2300698994172445, + "learning_rate": 8.376975136800691e-08, + "loss": 0.21703900396823883, + "step": 7238 + }, + { + "epoch": 1.9223210728986855, + "grad_norm": 1.2580366958382383, + "learning_rate": 8.3203570989554e-08, + "loss": 0.22771210968494415, + "step": 7239 + }, + { + "epoch": 1.9225866418802284, + "grad_norm": 1.1645003525207898, + "learning_rate": 8.263930245249408e-08, + "loss": 0.22535575926303864, + "step": 7240 + }, + { + "epoch": 1.9228522108617714, + "grad_norm": 1.1822452042500315, + "learning_rate": 8.207694586561344e-08, + "loss": 0.2052595466375351, + "step": 7241 + }, + { + "epoch": 1.9231177798433143, + "grad_norm": 1.2683012213528768, + "learning_rate": 8.151650133732536e-08, + "loss": 0.19611456990242004, + "step": 7242 + }, + { + "epoch": 1.9233833488248573, + "grad_norm": 1.2762939262923303, + "learning_rate": 8.095796897567787e-08, + "loss": 0.20256826281547546, + "step": 7243 + }, + { + "epoch": 1.9236489178064002, + "grad_norm": 1.5444723931343434, + "learning_rate": 8.040134888835038e-08, + "loss": 0.25462138652801514, + "step": 7244 + }, + { + "epoch": 1.9239144867879432, + "grad_norm": 1.2813246309729553, + "learning_rate": 
7.984664118265262e-08, + "loss": 0.27362316846847534, + "step": 7245 + }, + { + "epoch": 1.9241800557694861, + "grad_norm": 1.3526739723939418, + "learning_rate": 7.929384596552459e-08, + "loss": 0.23749098181724548, + "step": 7246 + }, + { + "epoch": 1.924445624751029, + "grad_norm": 1.3016147885306604, + "learning_rate": 7.874296334353882e-08, + "loss": 0.2472018599510193, + "step": 7247 + }, + { + "epoch": 1.924711193732572, + "grad_norm": 1.3451463766339227, + "learning_rate": 7.819399342290034e-08, + "loss": 0.23181989789009094, + "step": 7248 + }, + { + "epoch": 1.924976762714115, + "grad_norm": 1.2415200588572097, + "learning_rate": 7.764693630944231e-08, + "loss": 0.21363665163516998, + "step": 7249 + }, + { + "epoch": 1.925242331695658, + "grad_norm": 1.1849821155034532, + "learning_rate": 7.710179210863144e-08, + "loss": 0.21239221096038818, + "step": 7250 + }, + { + "epoch": 1.925507900677201, + "grad_norm": 1.4494720585200522, + "learning_rate": 7.655856092556591e-08, + "loss": 0.2643742263317108, + "step": 7251 + }, + { + "epoch": 1.9257734696587439, + "grad_norm": 1.251877664981762, + "learning_rate": 7.601724286497414e-08, + "loss": 0.2232428789138794, + "step": 7252 + }, + { + "epoch": 1.9260390386402868, + "grad_norm": 1.313277386530887, + "learning_rate": 7.547783803121489e-08, + "loss": 0.2052377462387085, + "step": 7253 + }, + { + "epoch": 1.9263046076218298, + "grad_norm": 1.2540878413614547, + "learning_rate": 7.494034652827942e-08, + "loss": 0.22194740176200867, + "step": 7254 + }, + { + "epoch": 1.9265701766033727, + "grad_norm": 1.2500554609811554, + "learning_rate": 7.440476845979038e-08, + "loss": 0.22004084289073944, + "step": 7255 + }, + { + "epoch": 1.9268357455849157, + "grad_norm": 1.5480704193409933, + "learning_rate": 7.387110392899965e-08, + "loss": 0.2218078374862671, + "step": 7256 + }, + { + "epoch": 1.9271013145664586, + "grad_norm": 1.3006193889830067, + "learning_rate": 7.33393530387927e-08, + "loss": 0.23272839188575745, + 
"step": 7257 + }, + { + "epoch": 1.9273668835480016, + "grad_norm": 1.3119971487868216, + "learning_rate": 7.280951589168417e-08, + "loss": 0.23666653037071228, + "step": 7258 + }, + { + "epoch": 1.9276324525295445, + "grad_norm": 1.235294099691234, + "learning_rate": 7.228159258982126e-08, + "loss": 0.21946533024311066, + "step": 7259 + }, + { + "epoch": 1.9278980215110875, + "grad_norm": 1.252328485116134, + "learning_rate": 7.175558323498033e-08, + "loss": 0.22158634662628174, + "step": 7260 + }, + { + "epoch": 1.9281635904926304, + "grad_norm": 1.1330771135999202, + "learning_rate": 7.123148792857026e-08, + "loss": 0.19978654384613037, + "step": 7261 + }, + { + "epoch": 1.9284291594741734, + "grad_norm": 1.2859436875650823, + "learning_rate": 7.070930677163023e-08, + "loss": 0.21197813749313354, + "step": 7262 + }, + { + "epoch": 1.9286947284557163, + "grad_norm": 1.2611518825786316, + "learning_rate": 7.018903986483083e-08, + "loss": 0.22650468349456787, + "step": 7263 + }, + { + "epoch": 1.9289602974372593, + "grad_norm": 1.2701948406662635, + "learning_rate": 6.967068730847293e-08, + "loss": 0.22257481515407562, + "step": 7264 + }, + { + "epoch": 1.9292258664188022, + "grad_norm": 1.3219742856760701, + "learning_rate": 6.915424920248992e-08, + "loss": 0.24899804592132568, + "step": 7265 + }, + { + "epoch": 1.9294914354003452, + "grad_norm": 1.2996576951077934, + "learning_rate": 6.863972564644328e-08, + "loss": 0.250610888004303, + "step": 7266 + }, + { + "epoch": 1.9297570043818881, + "grad_norm": 1.251137163804366, + "learning_rate": 6.81271167395292e-08, + "loss": 0.22786292433738708, + "step": 7267 + }, + { + "epoch": 1.930022573363431, + "grad_norm": 1.2890465128808872, + "learning_rate": 6.761642258056977e-08, + "loss": 0.22816789150238037, + "step": 7268 + }, + { + "epoch": 1.930288142344974, + "grad_norm": 1.3522601458627446, + "learning_rate": 6.7107643268024e-08, + "loss": 0.2589687407016754, + "step": 7269 + }, + { + "epoch": 1.930553711326517, + 
"grad_norm": 1.1963236616697677, + "learning_rate": 6.660077889997673e-08, + "loss": 0.2281583547592163, + "step": 7270 + }, + { + "epoch": 1.93081928030806, + "grad_norm": 1.3347065729182181, + "learning_rate": 6.60958295741454e-08, + "loss": 0.22833740711212158, + "step": 7271 + }, + { + "epoch": 1.931084849289603, + "grad_norm": 1.1611313283452582, + "learning_rate": 6.559279538787877e-08, + "loss": 0.20720313489437103, + "step": 7272 + }, + { + "epoch": 1.9313504182711458, + "grad_norm": 1.1884544288263172, + "learning_rate": 6.509167643815594e-08, + "loss": 0.17191773653030396, + "step": 7273 + }, + { + "epoch": 1.9316159872526888, + "grad_norm": 1.1354230474675757, + "learning_rate": 6.459247282158632e-08, + "loss": 0.23586943745613098, + "step": 7274 + }, + { + "epoch": 1.9318815562342317, + "grad_norm": 1.3318856895013969, + "learning_rate": 6.409518463441067e-08, + "loss": 0.21353168785572052, + "step": 7275 + }, + { + "epoch": 1.9321471252157747, + "grad_norm": 1.404937308132313, + "learning_rate": 6.359981197250009e-08, + "loss": 0.23148195445537567, + "step": 7276 + }, + { + "epoch": 1.9324126941973176, + "grad_norm": 1.3040478141172254, + "learning_rate": 6.310635493135709e-08, + "loss": 0.2113666534423828, + "step": 7277 + }, + { + "epoch": 1.9326782631788606, + "grad_norm": 1.3399999009479682, + "learning_rate": 6.261481360611332e-08, + "loss": 0.27689510583877563, + "step": 7278 + }, + { + "epoch": 1.9329438321604036, + "grad_norm": 1.2809237898551964, + "learning_rate": 6.2125188091533e-08, + "loss": 0.23746277391910553, + "step": 7279 + }, + { + "epoch": 1.9332094011419465, + "grad_norm": 1.4215326252349767, + "learning_rate": 6.163747848201062e-08, + "loss": 0.23123708367347717, + "step": 7280 + }, + { + "epoch": 1.9334749701234895, + "grad_norm": 1.3095914464878196, + "learning_rate": 6.115168487157097e-08, + "loss": 0.23640167713165283, + "step": 7281 + }, + { + "epoch": 1.9337405391050324, + "grad_norm": 1.3278235730632808, + "learning_rate": 
6.066780735386801e-08, + "loss": 0.2259385585784912, + "step": 7282 + }, + { + "epoch": 1.9340061080865754, + "grad_norm": 1.230137664492021, + "learning_rate": 6.018584602218824e-08, + "loss": 0.219761461019516, + "step": 7283 + }, + { + "epoch": 1.9342716770681183, + "grad_norm": 1.43054331413576, + "learning_rate": 5.970580096944733e-08, + "loss": 0.24411989748477936, + "step": 7284 + }, + { + "epoch": 1.9345372460496613, + "grad_norm": 1.196712051616964, + "learning_rate": 5.922767228819459e-08, + "loss": 0.232415571808815, + "step": 7285 + }, + { + "epoch": 1.9348028150312042, + "grad_norm": 1.341424963494065, + "learning_rate": 5.875146007060517e-08, + "loss": 0.25938165187835693, + "step": 7286 + }, + { + "epoch": 1.9350683840127472, + "grad_norm": 1.253589726996753, + "learning_rate": 5.827716440848785e-08, + "loss": 0.22138425707817078, + "step": 7287 + }, + { + "epoch": 1.9353339529942901, + "grad_norm": 1.12038038288381, + "learning_rate": 5.7804785393282825e-08, + "loss": 0.19724398851394653, + "step": 7288 + }, + { + "epoch": 1.935599521975833, + "grad_norm": 1.4840167690508577, + "learning_rate": 5.7334323116056136e-08, + "loss": 0.25307583808898926, + "step": 7289 + }, + { + "epoch": 1.935865090957376, + "grad_norm": 1.2525903433235852, + "learning_rate": 5.686577766751078e-08, + "loss": 0.2436421811580658, + "step": 7290 + }, + { + "epoch": 1.936130659938919, + "grad_norm": 1.2518328182394873, + "learning_rate": 5.6399149137973394e-08, + "loss": 0.2164984941482544, + "step": 7291 + }, + { + "epoch": 1.936396228920462, + "grad_norm": 1.2277499731042363, + "learning_rate": 5.5934437617407576e-08, + "loss": 0.22526800632476807, + "step": 7292 + }, + { + "epoch": 1.936661797902005, + "grad_norm": 2.195756796154145, + "learning_rate": 5.547164319540277e-08, + "loss": 0.27787747979164124, + "step": 7293 + }, + { + "epoch": 1.936927366883548, + "grad_norm": 1.2647979578451993, + "learning_rate": 5.5010765961179825e-08, + "loss": 0.2188001275062561, + 
"step": 7294 + }, + { + "epoch": 1.937192935865091, + "grad_norm": 1.2454775538056309, + "learning_rate": 5.4551806003591e-08, + "loss": 0.22620335221290588, + "step": 7295 + }, + { + "epoch": 1.937458504846634, + "grad_norm": 1.186081247005514, + "learning_rate": 5.409476341111775e-08, + "loss": 0.20357783138751984, + "step": 7296 + }, + { + "epoch": 1.937724073828177, + "grad_norm": 1.2316030990526627, + "learning_rate": 5.3639638271872906e-08, + "loss": 0.22717830538749695, + "step": 7297 + }, + { + "epoch": 1.9379896428097199, + "grad_norm": 1.1600371116406252, + "learning_rate": 5.318643067360074e-08, + "loss": 0.20139163732528687, + "step": 7298 + }, + { + "epoch": 1.9382552117912628, + "grad_norm": 1.3377291184643103, + "learning_rate": 5.273514070367247e-08, + "loss": 0.2620807886123657, + "step": 7299 + }, + { + "epoch": 1.9385207807728058, + "grad_norm": 1.2240680803779018, + "learning_rate": 5.2285768449091834e-08, + "loss": 0.2102596014738083, + "step": 7300 + }, + { + "epoch": 1.9387863497543487, + "grad_norm": 1.3057613284367482, + "learning_rate": 5.183831399649175e-08, + "loss": 0.2105238288640976, + "step": 7301 + }, + { + "epoch": 1.9390519187358917, + "grad_norm": 1.2241670740951547, + "learning_rate": 5.1392777432138773e-08, + "loss": 0.22178848087787628, + "step": 7302 + }, + { + "epoch": 1.9393174877174346, + "grad_norm": 1.3648564311332518, + "learning_rate": 5.094915884192419e-08, + "loss": 0.23375345766544342, + "step": 7303 + }, + { + "epoch": 1.9395830566989776, + "grad_norm": 1.3411332724549108, + "learning_rate": 5.050745831137405e-08, + "loss": 0.22709332406520844, + "step": 7304 + }, + { + "epoch": 1.9398486256805205, + "grad_norm": 1.270429998105922, + "learning_rate": 5.0067675925642437e-08, + "loss": 0.2312362790107727, + "step": 7305 + }, + { + "epoch": 1.9401141946620635, + "grad_norm": 1.159162680689607, + "learning_rate": 4.962981176951376e-08, + "loss": 0.2014419138431549, + "step": 7306 + }, + { + "epoch": 1.9403797636436064, 
+ "grad_norm": 1.4294147842238243, + "learning_rate": 4.9193865927404936e-08, + "loss": 0.23700466752052307, + "step": 7307 + }, + { + "epoch": 1.9406453326251494, + "grad_norm": 1.3814639969092575, + "learning_rate": 4.8759838483358745e-08, + "loss": 0.23362770676612854, + "step": 7308 + }, + { + "epoch": 1.9409109016066923, + "grad_norm": 1.4217349736822034, + "learning_rate": 4.832772952105269e-08, + "loss": 0.26057323813438416, + "step": 7309 + }, + { + "epoch": 1.9411764705882353, + "grad_norm": 1.1693504727058668, + "learning_rate": 4.789753912379014e-08, + "loss": 0.20954950153827667, + "step": 7310 + }, + { + "epoch": 1.9414420395697782, + "grad_norm": 1.1532528532836688, + "learning_rate": 4.746926737450919e-08, + "loss": 0.2100827842950821, + "step": 7311 + }, + { + "epoch": 1.9417076085513212, + "grad_norm": 1.2509560196931713, + "learning_rate": 4.7042914355773795e-08, + "loss": 0.216691792011261, + "step": 7312 + }, + { + "epoch": 1.9419731775328641, + "grad_norm": 1.2086430330598397, + "learning_rate": 4.6618480149780434e-08, + "loss": 0.22815749049186707, + "step": 7313 + }, + { + "epoch": 1.942238746514407, + "grad_norm": 1.3440658280324072, + "learning_rate": 4.6195964838353646e-08, + "loss": 0.23365731537342072, + "step": 7314 + }, + { + "epoch": 1.94250431549595, + "grad_norm": 1.5301363693806977, + "learning_rate": 4.577536850295161e-08, + "loss": 0.2112172693014145, + "step": 7315 + }, + { + "epoch": 1.942769884477493, + "grad_norm": 1.1945701714854287, + "learning_rate": 4.5356691224659466e-08, + "loss": 0.21821950376033783, + "step": 7316 + }, + { + "epoch": 1.943035453459036, + "grad_norm": 1.1491339078592526, + "learning_rate": 4.4939933084192646e-08, + "loss": 0.2374412566423416, + "step": 7317 + }, + { + "epoch": 1.943301022440579, + "grad_norm": 1.3549046355713708, + "learning_rate": 4.4525094161897987e-08, + "loss": 0.2483779489994049, + "step": 7318 + }, + { + "epoch": 1.9435665914221218, + "grad_norm": 1.327945477663327, + 
"learning_rate": 4.411217453775152e-08, + "loss": 0.23641882836818695, + "step": 7319 + }, + { + "epoch": 1.9438321604036648, + "grad_norm": 1.3586245026219714, + "learning_rate": 4.370117429135956e-08, + "loss": 0.24779492616653442, + "step": 7320 + }, + { + "epoch": 1.944097729385208, + "grad_norm": 1.1641395539357577, + "learning_rate": 4.329209350195651e-08, + "loss": 0.20288071036338806, + "step": 7321 + }, + { + "epoch": 1.944363298366751, + "grad_norm": 1.2676649817410126, + "learning_rate": 4.288493224840928e-08, + "loss": 0.24286144971847534, + "step": 7322 + }, + { + "epoch": 1.9446288673482939, + "grad_norm": 1.3164985028745375, + "learning_rate": 4.2479690609213976e-08, + "loss": 0.22825902700424194, + "step": 7323 + }, + { + "epoch": 1.9448944363298368, + "grad_norm": 1.255280762331411, + "learning_rate": 4.207636866249587e-08, + "loss": 0.22563335299491882, + "step": 7324 + }, + { + "epoch": 1.9451600053113798, + "grad_norm": 1.2990544857906836, + "learning_rate": 4.167496648601166e-08, + "loss": 0.22853273153305054, + "step": 7325 + }, + { + "epoch": 1.9454255742929227, + "grad_norm": 1.1281442356079434, + "learning_rate": 4.1275484157147216e-08, + "loss": 0.20790672302246094, + "step": 7326 + }, + { + "epoch": 1.9456911432744657, + "grad_norm": 1.1980029703513235, + "learning_rate": 4.087792175291649e-08, + "loss": 0.2165423035621643, + "step": 7327 + }, + { + "epoch": 1.9459567122560086, + "grad_norm": 1.3858946395294593, + "learning_rate": 4.048227934996485e-08, + "loss": 0.2605394721031189, + "step": 7328 + }, + { + "epoch": 1.9462222812375516, + "grad_norm": 1.280554987273632, + "learning_rate": 4.008855702456904e-08, + "loss": 0.22624900937080383, + "step": 7329 + }, + { + "epoch": 1.9464878502190945, + "grad_norm": 1.1967949808184344, + "learning_rate": 3.9696754852632804e-08, + "loss": 0.23086196184158325, + "step": 7330 + }, + { + "epoch": 1.9467534192006375, + "grad_norm": 1.4330145211347993, + "learning_rate": 3.9306872909691265e-08, + 
"loss": 0.24633410573005676, + "step": 7331 + }, + { + "epoch": 1.9470189881821804, + "grad_norm": 2.2568432653955894, + "learning_rate": 3.8918911270908745e-08, + "loss": 0.2535535395145416, + "step": 7332 + }, + { + "epoch": 1.9472845571637234, + "grad_norm": 1.3555855555438505, + "learning_rate": 3.853287001108097e-08, + "loss": 0.23904260993003845, + "step": 7333 + }, + { + "epoch": 1.9475501261452663, + "grad_norm": 1.3963340527453718, + "learning_rate": 3.814874920463063e-08, + "loss": 0.22525179386138916, + "step": 7334 + }, + { + "epoch": 1.9478156951268093, + "grad_norm": 1.415360473918547, + "learning_rate": 3.776654892561293e-08, + "loss": 0.21139883995056152, + "step": 7335 + }, + { + "epoch": 1.9480812641083523, + "grad_norm": 1.2272269269066283, + "learning_rate": 3.738626924771005e-08, + "loss": 0.21939310431480408, + "step": 7336 + }, + { + "epoch": 1.9483468330898952, + "grad_norm": 1.1845473795192814, + "learning_rate": 3.7007910244236664e-08, + "loss": 0.22852283716201782, + "step": 7337 + }, + { + "epoch": 1.9486124020714382, + "grad_norm": 1.2529721413425112, + "learning_rate": 3.663147198813666e-08, + "loss": 0.20769211649894714, + "step": 7338 + }, + { + "epoch": 1.948877971052981, + "grad_norm": 1.216093250313145, + "learning_rate": 3.625695455198086e-08, + "loss": 0.21721890568733215, + "step": 7339 + }, + { + "epoch": 1.949143540034524, + "grad_norm": 1.261493312403511, + "learning_rate": 3.588435800797263e-08, + "loss": 0.24236848950386047, + "step": 7340 + }, + { + "epoch": 1.949409109016067, + "grad_norm": 1.21142050375974, + "learning_rate": 3.5513682427944505e-08, + "loss": 0.2300192266702652, + "step": 7341 + }, + { + "epoch": 1.94967467799761, + "grad_norm": 1.1850825722481098, + "learning_rate": 3.5144927883358215e-08, + "loss": 0.21636728942394257, + "step": 7342 + }, + { + "epoch": 1.949940246979153, + "grad_norm": 1.3000939007920165, + "learning_rate": 3.477809444530578e-08, + "loss": 0.25367966294288635, + "step": 7343 + }, + { 
+ "epoch": 1.9502058159606959, + "grad_norm": 1.4245768388392126, + "learning_rate": 3.4413182184507285e-08, + "loss": 0.24514247477054596, + "step": 7344 + }, + { + "epoch": 1.9504713849422388, + "grad_norm": 1.1048557155163508, + "learning_rate": 3.405019117131425e-08, + "loss": 0.18460404872894287, + "step": 7345 + }, + { + "epoch": 1.9507369539237818, + "grad_norm": 1.275062396510646, + "learning_rate": 3.3689121475706244e-08, + "loss": 0.2096845805644989, + "step": 7346 + }, + { + "epoch": 1.9510025229053247, + "grad_norm": 1.2314050158221594, + "learning_rate": 3.332997316729536e-08, + "loss": 0.22435057163238525, + "step": 7347 + }, + { + "epoch": 1.9512680918868677, + "grad_norm": 1.208912476805739, + "learning_rate": 3.2972746315318436e-08, + "loss": 0.20798128843307495, + "step": 7348 + }, + { + "epoch": 1.9515336608684106, + "grad_norm": 1.2922181556866412, + "learning_rate": 3.2617440988645945e-08, + "loss": 0.23958316445350647, + "step": 7349 + }, + { + "epoch": 1.9517992298499536, + "grad_norm": 1.3799363972113297, + "learning_rate": 3.2264057255777525e-08, + "loss": 0.21934574842453003, + "step": 7350 + }, + { + "epoch": 1.9520647988314965, + "grad_norm": 1.2014453671941887, + "learning_rate": 3.1912595184839804e-08, + "loss": 0.24321375787258148, + "step": 7351 + }, + { + "epoch": 1.9523303678130395, + "grad_norm": 1.1661737247347086, + "learning_rate": 3.156305484359079e-08, + "loss": 0.20932736992835999, + "step": 7352 + }, + { + "epoch": 1.9525959367945824, + "grad_norm": 1.2983329607047998, + "learning_rate": 3.12154362994177e-08, + "loss": 0.19824840128421783, + "step": 7353 + }, + { + "epoch": 1.9528615057761254, + "grad_norm": 1.3128795915591134, + "learning_rate": 3.0869739619338034e-08, + "loss": 0.212745800614357, + "step": 7354 + }, + { + "epoch": 1.9531270747576683, + "grad_norm": 1.247129470001585, + "learning_rate": 3.0525964869997374e-08, + "loss": 0.23044779896736145, + "step": 7355 + }, + { + "epoch": 1.9533926437392113, + 
"grad_norm": 1.2323689907378315, + "learning_rate": 3.018411211767158e-08, + "loss": 0.2237459123134613, + "step": 7356 + }, + { + "epoch": 1.9536582127207542, + "grad_norm": 1.3228713238231502, + "learning_rate": 2.984418142826684e-08, + "loss": 0.2592429518699646, + "step": 7357 + }, + { + "epoch": 1.9539237817022972, + "grad_norm": 1.1444806738907807, + "learning_rate": 2.9506172867315163e-08, + "loss": 0.17559123039245605, + "step": 7358 + }, + { + "epoch": 1.9541893506838401, + "grad_norm": 1.287127142439038, + "learning_rate": 2.917008649998332e-08, + "loss": 0.24143017828464508, + "step": 7359 + }, + { + "epoch": 1.954454919665383, + "grad_norm": 1.310526275865734, + "learning_rate": 2.883592239106392e-08, + "loss": 0.23560799658298492, + "step": 7360 + }, + { + "epoch": 1.954720488646926, + "grad_norm": 1.357586181070064, + "learning_rate": 2.8503680604979878e-08, + "loss": 0.2456119805574417, + "step": 7361 + }, + { + "epoch": 1.954986057628469, + "grad_norm": 1.2143945666113656, + "learning_rate": 2.817336120578329e-08, + "loss": 0.21878069639205933, + "step": 7362 + }, + { + "epoch": 1.955251626610012, + "grad_norm": 1.2288786099560105, + "learning_rate": 2.7844964257155438e-08, + "loss": 0.20496608316898346, + "step": 7363 + }, + { + "epoch": 1.955517195591555, + "grad_norm": 1.2067776880816419, + "learning_rate": 2.7518489822407902e-08, + "loss": 0.23219498991966248, + "step": 7364 + }, + { + "epoch": 1.9557827645730979, + "grad_norm": 1.3499865013336032, + "learning_rate": 2.7193937964481442e-08, + "loss": 0.2284272015094757, + "step": 7365 + }, + { + "epoch": 1.9560483335546408, + "grad_norm": 1.3177047034961433, + "learning_rate": 2.68713087459449e-08, + "loss": 0.22303974628448486, + "step": 7366 + }, + { + "epoch": 1.9563139025361838, + "grad_norm": 1.337791009624748, + "learning_rate": 2.655060222899741e-08, + "loss": 0.22489243745803833, + "step": 7367 + }, + { + "epoch": 1.9565794715177267, + "grad_norm": 1.2719472133739602, + "learning_rate": 
2.6231818475468407e-08, + "loss": 0.27986854314804077, + "step": 7368 + }, + { + "epoch": 1.9568450404992697, + "grad_norm": 1.3884495118427658, + "learning_rate": 2.591495754681539e-08, + "loss": 0.29321208596229553, + "step": 7369 + }, + { + "epoch": 1.9571106094808126, + "grad_norm": 1.3942541242432065, + "learning_rate": 2.5600019504125053e-08, + "loss": 0.2560982406139374, + "step": 7370 + }, + { + "epoch": 1.9573761784623556, + "grad_norm": 1.4283472016053, + "learning_rate": 2.528700440811438e-08, + "loss": 0.264164537191391, + "step": 7371 + }, + { + "epoch": 1.9576417474438985, + "grad_norm": 1.1832183058517125, + "learning_rate": 2.4975912319127326e-08, + "loss": 0.2135474979877472, + "step": 7372 + }, + { + "epoch": 1.9579073164254415, + "grad_norm": 1.265205421311282, + "learning_rate": 2.466674329714036e-08, + "loss": 0.2100939154624939, + "step": 7373 + }, + { + "epoch": 1.9581728854069844, + "grad_norm": 1.395586955333931, + "learning_rate": 2.4359497401758026e-08, + "loss": 0.23327934741973877, + "step": 7374 + }, + { + "epoch": 1.9584384543885274, + "grad_norm": 1.0722904974981595, + "learning_rate": 2.405417469221183e-08, + "loss": 0.18830639123916626, + "step": 7375 + }, + { + "epoch": 1.9587040233700703, + "grad_norm": 1.284092871282835, + "learning_rate": 2.3750775227364686e-08, + "loss": 0.2558823227882385, + "step": 7376 + }, + { + "epoch": 1.9589695923516133, + "grad_norm": 1.2598399224501151, + "learning_rate": 2.3449299065710917e-08, + "loss": 0.24241580069065094, + "step": 7377 + }, + { + "epoch": 1.9592351613331562, + "grad_norm": 1.1684337819721369, + "learning_rate": 2.3149746265368478e-08, + "loss": 0.21678534150123596, + "step": 7378 + }, + { + "epoch": 1.9595007303146992, + "grad_norm": 1.2804084693654512, + "learning_rate": 2.2852116884088947e-08, + "loss": 0.20956794917583466, + "step": 7379 + }, + { + "epoch": 1.9597662992962421, + "grad_norm": 1.2682321373225172, + "learning_rate": 2.2556410979253095e-08, + "loss": 
0.2185555249452591, + "step": 7380 + }, + { + "epoch": 1.960031868277785, + "grad_norm": 1.3369178147645102, + "learning_rate": 2.226262860786643e-08, + "loss": 0.21802933514118195, + "step": 7381 + }, + { + "epoch": 1.960297437259328, + "grad_norm": 1.4565773631347612, + "learning_rate": 2.1970769826570317e-08, + "loss": 0.22842684388160706, + "step": 7382 + }, + { + "epoch": 1.960563006240871, + "grad_norm": 1.2737807469252465, + "learning_rate": 2.1680834691628627e-08, + "loss": 0.23380814492702484, + "step": 7383 + }, + { + "epoch": 1.960828575222414, + "grad_norm": 1.311531421948895, + "learning_rate": 2.1392823258938877e-08, + "loss": 0.23476335406303406, + "step": 7384 + }, + { + "epoch": 1.961094144203957, + "grad_norm": 1.2100451325455786, + "learning_rate": 2.110673558402554e-08, + "loss": 0.19657662510871887, + "step": 7385 + }, + { + "epoch": 1.9613597131854998, + "grad_norm": 1.191542044024077, + "learning_rate": 2.0822571722044494e-08, + "loss": 0.1724000722169876, + "step": 7386 + }, + { + "epoch": 1.9616252821670428, + "grad_norm": 1.3535695538712786, + "learning_rate": 2.0540331727777475e-08, + "loss": 0.22960031032562256, + "step": 7387 + }, + { + "epoch": 1.9618908511485857, + "grad_norm": 1.4028518726902017, + "learning_rate": 2.0260015655637623e-08, + "loss": 0.2601638436317444, + "step": 7388 + }, + { + "epoch": 1.9621564201301287, + "grad_norm": 1.3907771240802078, + "learning_rate": 1.998162355966726e-08, + "loss": 0.2562445402145386, + "step": 7389 + }, + { + "epoch": 1.9624219891116716, + "grad_norm": 1.1881922077977833, + "learning_rate": 1.9705155493535688e-08, + "loss": 0.20073221623897552, + "step": 7390 + }, + { + "epoch": 1.9626875580932146, + "grad_norm": 1.2076860773847395, + "learning_rate": 1.9430611510544707e-08, + "loss": 0.18454071879386902, + "step": 7391 + }, + { + "epoch": 1.9629531270747576, + "grad_norm": 1.1878203901407238, + "learning_rate": 1.915799166362087e-08, + "loss": 0.18515023589134216, + "step": 7392 + }, + { + 
"epoch": 1.9632186960563005, + "grad_norm": 1.3323308983960227, + "learning_rate": 1.8887296005323242e-08, + "loss": 0.25658512115478516, + "step": 7393 + }, + { + "epoch": 1.9634842650378435, + "grad_norm": 1.4122913637661163, + "learning_rate": 1.861852458783897e-08, + "loss": 0.2219933569431305, + "step": 7394 + }, + { + "epoch": 1.9637498340193864, + "grad_norm": 1.3005286775146463, + "learning_rate": 1.8351677462983276e-08, + "loss": 0.24949616193771362, + "step": 7395 + }, + { + "epoch": 1.9640154030009294, + "grad_norm": 1.4026906711741571, + "learning_rate": 1.808675468220167e-08, + "loss": 0.24348726868629456, + "step": 7396 + }, + { + "epoch": 1.9642809719824723, + "grad_norm": 1.3848607909391346, + "learning_rate": 1.782375629656885e-08, + "loss": 0.2329033762216568, + "step": 7397 + }, + { + "epoch": 1.9645465409640153, + "grad_norm": 1.2075544796662319, + "learning_rate": 1.7562682356786488e-08, + "loss": 0.22265426814556122, + "step": 7398 + }, + { + "epoch": 1.9648121099455582, + "grad_norm": 1.2895787739524316, + "learning_rate": 1.730353291318654e-08, + "loss": 0.24438990652561188, + "step": 7399 + }, + { + "epoch": 1.9650776789271012, + "grad_norm": 1.3518107746112518, + "learning_rate": 1.704630801573015e-08, + "loss": 0.2632136642932892, + "step": 7400 + } + ], + "logging_steps": 1, + "max_steps": 7532, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5604619517755392.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-7400/training_args.bin b/checkpoint-7400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9c0c92feb0b44b3362d1d98054f06b20cb57a4b7 --- /dev/null +++ 
b/checkpoint-7400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89857e5ce3d813c9a03825c43337cd93b1e4a595acca4834e9e4f1a47312d609 +size 6968 diff --git a/checkpoint-7400/zero_to_fp32.py b/checkpoint-7400/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..5995d6e6f04e43b989587aa9022a3aef0c66d694 --- /dev/null +++ b/checkpoint-7400/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pseudo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.items(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it is no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-7500/README.md b/checkpoint-7500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4005c4d8e7a819833408da4794e4e74d2ced6553 --- /dev/null +++ b/checkpoint-7500/README.md @@ -0,0 +1,208 @@ +--- +base_model: Qwen/Qwen2.5-VL-7B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. 
More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-7500/adapter_config.json b/checkpoint-7500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e68fb35d77856a51c03fe5e97700fc3194faedb5 --- /dev/null +++ b/checkpoint-7500/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + 
"layers.4.mlp.up_proj", + "k_proj", + "layers.16.mlp.up_proj", + "layers.15.mlp.down_proj", + "layers.2.mlp.down_proj", + "layers.14.mlp.up_proj", + "layers.26.mlp.down_proj", + "layers.1.mlp.up_proj", + "layers.14.mlp.down_proj", + "layers.20.mlp.up_proj", + "layers.14.mlp.gate_proj", + "layers.12.mlp.up_proj", + "layers.10.mlp.up_proj", + "layers.12.mlp.gate_proj", + "layers.22.mlp.down_proj", + "layers.9.mlp.up_proj", + "layers.9.mlp.gate_proj", + "layers.19.mlp.up_proj", + "layers.22.mlp.gate_proj", + "v_proj", + "layers.15.mlp.up_proj", + "layers.21.mlp.up_proj", + "layers.6.mlp.up_proj", + "layers.0.mlp.down_proj", + "layers.9.mlp.down_proj", + "layers.11.mlp.up_proj", + "layers.8.mlp.up_proj", + "layers.17.mlp.gate_proj", + "layers.1.mlp.down_proj", + "layers.21.mlp.down_proj", + "layers.27.mlp.up_proj", + "layers.16.mlp.gate_proj", + "q_proj", + "layers.10.mlp.down_proj", + "layers.7.mlp.gate_proj", + "layers.0.mlp.up_proj", + "layers.5.mlp.up_proj", + "layers.22.mlp.up_proj", + "layers.1.mlp.gate_proj", + "layers.25.mlp.up_proj", + "layers.7.mlp.down_proj", + "layers.25.mlp.down_proj", + "layers.13.mlp.down_proj", + "layers.2.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.13.mlp.gate_proj", + "layers.23.mlp.gate_proj", + "layers.3.mlp.gate_proj", + "layers.3.mlp.up_proj", + "layers.23.mlp.down_proj", + "o_proj", + "layers.6.mlp.gate_proj", + "layers.18.mlp.up_proj", + "layers.20.mlp.gate_proj", + "layers.26.mlp.gate_proj", + "layers.24.mlp.down_proj", + "layers.20.mlp.down_proj", + "layers.24.mlp.up_proj", + "layers.0.mlp.gate_proj", + "layers.15.mlp.gate_proj", + "layers.4.mlp.down_proj", + "layers.8.mlp.gate_proj", + "layers.12.mlp.down_proj", + "layers.8.mlp.down_proj", + "layers.25.mlp.gate_proj", + "layers.3.mlp.down_proj", + "layers.11.mlp.down_proj", + "layers.6.mlp.down_proj", + "layers.16.mlp.down_proj", + "layers.26.mlp.up_proj", + "layers.19.mlp.gate_proj", + "layers.10.mlp.gate_proj", + "layers.23.mlp.up_proj", + 
"layers.21.mlp.gate_proj", + "layers.13.mlp.up_proj", + "layers.18.mlp.gate_proj", + "layers.17.mlp.up_proj", + "layers.5.mlp.down_proj", + "layers.24.mlp.gate_proj", + "layers.4.mlp.gate_proj", + "layers.19.mlp.down_proj", + "layers.27.mlp.gate_proj", + "layers.18.mlp.down_proj", + "layers.7.mlp.up_proj", + "layers.27.mlp.down_proj", + "layers.2.mlp.gate_proj", + "layers.5.mlp.gate_proj", + "layers.11.mlp.gate_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-7500/adapter_model.safetensors b/checkpoint-7500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c2de40ca865350fa2c76f3114999390ff30e776c --- /dev/null +++ b/checkpoint-7500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fa83d23bb3c0e8d619780adf91d5405e31004534dc1c331b9f6063b6b8ce219 +size 323020440 diff --git a/checkpoint-7500/chat_template.jinja b/checkpoint-7500/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/checkpoint-7500/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in 
content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-7500/global_step7500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-7500/global_step7500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d39e7111f9be2387b52da77c5ca125237aa6e0e4 --- /dev/null +++ b/checkpoint-7500/global_step7500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51e23bb4c07e8bed1f160d496d2ac236684d1554c2c4343877d446b482076fea +size 1937772272 diff --git a/checkpoint-7500/global_step7500/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-7500/global_step7500/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2211fce2e19dc18594abbf07fe6cbb2388f2874e --- /dev/null +++ b/checkpoint-7500/global_step7500/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d265bccc47db1f99c2f7bfa3f57a050eaa6f0e776539f73456bf592daa8ed174 +size 460630 diff --git a/checkpoint-7500/latest b/checkpoint-7500/latest new file mode 100644 index 0000000000000000000000000000000000000000..adaf1d1997918753a9edb52d391b59c7a43bdd62 --- /dev/null +++ b/checkpoint-7500/latest @@ -0,0 +1 @@ +global_step7500 \ No newline at end of file diff --git a/checkpoint-7500/processor_config.json b/checkpoint-7500/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/checkpoint-7500/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + 
"data_format": "channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": "Qwen2VLVideoProcessor" + } +} diff --git a/checkpoint-7500/rng_state.pth b/checkpoint-7500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..89270372e0d017aa31278348d253a7584b295f40 --- /dev/null +++ b/checkpoint-7500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2289592b53e561c1d0d516e242a130470be534f52887e4046730b22de1c9487a +size 14244 diff --git a/checkpoint-7500/scheduler.pt b/checkpoint-7500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..afe3ac984e3c04b4fbd3d8d1dd476f93638f9ca6 --- /dev/null +++ b/checkpoint-7500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:608776299eb47c2ca521d62c8f9958121e7c8c329266602c86227b7a61c757a9 +size 1000 diff --git a/checkpoint-7500/tokenizer.json b/checkpoint-7500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/checkpoint-7500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/checkpoint-7500/tokenizer_config.json b/checkpoint-7500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/checkpoint-7500/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-7500/trainer_state.json b/checkpoint-7500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3fbf694371788506a1e67b3c0f18c11c6ce377ec --- /dev/null +++ b/checkpoint-7500/trainer_state.json @@ -0,0 +1,52534 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9916345770813968, + "eval_steps": 500, + "global_step": 7500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + 
"log_history": [ + { + "epoch": 0.0002655689815429558, + "grad_norm": 1.8881195832990014, + "learning_rate": 0.0, + "loss": 1.1502833366394043, + "step": 1 + }, + { + "epoch": 0.0005311379630859116, + "grad_norm": 1.77718785062999, + "learning_rate": 5.3050397877984086e-08, + "loss": 1.1698756217956543, + "step": 2 + }, + { + "epoch": 0.0007967069446288673, + "grad_norm": 1.6766718507101437, + "learning_rate": 1.0610079575596817e-07, + "loss": 1.1060130596160889, + "step": 3 + }, + { + "epoch": 0.0010622759261718232, + "grad_norm": 1.876053682165919, + "learning_rate": 1.5915119363395226e-07, + "loss": 1.1075276136398315, + "step": 4 + }, + { + "epoch": 0.001327844907714779, + "grad_norm": 1.88228417845019, + "learning_rate": 2.1220159151193635e-07, + "loss": 1.2153511047363281, + "step": 5 + }, + { + "epoch": 0.0015934138892577346, + "grad_norm": 1.9273368394845023, + "learning_rate": 2.6525198938992043e-07, + "loss": 1.1400426626205444, + "step": 6 + }, + { + "epoch": 0.0018589828708006906, + "grad_norm": 1.904814034912833, + "learning_rate": 3.183023872679045e-07, + "loss": 1.2070660591125488, + "step": 7 + }, + { + "epoch": 0.0021245518523436463, + "grad_norm": 1.7346381008587795, + "learning_rate": 3.713527851458886e-07, + "loss": 1.1614588499069214, + "step": 8 + }, + { + "epoch": 0.002390120833886602, + "grad_norm": 1.817032704311048, + "learning_rate": 4.244031830238727e-07, + "loss": 1.1739476919174194, + "step": 9 + }, + { + "epoch": 0.002655689815429558, + "grad_norm": 1.8291974144657501, + "learning_rate": 4.774535809018568e-07, + "loss": 1.1559171676635742, + "step": 10 + }, + { + "epoch": 0.0029212587969725135, + "grad_norm": 2.0039010539208744, + "learning_rate": 5.305039787798409e-07, + "loss": 1.2086225748062134, + "step": 11 + }, + { + "epoch": 0.0031868277785154693, + "grad_norm": 1.876026657216244, + "learning_rate": 5.83554376657825e-07, + "loss": 1.227709174156189, + "step": 12 + }, + { + "epoch": 0.003452396760058425, + "grad_norm": 
2.0245192813139825, + "learning_rate": 6.36604774535809e-07, + "loss": 1.255577564239502, + "step": 13 + }, + { + "epoch": 0.003717965741601381, + "grad_norm": 1.8641260357218605, + "learning_rate": 6.896551724137931e-07, + "loss": 1.1953760385513306, + "step": 14 + }, + { + "epoch": 0.0039835347231443365, + "grad_norm": 1.9079733249323254, + "learning_rate": 7.427055702917772e-07, + "loss": 1.1325336694717407, + "step": 15 + }, + { + "epoch": 0.004249103704687293, + "grad_norm": 1.8230190567516942, + "learning_rate": 7.957559681697613e-07, + "loss": 1.232974648475647, + "step": 16 + }, + { + "epoch": 0.004514672686230248, + "grad_norm": 1.8532380418447003, + "learning_rate": 8.488063660477454e-07, + "loss": 1.1527395248413086, + "step": 17 + }, + { + "epoch": 0.004780241667773204, + "grad_norm": 1.986294801704247, + "learning_rate": 9.018567639257295e-07, + "loss": 1.151026964187622, + "step": 18 + }, + { + "epoch": 0.00504581064931616, + "grad_norm": 1.8048967405226255, + "learning_rate": 9.549071618037136e-07, + "loss": 1.155288815498352, + "step": 19 + }, + { + "epoch": 0.005311379630859116, + "grad_norm": 2.1631450267380767, + "learning_rate": 1.0079575596816979e-06, + "loss": 1.183434009552002, + "step": 20 + }, + { + "epoch": 0.005576948612402072, + "grad_norm": 1.88758019498484, + "learning_rate": 1.0610079575596817e-06, + "loss": 1.161030650138855, + "step": 21 + }, + { + "epoch": 0.005842517593945027, + "grad_norm": 1.9605989446426395, + "learning_rate": 1.1140583554376658e-06, + "loss": 1.123382806777954, + "step": 22 + }, + { + "epoch": 0.006108086575487983, + "grad_norm": 2.2042020560619306, + "learning_rate": 1.16710875331565e-06, + "loss": 1.238707423210144, + "step": 23 + }, + { + "epoch": 0.0063736555570309385, + "grad_norm": 2.289866056000848, + "learning_rate": 1.220159151193634e-06, + "loss": 1.2058464288711548, + "step": 24 + }, + { + "epoch": 0.006639224538573895, + "grad_norm": 2.724214643619529, + "learning_rate": 1.273209549071618e-06, + 
"loss": 1.2351092100143433, + "step": 25 + }, + { + "epoch": 0.00690479352011685, + "grad_norm": 2.5088520951326028, + "learning_rate": 1.3262599469496024e-06, + "loss": 1.1739860773086548, + "step": 26 + }, + { + "epoch": 0.007170362501659806, + "grad_norm": 2.3243798435890155, + "learning_rate": 1.3793103448275862e-06, + "loss": 1.1407617330551147, + "step": 27 + }, + { + "epoch": 0.007435931483202762, + "grad_norm": 2.533007430657115, + "learning_rate": 1.4323607427055705e-06, + "loss": 1.1844531297683716, + "step": 28 + }, + { + "epoch": 0.007701500464745718, + "grad_norm": 2.4702075978733804, + "learning_rate": 1.4854111405835544e-06, + "loss": 1.1293678283691406, + "step": 29 + }, + { + "epoch": 0.007967069446288673, + "grad_norm": 3.0873404038783963, + "learning_rate": 1.5384615384615387e-06, + "loss": 1.1310899257659912, + "step": 30 + }, + { + "epoch": 0.00823263842783163, + "grad_norm": 2.7098364862500013, + "learning_rate": 1.5915119363395226e-06, + "loss": 1.1015795469284058, + "step": 31 + }, + { + "epoch": 0.008498207409374585, + "grad_norm": 2.8074949689582476, + "learning_rate": 1.6445623342175069e-06, + "loss": 1.0756056308746338, + "step": 32 + }, + { + "epoch": 0.00876377639091754, + "grad_norm": 3.1563034348975676, + "learning_rate": 1.6976127320954908e-06, + "loss": 1.1496126651763916, + "step": 33 + }, + { + "epoch": 0.009029345372460496, + "grad_norm": 2.842390896608423, + "learning_rate": 1.750663129973475e-06, + "loss": 1.203465461730957, + "step": 34 + }, + { + "epoch": 0.009294914354003453, + "grad_norm": 2.6747271223349753, + "learning_rate": 1.803713527851459e-06, + "loss": 1.0613923072814941, + "step": 35 + }, + { + "epoch": 0.009560483335546408, + "grad_norm": 2.146709655536541, + "learning_rate": 1.8567639257294432e-06, + "loss": 1.06027090549469, + "step": 36 + }, + { + "epoch": 0.009826052317089363, + "grad_norm": 1.9942495143394863, + "learning_rate": 1.909814323607427e-06, + "loss": 1.0508522987365723, + "step": 37 + }, + { + 
"epoch": 0.01009162129863232, + "grad_norm": 2.1704927298148107, + "learning_rate": 1.9628647214854114e-06, + "loss": 1.0353929996490479, + "step": 38 + }, + { + "epoch": 0.010357190280175276, + "grad_norm": 1.8252380884349957, + "learning_rate": 2.0159151193633957e-06, + "loss": 0.9974027276039124, + "step": 39 + }, + { + "epoch": 0.010622759261718231, + "grad_norm": 1.7188806752497834, + "learning_rate": 2.0689655172413796e-06, + "loss": 1.0849467515945435, + "step": 40 + }, + { + "epoch": 0.010888328243261186, + "grad_norm": 1.3692667089198218, + "learning_rate": 2.1220159151193635e-06, + "loss": 1.005434274673462, + "step": 41 + }, + { + "epoch": 0.011153897224804143, + "grad_norm": 1.3465343019370317, + "learning_rate": 2.1750663129973478e-06, + "loss": 1.052631139755249, + "step": 42 + }, + { + "epoch": 0.011419466206347099, + "grad_norm": 1.352421126005469, + "learning_rate": 2.2281167108753316e-06, + "loss": 0.9470957517623901, + "step": 43 + }, + { + "epoch": 0.011685035187890054, + "grad_norm": 1.2219308328594767, + "learning_rate": 2.281167108753316e-06, + "loss": 0.9865130186080933, + "step": 44 + }, + { + "epoch": 0.01195060416943301, + "grad_norm": 1.19161259271228, + "learning_rate": 2.3342175066313e-06, + "loss": 0.9405577778816223, + "step": 45 + }, + { + "epoch": 0.012216173150975966, + "grad_norm": 1.1603073869733838, + "learning_rate": 2.387267904509284e-06, + "loss": 0.9418795108795166, + "step": 46 + }, + { + "epoch": 0.012481742132518922, + "grad_norm": 1.1897328813812988, + "learning_rate": 2.440318302387268e-06, + "loss": 0.9841142892837524, + "step": 47 + }, + { + "epoch": 0.012747311114061877, + "grad_norm": 1.159720101499262, + "learning_rate": 2.4933687002652523e-06, + "loss": 0.9412609338760376, + "step": 48 + }, + { + "epoch": 0.013012880095604834, + "grad_norm": 1.1421347262548374, + "learning_rate": 2.546419098143236e-06, + "loss": 0.9239889979362488, + "step": 49 + }, + { + "epoch": 0.01327844907714779, + "grad_norm": 
1.144363453746544, + "learning_rate": 2.59946949602122e-06, + "loss": 0.9212941527366638, + "step": 50 + }, + { + "epoch": 0.013544018058690745, + "grad_norm": 0.9916816911141796, + "learning_rate": 2.6525198938992047e-06, + "loss": 0.8863773345947266, + "step": 51 + }, + { + "epoch": 0.0138095870402337, + "grad_norm": 0.9890613082667745, + "learning_rate": 2.7055702917771886e-06, + "loss": 0.8990404009819031, + "step": 52 + }, + { + "epoch": 0.014075156021776657, + "grad_norm": 1.1123466462737277, + "learning_rate": 2.7586206896551725e-06, + "loss": 0.9257171154022217, + "step": 53 + }, + { + "epoch": 0.014340725003319612, + "grad_norm": 0.8689931750055545, + "learning_rate": 2.8116710875331564e-06, + "loss": 0.8239601254463196, + "step": 54 + }, + { + "epoch": 0.014606293984862568, + "grad_norm": 0.9936229603029793, + "learning_rate": 2.864721485411141e-06, + "loss": 0.8656830787658691, + "step": 55 + }, + { + "epoch": 0.014871862966405525, + "grad_norm": 1.0202371081091262, + "learning_rate": 2.917771883289125e-06, + "loss": 0.9470342397689819, + "step": 56 + }, + { + "epoch": 0.01513743194794848, + "grad_norm": 0.9663900963956384, + "learning_rate": 2.970822281167109e-06, + "loss": 0.8699859976768494, + "step": 57 + }, + { + "epoch": 0.015403000929491435, + "grad_norm": 0.940263545207204, + "learning_rate": 3.0238726790450927e-06, + "loss": 0.8668704628944397, + "step": 58 + }, + { + "epoch": 0.01566856991103439, + "grad_norm": 0.9865381848251076, + "learning_rate": 3.0769230769230774e-06, + "loss": 0.841624915599823, + "step": 59 + }, + { + "epoch": 0.015934138892577346, + "grad_norm": 0.8909972421095332, + "learning_rate": 3.1299734748010613e-06, + "loss": 0.8412661552429199, + "step": 60 + }, + { + "epoch": 0.0161997078741203, + "grad_norm": 0.8771283277278942, + "learning_rate": 3.183023872679045e-06, + "loss": 0.818957507610321, + "step": 61 + }, + { + "epoch": 0.01646527685566326, + "grad_norm": 0.9190140482494583, + "learning_rate": 3.23607427055703e-06, 
+ "loss": 0.8030763268470764, + "step": 62 + }, + { + "epoch": 0.016730845837206215, + "grad_norm": 0.8839367067386452, + "learning_rate": 3.2891246684350138e-06, + "loss": 0.7869359850883484, + "step": 63 + }, + { + "epoch": 0.01699641481874917, + "grad_norm": 0.8058255896640879, + "learning_rate": 3.3421750663129977e-06, + "loss": 0.7912170886993408, + "step": 64 + }, + { + "epoch": 0.017261983800292126, + "grad_norm": 0.8538938403853334, + "learning_rate": 3.3952254641909815e-06, + "loss": 0.7736695408821106, + "step": 65 + }, + { + "epoch": 0.01752755278183508, + "grad_norm": 0.8652625375848492, + "learning_rate": 3.448275862068966e-06, + "loss": 0.768275260925293, + "step": 66 + }, + { + "epoch": 0.017793121763378036, + "grad_norm": 0.8691478661970735, + "learning_rate": 3.50132625994695e-06, + "loss": 0.7210639119148254, + "step": 67 + }, + { + "epoch": 0.01805869074492099, + "grad_norm": 0.8378031795839386, + "learning_rate": 3.554376657824934e-06, + "loss": 0.7488028407096863, + "step": 68 + }, + { + "epoch": 0.01832425972646395, + "grad_norm": 0.8943989597273122, + "learning_rate": 3.607427055702918e-06, + "loss": 0.7329621911048889, + "step": 69 + }, + { + "epoch": 0.018589828708006906, + "grad_norm": 0.92104620358882, + "learning_rate": 3.660477453580902e-06, + "loss": 0.7270619869232178, + "step": 70 + }, + { + "epoch": 0.01885539768954986, + "grad_norm": 0.9782498013554233, + "learning_rate": 3.7135278514588865e-06, + "loss": 0.7271254658699036, + "step": 71 + }, + { + "epoch": 0.019120966671092816, + "grad_norm": 0.9115603845811348, + "learning_rate": 3.7665782493368703e-06, + "loss": 0.787033200263977, + "step": 72 + }, + { + "epoch": 0.01938653565263577, + "grad_norm": 0.8604692726067453, + "learning_rate": 3.819628647214854e-06, + "loss": 0.7049479484558105, + "step": 73 + }, + { + "epoch": 0.019652104634178727, + "grad_norm": 0.8610577281688413, + "learning_rate": 3.8726790450928385e-06, + "loss": 0.7146892547607422, + "step": 74 + }, + { + 
"epoch": 0.019917673615721682, + "grad_norm": 0.7602187567662452, + "learning_rate": 3.925729442970823e-06, + "loss": 0.7212516069412231, + "step": 75 + }, + { + "epoch": 0.02018324259726464, + "grad_norm": 0.6842508042039768, + "learning_rate": 3.978779840848806e-06, + "loss": 0.6612375378608704, + "step": 76 + }, + { + "epoch": 0.020448811578807596, + "grad_norm": 0.7781006919053841, + "learning_rate": 4.031830238726791e-06, + "loss": 0.7038244605064392, + "step": 77 + }, + { + "epoch": 0.02071438056035055, + "grad_norm": 0.7186592057129139, + "learning_rate": 4.084880636604775e-06, + "loss": 0.7081903219223022, + "step": 78 + }, + { + "epoch": 0.020979949541893507, + "grad_norm": 0.7655954113403886, + "learning_rate": 4.137931034482759e-06, + "loss": 0.7079841494560242, + "step": 79 + }, + { + "epoch": 0.021245518523436462, + "grad_norm": 0.7149787673446053, + "learning_rate": 4.190981432360743e-06, + "loss": 0.7090641260147095, + "step": 80 + }, + { + "epoch": 0.021511087504979418, + "grad_norm": 0.6657837070384769, + "learning_rate": 4.244031830238727e-06, + "loss": 0.6632575988769531, + "step": 81 + }, + { + "epoch": 0.021776656486522373, + "grad_norm": 0.6666401713606211, + "learning_rate": 4.297082228116711e-06, + "loss": 0.7231097221374512, + "step": 82 + }, + { + "epoch": 0.02204222546806533, + "grad_norm": 0.6804476609839887, + "learning_rate": 4.3501326259946955e-06, + "loss": 0.6696034669876099, + "step": 83 + }, + { + "epoch": 0.022307794449608287, + "grad_norm": 0.7073638927991296, + "learning_rate": 4.403183023872679e-06, + "loss": 0.7550696134567261, + "step": 84 + }, + { + "epoch": 0.022573363431151242, + "grad_norm": 0.7064770122504733, + "learning_rate": 4.456233421750663e-06, + "loss": 0.671328067779541, + "step": 85 + }, + { + "epoch": 0.022838932412694198, + "grad_norm": 0.6506139330803743, + "learning_rate": 4.5092838196286476e-06, + "loss": 0.6864410638809204, + "step": 86 + }, + { + "epoch": 0.023104501394237153, + "grad_norm": 
0.6642837777732639, + "learning_rate": 4.562334217506632e-06, + "loss": 0.6870769262313843, + "step": 87 + }, + { + "epoch": 0.023370070375780108, + "grad_norm": 0.6947506894199804, + "learning_rate": 4.615384615384616e-06, + "loss": 0.6539690494537354, + "step": 88 + }, + { + "epoch": 0.023635639357323063, + "grad_norm": 0.6446743321890098, + "learning_rate": 4.6684350132626e-06, + "loss": 0.6946991086006165, + "step": 89 + }, + { + "epoch": 0.02390120833886602, + "grad_norm": 0.6384512383480915, + "learning_rate": 4.721485411140584e-06, + "loss": 0.6177583932876587, + "step": 90 + }, + { + "epoch": 0.024166777320408978, + "grad_norm": 0.7150510018442997, + "learning_rate": 4.774535809018568e-06, + "loss": 0.6890037059783936, + "step": 91 + }, + { + "epoch": 0.024432346301951933, + "grad_norm": 0.6592991709316253, + "learning_rate": 4.8275862068965525e-06, + "loss": 0.6563063263893127, + "step": 92 + }, + { + "epoch": 0.024697915283494888, + "grad_norm": 0.6897740926797078, + "learning_rate": 4.880636604774536e-06, + "loss": 0.714318573474884, + "step": 93 + }, + { + "epoch": 0.024963484265037843, + "grad_norm": 0.6433596226177777, + "learning_rate": 4.93368700265252e-06, + "loss": 0.6720882654190063, + "step": 94 + }, + { + "epoch": 0.0252290532465808, + "grad_norm": 0.5910528348002435, + "learning_rate": 4.9867374005305045e-06, + "loss": 0.602899968624115, + "step": 95 + }, + { + "epoch": 0.025494622228123754, + "grad_norm": 0.6635651676723159, + "learning_rate": 5.039787798408489e-06, + "loss": 0.6628841161727905, + "step": 96 + }, + { + "epoch": 0.02576019120966671, + "grad_norm": 0.6070065577903714, + "learning_rate": 5.092838196286472e-06, + "loss": 0.6486932635307312, + "step": 97 + }, + { + "epoch": 0.026025760191209668, + "grad_norm": 0.6484848126679549, + "learning_rate": 5.145888594164457e-06, + "loss": 0.6719033122062683, + "step": 98 + }, + { + "epoch": 0.026291329172752623, + "grad_norm": 0.6856934201881044, + "learning_rate": 5.19893899204244e-06, + 
"loss": 0.6818530559539795, + "step": 99 + }, + { + "epoch": 0.02655689815429558, + "grad_norm": 0.6204811558305167, + "learning_rate": 5.251989389920424e-06, + "loss": 0.6306912899017334, + "step": 100 + }, + { + "epoch": 0.026822467135838534, + "grad_norm": 0.7820574736690976, + "learning_rate": 5.3050397877984095e-06, + "loss": 0.5952945351600647, + "step": 101 + }, + { + "epoch": 0.02708803611738149, + "grad_norm": 0.6546243503849497, + "learning_rate": 5.358090185676394e-06, + "loss": 0.6566107273101807, + "step": 102 + }, + { + "epoch": 0.027353605098924445, + "grad_norm": 0.707921645301647, + "learning_rate": 5.411140583554377e-06, + "loss": 0.6981694102287292, + "step": 103 + }, + { + "epoch": 0.0276191740804674, + "grad_norm": 0.6375441067969543, + "learning_rate": 5.4641909814323615e-06, + "loss": 0.6231328248977661, + "step": 104 + }, + { + "epoch": 0.02788474306201036, + "grad_norm": 0.6964560869475424, + "learning_rate": 5.517241379310345e-06, + "loss": 0.6414977312088013, + "step": 105 + }, + { + "epoch": 0.028150312043553314, + "grad_norm": 0.6835502446580011, + "learning_rate": 5.570291777188329e-06, + "loss": 0.6335234642028809, + "step": 106 + }, + { + "epoch": 0.02841588102509627, + "grad_norm": 0.6248033284508979, + "learning_rate": 5.623342175066313e-06, + "loss": 0.6040852665901184, + "step": 107 + }, + { + "epoch": 0.028681450006639225, + "grad_norm": 0.6645474785171195, + "learning_rate": 5.676392572944297e-06, + "loss": 0.6011114716529846, + "step": 108 + }, + { + "epoch": 0.02894701898818218, + "grad_norm": 0.655106623405533, + "learning_rate": 5.729442970822282e-06, + "loss": 0.6042627096176147, + "step": 109 + }, + { + "epoch": 0.029212587969725135, + "grad_norm": 0.720208539355598, + "learning_rate": 5.782493368700266e-06, + "loss": 0.6183412671089172, + "step": 110 + }, + { + "epoch": 0.02947815695126809, + "grad_norm": 0.6666287454908232, + "learning_rate": 5.83554376657825e-06, + "loss": 0.6150818467140198, + "step": 111 + }, + { + 
"epoch": 0.02974372593281105, + "grad_norm": 0.6840692324124527, + "learning_rate": 5.888594164456234e-06, + "loss": 0.6202039122581482, + "step": 112 + }, + { + "epoch": 0.030009294914354005, + "grad_norm": 0.6626407253242022, + "learning_rate": 5.941644562334218e-06, + "loss": 0.6334809064865112, + "step": 113 + }, + { + "epoch": 0.03027486389589696, + "grad_norm": 0.6319419097399773, + "learning_rate": 5.994694960212202e-06, + "loss": 0.5728089809417725, + "step": 114 + }, + { + "epoch": 0.030540432877439915, + "grad_norm": 0.6988175213443283, + "learning_rate": 6.0477453580901854e-06, + "loss": 0.6884603500366211, + "step": 115 + }, + { + "epoch": 0.03080600185898287, + "grad_norm": 0.6618120552387852, + "learning_rate": 6.1007957559681706e-06, + "loss": 0.5619829893112183, + "step": 116 + }, + { + "epoch": 0.031071570840525826, + "grad_norm": 0.6756012639437595, + "learning_rate": 6.153846153846155e-06, + "loss": 0.6224710941314697, + "step": 117 + }, + { + "epoch": 0.03133713982206878, + "grad_norm": 0.7208355833756769, + "learning_rate": 6.206896551724138e-06, + "loss": 0.6119496822357178, + "step": 118 + }, + { + "epoch": 0.03160270880361174, + "grad_norm": 0.6917782946677038, + "learning_rate": 6.259946949602123e-06, + "loss": 0.6190857887268066, + "step": 119 + }, + { + "epoch": 0.03186827778515469, + "grad_norm": 0.6704531181022263, + "learning_rate": 6.312997347480107e-06, + "loss": 0.6460769176483154, + "step": 120 + }, + { + "epoch": 0.03213384676669765, + "grad_norm": 0.7493511248909543, + "learning_rate": 6.36604774535809e-06, + "loss": 0.6148796677589417, + "step": 121 + }, + { + "epoch": 0.0323994157482406, + "grad_norm": 0.6359613412994526, + "learning_rate": 6.419098143236075e-06, + "loss": 0.558960497379303, + "step": 122 + }, + { + "epoch": 0.03266498472978356, + "grad_norm": 0.6785691051694177, + "learning_rate": 6.47214854111406e-06, + "loss": 0.5844984650611877, + "step": 123 + }, + { + "epoch": 0.03293055371132652, + "grad_norm": 
0.6692815537253501, + "learning_rate": 6.525198938992043e-06, + "loss": 0.5343623161315918, + "step": 124 + }, + { + "epoch": 0.03319612269286947, + "grad_norm": 0.6705726789318588, + "learning_rate": 6.5782493368700276e-06, + "loss": 0.5834348797798157, + "step": 125 + }, + { + "epoch": 0.03346169167441243, + "grad_norm": 0.7626576562771024, + "learning_rate": 6.631299734748011e-06, + "loss": 0.5997360944747925, + "step": 126 + }, + { + "epoch": 0.03372726065595538, + "grad_norm": 0.7117893752859364, + "learning_rate": 6.684350132625995e-06, + "loss": 0.5991666316986084, + "step": 127 + }, + { + "epoch": 0.03399282963749834, + "grad_norm": 0.7060406683837459, + "learning_rate": 6.737400530503979e-06, + "loss": 0.581120491027832, + "step": 128 + }, + { + "epoch": 0.03425839861904129, + "grad_norm": 0.6869761252397286, + "learning_rate": 6.790450928381963e-06, + "loss": 0.6219569444656372, + "step": 129 + }, + { + "epoch": 0.03452396760058425, + "grad_norm": 0.6916173566260286, + "learning_rate": 6.843501326259947e-06, + "loss": 0.5950608253479004, + "step": 130 + }, + { + "epoch": 0.03478953658212721, + "grad_norm": 0.6136480902733893, + "learning_rate": 6.896551724137932e-06, + "loss": 0.5762747526168823, + "step": 131 + }, + { + "epoch": 0.03505510556367016, + "grad_norm": 0.670368708945713, + "learning_rate": 6.949602122015916e-06, + "loss": 0.6003131866455078, + "step": 132 + }, + { + "epoch": 0.03532067454521312, + "grad_norm": 0.6439028776339482, + "learning_rate": 7.0026525198939e-06, + "loss": 0.5866605043411255, + "step": 133 + }, + { + "epoch": 0.03558624352675607, + "grad_norm": 0.8324202287699098, + "learning_rate": 7.055702917771884e-06, + "loss": 0.6668443083763123, + "step": 134 + }, + { + "epoch": 0.03585181250829903, + "grad_norm": 0.7064456856515898, + "learning_rate": 7.108753315649868e-06, + "loss": 0.5738306045532227, + "step": 135 + }, + { + "epoch": 0.03611738148984198, + "grad_norm": 0.6941604370641007, + "learning_rate": 
7.1618037135278515e-06, + "loss": 0.5774663686752319, + "step": 136 + }, + { + "epoch": 0.03638295047138494, + "grad_norm": 0.7648336305672251, + "learning_rate": 7.214854111405836e-06, + "loss": 0.5721150636672974, + "step": 137 + }, + { + "epoch": 0.0366485194529279, + "grad_norm": 0.7394576462203543, + "learning_rate": 7.267904509283821e-06, + "loss": 0.6350122690200806, + "step": 138 + }, + { + "epoch": 0.03691408843447085, + "grad_norm": 0.6540602529440619, + "learning_rate": 7.320954907161804e-06, + "loss": 0.5435039401054382, + "step": 139 + }, + { + "epoch": 0.03717965741601381, + "grad_norm": 0.6965351191908165, + "learning_rate": 7.374005305039789e-06, + "loss": 0.5869162678718567, + "step": 140 + }, + { + "epoch": 0.03744522639755676, + "grad_norm": 0.6664228073022063, + "learning_rate": 7.427055702917773e-06, + "loss": 0.5645807981491089, + "step": 141 + }, + { + "epoch": 0.03771079537909972, + "grad_norm": 0.6503771775205762, + "learning_rate": 7.480106100795756e-06, + "loss": 0.5502692461013794, + "step": 142 + }, + { + "epoch": 0.037976364360642674, + "grad_norm": 0.6223645459397411, + "learning_rate": 7.533156498673741e-06, + "loss": 0.5602732300758362, + "step": 143 + }, + { + "epoch": 0.03824193334218563, + "grad_norm": 0.8638951879324807, + "learning_rate": 7.586206896551724e-06, + "loss": 0.6011391282081604, + "step": 144 + }, + { + "epoch": 0.03850750232372859, + "grad_norm": 0.6930636234613441, + "learning_rate": 7.639257294429708e-06, + "loss": 0.5482327938079834, + "step": 145 + }, + { + "epoch": 0.03877307130527154, + "grad_norm": 0.6693652199128735, + "learning_rate": 7.692307692307694e-06, + "loss": 0.5926344394683838, + "step": 146 + }, + { + "epoch": 0.0390386402868145, + "grad_norm": 0.8434991800954339, + "learning_rate": 7.745358090185677e-06, + "loss": 0.6558316946029663, + "step": 147 + }, + { + "epoch": 0.039304209268357454, + "grad_norm": 0.6845819362079449, + "learning_rate": 7.79840848806366e-06, + "loss": 0.572425365447998, + 
"step": 148 + }, + { + "epoch": 0.03956977824990041, + "grad_norm": 0.696296152543372, + "learning_rate": 7.851458885941646e-06, + "loss": 0.5684784650802612, + "step": 149 + }, + { + "epoch": 0.039835347231443365, + "grad_norm": 0.6779490529346879, + "learning_rate": 7.904509283819629e-06, + "loss": 0.5843643546104431, + "step": 150 + }, + { + "epoch": 0.04010091621298632, + "grad_norm": 0.6894842979231472, + "learning_rate": 7.957559681697613e-06, + "loss": 0.5471494793891907, + "step": 151 + }, + { + "epoch": 0.04036648519452928, + "grad_norm": 0.7583250211136208, + "learning_rate": 8.010610079575598e-06, + "loss": 0.595018744468689, + "step": 152 + }, + { + "epoch": 0.040632054176072234, + "grad_norm": 0.6904128122756304, + "learning_rate": 8.063660477453583e-06, + "loss": 0.5431865453720093, + "step": 153 + }, + { + "epoch": 0.04089762315761519, + "grad_norm": 0.7943246581886504, + "learning_rate": 8.116710875331566e-06, + "loss": 0.5622385740280151, + "step": 154 + }, + { + "epoch": 0.041163192139158145, + "grad_norm": 0.7792002007338675, + "learning_rate": 8.16976127320955e-06, + "loss": 0.5795880556106567, + "step": 155 + }, + { + "epoch": 0.0414287611207011, + "grad_norm": 0.7432143976693507, + "learning_rate": 8.222811671087533e-06, + "loss": 0.5854965448379517, + "step": 156 + }, + { + "epoch": 0.041694330102244055, + "grad_norm": 0.8104825185442435, + "learning_rate": 8.275862068965518e-06, + "loss": 0.5374501943588257, + "step": 157 + }, + { + "epoch": 0.041959899083787014, + "grad_norm": 0.7598674115735401, + "learning_rate": 8.328912466843502e-06, + "loss": 0.5779006481170654, + "step": 158 + }, + { + "epoch": 0.04222546806532997, + "grad_norm": 0.7033741631796787, + "learning_rate": 8.381962864721485e-06, + "loss": 0.550236701965332, + "step": 159 + }, + { + "epoch": 0.042491037046872925, + "grad_norm": 0.7285453499901458, + "learning_rate": 8.43501326259947e-06, + "loss": 0.557443380355835, + "step": 160 + }, + { + "epoch": 0.04275660602841588, + 
"grad_norm": 0.7050753960524794, + "learning_rate": 8.488063660477454e-06, + "loss": 0.5875238180160522, + "step": 161 + }, + { + "epoch": 0.043022175009958835, + "grad_norm": 0.7215582793376403, + "learning_rate": 8.541114058355439e-06, + "loss": 0.510900616645813, + "step": 162 + }, + { + "epoch": 0.043287743991501794, + "grad_norm": 0.7559114001900116, + "learning_rate": 8.594164456233422e-06, + "loss": 0.5465859174728394, + "step": 163 + }, + { + "epoch": 0.043553312973044746, + "grad_norm": 0.7494489908601825, + "learning_rate": 8.647214854111406e-06, + "loss": 0.5508615970611572, + "step": 164 + }, + { + "epoch": 0.043818881954587705, + "grad_norm": 0.7714387963397975, + "learning_rate": 8.700265251989391e-06, + "loss": 0.5437714457511902, + "step": 165 + }, + { + "epoch": 0.04408445093613066, + "grad_norm": 0.7480600693956645, + "learning_rate": 8.753315649867374e-06, + "loss": 0.542698323726654, + "step": 166 + }, + { + "epoch": 0.044350019917673615, + "grad_norm": 0.7339141407878966, + "learning_rate": 8.806366047745358e-06, + "loss": 0.5169371962547302, + "step": 167 + }, + { + "epoch": 0.044615588899216574, + "grad_norm": 0.725595419270195, + "learning_rate": 8.859416445623343e-06, + "loss": 0.5436176061630249, + "step": 168 + }, + { + "epoch": 0.044881157880759526, + "grad_norm": 0.8205411933516983, + "learning_rate": 8.912466843501327e-06, + "loss": 0.568030834197998, + "step": 169 + }, + { + "epoch": 0.045146726862302484, + "grad_norm": 0.7544356200090666, + "learning_rate": 8.965517241379312e-06, + "loss": 0.5218889713287354, + "step": 170 + }, + { + "epoch": 0.045412295843845436, + "grad_norm": 0.7860957525035722, + "learning_rate": 9.018567639257295e-06, + "loss": 0.5275779962539673, + "step": 171 + }, + { + "epoch": 0.045677864825388395, + "grad_norm": 0.6938225497373272, + "learning_rate": 9.071618037135279e-06, + "loss": 0.5263184905052185, + "step": 172 + }, + { + "epoch": 0.045943433806931354, + "grad_norm": 0.7549069812662602, + 
"learning_rate": 9.124668435013264e-06, + "loss": 0.563044548034668, + "step": 173 + }, + { + "epoch": 0.046209002788474306, + "grad_norm": 0.9364041083837341, + "learning_rate": 9.177718832891247e-06, + "loss": 0.5896912217140198, + "step": 174 + }, + { + "epoch": 0.046474571770017264, + "grad_norm": 0.7219752548557496, + "learning_rate": 9.230769230769232e-06, + "loss": 0.5163949131965637, + "step": 175 + }, + { + "epoch": 0.046740140751560216, + "grad_norm": 0.8391633255974319, + "learning_rate": 9.283819628647216e-06, + "loss": 0.6203320026397705, + "step": 176 + }, + { + "epoch": 0.047005709733103175, + "grad_norm": 0.9119997852547688, + "learning_rate": 9.3368700265252e-06, + "loss": 0.5528024435043335, + "step": 177 + }, + { + "epoch": 0.04727127871464613, + "grad_norm": 0.8828541610102935, + "learning_rate": 9.389920424403184e-06, + "loss": 0.5657555460929871, + "step": 178 + }, + { + "epoch": 0.047536847696189086, + "grad_norm": 0.7671789386737649, + "learning_rate": 9.442970822281168e-06, + "loss": 0.5301925539970398, + "step": 179 + }, + { + "epoch": 0.04780241667773204, + "grad_norm": 0.8675940797859782, + "learning_rate": 9.496021220159151e-06, + "loss": 0.5388369560241699, + "step": 180 + }, + { + "epoch": 0.048067985659274996, + "grad_norm": 0.7966332028310692, + "learning_rate": 9.549071618037136e-06, + "loss": 0.5549717545509338, + "step": 181 + }, + { + "epoch": 0.048333554640817955, + "grad_norm": 0.8814678011939608, + "learning_rate": 9.60212201591512e-06, + "loss": 0.5959764719009399, + "step": 182 + }, + { + "epoch": 0.04859912362236091, + "grad_norm": 0.7841222204736121, + "learning_rate": 9.655172413793105e-06, + "loss": 0.5461844205856323, + "step": 183 + }, + { + "epoch": 0.048864692603903866, + "grad_norm": 0.7620084886447284, + "learning_rate": 9.708222811671088e-06, + "loss": 0.5428494811058044, + "step": 184 + }, + { + "epoch": 0.04913026158544682, + "grad_norm": 0.7918991595575344, + "learning_rate": 9.761273209549072e-06, + "loss": 
0.552198052406311, + "step": 185 + }, + { + "epoch": 0.049395830566989776, + "grad_norm": 0.6896394660507362, + "learning_rate": 9.814323607427057e-06, + "loss": 0.49992549419403076, + "step": 186 + }, + { + "epoch": 0.04966139954853273, + "grad_norm": 0.7875507527713166, + "learning_rate": 9.86737400530504e-06, + "loss": 0.557820200920105, + "step": 187 + }, + { + "epoch": 0.04992696853007569, + "grad_norm": 0.8883719893129148, + "learning_rate": 9.920424403183024e-06, + "loss": 0.5238749384880066, + "step": 188 + }, + { + "epoch": 0.050192537511618646, + "grad_norm": 0.988465476825029, + "learning_rate": 9.973474801061009e-06, + "loss": 0.5346978902816772, + "step": 189 + }, + { + "epoch": 0.0504581064931616, + "grad_norm": 0.8024883433630577, + "learning_rate": 1.0026525198938993e-05, + "loss": 0.5256577730178833, + "step": 190 + }, + { + "epoch": 0.050723675474704556, + "grad_norm": 0.8026852335394901, + "learning_rate": 1.0079575596816978e-05, + "loss": 0.5235393047332764, + "step": 191 + }, + { + "epoch": 0.05098924445624751, + "grad_norm": 0.6835673591276205, + "learning_rate": 1.013262599469496e-05, + "loss": 0.4984837472438812, + "step": 192 + }, + { + "epoch": 0.05125481343779047, + "grad_norm": 0.7829913352817355, + "learning_rate": 1.0185676392572945e-05, + "loss": 0.5209602117538452, + "step": 193 + }, + { + "epoch": 0.05152038241933342, + "grad_norm": 0.8334733472253096, + "learning_rate": 1.023872679045093e-05, + "loss": 0.5468267202377319, + "step": 194 + }, + { + "epoch": 0.05178595140087638, + "grad_norm": 0.8107908645155819, + "learning_rate": 1.0291777188328913e-05, + "loss": 0.5531667470932007, + "step": 195 + }, + { + "epoch": 0.052051520382419336, + "grad_norm": 0.8437904919697584, + "learning_rate": 1.0344827586206898e-05, + "loss": 0.5741526484489441, + "step": 196 + }, + { + "epoch": 0.05231708936396229, + "grad_norm": 0.6830882515315945, + "learning_rate": 1.039787798408488e-05, + "loss": 0.46132561564445496, + "step": 197 + }, + { + 
"epoch": 0.05258265834550525, + "grad_norm": 0.8402230890409916, + "learning_rate": 1.0450928381962865e-05, + "loss": 0.5074198842048645, + "step": 198 + }, + { + "epoch": 0.0528482273270482, + "grad_norm": 0.7476727742688456, + "learning_rate": 1.0503978779840849e-05, + "loss": 0.5193089842796326, + "step": 199 + }, + { + "epoch": 0.05311379630859116, + "grad_norm": 0.7814745235248249, + "learning_rate": 1.0557029177718834e-05, + "loss": 0.5209243297576904, + "step": 200 + }, + { + "epoch": 0.05337936529013411, + "grad_norm": 0.8844918483638834, + "learning_rate": 1.0610079575596819e-05, + "loss": 0.5607191920280457, + "step": 201 + }, + { + "epoch": 0.05364493427167707, + "grad_norm": 0.7926104097207243, + "learning_rate": 1.0663129973474802e-05, + "loss": 0.5482805371284485, + "step": 202 + }, + { + "epoch": 0.05391050325322003, + "grad_norm": 0.8109463956858287, + "learning_rate": 1.0716180371352788e-05, + "loss": 0.5579961538314819, + "step": 203 + }, + { + "epoch": 0.05417607223476298, + "grad_norm": 0.8246893162942163, + "learning_rate": 1.076923076923077e-05, + "loss": 0.5119072794914246, + "step": 204 + }, + { + "epoch": 0.05444164121630594, + "grad_norm": 0.8293246958439139, + "learning_rate": 1.0822281167108754e-05, + "loss": 0.5129292607307434, + "step": 205 + }, + { + "epoch": 0.05470721019784889, + "grad_norm": 0.6895550242199711, + "learning_rate": 1.0875331564986738e-05, + "loss": 0.500032901763916, + "step": 206 + }, + { + "epoch": 0.05497277917939185, + "grad_norm": 0.8385731092525408, + "learning_rate": 1.0928381962864723e-05, + "loss": 0.5264571309089661, + "step": 207 + }, + { + "epoch": 0.0552383481609348, + "grad_norm": 0.7915802802090326, + "learning_rate": 1.0981432360742708e-05, + "loss": 0.5569590330123901, + "step": 208 + }, + { + "epoch": 0.05550391714247776, + "grad_norm": 0.8546725938844908, + "learning_rate": 1.103448275862069e-05, + "loss": 0.5429908037185669, + "step": 209 + }, + { + "epoch": 0.05576948612402072, + "grad_norm": 
0.8175642333393268, + "learning_rate": 1.1087533156498675e-05, + "loss": 0.5073692202568054, + "step": 210 + }, + { + "epoch": 0.05603505510556367, + "grad_norm": 0.9551222157670755, + "learning_rate": 1.1140583554376659e-05, + "loss": 0.5613659620285034, + "step": 211 + }, + { + "epoch": 0.05630062408710663, + "grad_norm": 1.8348970874488084, + "learning_rate": 1.1193633952254644e-05, + "loss": 0.5197691917419434, + "step": 212 + }, + { + "epoch": 0.05656619306864958, + "grad_norm": 0.9173115658326468, + "learning_rate": 1.1246684350132625e-05, + "loss": 0.5410990715026855, + "step": 213 + }, + { + "epoch": 0.05683176205019254, + "grad_norm": 0.8562107533946397, + "learning_rate": 1.129973474801061e-05, + "loss": 0.5852477550506592, + "step": 214 + }, + { + "epoch": 0.05709733103173549, + "grad_norm": 0.8483195878163089, + "learning_rate": 1.1352785145888594e-05, + "loss": 0.5312488079071045, + "step": 215 + }, + { + "epoch": 0.05736290001327845, + "grad_norm": 0.8817111257753456, + "learning_rate": 1.140583554376658e-05, + "loss": 0.5075235366821289, + "step": 216 + }, + { + "epoch": 0.05762846899482141, + "grad_norm": 0.8014885700994473, + "learning_rate": 1.1458885941644564e-05, + "loss": 0.5213298797607422, + "step": 217 + }, + { + "epoch": 0.05789403797636436, + "grad_norm": 0.8852582070340804, + "learning_rate": 1.1511936339522548e-05, + "loss": 0.5564183592796326, + "step": 218 + }, + { + "epoch": 0.05815960695790732, + "grad_norm": 1.0148412469588788, + "learning_rate": 1.1564986737400531e-05, + "loss": 0.5328387022018433, + "step": 219 + }, + { + "epoch": 0.05842517593945027, + "grad_norm": 0.7824132338865165, + "learning_rate": 1.1618037135278515e-05, + "loss": 0.5010273456573486, + "step": 220 + }, + { + "epoch": 0.05869074492099323, + "grad_norm": 0.8493817546068081, + "learning_rate": 1.16710875331565e-05, + "loss": 0.5473708510398865, + "step": 221 + }, + { + "epoch": 0.05895631390253618, + "grad_norm": 1.1554913959885298, + "learning_rate": 
1.1724137931034483e-05, + "loss": 0.5359818339347839, + "step": 222 + }, + { + "epoch": 0.05922188288407914, + "grad_norm": 0.9663065987200732, + "learning_rate": 1.1777188328912468e-05, + "loss": 0.5274665951728821, + "step": 223 + }, + { + "epoch": 0.0594874518656221, + "grad_norm": 0.8158672021913522, + "learning_rate": 1.1830238726790454e-05, + "loss": 0.5463781952857971, + "step": 224 + }, + { + "epoch": 0.05975302084716505, + "grad_norm": 0.7817235200046289, + "learning_rate": 1.1883289124668435e-05, + "loss": 0.553212583065033, + "step": 225 + }, + { + "epoch": 0.06001858982870801, + "grad_norm": 0.8540074681170072, + "learning_rate": 1.193633952254642e-05, + "loss": 0.47144171595573425, + "step": 226 + }, + { + "epoch": 0.06028415881025096, + "grad_norm": 0.9191106803002166, + "learning_rate": 1.1989389920424404e-05, + "loss": 0.506844162940979, + "step": 227 + }, + { + "epoch": 0.06054972779179392, + "grad_norm": 0.794192267301098, + "learning_rate": 1.2042440318302389e-05, + "loss": 0.4965322017669678, + "step": 228 + }, + { + "epoch": 0.06081529677333687, + "grad_norm": 0.8421546110465796, + "learning_rate": 1.2095490716180371e-05, + "loss": 0.4815751612186432, + "step": 229 + }, + { + "epoch": 0.06108086575487983, + "grad_norm": 0.8107361719185122, + "learning_rate": 1.2148541114058356e-05, + "loss": 0.5245312452316284, + "step": 230 + }, + { + "epoch": 0.06134643473642279, + "grad_norm": 0.8749447967552209, + "learning_rate": 1.2201591511936341e-05, + "loss": 0.5215133428573608, + "step": 231 + }, + { + "epoch": 0.06161200371796574, + "grad_norm": 0.8315635530714504, + "learning_rate": 1.2254641909814325e-05, + "loss": 0.5039419531822205, + "step": 232 + }, + { + "epoch": 0.0618775726995087, + "grad_norm": 1.0583546039713638, + "learning_rate": 1.230769230769231e-05, + "loss": 0.5562925338745117, + "step": 233 + }, + { + "epoch": 0.06214314168105165, + "grad_norm": 1.069780059811152, + "learning_rate": 1.2360742705570291e-05, + "loss": 
0.5372984409332275, + "step": 234 + }, + { + "epoch": 0.06240871066259461, + "grad_norm": 0.8766841361731121, + "learning_rate": 1.2413793103448277e-05, + "loss": 0.44987717270851135, + "step": 235 + }, + { + "epoch": 0.06267427964413756, + "grad_norm": 0.9229136432445015, + "learning_rate": 1.246684350132626e-05, + "loss": 0.537068247795105, + "step": 236 + }, + { + "epoch": 0.06293984862568051, + "grad_norm": 0.9828329951785308, + "learning_rate": 1.2519893899204245e-05, + "loss": 0.504779577255249, + "step": 237 + }, + { + "epoch": 0.06320541760722348, + "grad_norm": 1.0061858451025696, + "learning_rate": 1.257294429708223e-05, + "loss": 0.5524113774299622, + "step": 238 + }, + { + "epoch": 0.06347098658876643, + "grad_norm": 0.9888885225244529, + "learning_rate": 1.2625994694960214e-05, + "loss": 0.5089439153671265, + "step": 239 + }, + { + "epoch": 0.06373655557030938, + "grad_norm": 0.8394940482178029, + "learning_rate": 1.2679045092838197e-05, + "loss": 0.4501679837703705, + "step": 240 + }, + { + "epoch": 0.06400212455185235, + "grad_norm": 0.8117693384854435, + "learning_rate": 1.273209549071618e-05, + "loss": 0.5360216498374939, + "step": 241 + }, + { + "epoch": 0.0642676935333953, + "grad_norm": 0.876954304053235, + "learning_rate": 1.2785145888594166e-05, + "loss": 0.5595712661743164, + "step": 242 + }, + { + "epoch": 0.06453326251493825, + "grad_norm": 1.080992038181853, + "learning_rate": 1.283819628647215e-05, + "loss": 0.5010904669761658, + "step": 243 + }, + { + "epoch": 0.0647988314964812, + "grad_norm": 1.0446842005075034, + "learning_rate": 1.2891246684350134e-05, + "loss": 0.5053697228431702, + "step": 244 + }, + { + "epoch": 0.06506440047802417, + "grad_norm": 0.803002193385922, + "learning_rate": 1.294429708222812e-05, + "loss": 0.5045514106750488, + "step": 245 + }, + { + "epoch": 0.06532996945956712, + "grad_norm": 0.7912163744531999, + "learning_rate": 1.2997347480106101e-05, + "loss": 0.5546073913574219, + "step": 246 + }, + { + "epoch": 
0.06559553844111007, + "grad_norm": 0.9572908035308383, + "learning_rate": 1.3050397877984087e-05, + "loss": 0.47276046872138977, + "step": 247 + }, + { + "epoch": 0.06586110742265304, + "grad_norm": 0.8233476091470914, + "learning_rate": 1.310344827586207e-05, + "loss": 0.4757889211177826, + "step": 248 + }, + { + "epoch": 0.06612667640419599, + "grad_norm": 0.8415305337388579, + "learning_rate": 1.3156498673740055e-05, + "loss": 0.5078848600387573, + "step": 249 + }, + { + "epoch": 0.06639224538573894, + "grad_norm": 0.8437984625649567, + "learning_rate": 1.3209549071618037e-05, + "loss": 0.4890335202217102, + "step": 250 + }, + { + "epoch": 0.0666578143672819, + "grad_norm": 0.8299999132068526, + "learning_rate": 1.3262599469496022e-05, + "loss": 0.5406580567359924, + "step": 251 + }, + { + "epoch": 0.06692338334882486, + "grad_norm": 0.9307594142144101, + "learning_rate": 1.3315649867374005e-05, + "loss": 0.5236875414848328, + "step": 252 + }, + { + "epoch": 0.06718895233036781, + "grad_norm": 1.0602580439454288, + "learning_rate": 1.336870026525199e-05, + "loss": 0.4991317391395569, + "step": 253 + }, + { + "epoch": 0.06745452131191076, + "grad_norm": 0.8277603880683132, + "learning_rate": 1.3421750663129976e-05, + "loss": 0.4234679639339447, + "step": 254 + }, + { + "epoch": 0.06772009029345373, + "grad_norm": 0.9984839302922622, + "learning_rate": 1.3474801061007958e-05, + "loss": 0.49749234318733215, + "step": 255 + }, + { + "epoch": 0.06798565927499668, + "grad_norm": 0.9543855303701088, + "learning_rate": 1.3527851458885943e-05, + "loss": 0.5049105286598206, + "step": 256 + }, + { + "epoch": 0.06825122825653963, + "grad_norm": 0.8443711840757044, + "learning_rate": 1.3580901856763926e-05, + "loss": 0.5355304479598999, + "step": 257 + }, + { + "epoch": 0.06851679723808259, + "grad_norm": 0.9255144140027944, + "learning_rate": 1.3633952254641911e-05, + "loss": 0.46302929520606995, + "step": 258 + }, + { + "epoch": 0.06878236621962555, + "grad_norm": 
0.953877794861965, + "learning_rate": 1.3687002652519895e-05, + "loss": 0.5054173469543457, + "step": 259 + }, + { + "epoch": 0.0690479352011685, + "grad_norm": 0.8214682466537866, + "learning_rate": 1.374005305039788e-05, + "loss": 0.5018566846847534, + "step": 260 + }, + { + "epoch": 0.06931350418271146, + "grad_norm": 0.878430758752321, + "learning_rate": 1.3793103448275863e-05, + "loss": 0.4938735365867615, + "step": 261 + }, + { + "epoch": 0.06957907316425442, + "grad_norm": 0.8343439459008911, + "learning_rate": 1.3846153846153847e-05, + "loss": 0.4605029225349426, + "step": 262 + }, + { + "epoch": 0.06984464214579737, + "grad_norm": 0.8260329604526515, + "learning_rate": 1.3899204244031832e-05, + "loss": 0.5056782960891724, + "step": 263 + }, + { + "epoch": 0.07011021112734032, + "grad_norm": 0.860551370737139, + "learning_rate": 1.3952254641909815e-05, + "loss": 0.5017784833908081, + "step": 264 + }, + { + "epoch": 0.07037578010888328, + "grad_norm": 0.8353804409772935, + "learning_rate": 1.40053050397878e-05, + "loss": 0.5132012367248535, + "step": 265 + }, + { + "epoch": 0.07064134909042624, + "grad_norm": 0.8151795113028358, + "learning_rate": 1.4058355437665782e-05, + "loss": 0.531212329864502, + "step": 266 + }, + { + "epoch": 0.0709069180719692, + "grad_norm": 0.8086605566204427, + "learning_rate": 1.4111405835543767e-05, + "loss": 0.4900968074798584, + "step": 267 + }, + { + "epoch": 0.07117248705351215, + "grad_norm": 0.8735731145360269, + "learning_rate": 1.4164456233421753e-05, + "loss": 0.45277124643325806, + "step": 268 + }, + { + "epoch": 0.07143805603505511, + "grad_norm": 0.8760293380808535, + "learning_rate": 1.4217506631299736e-05, + "loss": 0.48026078939437866, + "step": 269 + }, + { + "epoch": 0.07170362501659806, + "grad_norm": 0.9019281227597356, + "learning_rate": 1.4270557029177721e-05, + "loss": 0.5111234784126282, + "step": 270 + }, + { + "epoch": 0.07196919399814102, + "grad_norm": 0.9120608197487232, + "learning_rate": 
1.4323607427055703e-05, + "loss": 0.5448082685470581, + "step": 271 + }, + { + "epoch": 0.07223476297968397, + "grad_norm": 0.9400729117423203, + "learning_rate": 1.4376657824933688e-05, + "loss": 0.5242921113967896, + "step": 272 + }, + { + "epoch": 0.07250033196122693, + "grad_norm": 0.9404952891335322, + "learning_rate": 1.4429708222811672e-05, + "loss": 0.5194095373153687, + "step": 273 + }, + { + "epoch": 0.07276590094276988, + "grad_norm": 0.8893776382848525, + "learning_rate": 1.4482758620689657e-05, + "loss": 0.4620330333709717, + "step": 274 + }, + { + "epoch": 0.07303146992431284, + "grad_norm": 0.886983687866706, + "learning_rate": 1.4535809018567642e-05, + "loss": 0.4654063582420349, + "step": 275 + }, + { + "epoch": 0.0732970389058558, + "grad_norm": 0.7984003718276244, + "learning_rate": 1.4588859416445624e-05, + "loss": 0.4637746810913086, + "step": 276 + }, + { + "epoch": 0.07356260788739875, + "grad_norm": 0.8288882522584324, + "learning_rate": 1.4641909814323609e-05, + "loss": 0.47949421405792236, + "step": 277 + }, + { + "epoch": 0.0738281768689417, + "grad_norm": 1.0041804846004008, + "learning_rate": 1.4694960212201592e-05, + "loss": 0.49565935134887695, + "step": 278 + }, + { + "epoch": 0.07409374585048466, + "grad_norm": 0.9214786055945364, + "learning_rate": 1.4748010610079577e-05, + "loss": 0.5057941675186157, + "step": 279 + }, + { + "epoch": 0.07435931483202762, + "grad_norm": 0.9073397896109812, + "learning_rate": 1.480106100795756e-05, + "loss": 0.5495956540107727, + "step": 280 + }, + { + "epoch": 0.07462488381357057, + "grad_norm": 0.8743353741776648, + "learning_rate": 1.4854111405835546e-05, + "loss": 0.4502897560596466, + "step": 281 + }, + { + "epoch": 0.07489045279511353, + "grad_norm": 0.8694785116368758, + "learning_rate": 1.490716180371353e-05, + "loss": 0.4799070954322815, + "step": 282 + }, + { + "epoch": 0.07515602177665649, + "grad_norm": 0.886176954457428, + "learning_rate": 1.4960212201591513e-05, + "loss": 
0.45640307664871216, + "step": 283 + }, + { + "epoch": 0.07542159075819944, + "grad_norm": 0.8937725285994821, + "learning_rate": 1.5013262599469498e-05, + "loss": 0.47862207889556885, + "step": 284 + }, + { + "epoch": 0.0756871597397424, + "grad_norm": 0.8717898339198907, + "learning_rate": 1.5066312997347481e-05, + "loss": 0.48195987939834595, + "step": 285 + }, + { + "epoch": 0.07595272872128535, + "grad_norm": 0.9124586645482137, + "learning_rate": 1.5119363395225467e-05, + "loss": 0.518566370010376, + "step": 286 + }, + { + "epoch": 0.07621829770282831, + "grad_norm": 0.9766882853479317, + "learning_rate": 1.5172413793103448e-05, + "loss": 0.5034162402153015, + "step": 287 + }, + { + "epoch": 0.07648386668437127, + "grad_norm": 0.8995114639723897, + "learning_rate": 1.5225464190981433e-05, + "loss": 0.497822642326355, + "step": 288 + }, + { + "epoch": 0.07674943566591422, + "grad_norm": 0.8484786603983125, + "learning_rate": 1.5278514588859417e-05, + "loss": 0.510530412197113, + "step": 289 + }, + { + "epoch": 0.07701500464745718, + "grad_norm": 0.9406440408252492, + "learning_rate": 1.53315649867374e-05, + "loss": 0.5163881778717041, + "step": 290 + }, + { + "epoch": 0.07728057362900013, + "grad_norm": 0.9825958938719339, + "learning_rate": 1.5384615384615387e-05, + "loss": 0.5161621570587158, + "step": 291 + }, + { + "epoch": 0.07754614261054309, + "grad_norm": 0.8680267479326179, + "learning_rate": 1.543766578249337e-05, + "loss": 0.5260482430458069, + "step": 292 + }, + { + "epoch": 0.07781171159208604, + "grad_norm": 0.8791995274446183, + "learning_rate": 1.5490716180371354e-05, + "loss": 0.4946279227733612, + "step": 293 + }, + { + "epoch": 0.078077280573629, + "grad_norm": 0.9734620967906259, + "learning_rate": 1.5543766578249338e-05, + "loss": 0.5030514001846313, + "step": 294 + }, + { + "epoch": 0.07834284955517196, + "grad_norm": 0.899295097408943, + "learning_rate": 1.559681697612732e-05, + "loss": 0.48864102363586426, + "step": 295 + }, + { + 
"epoch": 0.07860841853671491, + "grad_norm": 0.8710376092284174, + "learning_rate": 1.5649867374005304e-05, + "loss": 0.48310425877571106, + "step": 296 + }, + { + "epoch": 0.07887398751825787, + "grad_norm": 1.0094258392730318, + "learning_rate": 1.570291777188329e-05, + "loss": 0.4451446533203125, + "step": 297 + }, + { + "epoch": 0.07913955649980083, + "grad_norm": 0.9863170561942101, + "learning_rate": 1.5755968169761275e-05, + "loss": 0.4884604811668396, + "step": 298 + }, + { + "epoch": 0.07940512548134378, + "grad_norm": 0.8355693003184833, + "learning_rate": 1.5809018567639258e-05, + "loss": 0.5047659873962402, + "step": 299 + }, + { + "epoch": 0.07967069446288673, + "grad_norm": 0.8879040718748079, + "learning_rate": 1.586206896551724e-05, + "loss": 0.49124205112457275, + "step": 300 + }, + { + "epoch": 0.0799362634444297, + "grad_norm": 0.9411885452551192, + "learning_rate": 1.5915119363395225e-05, + "loss": 0.5113086700439453, + "step": 301 + }, + { + "epoch": 0.08020183242597265, + "grad_norm": 0.9345380756850689, + "learning_rate": 1.5968169761273212e-05, + "loss": 0.5298338532447815, + "step": 302 + }, + { + "epoch": 0.0804674014075156, + "grad_norm": 0.9050429706274331, + "learning_rate": 1.6021220159151195e-05, + "loss": 0.4673181176185608, + "step": 303 + }, + { + "epoch": 0.08073297038905856, + "grad_norm": 0.8972864762330055, + "learning_rate": 1.607427055702918e-05, + "loss": 0.45361828804016113, + "step": 304 + }, + { + "epoch": 0.08099853937060152, + "grad_norm": 0.8848533583648175, + "learning_rate": 1.6127320954907166e-05, + "loss": 0.5144034624099731, + "step": 305 + }, + { + "epoch": 0.08126410835214447, + "grad_norm": 0.9263690972931414, + "learning_rate": 1.6180371352785146e-05, + "loss": 0.5027451515197754, + "step": 306 + }, + { + "epoch": 0.08152967733368742, + "grad_norm": 0.8575377500476566, + "learning_rate": 1.6233421750663133e-05, + "loss": 0.4987551271915436, + "step": 307 + }, + { + "epoch": 0.08179524631523039, + "grad_norm": 
1.0121964253373468, + "learning_rate": 1.6286472148541116e-05, + "loss": 0.5433062314987183, + "step": 308 + }, + { + "epoch": 0.08206081529677334, + "grad_norm": 0.8973695218716041, + "learning_rate": 1.63395225464191e-05, + "loss": 0.49603772163391113, + "step": 309 + }, + { + "epoch": 0.08232638427831629, + "grad_norm": 0.9033181815462389, + "learning_rate": 1.6392572944297083e-05, + "loss": 0.47990959882736206, + "step": 310 + }, + { + "epoch": 0.08259195325985925, + "grad_norm": 0.9843185449650845, + "learning_rate": 1.6445623342175066e-05, + "loss": 0.5196831226348877, + "step": 311 + }, + { + "epoch": 0.0828575222414022, + "grad_norm": 0.8589822510995361, + "learning_rate": 1.6498673740053053e-05, + "loss": 0.4664091467857361, + "step": 312 + }, + { + "epoch": 0.08312309122294516, + "grad_norm": 0.9077443936761218, + "learning_rate": 1.6551724137931037e-05, + "loss": 0.4405553936958313, + "step": 313 + }, + { + "epoch": 0.08338866020448811, + "grad_norm": 0.8561334135462362, + "learning_rate": 1.660477453580902e-05, + "loss": 0.46172815561294556, + "step": 314 + }, + { + "epoch": 0.08365422918603108, + "grad_norm": 0.8835708894071636, + "learning_rate": 1.6657824933687004e-05, + "loss": 0.5004327297210693, + "step": 315 + }, + { + "epoch": 0.08391979816757403, + "grad_norm": 0.8452618593185571, + "learning_rate": 1.6710875331564987e-05, + "loss": 0.4727814197540283, + "step": 316 + }, + { + "epoch": 0.08418536714911698, + "grad_norm": 0.7631381381409372, + "learning_rate": 1.676392572944297e-05, + "loss": 0.43602120876312256, + "step": 317 + }, + { + "epoch": 0.08445093613065995, + "grad_norm": 0.9092168864142193, + "learning_rate": 1.6816976127320957e-05, + "loss": 0.5110410451889038, + "step": 318 + }, + { + "epoch": 0.0847165051122029, + "grad_norm": 0.9902301773407237, + "learning_rate": 1.687002652519894e-05, + "loss": 0.4798283278942108, + "step": 319 + }, + { + "epoch": 0.08498207409374585, + "grad_norm": 0.8572923551208312, + "learning_rate": 
1.6923076923076924e-05, + "loss": 0.45690029859542847, + "step": 320 + }, + { + "epoch": 0.0852476430752888, + "grad_norm": 0.8864718165003516, + "learning_rate": 1.6976127320954908e-05, + "loss": 0.4770117998123169, + "step": 321 + }, + { + "epoch": 0.08551321205683177, + "grad_norm": 0.888032985544436, + "learning_rate": 1.702917771883289e-05, + "loss": 0.512240469455719, + "step": 322 + }, + { + "epoch": 0.08577878103837472, + "grad_norm": 0.8665270088700595, + "learning_rate": 1.7082228116710878e-05, + "loss": 0.4696195423603058, + "step": 323 + }, + { + "epoch": 0.08604435001991767, + "grad_norm": 0.8876364903970222, + "learning_rate": 1.713527851458886e-05, + "loss": 0.4779578149318695, + "step": 324 + }, + { + "epoch": 0.08630991900146064, + "grad_norm": 0.9604080935445363, + "learning_rate": 1.7188328912466845e-05, + "loss": 0.48670440912246704, + "step": 325 + }, + { + "epoch": 0.08657548798300359, + "grad_norm": 0.9813156772782552, + "learning_rate": 1.7241379310344828e-05, + "loss": 0.5285798907279968, + "step": 326 + }, + { + "epoch": 0.08684105696454654, + "grad_norm": 0.9264252564283505, + "learning_rate": 1.7294429708222812e-05, + "loss": 0.46095865964889526, + "step": 327 + }, + { + "epoch": 0.08710662594608949, + "grad_norm": 0.8953179311501671, + "learning_rate": 1.73474801061008e-05, + "loss": 0.44342565536499023, + "step": 328 + }, + { + "epoch": 0.08737219492763246, + "grad_norm": 0.9640917124230414, + "learning_rate": 1.7400530503978782e-05, + "loss": 0.48974257707595825, + "step": 329 + }, + { + "epoch": 0.08763776390917541, + "grad_norm": 1.3568266957703046, + "learning_rate": 1.7453580901856765e-05, + "loss": 0.4763977527618408, + "step": 330 + }, + { + "epoch": 0.08790333289071836, + "grad_norm": 1.0231360729141987, + "learning_rate": 1.750663129973475e-05, + "loss": 0.5390856266021729, + "step": 331 + }, + { + "epoch": 0.08816890187226133, + "grad_norm": 0.9254788253309115, + "learning_rate": 1.7559681697612732e-05, + "loss": 
0.4833192825317383, + "step": 332 + }, + { + "epoch": 0.08843447085380428, + "grad_norm": 0.9106057248503829, + "learning_rate": 1.7612732095490716e-05, + "loss": 0.47842955589294434, + "step": 333 + }, + { + "epoch": 0.08870003983534723, + "grad_norm": 0.8653538374375338, + "learning_rate": 1.7665782493368703e-05, + "loss": 0.4543060064315796, + "step": 334 + }, + { + "epoch": 0.08896560881689018, + "grad_norm": 0.9024795887264612, + "learning_rate": 1.7718832891246686e-05, + "loss": 0.4492039978504181, + "step": 335 + }, + { + "epoch": 0.08923117779843315, + "grad_norm": 0.9660730803540603, + "learning_rate": 1.777188328912467e-05, + "loss": 0.4930066466331482, + "step": 336 + }, + { + "epoch": 0.0894967467799761, + "grad_norm": 0.9494811659806174, + "learning_rate": 1.7824933687002653e-05, + "loss": 0.46343356370925903, + "step": 337 + }, + { + "epoch": 0.08976231576151905, + "grad_norm": 0.98824099461907, + "learning_rate": 1.7877984084880636e-05, + "loss": 0.5118839740753174, + "step": 338 + }, + { + "epoch": 0.09002788474306202, + "grad_norm": 0.9759312233085756, + "learning_rate": 1.7931034482758623e-05, + "loss": 0.4659194350242615, + "step": 339 + }, + { + "epoch": 0.09029345372460497, + "grad_norm": 0.868792760549277, + "learning_rate": 1.7984084880636607e-05, + "loss": 0.45929303765296936, + "step": 340 + }, + { + "epoch": 0.09055902270614792, + "grad_norm": 0.9774857416777888, + "learning_rate": 1.803713527851459e-05, + "loss": 0.5072556734085083, + "step": 341 + }, + { + "epoch": 0.09082459168769087, + "grad_norm": 0.8722377179138728, + "learning_rate": 1.8090185676392577e-05, + "loss": 0.42370402812957764, + "step": 342 + }, + { + "epoch": 0.09109016066923384, + "grad_norm": 0.9404121189660462, + "learning_rate": 1.8143236074270557e-05, + "loss": 0.5017818212509155, + "step": 343 + }, + { + "epoch": 0.09135572965077679, + "grad_norm": 1.0279846493738434, + "learning_rate": 1.8196286472148544e-05, + "loss": 0.4746384620666504, + "step": 344 + }, + { + 
"epoch": 0.09162129863231974, + "grad_norm": 1.0016746569872437, + "learning_rate": 1.8249336870026527e-05, + "loss": 0.49020540714263916, + "step": 345 + }, + { + "epoch": 0.09188686761386271, + "grad_norm": 0.8521475505102624, + "learning_rate": 1.830238726790451e-05, + "loss": 0.4569393992424011, + "step": 346 + }, + { + "epoch": 0.09215243659540566, + "grad_norm": 0.9587089968564823, + "learning_rate": 1.8355437665782494e-05, + "loss": 0.46831727027893066, + "step": 347 + }, + { + "epoch": 0.09241800557694861, + "grad_norm": 0.909230845841239, + "learning_rate": 1.8408488063660478e-05, + "loss": 0.4795265197753906, + "step": 348 + }, + { + "epoch": 0.09268357455849156, + "grad_norm": 0.9641043081337674, + "learning_rate": 1.8461538461538465e-05, + "loss": 0.5122503042221069, + "step": 349 + }, + { + "epoch": 0.09294914354003453, + "grad_norm": 0.8617611974669258, + "learning_rate": 1.8514588859416448e-05, + "loss": 0.4190404713153839, + "step": 350 + }, + { + "epoch": 0.09321471252157748, + "grad_norm": 0.9061006884991066, + "learning_rate": 1.856763925729443e-05, + "loss": 0.47778886556625366, + "step": 351 + }, + { + "epoch": 0.09348028150312043, + "grad_norm": 0.9208451846579827, + "learning_rate": 1.8620689655172415e-05, + "loss": 0.45851507782936096, + "step": 352 + }, + { + "epoch": 0.09374585048466338, + "grad_norm": 1.0050481975496854, + "learning_rate": 1.86737400530504e-05, + "loss": 0.4888782501220703, + "step": 353 + }, + { + "epoch": 0.09401141946620635, + "grad_norm": 0.9454138173982718, + "learning_rate": 1.8726790450928382e-05, + "loss": 0.5032983422279358, + "step": 354 + }, + { + "epoch": 0.0942769884477493, + "grad_norm": 0.9130362696106749, + "learning_rate": 1.877984084880637e-05, + "loss": 0.4754604697227478, + "step": 355 + }, + { + "epoch": 0.09454255742929225, + "grad_norm": 0.9970889038933597, + "learning_rate": 1.8832891246684352e-05, + "loss": 0.488397479057312, + "step": 356 + }, + { + "epoch": 0.09480812641083522, + "grad_norm": 
1.222649143916529, + "learning_rate": 1.8885941644562336e-05, + "loss": 0.4775403141975403, + "step": 357 + }, + { + "epoch": 0.09507369539237817, + "grad_norm": 0.9872263151320333, + "learning_rate": 1.893899204244032e-05, + "loss": 0.47063153982162476, + "step": 358 + }, + { + "epoch": 0.09533926437392112, + "grad_norm": 1.0222144168199743, + "learning_rate": 1.8992042440318303e-05, + "loss": 0.4856908321380615, + "step": 359 + }, + { + "epoch": 0.09560483335546408, + "grad_norm": 0.9195037496858368, + "learning_rate": 1.904509283819629e-05, + "loss": 0.440033495426178, + "step": 360 + }, + { + "epoch": 0.09587040233700704, + "grad_norm": 0.9961899484684762, + "learning_rate": 1.9098143236074273e-05, + "loss": 0.4825770854949951, + "step": 361 + }, + { + "epoch": 0.09613597131854999, + "grad_norm": 0.9443841189655576, + "learning_rate": 1.9151193633952256e-05, + "loss": 0.48192232847213745, + "step": 362 + }, + { + "epoch": 0.09640154030009294, + "grad_norm": 0.9065595450317342, + "learning_rate": 1.920424403183024e-05, + "loss": 0.4689444899559021, + "step": 363 + }, + { + "epoch": 0.09666710928163591, + "grad_norm": 0.9970961253516039, + "learning_rate": 1.9257294429708223e-05, + "loss": 0.47120895981788635, + "step": 364 + }, + { + "epoch": 0.09693267826317886, + "grad_norm": 1.0106028234477955, + "learning_rate": 1.931034482758621e-05, + "loss": 0.4968941807746887, + "step": 365 + }, + { + "epoch": 0.09719824724472181, + "grad_norm": 1.115125675989656, + "learning_rate": 1.9363395225464193e-05, + "loss": 0.46982288360595703, + "step": 366 + }, + { + "epoch": 0.09746381622626477, + "grad_norm": 0.9408972278578609, + "learning_rate": 1.9416445623342177e-05, + "loss": 0.4541531205177307, + "step": 367 + }, + { + "epoch": 0.09772938520780773, + "grad_norm": 0.9760564476186651, + "learning_rate": 1.946949602122016e-05, + "loss": 0.45576703548431396, + "step": 368 + }, + { + "epoch": 0.09799495418935068, + "grad_norm": 0.9893999168346334, + "learning_rate": 
1.9522546419098144e-05, + "loss": 0.48060357570648193, + "step": 369 + }, + { + "epoch": 0.09826052317089363, + "grad_norm": 0.9675810264832774, + "learning_rate": 1.9575596816976127e-05, + "loss": 0.47536781430244446, + "step": 370 + }, + { + "epoch": 0.0985260921524366, + "grad_norm": 0.9516181191759193, + "learning_rate": 1.9628647214854114e-05, + "loss": 0.46463894844055176, + "step": 371 + }, + { + "epoch": 0.09879166113397955, + "grad_norm": 1.0082712913027811, + "learning_rate": 1.9681697612732098e-05, + "loss": 0.49570178985595703, + "step": 372 + }, + { + "epoch": 0.0990572301155225, + "grad_norm": 1.0327922438955468, + "learning_rate": 1.973474801061008e-05, + "loss": 0.4764043390750885, + "step": 373 + }, + { + "epoch": 0.09932279909706546, + "grad_norm": 0.9227866290107449, + "learning_rate": 1.9787798408488064e-05, + "loss": 0.43582671880722046, + "step": 374 + }, + { + "epoch": 0.09958836807860842, + "grad_norm": 0.9360238854832598, + "learning_rate": 1.9840848806366048e-05, + "loss": 0.46077725291252136, + "step": 375 + }, + { + "epoch": 0.09985393706015137, + "grad_norm": 0.9607682273492437, + "learning_rate": 1.9893899204244035e-05, + "loss": 0.4794929027557373, + "step": 376 + }, + { + "epoch": 0.10011950604169433, + "grad_norm": 0.9619848398175739, + "learning_rate": 1.9946949602122018e-05, + "loss": 0.43174588680267334, + "step": 377 + }, + { + "epoch": 0.10038507502323729, + "grad_norm": 0.90095462919728, + "learning_rate": 2e-05, + "loss": 0.44885915517807007, + "step": 378 + }, + { + "epoch": 0.10065064400478024, + "grad_norm": 1.0789787198205218, + "learning_rate": 1.9999999036058974e-05, + "loss": 0.520150899887085, + "step": 379 + }, + { + "epoch": 0.1009162129863232, + "grad_norm": 0.9699182604374589, + "learning_rate": 1.9999996144236068e-05, + "loss": 0.5139277577400208, + "step": 380 + }, + { + "epoch": 0.10118178196786615, + "grad_norm": 1.0077278580199993, + "learning_rate": 1.999999132453184e-05, + "loss": 0.48935171961784363, + 
"step": 381 + }, + { + "epoch": 0.10144735094940911, + "grad_norm": 0.9095465340361383, + "learning_rate": 1.999998457694723e-05, + "loss": 0.4805561304092407, + "step": 382 + }, + { + "epoch": 0.10171291993095206, + "grad_norm": 0.9209321398292457, + "learning_rate": 1.9999975901483532e-05, + "loss": 0.4340912997722626, + "step": 383 + }, + { + "epoch": 0.10197848891249502, + "grad_norm": 1.0414639039942946, + "learning_rate": 1.999996529814242e-05, + "loss": 0.48282474279403687, + "step": 384 + }, + { + "epoch": 0.10224405789403798, + "grad_norm": 0.9753320144694753, + "learning_rate": 1.999995276692593e-05, + "loss": 0.4653206169605255, + "step": 385 + }, + { + "epoch": 0.10250962687558093, + "grad_norm": 0.919281113033857, + "learning_rate": 1.999993830783649e-05, + "loss": 0.48501014709472656, + "step": 386 + }, + { + "epoch": 0.10277519585712389, + "grad_norm": 1.0711296444042975, + "learning_rate": 1.9999921920876882e-05, + "loss": 0.48260143399238586, + "step": 387 + }, + { + "epoch": 0.10304076483866684, + "grad_norm": 0.9590085896328235, + "learning_rate": 1.9999903606050267e-05, + "loss": 0.44557270407676697, + "step": 388 + }, + { + "epoch": 0.1033063338202098, + "grad_norm": 1.111282066618818, + "learning_rate": 1.9999883363360175e-05, + "loss": 0.4843652546405792, + "step": 389 + }, + { + "epoch": 0.10357190280175275, + "grad_norm": 0.9708048507544866, + "learning_rate": 1.9999861192810508e-05, + "loss": 0.4536727964878082, + "step": 390 + }, + { + "epoch": 0.1038374717832957, + "grad_norm": 1.0216212958759847, + "learning_rate": 1.9999837094405538e-05, + "loss": 0.49557366967201233, + "step": 391 + }, + { + "epoch": 0.10410304076483867, + "grad_norm": 1.0254795167373827, + "learning_rate": 1.9999811068149917e-05, + "loss": 0.45077240467071533, + "step": 392 + }, + { + "epoch": 0.10436860974638162, + "grad_norm": 0.9857255709196505, + "learning_rate": 1.9999783114048658e-05, + "loss": 0.4554041624069214, + "step": 393 + }, + { + "epoch": 
0.10463417872792458, + "grad_norm": 0.8770920920154472, + "learning_rate": 1.999975323210715e-05, + "loss": 0.43526744842529297, + "step": 394 + }, + { + "epoch": 0.10489974770946753, + "grad_norm": 0.9824982196768539, + "learning_rate": 1.9999721422331154e-05, + "loss": 0.4097936749458313, + "step": 395 + }, + { + "epoch": 0.1051653166910105, + "grad_norm": 1.013432449022695, + "learning_rate": 1.9999687684726803e-05, + "loss": 0.4740130305290222, + "step": 396 + }, + { + "epoch": 0.10543088567255345, + "grad_norm": 0.9786752992542405, + "learning_rate": 1.9999652019300604e-05, + "loss": 0.43374374508857727, + "step": 397 + }, + { + "epoch": 0.1056964546540964, + "grad_norm": 0.9323415402935509, + "learning_rate": 1.999961442605943e-05, + "loss": 0.4423784911632538, + "step": 398 + }, + { + "epoch": 0.10596202363563936, + "grad_norm": 1.0497518439124596, + "learning_rate": 1.999957490501053e-05, + "loss": 0.4660544693470001, + "step": 399 + }, + { + "epoch": 0.10622759261718231, + "grad_norm": 1.11742327964835, + "learning_rate": 1.999953345616152e-05, + "loss": 0.4579896628856659, + "step": 400 + }, + { + "epoch": 0.10649316159872527, + "grad_norm": 1.0653029752390735, + "learning_rate": 1.9999490079520395e-05, + "loss": 0.4634096920490265, + "step": 401 + }, + { + "epoch": 0.10675873058026822, + "grad_norm": 0.9969566988589958, + "learning_rate": 1.9999444775095517e-05, + "loss": 0.45374077558517456, + "step": 402 + }, + { + "epoch": 0.10702429956181118, + "grad_norm": 1.1298291912896017, + "learning_rate": 1.9999397542895615e-05, + "loss": 0.49752670526504517, + "step": 403 + }, + { + "epoch": 0.10728986854335414, + "grad_norm": 1.049244919494092, + "learning_rate": 1.99993483829298e-05, + "loss": 0.4539335370063782, + "step": 404 + }, + { + "epoch": 0.10755543752489709, + "grad_norm": 1.0017841795942442, + "learning_rate": 1.999929729520755e-05, + "loss": 0.4665772616863251, + "step": 405 + }, + { + "epoch": 0.10782100650644005, + "grad_norm": 
1.023688686658119, + "learning_rate": 1.9999244279738713e-05, + "loss": 0.4850832223892212, + "step": 406 + }, + { + "epoch": 0.108086575487983, + "grad_norm": 0.9960763191436038, + "learning_rate": 1.9999189336533508e-05, + "loss": 0.43974876403808594, + "step": 407 + }, + { + "epoch": 0.10835214446952596, + "grad_norm": 1.0378626233602128, + "learning_rate": 1.9999132465602526e-05, + "loss": 0.46823856234550476, + "step": 408 + }, + { + "epoch": 0.10861771345106891, + "grad_norm": 1.0461372802003532, + "learning_rate": 1.9999073666956734e-05, + "loss": 0.49704545736312866, + "step": 409 + }, + { + "epoch": 0.10888328243261187, + "grad_norm": 1.03380477635781, + "learning_rate": 1.999901294060747e-05, + "loss": 0.3863454759120941, + "step": 410 + }, + { + "epoch": 0.10914885141415483, + "grad_norm": 1.1280569204620268, + "learning_rate": 1.9998950286566438e-05, + "loss": 0.4903780221939087, + "step": 411 + }, + { + "epoch": 0.10941442039569778, + "grad_norm": 0.9546134462956446, + "learning_rate": 1.9998885704845716e-05, + "loss": 0.4312375485897064, + "step": 412 + }, + { + "epoch": 0.10967998937724074, + "grad_norm": 0.9382591225300354, + "learning_rate": 1.9998819195457756e-05, + "loss": 0.4350954294204712, + "step": 413 + }, + { + "epoch": 0.1099455583587837, + "grad_norm": 0.9201016144754837, + "learning_rate": 1.999875075841538e-05, + "loss": 0.4364873766899109, + "step": 414 + }, + { + "epoch": 0.11021112734032665, + "grad_norm": 0.9578414566062486, + "learning_rate": 1.999868039373178e-05, + "loss": 0.42079728841781616, + "step": 415 + }, + { + "epoch": 0.1104766963218696, + "grad_norm": 1.0011321946551845, + "learning_rate": 1.9998608101420527e-05, + "loss": 0.4396737515926361, + "step": 416 + }, + { + "epoch": 0.11074226530341257, + "grad_norm": 0.9922478693245596, + "learning_rate": 1.9998533881495552e-05, + "loss": 0.44765806198120117, + "step": 417 + }, + { + "epoch": 0.11100783428495552, + "grad_norm": 1.0219437952159112, + "learning_rate": 
1.999845773397117e-05, + "loss": 0.46199291944503784, + "step": 418 + }, + { + "epoch": 0.11127340326649847, + "grad_norm": 0.9510961467421052, + "learning_rate": 1.9998379658862058e-05, + "loss": 0.44561129808425903, + "step": 419 + }, + { + "epoch": 0.11153897224804143, + "grad_norm": 1.0559368690309399, + "learning_rate": 1.9998299656183263e-05, + "loss": 0.46025681495666504, + "step": 420 + }, + { + "epoch": 0.11180454122958439, + "grad_norm": 0.9881679042322009, + "learning_rate": 1.999821772595022e-05, + "loss": 0.4408613443374634, + "step": 421 + }, + { + "epoch": 0.11207011021112734, + "grad_norm": 0.9620122842513851, + "learning_rate": 1.999813386817871e-05, + "loss": 0.4846842586994171, + "step": 422 + }, + { + "epoch": 0.11233567919267029, + "grad_norm": 0.9697081207450757, + "learning_rate": 1.999804808288491e-05, + "loss": 0.44503283500671387, + "step": 423 + }, + { + "epoch": 0.11260124817421326, + "grad_norm": 0.9687765160951803, + "learning_rate": 1.9997960370085355e-05, + "loss": 0.4090060293674469, + "step": 424 + }, + { + "epoch": 0.11286681715575621, + "grad_norm": 0.9575575943579401, + "learning_rate": 1.999787072979696e-05, + "loss": 0.43246471881866455, + "step": 425 + }, + { + "epoch": 0.11313238613729916, + "grad_norm": 1.001604978030575, + "learning_rate": 1.9997779162036996e-05, + "loss": 0.46283262968063354, + "step": 426 + }, + { + "epoch": 0.11339795511884213, + "grad_norm": 0.9108113962903395, + "learning_rate": 1.999768566682313e-05, + "loss": 0.3866165578365326, + "step": 427 + }, + { + "epoch": 0.11366352410038508, + "grad_norm": 0.9595506331685858, + "learning_rate": 1.9997590244173374e-05, + "loss": 0.4501144289970398, + "step": 428 + }, + { + "epoch": 0.11392909308192803, + "grad_norm": 0.9153639565172541, + "learning_rate": 1.9997492894106127e-05, + "loss": 0.43005290627479553, + "step": 429 + }, + { + "epoch": 0.11419466206347098, + "grad_norm": 0.9635360081712412, + "learning_rate": 1.9997393616640165e-05, + "loss": 
0.4427964985370636, + "step": 430 + }, + { + "epoch": 0.11446023104501395, + "grad_norm": 1.0560533392763956, + "learning_rate": 1.999729241179462e-05, + "loss": 0.4690951108932495, + "step": 431 + }, + { + "epoch": 0.1147258000265569, + "grad_norm": 0.9559285214931015, + "learning_rate": 1.9997189279589003e-05, + "loss": 0.456949919462204, + "step": 432 + }, + { + "epoch": 0.11499136900809985, + "grad_norm": 0.9851459681291062, + "learning_rate": 1.99970842200432e-05, + "loss": 0.456052303314209, + "step": 433 + }, + { + "epoch": 0.11525693798964282, + "grad_norm": 0.9609923633405658, + "learning_rate": 1.9996977233177466e-05, + "loss": 0.43220120668411255, + "step": 434 + }, + { + "epoch": 0.11552250697118577, + "grad_norm": 0.9022181145862976, + "learning_rate": 1.9996868319012422e-05, + "loss": 0.4237494170665741, + "step": 435 + }, + { + "epoch": 0.11578807595272872, + "grad_norm": 1.1387519975876466, + "learning_rate": 1.9996757477569072e-05, + "loss": 0.4713878631591797, + "step": 436 + }, + { + "epoch": 0.11605364493427167, + "grad_norm": 1.026114633188765, + "learning_rate": 1.9996644708868776e-05, + "loss": 0.4561111330986023, + "step": 437 + }, + { + "epoch": 0.11631921391581464, + "grad_norm": 1.0425252904592188, + "learning_rate": 1.9996530012933285e-05, + "loss": 0.468253493309021, + "step": 438 + }, + { + "epoch": 0.11658478289735759, + "grad_norm": 0.9323050726416767, + "learning_rate": 1.9996413389784704e-05, + "loss": 0.4815019369125366, + "step": 439 + }, + { + "epoch": 0.11685035187890054, + "grad_norm": 0.9369313249225236, + "learning_rate": 1.9996294839445518e-05, + "loss": 0.4235987663269043, + "step": 440 + }, + { + "epoch": 0.1171159208604435, + "grad_norm": 0.9217309559918773, + "learning_rate": 1.999617436193858e-05, + "loss": 0.40562817454338074, + "step": 441 + }, + { + "epoch": 0.11738148984198646, + "grad_norm": 1.1384168500780398, + "learning_rate": 1.999605195728712e-05, + "loss": 0.424539715051651, + "step": 442 + }, + { + "epoch": 
0.11764705882352941, + "grad_norm": 0.9616123874834243, + "learning_rate": 1.9995927625514736e-05, + "loss": 0.43677473068237305, + "step": 443 + }, + { + "epoch": 0.11791262780507236, + "grad_norm": 0.9761533315060044, + "learning_rate": 1.9995801366645396e-05, + "loss": 0.47325971722602844, + "step": 444 + }, + { + "epoch": 0.11817819678661533, + "grad_norm": 0.9447069768738408, + "learning_rate": 1.9995673180703443e-05, + "loss": 0.4206562638282776, + "step": 445 + }, + { + "epoch": 0.11844376576815828, + "grad_norm": 0.9743544240614231, + "learning_rate": 1.999554306771359e-05, + "loss": 0.4492834210395813, + "step": 446 + }, + { + "epoch": 0.11870933474970123, + "grad_norm": 1.0629000505790311, + "learning_rate": 1.9995411027700917e-05, + "loss": 0.4445284605026245, + "step": 447 + }, + { + "epoch": 0.1189749037312442, + "grad_norm": 0.9911650776890225, + "learning_rate": 1.9995277060690885e-05, + "loss": 0.4038352370262146, + "step": 448 + }, + { + "epoch": 0.11924047271278715, + "grad_norm": 0.9418518804089067, + "learning_rate": 1.9995141166709318e-05, + "loss": 0.4261324405670166, + "step": 449 + }, + { + "epoch": 0.1195060416943301, + "grad_norm": 1.067611227425969, + "learning_rate": 1.9995003345782416e-05, + "loss": 0.44187062978744507, + "step": 450 + }, + { + "epoch": 0.11977161067587305, + "grad_norm": 0.9191915914869351, + "learning_rate": 1.9994863597936752e-05, + "loss": 0.44672587513923645, + "step": 451 + }, + { + "epoch": 0.12003717965741602, + "grad_norm": 0.9882052007755191, + "learning_rate": 1.999472192319926e-05, + "loss": 0.44322314858436584, + "step": 452 + }, + { + "epoch": 0.12030274863895897, + "grad_norm": 0.9882289435866314, + "learning_rate": 1.9994578321597258e-05, + "loss": 0.4396611154079437, + "step": 453 + }, + { + "epoch": 0.12056831762050192, + "grad_norm": 0.9831868773412876, + "learning_rate": 1.9994432793158433e-05, + "loss": 0.4487733542919159, + "step": 454 + }, + { + "epoch": 0.12083388660204489, + "grad_norm": 
0.9360753951175719, + "learning_rate": 1.999428533791084e-05, + "loss": 0.3969653248786926, + "step": 455 + }, + { + "epoch": 0.12109945558358784, + "grad_norm": 0.9662346637828156, + "learning_rate": 1.9994135955882906e-05, + "loss": 0.39312344789505005, + "step": 456 + }, + { + "epoch": 0.12136502456513079, + "grad_norm": 0.9019524086641805, + "learning_rate": 1.9993984647103425e-05, + "loss": 0.3979804217815399, + "step": 457 + }, + { + "epoch": 0.12163059354667374, + "grad_norm": 1.0970468981958466, + "learning_rate": 1.9993831411601573e-05, + "loss": 0.4430229365825653, + "step": 458 + }, + { + "epoch": 0.12189616252821671, + "grad_norm": 0.994492352252997, + "learning_rate": 1.9993676249406895e-05, + "loss": 0.4511718451976776, + "step": 459 + }, + { + "epoch": 0.12216173150975966, + "grad_norm": 1.091979336298699, + "learning_rate": 1.9993519160549298e-05, + "loss": 0.4686455726623535, + "step": 460 + }, + { + "epoch": 0.12242730049130261, + "grad_norm": 1.0158374042593608, + "learning_rate": 1.9993360145059073e-05, + "loss": 0.4501730501651764, + "step": 461 + }, + { + "epoch": 0.12269286947284558, + "grad_norm": 0.8530053413909426, + "learning_rate": 1.999319920296687e-05, + "loss": 0.40718767046928406, + "step": 462 + }, + { + "epoch": 0.12295843845438853, + "grad_norm": 1.1181007301257784, + "learning_rate": 1.9993036334303716e-05, + "loss": 0.47313761711120605, + "step": 463 + }, + { + "epoch": 0.12322400743593148, + "grad_norm": 0.9710975932515886, + "learning_rate": 1.9992871539101018e-05, + "loss": 0.47417378425598145, + "step": 464 + }, + { + "epoch": 0.12348957641747443, + "grad_norm": 0.9297582414898758, + "learning_rate": 1.999270481739054e-05, + "loss": 0.44206154346466064, + "step": 465 + }, + { + "epoch": 0.1237551453990174, + "grad_norm": 0.8745553533375581, + "learning_rate": 1.9992536169204427e-05, + "loss": 0.3800848722457886, + "step": 466 + }, + { + "epoch": 0.12402071438056035, + "grad_norm": 0.9337162704530373, + "learning_rate": 
1.9992365594575194e-05, + "loss": 0.40339407324790955, + "step": 467 + }, + { + "epoch": 0.1242862833621033, + "grad_norm": 0.945328490567385, + "learning_rate": 1.999219309353572e-05, + "loss": 0.45280492305755615, + "step": 468 + }, + { + "epoch": 0.12455185234364627, + "grad_norm": 1.0911195899085697, + "learning_rate": 1.9992018666119266e-05, + "loss": 0.4600910544395447, + "step": 469 + }, + { + "epoch": 0.12481742132518922, + "grad_norm": 0.9649890056306747, + "learning_rate": 1.9991842312359458e-05, + "loss": 0.4475003480911255, + "step": 470 + }, + { + "epoch": 0.12508299030673217, + "grad_norm": 1.0493048741226816, + "learning_rate": 1.9991664032290297e-05, + "loss": 0.45377033948898315, + "step": 471 + }, + { + "epoch": 0.12534855928827512, + "grad_norm": 0.9964208438270044, + "learning_rate": 1.9991483825946147e-05, + "loss": 0.4397522509098053, + "step": 472 + }, + { + "epoch": 0.12561412826981808, + "grad_norm": 0.9309535511597795, + "learning_rate": 1.9991301693361756e-05, + "loss": 0.4258221387863159, + "step": 473 + }, + { + "epoch": 0.12587969725136103, + "grad_norm": 0.9120842027423138, + "learning_rate": 1.9991117634572234e-05, + "loss": 0.40272068977355957, + "step": 474 + }, + { + "epoch": 0.126145266232904, + "grad_norm": 0.8761120829975514, + "learning_rate": 1.9990931649613067e-05, + "loss": 0.3721206784248352, + "step": 475 + }, + { + "epoch": 0.12641083521444696, + "grad_norm": 0.9997105907953329, + "learning_rate": 1.9990743738520115e-05, + "loss": 0.4530203938484192, + "step": 476 + }, + { + "epoch": 0.1266764041959899, + "grad_norm": 0.999446109489731, + "learning_rate": 1.999055390132959e-05, + "loss": 0.4281614422798157, + "step": 477 + }, + { + "epoch": 0.12694197317753286, + "grad_norm": 1.3617327829527315, + "learning_rate": 1.999036213807811e-05, + "loss": 0.41965895891189575, + "step": 478 + }, + { + "epoch": 0.12720754215907581, + "grad_norm": 0.9525189428273744, + "learning_rate": 1.9990168448802633e-05, + "loss": 
0.40055203437805176, + "step": 479 + }, + { + "epoch": 0.12747311114061877, + "grad_norm": 1.0868137290392272, + "learning_rate": 1.99899728335405e-05, + "loss": 0.4266522526741028, + "step": 480 + }, + { + "epoch": 0.12773868012216172, + "grad_norm": 1.028316280940819, + "learning_rate": 1.9989775292329425e-05, + "loss": 0.42291250824928284, + "step": 481 + }, + { + "epoch": 0.1280042491037047, + "grad_norm": 1.0319881226067493, + "learning_rate": 1.9989575825207494e-05, + "loss": 0.41346436738967896, + "step": 482 + }, + { + "epoch": 0.12826981808524765, + "grad_norm": 1.0162482863207583, + "learning_rate": 1.998937443221316e-05, + "loss": 0.4092825651168823, + "step": 483 + }, + { + "epoch": 0.1285353870667906, + "grad_norm": 0.9789070022917183, + "learning_rate": 1.998917111338525e-05, + "loss": 0.39763280749320984, + "step": 484 + }, + { + "epoch": 0.12880095604833355, + "grad_norm": 1.1639998102533433, + "learning_rate": 1.9988965868762956e-05, + "loss": 0.45523273944854736, + "step": 485 + }, + { + "epoch": 0.1290665250298765, + "grad_norm": 0.9737102573843942, + "learning_rate": 1.9988758698385854e-05, + "loss": 0.40181300044059753, + "step": 486 + }, + { + "epoch": 0.12933209401141946, + "grad_norm": 1.0269411713354706, + "learning_rate": 1.9988549602293884e-05, + "loss": 0.42487743496894836, + "step": 487 + }, + { + "epoch": 0.1295976629929624, + "grad_norm": 0.9805378587174307, + "learning_rate": 1.998833858052735e-05, + "loss": 0.41672298312187195, + "step": 488 + }, + { + "epoch": 0.1298632319745054, + "grad_norm": 0.9804335652831319, + "learning_rate": 1.998812563312694e-05, + "loss": 0.36750108003616333, + "step": 489 + }, + { + "epoch": 0.13012880095604834, + "grad_norm": 1.0991024476796578, + "learning_rate": 1.9987910760133712e-05, + "loss": 0.49290573596954346, + "step": 490 + }, + { + "epoch": 0.1303943699375913, + "grad_norm": 0.9956647709409898, + "learning_rate": 1.9987693961589084e-05, + "loss": 0.460039347410202, + "step": 491 + }, + { + 
"epoch": 0.13065993891913424, + "grad_norm": 1.269757897267166, + "learning_rate": 1.998747523753485e-05, + "loss": 0.4471668303012848, + "step": 492 + }, + { + "epoch": 0.1309255079006772, + "grad_norm": 0.9411513149719377, + "learning_rate": 1.9987254588013184e-05, + "loss": 0.395844966173172, + "step": 493 + }, + { + "epoch": 0.13119107688222015, + "grad_norm": 0.9546844808839872, + "learning_rate": 1.9987032013066623e-05, + "loss": 0.4465745985507965, + "step": 494 + }, + { + "epoch": 0.1314566458637631, + "grad_norm": 1.0929917252775374, + "learning_rate": 1.9986807512738075e-05, + "loss": 0.43123912811279297, + "step": 495 + }, + { + "epoch": 0.13172221484530608, + "grad_norm": 0.9741124155963404, + "learning_rate": 1.9986581087070824e-05, + "loss": 0.40066564083099365, + "step": 496 + }, + { + "epoch": 0.13198778382684903, + "grad_norm": 0.9421948045046618, + "learning_rate": 1.9986352736108515e-05, + "loss": 0.38514643907546997, + "step": 497 + }, + { + "epoch": 0.13225335280839198, + "grad_norm": 0.9713567699891517, + "learning_rate": 1.9986122459895182e-05, + "loss": 0.37397241592407227, + "step": 498 + }, + { + "epoch": 0.13251892178993493, + "grad_norm": 0.9697777712481016, + "learning_rate": 1.9985890258475215e-05, + "loss": 0.44865745306015015, + "step": 499 + }, + { + "epoch": 0.1327844907714779, + "grad_norm": 1.000823551239605, + "learning_rate": 1.9985656131893374e-05, + "loss": 0.4161406457424164, + "step": 500 + }, + { + "epoch": 0.13305005975302084, + "grad_norm": 1.049045844462056, + "learning_rate": 1.9985420080194804e-05, + "loss": 0.41364359855651855, + "step": 501 + }, + { + "epoch": 0.1333156287345638, + "grad_norm": 0.9766347522178017, + "learning_rate": 1.9985182103425007e-05, + "loss": 0.38466009497642517, + "step": 502 + }, + { + "epoch": 0.13358119771610677, + "grad_norm": 0.9820108788569575, + "learning_rate": 1.9984942201629868e-05, + "loss": 0.4189472794532776, + "step": 503 + }, + { + "epoch": 0.13384676669764972, + "grad_norm": 
1.0124943582595707, + "learning_rate": 1.998470037485563e-05, + "loss": 0.4088754653930664, + "step": 504 + }, + { + "epoch": 0.13411233567919267, + "grad_norm": 0.9404621165531668, + "learning_rate": 1.9984456623148923e-05, + "loss": 0.4197084307670593, + "step": 505 + }, + { + "epoch": 0.13437790466073563, + "grad_norm": 1.022677047132229, + "learning_rate": 1.998421094655673e-05, + "loss": 0.4318644404411316, + "step": 506 + }, + { + "epoch": 0.13464347364227858, + "grad_norm": 0.9443470782499029, + "learning_rate": 1.9983963345126423e-05, + "loss": 0.38180238008499146, + "step": 507 + }, + { + "epoch": 0.13490904262382153, + "grad_norm": 0.9655473739081939, + "learning_rate": 1.9983713818905733e-05, + "loss": 0.38704103231430054, + "step": 508 + }, + { + "epoch": 0.13517461160536448, + "grad_norm": 1.050357567916831, + "learning_rate": 1.998346236794276e-05, + "loss": 0.4206693768501282, + "step": 509 + }, + { + "epoch": 0.13544018058690746, + "grad_norm": 1.1108901361228778, + "learning_rate": 1.9983208992285993e-05, + "loss": 0.42818987369537354, + "step": 510 + }, + { + "epoch": 0.1357057495684504, + "grad_norm": 1.0771548955106338, + "learning_rate": 1.9982953691984274e-05, + "loss": 0.44592660665512085, + "step": 511 + }, + { + "epoch": 0.13597131854999336, + "grad_norm": 1.006125968429414, + "learning_rate": 1.9982696467086815e-05, + "loss": 0.4272580146789551, + "step": 512 + }, + { + "epoch": 0.13623688753153632, + "grad_norm": 1.084212872761102, + "learning_rate": 1.9982437317643218e-05, + "loss": 0.4416295289993286, + "step": 513 + }, + { + "epoch": 0.13650245651307927, + "grad_norm": 1.1040865905907058, + "learning_rate": 1.998217624370343e-05, + "loss": 0.45108669996261597, + "step": 514 + }, + { + "epoch": 0.13676802549462222, + "grad_norm": 0.9866796372680723, + "learning_rate": 1.9981913245317802e-05, + "loss": 0.40311864018440247, + "step": 515 + }, + { + "epoch": 0.13703359447616517, + "grad_norm": 1.041531014011416, + "learning_rate": 
1.9981648322537017e-05, + "loss": 0.4388020932674408, + "step": 516 + }, + { + "epoch": 0.13729916345770815, + "grad_norm": 1.069295153220874, + "learning_rate": 1.9981381475412162e-05, + "loss": 0.42741361260414124, + "step": 517 + }, + { + "epoch": 0.1375647324392511, + "grad_norm": 0.8562984414004653, + "learning_rate": 1.9981112703994677e-05, + "loss": 0.3766555190086365, + "step": 518 + }, + { + "epoch": 0.13783030142079405, + "grad_norm": 0.9297024970383198, + "learning_rate": 1.998084200833638e-05, + "loss": 0.38618308305740356, + "step": 519 + }, + { + "epoch": 0.138095870402337, + "grad_norm": 1.0033450202172107, + "learning_rate": 1.9980569388489457e-05, + "loss": 0.4553264379501343, + "step": 520 + }, + { + "epoch": 0.13836143938387996, + "grad_norm": 1.024202819723292, + "learning_rate": 1.9980294844506468e-05, + "loss": 0.44632673263549805, + "step": 521 + }, + { + "epoch": 0.1386270083654229, + "grad_norm": 1.0907023510727254, + "learning_rate": 1.998001837644033e-05, + "loss": 0.4285067617893219, + "step": 522 + }, + { + "epoch": 0.13889257734696586, + "grad_norm": 0.9721672428790065, + "learning_rate": 1.9979739984344365e-05, + "loss": 0.39360538125038147, + "step": 523 + }, + { + "epoch": 0.13915814632850884, + "grad_norm": 0.9475835393492287, + "learning_rate": 1.9979459668272226e-05, + "loss": 0.4007593095302582, + "step": 524 + }, + { + "epoch": 0.1394237153100518, + "grad_norm": 1.028990364637073, + "learning_rate": 1.9979177428277955e-05, + "loss": 0.40176767110824585, + "step": 525 + }, + { + "epoch": 0.13968928429159475, + "grad_norm": 1.0167293750004343, + "learning_rate": 1.9978893264415978e-05, + "loss": 0.4190528392791748, + "step": 526 + }, + { + "epoch": 0.1399548532731377, + "grad_norm": 0.9871913820335487, + "learning_rate": 1.9978607176741063e-05, + "loss": 0.4139288067817688, + "step": 527 + }, + { + "epoch": 0.14022042225468065, + "grad_norm": 0.8610694360554231, + "learning_rate": 1.9978319165308373e-05, + "loss": 
0.3666151463985443, + "step": 528 + }, + { + "epoch": 0.1404859912362236, + "grad_norm": 1.016794526359022, + "learning_rate": 1.997802923017343e-05, + "loss": 0.44621142745018005, + "step": 529 + }, + { + "epoch": 0.14075156021776655, + "grad_norm": 0.9742602007181285, + "learning_rate": 1.9977737371392134e-05, + "loss": 0.4162977635860443, + "step": 530 + }, + { + "epoch": 0.14101712919930953, + "grad_norm": 1.0386051117102446, + "learning_rate": 1.997744358902075e-05, + "loss": 0.438882052898407, + "step": 531 + }, + { + "epoch": 0.14128269818085248, + "grad_norm": 0.9131334625730753, + "learning_rate": 1.997714788311591e-05, + "loss": 0.43381333351135254, + "step": 532 + }, + { + "epoch": 0.14154826716239544, + "grad_norm": 1.0341262373297713, + "learning_rate": 1.9976850253734633e-05, + "loss": 0.41925039887428284, + "step": 533 + }, + { + "epoch": 0.1418138361439384, + "grad_norm": 1.0366031704059997, + "learning_rate": 1.997655070093429e-05, + "loss": 0.40469998121261597, + "step": 534 + }, + { + "epoch": 0.14207940512548134, + "grad_norm": 1.069653848503876, + "learning_rate": 1.9976249224772638e-05, + "loss": 0.4252749979496002, + "step": 535 + }, + { + "epoch": 0.1423449741070243, + "grad_norm": 0.9131599330211423, + "learning_rate": 1.9975945825307788e-05, + "loss": 0.42437341809272766, + "step": 536 + }, + { + "epoch": 0.14261054308856724, + "grad_norm": 0.9295944144104017, + "learning_rate": 1.9975640502598243e-05, + "loss": 0.3435184955596924, + "step": 537 + }, + { + "epoch": 0.14287611207011022, + "grad_norm": 1.135805935036872, + "learning_rate": 1.9975333256702864e-05, + "loss": 0.4677535593509674, + "step": 538 + }, + { + "epoch": 0.14314168105165317, + "grad_norm": 0.9857610455714647, + "learning_rate": 1.9975024087680873e-05, + "loss": 0.3860551118850708, + "step": 539 + }, + { + "epoch": 0.14340725003319613, + "grad_norm": 1.0260051612127887, + "learning_rate": 1.9974712995591887e-05, + "loss": 0.4067271649837494, + "step": 540 + }, + { + 
"epoch": 0.14367281901473908, + "grad_norm": 1.0673102525592195, + "learning_rate": 1.9974399980495877e-05, + "loss": 0.42236536741256714, + "step": 541 + }, + { + "epoch": 0.14393838799628203, + "grad_norm": 0.9825710114440017, + "learning_rate": 1.9974085042453188e-05, + "loss": 0.45230624079704285, + "step": 542 + }, + { + "epoch": 0.14420395697782498, + "grad_norm": 1.0223761508252163, + "learning_rate": 1.997376818152453e-05, + "loss": 0.428194522857666, + "step": 543 + }, + { + "epoch": 0.14446952595936793, + "grad_norm": 1.0337438279048081, + "learning_rate": 1.9973449397771004e-05, + "loss": 0.40774789452552795, + "step": 544 + }, + { + "epoch": 0.1447350949409109, + "grad_norm": 0.9168779980285519, + "learning_rate": 1.9973128691254054e-05, + "loss": 0.4086815118789673, + "step": 545 + }, + { + "epoch": 0.14500066392245387, + "grad_norm": 0.9934439062572693, + "learning_rate": 1.997280606203552e-05, + "loss": 0.4045162796974182, + "step": 546 + }, + { + "epoch": 0.14526623290399682, + "grad_norm": 1.0110955437735047, + "learning_rate": 1.9972481510177594e-05, + "loss": 0.40463268756866455, + "step": 547 + }, + { + "epoch": 0.14553180188553977, + "grad_norm": 1.0029896014566093, + "learning_rate": 1.9972155035742847e-05, + "loss": 0.46733587980270386, + "step": 548 + }, + { + "epoch": 0.14579737086708272, + "grad_norm": 0.9683751197048177, + "learning_rate": 1.997182663879422e-05, + "loss": 0.45210930705070496, + "step": 549 + }, + { + "epoch": 0.14606293984862567, + "grad_norm": 0.9559484778346481, + "learning_rate": 1.9971496319395022e-05, + "loss": 0.39798587560653687, + "step": 550 + }, + { + "epoch": 0.14632850883016862, + "grad_norm": 1.0582410708312875, + "learning_rate": 1.9971164077608937e-05, + "loss": 0.4166080057621002, + "step": 551 + }, + { + "epoch": 0.1465940778117116, + "grad_norm": 0.99705391441119, + "learning_rate": 1.9970829913500017e-05, + "loss": 0.3995435833930969, + "step": 552 + }, + { + "epoch": 0.14685964679325456, + "grad_norm": 
0.9693599664680953, + "learning_rate": 1.9970493827132686e-05, + "loss": 0.39335039258003235, + "step": 553 + }, + { + "epoch": 0.1471252157747975, + "grad_norm": 1.0653128556742777, + "learning_rate": 1.9970155818571733e-05, + "loss": 0.3923008441925049, + "step": 554 + }, + { + "epoch": 0.14739078475634046, + "grad_norm": 1.1000528384874784, + "learning_rate": 1.996981588788233e-05, + "loss": 0.42148759961128235, + "step": 555 + }, + { + "epoch": 0.1476563537378834, + "grad_norm": 0.9532704289154984, + "learning_rate": 1.9969474035130005e-05, + "loss": 0.36099517345428467, + "step": 556 + }, + { + "epoch": 0.14792192271942636, + "grad_norm": 0.9498609858415961, + "learning_rate": 1.9969130260380663e-05, + "loss": 0.39650559425354004, + "step": 557 + }, + { + "epoch": 0.14818749170096931, + "grad_norm": 0.9667452630427784, + "learning_rate": 1.9968784563700586e-05, + "loss": 0.36410078406333923, + "step": 558 + }, + { + "epoch": 0.1484530606825123, + "grad_norm": 1.002419821858965, + "learning_rate": 1.996843694515641e-05, + "loss": 0.41312888264656067, + "step": 559 + }, + { + "epoch": 0.14871862966405525, + "grad_norm": 1.1088153047335336, + "learning_rate": 1.9968087404815162e-05, + "loss": 0.3895263373851776, + "step": 560 + }, + { + "epoch": 0.1489841986455982, + "grad_norm": 1.2422388501205763, + "learning_rate": 1.9967735942744226e-05, + "loss": 0.4400597810745239, + "step": 561 + }, + { + "epoch": 0.14924976762714115, + "grad_norm": 1.1300700300497077, + "learning_rate": 1.9967382559011356e-05, + "loss": 0.36712852120399475, + "step": 562 + }, + { + "epoch": 0.1495153366086841, + "grad_norm": 1.0425502358891738, + "learning_rate": 1.9967027253684685e-05, + "loss": 0.4043564200401306, + "step": 563 + }, + { + "epoch": 0.14978090559022705, + "grad_norm": 1.101160625764444, + "learning_rate": 1.9966670026832707e-05, + "loss": 0.45233044028282166, + "step": 564 + }, + { + "epoch": 0.15004647457177, + "grad_norm": 1.3277254520379258, + "learning_rate": 
1.9966310878524297e-05, + "loss": 0.441600501537323, + "step": 565 + }, + { + "epoch": 0.15031204355331299, + "grad_norm": 1.0833095900878238, + "learning_rate": 1.9965949808828687e-05, + "loss": 0.4268038868904114, + "step": 566 + }, + { + "epoch": 0.15057761253485594, + "grad_norm": 1.1492448156590855, + "learning_rate": 1.9965586817815494e-05, + "loss": 0.41927874088287354, + "step": 567 + }, + { + "epoch": 0.1508431815163989, + "grad_norm": 1.026170307581087, + "learning_rate": 1.9965221905554695e-05, + "loss": 0.41488781571388245, + "step": 568 + }, + { + "epoch": 0.15110875049794184, + "grad_norm": 0.9559142330236491, + "learning_rate": 1.9964855072116642e-05, + "loss": 0.3624749779701233, + "step": 569 + }, + { + "epoch": 0.1513743194794848, + "grad_norm": 1.254830306735622, + "learning_rate": 1.996448631757206e-05, + "loss": 0.45119866728782654, + "step": 570 + }, + { + "epoch": 0.15163988846102774, + "grad_norm": 1.095837461898702, + "learning_rate": 1.996411564199203e-05, + "loss": 0.41389739513397217, + "step": 571 + }, + { + "epoch": 0.1519054574425707, + "grad_norm": 0.9684460814064966, + "learning_rate": 1.996374304544802e-05, + "loss": 0.3640916347503662, + "step": 572 + }, + { + "epoch": 0.15217102642411368, + "grad_norm": 1.0711015344753547, + "learning_rate": 1.9963368528011867e-05, + "loss": 0.45648565888404846, + "step": 573 + }, + { + "epoch": 0.15243659540565663, + "grad_norm": 0.9722794055909949, + "learning_rate": 1.9962992089755765e-05, + "loss": 0.4335980713367462, + "step": 574 + }, + { + "epoch": 0.15270216438719958, + "grad_norm": 1.158400874054287, + "learning_rate": 1.996261373075229e-05, + "loss": 0.3908158540725708, + "step": 575 + }, + { + "epoch": 0.15296773336874253, + "grad_norm": 0.9311953954584888, + "learning_rate": 1.996223345107439e-05, + "loss": 0.36533305048942566, + "step": 576 + }, + { + "epoch": 0.15323330235028548, + "grad_norm": 0.9771467412652409, + "learning_rate": 1.9961851250795372e-05, + "loss": 
0.407212495803833, + "step": 577 + }, + { + "epoch": 0.15349887133182843, + "grad_norm": 0.9988499065644934, + "learning_rate": 1.996146712998892e-05, + "loss": 0.4266315698623657, + "step": 578 + }, + { + "epoch": 0.1537644403133714, + "grad_norm": 0.9843108485081927, + "learning_rate": 1.9961081088729092e-05, + "loss": 0.3806581199169159, + "step": 579 + }, + { + "epoch": 0.15403000929491437, + "grad_norm": 0.9497423806639163, + "learning_rate": 1.9960693127090312e-05, + "loss": 0.40962716937065125, + "step": 580 + }, + { + "epoch": 0.15429557827645732, + "grad_norm": 0.94680923059909, + "learning_rate": 1.996030324514737e-05, + "loss": 0.4195394515991211, + "step": 581 + }, + { + "epoch": 0.15456114725800027, + "grad_norm": 1.0211843119224446, + "learning_rate": 1.995991144297543e-05, + "loss": 0.4366803765296936, + "step": 582 + }, + { + "epoch": 0.15482671623954322, + "grad_norm": 1.1779341722116263, + "learning_rate": 1.995951772065004e-05, + "loss": 0.44951680302619934, + "step": 583 + }, + { + "epoch": 0.15509228522108617, + "grad_norm": 1.1165714790353467, + "learning_rate": 1.9959122078247088e-05, + "loss": 0.42920851707458496, + "step": 584 + }, + { + "epoch": 0.15535785420262913, + "grad_norm": 1.3260467831670406, + "learning_rate": 1.9958724515842856e-05, + "loss": 0.3805098533630371, + "step": 585 + }, + { + "epoch": 0.15562342318417208, + "grad_norm": 1.1544212798945541, + "learning_rate": 1.995832503351399e-05, + "loss": 0.439333438873291, + "step": 586 + }, + { + "epoch": 0.15588899216571506, + "grad_norm": 0.9414235863159184, + "learning_rate": 1.9957923631337505e-05, + "loss": 0.38338547945022583, + "step": 587 + }, + { + "epoch": 0.156154561147258, + "grad_norm": 0.9711288321476074, + "learning_rate": 1.9957520309390786e-05, + "loss": 0.40603697299957275, + "step": 588 + }, + { + "epoch": 0.15642013012880096, + "grad_norm": 0.9468286962292546, + "learning_rate": 1.9957115067751594e-05, + "loss": 0.42816999554634094, + "step": 589 + }, + { + 
"epoch": 0.1566856991103439, + "grad_norm": 0.979497417166178, + "learning_rate": 1.9956707906498046e-05, + "loss": 0.42367884516716003, + "step": 590 + }, + { + "epoch": 0.15695126809188686, + "grad_norm": 1.1158588594509518, + "learning_rate": 1.995629882570864e-05, + "loss": 0.4349297881126404, + "step": 591 + }, + { + "epoch": 0.15721683707342982, + "grad_norm": 0.9762108745852242, + "learning_rate": 1.995588782546225e-05, + "loss": 0.37990960478782654, + "step": 592 + }, + { + "epoch": 0.15748240605497277, + "grad_norm": 0.9495653219493333, + "learning_rate": 1.9955474905838102e-05, + "loss": 0.4085468649864197, + "step": 593 + }, + { + "epoch": 0.15774797503651575, + "grad_norm": 0.9419429879365407, + "learning_rate": 1.995506006691581e-05, + "loss": 0.41362464427948, + "step": 594 + }, + { + "epoch": 0.1580135440180587, + "grad_norm": 1.002559702640921, + "learning_rate": 1.9954643308775342e-05, + "loss": 0.3830018937587738, + "step": 595 + }, + { + "epoch": 0.15827911299960165, + "grad_norm": 1.1505182326275074, + "learning_rate": 1.995422463149705e-05, + "loss": 0.48350822925567627, + "step": 596 + }, + { + "epoch": 0.1585446819811446, + "grad_norm": 0.9889824166630486, + "learning_rate": 1.995380403516165e-05, + "loss": 0.4215185344219208, + "step": 597 + }, + { + "epoch": 0.15881025096268755, + "grad_norm": 1.06826056700577, + "learning_rate": 1.9953381519850224e-05, + "loss": 0.42061948776245117, + "step": 598 + }, + { + "epoch": 0.1590758199442305, + "grad_norm": 1.032451381790901, + "learning_rate": 1.995295708564423e-05, + "loss": 0.38956254720687866, + "step": 599 + }, + { + "epoch": 0.15934138892577346, + "grad_norm": 1.0492553607775368, + "learning_rate": 1.9952530732625492e-05, + "loss": 0.3864685893058777, + "step": 600 + }, + { + "epoch": 0.15960695790731644, + "grad_norm": 0.9770856461072062, + "learning_rate": 1.9952102460876214e-05, + "loss": 0.395724356174469, + "step": 601 + }, + { + "epoch": 0.1598725268888594, + "grad_norm": 
1.04245602393598, + "learning_rate": 1.995167227047895e-05, + "loss": 0.4220300316810608, + "step": 602 + }, + { + "epoch": 0.16013809587040234, + "grad_norm": 1.1406615370546667, + "learning_rate": 1.9951240161516643e-05, + "loss": 0.4129142165184021, + "step": 603 + }, + { + "epoch": 0.1604036648519453, + "grad_norm": 0.983753356740355, + "learning_rate": 1.9950806134072595e-05, + "loss": 0.3951375484466553, + "step": 604 + }, + { + "epoch": 0.16066923383348825, + "grad_norm": 1.0214548083454909, + "learning_rate": 1.9950370188230486e-05, + "loss": 0.4117582142353058, + "step": 605 + }, + { + "epoch": 0.1609348028150312, + "grad_norm": 1.0340746201961049, + "learning_rate": 1.994993232407436e-05, + "loss": 0.3920668363571167, + "step": 606 + }, + { + "epoch": 0.16120037179657415, + "grad_norm": 0.9768399206450091, + "learning_rate": 1.9949492541688626e-05, + "loss": 0.3756999373435974, + "step": 607 + }, + { + "epoch": 0.16146594077811713, + "grad_norm": 1.0034054922110034, + "learning_rate": 1.9949050841158078e-05, + "loss": 0.41009610891342163, + "step": 608 + }, + { + "epoch": 0.16173150975966008, + "grad_norm": 0.9847346075479474, + "learning_rate": 1.994860722256786e-05, + "loss": 0.3986571729183197, + "step": 609 + }, + { + "epoch": 0.16199707874120303, + "grad_norm": 0.9978440495541314, + "learning_rate": 1.994816168600351e-05, + "loss": 0.3903341591358185, + "step": 610 + }, + { + "epoch": 0.16226264772274598, + "grad_norm": 0.9992231775305654, + "learning_rate": 1.994771423155091e-05, + "loss": 0.39725261926651, + "step": 611 + }, + { + "epoch": 0.16252821670428894, + "grad_norm": 0.9446936558476315, + "learning_rate": 1.994726485929633e-05, + "loss": 0.39461129903793335, + "step": 612 + }, + { + "epoch": 0.1627937856858319, + "grad_norm": 1.0162077284831286, + "learning_rate": 1.99468135693264e-05, + "loss": 0.41346144676208496, + "step": 613 + }, + { + "epoch": 0.16305935466737484, + "grad_norm": 1.0305116850266922, + "learning_rate": 
1.9946360361728127e-05, + "loss": 0.41148197650909424, + "step": 614 + }, + { + "epoch": 0.16332492364891782, + "grad_norm": 0.9678436330540818, + "learning_rate": 1.9945905236588884e-05, + "loss": 0.38204139471054077, + "step": 615 + }, + { + "epoch": 0.16359049263046077, + "grad_norm": 0.9830320911733957, + "learning_rate": 1.9945448193996412e-05, + "loss": 0.41496896743774414, + "step": 616 + }, + { + "epoch": 0.16385606161200372, + "grad_norm": 0.9327494941136337, + "learning_rate": 1.994498923403882e-05, + "loss": 0.38998982310295105, + "step": 617 + }, + { + "epoch": 0.16412163059354667, + "grad_norm": 1.0310759290486786, + "learning_rate": 1.99445283568046e-05, + "loss": 0.39018991589546204, + "step": 618 + }, + { + "epoch": 0.16438719957508963, + "grad_norm": 1.1133251353738367, + "learning_rate": 1.9944065562382594e-05, + "loss": 0.41579991579055786, + "step": 619 + }, + { + "epoch": 0.16465276855663258, + "grad_norm": 1.1413714641323347, + "learning_rate": 1.9943600850862027e-05, + "loss": 0.426283061504364, + "step": 620 + }, + { + "epoch": 0.16491833753817553, + "grad_norm": 1.0537239280428552, + "learning_rate": 1.9943134222332493e-05, + "loss": 0.418672651052475, + "step": 621 + }, + { + "epoch": 0.1651839065197185, + "grad_norm": 1.0177048245128393, + "learning_rate": 1.9942665676883946e-05, + "loss": 0.4014776349067688, + "step": 622 + }, + { + "epoch": 0.16544947550126146, + "grad_norm": 0.9703989792649265, + "learning_rate": 1.994219521460672e-05, + "loss": 0.3714776933193207, + "step": 623 + }, + { + "epoch": 0.1657150444828044, + "grad_norm": 1.005321267739283, + "learning_rate": 1.9941722835591514e-05, + "loss": 0.39415785670280457, + "step": 624 + }, + { + "epoch": 0.16598061346434737, + "grad_norm": 1.739817458909074, + "learning_rate": 1.9941248539929395e-05, + "loss": 0.3706223964691162, + "step": 625 + }, + { + "epoch": 0.16624618244589032, + "grad_norm": 0.9887487099192142, + "learning_rate": 1.9940772327711807e-05, + "loss": 
0.4167429208755493, + "step": 626 + }, + { + "epoch": 0.16651175142743327, + "grad_norm": 1.0502993213264278, + "learning_rate": 1.9940294199030553e-05, + "loss": 0.38234227895736694, + "step": 627 + }, + { + "epoch": 0.16677732040897622, + "grad_norm": 0.9929957655695576, + "learning_rate": 1.9939814153977813e-05, + "loss": 0.4139519929885864, + "step": 628 + }, + { + "epoch": 0.1670428893905192, + "grad_norm": 1.0428716869119874, + "learning_rate": 1.9939332192646136e-05, + "loss": 0.44490402936935425, + "step": 629 + }, + { + "epoch": 0.16730845837206215, + "grad_norm": 0.9723220719956404, + "learning_rate": 1.993884831512843e-05, + "loss": 0.3870658278465271, + "step": 630 + }, + { + "epoch": 0.1675740273536051, + "grad_norm": 0.9337218443909966, + "learning_rate": 1.993836252151799e-05, + "loss": 0.3308948278427124, + "step": 631 + }, + { + "epoch": 0.16783959633514806, + "grad_norm": 1.1119638169858157, + "learning_rate": 1.993787481190847e-05, + "loss": 0.3727487623691559, + "step": 632 + }, + { + "epoch": 0.168105165316691, + "grad_norm": 1.0025380900585623, + "learning_rate": 1.9937385186393888e-05, + "loss": 0.4277465343475342, + "step": 633 + }, + { + "epoch": 0.16837073429823396, + "grad_norm": 1.2120120873899203, + "learning_rate": 1.9936893645068647e-05, + "loss": 0.4276485741138458, + "step": 634 + }, + { + "epoch": 0.1686363032797769, + "grad_norm": 1.000070161461063, + "learning_rate": 1.9936400188027502e-05, + "loss": 0.374578058719635, + "step": 635 + }, + { + "epoch": 0.1689018722613199, + "grad_norm": 1.113556890943216, + "learning_rate": 1.993590481536559e-05, + "loss": 0.4583400785923004, + "step": 636 + }, + { + "epoch": 0.16916744124286284, + "grad_norm": 0.9731147624235688, + "learning_rate": 1.9935407527178417e-05, + "loss": 0.3734489679336548, + "step": 637 + }, + { + "epoch": 0.1694330102244058, + "grad_norm": 1.0110441212525507, + "learning_rate": 1.9934908323561846e-05, + "loss": 0.39524513483047485, + "step": 638 + }, + { + "epoch": 
0.16969857920594875, + "grad_norm": 1.0264447655460065, + "learning_rate": 1.9934407204612124e-05, + "loss": 0.42300352454185486, + "step": 639 + }, + { + "epoch": 0.1699641481874917, + "grad_norm": 0.9950374891978715, + "learning_rate": 1.9933904170425858e-05, + "loss": 0.4152276813983917, + "step": 640 + }, + { + "epoch": 0.17022971716903465, + "grad_norm": 1.230783330329369, + "learning_rate": 1.9933399221100026e-05, + "loss": 0.43046653270721436, + "step": 641 + }, + { + "epoch": 0.1704952861505776, + "grad_norm": 1.0095783418631343, + "learning_rate": 1.993289235673198e-05, + "loss": 0.4134339392185211, + "step": 642 + }, + { + "epoch": 0.17076085513212058, + "grad_norm": 1.0051407398693462, + "learning_rate": 1.9932383577419432e-05, + "loss": 0.44028693437576294, + "step": 643 + }, + { + "epoch": 0.17102642411366353, + "grad_norm": 1.0208746920457954, + "learning_rate": 1.9931872883260473e-05, + "loss": 0.3790222704410553, + "step": 644 + }, + { + "epoch": 0.17129199309520649, + "grad_norm": 1.041462978505965, + "learning_rate": 1.9931360274353556e-05, + "loss": 0.3683086633682251, + "step": 645 + }, + { + "epoch": 0.17155756207674944, + "grad_norm": 1.0400069352454702, + "learning_rate": 1.993084575079751e-05, + "loss": 0.3630594313144684, + "step": 646 + }, + { + "epoch": 0.1718231310582924, + "grad_norm": 1.0694046561659416, + "learning_rate": 1.993032931269153e-05, + "loss": 0.4398641884326935, + "step": 647 + }, + { + "epoch": 0.17208870003983534, + "grad_norm": 1.107156801944608, + "learning_rate": 1.992981096013517e-05, + "loss": 0.42222845554351807, + "step": 648 + }, + { + "epoch": 0.1723542690213783, + "grad_norm": 1.043160064840446, + "learning_rate": 1.992929069322837e-05, + "loss": 0.38966643810272217, + "step": 649 + }, + { + "epoch": 0.17261983800292127, + "grad_norm": 1.0607803195691352, + "learning_rate": 1.992876851207143e-05, + "loss": 0.4394804835319519, + "step": 650 + }, + { + "epoch": 0.17288540698446422, + "grad_norm": 
0.9714467718451273, + "learning_rate": 1.9928244416765022e-05, + "loss": 0.3475287854671478, + "step": 651 + }, + { + "epoch": 0.17315097596600718, + "grad_norm": 0.9848879046616053, + "learning_rate": 1.992771840741018e-05, + "loss": 0.40047168731689453, + "step": 652 + }, + { + "epoch": 0.17341654494755013, + "grad_norm": 1.0744593937096147, + "learning_rate": 1.9927190484108315e-05, + "loss": 0.4028981328010559, + "step": 653 + }, + { + "epoch": 0.17368211392909308, + "grad_norm": 1.010491020672817, + "learning_rate": 1.9926660646961208e-05, + "loss": 0.3891482949256897, + "step": 654 + }, + { + "epoch": 0.17394768291063603, + "grad_norm": 1.1163232689680433, + "learning_rate": 1.9926128896071e-05, + "loss": 0.4570680856704712, + "step": 655 + }, + { + "epoch": 0.17421325189217898, + "grad_norm": 0.9509061944047602, + "learning_rate": 1.992559523154021e-05, + "loss": 0.392758309841156, + "step": 656 + }, + { + "epoch": 0.17447882087372196, + "grad_norm": 0.9648168194829144, + "learning_rate": 1.992505965347172e-05, + "loss": 0.39552047848701477, + "step": 657 + }, + { + "epoch": 0.17474438985526491, + "grad_norm": 1.045434666464082, + "learning_rate": 1.992452216196879e-05, + "loss": 0.4412619173526764, + "step": 658 + }, + { + "epoch": 0.17500995883680787, + "grad_norm": 1.033655605856329, + "learning_rate": 1.9923982757135028e-05, + "loss": 0.4075942635536194, + "step": 659 + }, + { + "epoch": 0.17527552781835082, + "grad_norm": 1.0660210414475448, + "learning_rate": 1.9923441439074434e-05, + "loss": 0.44615018367767334, + "step": 660 + }, + { + "epoch": 0.17554109679989377, + "grad_norm": 0.9504988883268379, + "learning_rate": 1.992289820789137e-05, + "loss": 0.3957441449165344, + "step": 661 + }, + { + "epoch": 0.17580666578143672, + "grad_norm": 0.9513339400965243, + "learning_rate": 1.992235306369056e-05, + "loss": 0.4014820158481598, + "step": 662 + }, + { + "epoch": 0.17607223476297967, + "grad_norm": 0.9988043316582222, + "learning_rate": 
1.9921806006577102e-05, + "loss": 0.39478158950805664, + "step": 663 + }, + { + "epoch": 0.17633780374452265, + "grad_norm": 1.0278124558587338, + "learning_rate": 1.9921257036656463e-05, + "loss": 0.45742082595825195, + "step": 664 + }, + { + "epoch": 0.1766033727260656, + "grad_norm": 0.9674516471555401, + "learning_rate": 1.9920706154034477e-05, + "loss": 0.36519041657447815, + "step": 665 + }, + { + "epoch": 0.17686894170760856, + "grad_norm": 1.0086354363577679, + "learning_rate": 1.992015335881735e-05, + "loss": 0.40599358081817627, + "step": 666 + }, + { + "epoch": 0.1771345106891515, + "grad_norm": 0.958585892866014, + "learning_rate": 1.991959865111165e-05, + "loss": 0.4064781367778778, + "step": 667 + }, + { + "epoch": 0.17740007967069446, + "grad_norm": 0.9430583774727941, + "learning_rate": 1.991904203102432e-05, + "loss": 0.4076484143733978, + "step": 668 + }, + { + "epoch": 0.1776656486522374, + "grad_norm": 1.1044553051326549, + "learning_rate": 1.9918483498662678e-05, + "loss": 0.42157143354415894, + "step": 669 + }, + { + "epoch": 0.17793121763378036, + "grad_norm": 1.005923050768092, + "learning_rate": 1.9917923054134388e-05, + "loss": 0.3814900517463684, + "step": 670 + }, + { + "epoch": 0.17819678661532334, + "grad_norm": 1.0156953904207233, + "learning_rate": 1.9917360697547506e-05, + "loss": 0.4211175739765167, + "step": 671 + }, + { + "epoch": 0.1784623555968663, + "grad_norm": 1.0530805044024834, + "learning_rate": 1.991679642901045e-05, + "loss": 0.3975893259048462, + "step": 672 + }, + { + "epoch": 0.17872792457840925, + "grad_norm": 0.9633270935214763, + "learning_rate": 1.9916230248631993e-05, + "loss": 0.36090826988220215, + "step": 673 + }, + { + "epoch": 0.1789934935599522, + "grad_norm": 0.9408638333666679, + "learning_rate": 1.99156621565213e-05, + "loss": 0.36511334776878357, + "step": 674 + }, + { + "epoch": 0.17925906254149515, + "grad_norm": 1.0839117569759185, + "learning_rate": 1.9915092152787888e-05, + "loss": 
0.4131924510002136, + "step": 675 + }, + { + "epoch": 0.1795246315230381, + "grad_norm": 1.1407281463751517, + "learning_rate": 1.9914520237541644e-05, + "loss": 0.4283728301525116, + "step": 676 + }, + { + "epoch": 0.17979020050458105, + "grad_norm": 0.9751873028047018, + "learning_rate": 1.991394641089283e-05, + "loss": 0.3855544924736023, + "step": 677 + }, + { + "epoch": 0.18005576948612403, + "grad_norm": 1.3517309919327671, + "learning_rate": 1.9913370672952074e-05, + "loss": 0.41288501024246216, + "step": 678 + }, + { + "epoch": 0.180321338467667, + "grad_norm": 1.1127679640996702, + "learning_rate": 1.9912793023830365e-05, + "loss": 0.3824073076248169, + "step": 679 + }, + { + "epoch": 0.18058690744920994, + "grad_norm": 1.0055812841256684, + "learning_rate": 1.9912213463639077e-05, + "loss": 0.39005106687545776, + "step": 680 + }, + { + "epoch": 0.1808524764307529, + "grad_norm": 1.0115332151563563, + "learning_rate": 1.9911631992489933e-05, + "loss": 0.3521374464035034, + "step": 681 + }, + { + "epoch": 0.18111804541229584, + "grad_norm": 0.983790464571211, + "learning_rate": 1.9911048610495037e-05, + "loss": 0.337347149848938, + "step": 682 + }, + { + "epoch": 0.1813836143938388, + "grad_norm": 1.1534370397304132, + "learning_rate": 1.9910463317766864e-05, + "loss": 0.4349983334541321, + "step": 683 + }, + { + "epoch": 0.18164918337538175, + "grad_norm": 1.059114838428009, + "learning_rate": 1.9909876114418242e-05, + "loss": 0.3783540427684784, + "step": 684 + }, + { + "epoch": 0.18191475235692472, + "grad_norm": 1.0050293498117582, + "learning_rate": 1.9909287000562383e-05, + "loss": 0.4065130054950714, + "step": 685 + }, + { + "epoch": 0.18218032133846768, + "grad_norm": 1.0122618604087057, + "learning_rate": 1.990869597631286e-05, + "loss": 0.3876315653324127, + "step": 686 + }, + { + "epoch": 0.18244589032001063, + "grad_norm": 0.9622962910168786, + "learning_rate": 1.9908103041783615e-05, + "loss": 0.3716024160385132, + "step": 687 + }, + { + 
"epoch": 0.18271145930155358, + "grad_norm": 1.086778230300176, + "learning_rate": 1.990750819708896e-05, + "loss": 0.4096733331680298, + "step": 688 + }, + { + "epoch": 0.18297702828309653, + "grad_norm": 1.131269280292305, + "learning_rate": 1.9906911442343567e-05, + "loss": 0.41432395577430725, + "step": 689 + }, + { + "epoch": 0.18324259726463948, + "grad_norm": 1.1182736792418642, + "learning_rate": 1.9906312777662493e-05, + "loss": 0.3934200406074524, + "step": 690 + }, + { + "epoch": 0.18350816624618244, + "grad_norm": 1.0493015785833109, + "learning_rate": 1.9905712203161148e-05, + "loss": 0.4246784746646881, + "step": 691 + }, + { + "epoch": 0.18377373522772542, + "grad_norm": 1.1362836227785695, + "learning_rate": 1.9905109718955323e-05, + "loss": 0.40027567744255066, + "step": 692 + }, + { + "epoch": 0.18403930420926837, + "grad_norm": 1.056262242708622, + "learning_rate": 1.990450532516116e-05, + "loss": 0.4162583351135254, + "step": 693 + }, + { + "epoch": 0.18430487319081132, + "grad_norm": 1.05760814074371, + "learning_rate": 1.990389902189518e-05, + "loss": 0.4133074879646301, + "step": 694 + }, + { + "epoch": 0.18457044217235427, + "grad_norm": 1.0438921885629904, + "learning_rate": 1.9903290809274277e-05, + "loss": 0.333192378282547, + "step": 695 + }, + { + "epoch": 0.18483601115389722, + "grad_norm": 0.9814281867123515, + "learning_rate": 1.9902680687415704e-05, + "loss": 0.39349496364593506, + "step": 696 + }, + { + "epoch": 0.18510158013544017, + "grad_norm": 1.0366332083029342, + "learning_rate": 1.9902068656437086e-05, + "loss": 0.39678412675857544, + "step": 697 + }, + { + "epoch": 0.18536714911698313, + "grad_norm": 1.0003960978434148, + "learning_rate": 1.9901454716456415e-05, + "loss": 0.3553932011127472, + "step": 698 + }, + { + "epoch": 0.18563271809852608, + "grad_norm": 1.0876315802223169, + "learning_rate": 1.990083886759205e-05, + "loss": 0.4264630079269409, + "step": 699 + }, + { + "epoch": 0.18589828708006906, + "grad_norm": 
1.0135520655053032, + "learning_rate": 1.9900221109962726e-05, + "loss": 0.3883950412273407, + "step": 700 + }, + { + "epoch": 0.186163856061612, + "grad_norm": 1.0408639715408188, + "learning_rate": 1.989960144368753e-05, + "loss": 0.38465407490730286, + "step": 701 + }, + { + "epoch": 0.18642942504315496, + "grad_norm": 2.2198594223984065, + "learning_rate": 1.9898979868885933e-05, + "loss": 0.39897871017456055, + "step": 702 + }, + { + "epoch": 0.1866949940246979, + "grad_norm": 1.120873004114704, + "learning_rate": 1.9898356385677762e-05, + "loss": 0.4386023283004761, + "step": 703 + }, + { + "epoch": 0.18696056300624087, + "grad_norm": 1.0254606123190075, + "learning_rate": 1.989773099418322e-05, + "loss": 0.42621874809265137, + "step": 704 + }, + { + "epoch": 0.18722613198778382, + "grad_norm": 1.0153284696458207, + "learning_rate": 1.9897103694522877e-05, + "loss": 0.3811546266078949, + "step": 705 + }, + { + "epoch": 0.18749170096932677, + "grad_norm": 1.0634877610237485, + "learning_rate": 1.989647448681767e-05, + "loss": 0.4018982946872711, + "step": 706 + }, + { + "epoch": 0.18775726995086975, + "grad_norm": 1.0316038713106725, + "learning_rate": 1.9895843371188897e-05, + "loss": 0.3920126259326935, + "step": 707 + }, + { + "epoch": 0.1880228389324127, + "grad_norm": 0.9767495366810068, + "learning_rate": 1.9895210347758233e-05, + "loss": 0.3598487973213196, + "step": 708 + }, + { + "epoch": 0.18828840791395565, + "grad_norm": 1.0286682270198635, + "learning_rate": 1.9894575416647717e-05, + "loss": 0.4204316735267639, + "step": 709 + }, + { + "epoch": 0.1885539768954986, + "grad_norm": 0.9653709480495668, + "learning_rate": 1.9893938577979755e-05, + "loss": 0.33814263343811035, + "step": 710 + }, + { + "epoch": 0.18881954587704156, + "grad_norm": 0.9588770367914977, + "learning_rate": 1.9893299831877124e-05, + "loss": 0.3788227140903473, + "step": 711 + }, + { + "epoch": 0.1890851148585845, + "grad_norm": 0.9974371582936609, + "learning_rate": 
1.989265917846297e-05, + "loss": 0.38141176104545593, + "step": 712 + }, + { + "epoch": 0.18935068384012746, + "grad_norm": 1.0051109402301954, + "learning_rate": 1.9892016617860793e-05, + "loss": 0.3757280707359314, + "step": 713 + }, + { + "epoch": 0.18961625282167044, + "grad_norm": 0.9863956856856875, + "learning_rate": 1.989137215019448e-05, + "loss": 0.37819087505340576, + "step": 714 + }, + { + "epoch": 0.1898818218032134, + "grad_norm": 1.1797000402703188, + "learning_rate": 1.9890725775588277e-05, + "loss": 0.46046000719070435, + "step": 715 + }, + { + "epoch": 0.19014739078475634, + "grad_norm": 0.9967163493181064, + "learning_rate": 1.9890077494166792e-05, + "loss": 0.33967363834381104, + "step": 716 + }, + { + "epoch": 0.1904129597662993, + "grad_norm": 0.9620841339155507, + "learning_rate": 1.988942730605501e-05, + "loss": 0.36672675609588623, + "step": 717 + }, + { + "epoch": 0.19067852874784225, + "grad_norm": 1.0666183498740949, + "learning_rate": 1.9888775211378278e-05, + "loss": 0.38705015182495117, + "step": 718 + }, + { + "epoch": 0.1909440977293852, + "grad_norm": 1.0696051052523068, + "learning_rate": 1.9888121210262313e-05, + "loss": 0.35257095098495483, + "step": 719 + }, + { + "epoch": 0.19120966671092815, + "grad_norm": 1.0337108803934987, + "learning_rate": 1.9887465302833194e-05, + "loss": 0.3803965449333191, + "step": 720 + }, + { + "epoch": 0.19147523569247113, + "grad_norm": 1.0097965015220993, + "learning_rate": 1.988680748921738e-05, + "loss": 0.38166487216949463, + "step": 721 + }, + { + "epoch": 0.19174080467401408, + "grad_norm": 0.971159209120872, + "learning_rate": 1.988614776954169e-05, + "loss": 0.4017483592033386, + "step": 722 + }, + { + "epoch": 0.19200637365555703, + "grad_norm": 1.0651840937747212, + "learning_rate": 1.98854861439333e-05, + "loss": 0.4343035817146301, + "step": 723 + }, + { + "epoch": 0.19227194263709999, + "grad_norm": 1.0527178531986199, + "learning_rate": 1.9884822612519773e-05, + "loss": 
0.4017031192779541, + "step": 724 + }, + { + "epoch": 0.19253751161864294, + "grad_norm": 0.9558335625340557, + "learning_rate": 1.988415717542903e-05, + "loss": 0.32294636964797974, + "step": 725 + }, + { + "epoch": 0.1928030806001859, + "grad_norm": 1.018550638071552, + "learning_rate": 1.988348983278935e-05, + "loss": 0.34661561250686646, + "step": 726 + }, + { + "epoch": 0.19306864958172884, + "grad_norm": 1.1264464061553692, + "learning_rate": 1.98828205847294e-05, + "loss": 0.3588724434375763, + "step": 727 + }, + { + "epoch": 0.19333421856327182, + "grad_norm": 1.151476031768393, + "learning_rate": 1.9882149431378194e-05, + "loss": 0.45439180731773376, + "step": 728 + }, + { + "epoch": 0.19359978754481477, + "grad_norm": 1.092854672146059, + "learning_rate": 1.988147637286513e-05, + "loss": 0.3916742205619812, + "step": 729 + }, + { + "epoch": 0.19386535652635772, + "grad_norm": 1.1073017625666908, + "learning_rate": 1.988080140931996e-05, + "loss": 0.3838115334510803, + "step": 730 + }, + { + "epoch": 0.19413092550790068, + "grad_norm": 1.0305888563782257, + "learning_rate": 1.9880124540872813e-05, + "loss": 0.3803096413612366, + "step": 731 + }, + { + "epoch": 0.19439649448944363, + "grad_norm": 1.0697488639709387, + "learning_rate": 1.987944576765418e-05, + "loss": 0.4180675446987152, + "step": 732 + }, + { + "epoch": 0.19466206347098658, + "grad_norm": 0.968492149308095, + "learning_rate": 1.987876508979492e-05, + "loss": 0.34485924243927, + "step": 733 + }, + { + "epoch": 0.19492763245252953, + "grad_norm": 1.0301319893667387, + "learning_rate": 1.987808250742626e-05, + "loss": 0.3696223795413971, + "step": 734 + }, + { + "epoch": 0.1951932014340725, + "grad_norm": 1.0070871597151176, + "learning_rate": 1.9877398020679796e-05, + "loss": 0.39920324087142944, + "step": 735 + }, + { + "epoch": 0.19545877041561546, + "grad_norm": 0.9772548764362861, + "learning_rate": 1.987671162968748e-05, + "loss": 0.33534419536590576, + "step": 736 + }, + { + "epoch": 
0.19572433939715841, + "grad_norm": 0.955184588375953, + "learning_rate": 1.9876023334581657e-05, + "loss": 0.3698185682296753, + "step": 737 + }, + { + "epoch": 0.19598990837870137, + "grad_norm": 1.0108475553340988, + "learning_rate": 1.9875333135495e-05, + "loss": 0.37388375401496887, + "step": 738 + }, + { + "epoch": 0.19625547736024432, + "grad_norm": 0.9685434293396273, + "learning_rate": 1.9874641032560594e-05, + "loss": 0.3285469114780426, + "step": 739 + }, + { + "epoch": 0.19652104634178727, + "grad_norm": 1.01794140535256, + "learning_rate": 1.9873947025911854e-05, + "loss": 0.3539549708366394, + "step": 740 + }, + { + "epoch": 0.19678661532333022, + "grad_norm": 1.0943847325994938, + "learning_rate": 1.9873251115682577e-05, + "loss": 0.4707021117210388, + "step": 741 + }, + { + "epoch": 0.1970521843048732, + "grad_norm": 0.9783865509799976, + "learning_rate": 1.987255330200693e-05, + "loss": 0.3871781826019287, + "step": 742 + }, + { + "epoch": 0.19731775328641615, + "grad_norm": 1.0462206197157178, + "learning_rate": 1.9871853585019446e-05, + "loss": 0.3890243172645569, + "step": 743 + }, + { + "epoch": 0.1975833222679591, + "grad_norm": 0.9914096392216383, + "learning_rate": 1.9871151964855013e-05, + "loss": 0.34914374351501465, + "step": 744 + }, + { + "epoch": 0.19784889124950206, + "grad_norm": 1.0157439665946277, + "learning_rate": 1.9870448441648905e-05, + "loss": 0.41009777784347534, + "step": 745 + }, + { + "epoch": 0.198114460231045, + "grad_norm": 1.0725931773033663, + "learning_rate": 1.9869743015536747e-05, + "loss": 0.39449363946914673, + "step": 746 + }, + { + "epoch": 0.19838002921258796, + "grad_norm": 1.081644116196219, + "learning_rate": 1.9869035686654538e-05, + "loss": 0.3530065417289734, + "step": 747 + }, + { + "epoch": 0.1986455981941309, + "grad_norm": 1.1338420898560146, + "learning_rate": 1.986832645513864e-05, + "loss": 0.4255196154117584, + "step": 748 + }, + { + "epoch": 0.1989111671756739, + "grad_norm": 
1.0625457917520444, + "learning_rate": 1.9867615321125796e-05, + "loss": 0.3921143114566803, + "step": 749 + }, + { + "epoch": 0.19917673615721684, + "grad_norm": 1.1076371778966394, + "learning_rate": 1.986690228475309e-05, + "loss": 0.4157381057739258, + "step": 750 + }, + { + "epoch": 0.1994423051387598, + "grad_norm": 0.9887260401437288, + "learning_rate": 1.986618734615799e-05, + "loss": 0.3922047019004822, + "step": 751 + }, + { + "epoch": 0.19970787412030275, + "grad_norm": 1.2477225666156357, + "learning_rate": 1.9865470505478335e-05, + "loss": 0.4378710985183716, + "step": 752 + }, + { + "epoch": 0.1999734431018457, + "grad_norm": 0.9960415180367619, + "learning_rate": 1.986475176285232e-05, + "loss": 0.3636753261089325, + "step": 753 + }, + { + "epoch": 0.20023901208338865, + "grad_norm": 1.0691751577172293, + "learning_rate": 1.986403111841851e-05, + "loss": 0.3509834408760071, + "step": 754 + }, + { + "epoch": 0.2005045810649316, + "grad_norm": 0.9490438891131449, + "learning_rate": 1.986330857231583e-05, + "loss": 0.3539624512195587, + "step": 755 + }, + { + "epoch": 0.20077015004647458, + "grad_norm": 1.002849163142055, + "learning_rate": 1.9862584124683587e-05, + "loss": 0.417904257774353, + "step": 756 + }, + { + "epoch": 0.20103571902801753, + "grad_norm": 0.9438738740406134, + "learning_rate": 1.9861857775661442e-05, + "loss": 0.3602277636528015, + "step": 757 + }, + { + "epoch": 0.2013012880095605, + "grad_norm": 1.0703002408877305, + "learning_rate": 1.986112952538943e-05, + "loss": 0.41064661741256714, + "step": 758 + }, + { + "epoch": 0.20156685699110344, + "grad_norm": 0.9789269746167363, + "learning_rate": 1.9860399374007944e-05, + "loss": 0.36313754320144653, + "step": 759 + }, + { + "epoch": 0.2018324259726464, + "grad_norm": 1.0711706181502203, + "learning_rate": 1.9859667321657755e-05, + "loss": 0.39497628808021545, + "step": 760 + }, + { + "epoch": 0.20209799495418934, + "grad_norm": 1.0173001682725575, + "learning_rate": 
1.9858933368479987e-05, + "loss": 0.405613511800766, + "step": 761 + }, + { + "epoch": 0.2023635639357323, + "grad_norm": 0.9881458101524105, + "learning_rate": 1.9858197514616142e-05, + "loss": 0.39093440771102905, + "step": 762 + }, + { + "epoch": 0.20262913291727527, + "grad_norm": 1.0330584509521943, + "learning_rate": 1.9857459760208084e-05, + "loss": 0.39908382296562195, + "step": 763 + }, + { + "epoch": 0.20289470189881822, + "grad_norm": 0.9416263868211369, + "learning_rate": 1.9856720105398038e-05, + "loss": 0.36787620186805725, + "step": 764 + }, + { + "epoch": 0.20316027088036118, + "grad_norm": 1.0128388377672763, + "learning_rate": 1.985597855032861e-05, + "loss": 0.390550822019577, + "step": 765 + }, + { + "epoch": 0.20342583986190413, + "grad_norm": 1.115759431869763, + "learning_rate": 1.9855235095142754e-05, + "loss": 0.4191611409187317, + "step": 766 + }, + { + "epoch": 0.20369140884344708, + "grad_norm": 1.1288935622655036, + "learning_rate": 1.985448973998381e-05, + "loss": 0.4060766100883484, + "step": 767 + }, + { + "epoch": 0.20395697782499003, + "grad_norm": 1.055264696895727, + "learning_rate": 1.985374248499546e-05, + "loss": 0.3906163275241852, + "step": 768 + }, + { + "epoch": 0.20422254680653298, + "grad_norm": 1.0101644212894914, + "learning_rate": 1.9852993330321774e-05, + "loss": 0.3926839828491211, + "step": 769 + }, + { + "epoch": 0.20448811578807596, + "grad_norm": 1.0474151984911524, + "learning_rate": 1.9852242276107182e-05, + "loss": 0.37276068329811096, + "step": 770 + }, + { + "epoch": 0.20475368476961892, + "grad_norm": 0.9531396793135881, + "learning_rate": 1.9851489322496476e-05, + "loss": 0.3765360414981842, + "step": 771 + }, + { + "epoch": 0.20501925375116187, + "grad_norm": 1.0017274873228423, + "learning_rate": 1.9850734469634815e-05, + "loss": 0.35091257095336914, + "step": 772 + }, + { + "epoch": 0.20528482273270482, + "grad_norm": 1.1164065944268338, + "learning_rate": 1.9849977717667725e-05, + "loss": 
0.4259791076183319, + "step": 773 + }, + { + "epoch": 0.20555039171424777, + "grad_norm": 0.9939508272565134, + "learning_rate": 1.9849219066741102e-05, + "loss": 0.3563114404678345, + "step": 774 + }, + { + "epoch": 0.20581596069579072, + "grad_norm": 1.0814350606971046, + "learning_rate": 1.9848458517001203e-05, + "loss": 0.4148223102092743, + "step": 775 + }, + { + "epoch": 0.20608152967733367, + "grad_norm": 1.0296405515766518, + "learning_rate": 1.9847696068594655e-05, + "loss": 0.3817785382270813, + "step": 776 + }, + { + "epoch": 0.20634709865887665, + "grad_norm": 1.115875170640065, + "learning_rate": 1.984693172166845e-05, + "loss": 0.41741886734962463, + "step": 777 + }, + { + "epoch": 0.2066126676404196, + "grad_norm": 1.0479957521256793, + "learning_rate": 1.9846165476369938e-05, + "loss": 0.34800025820732117, + "step": 778 + }, + { + "epoch": 0.20687823662196256, + "grad_norm": 1.0122784392492805, + "learning_rate": 1.9845397332846848e-05, + "loss": 0.38093405961990356, + "step": 779 + }, + { + "epoch": 0.2071438056035055, + "grad_norm": 1.0953515150858002, + "learning_rate": 1.9844627291247268e-05, + "loss": 0.40733009576797485, + "step": 780 + }, + { + "epoch": 0.20740937458504846, + "grad_norm": 1.1011295166986532, + "learning_rate": 1.9843855351719655e-05, + "loss": 0.3829066753387451, + "step": 781 + }, + { + "epoch": 0.2076749435665914, + "grad_norm": 1.0316161170996605, + "learning_rate": 1.9843081514412827e-05, + "loss": 0.3574868440628052, + "step": 782 + }, + { + "epoch": 0.20794051254813437, + "grad_norm": 1.071531696766489, + "learning_rate": 1.984230577947597e-05, + "loss": 0.3675144612789154, + "step": 783 + }, + { + "epoch": 0.20820608152967734, + "grad_norm": 0.9982781618225591, + "learning_rate": 1.9841528147058638e-05, + "loss": 0.36120525002479553, + "step": 784 + }, + { + "epoch": 0.2084716505112203, + "grad_norm": 1.0016427535647234, + "learning_rate": 1.984074861731075e-05, + "loss": 0.3651392459869385, + "step": 785 + }, + { + 
"epoch": 0.20873721949276325, + "grad_norm": 1.1254815799645344, + "learning_rate": 1.983996719038259e-05, + "loss": 0.4204651117324829, + "step": 786 + }, + { + "epoch": 0.2090027884743062, + "grad_norm": 1.0600310007301286, + "learning_rate": 1.9839183866424806e-05, + "loss": 0.4452149271965027, + "step": 787 + }, + { + "epoch": 0.20926835745584915, + "grad_norm": 1.000047138771705, + "learning_rate": 1.9838398645588418e-05, + "loss": 0.3931270241737366, + "step": 788 + }, + { + "epoch": 0.2095339264373921, + "grad_norm": 1.0009892054118905, + "learning_rate": 1.98376115280248e-05, + "loss": 0.3680538535118103, + "step": 789 + }, + { + "epoch": 0.20979949541893506, + "grad_norm": 0.9848864128393906, + "learning_rate": 1.9836822513885704e-05, + "loss": 0.3766820728778839, + "step": 790 + }, + { + "epoch": 0.21006506440047804, + "grad_norm": 1.0494510099931045, + "learning_rate": 1.9836031603323245e-05, + "loss": 0.3602439761161804, + "step": 791 + }, + { + "epoch": 0.210330633382021, + "grad_norm": 0.9790632198207762, + "learning_rate": 1.98352387964899e-05, + "loss": 0.38925549387931824, + "step": 792 + }, + { + "epoch": 0.21059620236356394, + "grad_norm": 1.0121548586068807, + "learning_rate": 1.9834444093538504e-05, + "loss": 0.3569640517234802, + "step": 793 + }, + { + "epoch": 0.2108617713451069, + "grad_norm": 1.0171085592107372, + "learning_rate": 1.9833647494622275e-05, + "loss": 0.3543340265750885, + "step": 794 + }, + { + "epoch": 0.21112734032664984, + "grad_norm": 1.0426744340585967, + "learning_rate": 1.983284899989479e-05, + "loss": 0.37313222885131836, + "step": 795 + }, + { + "epoch": 0.2113929093081928, + "grad_norm": 1.0940501026222131, + "learning_rate": 1.983204860950998e-05, + "loss": 0.3874257802963257, + "step": 796 + }, + { + "epoch": 0.21165847828973575, + "grad_norm": 1.005805069630653, + "learning_rate": 1.983124632362216e-05, + "loss": 0.3815164864063263, + "step": 797 + }, + { + "epoch": 0.21192404727127873, + "grad_norm": 
1.0879143214156584, + "learning_rate": 1.9830442142386e-05, + "loss": 0.39476731419563293, + "step": 798 + }, + { + "epoch": 0.21218961625282168, + "grad_norm": 1.0888281701524323, + "learning_rate": 1.9829636065956527e-05, + "loss": 0.399338036775589, + "step": 799 + }, + { + "epoch": 0.21245518523436463, + "grad_norm": 1.0679987938098825, + "learning_rate": 1.9828828094489157e-05, + "loss": 0.3940344452857971, + "step": 800 + }, + { + "epoch": 0.21272075421590758, + "grad_norm": 1.0124680733329086, + "learning_rate": 1.9828018228139647e-05, + "loss": 0.35597044229507446, + "step": 801 + }, + { + "epoch": 0.21298632319745053, + "grad_norm": 1.197291261672491, + "learning_rate": 1.9827206467064133e-05, + "loss": 0.4309435784816742, + "step": 802 + }, + { + "epoch": 0.21325189217899349, + "grad_norm": 1.0158009285134544, + "learning_rate": 1.9826392811419113e-05, + "loss": 0.37327438592910767, + "step": 803 + }, + { + "epoch": 0.21351746116053644, + "grad_norm": 0.9944187944281718, + "learning_rate": 1.9825577261361454e-05, + "loss": 0.35214242339134216, + "step": 804 + }, + { + "epoch": 0.21378303014207942, + "grad_norm": 1.1575422458756877, + "learning_rate": 1.982475981704838e-05, + "loss": 0.41114968061447144, + "step": 805 + }, + { + "epoch": 0.21404859912362237, + "grad_norm": 0.9719994027948292, + "learning_rate": 1.9823940478637486e-05, + "loss": 0.3632299304008484, + "step": 806 + }, + { + "epoch": 0.21431416810516532, + "grad_norm": 1.1699036102992622, + "learning_rate": 1.9823119246286727e-05, + "loss": 0.39640772342681885, + "step": 807 + }, + { + "epoch": 0.21457973708670827, + "grad_norm": 1.002397111320771, + "learning_rate": 1.9822296120154433e-05, + "loss": 0.39356929063796997, + "step": 808 + }, + { + "epoch": 0.21484530606825122, + "grad_norm": 1.061754718166072, + "learning_rate": 1.9821471100399294e-05, + "loss": 0.3710761070251465, + "step": 809 + }, + { + "epoch": 0.21511087504979418, + "grad_norm": 0.9713246248834058, + "learning_rate": 
1.9820644187180354e-05, + "loss": 0.35515087842941284, + "step": 810 + }, + { + "epoch": 0.21537644403133713, + "grad_norm": 1.0166244205196049, + "learning_rate": 1.981981538065704e-05, + "loss": 0.3803205192089081, + "step": 811 + }, + { + "epoch": 0.2156420130128801, + "grad_norm": 1.0421456761704733, + "learning_rate": 1.9818984680989134e-05, + "loss": 0.40275394916534424, + "step": 812 + }, + { + "epoch": 0.21590758199442306, + "grad_norm": 1.0872785008811605, + "learning_rate": 1.9818152088336786e-05, + "loss": 0.3711051344871521, + "step": 813 + }, + { + "epoch": 0.216173150975966, + "grad_norm": 1.0872190904032264, + "learning_rate": 1.9817317602860512e-05, + "loss": 0.4198985695838928, + "step": 814 + }, + { + "epoch": 0.21643871995750896, + "grad_norm": 0.9931448766878032, + "learning_rate": 1.9816481224721185e-05, + "loss": 0.38333773612976074, + "step": 815 + }, + { + "epoch": 0.21670428893905191, + "grad_norm": 1.1679000778390602, + "learning_rate": 1.9815642954080055e-05, + "loss": 0.3959774971008301, + "step": 816 + }, + { + "epoch": 0.21696985792059487, + "grad_norm": 1.1013876458182361, + "learning_rate": 1.9814802791098728e-05, + "loss": 0.3475337326526642, + "step": 817 + }, + { + "epoch": 0.21723542690213782, + "grad_norm": 1.06867842878894, + "learning_rate": 1.981396073593918e-05, + "loss": 0.369370698928833, + "step": 818 + }, + { + "epoch": 0.2175009958836808, + "grad_norm": 1.085763343280496, + "learning_rate": 1.9813116788763744e-05, + "loss": 0.3515776991844177, + "step": 819 + }, + { + "epoch": 0.21776656486522375, + "grad_norm": 1.0780206278908893, + "learning_rate": 1.9812270949735124e-05, + "loss": 0.3637402355670929, + "step": 820 + }, + { + "epoch": 0.2180321338467667, + "grad_norm": 1.0342672695189807, + "learning_rate": 1.9811423219016395e-05, + "loss": 0.3930947780609131, + "step": 821 + }, + { + "epoch": 0.21829770282830965, + "grad_norm": 1.102521832922822, + "learning_rate": 1.981057359677098e-05, + "loss": 
0.40081048011779785, + "step": 822 + }, + { + "epoch": 0.2185632718098526, + "grad_norm": 1.0386373096164698, + "learning_rate": 1.9809722083162682e-05, + "loss": 0.3831724226474762, + "step": 823 + }, + { + "epoch": 0.21882884079139556, + "grad_norm": 1.0516274934858763, + "learning_rate": 1.9808868678355662e-05, + "loss": 0.3919270932674408, + "step": 824 + }, + { + "epoch": 0.2190944097729385, + "grad_norm": 1.0623138704484363, + "learning_rate": 1.9808013382514448e-05, + "loss": 0.41782522201538086, + "step": 825 + }, + { + "epoch": 0.2193599787544815, + "grad_norm": 1.0570337251212087, + "learning_rate": 1.9807156195803926e-05, + "loss": 0.3751329779624939, + "step": 826 + }, + { + "epoch": 0.21962554773602444, + "grad_norm": 1.0009279652164118, + "learning_rate": 1.9806297118389353e-05, + "loss": 0.36451685428619385, + "step": 827 + }, + { + "epoch": 0.2198911167175674, + "grad_norm": 1.1911804759546862, + "learning_rate": 1.9805436150436352e-05, + "loss": 0.3924056887626648, + "step": 828 + }, + { + "epoch": 0.22015668569911034, + "grad_norm": 0.9887238598202497, + "learning_rate": 1.9804573292110906e-05, + "loss": 0.34744757413864136, + "step": 829 + }, + { + "epoch": 0.2204222546806533, + "grad_norm": 1.1506637434477502, + "learning_rate": 1.980370854357936e-05, + "loss": 0.4162982702255249, + "step": 830 + }, + { + "epoch": 0.22068782366219625, + "grad_norm": 1.103994708633239, + "learning_rate": 1.9802841905008434e-05, + "loss": 0.36572596430778503, + "step": 831 + }, + { + "epoch": 0.2209533926437392, + "grad_norm": 1.0028116020560682, + "learning_rate": 1.98019733765652e-05, + "loss": 0.3535170555114746, + "step": 832 + }, + { + "epoch": 0.22121896162528218, + "grad_norm": 1.061392974987333, + "learning_rate": 1.9801102958417107e-05, + "loss": 0.3906480073928833, + "step": 833 + }, + { + "epoch": 0.22148453060682513, + "grad_norm": 1.0646039703833918, + "learning_rate": 1.980023065073195e-05, + "loss": 0.34185755252838135, + "step": 834 + }, + { + 
"epoch": 0.22175009958836808, + "grad_norm": 1.1983506875652454, + "learning_rate": 1.9799356453677913e-05, + "loss": 0.4216359853744507, + "step": 835 + }, + { + "epoch": 0.22201566856991103, + "grad_norm": 1.038756499639493, + "learning_rate": 1.979848036742352e-05, + "loss": 0.365469366312027, + "step": 836 + }, + { + "epoch": 0.222281237551454, + "grad_norm": 1.0128951338762324, + "learning_rate": 1.9797602392137678e-05, + "loss": 0.3570204973220825, + "step": 837 + }, + { + "epoch": 0.22254680653299694, + "grad_norm": 1.0221196075964396, + "learning_rate": 1.9796722527989646e-05, + "loss": 0.3929975926876068, + "step": 838 + }, + { + "epoch": 0.2228123755145399, + "grad_norm": 1.1512146064832047, + "learning_rate": 1.979584077514905e-05, + "loss": 0.39064258337020874, + "step": 839 + }, + { + "epoch": 0.22307794449608287, + "grad_norm": 1.0559333522375243, + "learning_rate": 1.9794957133785884e-05, + "loss": 0.3626471757888794, + "step": 840 + }, + { + "epoch": 0.22334351347762582, + "grad_norm": 1.0867316997584564, + "learning_rate": 1.9794071604070506e-05, + "loss": 0.4337238371372223, + "step": 841 + }, + { + "epoch": 0.22360908245916877, + "grad_norm": 0.9358033183445809, + "learning_rate": 1.9793184186173632e-05, + "loss": 0.3361967206001282, + "step": 842 + }, + { + "epoch": 0.22387465144071172, + "grad_norm": 0.961043072021178, + "learning_rate": 1.9792294880266346e-05, + "loss": 0.3429332971572876, + "step": 843 + }, + { + "epoch": 0.22414022042225468, + "grad_norm": 1.012773989217256, + "learning_rate": 1.97914036865201e-05, + "loss": 0.39196616411209106, + "step": 844 + }, + { + "epoch": 0.22440578940379763, + "grad_norm": 1.1250916546708978, + "learning_rate": 1.9790510605106697e-05, + "loss": 0.3763045072555542, + "step": 845 + }, + { + "epoch": 0.22467135838534058, + "grad_norm": 1.1139610172600873, + "learning_rate": 1.978961563619832e-05, + "loss": 0.41614070534706116, + "step": 846 + }, + { + "epoch": 0.22493692736688356, + "grad_norm": 
1.065347693165354, + "learning_rate": 1.9788718779967506e-05, + "loss": 0.3834165334701538, + "step": 847 + }, + { + "epoch": 0.2252024963484265, + "grad_norm": 0.9834992911039661, + "learning_rate": 1.978782003658716e-05, + "loss": 0.3552364110946655, + "step": 848 + }, + { + "epoch": 0.22546806532996946, + "grad_norm": 1.0365749744504318, + "learning_rate": 1.9786919406230544e-05, + "loss": 0.3857925534248352, + "step": 849 + }, + { + "epoch": 0.22573363431151242, + "grad_norm": 1.0779836727772776, + "learning_rate": 1.9786016889071294e-05, + "loss": 0.3501393795013428, + "step": 850 + }, + { + "epoch": 0.22599920329305537, + "grad_norm": 1.1363104904390704, + "learning_rate": 1.9785112485283404e-05, + "loss": 0.36280643939971924, + "step": 851 + }, + { + "epoch": 0.22626477227459832, + "grad_norm": 1.1791591930929934, + "learning_rate": 1.978420619504123e-05, + "loss": 0.3713894486427307, + "step": 852 + }, + { + "epoch": 0.22653034125614127, + "grad_norm": 1.0682718312185442, + "learning_rate": 1.97832980185195e-05, + "loss": 0.3668733537197113, + "step": 853 + }, + { + "epoch": 0.22679591023768425, + "grad_norm": 1.06232834606136, + "learning_rate": 1.978238795589329e-05, + "loss": 0.4054701626300812, + "step": 854 + }, + { + "epoch": 0.2270614792192272, + "grad_norm": 1.1024819375758403, + "learning_rate": 1.9781476007338058e-05, + "loss": 0.3824681043624878, + "step": 855 + }, + { + "epoch": 0.22732704820077015, + "grad_norm": 1.0604830101195206, + "learning_rate": 1.978056217302961e-05, + "loss": 0.4009544253349304, + "step": 856 + }, + { + "epoch": 0.2275926171823131, + "grad_norm": 1.0150812264671392, + "learning_rate": 1.9779646453144133e-05, + "loss": 0.34773316979408264, + "step": 857 + }, + { + "epoch": 0.22785818616385606, + "grad_norm": 1.0737509474924387, + "learning_rate": 1.977872884785815e-05, + "loss": 0.4067278206348419, + "step": 858 + }, + { + "epoch": 0.228123755145399, + "grad_norm": 1.0566398666110703, + "learning_rate": 
1.9777809357348584e-05, + "loss": 0.3843458890914917, + "step": 859 + }, + { + "epoch": 0.22838932412694196, + "grad_norm": 1.083451143522079, + "learning_rate": 1.977688798179269e-05, + "loss": 0.4261704683303833, + "step": 860 + }, + { + "epoch": 0.22865489310848494, + "grad_norm": 1.0145015740681522, + "learning_rate": 1.9775964721368098e-05, + "loss": 0.39109086990356445, + "step": 861 + }, + { + "epoch": 0.2289204620900279, + "grad_norm": 1.1472642326588585, + "learning_rate": 1.9775039576252807e-05, + "loss": 0.39436954259872437, + "step": 862 + }, + { + "epoch": 0.22918603107157084, + "grad_norm": 0.9770870267905873, + "learning_rate": 1.9774112546625168e-05, + "loss": 0.3787967562675476, + "step": 863 + }, + { + "epoch": 0.2294516000531138, + "grad_norm": 1.5071435779935147, + "learning_rate": 1.9773183632663907e-05, + "loss": 0.3729320466518402, + "step": 864 + }, + { + "epoch": 0.22971716903465675, + "grad_norm": 1.0048578103437809, + "learning_rate": 1.9772252834548108e-05, + "loss": 0.3817081153392792, + "step": 865 + }, + { + "epoch": 0.2299827380161997, + "grad_norm": 0.9709592169890221, + "learning_rate": 1.9771320152457212e-05, + "loss": 0.3362218737602234, + "step": 866 + }, + { + "epoch": 0.23024830699774265, + "grad_norm": 1.0194192402395448, + "learning_rate": 1.9770385586571033e-05, + "loss": 0.37274059653282166, + "step": 867 + }, + { + "epoch": 0.23051387597928563, + "grad_norm": 1.058710969457703, + "learning_rate": 1.9769449137069746e-05, + "loss": 0.3832330107688904, + "step": 868 + }, + { + "epoch": 0.23077944496082858, + "grad_norm": 0.9857605594513371, + "learning_rate": 1.9768510804133886e-05, + "loss": 0.37420010566711426, + "step": 869 + }, + { + "epoch": 0.23104501394237154, + "grad_norm": 1.0333482020677847, + "learning_rate": 1.976757058794435e-05, + "loss": 0.35314565896987915, + "step": 870 + }, + { + "epoch": 0.2313105829239145, + "grad_norm": 1.0404097802666386, + "learning_rate": 1.97666284886824e-05, + "loss": 
0.34667372703552246, + "step": 871 + }, + { + "epoch": 0.23157615190545744, + "grad_norm": 1.1826768759617956, + "learning_rate": 1.976568450652967e-05, + "loss": 0.3465980589389801, + "step": 872 + }, + { + "epoch": 0.2318417208870004, + "grad_norm": 1.6479387485919323, + "learning_rate": 1.9764738641668137e-05, + "loss": 0.40539389848709106, + "step": 873 + }, + { + "epoch": 0.23210728986854334, + "grad_norm": 1.090454596374008, + "learning_rate": 1.976379089428016e-05, + "loss": 0.35154545307159424, + "step": 874 + }, + { + "epoch": 0.23237285885008632, + "grad_norm": 1.1033163387519414, + "learning_rate": 1.9762841264548453e-05, + "loss": 0.39748087525367737, + "step": 875 + }, + { + "epoch": 0.23263842783162927, + "grad_norm": 1.0600221119400453, + "learning_rate": 1.976188975265609e-05, + "loss": 0.41628387570381165, + "step": 876 + }, + { + "epoch": 0.23290399681317223, + "grad_norm": 1.0805125037340586, + "learning_rate": 1.976093635878652e-05, + "loss": 0.4076233208179474, + "step": 877 + }, + { + "epoch": 0.23316956579471518, + "grad_norm": 0.9221839355888705, + "learning_rate": 1.9759981083123533e-05, + "loss": 0.3262259364128113, + "step": 878 + }, + { + "epoch": 0.23343513477625813, + "grad_norm": 1.1690018828805817, + "learning_rate": 1.9759023925851302e-05, + "loss": 0.36561673879623413, + "step": 879 + }, + { + "epoch": 0.23370070375780108, + "grad_norm": 1.083829918240926, + "learning_rate": 1.9758064887154358e-05, + "loss": 0.36661773920059204, + "step": 880 + }, + { + "epoch": 0.23396627273934403, + "grad_norm": 1.0655263771494812, + "learning_rate": 1.9757103967217587e-05, + "loss": 0.34671685099601746, + "step": 881 + }, + { + "epoch": 0.234231841720887, + "grad_norm": 1.0056372913167473, + "learning_rate": 1.9756141166226246e-05, + "loss": 0.3486331105232239, + "step": 882 + }, + { + "epoch": 0.23449741070242996, + "grad_norm": 1.1177836982205323, + "learning_rate": 1.9755176484365953e-05, + "loss": 0.3883505165576935, + "step": 883 + }, + { + 
"epoch": 0.23476297968397292, + "grad_norm": 1.0548520245203914, + "learning_rate": 1.9754209921822683e-05, + "loss": 0.3832106590270996, + "step": 884 + }, + { + "epoch": 0.23502854866551587, + "grad_norm": 1.078830112662993, + "learning_rate": 1.975324147878278e-05, + "loss": 0.37876033782958984, + "step": 885 + }, + { + "epoch": 0.23529411764705882, + "grad_norm": 1.0689289829128008, + "learning_rate": 1.975227115543295e-05, + "loss": 0.38931846618652344, + "step": 886 + }, + { + "epoch": 0.23555968662860177, + "grad_norm": 0.956721500767322, + "learning_rate": 1.9751298951960258e-05, + "loss": 0.3581021726131439, + "step": 887 + }, + { + "epoch": 0.23582525561014472, + "grad_norm": 1.0206944172292924, + "learning_rate": 1.9750324868552133e-05, + "loss": 0.35196465253829956, + "step": 888 + }, + { + "epoch": 0.2360908245916877, + "grad_norm": 0.9996206423870837, + "learning_rate": 1.974934890539637e-05, + "loss": 0.3635658025741577, + "step": 889 + }, + { + "epoch": 0.23635639357323066, + "grad_norm": 0.9523927655707425, + "learning_rate": 1.9748371062681122e-05, + "loss": 0.345594197511673, + "step": 890 + }, + { + "epoch": 0.2366219625547736, + "grad_norm": 1.0443032231121456, + "learning_rate": 1.97473913405949e-05, + "loss": 0.357181191444397, + "step": 891 + }, + { + "epoch": 0.23688753153631656, + "grad_norm": 1.0008000126392016, + "learning_rate": 1.974640973932659e-05, + "loss": 0.3264622986316681, + "step": 892 + }, + { + "epoch": 0.2371531005178595, + "grad_norm": 0.9731630083329554, + "learning_rate": 1.9745426259065434e-05, + "loss": 0.37950894236564636, + "step": 893 + }, + { + "epoch": 0.23741866949940246, + "grad_norm": 1.1493289415276364, + "learning_rate": 1.9744440900001027e-05, + "loss": 0.37400782108306885, + "step": 894 + }, + { + "epoch": 0.23768423848094541, + "grad_norm": 1.0325785235739895, + "learning_rate": 1.974345366232334e-05, + "loss": 0.3455463945865631, + "step": 895 + }, + { + "epoch": 0.2379498074624884, + "grad_norm": 
1.1059511993758653, + "learning_rate": 1.9742464546222702e-05, + "loss": 0.3605351150035858, + "step": 896 + }, + { + "epoch": 0.23821537644403135, + "grad_norm": 0.9763906212855142, + "learning_rate": 1.97414735518898e-05, + "loss": 0.3839051127433777, + "step": 897 + }, + { + "epoch": 0.2384809454255743, + "grad_norm": 1.0304758127284366, + "learning_rate": 1.974048067951569e-05, + "loss": 0.34562867879867554, + "step": 898 + }, + { + "epoch": 0.23874651440711725, + "grad_norm": 1.1332867443652592, + "learning_rate": 1.9739485929291778e-05, + "loss": 0.3986506760120392, + "step": 899 + }, + { + "epoch": 0.2390120833886602, + "grad_norm": 1.1598961775072092, + "learning_rate": 1.9738489301409848e-05, + "loss": 0.3955162465572357, + "step": 900 + }, + { + "epoch": 0.23927765237020315, + "grad_norm": 1.080226447361195, + "learning_rate": 1.9737490796062036e-05, + "loss": 0.370066374540329, + "step": 901 + }, + { + "epoch": 0.2395432213517461, + "grad_norm": 1.0637004733407822, + "learning_rate": 1.973649041344084e-05, + "loss": 0.3777826726436615, + "step": 902 + }, + { + "epoch": 0.23980879033328908, + "grad_norm": 1.1358293788080334, + "learning_rate": 1.9735488153739128e-05, + "loss": 0.327572226524353, + "step": 903 + }, + { + "epoch": 0.24007435931483204, + "grad_norm": 1.071729158749965, + "learning_rate": 1.973448401715011e-05, + "loss": 0.3921743929386139, + "step": 904 + }, + { + "epoch": 0.240339928296375, + "grad_norm": 1.0635179670685195, + "learning_rate": 1.973347800386739e-05, + "loss": 0.3683379888534546, + "step": 905 + }, + { + "epoch": 0.24060549727791794, + "grad_norm": 1.023832589054702, + "learning_rate": 1.9732470114084905e-05, + "loss": 0.390872597694397, + "step": 906 + }, + { + "epoch": 0.2408710662594609, + "grad_norm": 1.0814023137489452, + "learning_rate": 1.9731460347996964e-05, + "loss": 0.3772459626197815, + "step": 907 + }, + { + "epoch": 0.24113663524100384, + "grad_norm": 1.0280982913686894, + "learning_rate": 
1.973044870579824e-05, + "loss": 0.37990954518318176, + "step": 908 + }, + { + "epoch": 0.2414022042225468, + "grad_norm": 1.0035238419205756, + "learning_rate": 1.972943518768377e-05, + "loss": 0.3380817770957947, + "step": 909 + }, + { + "epoch": 0.24166777320408978, + "grad_norm": 0.9879847056007396, + "learning_rate": 1.9728419793848935e-05, + "loss": 0.3348115384578705, + "step": 910 + }, + { + "epoch": 0.24193334218563273, + "grad_norm": 1.0561235323428824, + "learning_rate": 1.9727402524489505e-05, + "loss": 0.36936551332473755, + "step": 911 + }, + { + "epoch": 0.24219891116717568, + "grad_norm": 1.0744513063457712, + "learning_rate": 1.9726383379801593e-05, + "loss": 0.3871539235115051, + "step": 912 + }, + { + "epoch": 0.24246448014871863, + "grad_norm": 1.0904556770971818, + "learning_rate": 1.9725362359981676e-05, + "loss": 0.37087059020996094, + "step": 913 + }, + { + "epoch": 0.24273004913026158, + "grad_norm": 0.9802916629421812, + "learning_rate": 1.9724339465226595e-05, + "loss": 0.35582688450813293, + "step": 914 + }, + { + "epoch": 0.24299561811180453, + "grad_norm": 1.0947021466091125, + "learning_rate": 1.9723314695733557e-05, + "loss": 0.38500669598579407, + "step": 915 + }, + { + "epoch": 0.2432611870933475, + "grad_norm": 0.9834121517145057, + "learning_rate": 1.9722288051700116e-05, + "loss": 0.32470762729644775, + "step": 916 + }, + { + "epoch": 0.24352675607489047, + "grad_norm": 1.0805011919993295, + "learning_rate": 1.9721259533324207e-05, + "loss": 0.3822774589061737, + "step": 917 + }, + { + "epoch": 0.24379232505643342, + "grad_norm": 0.9937398719966192, + "learning_rate": 1.972022914080411e-05, + "loss": 0.38374873995780945, + "step": 918 + }, + { + "epoch": 0.24405789403797637, + "grad_norm": 1.0550770033370775, + "learning_rate": 1.9719196874338472e-05, + "loss": 0.3419352173805237, + "step": 919 + }, + { + "epoch": 0.24432346301951932, + "grad_norm": 1.0164630853495407, + "learning_rate": 1.9718162734126308e-05, + "loss": 
0.3294275403022766, + "step": 920 + }, + { + "epoch": 0.24458903200106227, + "grad_norm": 1.0668295499881337, + "learning_rate": 1.9717126720366982e-05, + "loss": 0.3585365414619446, + "step": 921 + }, + { + "epoch": 0.24485460098260522, + "grad_norm": 1.0609325079201495, + "learning_rate": 1.9716088833260225e-05, + "loss": 0.38130316138267517, + "step": 922 + }, + { + "epoch": 0.24512016996414818, + "grad_norm": 1.0577067392982809, + "learning_rate": 1.9715049073006133e-05, + "loss": 0.3745136260986328, + "step": 923 + }, + { + "epoch": 0.24538573894569116, + "grad_norm": 1.0457228779122651, + "learning_rate": 1.971400743980516e-05, + "loss": 0.3771660327911377, + "step": 924 + }, + { + "epoch": 0.2456513079272341, + "grad_norm": 1.0133861698501567, + "learning_rate": 1.971296393385812e-05, + "loss": 0.29661691188812256, + "step": 925 + }, + { + "epoch": 0.24591687690877706, + "grad_norm": 0.9516714902458889, + "learning_rate": 1.9711918555366184e-05, + "loss": 0.33783960342407227, + "step": 926 + }, + { + "epoch": 0.24618244589032, + "grad_norm": 1.2469460687001952, + "learning_rate": 1.971087130453089e-05, + "loss": 0.42983683943748474, + "step": 927 + }, + { + "epoch": 0.24644801487186296, + "grad_norm": 0.9725914261438413, + "learning_rate": 1.9709822181554142e-05, + "loss": 0.32242363691329956, + "step": 928 + }, + { + "epoch": 0.24671358385340592, + "grad_norm": 1.0989308968162201, + "learning_rate": 1.970877118663819e-05, + "loss": 0.3576955795288086, + "step": 929 + }, + { + "epoch": 0.24697915283494887, + "grad_norm": 1.116595385391156, + "learning_rate": 1.9707718319985663e-05, + "loss": 0.4185359477996826, + "step": 930 + }, + { + "epoch": 0.24724472181649185, + "grad_norm": 1.1178442474909813, + "learning_rate": 1.970666358179953e-05, + "loss": 0.35377705097198486, + "step": 931 + }, + { + "epoch": 0.2475102907980348, + "grad_norm": 1.1350743092525455, + "learning_rate": 1.9705606972283143e-05, + "loss": 0.3860151171684265, + "step": 932 + }, + { + 
"epoch": 0.24777585977957775, + "grad_norm": 1.1915035264404457, + "learning_rate": 1.9704548491640195e-05, + "loss": 0.39463168382644653, + "step": 933 + }, + { + "epoch": 0.2480414287611207, + "grad_norm": 1.0462444044755623, + "learning_rate": 1.9703488140074752e-05, + "loss": 0.3670084774494171, + "step": 934 + }, + { + "epoch": 0.24830699774266365, + "grad_norm": 1.2914788702644175, + "learning_rate": 1.9702425917791242e-05, + "loss": 0.388730525970459, + "step": 935 + }, + { + "epoch": 0.2485725667242066, + "grad_norm": 1.128517931307855, + "learning_rate": 1.970136182499444e-05, + "loss": 0.38767656683921814, + "step": 936 + }, + { + "epoch": 0.24883813570574956, + "grad_norm": 1.0771582387425684, + "learning_rate": 1.9700295861889497e-05, + "loss": 0.35394930839538574, + "step": 937 + }, + { + "epoch": 0.24910370468729254, + "grad_norm": 1.0639329095738126, + "learning_rate": 1.9699228028681917e-05, + "loss": 0.3360324501991272, + "step": 938 + }, + { + "epoch": 0.2493692736688355, + "grad_norm": 1.116621384383513, + "learning_rate": 1.9698158325577563e-05, + "loss": 0.390169233083725, + "step": 939 + }, + { + "epoch": 0.24963484265037844, + "grad_norm": 1.108635788765439, + "learning_rate": 1.9697086752782666e-05, + "loss": 0.3921571671962738, + "step": 940 + }, + { + "epoch": 0.2499004116319214, + "grad_norm": 1.0665933445619122, + "learning_rate": 1.9696013310503808e-05, + "loss": 0.3795739710330963, + "step": 941 + }, + { + "epoch": 0.25016598061346434, + "grad_norm": 1.2202319167117164, + "learning_rate": 1.9694937998947935e-05, + "loss": 0.3891025185585022, + "step": 942 + }, + { + "epoch": 0.2504315495950073, + "grad_norm": 0.9751921056908068, + "learning_rate": 1.9693860818322357e-05, + "loss": 0.3548225164413452, + "step": 943 + }, + { + "epoch": 0.25069711857655025, + "grad_norm": 1.0555900207888067, + "learning_rate": 1.9692781768834747e-05, + "loss": 0.3696819543838501, + "step": 944 + }, + { + "epoch": 0.2509626875580932, + "grad_norm": 
1.1322184210541604, + "learning_rate": 1.9691700850693126e-05, + "loss": 0.3906037211418152, + "step": 945 + }, + { + "epoch": 0.25122825653963615, + "grad_norm": 1.072434154806742, + "learning_rate": 1.9690618064105883e-05, + "loss": 0.38181206583976746, + "step": 946 + }, + { + "epoch": 0.2514938255211791, + "grad_norm": 1.0644124497842522, + "learning_rate": 1.9689533409281765e-05, + "loss": 0.36904582381248474, + "step": 947 + }, + { + "epoch": 0.25175939450272206, + "grad_norm": 1.097105891991116, + "learning_rate": 1.9688446886429885e-05, + "loss": 0.3635823130607605, + "step": 948 + }, + { + "epoch": 0.25202496348426506, + "grad_norm": 0.9954310874837226, + "learning_rate": 1.9687358495759713e-05, + "loss": 0.3527260422706604, + "step": 949 + }, + { + "epoch": 0.252290532465808, + "grad_norm": 1.1902017812011518, + "learning_rate": 1.968626823748107e-05, + "loss": 0.3781110346317291, + "step": 950 + }, + { + "epoch": 0.25255610144735097, + "grad_norm": 1.0346217070487125, + "learning_rate": 1.968517611180415e-05, + "loss": 0.3931560814380646, + "step": 951 + }, + { + "epoch": 0.2528216704288939, + "grad_norm": 1.0783245371828571, + "learning_rate": 1.9684082118939503e-05, + "loss": 0.39111074805259705, + "step": 952 + }, + { + "epoch": 0.25308723941043687, + "grad_norm": 1.2090013193363973, + "learning_rate": 1.9682986259098037e-05, + "loss": 0.385967880487442, + "step": 953 + }, + { + "epoch": 0.2533528083919798, + "grad_norm": 1.0103878099057118, + "learning_rate": 1.9681888532491022e-05, + "loss": 0.34006553888320923, + "step": 954 + }, + { + "epoch": 0.2536183773735228, + "grad_norm": 1.0077784550534965, + "learning_rate": 1.9680788939330086e-05, + "loss": 0.36069998145103455, + "step": 955 + }, + { + "epoch": 0.2538839463550657, + "grad_norm": 1.090649670414093, + "learning_rate": 1.9679687479827212e-05, + "loss": 0.3354898691177368, + "step": 956 + }, + { + "epoch": 0.2541495153366087, + "grad_norm": 1.0691933766101984, + "learning_rate": 
1.9678584154194756e-05, + "loss": 0.35667335987091064, + "step": 957 + }, + { + "epoch": 0.25441508431815163, + "grad_norm": 1.2652121820599898, + "learning_rate": 1.9677478962645422e-05, + "loss": 0.4003029465675354, + "step": 958 + }, + { + "epoch": 0.2546806532996946, + "grad_norm": 1.0313200756086844, + "learning_rate": 1.9676371905392278e-05, + "loss": 0.34397056698799133, + "step": 959 + }, + { + "epoch": 0.25494622228123753, + "grad_norm": 1.0544706314753822, + "learning_rate": 1.9675262982648757e-05, + "loss": 0.35319578647613525, + "step": 960 + }, + { + "epoch": 0.2552117912627805, + "grad_norm": 1.0179000224070893, + "learning_rate": 1.967415219462864e-05, + "loss": 0.34840327501296997, + "step": 961 + }, + { + "epoch": 0.25547736024432344, + "grad_norm": 0.9360325612494472, + "learning_rate": 1.9673039541546076e-05, + "loss": 0.3298989534378052, + "step": 962 + }, + { + "epoch": 0.25574292922586644, + "grad_norm": 1.0904225305922717, + "learning_rate": 1.9671925023615572e-05, + "loss": 0.38438719511032104, + "step": 963 + }, + { + "epoch": 0.2560084982074094, + "grad_norm": 1.128608711014793, + "learning_rate": 1.9670808641051994e-05, + "loss": 0.3834493160247803, + "step": 964 + }, + { + "epoch": 0.25627406718895235, + "grad_norm": 1.0456501331264114, + "learning_rate": 1.9669690394070564e-05, + "loss": 0.3713288903236389, + "step": 965 + }, + { + "epoch": 0.2565396361704953, + "grad_norm": 1.0864184401996346, + "learning_rate": 1.966857028288687e-05, + "loss": 0.37564241886138916, + "step": 966 + }, + { + "epoch": 0.25680520515203825, + "grad_norm": 1.0329676619050974, + "learning_rate": 1.9667448307716857e-05, + "loss": 0.30162689089775085, + "step": 967 + }, + { + "epoch": 0.2570707741335812, + "grad_norm": 1.0948768995323135, + "learning_rate": 1.9666324468776826e-05, + "loss": 0.35969680547714233, + "step": 968 + }, + { + "epoch": 0.25733634311512416, + "grad_norm": 1.206651724690857, + "learning_rate": 1.9665198766283444e-05, + "loss": 
0.40947285294532776, + "step": 969 + }, + { + "epoch": 0.2576019120966671, + "grad_norm": 1.0651964473806064, + "learning_rate": 1.9664071200453726e-05, + "loss": 0.35868343710899353, + "step": 970 + }, + { + "epoch": 0.25786748107821006, + "grad_norm": 1.1330033214419297, + "learning_rate": 1.966294177150506e-05, + "loss": 0.3569234311580658, + "step": 971 + }, + { + "epoch": 0.258133050059753, + "grad_norm": 1.1641224987322216, + "learning_rate": 1.9661810479655184e-05, + "loss": 0.3381764888763428, + "step": 972 + }, + { + "epoch": 0.25839861904129596, + "grad_norm": 1.535927577191984, + "learning_rate": 1.9660677325122196e-05, + "loss": 0.39847785234451294, + "step": 973 + }, + { + "epoch": 0.2586641880228389, + "grad_norm": 0.9608622914302752, + "learning_rate": 1.965954230812456e-05, + "loss": 0.33162468671798706, + "step": 974 + }, + { + "epoch": 0.25892975700438187, + "grad_norm": 1.0421688584245348, + "learning_rate": 1.9658405428881087e-05, + "loss": 0.3627605438232422, + "step": 975 + }, + { + "epoch": 0.2591953259859248, + "grad_norm": 1.0501672081861986, + "learning_rate": 1.9657266687610965e-05, + "loss": 0.3253796100616455, + "step": 976 + }, + { + "epoch": 0.2594608949674678, + "grad_norm": 1.0198628618780734, + "learning_rate": 1.9656126084533716e-05, + "loss": 0.3341265916824341, + "step": 977 + }, + { + "epoch": 0.2597264639490108, + "grad_norm": 1.0202967346949672, + "learning_rate": 1.9654983619869242e-05, + "loss": 0.3714970052242279, + "step": 978 + }, + { + "epoch": 0.25999203293055373, + "grad_norm": 1.0333982958482495, + "learning_rate": 1.9653839293837798e-05, + "loss": 0.3360912501811981, + "step": 979 + }, + { + "epoch": 0.2602576019120967, + "grad_norm": 1.0322459892827835, + "learning_rate": 1.9652693106659995e-05, + "loss": 0.3780854642391205, + "step": 980 + }, + { + "epoch": 0.26052317089363963, + "grad_norm": 1.1062219940451128, + "learning_rate": 1.9651545058556803e-05, + "loss": 0.33595478534698486, + "step": 981 + }, + { + 
"epoch": 0.2607887398751826, + "grad_norm": 1.111464982167328, + "learning_rate": 1.965039514974955e-05, + "loss": 0.3608357012271881, + "step": 982 + }, + { + "epoch": 0.26105430885672554, + "grad_norm": 1.0024532391943957, + "learning_rate": 1.964924338045993e-05, + "loss": 0.3807666599750519, + "step": 983 + }, + { + "epoch": 0.2613198778382685, + "grad_norm": 1.0213030373156555, + "learning_rate": 1.964808975090999e-05, + "loss": 0.3551647663116455, + "step": 984 + }, + { + "epoch": 0.26158544681981144, + "grad_norm": 1.0761922389740786, + "learning_rate": 1.9646934261322135e-05, + "loss": 0.3771904706954956, + "step": 985 + }, + { + "epoch": 0.2618510158013544, + "grad_norm": 1.1925998045571422, + "learning_rate": 1.964577691191913e-05, + "loss": 0.41103222966194153, + "step": 986 + }, + { + "epoch": 0.26211658478289734, + "grad_norm": 1.0270282722515527, + "learning_rate": 1.9644617702924093e-05, + "loss": 0.34439292550086975, + "step": 987 + }, + { + "epoch": 0.2623821537644403, + "grad_norm": 1.1578988390038234, + "learning_rate": 1.9643456634560515e-05, + "loss": 0.41214391589164734, + "step": 988 + }, + { + "epoch": 0.26264772274598325, + "grad_norm": 0.9879567855265076, + "learning_rate": 1.9642293707052232e-05, + "loss": 0.3186502754688263, + "step": 989 + }, + { + "epoch": 0.2629132917275262, + "grad_norm": 1.039224300824638, + "learning_rate": 1.9641128920623438e-05, + "loss": 0.3534559905529022, + "step": 990 + }, + { + "epoch": 0.2631788607090692, + "grad_norm": 1.0867820667103292, + "learning_rate": 1.96399622754987e-05, + "loss": 0.35217320919036865, + "step": 991 + }, + { + "epoch": 0.26344442969061216, + "grad_norm": 0.954421559413849, + "learning_rate": 1.9638793771902924e-05, + "loss": 0.31661587953567505, + "step": 992 + }, + { + "epoch": 0.2637099986721551, + "grad_norm": 0.9881195075112362, + "learning_rate": 1.9637623410061392e-05, + "loss": 0.32468482851982117, + "step": 993 + }, + { + "epoch": 0.26397556765369806, + "grad_norm": 
1.0355017939200293, + "learning_rate": 1.9636451190199727e-05, + "loss": 0.346771776676178, + "step": 994 + }, + { + "epoch": 0.264241136635241, + "grad_norm": 1.0997948902450267, + "learning_rate": 1.9635277112543928e-05, + "loss": 0.36409270763397217, + "step": 995 + }, + { + "epoch": 0.26450670561678397, + "grad_norm": 1.2132528670947562, + "learning_rate": 1.963410117732034e-05, + "loss": 0.404967725276947, + "step": 996 + }, + { + "epoch": 0.2647722745983269, + "grad_norm": 1.1962964423617835, + "learning_rate": 1.9632923384755666e-05, + "loss": 0.39506661891937256, + "step": 997 + }, + { + "epoch": 0.26503784357986987, + "grad_norm": 1.1967751692769375, + "learning_rate": 1.9631743735076972e-05, + "loss": 0.3833203911781311, + "step": 998 + }, + { + "epoch": 0.2653034125614128, + "grad_norm": 1.083140773107417, + "learning_rate": 1.9630562228511682e-05, + "loss": 0.34522518515586853, + "step": 999 + }, + { + "epoch": 0.2655689815429558, + "grad_norm": 1.1367328076589556, + "learning_rate": 1.962937886528758e-05, + "loss": 0.3818400800228119, + "step": 1000 + }, + { + "epoch": 0.2658345505244987, + "grad_norm": 1.2496699132911573, + "learning_rate": 1.9628193645632796e-05, + "loss": 0.40827828645706177, + "step": 1001 + }, + { + "epoch": 0.2661001195060417, + "grad_norm": 1.0406728708542907, + "learning_rate": 1.962700656977583e-05, + "loss": 0.3448852002620697, + "step": 1002 + }, + { + "epoch": 0.26636568848758463, + "grad_norm": 1.1035895986897222, + "learning_rate": 1.9625817637945542e-05, + "loss": 0.36560773849487305, + "step": 1003 + }, + { + "epoch": 0.2666312574691276, + "grad_norm": 1.1637977684704512, + "learning_rate": 1.962462685037114e-05, + "loss": 0.38305893540382385, + "step": 1004 + }, + { + "epoch": 0.2668968264506706, + "grad_norm": 1.0320363555261158, + "learning_rate": 1.962343420728219e-05, + "loss": 0.3562568426132202, + "step": 1005 + }, + { + "epoch": 0.26716239543221354, + "grad_norm": 1.18312934129538, + "learning_rate": 
1.9622239708908626e-05, + "loss": 0.37458860874176025, + "step": 1006 + }, + { + "epoch": 0.2674279644137565, + "grad_norm": 1.058042672523148, + "learning_rate": 1.9621043355480726e-05, + "loss": 0.35852503776550293, + "step": 1007 + }, + { + "epoch": 0.26769353339529944, + "grad_norm": 1.0975239398171568, + "learning_rate": 1.961984514722914e-05, + "loss": 0.4056578278541565, + "step": 1008 + }, + { + "epoch": 0.2679591023768424, + "grad_norm": 1.1773057151207822, + "learning_rate": 1.9618645084384863e-05, + "loss": 0.4531296491622925, + "step": 1009 + }, + { + "epoch": 0.26822467135838535, + "grad_norm": 0.9095840908563808, + "learning_rate": 1.9617443167179256e-05, + "loss": 0.3356376886367798, + "step": 1010 + }, + { + "epoch": 0.2684902403399283, + "grad_norm": 1.09880831555839, + "learning_rate": 1.9616239395844033e-05, + "loss": 0.38045161962509155, + "step": 1011 + }, + { + "epoch": 0.26875580932147125, + "grad_norm": 1.028451509847456, + "learning_rate": 1.9615033770611268e-05, + "loss": 0.3549511730670929, + "step": 1012 + }, + { + "epoch": 0.2690213783030142, + "grad_norm": 1.0546213631772847, + "learning_rate": 1.9613826291713393e-05, + "loss": 0.33363252878189087, + "step": 1013 + }, + { + "epoch": 0.26928694728455715, + "grad_norm": 0.9539256345754278, + "learning_rate": 1.961261695938319e-05, + "loss": 0.3443339467048645, + "step": 1014 + }, + { + "epoch": 0.2695525162661001, + "grad_norm": 0.9897755385014708, + "learning_rate": 1.9611405773853807e-05, + "loss": 0.3258364796638489, + "step": 1015 + }, + { + "epoch": 0.26981808524764306, + "grad_norm": 1.0357196980681809, + "learning_rate": 1.961019273535875e-05, + "loss": 0.357122540473938, + "step": 1016 + }, + { + "epoch": 0.270083654229186, + "grad_norm": 0.9668495504097999, + "learning_rate": 1.9608977844131875e-05, + "loss": 0.32092082500457764, + "step": 1017 + }, + { + "epoch": 0.27034922321072896, + "grad_norm": 1.0067299219043435, + "learning_rate": 1.96077611004074e-05, + "loss": 
0.36354511976242065, + "step": 1018 + }, + { + "epoch": 0.27061479219227197, + "grad_norm": 1.0982243281899924, + "learning_rate": 1.9606542504419895e-05, + "loss": 0.37128758430480957, + "step": 1019 + }, + { + "epoch": 0.2708803611738149, + "grad_norm": 1.1112959838703056, + "learning_rate": 1.9605322056404294e-05, + "loss": 0.3732859790325165, + "step": 1020 + }, + { + "epoch": 0.2711459301553579, + "grad_norm": 1.0058814849372155, + "learning_rate": 1.9604099756595885e-05, + "loss": 0.32642674446105957, + "step": 1021 + }, + { + "epoch": 0.2714114991369008, + "grad_norm": 1.10371255398192, + "learning_rate": 1.9602875605230313e-05, + "loss": 0.376791775226593, + "step": 1022 + }, + { + "epoch": 0.2716770681184438, + "grad_norm": 1.0603007725295257, + "learning_rate": 1.960164960254358e-05, + "loss": 0.34514784812927246, + "step": 1023 + }, + { + "epoch": 0.27194263709998673, + "grad_norm": 1.225533197470795, + "learning_rate": 1.9600421748772044e-05, + "loss": 0.3752189576625824, + "step": 1024 + }, + { + "epoch": 0.2722082060815297, + "grad_norm": 1.0783483670765837, + "learning_rate": 1.959919204415242e-05, + "loss": 0.33100831508636475, + "step": 1025 + }, + { + "epoch": 0.27247377506307263, + "grad_norm": 1.1910668751599112, + "learning_rate": 1.9597960488921785e-05, + "loss": 0.42713654041290283, + "step": 1026 + }, + { + "epoch": 0.2727393440446156, + "grad_norm": 1.110777223027095, + "learning_rate": 1.9596727083317565e-05, + "loss": 0.3746519684791565, + "step": 1027 + }, + { + "epoch": 0.27300491302615854, + "grad_norm": 1.1133725792972708, + "learning_rate": 1.9595491827577543e-05, + "loss": 0.39962098002433777, + "step": 1028 + }, + { + "epoch": 0.2732704820077015, + "grad_norm": 1.0544310192284179, + "learning_rate": 1.9594254721939866e-05, + "loss": 0.35112401843070984, + "step": 1029 + }, + { + "epoch": 0.27353605098924444, + "grad_norm": 1.0749153592990304, + "learning_rate": 1.9593015766643037e-05, + "loss": 0.3648139238357544, + "step": 1030 + 
}, + { + "epoch": 0.2738016199707874, + "grad_norm": 1.0268996180520502, + "learning_rate": 1.9591774961925902e-05, + "loss": 0.31544098258018494, + "step": 1031 + }, + { + "epoch": 0.27406718895233034, + "grad_norm": 1.1260952074052377, + "learning_rate": 1.959053230802768e-05, + "loss": 0.3593738079071045, + "step": 1032 + }, + { + "epoch": 0.27433275793387335, + "grad_norm": 1.1009303195981317, + "learning_rate": 1.958928780518794e-05, + "loss": 0.39784368872642517, + "step": 1033 + }, + { + "epoch": 0.2745983269154163, + "grad_norm": 1.1304731324804922, + "learning_rate": 1.9588041453646606e-05, + "loss": 0.3869936168193817, + "step": 1034 + }, + { + "epoch": 0.27486389589695925, + "grad_norm": 0.9803124730292929, + "learning_rate": 1.958679325364396e-05, + "loss": 0.31108593940734863, + "step": 1035 + }, + { + "epoch": 0.2751294648785022, + "grad_norm": 1.098791994520666, + "learning_rate": 1.958554320542064e-05, + "loss": 0.3917708098888397, + "step": 1036 + }, + { + "epoch": 0.27539503386004516, + "grad_norm": 0.9969159455112034, + "learning_rate": 1.958429130921764e-05, + "loss": 0.36782944202423096, + "step": 1037 + }, + { + "epoch": 0.2756606028415881, + "grad_norm": 0.9381100088398062, + "learning_rate": 1.9583037565276314e-05, + "loss": 0.36196422576904297, + "step": 1038 + }, + { + "epoch": 0.27592617182313106, + "grad_norm": 1.0783473143219733, + "learning_rate": 1.9581781973838368e-05, + "loss": 0.32208555936813354, + "step": 1039 + }, + { + "epoch": 0.276191740804674, + "grad_norm": 0.9653316626874986, + "learning_rate": 1.958052453514586e-05, + "loss": 0.33451759815216064, + "step": 1040 + }, + { + "epoch": 0.27645730978621696, + "grad_norm": 1.0328342572912144, + "learning_rate": 1.9579265249441216e-05, + "loss": 0.3228047788143158, + "step": 1041 + }, + { + "epoch": 0.2767228787677599, + "grad_norm": 1.0944658380016739, + "learning_rate": 1.957800411696721e-05, + "loss": 0.36992791295051575, + "step": 1042 + }, + { + "epoch": 0.27698844774930287, 
+ "grad_norm": 0.9799580951396849, + "learning_rate": 1.9576741137966967e-05, + "loss": 0.3072342276573181, + "step": 1043 + }, + { + "epoch": 0.2772540167308458, + "grad_norm": 1.0637046756594408, + "learning_rate": 1.9575476312683985e-05, + "loss": 0.3372080326080322, + "step": 1044 + }, + { + "epoch": 0.27751958571238877, + "grad_norm": 1.0509701364189301, + "learning_rate": 1.95742096413621e-05, + "loss": 0.34725332260131836, + "step": 1045 + }, + { + "epoch": 0.2777851546939317, + "grad_norm": 1.1053591471100805, + "learning_rate": 1.9572941124245516e-05, + "loss": 0.36714982986450195, + "step": 1046 + }, + { + "epoch": 0.27805072367547473, + "grad_norm": 1.208127444221669, + "learning_rate": 1.957167076157878e-05, + "loss": 0.4163498282432556, + "step": 1047 + }, + { + "epoch": 0.2783162926570177, + "grad_norm": 1.1861975128714084, + "learning_rate": 1.9570398553606815e-05, + "loss": 0.40059348940849304, + "step": 1048 + }, + { + "epoch": 0.27858186163856063, + "grad_norm": 1.085993120538819, + "learning_rate": 1.956912450057488e-05, + "loss": 0.3622320294380188, + "step": 1049 + }, + { + "epoch": 0.2788474306201036, + "grad_norm": 1.1326017870689584, + "learning_rate": 1.9567848602728595e-05, + "loss": 0.35159534215927124, + "step": 1050 + }, + { + "epoch": 0.27911299960164654, + "grad_norm": 0.9516936878211085, + "learning_rate": 1.9566570860313944e-05, + "loss": 0.3093762993812561, + "step": 1051 + }, + { + "epoch": 0.2793785685831895, + "grad_norm": 1.040326152894859, + "learning_rate": 1.9565291273577255e-05, + "loss": 0.341474324464798, + "step": 1052 + }, + { + "epoch": 0.27964413756473244, + "grad_norm": 1.0885626452470811, + "learning_rate": 1.9564009842765225e-05, + "loss": 0.35376566648483276, + "step": 1053 + }, + { + "epoch": 0.2799097065462754, + "grad_norm": 1.09154548256864, + "learning_rate": 1.9562726568124892e-05, + "loss": 0.3487662374973297, + "step": 1054 + }, + { + "epoch": 0.28017527552781835, + "grad_norm": 1.014222924008021, + 
"learning_rate": 1.956144144990366e-05, + "loss": 0.3610745370388031, + "step": 1055 + }, + { + "epoch": 0.2804408445093613, + "grad_norm": 0.9789890869027496, + "learning_rate": 1.9560154488349284e-05, + "loss": 0.33230137825012207, + "step": 1056 + }, + { + "epoch": 0.28070641349090425, + "grad_norm": 1.0104241821081763, + "learning_rate": 1.9558865683709875e-05, + "loss": 0.310351699590683, + "step": 1057 + }, + { + "epoch": 0.2809719824724472, + "grad_norm": 1.1188708821966176, + "learning_rate": 1.9557575036233897e-05, + "loss": 0.39930224418640137, + "step": 1058 + }, + { + "epoch": 0.28123755145399015, + "grad_norm": 1.0498907782820184, + "learning_rate": 1.955628254617017e-05, + "loss": 0.3345295488834381, + "step": 1059 + }, + { + "epoch": 0.2815031204355331, + "grad_norm": 1.1059864789744056, + "learning_rate": 1.9554988213767875e-05, + "loss": 0.37963107228279114, + "step": 1060 + }, + { + "epoch": 0.2817686894170761, + "grad_norm": 1.0825219178132603, + "learning_rate": 1.9553692039276545e-05, + "loss": 0.3923654854297638, + "step": 1061 + }, + { + "epoch": 0.28203425839861906, + "grad_norm": 1.0736283126776336, + "learning_rate": 1.9552394022946068e-05, + "loss": 0.363646924495697, + "step": 1062 + }, + { + "epoch": 0.282299827380162, + "grad_norm": 1.1051684289136041, + "learning_rate": 1.9551094165026677e-05, + "loss": 0.35486382246017456, + "step": 1063 + }, + { + "epoch": 0.28256539636170497, + "grad_norm": 1.0845117937449689, + "learning_rate": 1.954979246576898e-05, + "loss": 0.35215455293655396, + "step": 1064 + }, + { + "epoch": 0.2828309653432479, + "grad_norm": 1.1587243435425785, + "learning_rate": 1.9548488925423924e-05, + "loss": 0.3936809003353119, + "step": 1065 + }, + { + "epoch": 0.28309653432479087, + "grad_norm": 1.0399965264634783, + "learning_rate": 1.9547183544242817e-05, + "loss": 0.36852866411209106, + "step": 1066 + }, + { + "epoch": 0.2833621033063338, + "grad_norm": 1.0679817467710029, + "learning_rate": 
1.954587632247732e-05, + "loss": 0.3552001714706421, + "step": 1067 + }, + { + "epoch": 0.2836276722878768, + "grad_norm": 1.1330169189394568, + "learning_rate": 1.9544567260379455e-05, + "loss": 0.3684498965740204, + "step": 1068 + }, + { + "epoch": 0.2838932412694197, + "grad_norm": 0.9857931835351914, + "learning_rate": 1.9543256358201586e-05, + "loss": 0.3367026448249817, + "step": 1069 + }, + { + "epoch": 0.2841588102509627, + "grad_norm": 1.0677692738667734, + "learning_rate": 1.9541943616196443e-05, + "loss": 0.3702335059642792, + "step": 1070 + }, + { + "epoch": 0.28442437923250563, + "grad_norm": 1.1114119189633371, + "learning_rate": 1.9540629034617108e-05, + "loss": 0.3430984318256378, + "step": 1071 + }, + { + "epoch": 0.2846899482140486, + "grad_norm": 1.1406170357402363, + "learning_rate": 1.953931261371702e-05, + "loss": 0.36514735221862793, + "step": 1072 + }, + { + "epoch": 0.28495551719559153, + "grad_norm": 1.0428104806049732, + "learning_rate": 1.9537994353749963e-05, + "loss": 0.3524945080280304, + "step": 1073 + }, + { + "epoch": 0.2852210861771345, + "grad_norm": 1.0283973360981475, + "learning_rate": 1.9536674254970088e-05, + "loss": 0.32405683398246765, + "step": 1074 + }, + { + "epoch": 0.2854866551586775, + "grad_norm": 1.0649875575316718, + "learning_rate": 1.9535352317631888e-05, + "loss": 0.30863165855407715, + "step": 1075 + }, + { + "epoch": 0.28575222414022045, + "grad_norm": 1.0647565002745494, + "learning_rate": 1.953402854199022e-05, + "loss": 0.34343889355659485, + "step": 1076 + }, + { + "epoch": 0.2860177931217634, + "grad_norm": 1.2339349330872973, + "learning_rate": 1.9532702928300292e-05, + "loss": 0.3639434576034546, + "step": 1077 + }, + { + "epoch": 0.28628336210330635, + "grad_norm": 1.0888261251069975, + "learning_rate": 1.9531375476817667e-05, + "loss": 0.3380300998687744, + "step": 1078 + }, + { + "epoch": 0.2865489310848493, + "grad_norm": 1.1078839119175599, + "learning_rate": 1.9530046187798267e-05, + "loss": 
0.3323265016078949, + "step": 1079 + }, + { + "epoch": 0.28681450006639225, + "grad_norm": 1.0529271541493659, + "learning_rate": 1.9528715061498355e-05, + "loss": 0.3439220190048218, + "step": 1080 + }, + { + "epoch": 0.2870800690479352, + "grad_norm": 1.088357435010649, + "learning_rate": 1.952738209817456e-05, + "loss": 0.36376965045928955, + "step": 1081 + }, + { + "epoch": 0.28734563802947816, + "grad_norm": 1.0188116446188513, + "learning_rate": 1.952604729808386e-05, + "loss": 0.3281211853027344, + "step": 1082 + }, + { + "epoch": 0.2876112070110211, + "grad_norm": 1.0999135645201878, + "learning_rate": 1.9524710661483594e-05, + "loss": 0.3538089990615845, + "step": 1083 + }, + { + "epoch": 0.28787677599256406, + "grad_norm": 1.1475903462769852, + "learning_rate": 1.9523372188631442e-05, + "loss": 0.3982803225517273, + "step": 1084 + }, + { + "epoch": 0.288142344974107, + "grad_norm": 1.11408923860859, + "learning_rate": 1.9522031879785453e-05, + "loss": 0.3958810567855835, + "step": 1085 + }, + { + "epoch": 0.28840791395564996, + "grad_norm": 1.191451776763126, + "learning_rate": 1.9520689735204016e-05, + "loss": 0.40133988857269287, + "step": 1086 + }, + { + "epoch": 0.2886734829371929, + "grad_norm": 1.048862195613205, + "learning_rate": 1.9519345755145886e-05, + "loss": 0.32411646842956543, + "step": 1087 + }, + { + "epoch": 0.28893905191873587, + "grad_norm": 1.210003646730205, + "learning_rate": 1.9517999939870166e-05, + "loss": 0.38678207993507385, + "step": 1088 + }, + { + "epoch": 0.2892046209002789, + "grad_norm": 1.0663258874668164, + "learning_rate": 1.951665228963631e-05, + "loss": 0.36829686164855957, + "step": 1089 + }, + { + "epoch": 0.2894701898818218, + "grad_norm": 0.9884592653808488, + "learning_rate": 1.9515302804704134e-05, + "loss": 0.38631704449653625, + "step": 1090 + }, + { + "epoch": 0.2897357588633648, + "grad_norm": 1.1934503112083867, + "learning_rate": 1.9513951485333798e-05, + "loss": 0.39288902282714844, + "step": 1091 + }, + 
{ + "epoch": 0.29000132784490773, + "grad_norm": 1.0804742457342014, + "learning_rate": 1.9512598331785822e-05, + "loss": 0.3655658960342407, + "step": 1092 + }, + { + "epoch": 0.2902668968264507, + "grad_norm": 0.9929300268939649, + "learning_rate": 1.9511243344321076e-05, + "loss": 0.3263852596282959, + "step": 1093 + }, + { + "epoch": 0.29053246580799363, + "grad_norm": 1.1166275426043832, + "learning_rate": 1.9509886523200792e-05, + "loss": 0.37939125299453735, + "step": 1094 + }, + { + "epoch": 0.2907980347895366, + "grad_norm": 1.074761796186792, + "learning_rate": 1.9508527868686543e-05, + "loss": 0.34218865633010864, + "step": 1095 + }, + { + "epoch": 0.29106360377107954, + "grad_norm": 1.036633851483027, + "learning_rate": 1.9507167381040263e-05, + "loss": 0.368261456489563, + "step": 1096 + }, + { + "epoch": 0.2913291727526225, + "grad_norm": 1.083724731335207, + "learning_rate": 1.950580506052424e-05, + "loss": 0.36133286356925964, + "step": 1097 + }, + { + "epoch": 0.29159474173416544, + "grad_norm": 1.0542758401630365, + "learning_rate": 1.9504440907401113e-05, + "loss": 0.3667418658733368, + "step": 1098 + }, + { + "epoch": 0.2918603107157084, + "grad_norm": 0.9961595646698646, + "learning_rate": 1.950307492193387e-05, + "loss": 0.34444570541381836, + "step": 1099 + }, + { + "epoch": 0.29212587969725134, + "grad_norm": 1.1203470867439278, + "learning_rate": 1.9501707104385863e-05, + "loss": 0.41261589527130127, + "step": 1100 + }, + { + "epoch": 0.2923914486787943, + "grad_norm": 1.0847270622391922, + "learning_rate": 1.9500337455020788e-05, + "loss": 0.3762981593608856, + "step": 1101 + }, + { + "epoch": 0.29265701766033725, + "grad_norm": 1.108635996430537, + "learning_rate": 1.9498965974102697e-05, + "loss": 0.3527417480945587, + "step": 1102 + }, + { + "epoch": 0.29292258664188026, + "grad_norm": 1.1555485155020386, + "learning_rate": 1.9497592661895996e-05, + "loss": 0.34812286496162415, + "step": 1103 + }, + { + "epoch": 0.2931881556234232, + 
"grad_norm": 0.9844968948580171, + "learning_rate": 1.9496217518665444e-05, + "loss": 0.33663398027420044, + "step": 1104 + }, + { + "epoch": 0.29345372460496616, + "grad_norm": 0.997090208380272, + "learning_rate": 1.9494840544676156e-05, + "loss": 0.3632991313934326, + "step": 1105 + }, + { + "epoch": 0.2937192935865091, + "grad_norm": 1.3515018592791732, + "learning_rate": 1.9493461740193587e-05, + "loss": 0.37389490008354187, + "step": 1106 + }, + { + "epoch": 0.29398486256805206, + "grad_norm": 1.204356467911551, + "learning_rate": 1.949208110548356e-05, + "loss": 0.3634020686149597, + "step": 1107 + }, + { + "epoch": 0.294250431549595, + "grad_norm": 1.0778805299295515, + "learning_rate": 1.9490698640812247e-05, + "loss": 0.36032742261886597, + "step": 1108 + }, + { + "epoch": 0.29451600053113797, + "grad_norm": 1.1504972318858309, + "learning_rate": 1.9489314346446164e-05, + "loss": 0.3385765552520752, + "step": 1109 + }, + { + "epoch": 0.2947815695126809, + "grad_norm": 1.0946200184976398, + "learning_rate": 1.9487928222652195e-05, + "loss": 0.3751915991306305, + "step": 1110 + }, + { + "epoch": 0.29504713849422387, + "grad_norm": 1.0903856446796527, + "learning_rate": 1.9486540269697564e-05, + "loss": 0.36069825291633606, + "step": 1111 + }, + { + "epoch": 0.2953127074757668, + "grad_norm": 1.009573568422265, + "learning_rate": 1.948515048784985e-05, + "loss": 0.32703787088394165, + "step": 1112 + }, + { + "epoch": 0.2955782764573098, + "grad_norm": 0.9196963642088989, + "learning_rate": 1.948375887737699e-05, + "loss": 0.312494158744812, + "step": 1113 + }, + { + "epoch": 0.2958438454388527, + "grad_norm": 0.9880564768480579, + "learning_rate": 1.9482365438547272e-05, + "loss": 0.30626165866851807, + "step": 1114 + }, + { + "epoch": 0.2961094144203957, + "grad_norm": 1.07827456569524, + "learning_rate": 1.948097017162933e-05, + "loss": 0.3625817894935608, + "step": 1115 + }, + { + "epoch": 0.29637498340193863, + "grad_norm": 1.1789711489550672, + 
"learning_rate": 1.9479573076892152e-05, + "loss": 0.38403773307800293, + "step": 1116 + }, + { + "epoch": 0.2966405523834816, + "grad_norm": 1.0638061154391991, + "learning_rate": 1.9478174154605093e-05, + "loss": 0.3645164966583252, + "step": 1117 + }, + { + "epoch": 0.2969061213650246, + "grad_norm": 1.0428170431433939, + "learning_rate": 1.9476773405037836e-05, + "loss": 0.3714389503002167, + "step": 1118 + }, + { + "epoch": 0.29717169034656754, + "grad_norm": 1.1488169814057956, + "learning_rate": 1.9475370828460436e-05, + "loss": 0.39809900522232056, + "step": 1119 + }, + { + "epoch": 0.2974372593281105, + "grad_norm": 1.0702503358715294, + "learning_rate": 1.9473966425143292e-05, + "loss": 0.3698490262031555, + "step": 1120 + }, + { + "epoch": 0.29770282830965344, + "grad_norm": 1.0166542138266799, + "learning_rate": 1.947256019535716e-05, + "loss": 0.3072658181190491, + "step": 1121 + }, + { + "epoch": 0.2979683972911964, + "grad_norm": 1.0479599499698302, + "learning_rate": 1.947115213937314e-05, + "loss": 0.3294365406036377, + "step": 1122 + }, + { + "epoch": 0.29823396627273935, + "grad_norm": 1.007749929257712, + "learning_rate": 1.9469742257462684e-05, + "loss": 0.34933674335479736, + "step": 1123 + }, + { + "epoch": 0.2984995352542823, + "grad_norm": 1.133473784296847, + "learning_rate": 1.946833054989761e-05, + "loss": 0.34586772322654724, + "step": 1124 + }, + { + "epoch": 0.29876510423582525, + "grad_norm": 1.0225090189343862, + "learning_rate": 1.9466917016950076e-05, + "loss": 0.33158159255981445, + "step": 1125 + }, + { + "epoch": 0.2990306732173682, + "grad_norm": 1.0162208348084125, + "learning_rate": 1.946550165889259e-05, + "loss": 0.32665887475013733, + "step": 1126 + }, + { + "epoch": 0.29929624219891116, + "grad_norm": 1.1065475895733048, + "learning_rate": 1.946408447599802e-05, + "loss": 0.3333032429218292, + "step": 1127 + }, + { + "epoch": 0.2995618111804541, + "grad_norm": 1.0958997421479173, + "learning_rate": 
1.9462665468539582e-05, + "loss": 0.3747228980064392, + "step": 1128 + }, + { + "epoch": 0.29982738016199706, + "grad_norm": 0.9447906277138843, + "learning_rate": 1.9461244636790845e-05, + "loss": 0.34040436148643494, + "step": 1129 + }, + { + "epoch": 0.30009294914354, + "grad_norm": 1.0062775259583612, + "learning_rate": 1.9459821981025723e-05, + "loss": 0.3279584050178528, + "step": 1130 + }, + { + "epoch": 0.30035851812508296, + "grad_norm": 1.136819731097147, + "learning_rate": 1.9458397501518496e-05, + "loss": 0.33507707715034485, + "step": 1131 + }, + { + "epoch": 0.30062408710662597, + "grad_norm": 0.9978141677663763, + "learning_rate": 1.945697119854378e-05, + "loss": 0.3511529862880707, + "step": 1132 + }, + { + "epoch": 0.3008896560881689, + "grad_norm": 1.1038696900269844, + "learning_rate": 1.945554307237655e-05, + "loss": 0.33260345458984375, + "step": 1133 + }, + { + "epoch": 0.3011552250697119, + "grad_norm": 1.1267244347055163, + "learning_rate": 1.9454113123292133e-05, + "loss": 0.37698423862457275, + "step": 1134 + }, + { + "epoch": 0.3014207940512548, + "grad_norm": 1.0482054605062838, + "learning_rate": 1.945268135156621e-05, + "loss": 0.34843316674232483, + "step": 1135 + }, + { + "epoch": 0.3016863630327978, + "grad_norm": 1.1518938911568848, + "learning_rate": 1.9451247757474805e-05, + "loss": 0.38723987340927124, + "step": 1136 + }, + { + "epoch": 0.30195193201434073, + "grad_norm": 1.0597410032778982, + "learning_rate": 1.9449812341294302e-05, + "loss": 0.3836795389652252, + "step": 1137 + }, + { + "epoch": 0.3022175009958837, + "grad_norm": 0.9828275773453091, + "learning_rate": 1.9448375103301424e-05, + "loss": 0.3362433612346649, + "step": 1138 + }, + { + "epoch": 0.30248306997742663, + "grad_norm": 1.0750556057741842, + "learning_rate": 1.9446936043773264e-05, + "loss": 0.3615792393684387, + "step": 1139 + }, + { + "epoch": 0.3027486389589696, + "grad_norm": 1.0233339727957385, + "learning_rate": 1.944549516298725e-05, + "loss": 
0.33693915605545044, + "step": 1140 + }, + { + "epoch": 0.30301420794051254, + "grad_norm": 1.0074205515838075, + "learning_rate": 1.9444052461221167e-05, + "loss": 0.32611170411109924, + "step": 1141 + }, + { + "epoch": 0.3032797769220555, + "grad_norm": 1.0257687736898828, + "learning_rate": 1.9442607938753153e-05, + "loss": 0.3504132032394409, + "step": 1142 + }, + { + "epoch": 0.30354534590359844, + "grad_norm": 1.081217851264946, + "learning_rate": 1.944116159586169e-05, + "loss": 0.3598168194293976, + "step": 1143 + }, + { + "epoch": 0.3038109148851414, + "grad_norm": 1.025673115447757, + "learning_rate": 1.9439713432825625e-05, + "loss": 0.33447909355163574, + "step": 1144 + }, + { + "epoch": 0.30407648386668434, + "grad_norm": 0.9795127759513904, + "learning_rate": 1.943826344992414e-05, + "loss": 0.34026333689689636, + "step": 1145 + }, + { + "epoch": 0.30434205284822735, + "grad_norm": 1.070042442644686, + "learning_rate": 1.9436811647436772e-05, + "loss": 0.323203980922699, + "step": 1146 + }, + { + "epoch": 0.3046076218297703, + "grad_norm": 1.0588861737680213, + "learning_rate": 1.943535802564342e-05, + "loss": 0.332398921251297, + "step": 1147 + }, + { + "epoch": 0.30487319081131325, + "grad_norm": 1.175168490214782, + "learning_rate": 1.9433902584824316e-05, + "loss": 0.3882995545864105, + "step": 1148 + }, + { + "epoch": 0.3051387597928562, + "grad_norm": 1.093435738226519, + "learning_rate": 1.943244532526006e-05, + "loss": 0.35262739658355713, + "step": 1149 + }, + { + "epoch": 0.30540432877439916, + "grad_norm": 1.1043029209432185, + "learning_rate": 1.9430986247231586e-05, + "loss": 0.39694511890411377, + "step": 1150 + }, + { + "epoch": 0.3056698977559421, + "grad_norm": 1.1276348856512544, + "learning_rate": 1.9429525351020197e-05, + "loss": 0.3692580759525299, + "step": 1151 + }, + { + "epoch": 0.30593546673748506, + "grad_norm": 1.1284903074468042, + "learning_rate": 1.9428062636907526e-05, + "loss": 0.3685402572154999, + "step": 1152 + }, + 
{ + "epoch": 0.306201035719028, + "grad_norm": 1.1120189967723886, + "learning_rate": 1.9426598105175575e-05, + "loss": 0.37557253241539, + "step": 1153 + }, + { + "epoch": 0.30646660470057097, + "grad_norm": 0.9544414078231065, + "learning_rate": 1.9425131756106687e-05, + "loss": 0.3323203921318054, + "step": 1154 + }, + { + "epoch": 0.3067321736821139, + "grad_norm": 1.085159318227953, + "learning_rate": 1.9423663589983554e-05, + "loss": 0.37262290716171265, + "step": 1155 + }, + { + "epoch": 0.30699774266365687, + "grad_norm": 1.138203326668225, + "learning_rate": 1.9422193607089224e-05, + "loss": 0.36621618270874023, + "step": 1156 + }, + { + "epoch": 0.3072633116451998, + "grad_norm": 1.0326975743253168, + "learning_rate": 1.942072180770709e-05, + "loss": 0.3844982385635376, + "step": 1157 + }, + { + "epoch": 0.3075288806267428, + "grad_norm": 0.9983252957319158, + "learning_rate": 1.94192481921209e-05, + "loss": 0.3229531943798065, + "step": 1158 + }, + { + "epoch": 0.3077944496082857, + "grad_norm": 1.0805327657153956, + "learning_rate": 1.9417772760614745e-05, + "loss": 0.34862661361694336, + "step": 1159 + }, + { + "epoch": 0.30806001858982873, + "grad_norm": 1.0329581193958253, + "learning_rate": 1.941629551347308e-05, + "loss": 0.35496509075164795, + "step": 1160 + }, + { + "epoch": 0.3083255875713717, + "grad_norm": 1.051163133463375, + "learning_rate": 1.9414816450980686e-05, + "loss": 0.3695065975189209, + "step": 1161 + }, + { + "epoch": 0.30859115655291464, + "grad_norm": 1.0254769076684076, + "learning_rate": 1.9413335573422723e-05, + "loss": 0.3472525179386139, + "step": 1162 + }, + { + "epoch": 0.3088567255344576, + "grad_norm": 1.008969123299064, + "learning_rate": 1.9411852881084683e-05, + "loss": 0.3447483479976654, + "step": 1163 + }, + { + "epoch": 0.30912229451600054, + "grad_norm": 0.9333424416365893, + "learning_rate": 1.941036837425241e-05, + "loss": 0.31047824025154114, + "step": 1164 + }, + { + "epoch": 0.3093878634975435, + 
"grad_norm": 1.0570471012152007, + "learning_rate": 1.9408882053212094e-05, + "loss": 0.34502410888671875, + "step": 1165 + }, + { + "epoch": 0.30965343247908644, + "grad_norm": 1.1849442151759089, + "learning_rate": 1.940739391825029e-05, + "loss": 0.3663109540939331, + "step": 1166 + }, + { + "epoch": 0.3099190014606294, + "grad_norm": 1.1136723468346887, + "learning_rate": 1.9405903969653887e-05, + "loss": 0.3635792136192322, + "step": 1167 + }, + { + "epoch": 0.31018457044217235, + "grad_norm": 1.0769441486287206, + "learning_rate": 1.940441220771013e-05, + "loss": 0.359528124332428, + "step": 1168 + }, + { + "epoch": 0.3104501394237153, + "grad_norm": 1.043185528474707, + "learning_rate": 1.9402918632706618e-05, + "loss": 0.32566630840301514, + "step": 1169 + }, + { + "epoch": 0.31071570840525825, + "grad_norm": 1.0286897614370414, + "learning_rate": 1.940142324493129e-05, + "loss": 0.34758460521698, + "step": 1170 + }, + { + "epoch": 0.3109812773868012, + "grad_norm": 1.0148570847451444, + "learning_rate": 1.9399926044672438e-05, + "loss": 0.3484055995941162, + "step": 1171 + }, + { + "epoch": 0.31124684636834415, + "grad_norm": 1.1806099587394492, + "learning_rate": 1.93984270322187e-05, + "loss": 0.41958773136138916, + "step": 1172 + }, + { + "epoch": 0.3115124153498871, + "grad_norm": 1.085314216258339, + "learning_rate": 1.9396926207859085e-05, + "loss": 0.3578398525714874, + "step": 1173 + }, + { + "epoch": 0.3117779843314301, + "grad_norm": 1.0721505496116728, + "learning_rate": 1.9395423571882917e-05, + "loss": 0.38140422105789185, + "step": 1174 + }, + { + "epoch": 0.31204355331297307, + "grad_norm": 1.1224661464468277, + "learning_rate": 1.9393919124579898e-05, + "loss": 0.3782861828804016, + "step": 1175 + }, + { + "epoch": 0.312309122294516, + "grad_norm": 1.0482874367837718, + "learning_rate": 1.939241286624006e-05, + "loss": 0.3211040496826172, + "step": 1176 + }, + { + "epoch": 0.31257469127605897, + "grad_norm": 0.9909015391020882, + 
"learning_rate": 1.9390904797153795e-05, + "loss": 0.3090783953666687, + "step": 1177 + }, + { + "epoch": 0.3128402602576019, + "grad_norm": 1.0203166402095418, + "learning_rate": 1.938939491761184e-05, + "loss": 0.3542889654636383, + "step": 1178 + }, + { + "epoch": 0.3131058292391449, + "grad_norm": 1.016567110972503, + "learning_rate": 1.9387883227905285e-05, + "loss": 0.369164377450943, + "step": 1179 + }, + { + "epoch": 0.3133713982206878, + "grad_norm": 1.1492868354113897, + "learning_rate": 1.9386369728325562e-05, + "loss": 0.35200801491737366, + "step": 1180 + }, + { + "epoch": 0.3136369672022308, + "grad_norm": 1.1332626811675575, + "learning_rate": 1.9384854419164454e-05, + "loss": 0.3696276843547821, + "step": 1181 + }, + { + "epoch": 0.31390253618377373, + "grad_norm": 0.9856387823657043, + "learning_rate": 1.9383337300714104e-05, + "loss": 0.3403652012348175, + "step": 1182 + }, + { + "epoch": 0.3141681051653167, + "grad_norm": 0.9608300998441986, + "learning_rate": 1.9381818373266987e-05, + "loss": 0.3307063579559326, + "step": 1183 + }, + { + "epoch": 0.31443367414685963, + "grad_norm": 1.002604353314113, + "learning_rate": 1.9380297637115933e-05, + "loss": 0.3223465085029602, + "step": 1184 + }, + { + "epoch": 0.3146992431284026, + "grad_norm": 1.1668926481270334, + "learning_rate": 1.9378775092554124e-05, + "loss": 0.4013838768005371, + "step": 1185 + }, + { + "epoch": 0.31496481210994554, + "grad_norm": 1.2376602965184098, + "learning_rate": 1.9377250739875095e-05, + "loss": 0.3596574664115906, + "step": 1186 + }, + { + "epoch": 0.3152303810914885, + "grad_norm": 1.0683740579575798, + "learning_rate": 1.937572457937271e-05, + "loss": 0.41639968752861023, + "step": 1187 + }, + { + "epoch": 0.3154959500730315, + "grad_norm": 0.950341293536979, + "learning_rate": 1.9374196611341212e-05, + "loss": 0.3001318573951721, + "step": 1188 + }, + { + "epoch": 0.31576151905457445, + "grad_norm": 1.0390515723802394, + "learning_rate": 1.937266683607516e-05, + 
"loss": 0.33238667249679565, + "step": 1189 + }, + { + "epoch": 0.3160270880361174, + "grad_norm": 1.0559788990716998, + "learning_rate": 1.9371135253869483e-05, + "loss": 0.33638086915016174, + "step": 1190 + }, + { + "epoch": 0.31629265701766035, + "grad_norm": 1.0736881782093415, + "learning_rate": 1.9369601865019452e-05, + "loss": 0.34445878863334656, + "step": 1191 + }, + { + "epoch": 0.3165582259992033, + "grad_norm": 1.116672373820781, + "learning_rate": 1.9368066669820684e-05, + "loss": 0.33554553985595703, + "step": 1192 + }, + { + "epoch": 0.31682379498074625, + "grad_norm": 1.2940820576034424, + "learning_rate": 1.936652966856915e-05, + "loss": 0.3668493628501892, + "step": 1193 + }, + { + "epoch": 0.3170893639622892, + "grad_norm": 1.1460266164336763, + "learning_rate": 1.9364990861561163e-05, + "loss": 0.3813396990299225, + "step": 1194 + }, + { + "epoch": 0.31735493294383216, + "grad_norm": 1.048871056336621, + "learning_rate": 1.936345024909339e-05, + "loss": 0.33625900745391846, + "step": 1195 + }, + { + "epoch": 0.3176205019253751, + "grad_norm": 1.0238786804477913, + "learning_rate": 1.9361907831462836e-05, + "loss": 0.31131428480148315, + "step": 1196 + }, + { + "epoch": 0.31788607090691806, + "grad_norm": 0.9751456398999766, + "learning_rate": 1.936036360896687e-05, + "loss": 0.32571589946746826, + "step": 1197 + }, + { + "epoch": 0.318151639888461, + "grad_norm": 1.1296061558872548, + "learning_rate": 1.9358817581903193e-05, + "loss": 0.36207717657089233, + "step": 1198 + }, + { + "epoch": 0.31841720887000396, + "grad_norm": 1.062344543153862, + "learning_rate": 1.9357269750569864e-05, + "loss": 0.3743855059146881, + "step": 1199 + }, + { + "epoch": 0.3186827778515469, + "grad_norm": 1.1254060799620074, + "learning_rate": 1.9355720115265283e-05, + "loss": 0.3862137794494629, + "step": 1200 + }, + { + "epoch": 0.31894834683308987, + "grad_norm": 1.1135871061204583, + "learning_rate": 1.935416867628821e-05, + "loss": 0.33353424072265625, + 
"step": 1201 + }, + { + "epoch": 0.3192139158146329, + "grad_norm": 9.759113022509682, + "learning_rate": 1.9352615433937733e-05, + "loss": 0.3277953267097473, + "step": 1202 + }, + { + "epoch": 0.3194794847961758, + "grad_norm": 1.104737565124737, + "learning_rate": 1.9351060388513304e-05, + "loss": 0.38247692584991455, + "step": 1203 + }, + { + "epoch": 0.3197450537777188, + "grad_norm": 1.0645482624060865, + "learning_rate": 1.9349503540314724e-05, + "loss": 0.3330709934234619, + "step": 1204 + }, + { + "epoch": 0.32001062275926173, + "grad_norm": 1.1382102351287038, + "learning_rate": 1.9347944889642125e-05, + "loss": 0.3809449076652527, + "step": 1205 + }, + { + "epoch": 0.3202761917408047, + "grad_norm": 0.9591245399492223, + "learning_rate": 1.9346384436796e-05, + "loss": 0.33623188734054565, + "step": 1206 + }, + { + "epoch": 0.32054176072234764, + "grad_norm": 1.0414583731283242, + "learning_rate": 1.9344822182077184e-05, + "loss": 0.35465264320373535, + "step": 1207 + }, + { + "epoch": 0.3208073297038906, + "grad_norm": 1.0419539507532576, + "learning_rate": 1.9343258125786866e-05, + "loss": 0.3532233238220215, + "step": 1208 + }, + { + "epoch": 0.32107289868543354, + "grad_norm": 0.972348986123494, + "learning_rate": 1.9341692268226572e-05, + "loss": 0.3498903512954712, + "step": 1209 + }, + { + "epoch": 0.3213384676669765, + "grad_norm": 1.057700016356479, + "learning_rate": 1.9340124609698185e-05, + "loss": 0.36124879121780396, + "step": 1210 + }, + { + "epoch": 0.32160403664851944, + "grad_norm": 1.1891126233384992, + "learning_rate": 1.933855515050393e-05, + "loss": 0.38535434007644653, + "step": 1211 + }, + { + "epoch": 0.3218696056300624, + "grad_norm": 1.1201736183139164, + "learning_rate": 1.9336983890946383e-05, + "loss": 0.39999911189079285, + "step": 1212 + }, + { + "epoch": 0.32213517461160535, + "grad_norm": 1.1396977359685507, + "learning_rate": 1.9335410831328457e-05, + "loss": 0.3519791066646576, + "step": 1213 + }, + { + "epoch": 
0.3224007435931483, + "grad_norm": 1.1624196201646915, + "learning_rate": 1.9333835971953424e-05, + "loss": 0.35882368683815, + "step": 1214 + }, + { + "epoch": 0.32266631257469125, + "grad_norm": 1.2089532713833613, + "learning_rate": 1.93322593131249e-05, + "loss": 0.36132001876831055, + "step": 1215 + }, + { + "epoch": 0.32293188155623426, + "grad_norm": 1.0741169297687752, + "learning_rate": 1.9330680855146845e-05, + "loss": 0.36840832233428955, + "step": 1216 + }, + { + "epoch": 0.3231974505377772, + "grad_norm": 1.1553079333487188, + "learning_rate": 1.9329100598323563e-05, + "loss": 0.3755963444709778, + "step": 1217 + }, + { + "epoch": 0.32346301951932016, + "grad_norm": 1.1792888887437214, + "learning_rate": 1.9327518542959717e-05, + "loss": 0.400601863861084, + "step": 1218 + }, + { + "epoch": 0.3237285885008631, + "grad_norm": 1.0342294479515497, + "learning_rate": 1.93259346893603e-05, + "loss": 0.3100128769874573, + "step": 1219 + }, + { + "epoch": 0.32399415748240606, + "grad_norm": 1.0633052239431813, + "learning_rate": 1.9324349037830665e-05, + "loss": 0.3439880609512329, + "step": 1220 + }, + { + "epoch": 0.324259726463949, + "grad_norm": 1.1634088151631976, + "learning_rate": 1.9322761588676505e-05, + "loss": 0.3612631559371948, + "step": 1221 + }, + { + "epoch": 0.32452529544549197, + "grad_norm": 1.1292400605185824, + "learning_rate": 1.9321172342203863e-05, + "loss": 0.38202327489852905, + "step": 1222 + }, + { + "epoch": 0.3247908644270349, + "grad_norm": 1.0253004653890312, + "learning_rate": 1.9319581298719127e-05, + "loss": 0.3405265808105469, + "step": 1223 + }, + { + "epoch": 0.32505643340857787, + "grad_norm": 1.1499639639111883, + "learning_rate": 1.931798845852903e-05, + "loss": 0.4110907018184662, + "step": 1224 + }, + { + "epoch": 0.3253220023901208, + "grad_norm": 1.2758168253168263, + "learning_rate": 1.9316393821940654e-05, + "loss": 0.3007548451423645, + "step": 1225 + }, + { + "epoch": 0.3255875713716638, + "grad_norm": 
2.5438383009304673, + "learning_rate": 1.9314797389261426e-05, + "loss": 0.32769858837127686, + "step": 1226 + }, + { + "epoch": 0.3258531403532067, + "grad_norm": 1.0370704182885782, + "learning_rate": 1.931319916079912e-05, + "loss": 0.3619830310344696, + "step": 1227 + }, + { + "epoch": 0.3261187093347497, + "grad_norm": 1.2983573666738066, + "learning_rate": 1.9311599136861853e-05, + "loss": 0.3470210134983063, + "step": 1228 + }, + { + "epoch": 0.32638427831629263, + "grad_norm": 1.145435126731274, + "learning_rate": 1.9309997317758093e-05, + "loss": 0.3471665382385254, + "step": 1229 + }, + { + "epoch": 0.32664984729783564, + "grad_norm": 1.0757592201920594, + "learning_rate": 1.930839370379665e-05, + "loss": 0.3717760443687439, + "step": 1230 + }, + { + "epoch": 0.3269154162793786, + "grad_norm": 1.1173068015382108, + "learning_rate": 1.9306788295286687e-05, + "loss": 0.37279975414276123, + "step": 1231 + }, + { + "epoch": 0.32718098526092154, + "grad_norm": 1.1523781527891401, + "learning_rate": 1.93051810925377e-05, + "loss": 0.3884522020816803, + "step": 1232 + }, + { + "epoch": 0.3274465542424645, + "grad_norm": 1.1200431222189422, + "learning_rate": 1.9303572095859545e-05, + "loss": 0.4277604818344116, + "step": 1233 + }, + { + "epoch": 0.32771212322400745, + "grad_norm": 1.1197023145386935, + "learning_rate": 1.9301961305562415e-05, + "loss": 0.2888818681240082, + "step": 1234 + }, + { + "epoch": 0.3279776922055504, + "grad_norm": 1.0271311895282893, + "learning_rate": 1.9300348721956854e-05, + "loss": 0.3134511709213257, + "step": 1235 + }, + { + "epoch": 0.32824326118709335, + "grad_norm": 1.0800984792046815, + "learning_rate": 1.9298734345353745e-05, + "loss": 0.38525280356407166, + "step": 1236 + }, + { + "epoch": 0.3285088301686363, + "grad_norm": 1.134011749036063, + "learning_rate": 1.9297118176064324e-05, + "loss": 0.3692918121814728, + "step": 1237 + }, + { + "epoch": 0.32877439915017925, + "grad_norm": 1.0348260315377988, + "learning_rate": 
1.9295500214400165e-05, + "loss": 0.3443421721458435, + "step": 1238 + }, + { + "epoch": 0.3290399681317222, + "grad_norm": 1.0129455663017488, + "learning_rate": 1.9293880460673197e-05, + "loss": 0.3228621184825897, + "step": 1239 + }, + { + "epoch": 0.32930553711326516, + "grad_norm": 1.0116024279908165, + "learning_rate": 1.9292258915195688e-05, + "loss": 0.330943763256073, + "step": 1240 + }, + { + "epoch": 0.3295711060948081, + "grad_norm": 1.1814587344422625, + "learning_rate": 1.929063557828025e-05, + "loss": 0.356637567281723, + "step": 1241 + }, + { + "epoch": 0.32983667507635106, + "grad_norm": 0.9888159780201056, + "learning_rate": 1.9289010450239843e-05, + "loss": 0.3481113910675049, + "step": 1242 + }, + { + "epoch": 0.330102244057894, + "grad_norm": 1.1876931030431213, + "learning_rate": 1.928738353138778e-05, + "loss": 0.36579906940460205, + "step": 1243 + }, + { + "epoch": 0.330367813039437, + "grad_norm": 1.0281454378567854, + "learning_rate": 1.9285754822037705e-05, + "loss": 0.33025234937667847, + "step": 1244 + }, + { + "epoch": 0.33063338202097997, + "grad_norm": 1.0936673160473642, + "learning_rate": 1.9284124322503613e-05, + "loss": 0.34848469495773315, + "step": 1245 + }, + { + "epoch": 0.3308989510025229, + "grad_norm": 1.1232405017277023, + "learning_rate": 1.928249203309985e-05, + "loss": 0.3523876368999481, + "step": 1246 + }, + { + "epoch": 0.3311645199840659, + "grad_norm": 1.140153458583263, + "learning_rate": 1.92808579541411e-05, + "loss": 0.3695565462112427, + "step": 1247 + }, + { + "epoch": 0.3314300889656088, + "grad_norm": 1.0267337296320096, + "learning_rate": 1.9279222085942396e-05, + "loss": 0.3557945191860199, + "step": 1248 + }, + { + "epoch": 0.3316956579471518, + "grad_norm": 1.0261133198060035, + "learning_rate": 1.9277584428819113e-05, + "loss": 0.3015502989292145, + "step": 1249 + }, + { + "epoch": 0.33196122692869473, + "grad_norm": 0.9384869314897972, + "learning_rate": 1.9275944983086964e-05, + "loss": 
0.31333664059638977, + "step": 1250 + }, + { + "epoch": 0.3322267959102377, + "grad_norm": 1.103154580638619, + "learning_rate": 1.9274303749062028e-05, + "loss": 0.36595287919044495, + "step": 1251 + }, + { + "epoch": 0.33249236489178063, + "grad_norm": 1.0573816777840739, + "learning_rate": 1.9272660727060705e-05, + "loss": 0.3400266170501709, + "step": 1252 + }, + { + "epoch": 0.3327579338733236, + "grad_norm": 1.0994664368429343, + "learning_rate": 1.927101591739976e-05, + "loss": 0.3642529547214508, + "step": 1253 + }, + { + "epoch": 0.33302350285486654, + "grad_norm": 1.08059410662081, + "learning_rate": 1.926936932039628e-05, + "loss": 0.3418777287006378, + "step": 1254 + }, + { + "epoch": 0.3332890718364095, + "grad_norm": 1.0881678177934593, + "learning_rate": 1.9267720936367723e-05, + "loss": 0.33382388949394226, + "step": 1255 + }, + { + "epoch": 0.33355464081795244, + "grad_norm": 1.1227567600503816, + "learning_rate": 1.926607076563187e-05, + "loss": 0.36257779598236084, + "step": 1256 + }, + { + "epoch": 0.3338202097994954, + "grad_norm": 1.5546101865012443, + "learning_rate": 1.926441880850686e-05, + "loss": 0.3018002510070801, + "step": 1257 + }, + { + "epoch": 0.3340857787810384, + "grad_norm": 1.0263747105982135, + "learning_rate": 1.9262765065311165e-05, + "loss": 0.3373662233352661, + "step": 1258 + }, + { + "epoch": 0.33435134776258135, + "grad_norm": 1.0001644182280367, + "learning_rate": 1.9261109536363613e-05, + "loss": 0.3555397391319275, + "step": 1259 + }, + { + "epoch": 0.3346169167441243, + "grad_norm": 1.1519069907937776, + "learning_rate": 1.925945222198336e-05, + "loss": 0.3004256784915924, + "step": 1260 + }, + { + "epoch": 0.33488248572566726, + "grad_norm": 2.328412351070072, + "learning_rate": 1.925779312248993e-05, + "loss": 0.33299940824508667, + "step": 1261 + }, + { + "epoch": 0.3351480547072102, + "grad_norm": 1.0617967738999583, + "learning_rate": 1.9256132238203166e-05, + "loss": 0.3715725541114807, + "step": 1262 + }, + { 
+ "epoch": 0.33541362368875316, + "grad_norm": 1.0140049717249513, + "learning_rate": 1.9254469569443274e-05, + "loss": 0.35133951902389526, + "step": 1263 + }, + { + "epoch": 0.3356791926702961, + "grad_norm": 0.9980129680534503, + "learning_rate": 1.92528051165308e-05, + "loss": 0.3328818380832672, + "step": 1264 + }, + { + "epoch": 0.33594476165183906, + "grad_norm": 1.0764552464682182, + "learning_rate": 1.925113887978662e-05, + "loss": 0.3665468692779541, + "step": 1265 + }, + { + "epoch": 0.336210330633382, + "grad_norm": 1.0446302802374996, + "learning_rate": 1.9249470859531976e-05, + "loss": 0.3489571511745453, + "step": 1266 + }, + { + "epoch": 0.33647589961492497, + "grad_norm": 1.0629721705272823, + "learning_rate": 1.9247801056088433e-05, + "loss": 0.30038982629776, + "step": 1267 + }, + { + "epoch": 0.3367414685964679, + "grad_norm": 1.1798569183028156, + "learning_rate": 1.9246129469777918e-05, + "loss": 0.4163355827331543, + "step": 1268 + }, + { + "epoch": 0.33700703757801087, + "grad_norm": 1.0428552063046848, + "learning_rate": 1.924445610092269e-05, + "loss": 0.33687612414360046, + "step": 1269 + }, + { + "epoch": 0.3372726065595538, + "grad_norm": 1.0466869124167506, + "learning_rate": 1.924278094984535e-05, + "loss": 0.3448297679424286, + "step": 1270 + }, + { + "epoch": 0.3375381755410968, + "grad_norm": 1.0979384797680924, + "learning_rate": 1.9241104016868853e-05, + "loss": 0.35257208347320557, + "step": 1271 + }, + { + "epoch": 0.3378037445226398, + "grad_norm": 1.0794393535441016, + "learning_rate": 1.9239425302316487e-05, + "loss": 0.34880566596984863, + "step": 1272 + }, + { + "epoch": 0.33806931350418273, + "grad_norm": 1.1081978913885613, + "learning_rate": 1.9237744806511895e-05, + "loss": 0.33643782138824463, + "step": 1273 + }, + { + "epoch": 0.3383348824857257, + "grad_norm": 1.0185962864877929, + "learning_rate": 1.9236062529779057e-05, + "loss": 0.32345050573349, + "step": 1274 + }, + { + "epoch": 0.33860045146726864, + 
"grad_norm": 1.0547576972102612, + "learning_rate": 1.9234378472442286e-05, + "loss": 0.33983978629112244, + "step": 1275 + }, + { + "epoch": 0.3388660204488116, + "grad_norm": 1.0305326470674594, + "learning_rate": 1.923269263482626e-05, + "loss": 0.32825571298599243, + "step": 1276 + }, + { + "epoch": 0.33913158943035454, + "grad_norm": 1.0836151603415423, + "learning_rate": 1.923100501725598e-05, + "loss": 0.3434044122695923, + "step": 1277 + }, + { + "epoch": 0.3393971584118975, + "grad_norm": 1.1293248576076373, + "learning_rate": 1.9229315620056805e-05, + "loss": 0.3463204503059387, + "step": 1278 + }, + { + "epoch": 0.33966272739344044, + "grad_norm": 1.0476463818396518, + "learning_rate": 1.9227624443554425e-05, + "loss": 0.3608240485191345, + "step": 1279 + }, + { + "epoch": 0.3399282963749834, + "grad_norm": 1.111712780266586, + "learning_rate": 1.9225931488074882e-05, + "loss": 0.36131763458251953, + "step": 1280 + }, + { + "epoch": 0.34019386535652635, + "grad_norm": 0.9948222919660873, + "learning_rate": 1.922423675394456e-05, + "loss": 0.3270101547241211, + "step": 1281 + }, + { + "epoch": 0.3404594343380693, + "grad_norm": 1.1047356141038558, + "learning_rate": 1.922254024149018e-05, + "loss": 0.3551778495311737, + "step": 1282 + }, + { + "epoch": 0.34072500331961225, + "grad_norm": 1.1057498393465535, + "learning_rate": 1.9220841951038815e-05, + "loss": 0.3686622381210327, + "step": 1283 + }, + { + "epoch": 0.3409905723011552, + "grad_norm": 1.0810198379819234, + "learning_rate": 1.921914188291787e-05, + "loss": 0.35161536931991577, + "step": 1284 + }, + { + "epoch": 0.34125614128269816, + "grad_norm": 1.1489267376414198, + "learning_rate": 1.92174400374551e-05, + "loss": 0.3549870550632477, + "step": 1285 + }, + { + "epoch": 0.34152171026424116, + "grad_norm": 1.0904860537070935, + "learning_rate": 1.9215736414978593e-05, + "loss": 0.36780738830566406, + "step": 1286 + }, + { + "epoch": 0.3417872792457841, + "grad_norm": 1.132171748367688, + 
"learning_rate": 1.9214031015816803e-05, + "loss": 0.36060047149658203, + "step": 1287 + }, + { + "epoch": 0.34205284822732707, + "grad_norm": 1.0753334155968608, + "learning_rate": 1.9212323840298502e-05, + "loss": 0.32578715682029724, + "step": 1288 + }, + { + "epoch": 0.34231841720887, + "grad_norm": 1.0380534929488934, + "learning_rate": 1.9210614888752813e-05, + "loss": 0.3505493402481079, + "step": 1289 + }, + { + "epoch": 0.34258398619041297, + "grad_norm": 1.0227959332298084, + "learning_rate": 1.9208904161509203e-05, + "loss": 0.32681795954704285, + "step": 1290 + }, + { + "epoch": 0.3428495551719559, + "grad_norm": 1.0227973616384467, + "learning_rate": 1.9207191658897473e-05, + "loss": 0.34808459877967834, + "step": 1291 + }, + { + "epoch": 0.3431151241534989, + "grad_norm": 1.0810974703490968, + "learning_rate": 1.920547738124779e-05, + "loss": 0.3588678240776062, + "step": 1292 + }, + { + "epoch": 0.3433806931350418, + "grad_norm": 1.2030053357742059, + "learning_rate": 1.9203761328890626e-05, + "loss": 0.3528832495212555, + "step": 1293 + }, + { + "epoch": 0.3436462621165848, + "grad_norm": 1.35729757891191, + "learning_rate": 1.9202043502156833e-05, + "loss": 0.33549001812934875, + "step": 1294 + }, + { + "epoch": 0.34391183109812773, + "grad_norm": 1.0986147605525078, + "learning_rate": 1.920032390137758e-05, + "loss": 0.3466021418571472, + "step": 1295 + }, + { + "epoch": 0.3441774000796707, + "grad_norm": 1.0492164389172054, + "learning_rate": 1.9198602526884388e-05, + "loss": 0.35646146535873413, + "step": 1296 + }, + { + "epoch": 0.34444296906121363, + "grad_norm": 1.0348991752364494, + "learning_rate": 1.9196879379009112e-05, + "loss": 0.3442128300666809, + "step": 1297 + }, + { + "epoch": 0.3447085380427566, + "grad_norm": 1.083291442034964, + "learning_rate": 1.9195154458083962e-05, + "loss": 0.3854391872882843, + "step": 1298 + }, + { + "epoch": 0.34497410702429954, + "grad_norm": 1.202325074766952, + "learning_rate": 1.9193427764441477e-05, 
+ "loss": 0.376137375831604, + "step": 1299 + }, + { + "epoch": 0.34523967600584254, + "grad_norm": 1.1591691335477168, + "learning_rate": 1.9191699298414547e-05, + "loss": 0.3115769028663635, + "step": 1300 + }, + { + "epoch": 0.3455052449873855, + "grad_norm": 1.125127529667975, + "learning_rate": 1.9189969060336396e-05, + "loss": 0.32553282380104065, + "step": 1301 + }, + { + "epoch": 0.34577081396892845, + "grad_norm": 1.2442677252107, + "learning_rate": 1.9188237050540597e-05, + "loss": 0.39529356360435486, + "step": 1302 + }, + { + "epoch": 0.3460363829504714, + "grad_norm": 1.016155926476122, + "learning_rate": 1.9186503269361063e-05, + "loss": 0.3027458190917969, + "step": 1303 + }, + { + "epoch": 0.34630195193201435, + "grad_norm": 1.2178145504108082, + "learning_rate": 1.918476771713204e-05, + "loss": 0.39317795634269714, + "step": 1304 + }, + { + "epoch": 0.3465675209135573, + "grad_norm": 1.1358253756284789, + "learning_rate": 1.918303039418813e-05, + "loss": 0.3730325698852539, + "step": 1305 + }, + { + "epoch": 0.34683308989510025, + "grad_norm": 1.0835224567793253, + "learning_rate": 1.918129130086426e-05, + "loss": 0.34862780570983887, + "step": 1306 + }, + { + "epoch": 0.3470986588766432, + "grad_norm": 1.106131252801308, + "learning_rate": 1.9179550437495707e-05, + "loss": 0.32139018177986145, + "step": 1307 + }, + { + "epoch": 0.34736422785818616, + "grad_norm": 1.118754726003564, + "learning_rate": 1.91778078044181e-05, + "loss": 0.37246090173721313, + "step": 1308 + }, + { + "epoch": 0.3476297968397291, + "grad_norm": 1.035507147337034, + "learning_rate": 1.9176063401967386e-05, + "loss": 0.30985957384109497, + "step": 1309 + }, + { + "epoch": 0.34789536582127206, + "grad_norm": 1.1303664709170593, + "learning_rate": 1.917431723047987e-05, + "loss": 0.3713758587837219, + "step": 1310 + }, + { + "epoch": 0.348160934802815, + "grad_norm": 1.076206973404712, + "learning_rate": 1.9172569290292193e-05, + "loss": 0.3465833067893982, + "step": 1311 + 
}, + { + "epoch": 0.34842650378435797, + "grad_norm": 1.1789932919731194, + "learning_rate": 1.917081958174134e-05, + "loss": 0.34807220101356506, + "step": 1312 + }, + { + "epoch": 0.3486920727659009, + "grad_norm": 1.0178456651378849, + "learning_rate": 1.9169068105164627e-05, + "loss": 0.3369640111923218, + "step": 1313 + }, + { + "epoch": 0.3489576417474439, + "grad_norm": 1.1714339652663717, + "learning_rate": 1.9167314860899724e-05, + "loss": 0.3521544337272644, + "step": 1314 + }, + { + "epoch": 0.3492232107289869, + "grad_norm": 0.9756562815370131, + "learning_rate": 1.9165559849284635e-05, + "loss": 0.3256300687789917, + "step": 1315 + }, + { + "epoch": 0.34948877971052983, + "grad_norm": 1.1173269078403432, + "learning_rate": 1.9163803070657706e-05, + "loss": 0.32401931285858154, + "step": 1316 + }, + { + "epoch": 0.3497543486920728, + "grad_norm": 1.104564951170044, + "learning_rate": 1.916204452535762e-05, + "loss": 0.372749924659729, + "step": 1317 + }, + { + "epoch": 0.35001991767361573, + "grad_norm": 1.053240444697934, + "learning_rate": 1.9160284213723407e-05, + "loss": 0.35853224992752075, + "step": 1318 + }, + { + "epoch": 0.3502854866551587, + "grad_norm": 1.048325144857422, + "learning_rate": 1.9158522136094433e-05, + "loss": 0.32850801944732666, + "step": 1319 + }, + { + "epoch": 0.35055105563670164, + "grad_norm": 1.1274703494911789, + "learning_rate": 1.9156758292810404e-05, + "loss": 0.3548474907875061, + "step": 1320 + }, + { + "epoch": 0.3508166246182446, + "grad_norm": 1.10371779317482, + "learning_rate": 1.9154992684211372e-05, + "loss": 0.38709041476249695, + "step": 1321 + }, + { + "epoch": 0.35108219359978754, + "grad_norm": 1.1369910570736041, + "learning_rate": 1.9153225310637726e-05, + "loss": 0.40369266271591187, + "step": 1322 + }, + { + "epoch": 0.3513477625813305, + "grad_norm": 1.179710362637603, + "learning_rate": 1.9151456172430186e-05, + "loss": 0.3570155203342438, + "step": 1323 + }, + { + "epoch": 0.35161333156287344, + 
"grad_norm": 1.0315056954444073, + "learning_rate": 1.9149685269929833e-05, + "loss": 0.34426411986351013, + "step": 1324 + }, + { + "epoch": 0.3518789005444164, + "grad_norm": 1.0980268876500368, + "learning_rate": 1.9147912603478066e-05, + "loss": 0.35666006803512573, + "step": 1325 + }, + { + "epoch": 0.35214446952595935, + "grad_norm": 1.0320732816254274, + "learning_rate": 1.9146138173416643e-05, + "loss": 0.36225512623786926, + "step": 1326 + }, + { + "epoch": 0.3524100385075023, + "grad_norm": 1.0499655117353668, + "learning_rate": 1.9144361980087643e-05, + "loss": 0.3312349319458008, + "step": 1327 + }, + { + "epoch": 0.3526756074890453, + "grad_norm": 1.0828461821707789, + "learning_rate": 1.9142584023833506e-05, + "loss": 0.3590523302555084, + "step": 1328 + }, + { + "epoch": 0.35294117647058826, + "grad_norm": 1.2432343198034153, + "learning_rate": 1.9140804304996997e-05, + "loss": 0.341480016708374, + "step": 1329 + }, + { + "epoch": 0.3532067454521312, + "grad_norm": 1.0165353851066345, + "learning_rate": 1.913902282392122e-05, + "loss": 0.37246501445770264, + "step": 1330 + }, + { + "epoch": 0.35347231443367416, + "grad_norm": 1.0959834963108057, + "learning_rate": 1.913723958094963e-05, + "loss": 0.33834031224250793, + "step": 1331 + }, + { + "epoch": 0.3537378834152171, + "grad_norm": 1.0066884605687934, + "learning_rate": 1.913545457642601e-05, + "loss": 0.29285067319869995, + "step": 1332 + }, + { + "epoch": 0.35400345239676007, + "grad_norm": 1.0768479974972798, + "learning_rate": 1.913366781069449e-05, + "loss": 0.2903720736503601, + "step": 1333 + }, + { + "epoch": 0.354269021378303, + "grad_norm": 1.1311334028851072, + "learning_rate": 1.913187928409954e-05, + "loss": 0.36428314447402954, + "step": 1334 + }, + { + "epoch": 0.35453459035984597, + "grad_norm": 1.0473346547130091, + "learning_rate": 1.9130088996985967e-05, + "loss": 0.3379477560520172, + "step": 1335 + }, + { + "epoch": 0.3548001593413889, + "grad_norm": 1.0963924260325884, + 
"learning_rate": 1.912829694969891e-05, + "loss": 0.35286659002304077, + "step": 1336 + }, + { + "epoch": 0.3550657283229319, + "grad_norm": 1.1930831242867357, + "learning_rate": 1.9126503142583864e-05, + "loss": 0.3670174479484558, + "step": 1337 + }, + { + "epoch": 0.3553312973044748, + "grad_norm": 1.1294601866875984, + "learning_rate": 1.9124707575986642e-05, + "loss": 0.3422902226448059, + "step": 1338 + }, + { + "epoch": 0.3555968662860178, + "grad_norm": 0.9984746022499613, + "learning_rate": 1.912291025025342e-05, + "loss": 0.29778385162353516, + "step": 1339 + }, + { + "epoch": 0.35586243526756073, + "grad_norm": 1.1907673127670892, + "learning_rate": 1.91211111657307e-05, + "loss": 0.36249661445617676, + "step": 1340 + }, + { + "epoch": 0.3561280042491037, + "grad_norm": 1.1054946723600563, + "learning_rate": 1.9119310322765315e-05, + "loss": 0.340925395488739, + "step": 1341 + }, + { + "epoch": 0.3563935732306467, + "grad_norm": 1.1964466720866056, + "learning_rate": 1.9117507721704455e-05, + "loss": 0.35674089193344116, + "step": 1342 + }, + { + "epoch": 0.35665914221218964, + "grad_norm": 1.1077144979302902, + "learning_rate": 1.9115703362895636e-05, + "loss": 0.3602067828178406, + "step": 1343 + }, + { + "epoch": 0.3569247111937326, + "grad_norm": 1.1669501112510636, + "learning_rate": 1.9113897246686716e-05, + "loss": 0.35211697220802307, + "step": 1344 + }, + { + "epoch": 0.35719028017527554, + "grad_norm": 1.1098565168791754, + "learning_rate": 1.91120893734259e-05, + "loss": 0.3706115484237671, + "step": 1345 + }, + { + "epoch": 0.3574558491568185, + "grad_norm": 0.955637908965499, + "learning_rate": 1.9110279743461717e-05, + "loss": 0.3365110754966736, + "step": 1346 + }, + { + "epoch": 0.35772141813836145, + "grad_norm": 1.2071736385011052, + "learning_rate": 1.9108468357143047e-05, + "loss": 0.40012121200561523, + "step": 1347 + }, + { + "epoch": 0.3579869871199044, + "grad_norm": 1.1409634140225444, + "learning_rate": 1.91066552148191e-05, + 
"loss": 0.4003351926803589, + "step": 1348 + }, + { + "epoch": 0.35825255610144735, + "grad_norm": 1.0613274196364288, + "learning_rate": 1.910484031683943e-05, + "loss": 0.3574616014957428, + "step": 1349 + }, + { + "epoch": 0.3585181250829903, + "grad_norm": 1.0904662824068834, + "learning_rate": 1.910302366355393e-05, + "loss": 0.3345073461532593, + "step": 1350 + }, + { + "epoch": 0.35878369406453325, + "grad_norm": 1.0532412802136695, + "learning_rate": 1.910120525531283e-05, + "loss": 0.3467676341533661, + "step": 1351 + }, + { + "epoch": 0.3590492630460762, + "grad_norm": 1.0529131768701299, + "learning_rate": 1.9099385092466695e-05, + "loss": 0.32433655858039856, + "step": 1352 + }, + { + "epoch": 0.35931483202761916, + "grad_norm": 1.0442908892383016, + "learning_rate": 1.909756317536643e-05, + "loss": 0.3366447985172272, + "step": 1353 + }, + { + "epoch": 0.3595804010091621, + "grad_norm": 1.0770054348386777, + "learning_rate": 1.909573950436328e-05, + "loss": 0.310118168592453, + "step": 1354 + }, + { + "epoch": 0.35984596999070506, + "grad_norm": 1.4782002462322321, + "learning_rate": 1.909391407980883e-05, + "loss": 0.3503451943397522, + "step": 1355 + }, + { + "epoch": 0.36011153897224807, + "grad_norm": 1.0889726916887852, + "learning_rate": 1.9092086902054996e-05, + "loss": 0.3375343978404999, + "step": 1356 + }, + { + "epoch": 0.360377107953791, + "grad_norm": 0.9368081121032712, + "learning_rate": 1.909025797145404e-05, + "loss": 0.3056451082229614, + "step": 1357 + }, + { + "epoch": 0.360642676935334, + "grad_norm": 0.9554491579006472, + "learning_rate": 1.9088427288358556e-05, + "loss": 0.3063391447067261, + "step": 1358 + }, + { + "epoch": 0.3609082459168769, + "grad_norm": 0.9358824747825566, + "learning_rate": 1.908659485312148e-05, + "loss": 0.3055405616760254, + "step": 1359 + }, + { + "epoch": 0.3611738148984199, + "grad_norm": 1.1828231629690173, + "learning_rate": 1.908476066609608e-05, + "loss": 0.38323235511779785, + "step": 1360 + }, 
+ { + "epoch": 0.36143938387996283, + "grad_norm": 1.0971994038941366, + "learning_rate": 1.908292472763597e-05, + "loss": 0.33526092767715454, + "step": 1361 + }, + { + "epoch": 0.3617049528615058, + "grad_norm": 1.0449346093027478, + "learning_rate": 1.9081087038095094e-05, + "loss": 0.34485238790512085, + "step": 1362 + }, + { + "epoch": 0.36197052184304873, + "grad_norm": 1.0943982229718532, + "learning_rate": 1.907924759782774e-05, + "loss": 0.2963239252567291, + "step": 1363 + }, + { + "epoch": 0.3622360908245917, + "grad_norm": 1.2033822452903298, + "learning_rate": 1.9077406407188532e-05, + "loss": 0.3536864221096039, + "step": 1364 + }, + { + "epoch": 0.36250165980613464, + "grad_norm": 1.1739216512613182, + "learning_rate": 1.907556346653242e-05, + "loss": 0.3724798858165741, + "step": 1365 + }, + { + "epoch": 0.3627672287876776, + "grad_norm": 1.2035474175290464, + "learning_rate": 1.9073718776214717e-05, + "loss": 0.36241161823272705, + "step": 1366 + }, + { + "epoch": 0.36303279776922054, + "grad_norm": 1.2262905723198394, + "learning_rate": 1.9071872336591042e-05, + "loss": 0.3484225273132324, + "step": 1367 + }, + { + "epoch": 0.3632983667507635, + "grad_norm": 1.11285184075262, + "learning_rate": 1.9070024148017375e-05, + "loss": 0.33606311678886414, + "step": 1368 + }, + { + "epoch": 0.36356393573230644, + "grad_norm": 1.076908267109863, + "learning_rate": 1.906817421085002e-05, + "loss": 0.3263503909111023, + "step": 1369 + }, + { + "epoch": 0.36382950471384945, + "grad_norm": 1.126388175466026, + "learning_rate": 1.906632252544563e-05, + "loss": 0.33454492688179016, + "step": 1370 + }, + { + "epoch": 0.3640950736953924, + "grad_norm": 1.1264022314316273, + "learning_rate": 1.9064469092161185e-05, + "loss": 0.34858438372612, + "step": 1371 + }, + { + "epoch": 0.36436064267693535, + "grad_norm": 1.0527021112264499, + "learning_rate": 1.9062613911354005e-05, + "loss": 0.3466234505176544, + "step": 1372 + }, + { + "epoch": 0.3646262116584783, + 
"grad_norm": 1.0325760706581486, + "learning_rate": 1.9060756983381743e-05, + "loss": 0.33574312925338745, + "step": 1373 + }, + { + "epoch": 0.36489178064002126, + "grad_norm": 1.0321788657369535, + "learning_rate": 1.90588983086024e-05, + "loss": 0.3012363016605377, + "step": 1374 + }, + { + "epoch": 0.3651573496215642, + "grad_norm": 1.0033389586223882, + "learning_rate": 1.90570378873743e-05, + "loss": 0.3050191402435303, + "step": 1375 + }, + { + "epoch": 0.36542291860310716, + "grad_norm": 1.0078763869776561, + "learning_rate": 1.905517572005611e-05, + "loss": 0.35090070962905884, + "step": 1376 + }, + { + "epoch": 0.3656884875846501, + "grad_norm": 1.011051809727729, + "learning_rate": 1.9053311807006845e-05, + "loss": 0.3276262581348419, + "step": 1377 + }, + { + "epoch": 0.36595405656619306, + "grad_norm": 1.300904148134606, + "learning_rate": 1.9051446148585833e-05, + "loss": 0.3303500711917877, + "step": 1378 + }, + { + "epoch": 0.366219625547736, + "grad_norm": 1.113413634877815, + "learning_rate": 1.9049578745152754e-05, + "loss": 0.3748486042022705, + "step": 1379 + }, + { + "epoch": 0.36648519452927897, + "grad_norm": 0.8707302355459249, + "learning_rate": 1.9047709597067628e-05, + "loss": 0.30339744687080383, + "step": 1380 + }, + { + "epoch": 0.3667507635108219, + "grad_norm": 1.0245709544347914, + "learning_rate": 1.9045838704690796e-05, + "loss": 0.31811147928237915, + "step": 1381 + }, + { + "epoch": 0.36701633249236487, + "grad_norm": 1.1759156162745943, + "learning_rate": 1.9043966068382945e-05, + "loss": 0.3541119694709778, + "step": 1382 + }, + { + "epoch": 0.3672819014739078, + "grad_norm": 1.0874467494483675, + "learning_rate": 1.9042091688505104e-05, + "loss": 0.36639657616615295, + "step": 1383 + }, + { + "epoch": 0.36754747045545083, + "grad_norm": 1.0242460437241268, + "learning_rate": 1.9040215565418628e-05, + "loss": 0.35859787464141846, + "step": 1384 + }, + { + "epoch": 0.3678130394369938, + "grad_norm": 1.017105790679022, + 
"learning_rate": 1.9038337699485207e-05, + "loss": 0.3210521340370178, + "step": 1385 + }, + { + "epoch": 0.36807860841853673, + "grad_norm": 1.0362268895966902, + "learning_rate": 1.9036458091066875e-05, + "loss": 0.3207433819770813, + "step": 1386 + }, + { + "epoch": 0.3683441774000797, + "grad_norm": 0.9948382455278952, + "learning_rate": 1.9034576740526e-05, + "loss": 0.3475082218647003, + "step": 1387 + }, + { + "epoch": 0.36860974638162264, + "grad_norm": 1.167057707852143, + "learning_rate": 1.903269364822528e-05, + "loss": 0.33252987265586853, + "step": 1388 + }, + { + "epoch": 0.3688753153631656, + "grad_norm": 1.0281516525035093, + "learning_rate": 1.903080881452776e-05, + "loss": 0.32200103998184204, + "step": 1389 + }, + { + "epoch": 0.36914088434470854, + "grad_norm": 1.0752934055327636, + "learning_rate": 1.9028922239796803e-05, + "loss": 0.34780022501945496, + "step": 1390 + }, + { + "epoch": 0.3694064533262515, + "grad_norm": 1.1028643639363398, + "learning_rate": 1.902703392439613e-05, + "loss": 0.35411912202835083, + "step": 1391 + }, + { + "epoch": 0.36967202230779445, + "grad_norm": 1.6627965093255739, + "learning_rate": 1.9025143868689773e-05, + "loss": 0.35232803225517273, + "step": 1392 + }, + { + "epoch": 0.3699375912893374, + "grad_norm": 1.168292115519334, + "learning_rate": 1.9023252073042128e-05, + "loss": 0.38561391830444336, + "step": 1393 + }, + { + "epoch": 0.37020316027088035, + "grad_norm": 0.9982322437598163, + "learning_rate": 1.9021358537817897e-05, + "loss": 0.3184170126914978, + "step": 1394 + }, + { + "epoch": 0.3704687292524233, + "grad_norm": 1.0557333187102689, + "learning_rate": 1.9019463263382142e-05, + "loss": 0.32455068826675415, + "step": 1395 + }, + { + "epoch": 0.37073429823396625, + "grad_norm": 1.0862364532602506, + "learning_rate": 1.901756625010024e-05, + "loss": 0.32998934388160706, + "step": 1396 + }, + { + "epoch": 0.3709998672155092, + "grad_norm": 1.1350071137219766, + "learning_rate": 
1.901566749833792e-05, + "loss": 0.3361780643463135, + "step": 1397 + }, + { + "epoch": 0.37126543619705216, + "grad_norm": 1.1483051699341575, + "learning_rate": 1.9013767008461236e-05, + "loss": 0.3618711829185486, + "step": 1398 + }, + { + "epoch": 0.37153100517859516, + "grad_norm": 1.1250978483748488, + "learning_rate": 1.901186478083658e-05, + "loss": 0.3904131054878235, + "step": 1399 + }, + { + "epoch": 0.3717965741601381, + "grad_norm": 1.0885741580509858, + "learning_rate": 1.9009960815830676e-05, + "loss": 0.35742759704589844, + "step": 1400 + }, + { + "epoch": 0.37206214314168107, + "grad_norm": 1.073570835222054, + "learning_rate": 1.9008055113810595e-05, + "loss": 0.32880812883377075, + "step": 1401 + }, + { + "epoch": 0.372327712123224, + "grad_norm": 1.0645240727318732, + "learning_rate": 1.9006147675143724e-05, + "loss": 0.3379839360713959, + "step": 1402 + }, + { + "epoch": 0.37259328110476697, + "grad_norm": 1.1363528922504198, + "learning_rate": 1.90042385001978e-05, + "loss": 0.3635789453983307, + "step": 1403 + }, + { + "epoch": 0.3728588500863099, + "grad_norm": 1.1103620354136925, + "learning_rate": 1.900232758934089e-05, + "loss": 0.3462461233139038, + "step": 1404 + }, + { + "epoch": 0.3731244190678529, + "grad_norm": 1.1087128591527484, + "learning_rate": 1.900041494294139e-05, + "loss": 0.34578579664230347, + "step": 1405 + }, + { + "epoch": 0.3733899880493958, + "grad_norm": 1.1067984269435176, + "learning_rate": 1.899850056136804e-05, + "loss": 0.36266931891441345, + "step": 1406 + }, + { + "epoch": 0.3736555570309388, + "grad_norm": 1.089685836132972, + "learning_rate": 1.899658444498991e-05, + "loss": 0.34019365906715393, + "step": 1407 + }, + { + "epoch": 0.37392112601248173, + "grad_norm": 1.0009475991478056, + "learning_rate": 1.8994666594176404e-05, + "loss": 0.3057953119277954, + "step": 1408 + }, + { + "epoch": 0.3741866949940247, + "grad_norm": 1.1008245937613312, + "learning_rate": 1.8992747009297265e-05, + "loss": 
0.3663131892681122, + "step": 1409 + }, + { + "epoch": 0.37445226397556763, + "grad_norm": 1.0696938984110862, + "learning_rate": 1.8990825690722557e-05, + "loss": 0.3402065634727478, + "step": 1410 + }, + { + "epoch": 0.3747178329571106, + "grad_norm": 1.017664192724319, + "learning_rate": 1.8988902638822693e-05, + "loss": 0.3437868654727936, + "step": 1411 + }, + { + "epoch": 0.37498340193865354, + "grad_norm": 1.2246388577961873, + "learning_rate": 1.8986977853968416e-05, + "loss": 0.40972524881362915, + "step": 1412 + }, + { + "epoch": 0.37524897092019655, + "grad_norm": 1.0293557658064552, + "learning_rate": 1.89850513365308e-05, + "loss": 0.3237977921962738, + "step": 1413 + }, + { + "epoch": 0.3755145399017395, + "grad_norm": 0.9581631299919097, + "learning_rate": 1.8983123086881254e-05, + "loss": 0.3146173357963562, + "step": 1414 + }, + { + "epoch": 0.37578010888328245, + "grad_norm": 0.9942979474502576, + "learning_rate": 1.8981193105391524e-05, + "loss": 0.33485543727874756, + "step": 1415 + }, + { + "epoch": 0.3760456778648254, + "grad_norm": 1.0963696340494955, + "learning_rate": 1.8979261392433685e-05, + "loss": 0.36379897594451904, + "step": 1416 + }, + { + "epoch": 0.37631124684636835, + "grad_norm": 0.902828061805848, + "learning_rate": 1.8977327948380154e-05, + "loss": 0.2737882137298584, + "step": 1417 + }, + { + "epoch": 0.3765768158279113, + "grad_norm": 1.1168765744666191, + "learning_rate": 1.897539277360367e-05, + "loss": 0.3554575443267822, + "step": 1418 + }, + { + "epoch": 0.37684238480945426, + "grad_norm": 1.0021058464909711, + "learning_rate": 1.897345586847731e-05, + "loss": 0.3297621011734009, + "step": 1419 + }, + { + "epoch": 0.3771079537909972, + "grad_norm": 1.1638469907551372, + "learning_rate": 1.8971517233374497e-05, + "loss": 0.32272985577583313, + "step": 1420 + }, + { + "epoch": 0.37737352277254016, + "grad_norm": 1.0280583772355378, + "learning_rate": 1.8969576868668967e-05, + "loss": 0.32175642251968384, + "step": 1421 + 
}, + { + "epoch": 0.3776390917540831, + "grad_norm": 1.1136468557030246, + "learning_rate": 1.8967634774734807e-05, + "loss": 0.35973137617111206, + "step": 1422 + }, + { + "epoch": 0.37790466073562606, + "grad_norm": 1.1892680335343753, + "learning_rate": 1.8965690951946424e-05, + "loss": 0.3385169506072998, + "step": 1423 + }, + { + "epoch": 0.378170229717169, + "grad_norm": 1.1245023779822048, + "learning_rate": 1.8963745400678564e-05, + "loss": 0.3683067560195923, + "step": 1424 + }, + { + "epoch": 0.37843579869871197, + "grad_norm": 1.1630069521478075, + "learning_rate": 1.896179812130631e-05, + "loss": 0.3711622357368469, + "step": 1425 + }, + { + "epoch": 0.3787013676802549, + "grad_norm": 1.015020556732164, + "learning_rate": 1.895984911420507e-05, + "loss": 0.30416572093963623, + "step": 1426 + }, + { + "epoch": 0.3789669366617979, + "grad_norm": 1.079958708031102, + "learning_rate": 1.8957898379750598e-05, + "loss": 0.3439522385597229, + "step": 1427 + }, + { + "epoch": 0.3792325056433409, + "grad_norm": 1.1382084488728177, + "learning_rate": 1.895594591831896e-05, + "loss": 0.3663806617259979, + "step": 1428 + }, + { + "epoch": 0.37949807462488383, + "grad_norm": 1.0501527452156108, + "learning_rate": 1.895399173028658e-05, + "loss": 0.32132354378700256, + "step": 1429 + }, + { + "epoch": 0.3797636436064268, + "grad_norm": 0.9916462964383544, + "learning_rate": 1.8952035816030196e-05, + "loss": 0.3040635585784912, + "step": 1430 + }, + { + "epoch": 0.38002921258796973, + "grad_norm": 1.1155299107557486, + "learning_rate": 1.8950078175926886e-05, + "loss": 0.3548869788646698, + "step": 1431 + }, + { + "epoch": 0.3802947815695127, + "grad_norm": 1.1280933582225339, + "learning_rate": 1.894811881035406e-05, + "loss": 0.3114319443702698, + "step": 1432 + }, + { + "epoch": 0.38056035055105564, + "grad_norm": 1.151174980739505, + "learning_rate": 1.894615771968946e-05, + "loss": 0.3589673936367035, + "step": 1433 + }, + { + "epoch": 0.3808259195325986, + 
"grad_norm": 1.1074661491088642, + "learning_rate": 1.894419490431116e-05, + "loss": 0.3073863983154297, + "step": 1434 + }, + { + "epoch": 0.38109148851414154, + "grad_norm": 1.0689323921068359, + "learning_rate": 1.8942230364597572e-05, + "loss": 0.32474076747894287, + "step": 1435 + }, + { + "epoch": 0.3813570574956845, + "grad_norm": 2.6127931856999314, + "learning_rate": 1.8940264100927432e-05, + "loss": 0.3363546133041382, + "step": 1436 + }, + { + "epoch": 0.38162262647722744, + "grad_norm": 0.9995665434586938, + "learning_rate": 1.8938296113679814e-05, + "loss": 0.33679312467575073, + "step": 1437 + }, + { + "epoch": 0.3818881954587704, + "grad_norm": 1.0113319573344832, + "learning_rate": 1.8936326403234125e-05, + "loss": 0.33171382546424866, + "step": 1438 + }, + { + "epoch": 0.38215376444031335, + "grad_norm": 1.0880785150495547, + "learning_rate": 1.8934354969970097e-05, + "loss": 0.3717402219772339, + "step": 1439 + }, + { + "epoch": 0.3824193334218563, + "grad_norm": 1.1102375952968466, + "learning_rate": 1.8932381814267802e-05, + "loss": 0.335337370634079, + "step": 1440 + }, + { + "epoch": 0.3826849024033993, + "grad_norm": 1.010201255539417, + "learning_rate": 1.893040693650764e-05, + "loss": 0.32745444774627686, + "step": 1441 + }, + { + "epoch": 0.38295047138494226, + "grad_norm": 1.045820108792802, + "learning_rate": 1.892843033707035e-05, + "loss": 0.34863507747650146, + "step": 1442 + }, + { + "epoch": 0.3832160403664852, + "grad_norm": 1.0344465763282014, + "learning_rate": 1.8926452016336987e-05, + "loss": 0.3428313732147217, + "step": 1443 + }, + { + "epoch": 0.38348160934802816, + "grad_norm": 0.9882681324904586, + "learning_rate": 1.8924471974688956e-05, + "loss": 0.3223801851272583, + "step": 1444 + }, + { + "epoch": 0.3837471783295711, + "grad_norm": 1.2003387152989082, + "learning_rate": 1.8922490212507983e-05, + "loss": 0.33248746395111084, + "step": 1445 + }, + { + "epoch": 0.38401274731111407, + "grad_norm": 1.0404747226700646, + 
"learning_rate": 1.8920506730176125e-05, + "loss": 0.3472076654434204, + "step": 1446 + }, + { + "epoch": 0.384278316292657, + "grad_norm": 1.229166058737197, + "learning_rate": 1.891852152807578e-05, + "loss": 0.4385136365890503, + "step": 1447 + }, + { + "epoch": 0.38454388527419997, + "grad_norm": 1.0444838405880497, + "learning_rate": 1.8916534606589666e-05, + "loss": 0.36871540546417236, + "step": 1448 + }, + { + "epoch": 0.3848094542557429, + "grad_norm": 1.0803859921763799, + "learning_rate": 1.8914545966100843e-05, + "loss": 0.3136710524559021, + "step": 1449 + }, + { + "epoch": 0.3850750232372859, + "grad_norm": 1.0902031451870209, + "learning_rate": 1.891255560699269e-05, + "loss": 0.3236457109451294, + "step": 1450 + }, + { + "epoch": 0.3853405922188288, + "grad_norm": 0.9936714818929803, + "learning_rate": 1.8910563529648933e-05, + "loss": 0.3176822066307068, + "step": 1451 + }, + { + "epoch": 0.3856061612003718, + "grad_norm": 1.0635659473367998, + "learning_rate": 1.890856973445362e-05, + "loss": 0.3531719744205475, + "step": 1452 + }, + { + "epoch": 0.38587173018191473, + "grad_norm": 0.9470574553293423, + "learning_rate": 1.8906574221791127e-05, + "loss": 0.2911416292190552, + "step": 1453 + }, + { + "epoch": 0.3861372991634577, + "grad_norm": 1.0992858203425024, + "learning_rate": 1.890457699204617e-05, + "loss": 0.3522392511367798, + "step": 1454 + }, + { + "epoch": 0.3864028681450007, + "grad_norm": 1.1706910837372075, + "learning_rate": 1.8902578045603787e-05, + "loss": 0.3724471628665924, + "step": 1455 + }, + { + "epoch": 0.38666843712654364, + "grad_norm": 1.1807687078274312, + "learning_rate": 1.890057738284935e-05, + "loss": 0.2935449481010437, + "step": 1456 + }, + { + "epoch": 0.3869340061080866, + "grad_norm": 1.1181603604376231, + "learning_rate": 1.8898575004168568e-05, + "loss": 0.3413137197494507, + "step": 1457 + }, + { + "epoch": 0.38719957508962954, + "grad_norm": 1.1002740783107277, + "learning_rate": 1.8896570909947477e-05, + 
"loss": 0.32282277941703796, + "step": 1458 + }, + { + "epoch": 0.3874651440711725, + "grad_norm": 1.0071931608273124, + "learning_rate": 1.8894565100572435e-05, + "loss": 0.3285476565361023, + "step": 1459 + }, + { + "epoch": 0.38773071305271545, + "grad_norm": 1.010871057653593, + "learning_rate": 1.8892557576430147e-05, + "loss": 0.29517480731010437, + "step": 1460 + }, + { + "epoch": 0.3879962820342584, + "grad_norm": 0.9710184588467288, + "learning_rate": 1.8890548337907636e-05, + "loss": 0.2913149297237396, + "step": 1461 + }, + { + "epoch": 0.38826185101580135, + "grad_norm": 1.096024980027641, + "learning_rate": 1.8888537385392258e-05, + "loss": 0.32154160737991333, + "step": 1462 + }, + { + "epoch": 0.3885274199973443, + "grad_norm": 1.157775550745099, + "learning_rate": 1.88865247192717e-05, + "loss": 0.30677905678749084, + "step": 1463 + }, + { + "epoch": 0.38879298897888726, + "grad_norm": 1.1509749466488566, + "learning_rate": 1.888451033993399e-05, + "loss": 0.37568169832229614, + "step": 1464 + }, + { + "epoch": 0.3890585579604302, + "grad_norm": 1.0554287268781006, + "learning_rate": 1.8882494247767465e-05, + "loss": 0.34972083568573, + "step": 1465 + }, + { + "epoch": 0.38932412694197316, + "grad_norm": 1.1253148629548142, + "learning_rate": 1.888047644316081e-05, + "loss": 0.3198736906051636, + "step": 1466 + }, + { + "epoch": 0.3895896959235161, + "grad_norm": 1.0268445477998984, + "learning_rate": 1.887845692650303e-05, + "loss": 0.3405846953392029, + "step": 1467 + }, + { + "epoch": 0.38985526490505906, + "grad_norm": 1.1800981831391237, + "learning_rate": 1.8876435698183465e-05, + "loss": 0.3600257337093353, + "step": 1468 + }, + { + "epoch": 0.39012083388660207, + "grad_norm": 1.042232512137109, + "learning_rate": 1.887441275859179e-05, + "loss": 0.32415103912353516, + "step": 1469 + }, + { + "epoch": 0.390386402868145, + "grad_norm": 1.1736259107415346, + "learning_rate": 1.8872388108117995e-05, + "loss": 0.3450891673564911, + "step": 1470 + 
}, + { + "epoch": 0.390651971849688, + "grad_norm": 1.0534871304087963, + "learning_rate": 1.8870361747152416e-05, + "loss": 0.3210057318210602, + "step": 1471 + }, + { + "epoch": 0.3909175408312309, + "grad_norm": 1.1749127166764717, + "learning_rate": 1.8868333676085707e-05, + "loss": 0.3615706264972687, + "step": 1472 + }, + { + "epoch": 0.3911831098127739, + "grad_norm": 1.0750237065987462, + "learning_rate": 1.8866303895308856e-05, + "loss": 0.34149813652038574, + "step": 1473 + }, + { + "epoch": 0.39144867879431683, + "grad_norm": 0.91786674858188, + "learning_rate": 1.8864272405213188e-05, + "loss": 0.2795295715332031, + "step": 1474 + }, + { + "epoch": 0.3917142477758598, + "grad_norm": 1.1110559595870293, + "learning_rate": 1.8862239206190337e-05, + "loss": 0.3459053933620453, + "step": 1475 + }, + { + "epoch": 0.39197981675740273, + "grad_norm": 1.1048084354602663, + "learning_rate": 1.8860204298632294e-05, + "loss": 0.3531072735786438, + "step": 1476 + }, + { + "epoch": 0.3922453857389457, + "grad_norm": 1.128095083544478, + "learning_rate": 1.8858167682931357e-05, + "loss": 0.3788977265357971, + "step": 1477 + }, + { + "epoch": 0.39251095472048864, + "grad_norm": 1.3263027090109385, + "learning_rate": 1.8856129359480163e-05, + "loss": 0.3210671544075012, + "step": 1478 + }, + { + "epoch": 0.3927765237020316, + "grad_norm": 1.0773816671223826, + "learning_rate": 1.8854089328671673e-05, + "loss": 0.3442102074623108, + "step": 1479 + }, + { + "epoch": 0.39304209268357454, + "grad_norm": 1.0501956367137624, + "learning_rate": 1.885204759089919e-05, + "loss": 0.29128211736679077, + "step": 1480 + }, + { + "epoch": 0.3933076616651175, + "grad_norm": 1.1403330671915806, + "learning_rate": 1.885000414655633e-05, + "loss": 0.3601154088973999, + "step": 1481 + }, + { + "epoch": 0.39357323064666044, + "grad_norm": 1.032058056545269, + "learning_rate": 1.8847958996037042e-05, + "loss": 0.3173052668571472, + "step": 1482 + }, + { + "epoch": 0.39383879962820345, + 
"grad_norm": 1.0840123249628424, + "learning_rate": 1.8845912139735616e-05, + "loss": 0.32759106159210205, + "step": 1483 + }, + { + "epoch": 0.3941043686097464, + "grad_norm": 1.0868479290241493, + "learning_rate": 1.8843863578046657e-05, + "loss": 0.3213586211204529, + "step": 1484 + }, + { + "epoch": 0.39436993759128935, + "grad_norm": 1.0263834848721582, + "learning_rate": 1.8841813311365105e-05, + "loss": 0.342970073223114, + "step": 1485 + }, + { + "epoch": 0.3946355065728323, + "grad_norm": 1.1467746465148738, + "learning_rate": 1.883976134008622e-05, + "loss": 0.3852401375770569, + "step": 1486 + }, + { + "epoch": 0.39490107555437526, + "grad_norm": 1.0974253808771965, + "learning_rate": 1.883770766460561e-05, + "loss": 0.2965390682220459, + "step": 1487 + }, + { + "epoch": 0.3951666445359182, + "grad_norm": 1.1655078685340161, + "learning_rate": 1.883565228531919e-05, + "loss": 0.3899655044078827, + "step": 1488 + }, + { + "epoch": 0.39543221351746116, + "grad_norm": 1.1086105484757183, + "learning_rate": 1.8833595202623222e-05, + "loss": 0.339199423789978, + "step": 1489 + }, + { + "epoch": 0.3956977824990041, + "grad_norm": 1.049526058190211, + "learning_rate": 1.8831536416914278e-05, + "loss": 0.3121682405471802, + "step": 1490 + }, + { + "epoch": 0.39596335148054707, + "grad_norm": 1.073417591294797, + "learning_rate": 1.8829475928589272e-05, + "loss": 0.31947991251945496, + "step": 1491 + }, + { + "epoch": 0.39622892046209, + "grad_norm": 1.1660176936819076, + "learning_rate": 1.882741373804544e-05, + "loss": 0.3569333553314209, + "step": 1492 + }, + { + "epoch": 0.39649448944363297, + "grad_norm": 1.1521030930761056, + "learning_rate": 1.882534984568035e-05, + "loss": 0.3739020526409149, + "step": 1493 + }, + { + "epoch": 0.3967600584251759, + "grad_norm": 1.0930221251915908, + "learning_rate": 1.882328425189189e-05, + "loss": 0.34350353479385376, + "step": 1494 + }, + { + "epoch": 0.3970256274067189, + "grad_norm": 1.0780622136577362, + 
"learning_rate": 1.882121695707829e-05, + "loss": 0.3103981614112854, + "step": 1495 + }, + { + "epoch": 0.3972911963882618, + "grad_norm": 1.066229649085828, + "learning_rate": 1.8819147961638104e-05, + "loss": 0.33847716450691223, + "step": 1496 + }, + { + "epoch": 0.39755676536980483, + "grad_norm": 0.943119049120047, + "learning_rate": 1.8817077265970196e-05, + "loss": 0.3080996870994568, + "step": 1497 + }, + { + "epoch": 0.3978223343513478, + "grad_norm": 0.9758181744675688, + "learning_rate": 1.8815004870473777e-05, + "loss": 0.3247831463813782, + "step": 1498 + }, + { + "epoch": 0.39808790333289074, + "grad_norm": 0.9965389459031595, + "learning_rate": 1.8812930775548387e-05, + "loss": 0.2919698655605316, + "step": 1499 + }, + { + "epoch": 0.3983534723144337, + "grad_norm": 1.1815639690812958, + "learning_rate": 1.8810854981593883e-05, + "loss": 0.3627319931983948, + "step": 1500 + }, + { + "epoch": 0.39861904129597664, + "grad_norm": 1.0245222516327634, + "learning_rate": 1.880877748901045e-05, + "loss": 0.3619319796562195, + "step": 1501 + }, + { + "epoch": 0.3988846102775196, + "grad_norm": 1.0294076265521692, + "learning_rate": 1.8806698298198608e-05, + "loss": 0.3393789827823639, + "step": 1502 + }, + { + "epoch": 0.39915017925906254, + "grad_norm": 1.1375999694611314, + "learning_rate": 1.88046174095592e-05, + "loss": 0.3736116886138916, + "step": 1503 + }, + { + "epoch": 0.3994157482406055, + "grad_norm": 0.9615847393601772, + "learning_rate": 1.8802534823493395e-05, + "loss": 0.32829388976097107, + "step": 1504 + }, + { + "epoch": 0.39968131722214845, + "grad_norm": 1.004520084683698, + "learning_rate": 1.8800450540402694e-05, + "loss": 0.340041846036911, + "step": 1505 + }, + { + "epoch": 0.3999468862036914, + "grad_norm": 1.6423190284198783, + "learning_rate": 1.8798364560688917e-05, + "loss": 0.2830736041069031, + "step": 1506 + }, + { + "epoch": 0.40021245518523435, + "grad_norm": 1.126838308447994, + "learning_rate": 1.8796276884754224e-05, + 
"loss": 0.33011579513549805, + "step": 1507 + }, + { + "epoch": 0.4004780241667773, + "grad_norm": 1.0024833819275993, + "learning_rate": 1.8794187513001088e-05, + "loss": 0.2893834114074707, + "step": 1508 + }, + { + "epoch": 0.40074359314832025, + "grad_norm": 1.0682148927963429, + "learning_rate": 1.8792096445832317e-05, + "loss": 0.3590015172958374, + "step": 1509 + }, + { + "epoch": 0.4010091621298632, + "grad_norm": 1.1883404603513603, + "learning_rate": 1.8790003683651045e-05, + "loss": 0.3968508541584015, + "step": 1510 + }, + { + "epoch": 0.4012747311114062, + "grad_norm": 1.1506641785596874, + "learning_rate": 1.878790922686073e-05, + "loss": 0.324398934841156, + "step": 1511 + }, + { + "epoch": 0.40154030009294917, + "grad_norm": 1.0455658872732225, + "learning_rate": 1.8785813075865164e-05, + "loss": 0.35111895203590393, + "step": 1512 + }, + { + "epoch": 0.4018058690744921, + "grad_norm": 1.055231257150353, + "learning_rate": 1.8783715231068452e-05, + "loss": 0.28124356269836426, + "step": 1513 + }, + { + "epoch": 0.40207143805603507, + "grad_norm": 1.0070468428923411, + "learning_rate": 1.878161569287504e-05, + "loss": 0.28962311148643494, + "step": 1514 + }, + { + "epoch": 0.402337007037578, + "grad_norm": 1.0934983041480315, + "learning_rate": 1.877951446168969e-05, + "loss": 0.3646606206893921, + "step": 1515 + }, + { + "epoch": 0.402602576019121, + "grad_norm": 1.1065863254454682, + "learning_rate": 1.8777411537917497e-05, + "loss": 0.2815355360507965, + "step": 1516 + }, + { + "epoch": 0.4028681450006639, + "grad_norm": 1.1372178900816394, + "learning_rate": 1.877530692196388e-05, + "loss": 0.33208370208740234, + "step": 1517 + }, + { + "epoch": 0.4031337139822069, + "grad_norm": 1.0968319662456871, + "learning_rate": 1.8773200614234587e-05, + "loss": 0.33741289377212524, + "step": 1518 + }, + { + "epoch": 0.40339928296374983, + "grad_norm": 1.1178822197952292, + "learning_rate": 1.877109261513568e-05, + "loss": 0.31304073333740234, + "step": 
1519 + }, + { + "epoch": 0.4036648519452928, + "grad_norm": 1.264796618244999, + "learning_rate": 1.8768982925073566e-05, + "loss": 0.32556387782096863, + "step": 1520 + }, + { + "epoch": 0.40393042092683573, + "grad_norm": 1.1057344226732335, + "learning_rate": 1.8766871544454963e-05, + "loss": 0.3584224581718445, + "step": 1521 + }, + { + "epoch": 0.4041959899083787, + "grad_norm": 1.0109621512685618, + "learning_rate": 1.8764758473686918e-05, + "loss": 0.2864416837692261, + "step": 1522 + }, + { + "epoch": 0.40446155888992164, + "grad_norm": 1.0390539229722413, + "learning_rate": 1.8762643713176815e-05, + "loss": 0.28925320506095886, + "step": 1523 + }, + { + "epoch": 0.4047271278714646, + "grad_norm": 1.022628245189221, + "learning_rate": 1.876052726333235e-05, + "loss": 0.30940550565719604, + "step": 1524 + }, + { + "epoch": 0.4049926968530076, + "grad_norm": 1.1648500528958037, + "learning_rate": 1.875840912456155e-05, + "loss": 0.3463154733181, + "step": 1525 + }, + { + "epoch": 0.40525826583455055, + "grad_norm": 1.1823420506345301, + "learning_rate": 1.8756289297272764e-05, + "loss": 0.3349658250808716, + "step": 1526 + }, + { + "epoch": 0.4055238348160935, + "grad_norm": 1.0511817500052025, + "learning_rate": 1.8754167781874674e-05, + "loss": 0.32588714361190796, + "step": 1527 + }, + { + "epoch": 0.40578940379763645, + "grad_norm": 1.0750045197041278, + "learning_rate": 1.875204457877628e-05, + "loss": 0.33787310123443604, + "step": 1528 + }, + { + "epoch": 0.4060549727791794, + "grad_norm": 1.0444881434472735, + "learning_rate": 1.8749919688386912e-05, + "loss": 0.3223261833190918, + "step": 1529 + }, + { + "epoch": 0.40632054176072235, + "grad_norm": 1.2251483540500576, + "learning_rate": 1.8747793111116226e-05, + "loss": 0.38505882024765015, + "step": 1530 + }, + { + "epoch": 0.4065861107422653, + "grad_norm": 1.077913563059366, + "learning_rate": 1.8745664847374197e-05, + "loss": 0.33071833848953247, + "step": 1531 + }, + { + "epoch": 
0.40685167972380826, + "grad_norm": 1.2405893427169952, + "learning_rate": 1.874353489757113e-05, + "loss": 0.36603987216949463, + "step": 1532 + }, + { + "epoch": 0.4071172487053512, + "grad_norm": 0.9982674001932202, + "learning_rate": 1.874140326211766e-05, + "loss": 0.3103085160255432, + "step": 1533 + }, + { + "epoch": 0.40738281768689416, + "grad_norm": 1.1470515997968143, + "learning_rate": 1.873926994142473e-05, + "loss": 0.3471127152442932, + "step": 1534 + }, + { + "epoch": 0.4076483866684371, + "grad_norm": 1.0759117431352352, + "learning_rate": 1.873713493590363e-05, + "loss": 0.33152899146080017, + "step": 1535 + }, + { + "epoch": 0.40791395564998006, + "grad_norm": 1.0887192073538825, + "learning_rate": 1.8734998245965958e-05, + "loss": 0.340177059173584, + "step": 1536 + }, + { + "epoch": 0.408179524631523, + "grad_norm": 1.175803638176176, + "learning_rate": 1.8732859872023644e-05, + "loss": 0.3331618010997772, + "step": 1537 + }, + { + "epoch": 0.40844509361306597, + "grad_norm": 1.0971311272588662, + "learning_rate": 1.8730719814488937e-05, + "loss": 0.3911997675895691, + "step": 1538 + }, + { + "epoch": 0.408710662594609, + "grad_norm": 1.0986179012488992, + "learning_rate": 1.8728578073774427e-05, + "loss": 0.3699817955493927, + "step": 1539 + }, + { + "epoch": 0.4089762315761519, + "grad_norm": 1.086312859301249, + "learning_rate": 1.8726434650293e-05, + "loss": 0.31567275524139404, + "step": 1540 + }, + { + "epoch": 0.4092418005576949, + "grad_norm": 1.1099279461258769, + "learning_rate": 1.8724289544457897e-05, + "loss": 0.3387305438518524, + "step": 1541 + }, + { + "epoch": 0.40950736953923783, + "grad_norm": 1.6366665349052443, + "learning_rate": 1.8722142756682663e-05, + "loss": 0.3460234999656677, + "step": 1542 + }, + { + "epoch": 0.4097729385207808, + "grad_norm": 1.1109783591024025, + "learning_rate": 1.8719994287381173e-05, + "loss": 0.35653382539749146, + "step": 1543 + }, + { + "epoch": 0.41003850750232373, + "grad_norm": 
1.1054235252004945, + "learning_rate": 1.8717844136967626e-05, + "loss": 0.3828277885913849, + "step": 1544 + }, + { + "epoch": 0.4103040764838667, + "grad_norm": 1.0929819002464054, + "learning_rate": 1.871569230585655e-05, + "loss": 0.35883858799934387, + "step": 1545 + }, + { + "epoch": 0.41056964546540964, + "grad_norm": 0.988264800308937, + "learning_rate": 1.8713538794462783e-05, + "loss": 0.27414464950561523, + "step": 1546 + }, + { + "epoch": 0.4108352144469526, + "grad_norm": 1.0216234157414708, + "learning_rate": 1.871138360320151e-05, + "loss": 0.2924337387084961, + "step": 1547 + }, + { + "epoch": 0.41110078342849554, + "grad_norm": 1.1264719097344291, + "learning_rate": 1.8709226732488216e-05, + "loss": 0.34270918369293213, + "step": 1548 + }, + { + "epoch": 0.4113663524100385, + "grad_norm": 1.056133674601812, + "learning_rate": 1.870706818273872e-05, + "loss": 0.33866482973098755, + "step": 1549 + }, + { + "epoch": 0.41163192139158145, + "grad_norm": 1.0578429496037574, + "learning_rate": 1.8704907954369176e-05, + "loss": 0.3350633382797241, + "step": 1550 + }, + { + "epoch": 0.4118974903731244, + "grad_norm": 1.0981882806330738, + "learning_rate": 1.870274604779604e-05, + "loss": 0.32763785123825073, + "step": 1551 + }, + { + "epoch": 0.41216305935466735, + "grad_norm": 1.1235534336905566, + "learning_rate": 1.8700582463436102e-05, + "loss": 0.3130378723144531, + "step": 1552 + }, + { + "epoch": 0.41242862833621036, + "grad_norm": 1.1311593123986747, + "learning_rate": 1.8698417201706484e-05, + "loss": 0.34318777918815613, + "step": 1553 + }, + { + "epoch": 0.4126941973177533, + "grad_norm": 1.038517953287962, + "learning_rate": 1.8696250263024617e-05, + "loss": 0.3250104784965515, + "step": 1554 + }, + { + "epoch": 0.41295976629929626, + "grad_norm": 1.1047081419569766, + "learning_rate": 1.869408164780826e-05, + "loss": 0.3409217298030853, + "step": 1555 + }, + { + "epoch": 0.4132253352808392, + "grad_norm": 0.9892429720688775, + "learning_rate": 
1.86919113564755e-05, + "loss": 0.2885017395019531, + "step": 1556 + }, + { + "epoch": 0.41349090426238216, + "grad_norm": 0.9861078966083267, + "learning_rate": 1.8689739389444744e-05, + "loss": 0.31912562251091003, + "step": 1557 + }, + { + "epoch": 0.4137564732439251, + "grad_norm": 1.0037060940033242, + "learning_rate": 1.8687565747134716e-05, + "loss": 0.29874011874198914, + "step": 1558 + }, + { + "epoch": 0.41402204222546807, + "grad_norm": 1.0308167425812278, + "learning_rate": 1.8685390429964473e-05, + "loss": 0.3132701516151428, + "step": 1559 + }, + { + "epoch": 0.414287611207011, + "grad_norm": 1.0029824533275895, + "learning_rate": 1.868321343835339e-05, + "loss": 0.31158843636512756, + "step": 1560 + }, + { + "epoch": 0.41455318018855397, + "grad_norm": 0.959841401113078, + "learning_rate": 1.8681034772721167e-05, + "loss": 0.30490344762802124, + "step": 1561 + }, + { + "epoch": 0.4148187491700969, + "grad_norm": 1.1053356359227535, + "learning_rate": 1.867885443348782e-05, + "loss": 0.3150998055934906, + "step": 1562 + }, + { + "epoch": 0.4150843181516399, + "grad_norm": 1.0578010897773087, + "learning_rate": 1.86766724210737e-05, + "loss": 0.3391645550727844, + "step": 1563 + }, + { + "epoch": 0.4153498871331828, + "grad_norm": 1.1317933031731224, + "learning_rate": 1.8674488735899466e-05, + "loss": 0.35013002157211304, + "step": 1564 + }, + { + "epoch": 0.4156154561147258, + "grad_norm": 1.1514144052665038, + "learning_rate": 1.867230337838611e-05, + "loss": 0.3455789387226105, + "step": 1565 + }, + { + "epoch": 0.41588102509626873, + "grad_norm": 1.0985743755307058, + "learning_rate": 1.8670116348954945e-05, + "loss": 0.3179319500923157, + "step": 1566 + }, + { + "epoch": 0.41614659407781174, + "grad_norm": 1.046997092909125, + "learning_rate": 1.8667927648027596e-05, + "loss": 0.3628920018672943, + "step": 1567 + }, + { + "epoch": 0.4164121630593547, + "grad_norm": 1.1175553372657145, + "learning_rate": 1.8665737276026033e-05, + "loss": 
0.33599400520324707, + "step": 1568 + }, + { + "epoch": 0.41667773204089764, + "grad_norm": 1.0741100001694928, + "learning_rate": 1.8663545233372524e-05, + "loss": 0.31519144773483276, + "step": 1569 + }, + { + "epoch": 0.4169433010224406, + "grad_norm": 1.0564388001425704, + "learning_rate": 1.8661351520489667e-05, + "loss": 0.3326237201690674, + "step": 1570 + }, + { + "epoch": 0.41720887000398355, + "grad_norm": 1.0506499046982631, + "learning_rate": 1.865915613780039e-05, + "loss": 0.35254499316215515, + "step": 1571 + }, + { + "epoch": 0.4174744389855265, + "grad_norm": 1.134962500533026, + "learning_rate": 1.8656959085727936e-05, + "loss": 0.36689436435699463, + "step": 1572 + }, + { + "epoch": 0.41774000796706945, + "grad_norm": 1.104702895545828, + "learning_rate": 1.8654760364695873e-05, + "loss": 0.3113600015640259, + "step": 1573 + }, + { + "epoch": 0.4180055769486124, + "grad_norm": 1.0072243279377031, + "learning_rate": 1.865255997512808e-05, + "loss": 0.3336432874202728, + "step": 1574 + }, + { + "epoch": 0.41827114593015535, + "grad_norm": 1.1762721663897004, + "learning_rate": 1.8650357917448774e-05, + "loss": 0.3657492995262146, + "step": 1575 + }, + { + "epoch": 0.4185367149116983, + "grad_norm": 1.1286123264778107, + "learning_rate": 1.864815419208248e-05, + "loss": 0.3087846338748932, + "step": 1576 + }, + { + "epoch": 0.41880228389324126, + "grad_norm": 1.059893684126419, + "learning_rate": 1.8645948799454058e-05, + "loss": 0.31422343850135803, + "step": 1577 + }, + { + "epoch": 0.4190678528747842, + "grad_norm": 1.0232345658393134, + "learning_rate": 1.8643741739988672e-05, + "loss": 0.3172760009765625, + "step": 1578 + }, + { + "epoch": 0.41933342185632716, + "grad_norm": 1.131569038679809, + "learning_rate": 1.8641533014111824e-05, + "loss": 0.36819136142730713, + "step": 1579 + }, + { + "epoch": 0.4195989908378701, + "grad_norm": 1.0215370560204735, + "learning_rate": 1.863932262224933e-05, + "loss": 0.29081088304519653, + "step": 1580 + 
}, + { + "epoch": 0.4198645598194131, + "grad_norm": 1.0406040134422527, + "learning_rate": 1.8637110564827325e-05, + "loss": 0.3209632635116577, + "step": 1581 + }, + { + "epoch": 0.42013012880095607, + "grad_norm": 1.9161132832998955, + "learning_rate": 1.863489684227227e-05, + "loss": 0.3357914686203003, + "step": 1582 + }, + { + "epoch": 0.420395697782499, + "grad_norm": 1.0469990353974015, + "learning_rate": 1.8632681455010937e-05, + "loss": 0.285677969455719, + "step": 1583 + }, + { + "epoch": 0.420661266764042, + "grad_norm": 1.1491447855439996, + "learning_rate": 1.8630464403470435e-05, + "loss": 0.377876341342926, + "step": 1584 + }, + { + "epoch": 0.4209268357455849, + "grad_norm": 1.0642007656116979, + "learning_rate": 1.8628245688078187e-05, + "loss": 0.3141768276691437, + "step": 1585 + }, + { + "epoch": 0.4211924047271279, + "grad_norm": 1.078787810404599, + "learning_rate": 1.8626025309261927e-05, + "loss": 0.34249693155288696, + "step": 1586 + }, + { + "epoch": 0.42145797370867083, + "grad_norm": 1.1583509747022063, + "learning_rate": 1.8623803267449722e-05, + "loss": 0.32564717531204224, + "step": 1587 + }, + { + "epoch": 0.4217235426902138, + "grad_norm": 1.0623179841052965, + "learning_rate": 1.8621579563069957e-05, + "loss": 0.3425004184246063, + "step": 1588 + }, + { + "epoch": 0.42198911167175673, + "grad_norm": 1.05392590229203, + "learning_rate": 1.8619354196551333e-05, + "loss": 0.3676222562789917, + "step": 1589 + }, + { + "epoch": 0.4222546806532997, + "grad_norm": 0.9612536546184688, + "learning_rate": 1.8617127168322877e-05, + "loss": 0.28915971517562866, + "step": 1590 + }, + { + "epoch": 0.42252024963484264, + "grad_norm": 1.1293248025877465, + "learning_rate": 1.8614898478813933e-05, + "loss": 0.3387221097946167, + "step": 1591 + }, + { + "epoch": 0.4227858186163856, + "grad_norm": 1.0804518757125117, + "learning_rate": 1.8612668128454164e-05, + "loss": 0.33886784315109253, + "step": 1592 + }, + { + "epoch": 0.42305138759792854, + 
"grad_norm": 1.0780507904890781, + "learning_rate": 1.8610436117673557e-05, + "loss": 0.3364121913909912, + "step": 1593 + }, + { + "epoch": 0.4233169565794715, + "grad_norm": 1.0590527240631433, + "learning_rate": 1.8608202446902418e-05, + "loss": 0.3661370873451233, + "step": 1594 + }, + { + "epoch": 0.4235825255610145, + "grad_norm": 1.254416564930449, + "learning_rate": 1.8605967116571372e-05, + "loss": 0.2980557680130005, + "step": 1595 + }, + { + "epoch": 0.42384809454255745, + "grad_norm": 1.180518248335952, + "learning_rate": 1.8603730127111363e-05, + "loss": 0.36112043261528015, + "step": 1596 + }, + { + "epoch": 0.4241136635241004, + "grad_norm": 0.9967676484164163, + "learning_rate": 1.860149147895366e-05, + "loss": 0.30641958117485046, + "step": 1597 + }, + { + "epoch": 0.42437923250564336, + "grad_norm": 1.06006138769355, + "learning_rate": 1.8599251172529836e-05, + "loss": 0.3312561511993408, + "step": 1598 + }, + { + "epoch": 0.4246448014871863, + "grad_norm": 1.070580032885208, + "learning_rate": 1.859700920827181e-05, + "loss": 0.3757131099700928, + "step": 1599 + }, + { + "epoch": 0.42491037046872926, + "grad_norm": 1.0514692584176801, + "learning_rate": 1.8594765586611805e-05, + "loss": 0.3225080370903015, + "step": 1600 + }, + { + "epoch": 0.4251759394502722, + "grad_norm": 1.0857454483782787, + "learning_rate": 1.859252030798236e-05, + "loss": 0.35943928360939026, + "step": 1601 + }, + { + "epoch": 0.42544150843181516, + "grad_norm": 0.9907794348406631, + "learning_rate": 1.859027337281633e-05, + "loss": 0.29319390654563904, + "step": 1602 + }, + { + "epoch": 0.4257070774133581, + "grad_norm": 1.1441852776057728, + "learning_rate": 1.8588024781546914e-05, + "loss": 0.32320237159729004, + "step": 1603 + }, + { + "epoch": 0.42597264639490107, + "grad_norm": 1.1070076098385897, + "learning_rate": 1.8585774534607606e-05, + "loss": 0.3381520211696625, + "step": 1604 + }, + { + "epoch": 0.426238215376444, + "grad_norm": 0.9826840529093485, + 
"learning_rate": 1.858352263243223e-05, + "loss": 0.30010825395584106, + "step": 1605 + }, + { + "epoch": 0.42650378435798697, + "grad_norm": 0.9805553200940528, + "learning_rate": 1.8581269075454918e-05, + "loss": 0.26282748579978943, + "step": 1606 + }, + { + "epoch": 0.4267693533395299, + "grad_norm": 1.0395702570014627, + "learning_rate": 1.857901386411014e-05, + "loss": 0.33613401651382446, + "step": 1607 + }, + { + "epoch": 0.4270349223210729, + "grad_norm": 1.1625768546626036, + "learning_rate": 1.8576756998832667e-05, + "loss": 0.34522315859794617, + "step": 1608 + }, + { + "epoch": 0.4273004913026159, + "grad_norm": 1.0776480516530333, + "learning_rate": 1.8574498480057598e-05, + "loss": 0.3253153860569, + "step": 1609 + }, + { + "epoch": 0.42756606028415883, + "grad_norm": 1.177683979502923, + "learning_rate": 1.8572238308220347e-05, + "loss": 0.32180655002593994, + "step": 1610 + }, + { + "epoch": 0.4278316292657018, + "grad_norm": 1.2444289754345055, + "learning_rate": 1.856997648375665e-05, + "loss": 0.3274008333683014, + "step": 1611 + }, + { + "epoch": 0.42809719824724474, + "grad_norm": 1.006782047196068, + "learning_rate": 1.8567713007102565e-05, + "loss": 0.3196510374546051, + "step": 1612 + }, + { + "epoch": 0.4283627672287877, + "grad_norm": 1.0069133029708661, + "learning_rate": 1.8565447878694455e-05, + "loss": 0.2759617567062378, + "step": 1613 + }, + { + "epoch": 0.42862833621033064, + "grad_norm": 1.1572573238869637, + "learning_rate": 1.8563181098969017e-05, + "loss": 0.35069289803504944, + "step": 1614 + }, + { + "epoch": 0.4288939051918736, + "grad_norm": 1.1400434606874466, + "learning_rate": 1.8560912668363253e-05, + "loss": 0.3388484716415405, + "step": 1615 + }, + { + "epoch": 0.42915947417341654, + "grad_norm": 1.0338736294243014, + "learning_rate": 1.8558642587314496e-05, + "loss": 0.34116029739379883, + "step": 1616 + }, + { + "epoch": 0.4294250431549595, + "grad_norm": 1.0487376701262667, + "learning_rate": 
1.8556370856260387e-05, + "loss": 0.30212706327438354, + "step": 1617 + }, + { + "epoch": 0.42969061213650245, + "grad_norm": 1.0633174136084793, + "learning_rate": 1.855409747563889e-05, + "loss": 0.32250338792800903, + "step": 1618 + }, + { + "epoch": 0.4299561811180454, + "grad_norm": 1.132237618998821, + "learning_rate": 1.8551822445888285e-05, + "loss": 0.35972943902015686, + "step": 1619 + }, + { + "epoch": 0.43022175009958835, + "grad_norm": 0.9921112897877987, + "learning_rate": 1.8549545767447174e-05, + "loss": 0.3112533390522003, + "step": 1620 + }, + { + "epoch": 0.4304873190811313, + "grad_norm": 1.0331176116114555, + "learning_rate": 1.854726744075447e-05, + "loss": 0.3044458031654358, + "step": 1621 + }, + { + "epoch": 0.43075288806267426, + "grad_norm": 1.0421498129424722, + "learning_rate": 1.8544987466249412e-05, + "loss": 0.3261772096157074, + "step": 1622 + }, + { + "epoch": 0.43101845704421726, + "grad_norm": 1.3249821498842442, + "learning_rate": 1.8542705844371544e-05, + "loss": 0.3485907018184662, + "step": 1623 + }, + { + "epoch": 0.4312840260257602, + "grad_norm": 2.6643478315387576, + "learning_rate": 1.8540422575560747e-05, + "loss": 0.3016113340854645, + "step": 1624 + }, + { + "epoch": 0.43154959500730317, + "grad_norm": 1.021133157663628, + "learning_rate": 1.8538137660257198e-05, + "loss": 0.35383081436157227, + "step": 1625 + }, + { + "epoch": 0.4318151639888461, + "grad_norm": 1.170997891522692, + "learning_rate": 1.8535851098901406e-05, + "loss": 0.32015109062194824, + "step": 1626 + }, + { + "epoch": 0.43208073297038907, + "grad_norm": 1.1526156179794622, + "learning_rate": 1.8533562891934195e-05, + "loss": 0.3801743984222412, + "step": 1627 + }, + { + "epoch": 0.432346301951932, + "grad_norm": 1.0686097183664227, + "learning_rate": 1.85312730397967e-05, + "loss": 0.33140939474105835, + "step": 1628 + }, + { + "epoch": 0.432611870933475, + "grad_norm": 1.232101025230023, + "learning_rate": 1.8528981542930382e-05, + "loss": 
0.4052904546260834, + "step": 1629 + }, + { + "epoch": 0.4328774399150179, + "grad_norm": 1.0850305465298753, + "learning_rate": 1.8526688401777014e-05, + "loss": 0.3661607801914215, + "step": 1630 + }, + { + "epoch": 0.4331430088965609, + "grad_norm": 1.0520968780833948, + "learning_rate": 1.852439361677868e-05, + "loss": 0.33260756731033325, + "step": 1631 + }, + { + "epoch": 0.43340857787810383, + "grad_norm": 1.0137607762513057, + "learning_rate": 1.85220971883778e-05, + "loss": 0.30222776532173157, + "step": 1632 + }, + { + "epoch": 0.4336741468596468, + "grad_norm": 1.1138822281677037, + "learning_rate": 1.8519799117017086e-05, + "loss": 0.3444751799106598, + "step": 1633 + }, + { + "epoch": 0.43393971584118973, + "grad_norm": 1.0896517914007275, + "learning_rate": 1.8517499403139586e-05, + "loss": 0.33887404203414917, + "step": 1634 + }, + { + "epoch": 0.4342052848227327, + "grad_norm": 0.9260010903737679, + "learning_rate": 1.8515198047188652e-05, + "loss": 0.287893146276474, + "step": 1635 + }, + { + "epoch": 0.43447085380427564, + "grad_norm": 1.0080783350179279, + "learning_rate": 1.8512895049607965e-05, + "loss": 0.32236215472221375, + "step": 1636 + }, + { + "epoch": 0.43473642278581864, + "grad_norm": 1.0861808896793093, + "learning_rate": 1.8510590410841515e-05, + "loss": 0.30670079588890076, + "step": 1637 + }, + { + "epoch": 0.4350019917673616, + "grad_norm": 1.045996826542631, + "learning_rate": 1.8508284131333604e-05, + "loss": 0.34104713797569275, + "step": 1638 + }, + { + "epoch": 0.43526756074890455, + "grad_norm": 1.13616869746559, + "learning_rate": 1.8505976211528857e-05, + "loss": 0.3402378559112549, + "step": 1639 + }, + { + "epoch": 0.4355331297304475, + "grad_norm": 1.1414650328718847, + "learning_rate": 1.8503666651872217e-05, + "loss": 0.35236096382141113, + "step": 1640 + }, + { + "epoch": 0.43579869871199045, + "grad_norm": 1.1137846416322885, + "learning_rate": 1.850135545280894e-05, + "loss": 0.3385634422302246, + "step": 1641 + 
}, + { + "epoch": 0.4360642676935334, + "grad_norm": 1.0049349552180111, + "learning_rate": 1.849904261478459e-05, + "loss": 0.32222414016723633, + "step": 1642 + }, + { + "epoch": 0.43632983667507635, + "grad_norm": 1.1246487142505726, + "learning_rate": 1.8496728138245062e-05, + "loss": 0.3251120448112488, + "step": 1643 + }, + { + "epoch": 0.4365954056566193, + "grad_norm": 1.3230672810485753, + "learning_rate": 1.8494412023636563e-05, + "loss": 0.3199063837528229, + "step": 1644 + }, + { + "epoch": 0.43686097463816226, + "grad_norm": 1.031106173264746, + "learning_rate": 1.8492094271405605e-05, + "loss": 0.3470883071422577, + "step": 1645 + }, + { + "epoch": 0.4371265436197052, + "grad_norm": 1.1420067933967792, + "learning_rate": 1.848977488199903e-05, + "loss": 0.319596529006958, + "step": 1646 + }, + { + "epoch": 0.43739211260124816, + "grad_norm": 1.172387725238046, + "learning_rate": 1.848745385586398e-05, + "loss": 0.3445591628551483, + "step": 1647 + }, + { + "epoch": 0.4376576815827911, + "grad_norm": 1.0622512502557289, + "learning_rate": 1.848513119344793e-05, + "loss": 0.35861149430274963, + "step": 1648 + }, + { + "epoch": 0.43792325056433407, + "grad_norm": 1.3423176489021205, + "learning_rate": 1.8482806895198658e-05, + "loss": 0.36727622151374817, + "step": 1649 + }, + { + "epoch": 0.438188819545877, + "grad_norm": 1.0985203266462633, + "learning_rate": 1.848048096156426e-05, + "loss": 0.3505704402923584, + "step": 1650 + }, + { + "epoch": 0.43845438852742, + "grad_norm": 1.050005044594017, + "learning_rate": 1.8478153392993154e-05, + "loss": 0.3508742153644562, + "step": 1651 + }, + { + "epoch": 0.438719957508963, + "grad_norm": 1.0688095584032915, + "learning_rate": 1.8475824189934063e-05, + "loss": 0.32757264375686646, + "step": 1652 + }, + { + "epoch": 0.43898552649050593, + "grad_norm": 1.0768843323365103, + "learning_rate": 1.8473493352836032e-05, + "loss": 0.3117530643939972, + "step": 1653 + }, + { + "epoch": 0.4392510954720489, + 
"grad_norm": 1.1751248406507369, + "learning_rate": 1.8471160882148417e-05, + "loss": 0.3506043553352356, + "step": 1654 + }, + { + "epoch": 0.43951666445359183, + "grad_norm": 1.1247697965204402, + "learning_rate": 1.8468826778320892e-05, + "loss": 0.33997148275375366, + "step": 1655 + }, + { + "epoch": 0.4397822334351348, + "grad_norm": 1.007133328419329, + "learning_rate": 1.8466491041803446e-05, + "loss": 0.30060335993766785, + "step": 1656 + }, + { + "epoch": 0.44004780241667774, + "grad_norm": 0.9546594059496064, + "learning_rate": 1.846415367304638e-05, + "loss": 0.3057805597782135, + "step": 1657 + }, + { + "epoch": 0.4403133713982207, + "grad_norm": 1.006954520739026, + "learning_rate": 1.846181467250031e-05, + "loss": 0.30772098898887634, + "step": 1658 + }, + { + "epoch": 0.44057894037976364, + "grad_norm": 1.043209753174748, + "learning_rate": 1.845947404061617e-05, + "loss": 0.3183813989162445, + "step": 1659 + }, + { + "epoch": 0.4408445093613066, + "grad_norm": 1.0413807475941115, + "learning_rate": 1.8457131777845204e-05, + "loss": 0.2986184358596802, + "step": 1660 + }, + { + "epoch": 0.44111007834284954, + "grad_norm": 1.0330249735438937, + "learning_rate": 1.8454787884638973e-05, + "loss": 0.33342432975769043, + "step": 1661 + }, + { + "epoch": 0.4413756473243925, + "grad_norm": 1.6337494282252796, + "learning_rate": 1.8452442361449353e-05, + "loss": 0.33435192704200745, + "step": 1662 + }, + { + "epoch": 0.44164121630593545, + "grad_norm": 1.1084487395338765, + "learning_rate": 1.8450095208728537e-05, + "loss": 0.31596100330352783, + "step": 1663 + }, + { + "epoch": 0.4419067852874784, + "grad_norm": 1.0372033094770008, + "learning_rate": 1.8447746426929022e-05, + "loss": 0.29850512742996216, + "step": 1664 + }, + { + "epoch": 0.4421723542690214, + "grad_norm": 1.1891933812209383, + "learning_rate": 1.8445396016503628e-05, + "loss": 0.34898555278778076, + "step": 1665 + }, + { + "epoch": 0.44243792325056436, + "grad_norm": 1.0486597661615855, + 
"learning_rate": 1.8443043977905484e-05, + "loss": 0.283272385597229, + "step": 1666 + }, + { + "epoch": 0.4427034922321073, + "grad_norm": 1.041766578180328, + "learning_rate": 1.844069031158804e-05, + "loss": 0.32765433192253113, + "step": 1667 + }, + { + "epoch": 0.44296906121365026, + "grad_norm": 1.1465241668847563, + "learning_rate": 1.8438335018005052e-05, + "loss": 0.347957044839859, + "step": 1668 + }, + { + "epoch": 0.4432346301951932, + "grad_norm": 1.1330493919292772, + "learning_rate": 1.8435978097610594e-05, + "loss": 0.36188018321990967, + "step": 1669 + }, + { + "epoch": 0.44350019917673617, + "grad_norm": 1.1541714860130494, + "learning_rate": 1.843361955085905e-05, + "loss": 0.35944315791130066, + "step": 1670 + }, + { + "epoch": 0.4437657681582791, + "grad_norm": 1.0564596521414393, + "learning_rate": 1.8431259378205122e-05, + "loss": 0.33441367745399475, + "step": 1671 + }, + { + "epoch": 0.44403133713982207, + "grad_norm": 1.1043363461383413, + "learning_rate": 1.8428897580103827e-05, + "loss": 0.3157849907875061, + "step": 1672 + }, + { + "epoch": 0.444296906121365, + "grad_norm": 1.0760645254646117, + "learning_rate": 1.8426534157010486e-05, + "loss": 0.33416497707366943, + "step": 1673 + }, + { + "epoch": 0.444562475102908, + "grad_norm": 1.1629646905519946, + "learning_rate": 1.842416910938074e-05, + "loss": 0.3611617684364319, + "step": 1674 + }, + { + "epoch": 0.4448280440844509, + "grad_norm": 1.079831089952362, + "learning_rate": 1.8421802437670546e-05, + "loss": 0.3030395805835724, + "step": 1675 + }, + { + "epoch": 0.4450936130659939, + "grad_norm": 0.9867988845558019, + "learning_rate": 1.8419434142336167e-05, + "loss": 0.30281510949134827, + "step": 1676 + }, + { + "epoch": 0.44535918204753683, + "grad_norm": 1.2041533085675928, + "learning_rate": 1.8417064223834184e-05, + "loss": 0.3489738404750824, + "step": 1677 + }, + { + "epoch": 0.4456247510290798, + "grad_norm": 1.0320394434428715, + "learning_rate": 1.8414692682621487e-05, + 
"loss": 0.30453425645828247, + "step": 1678 + }, + { + "epoch": 0.44589032001062273, + "grad_norm": 0.9586890082829097, + "learning_rate": 1.841231951915528e-05, + "loss": 0.28717339038848877, + "step": 1679 + }, + { + "epoch": 0.44615588899216574, + "grad_norm": 1.0685350052372018, + "learning_rate": 1.840994473389309e-05, + "loss": 0.3227912187576294, + "step": 1680 + }, + { + "epoch": 0.4464214579737087, + "grad_norm": 1.0774879432227336, + "learning_rate": 1.8407568327292737e-05, + "loss": 0.3575928807258606, + "step": 1681 + }, + { + "epoch": 0.44668702695525164, + "grad_norm": 1.0240612597420884, + "learning_rate": 1.840519029981237e-05, + "loss": 0.35601454973220825, + "step": 1682 + }, + { + "epoch": 0.4469525959367946, + "grad_norm": 1.1829639598617365, + "learning_rate": 1.8402810651910444e-05, + "loss": 0.34867429733276367, + "step": 1683 + }, + { + "epoch": 0.44721816491833755, + "grad_norm": 1.0185115495756123, + "learning_rate": 1.8400429384045724e-05, + "loss": 0.3333359360694885, + "step": 1684 + }, + { + "epoch": 0.4474837338998805, + "grad_norm": 1.1658514468774803, + "learning_rate": 1.8398046496677296e-05, + "loss": 0.3269057273864746, + "step": 1685 + }, + { + "epoch": 0.44774930288142345, + "grad_norm": 1.0186865264151983, + "learning_rate": 1.839566199026455e-05, + "loss": 0.3507213890552521, + "step": 1686 + }, + { + "epoch": 0.4480148718629664, + "grad_norm": 1.0962029873559684, + "learning_rate": 1.8393275865267185e-05, + "loss": 0.32935822010040283, + "step": 1687 + }, + { + "epoch": 0.44828044084450935, + "grad_norm": 1.168811125319112, + "learning_rate": 1.8390888122145225e-05, + "loss": 0.3780096769332886, + "step": 1688 + }, + { + "epoch": 0.4485460098260523, + "grad_norm": 1.08432540630583, + "learning_rate": 1.8388498761358997e-05, + "loss": 0.3412250578403473, + "step": 1689 + }, + { + "epoch": 0.44881157880759526, + "grad_norm": 1.0725143861051711, + "learning_rate": 1.838610778336914e-05, + "loss": 0.33751022815704346, + "step": 
1690 + }, + { + "epoch": 0.4490771477891382, + "grad_norm": 1.113628501747759, + "learning_rate": 1.8383715188636608e-05, + "loss": 0.35736170411109924, + "step": 1691 + }, + { + "epoch": 0.44934271677068116, + "grad_norm": 1.0608679340591776, + "learning_rate": 1.8381320977622664e-05, + "loss": 0.3133913278579712, + "step": 1692 + }, + { + "epoch": 0.4496082857522241, + "grad_norm": 1.0696112323301112, + "learning_rate": 1.8378925150788886e-05, + "loss": 0.2890821099281311, + "step": 1693 + }, + { + "epoch": 0.4498738547337671, + "grad_norm": 1.0759892831738864, + "learning_rate": 1.8376527708597155e-05, + "loss": 0.34016966819763184, + "step": 1694 + }, + { + "epoch": 0.45013942371531007, + "grad_norm": 1.0933611032669988, + "learning_rate": 1.8374128651509676e-05, + "loss": 0.3502900302410126, + "step": 1695 + }, + { + "epoch": 0.450404992696853, + "grad_norm": 1.1956521483077693, + "learning_rate": 1.8371727979988957e-05, + "loss": 0.31828251481056213, + "step": 1696 + }, + { + "epoch": 0.450670561678396, + "grad_norm": 1.1739995891800665, + "learning_rate": 1.836932569449782e-05, + "loss": 0.33322471380233765, + "step": 1697 + }, + { + "epoch": 0.4509361306599389, + "grad_norm": 0.977715581129718, + "learning_rate": 1.8366921795499394e-05, + "loss": 0.28489458560943604, + "step": 1698 + }, + { + "epoch": 0.4512016996414819, + "grad_norm": 1.0351592490047028, + "learning_rate": 1.8364516283457127e-05, + "loss": 0.3125787079334259, + "step": 1699 + }, + { + "epoch": 0.45146726862302483, + "grad_norm": 1.6801930060854708, + "learning_rate": 1.8362109158834767e-05, + "loss": 0.3352596163749695, + "step": 1700 + }, + { + "epoch": 0.4517328376045678, + "grad_norm": 1.0152758212914303, + "learning_rate": 1.8359700422096385e-05, + "loss": 0.2986747622489929, + "step": 1701 + }, + { + "epoch": 0.45199840658611073, + "grad_norm": 1.0704573865215896, + "learning_rate": 1.8357290073706355e-05, + "loss": 0.3276829123497009, + "step": 1702 + }, + { + "epoch": 
0.4522639755676537, + "grad_norm": 1.05119725558451, + "learning_rate": 1.8354878114129368e-05, + "loss": 0.3183029890060425, + "step": 1703 + }, + { + "epoch": 0.45252954454919664, + "grad_norm": 1.0595099003295023, + "learning_rate": 1.835246454383041e-05, + "loss": 0.32149460911750793, + "step": 1704 + }, + { + "epoch": 0.4527951135307396, + "grad_norm": 1.0365725372264356, + "learning_rate": 1.8350049363274802e-05, + "loss": 0.2963859438896179, + "step": 1705 + }, + { + "epoch": 0.45306068251228254, + "grad_norm": 1.132218144997021, + "learning_rate": 1.8347632572928154e-05, + "loss": 0.35251080989837646, + "step": 1706 + }, + { + "epoch": 0.4533262514938255, + "grad_norm": 1.1840188868504486, + "learning_rate": 1.8345214173256395e-05, + "loss": 0.3585474491119385, + "step": 1707 + }, + { + "epoch": 0.4535918204753685, + "grad_norm": 1.1792148584627284, + "learning_rate": 1.834279416472577e-05, + "loss": 0.32339078187942505, + "step": 1708 + }, + { + "epoch": 0.45385738945691145, + "grad_norm": 1.030916532610971, + "learning_rate": 1.8340372547802822e-05, + "loss": 0.3473295569419861, + "step": 1709 + }, + { + "epoch": 0.4541229584384544, + "grad_norm": 1.149162033618886, + "learning_rate": 1.833794932295441e-05, + "loss": 0.35146117210388184, + "step": 1710 + }, + { + "epoch": 0.45438852741999736, + "grad_norm": 1.080751163824508, + "learning_rate": 1.833552449064771e-05, + "loss": 0.29697534441947937, + "step": 1711 + }, + { + "epoch": 0.4546540964015403, + "grad_norm": 1.0590764839143914, + "learning_rate": 1.8333098051350197e-05, + "loss": 0.30980685353279114, + "step": 1712 + }, + { + "epoch": 0.45491966538308326, + "grad_norm": 1.2023264217964575, + "learning_rate": 1.8330670005529657e-05, + "loss": 0.3271983861923218, + "step": 1713 + }, + { + "epoch": 0.4551852343646262, + "grad_norm": 1.061456665590969, + "learning_rate": 1.8328240353654193e-05, + "loss": 0.3421804904937744, + "step": 1714 + }, + { + "epoch": 0.45545080334616916, + "grad_norm": 
0.988281834877126, + "learning_rate": 1.8325809096192207e-05, + "loss": 0.2949771285057068, + "step": 1715 + }, + { + "epoch": 0.4557163723277121, + "grad_norm": 1.1467541005281106, + "learning_rate": 1.832337623361242e-05, + "loss": 0.35578668117523193, + "step": 1716 + }, + { + "epoch": 0.45598194130925507, + "grad_norm": 1.099618839558401, + "learning_rate": 1.832094176638387e-05, + "loss": 0.3714647889137268, + "step": 1717 + }, + { + "epoch": 0.456247510290798, + "grad_norm": 1.116087725713372, + "learning_rate": 1.8318505694975877e-05, + "loss": 0.36253875494003296, + "step": 1718 + }, + { + "epoch": 0.45651307927234097, + "grad_norm": 1.0310426822464949, + "learning_rate": 1.8316068019858093e-05, + "loss": 0.3148016035556793, + "step": 1719 + }, + { + "epoch": 0.4567786482538839, + "grad_norm": 1.0869949789046671, + "learning_rate": 1.8313628741500476e-05, + "loss": 0.3420512080192566, + "step": 1720 + }, + { + "epoch": 0.4570442172354269, + "grad_norm": 1.0955610437646774, + "learning_rate": 1.831118786037329e-05, + "loss": 0.2941698431968689, + "step": 1721 + }, + { + "epoch": 0.4573097862169699, + "grad_norm": 0.9987507632564111, + "learning_rate": 1.83087453769471e-05, + "loss": 0.3033481240272522, + "step": 1722 + }, + { + "epoch": 0.45757535519851283, + "grad_norm": 1.0508818993675257, + "learning_rate": 1.8306301291692798e-05, + "loss": 0.3405943810939789, + "step": 1723 + }, + { + "epoch": 0.4578409241800558, + "grad_norm": 1.0291343903638976, + "learning_rate": 1.8303855605081567e-05, + "loss": 0.32217931747436523, + "step": 1724 + }, + { + "epoch": 0.45810649316159874, + "grad_norm": 1.1797464113481113, + "learning_rate": 1.8301408317584913e-05, + "loss": 0.3627573847770691, + "step": 1725 + }, + { + "epoch": 0.4583720621431417, + "grad_norm": 1.1425882725361838, + "learning_rate": 1.829895942967464e-05, + "loss": 0.3512224853038788, + "step": 1726 + }, + { + "epoch": 0.45863763112468464, + "grad_norm": 1.1358093316461328, + "learning_rate": 
1.8296508941822868e-05, + "loss": 0.35433265566825867, + "step": 1727 + }, + { + "epoch": 0.4589032001062276, + "grad_norm": 1.1217406683513973, + "learning_rate": 1.829405685450202e-05, + "loss": 0.33105185627937317, + "step": 1728 + }, + { + "epoch": 0.45916876908777055, + "grad_norm": 1.0087946676492725, + "learning_rate": 1.829160316818483e-05, + "loss": 0.31765925884246826, + "step": 1729 + }, + { + "epoch": 0.4594343380693135, + "grad_norm": 1.0268902541251206, + "learning_rate": 1.8289147883344338e-05, + "loss": 0.3276101350784302, + "step": 1730 + }, + { + "epoch": 0.45969990705085645, + "grad_norm": 2.1185922480389676, + "learning_rate": 1.8286691000453895e-05, + "loss": 0.2921130061149597, + "step": 1731 + }, + { + "epoch": 0.4599654760323994, + "grad_norm": 0.9680106013727008, + "learning_rate": 1.828423251998716e-05, + "loss": 0.3025062382221222, + "step": 1732 + }, + { + "epoch": 0.46023104501394235, + "grad_norm": 1.0299077884479195, + "learning_rate": 1.82817724424181e-05, + "loss": 0.3128702640533447, + "step": 1733 + }, + { + "epoch": 0.4604966139954853, + "grad_norm": 0.9957682350134235, + "learning_rate": 1.8279310768220987e-05, + "loss": 0.31156033277511597, + "step": 1734 + }, + { + "epoch": 0.46076218297702826, + "grad_norm": 1.0327514294429654, + "learning_rate": 1.82768474978704e-05, + "loss": 0.30409976840019226, + "step": 1735 + }, + { + "epoch": 0.46102775195857126, + "grad_norm": 1.0533664417585449, + "learning_rate": 1.827438263184124e-05, + "loss": 0.305557519197464, + "step": 1736 + }, + { + "epoch": 0.4612933209401142, + "grad_norm": 1.1216722893854725, + "learning_rate": 1.827191617060869e-05, + "loss": 0.36079999804496765, + "step": 1737 + }, + { + "epoch": 0.46155888992165717, + "grad_norm": 1.0546022345807051, + "learning_rate": 1.8269448114648264e-05, + "loss": 0.3341830372810364, + "step": 1738 + }, + { + "epoch": 0.4618244589032001, + "grad_norm": 1.0085785444907966, + "learning_rate": 1.8266978464435764e-05, + "loss": 
0.3222450017929077, + "step": 1739 + }, + { + "epoch": 0.46209002788474307, + "grad_norm": 1.112818872130856, + "learning_rate": 1.826450722044732e-05, + "loss": 0.34665441513061523, + "step": 1740 + }, + { + "epoch": 0.462355596866286, + "grad_norm": 1.1112300040840664, + "learning_rate": 1.8262034383159357e-05, + "loss": 0.31024169921875, + "step": 1741 + }, + { + "epoch": 0.462621165847829, + "grad_norm": 1.2322752248386413, + "learning_rate": 1.8259559953048606e-05, + "loss": 0.2950369119644165, + "step": 1742 + }, + { + "epoch": 0.4628867348293719, + "grad_norm": 1.109045795536776, + "learning_rate": 1.8257083930592102e-05, + "loss": 0.3378523886203766, + "step": 1743 + }, + { + "epoch": 0.4631523038109149, + "grad_norm": 0.9899845397184047, + "learning_rate": 1.8254606316267204e-05, + "loss": 0.2930060923099518, + "step": 1744 + }, + { + "epoch": 0.46341787279245783, + "grad_norm": 1.079619676645024, + "learning_rate": 1.8252127110551564e-05, + "loss": 0.3236517012119293, + "step": 1745 + }, + { + "epoch": 0.4636834417740008, + "grad_norm": 0.9852877201201444, + "learning_rate": 1.824964631392314e-05, + "loss": 0.3010406196117401, + "step": 1746 + }, + { + "epoch": 0.46394901075554373, + "grad_norm": 1.0095585954453505, + "learning_rate": 1.8247163926860204e-05, + "loss": 0.3269607424736023, + "step": 1747 + }, + { + "epoch": 0.4642145797370867, + "grad_norm": 1.0474961373680607, + "learning_rate": 1.8244679949841328e-05, + "loss": 0.3437904715538025, + "step": 1748 + }, + { + "epoch": 0.46448014871862964, + "grad_norm": 1.1512723462780612, + "learning_rate": 1.8242194383345394e-05, + "loss": 0.37820738554000854, + "step": 1749 + }, + { + "epoch": 0.46474571770017264, + "grad_norm": 1.0989334641357904, + "learning_rate": 1.8239707227851592e-05, + "loss": 0.3365899920463562, + "step": 1750 + }, + { + "epoch": 0.4650112866817156, + "grad_norm": 0.9943228703349263, + "learning_rate": 1.8237218483839414e-05, + "loss": 0.30418774485588074, + "step": 1751 + }, + { 
+ "epoch": 0.46527685566325855, + "grad_norm": 0.9379554406122236, + "learning_rate": 1.823472815178866e-05, + "loss": 0.2923222780227661, + "step": 1752 + }, + { + "epoch": 0.4655424246448015, + "grad_norm": 1.1096787188742467, + "learning_rate": 1.823223623217944e-05, + "loss": 0.3358995020389557, + "step": 1753 + }, + { + "epoch": 0.46580799362634445, + "grad_norm": 1.0997620749237405, + "learning_rate": 1.822974272549216e-05, + "loss": 0.3413343131542206, + "step": 1754 + }, + { + "epoch": 0.4660735626078874, + "grad_norm": 1.0873990469892099, + "learning_rate": 1.822724763220755e-05, + "loss": 0.33553364872932434, + "step": 1755 + }, + { + "epoch": 0.46633913158943036, + "grad_norm": 1.0957210856960815, + "learning_rate": 1.8224750952806626e-05, + "loss": 0.35896626114845276, + "step": 1756 + }, + { + "epoch": 0.4666047005709733, + "grad_norm": 1.1032076691430248, + "learning_rate": 1.8222252687770718e-05, + "loss": 0.35345566272735596, + "step": 1757 + }, + { + "epoch": 0.46687026955251626, + "grad_norm": 1.0034635235769087, + "learning_rate": 1.8219752837581466e-05, + "loss": 0.3146013617515564, + "step": 1758 + }, + { + "epoch": 0.4671358385340592, + "grad_norm": 1.0191336075935247, + "learning_rate": 1.8217251402720807e-05, + "loss": 0.33270642161369324, + "step": 1759 + }, + { + "epoch": 0.46740140751560216, + "grad_norm": 1.030475428136688, + "learning_rate": 1.821474838367099e-05, + "loss": 0.3172033727169037, + "step": 1760 + }, + { + "epoch": 0.4676669764971451, + "grad_norm": 1.6535016363051902, + "learning_rate": 1.8212243780914578e-05, + "loss": 0.3277033567428589, + "step": 1761 + }, + { + "epoch": 0.46793254547868807, + "grad_norm": 1.1570228647748637, + "learning_rate": 1.820973759493441e-05, + "loss": 0.3523799777030945, + "step": 1762 + }, + { + "epoch": 0.468198114460231, + "grad_norm": 1.0907259849913267, + "learning_rate": 1.8207229826213664e-05, + "loss": 0.32437676191329956, + "step": 1763 + }, + { + "epoch": 0.468463683441774, + 
"grad_norm": 1.1347618214788342, + "learning_rate": 1.82047204752358e-05, + "loss": 0.34185051918029785, + "step": 1764 + }, + { + "epoch": 0.468729252423317, + "grad_norm": 1.0561382700570243, + "learning_rate": 1.8202209542484594e-05, + "loss": 0.32034197449684143, + "step": 1765 + }, + { + "epoch": 0.46899482140485993, + "grad_norm": 1.097207173265362, + "learning_rate": 1.8199697028444125e-05, + "loss": 0.30969515442848206, + "step": 1766 + }, + { + "epoch": 0.4692603903864029, + "grad_norm": 0.9320632629292236, + "learning_rate": 1.8197182933598776e-05, + "loss": 0.24751389026641846, + "step": 1767 + }, + { + "epoch": 0.46952595936794583, + "grad_norm": 1.2001835130139573, + "learning_rate": 1.8194667258433235e-05, + "loss": 0.3859948217868805, + "step": 1768 + }, + { + "epoch": 0.4697915283494888, + "grad_norm": 1.0989779617923678, + "learning_rate": 1.819215000343249e-05, + "loss": 0.29364967346191406, + "step": 1769 + }, + { + "epoch": 0.47005709733103174, + "grad_norm": 1.1161641657952082, + "learning_rate": 1.8189631169081845e-05, + "loss": 0.3560323715209961, + "step": 1770 + }, + { + "epoch": 0.4703226663125747, + "grad_norm": 1.6505675097600017, + "learning_rate": 1.8187110755866898e-05, + "loss": 0.3458098769187927, + "step": 1771 + }, + { + "epoch": 0.47058823529411764, + "grad_norm": 1.0148526914708587, + "learning_rate": 1.8184588764273555e-05, + "loss": 0.32131001353263855, + "step": 1772 + }, + { + "epoch": 0.4708538042756606, + "grad_norm": 1.0453234866463608, + "learning_rate": 1.8182065194788024e-05, + "loss": 0.3011054992675781, + "step": 1773 + }, + { + "epoch": 0.47111937325720354, + "grad_norm": 1.1076832582073854, + "learning_rate": 1.8179540047896827e-05, + "loss": 0.3314674496650696, + "step": 1774 + }, + { + "epoch": 0.4713849422387465, + "grad_norm": 1.0853788387965118, + "learning_rate": 1.8177013324086774e-05, + "loss": 0.3437536060810089, + "step": 1775 + }, + { + "epoch": 0.47165051122028945, + "grad_norm": 1.166112048160084, + 
"learning_rate": 1.8174485023844993e-05, + "loss": 0.36137935519218445, + "step": 1776 + }, + { + "epoch": 0.4719160802018324, + "grad_norm": 1.0726359370167762, + "learning_rate": 1.8171955147658905e-05, + "loss": 0.34018874168395996, + "step": 1777 + }, + { + "epoch": 0.4721816491833754, + "grad_norm": 1.0596665602066746, + "learning_rate": 1.8169423696016245e-05, + "loss": 0.33298587799072266, + "step": 1778 + }, + { + "epoch": 0.47244721816491836, + "grad_norm": 1.1107712039752602, + "learning_rate": 1.816689066940505e-05, + "loss": 0.3649418354034424, + "step": 1779 + }, + { + "epoch": 0.4727127871464613, + "grad_norm": 1.0148859742506888, + "learning_rate": 1.8164356068313646e-05, + "loss": 0.32419171929359436, + "step": 1780 + }, + { + "epoch": 0.47297835612800426, + "grad_norm": 1.047167823612948, + "learning_rate": 1.8161819893230688e-05, + "loss": 0.288555383682251, + "step": 1781 + }, + { + "epoch": 0.4732439251095472, + "grad_norm": 1.005455205363293, + "learning_rate": 1.815928214464511e-05, + "loss": 0.3231011629104614, + "step": 1782 + }, + { + "epoch": 0.47350949409109017, + "grad_norm": 1.0470674131364166, + "learning_rate": 1.815674282304617e-05, + "loss": 0.29310134053230286, + "step": 1783 + }, + { + "epoch": 0.4737750630726331, + "grad_norm": 1.0390137248114197, + "learning_rate": 1.815420192892341e-05, + "loss": 0.32683852314949036, + "step": 1784 + }, + { + "epoch": 0.47404063205417607, + "grad_norm": 1.0353379429668699, + "learning_rate": 1.8151659462766685e-05, + "loss": 0.3200969099998474, + "step": 1785 + }, + { + "epoch": 0.474306201035719, + "grad_norm": 1.051359679014311, + "learning_rate": 1.814911542506616e-05, + "loss": 0.3091360032558441, + "step": 1786 + }, + { + "epoch": 0.474571770017262, + "grad_norm": 1.1630088603070372, + "learning_rate": 1.814656981631229e-05, + "loss": 0.3679049611091614, + "step": 1787 + }, + { + "epoch": 0.4748373389988049, + "grad_norm": 1.1065634125772459, + "learning_rate": 1.814402263699584e-05, + 
"loss": 0.290119469165802, + "step": 1788 + }, + { + "epoch": 0.4751029079803479, + "grad_norm": 1.0987492456650414, + "learning_rate": 1.8141473887607874e-05, + "loss": 0.31878861784935, + "step": 1789 + }, + { + "epoch": 0.47536847696189083, + "grad_norm": 1.1254389921885528, + "learning_rate": 1.8138923568639763e-05, + "loss": 0.35820287466049194, + "step": 1790 + }, + { + "epoch": 0.4756340459434338, + "grad_norm": 1.0046454439717083, + "learning_rate": 1.8136371680583176e-05, + "loss": 0.2924647629261017, + "step": 1791 + }, + { + "epoch": 0.4758996149249768, + "grad_norm": 1.2202907606610718, + "learning_rate": 1.8133818223930092e-05, + "loss": 0.3799927234649658, + "step": 1792 + }, + { + "epoch": 0.47616518390651974, + "grad_norm": 1.1097316301591598, + "learning_rate": 1.8131263199172783e-05, + "loss": 0.3505420386791229, + "step": 1793 + }, + { + "epoch": 0.4764307528880627, + "grad_norm": 1.1021438648339534, + "learning_rate": 1.8128706606803823e-05, + "loss": 0.3291688859462738, + "step": 1794 + }, + { + "epoch": 0.47669632186960564, + "grad_norm": 1.0814065231113215, + "learning_rate": 1.8126148447316104e-05, + "loss": 0.34079697728157043, + "step": 1795 + }, + { + "epoch": 0.4769618908511486, + "grad_norm": 1.2185578909639558, + "learning_rate": 1.8123588721202802e-05, + "loss": 0.2898064851760864, + "step": 1796 + }, + { + "epoch": 0.47722745983269155, + "grad_norm": 1.0448194415877836, + "learning_rate": 1.8121027428957402e-05, + "loss": 0.32089224457740784, + "step": 1797 + }, + { + "epoch": 0.4774930288142345, + "grad_norm": 1.903396083379018, + "learning_rate": 1.8118464571073697e-05, + "loss": 0.3402039408683777, + "step": 1798 + }, + { + "epoch": 0.47775859779577745, + "grad_norm": 1.1693256768707747, + "learning_rate": 1.8115900148045767e-05, + "loss": 0.29904159903526306, + "step": 1799 + }, + { + "epoch": 0.4780241667773204, + "grad_norm": 1.0688058843932313, + "learning_rate": 1.8113334160368007e-05, + "loss": 0.34074240922927856, + "step": 
1800 + }, + { + "epoch": 0.47828973575886335, + "grad_norm": 1.0404364284009804, + "learning_rate": 1.811076660853511e-05, + "loss": 0.28566253185272217, + "step": 1801 + }, + { + "epoch": 0.4785553047404063, + "grad_norm": 1.0267154270839738, + "learning_rate": 1.8108197493042065e-05, + "loss": 0.34523358941078186, + "step": 1802 + }, + { + "epoch": 0.47882087372194926, + "grad_norm": 1.0082361251695107, + "learning_rate": 1.8105626814384173e-05, + "loss": 0.3261171281337738, + "step": 1803 + }, + { + "epoch": 0.4790864427034922, + "grad_norm": 1.0353580811121572, + "learning_rate": 1.8103054573057027e-05, + "loss": 0.2915942966938019, + "step": 1804 + }, + { + "epoch": 0.47935201168503516, + "grad_norm": 1.117140176261941, + "learning_rate": 1.810048076955653e-05, + "loss": 0.2999255657196045, + "step": 1805 + }, + { + "epoch": 0.47961758066657817, + "grad_norm": 1.0967176640726466, + "learning_rate": 1.8097905404378874e-05, + "loss": 0.3294594883918762, + "step": 1806 + }, + { + "epoch": 0.4798831496481211, + "grad_norm": 1.025641731681811, + "learning_rate": 1.8095328478020563e-05, + "loss": 0.30720093846321106, + "step": 1807 + }, + { + "epoch": 0.4801487186296641, + "grad_norm": 1.0583824100775536, + "learning_rate": 1.8092749990978395e-05, + "loss": 0.31076985597610474, + "step": 1808 + }, + { + "epoch": 0.480414287611207, + "grad_norm": 1.0650372083327142, + "learning_rate": 1.8090169943749477e-05, + "loss": 0.3182013928890228, + "step": 1809 + }, + { + "epoch": 0.48067985659275, + "grad_norm": 1.1560421045272382, + "learning_rate": 1.8087588336831206e-05, + "loss": 0.325716108083725, + "step": 1810 + }, + { + "epoch": 0.48094542557429293, + "grad_norm": 1.034822212222003, + "learning_rate": 1.8085005170721287e-05, + "loss": 0.3148769736289978, + "step": 1811 + }, + { + "epoch": 0.4812109945558359, + "grad_norm": 0.9998987744353804, + "learning_rate": 1.8082420445917727e-05, + "loss": 0.30645644664764404, + "step": 1812 + }, + { + "epoch": 
0.48147656353737883, + "grad_norm": 0.9765412034449941, + "learning_rate": 1.807983416291883e-05, + "loss": 0.2978900969028473, + "step": 1813 + }, + { + "epoch": 0.4817421325189218, + "grad_norm": 1.1281577444413164, + "learning_rate": 1.8077246322223194e-05, + "loss": 0.34340181946754456, + "step": 1814 + }, + { + "epoch": 0.48200770150046474, + "grad_norm": 1.0940690010095575, + "learning_rate": 1.8074656924329733e-05, + "loss": 0.3272106349468231, + "step": 1815 + }, + { + "epoch": 0.4822732704820077, + "grad_norm": 1.0823130111098402, + "learning_rate": 1.807206596973765e-05, + "loss": 0.31061962246894836, + "step": 1816 + }, + { + "epoch": 0.48253883946355064, + "grad_norm": 1.1134329507970786, + "learning_rate": 1.8069473458946445e-05, + "loss": 0.28947243094444275, + "step": 1817 + }, + { + "epoch": 0.4828044084450936, + "grad_norm": 1.066867737773279, + "learning_rate": 1.8066879392455932e-05, + "loss": 0.35057532787323, + "step": 1818 + }, + { + "epoch": 0.48306997742663654, + "grad_norm": 1.5202577425125505, + "learning_rate": 1.8064283770766212e-05, + "loss": 0.31032001972198486, + "step": 1819 + }, + { + "epoch": 0.48333554640817955, + "grad_norm": 1.1166414917810035, + "learning_rate": 1.8061686594377685e-05, + "loss": 0.3802293539047241, + "step": 1820 + }, + { + "epoch": 0.4836011153897225, + "grad_norm": 1.122052528401037, + "learning_rate": 1.8059087863791066e-05, + "loss": 0.3306402564048767, + "step": 1821 + }, + { + "epoch": 0.48386668437126545, + "grad_norm": 1.051177925612534, + "learning_rate": 1.8056487579507352e-05, + "loss": 0.32170724868774414, + "step": 1822 + }, + { + "epoch": 0.4841322533528084, + "grad_norm": 1.0182895505748566, + "learning_rate": 1.8053885742027854e-05, + "loss": 0.35058924555778503, + "step": 1823 + }, + { + "epoch": 0.48439782233435136, + "grad_norm": 1.079491665486815, + "learning_rate": 1.8051282351854168e-05, + "loss": 0.3796595335006714, + "step": 1824 + }, + { + "epoch": 0.4846633913158943, + "grad_norm": 
1.0882057457557335, + "learning_rate": 1.8048677409488205e-05, + "loss": 0.28997284173965454, + "step": 1825 + }, + { + "epoch": 0.48492896029743726, + "grad_norm": 1.7307038017833063, + "learning_rate": 1.804607091543216e-05, + "loss": 0.35110151767730713, + "step": 1826 + }, + { + "epoch": 0.4851945292789802, + "grad_norm": 1.1036882170711018, + "learning_rate": 1.8043462870188535e-05, + "loss": 0.3194088637828827, + "step": 1827 + }, + { + "epoch": 0.48546009826052317, + "grad_norm": 1.0664676604065728, + "learning_rate": 1.8040853274260137e-05, + "loss": 0.28777945041656494, + "step": 1828 + }, + { + "epoch": 0.4857256672420661, + "grad_norm": 1.0702584286398438, + "learning_rate": 1.803824212815006e-05, + "loss": 0.3642069697380066, + "step": 1829 + }, + { + "epoch": 0.48599123622360907, + "grad_norm": 1.0626897024145745, + "learning_rate": 1.80356294323617e-05, + "loss": 0.32396575808525085, + "step": 1830 + }, + { + "epoch": 0.486256805205152, + "grad_norm": 1.205959051296984, + "learning_rate": 1.8033015187398758e-05, + "loss": 0.36421436071395874, + "step": 1831 + }, + { + "epoch": 0.486522374186695, + "grad_norm": 1.0011906322370974, + "learning_rate": 1.8030399393765227e-05, + "loss": 0.3170832395553589, + "step": 1832 + }, + { + "epoch": 0.4867879431682379, + "grad_norm": 0.9739220394650455, + "learning_rate": 1.8027782051965408e-05, + "loss": 0.3003416359424591, + "step": 1833 + }, + { + "epoch": 0.48705351214978093, + "grad_norm": 1.0701369618567955, + "learning_rate": 1.802516316250388e-05, + "loss": 0.30362898111343384, + "step": 1834 + }, + { + "epoch": 0.4873190811313239, + "grad_norm": 1.0466563888798912, + "learning_rate": 1.802254272588555e-05, + "loss": 0.32721444964408875, + "step": 1835 + }, + { + "epoch": 0.48758465011286684, + "grad_norm": 1.345049864677536, + "learning_rate": 1.8019920742615596e-05, + "loss": 0.317483514547348, + "step": 1836 + }, + { + "epoch": 0.4878502190944098, + "grad_norm": 1.0589953518283157, + "learning_rate": 
1.801729721319951e-05, + "loss": 0.2928479015827179, + "step": 1837 + }, + { + "epoch": 0.48811578807595274, + "grad_norm": 1.1098495840377043, + "learning_rate": 1.8014672138143073e-05, + "loss": 0.3425772190093994, + "step": 1838 + }, + { + "epoch": 0.4883813570574957, + "grad_norm": 1.0286414092040284, + "learning_rate": 1.801204551795238e-05, + "loss": 0.334087997674942, + "step": 1839 + }, + { + "epoch": 0.48864692603903864, + "grad_norm": 1.0797374159140127, + "learning_rate": 1.80094173531338e-05, + "loss": 0.3186641335487366, + "step": 1840 + }, + { + "epoch": 0.4889124950205816, + "grad_norm": 1.0361897985848911, + "learning_rate": 1.800678764419401e-05, + "loss": 0.3153733015060425, + "step": 1841 + }, + { + "epoch": 0.48917806400212455, + "grad_norm": 1.070217807683518, + "learning_rate": 1.8004156391640004e-05, + "loss": 0.3323214054107666, + "step": 1842 + }, + { + "epoch": 0.4894436329836675, + "grad_norm": 0.9455521865874897, + "learning_rate": 1.8001523595979043e-05, + "loss": 0.2856762409210205, + "step": 1843 + }, + { + "epoch": 0.48970920196521045, + "grad_norm": 1.0256135363684138, + "learning_rate": 1.79988892577187e-05, + "loss": 0.32493725419044495, + "step": 1844 + }, + { + "epoch": 0.4899747709467534, + "grad_norm": 1.1082860888483268, + "learning_rate": 1.7996253377366846e-05, + "loss": 0.350448876619339, + "step": 1845 + }, + { + "epoch": 0.49024033992829635, + "grad_norm": 1.096249407467401, + "learning_rate": 1.7993615955431648e-05, + "loss": 0.32246965169906616, + "step": 1846 + }, + { + "epoch": 0.4905059089098393, + "grad_norm": 0.9715072313794847, + "learning_rate": 1.799097699242157e-05, + "loss": 0.302636057138443, + "step": 1847 + }, + { + "epoch": 0.4907714778913823, + "grad_norm": 1.1573319310132777, + "learning_rate": 1.7988336488845374e-05, + "loss": 0.34280693531036377, + "step": 1848 + }, + { + "epoch": 0.49103704687292526, + "grad_norm": 1.1205814585182334, + "learning_rate": 1.7985694445212118e-05, + "loss": 
0.3650673031806946, + "step": 1849 + }, + { + "epoch": 0.4913026158544682, + "grad_norm": 1.1348057531260405, + "learning_rate": 1.798305086203115e-05, + "loss": 0.33800822496414185, + "step": 1850 + }, + { + "epoch": 0.49156818483601117, + "grad_norm": 1.0428655272942455, + "learning_rate": 1.7980405739812134e-05, + "loss": 0.31522083282470703, + "step": 1851 + }, + { + "epoch": 0.4918337538175541, + "grad_norm": 1.177464907100392, + "learning_rate": 1.7977759079065003e-05, + "loss": 0.3374335765838623, + "step": 1852 + }, + { + "epoch": 0.49209932279909707, + "grad_norm": 1.060278247692231, + "learning_rate": 1.7975110880300018e-05, + "loss": 0.33803191781044006, + "step": 1853 + }, + { + "epoch": 0.49236489178064, + "grad_norm": 1.0982376140773644, + "learning_rate": 1.797246114402771e-05, + "loss": 0.37764933705329895, + "step": 1854 + }, + { + "epoch": 0.492630460762183, + "grad_norm": 0.9654297547716862, + "learning_rate": 1.796980987075892e-05, + "loss": 0.3075840473175049, + "step": 1855 + }, + { + "epoch": 0.4928960297437259, + "grad_norm": 0.9768928030686648, + "learning_rate": 1.7967157061004782e-05, + "loss": 0.306305855512619, + "step": 1856 + }, + { + "epoch": 0.4931615987252689, + "grad_norm": 1.0225684543938522, + "learning_rate": 1.796450271527673e-05, + "loss": 0.3474302291870117, + "step": 1857 + }, + { + "epoch": 0.49342716770681183, + "grad_norm": 1.0243106870487633, + "learning_rate": 1.7961846834086483e-05, + "loss": 0.31059685349464417, + "step": 1858 + }, + { + "epoch": 0.4936927366883548, + "grad_norm": 1.0236396527349367, + "learning_rate": 1.795918941794607e-05, + "loss": 0.346218079328537, + "step": 1859 + }, + { + "epoch": 0.49395830566989773, + "grad_norm": 0.9969229384493907, + "learning_rate": 1.7956530467367805e-05, + "loss": 0.28371214866638184, + "step": 1860 + }, + { + "epoch": 0.4942238746514407, + "grad_norm": 0.8979156608776232, + "learning_rate": 1.7953869982864306e-05, + "loss": 0.27775150537490845, + "step": 1861 + }, + { 
+ "epoch": 0.4944894436329837, + "grad_norm": 1.279703247293047, + "learning_rate": 1.795120796494848e-05, + "loss": 0.328782856464386, + "step": 1862 + }, + { + "epoch": 0.49475501261452665, + "grad_norm": 1.0950381369417217, + "learning_rate": 1.7948544414133534e-05, + "loss": 0.33220064640045166, + "step": 1863 + }, + { + "epoch": 0.4950205815960696, + "grad_norm": 1.0528449584388764, + "learning_rate": 1.794587933093297e-05, + "loss": 0.32681554555892944, + "step": 1864 + }, + { + "epoch": 0.49528615057761255, + "grad_norm": 1.1023465974826758, + "learning_rate": 1.7943212715860586e-05, + "loss": 0.32202866673469543, + "step": 1865 + }, + { + "epoch": 0.4955517195591555, + "grad_norm": 2.266456857585339, + "learning_rate": 1.7940544569430468e-05, + "loss": 0.3051350712776184, + "step": 1866 + }, + { + "epoch": 0.49581728854069845, + "grad_norm": 1.1617568134775966, + "learning_rate": 1.793787489215701e-05, + "loss": 0.3924705386161804, + "step": 1867 + }, + { + "epoch": 0.4960828575222414, + "grad_norm": 1.018817969430421, + "learning_rate": 1.793520368455489e-05, + "loss": 0.30267882347106934, + "step": 1868 + }, + { + "epoch": 0.49634842650378436, + "grad_norm": 1.0585020042998596, + "learning_rate": 1.793253094713909e-05, + "loss": 0.3150729238986969, + "step": 1869 + }, + { + "epoch": 0.4966139954853273, + "grad_norm": 1.314679145900761, + "learning_rate": 1.7929856680424872e-05, + "loss": 0.33814147114753723, + "step": 1870 + }, + { + "epoch": 0.49687956446687026, + "grad_norm": 1.010460021909887, + "learning_rate": 1.7927180884927814e-05, + "loss": 0.31929856538772583, + "step": 1871 + }, + { + "epoch": 0.4971451334484132, + "grad_norm": 1.1376790681693039, + "learning_rate": 1.7924503561163775e-05, + "loss": 0.3797461688518524, + "step": 1872 + }, + { + "epoch": 0.49741070242995616, + "grad_norm": 1.057594588942085, + "learning_rate": 1.792182470964891e-05, + "loss": 0.3056377172470093, + "step": 1873 + }, + { + "epoch": 0.4976762714114991, + 
"grad_norm": 1.1254473942016883, + "learning_rate": 1.7919144330899668e-05, + "loss": 0.3526398539543152, + "step": 1874 + }, + { + "epoch": 0.49794184039304207, + "grad_norm": 1.0289140670533532, + "learning_rate": 1.79164624254328e-05, + "loss": 0.3183595538139343, + "step": 1875 + }, + { + "epoch": 0.4982074093745851, + "grad_norm": 1.1908370019011798, + "learning_rate": 1.791377899376534e-05, + "loss": 0.3604113459587097, + "step": 1876 + }, + { + "epoch": 0.498472978356128, + "grad_norm": 1.1651856770093412, + "learning_rate": 1.7911094036414623e-05, + "loss": 0.3219848573207855, + "step": 1877 + }, + { + "epoch": 0.498738547337671, + "grad_norm": 1.0586801467718077, + "learning_rate": 1.7908407553898282e-05, + "loss": 0.28773394227027893, + "step": 1878 + }, + { + "epoch": 0.49900411631921393, + "grad_norm": 1.0649509880321448, + "learning_rate": 1.7905719546734233e-05, + "loss": 0.31453996896743774, + "step": 1879 + }, + { + "epoch": 0.4992696853007569, + "grad_norm": 0.9878415524405192, + "learning_rate": 1.7903030015440696e-05, + "loss": 0.2947153151035309, + "step": 1880 + }, + { + "epoch": 0.49953525428229983, + "grad_norm": 1.0652111521233423, + "learning_rate": 1.7900338960536178e-05, + "loss": 0.313723087310791, + "step": 1881 + }, + { + "epoch": 0.4998008232638428, + "grad_norm": 1.0853994840945123, + "learning_rate": 1.7897646382539485e-05, + "loss": 0.3385108709335327, + "step": 1882 + }, + { + "epoch": 0.5000663922453857, + "grad_norm": 1.0993457819479324, + "learning_rate": 1.7894952281969712e-05, + "loss": 0.31417039036750793, + "step": 1883 + }, + { + "epoch": 0.5003319612269287, + "grad_norm": 1.1452192213941934, + "learning_rate": 1.7892256659346253e-05, + "loss": 0.3555717468261719, + "step": 1884 + }, + { + "epoch": 0.5005975302084716, + "grad_norm": 1.1989261836629121, + "learning_rate": 1.7889559515188793e-05, + "loss": 0.3724518120288849, + "step": 1885 + }, + { + "epoch": 0.5008630991900146, + "grad_norm": 1.0516015708006068, + 
"learning_rate": 1.7886860850017306e-05, + "loss": 0.32646167278289795, + "step": 1886 + }, + { + "epoch": 0.5011286681715575, + "grad_norm": 1.079300223054909, + "learning_rate": 1.7884160664352062e-05, + "loss": 0.31072959303855896, + "step": 1887 + }, + { + "epoch": 0.5013942371531005, + "grad_norm": 0.9518526173941219, + "learning_rate": 1.7881458958713628e-05, + "loss": 0.26987242698669434, + "step": 1888 + }, + { + "epoch": 0.5016598061346434, + "grad_norm": 0.9908294117764815, + "learning_rate": 1.787875573362286e-05, + "loss": 0.30105817317962646, + "step": 1889 + }, + { + "epoch": 0.5019253751161864, + "grad_norm": 1.0444226583374554, + "learning_rate": 1.7876050989600908e-05, + "loss": 0.31277188658714294, + "step": 1890 + }, + { + "epoch": 0.5021909440977294, + "grad_norm": 1.0192470233304842, + "learning_rate": 1.7873344727169214e-05, + "loss": 0.31068161129951477, + "step": 1891 + }, + { + "epoch": 0.5024565130792723, + "grad_norm": 1.0797105219167356, + "learning_rate": 1.7870636946849512e-05, + "loss": 0.3491121530532837, + "step": 1892 + }, + { + "epoch": 0.5027220820608153, + "grad_norm": 1.0753654491775293, + "learning_rate": 1.7867927649163838e-05, + "loss": 0.3223581612110138, + "step": 1893 + }, + { + "epoch": 0.5029876510423582, + "grad_norm": 1.1295999155195493, + "learning_rate": 1.7865216834634506e-05, + "loss": 0.345224529504776, + "step": 1894 + }, + { + "epoch": 0.5032532200239012, + "grad_norm": 1.1419032071310418, + "learning_rate": 1.7862504503784123e-05, + "loss": 0.3408205211162567, + "step": 1895 + }, + { + "epoch": 0.5035187890054441, + "grad_norm": 0.9713066472066385, + "learning_rate": 1.7859790657135608e-05, + "loss": 0.2680068016052246, + "step": 1896 + }, + { + "epoch": 0.5037843579869872, + "grad_norm": 0.9186813995364894, + "learning_rate": 1.7857075295212148e-05, + "loss": 0.29733535647392273, + "step": 1897 + }, + { + "epoch": 0.5040499269685301, + "grad_norm": 1.1196248802118025, + "learning_rate": 1.785435841853724e-05, 
+ "loss": 0.34820133447647095, + "step": 1898 + }, + { + "epoch": 0.5043154959500731, + "grad_norm": 1.134445876132798, + "learning_rate": 1.785164002763466e-05, + "loss": 0.3306594491004944, + "step": 1899 + }, + { + "epoch": 0.504581064931616, + "grad_norm": 1.0579272410020724, + "learning_rate": 1.7848920123028482e-05, + "loss": 0.3166846036911011, + "step": 1900 + }, + { + "epoch": 0.504846633913159, + "grad_norm": 1.2213509498849395, + "learning_rate": 1.784619870524308e-05, + "loss": 0.3406408727169037, + "step": 1901 + }, + { + "epoch": 0.5051122028947019, + "grad_norm": 1.0410168562106317, + "learning_rate": 1.78434757748031e-05, + "loss": 0.36358171701431274, + "step": 1902 + }, + { + "epoch": 0.5053777718762449, + "grad_norm": 1.0510382236040618, + "learning_rate": 1.7840751332233498e-05, + "loss": 0.34045761823654175, + "step": 1903 + }, + { + "epoch": 0.5056433408577878, + "grad_norm": 1.0566120463915532, + "learning_rate": 1.783802537805951e-05, + "loss": 0.3442475199699402, + "step": 1904 + }, + { + "epoch": 0.5059089098393308, + "grad_norm": 1.1632822330113848, + "learning_rate": 1.7835297912806675e-05, + "loss": 0.3488585650920868, + "step": 1905 + }, + { + "epoch": 0.5061744788208737, + "grad_norm": 1.098650773563784, + "learning_rate": 1.7832568937000808e-05, + "loss": 0.3340107500553131, + "step": 1906 + }, + { + "epoch": 0.5064400478024167, + "grad_norm": 1.0195614065654457, + "learning_rate": 1.7829838451168027e-05, + "loss": 0.3206177353858948, + "step": 1907 + }, + { + "epoch": 0.5067056167839596, + "grad_norm": 1.0219563874782234, + "learning_rate": 1.782710645583473e-05, + "loss": 0.2851010262966156, + "step": 1908 + }, + { + "epoch": 0.5069711857655026, + "grad_norm": 1.0249326570563306, + "learning_rate": 1.782437295152763e-05, + "loss": 0.31850844621658325, + "step": 1909 + }, + { + "epoch": 0.5072367547470455, + "grad_norm": 1.0890541355083159, + "learning_rate": 1.7821637938773704e-05, + "loss": 0.3343108892440796, + "step": 1910 + }, 
+ { + "epoch": 0.5075023237285885, + "grad_norm": 1.1131994842325255, + "learning_rate": 1.781890141810023e-05, + "loss": 0.3423745930194855, + "step": 1911 + }, + { + "epoch": 0.5077678927101315, + "grad_norm": 1.057536319451762, + "learning_rate": 1.7816163390034775e-05, + "loss": 0.30980780720710754, + "step": 1912 + }, + { + "epoch": 0.5080334616916744, + "grad_norm": 1.0099692843485935, + "learning_rate": 1.7813423855105203e-05, + "loss": 0.31217479705810547, + "step": 1913 + }, + { + "epoch": 0.5082990306732174, + "grad_norm": 1.0721675523916532, + "learning_rate": 1.7810682813839664e-05, + "loss": 0.34741947054862976, + "step": 1914 + }, + { + "epoch": 0.5085645996547603, + "grad_norm": 1.1098427332228447, + "learning_rate": 1.7807940266766595e-05, + "loss": 0.32275527715682983, + "step": 1915 + }, + { + "epoch": 0.5088301686363033, + "grad_norm": 1.1130434711054393, + "learning_rate": 1.7805196214414728e-05, + "loss": 0.32760411500930786, + "step": 1916 + }, + { + "epoch": 0.5090957376178462, + "grad_norm": 1.1445787919507704, + "learning_rate": 1.7802450657313086e-05, + "loss": 0.3877720832824707, + "step": 1917 + }, + { + "epoch": 0.5093613065993892, + "grad_norm": 1.1135916509560913, + "learning_rate": 1.779970359599098e-05, + "loss": 0.33458876609802246, + "step": 1918 + }, + { + "epoch": 0.5096268755809321, + "grad_norm": 0.9826034605244246, + "learning_rate": 1.7796955030978007e-05, + "loss": 0.30603206157684326, + "step": 1919 + }, + { + "epoch": 0.5098924445624751, + "grad_norm": 0.9902684589377142, + "learning_rate": 1.7794204962804063e-05, + "loss": 0.2920286953449249, + "step": 1920 + }, + { + "epoch": 0.510158013544018, + "grad_norm": 1.1034173597508874, + "learning_rate": 1.7791453391999325e-05, + "loss": 0.32407981157302856, + "step": 1921 + }, + { + "epoch": 0.510423582525561, + "grad_norm": 1.3200648964540613, + "learning_rate": 1.7788700319094263e-05, + "loss": 0.30423563718795776, + "step": 1922 + }, + { + "epoch": 0.5106891515071039, + 
"grad_norm": 1.1213502448496324, + "learning_rate": 1.7785945744619642e-05, + "loss": 0.34691399335861206, + "step": 1923 + }, + { + "epoch": 0.5109547204886469, + "grad_norm": 1.0498801582672959, + "learning_rate": 1.7783189669106503e-05, + "loss": 0.3217603266239166, + "step": 1924 + }, + { + "epoch": 0.5112202894701899, + "grad_norm": 1.1943957961346587, + "learning_rate": 1.7780432093086198e-05, + "loss": 0.365132212638855, + "step": 1925 + }, + { + "epoch": 0.5114858584517329, + "grad_norm": 0.9783494867108459, + "learning_rate": 1.7777673017090344e-05, + "loss": 0.29662930965423584, + "step": 1926 + }, + { + "epoch": 0.5117514274332758, + "grad_norm": 1.0707541061431447, + "learning_rate": 1.7774912441650857e-05, + "loss": 0.3324819803237915, + "step": 1927 + }, + { + "epoch": 0.5120169964148188, + "grad_norm": 1.0040789031204058, + "learning_rate": 1.7772150367299953e-05, + "loss": 0.29331067204475403, + "step": 1928 + }, + { + "epoch": 0.5122825653963617, + "grad_norm": 1.064062495235822, + "learning_rate": 1.7769386794570117e-05, + "loss": 0.3158259987831116, + "step": 1929 + }, + { + "epoch": 0.5125481343779047, + "grad_norm": 1.020159871349018, + "learning_rate": 1.7766621723994145e-05, + "loss": 0.2824791967868805, + "step": 1930 + }, + { + "epoch": 0.5128137033594476, + "grad_norm": 1.0493215169042918, + "learning_rate": 1.7763855156105097e-05, + "loss": 0.2690732777118683, + "step": 1931 + }, + { + "epoch": 0.5130792723409906, + "grad_norm": 1.043157004637876, + "learning_rate": 1.7761087091436346e-05, + "loss": 0.31360942125320435, + "step": 1932 + }, + { + "epoch": 0.5133448413225336, + "grad_norm": 0.9858891902519169, + "learning_rate": 1.7758317530521535e-05, + "loss": 0.28334349393844604, + "step": 1933 + }, + { + "epoch": 0.5136104103040765, + "grad_norm": 1.1739380172138798, + "learning_rate": 1.7755546473894604e-05, + "loss": 0.3857404589653015, + "step": 1934 + }, + { + "epoch": 0.5138759792856195, + "grad_norm": 1.0280582546011092, + 
"learning_rate": 1.7752773922089784e-05, + "loss": 0.2852492332458496, + "step": 1935 + }, + { + "epoch": 0.5141415482671624, + "grad_norm": 1.003050995152578, + "learning_rate": 1.7749999875641585e-05, + "loss": 0.2959831953048706, + "step": 1936 + }, + { + "epoch": 0.5144071172487054, + "grad_norm": 1.100974201889633, + "learning_rate": 1.7747224335084815e-05, + "loss": 0.3129635453224182, + "step": 1937 + }, + { + "epoch": 0.5146726862302483, + "grad_norm": 1.0336946735940622, + "learning_rate": 1.774444730095456e-05, + "loss": 0.31391531229019165, + "step": 1938 + }, + { + "epoch": 0.5149382552117913, + "grad_norm": 1.0155253897885985, + "learning_rate": 1.7741668773786202e-05, + "loss": 0.30274757742881775, + "step": 1939 + }, + { + "epoch": 0.5152038241933342, + "grad_norm": 1.026561688701391, + "learning_rate": 1.7738888754115413e-05, + "loss": 0.29162222146987915, + "step": 1940 + }, + { + "epoch": 0.5154693931748772, + "grad_norm": 1.045931473256506, + "learning_rate": 1.7736107242478143e-05, + "loss": 0.30358970165252686, + "step": 1941 + }, + { + "epoch": 0.5157349621564201, + "grad_norm": 1.11915386227621, + "learning_rate": 1.7733324239410634e-05, + "loss": 0.32268065214157104, + "step": 1942 + }, + { + "epoch": 0.5160005311379631, + "grad_norm": 1.0626040245012975, + "learning_rate": 1.7730539745449417e-05, + "loss": 0.31925222277641296, + "step": 1943 + }, + { + "epoch": 0.516266100119506, + "grad_norm": 1.1170224886553113, + "learning_rate": 1.7727753761131312e-05, + "loss": 0.32883748412132263, + "step": 1944 + }, + { + "epoch": 0.516531669101049, + "grad_norm": 1.101510406621582, + "learning_rate": 1.7724966286993425e-05, + "loss": 0.3212829530239105, + "step": 1945 + }, + { + "epoch": 0.5167972380825919, + "grad_norm": 1.1477333753851342, + "learning_rate": 1.772217732357314e-05, + "loss": 0.32909759879112244, + "step": 1946 + }, + { + "epoch": 0.5170628070641349, + "grad_norm": 33.3722959000957, + "learning_rate": 1.7719386871408147e-05, + 
"loss": 0.3451213538646698, + "step": 1947 + }, + { + "epoch": 0.5173283760456778, + "grad_norm": 1.0792459943819739, + "learning_rate": 1.7716594931036402e-05, + "loss": 0.318422794342041, + "step": 1948 + }, + { + "epoch": 0.5175939450272208, + "grad_norm": 1.1243494025490273, + "learning_rate": 1.7713801502996166e-05, + "loss": 0.3165292739868164, + "step": 1949 + }, + { + "epoch": 0.5178595140087637, + "grad_norm": 1.1353818628503742, + "learning_rate": 1.7711006587825975e-05, + "loss": 0.3116700351238251, + "step": 1950 + }, + { + "epoch": 0.5181250829903067, + "grad_norm": 1.2005138291757869, + "learning_rate": 1.7708210186064656e-05, + "loss": 0.32102686166763306, + "step": 1951 + }, + { + "epoch": 0.5183906519718496, + "grad_norm": 1.079523368082095, + "learning_rate": 1.7705412298251323e-05, + "loss": 0.33025500178337097, + "step": 1952 + }, + { + "epoch": 0.5186562209533926, + "grad_norm": 1.2087703844513067, + "learning_rate": 1.7702612924925377e-05, + "loss": 0.36113062500953674, + "step": 1953 + }, + { + "epoch": 0.5189217899349357, + "grad_norm": 1.1242566727618883, + "learning_rate": 1.7699812066626503e-05, + "loss": 0.3092479109764099, + "step": 1954 + }, + { + "epoch": 0.5191873589164786, + "grad_norm": 1.117146005158035, + "learning_rate": 1.769700972389467e-05, + "loss": 0.3389117419719696, + "step": 1955 + }, + { + "epoch": 0.5194529278980216, + "grad_norm": 1.1525168535902064, + "learning_rate": 1.7694205897270147e-05, + "loss": 0.3225803077220917, + "step": 1956 + }, + { + "epoch": 0.5197184968795645, + "grad_norm": 1.0237361691251219, + "learning_rate": 1.7691400587293467e-05, + "loss": 0.3226786255836487, + "step": 1957 + }, + { + "epoch": 0.5199840658611075, + "grad_norm": 1.0060672564491426, + "learning_rate": 1.7688593794505466e-05, + "loss": 0.27708399295806885, + "step": 1958 + }, + { + "epoch": 0.5202496348426504, + "grad_norm": 1.0763214880079806, + "learning_rate": 1.768578551944726e-05, + "loss": 0.36100950837135315, + "step": 1959 
+ }, + { + "epoch": 0.5205152038241934, + "grad_norm": 1.043549985204807, + "learning_rate": 1.768297576266025e-05, + "loss": 0.3138211965560913, + "step": 1960 + }, + { + "epoch": 0.5207807728057363, + "grad_norm": 1.0618046264640966, + "learning_rate": 1.7680164524686128e-05, + "loss": 0.33959656953811646, + "step": 1961 + }, + { + "epoch": 0.5210463417872793, + "grad_norm": 0.9826913420332539, + "learning_rate": 1.7677351806066863e-05, + "loss": 0.3093605637550354, + "step": 1962 + }, + { + "epoch": 0.5213119107688222, + "grad_norm": 1.13307401094871, + "learning_rate": 1.7674537607344717e-05, + "loss": 0.3098641633987427, + "step": 1963 + }, + { + "epoch": 0.5215774797503652, + "grad_norm": 1.0810255128706003, + "learning_rate": 1.767172192906223e-05, + "loss": 0.35172683000564575, + "step": 1964 + }, + { + "epoch": 0.5218430487319081, + "grad_norm": 1.0729896509671073, + "learning_rate": 1.7668904771762242e-05, + "loss": 0.3535798192024231, + "step": 1965 + }, + { + "epoch": 0.5221086177134511, + "grad_norm": 1.2521081937006913, + "learning_rate": 1.766608613598785e-05, + "loss": 0.36183854937553406, + "step": 1966 + }, + { + "epoch": 0.522374186694994, + "grad_norm": 1.0735439944400962, + "learning_rate": 1.7663266022282473e-05, + "loss": 0.35995131731033325, + "step": 1967 + }, + { + "epoch": 0.522639755676537, + "grad_norm": 1.117054454049305, + "learning_rate": 1.766044443118978e-05, + "loss": 0.38672733306884766, + "step": 1968 + }, + { + "epoch": 0.5229053246580799, + "grad_norm": 1.0862044019422723, + "learning_rate": 1.765762136325375e-05, + "loss": 0.3389524221420288, + "step": 1969 + }, + { + "epoch": 0.5231708936396229, + "grad_norm": 0.9847521483407152, + "learning_rate": 1.7654796819018635e-05, + "loss": 0.3325779139995575, + "step": 1970 + }, + { + "epoch": 0.5234364626211658, + "grad_norm": 1.014607581135561, + "learning_rate": 1.7651970799028976e-05, + "loss": 0.328407347202301, + "step": 1971 + }, + { + "epoch": 0.5237020316027088, + 
"grad_norm": 0.9793310107257689, + "learning_rate": 1.764914330382959e-05, + "loss": 0.3050537705421448, + "step": 1972 + }, + { + "epoch": 0.5239676005842517, + "grad_norm": 1.1408686145630131, + "learning_rate": 1.7646314333965588e-05, + "loss": 0.35500285029411316, + "step": 1973 + }, + { + "epoch": 0.5242331695657947, + "grad_norm": 1.1035893819341516, + "learning_rate": 1.7643483889982364e-05, + "loss": 0.30319780111312866, + "step": 1974 + }, + { + "epoch": 0.5244987385473376, + "grad_norm": 1.0161223434375823, + "learning_rate": 1.7640651972425592e-05, + "loss": 0.315757691860199, + "step": 1975 + }, + { + "epoch": 0.5247643075288806, + "grad_norm": 1.0278713767432786, + "learning_rate": 1.7637818581841234e-05, + "loss": 0.28562331199645996, + "step": 1976 + }, + { + "epoch": 0.5250298765104235, + "grad_norm": 1.017204404946826, + "learning_rate": 1.763498371877553e-05, + "loss": 0.29798296093940735, + "step": 1977 + }, + { + "epoch": 0.5252954454919665, + "grad_norm": 1.1245986087835715, + "learning_rate": 1.763214738377501e-05, + "loss": 0.2923639416694641, + "step": 1978 + }, + { + "epoch": 0.5255610144735094, + "grad_norm": 1.0282257211254215, + "learning_rate": 1.7629309577386492e-05, + "loss": 0.2858009934425354, + "step": 1979 + }, + { + "epoch": 0.5258265834550524, + "grad_norm": 1.1185725636940211, + "learning_rate": 1.7626470300157064e-05, + "loss": 0.3615952134132385, + "step": 1980 + }, + { + "epoch": 0.5260921524365954, + "grad_norm": 1.1357118701340632, + "learning_rate": 1.762362955263411e-05, + "loss": 0.36142098903656006, + "step": 1981 + }, + { + "epoch": 0.5263577214181384, + "grad_norm": 1.1305105783283786, + "learning_rate": 1.762078733536529e-05, + "loss": 0.3335961699485779, + "step": 1982 + }, + { + "epoch": 0.5266232903996814, + "grad_norm": 1.2367655641806865, + "learning_rate": 1.761794364889855e-05, + "loss": 0.34549272060394287, + "step": 1983 + }, + { + "epoch": 0.5268888593812243, + "grad_norm": 1.1166612317693478, + 
"learning_rate": 1.761509849378212e-05, + "loss": 0.3177812993526459, + "step": 1984 + }, + { + "epoch": 0.5271544283627673, + "grad_norm": 1.1485560676920734, + "learning_rate": 1.7612251870564515e-05, + "loss": 0.33191388845443726, + "step": 1985 + }, + { + "epoch": 0.5274199973443102, + "grad_norm": 1.0807821541967428, + "learning_rate": 1.7609403779794523e-05, + "loss": 0.30732038617134094, + "step": 1986 + }, + { + "epoch": 0.5276855663258532, + "grad_norm": 1.1038043700347457, + "learning_rate": 1.7606554222021226e-05, + "loss": 0.33012068271636963, + "step": 1987 + }, + { + "epoch": 0.5279511353073961, + "grad_norm": 1.2233212729045404, + "learning_rate": 1.760370319779399e-05, + "loss": 0.3396066427230835, + "step": 1988 + }, + { + "epoch": 0.5282167042889391, + "grad_norm": 1.0755028443639627, + "learning_rate": 1.7600850707662454e-05, + "loss": 0.29053401947021484, + "step": 1989 + }, + { + "epoch": 0.528482273270482, + "grad_norm": 1.0859289781343007, + "learning_rate": 1.7597996752176545e-05, + "loss": 0.32927206158638, + "step": 1990 + }, + { + "epoch": 0.528747842252025, + "grad_norm": 1.0494460781018915, + "learning_rate": 1.759514133188647e-05, + "loss": 0.309224933385849, + "step": 1991 + }, + { + "epoch": 0.5290134112335679, + "grad_norm": 1.0870307368096292, + "learning_rate": 1.7592284447342725e-05, + "loss": 0.31973862648010254, + "step": 1992 + }, + { + "epoch": 0.5292789802151109, + "grad_norm": 1.0491029702582455, + "learning_rate": 1.758942609909608e-05, + "loss": 0.3331080377101898, + "step": 1993 + }, + { + "epoch": 0.5295445491966538, + "grad_norm": 1.0710245753206995, + "learning_rate": 1.7586566287697592e-05, + "loss": 0.32755160331726074, + "step": 1994 + }, + { + "epoch": 0.5298101181781968, + "grad_norm": 1.0377451052992368, + "learning_rate": 1.7583705013698602e-05, + "loss": 0.31942498683929443, + "step": 1995 + }, + { + "epoch": 0.5300756871597397, + "grad_norm": 1.1665695354682926, + "learning_rate": 1.7580842277650723e-05, + 
"loss": 0.3199199438095093, + "step": 1996 + }, + { + "epoch": 0.5303412561412827, + "grad_norm": 0.9680761404148592, + "learning_rate": 1.7577978080105864e-05, + "loss": 0.28153708577156067, + "step": 1997 + }, + { + "epoch": 0.5306068251228256, + "grad_norm": 1.0336529884327843, + "learning_rate": 1.7575112421616203e-05, + "loss": 0.3050921559333801, + "step": 1998 + }, + { + "epoch": 0.5308723941043686, + "grad_norm": 1.0836881519572394, + "learning_rate": 1.7572245302734208e-05, + "loss": 0.3242149353027344, + "step": 1999 + }, + { + "epoch": 0.5311379630859115, + "grad_norm": 0.9889139549595165, + "learning_rate": 1.7569376724012622e-05, + "loss": 0.29947227239608765, + "step": 2000 + }, + { + "epoch": 0.5314035320674545, + "grad_norm": 1.132976441688301, + "learning_rate": 1.756650668600448e-05, + "loss": 0.3229755163192749, + "step": 2001 + }, + { + "epoch": 0.5316691010489975, + "grad_norm": 1.0802391073518836, + "learning_rate": 1.7563635189263086e-05, + "loss": 0.3544544577598572, + "step": 2002 + }, + { + "epoch": 0.5319346700305404, + "grad_norm": 1.0996284853033707, + "learning_rate": 1.756076223434203e-05, + "loss": 0.32807621359825134, + "step": 2003 + }, + { + "epoch": 0.5322002390120834, + "grad_norm": 0.9920629294688551, + "learning_rate": 1.7557887821795192e-05, + "loss": 0.3057190477848053, + "step": 2004 + }, + { + "epoch": 0.5324658079936263, + "grad_norm": 1.0234244423063892, + "learning_rate": 1.7555011952176716e-05, + "loss": 0.29419198632240295, + "step": 2005 + }, + { + "epoch": 0.5327313769751693, + "grad_norm": 0.9799120327217228, + "learning_rate": 1.755213462604104e-05, + "loss": 0.3232089877128601, + "step": 2006 + }, + { + "epoch": 0.5329969459567122, + "grad_norm": 1.0186576745896931, + "learning_rate": 1.7549255843942875e-05, + "loss": 0.29784274101257324, + "step": 2007 + }, + { + "epoch": 0.5332625149382552, + "grad_norm": 1.0470325382276877, + "learning_rate": 1.7546375606437216e-05, + "loss": 0.31421899795532227, + "step": 
2008 + }, + { + "epoch": 0.5335280839197981, + "grad_norm": 1.0641694414781755, + "learning_rate": 1.7543493914079345e-05, + "loss": 0.30681121349334717, + "step": 2009 + }, + { + "epoch": 0.5337936529013412, + "grad_norm": 1.0092085906510277, + "learning_rate": 1.7540610767424813e-05, + "loss": 0.3114027976989746, + "step": 2010 + }, + { + "epoch": 0.5340592218828841, + "grad_norm": 1.0064230726553411, + "learning_rate": 1.753772616702946e-05, + "loss": 0.3030378520488739, + "step": 2011 + }, + { + "epoch": 0.5343247908644271, + "grad_norm": 1.1096181297712675, + "learning_rate": 1.75348401134494e-05, + "loss": 0.30272024869918823, + "step": 2012 + }, + { + "epoch": 0.53459035984597, + "grad_norm": 1.049795668852804, + "learning_rate": 1.7531952607241033e-05, + "loss": 0.35117241740226746, + "step": 2013 + }, + { + "epoch": 0.534855928827513, + "grad_norm": 1.2552056089457548, + "learning_rate": 1.7529063648961035e-05, + "loss": 0.297889769077301, + "step": 2014 + }, + { + "epoch": 0.5351214978090559, + "grad_norm": 1.1238332501182418, + "learning_rate": 1.752617323916636e-05, + "loss": 0.32858210802078247, + "step": 2015 + }, + { + "epoch": 0.5353870667905989, + "grad_norm": 1.117582559290418, + "learning_rate": 1.7523281378414246e-05, + "loss": 0.3095484673976898, + "step": 2016 + }, + { + "epoch": 0.5356526357721418, + "grad_norm": 1.1072331793921826, + "learning_rate": 1.752038806726222e-05, + "loss": 0.34490731358528137, + "step": 2017 + }, + { + "epoch": 0.5359182047536848, + "grad_norm": 1.1427367564985542, + "learning_rate": 1.751749330626806e-05, + "loss": 0.35144859552383423, + "step": 2018 + }, + { + "epoch": 0.5361837737352277, + "grad_norm": 1.0337528414474293, + "learning_rate": 1.751459709598985e-05, + "loss": 0.26337549090385437, + "step": 2019 + }, + { + "epoch": 0.5364493427167707, + "grad_norm": 1.0719958558069054, + "learning_rate": 1.7511699436985952e-05, + "loss": 0.3235297203063965, + "step": 2020 + }, + { + "epoch": 0.5367149116983136, + 
"grad_norm": 1.1655117185465573, + "learning_rate": 1.7508800329814993e-05, + "loss": 0.35195302963256836, + "step": 2021 + }, + { + "epoch": 0.5369804806798566, + "grad_norm": 1.0547432431007058, + "learning_rate": 1.7505899775035887e-05, + "loss": 0.3226467967033386, + "step": 2022 + }, + { + "epoch": 0.5372460496613995, + "grad_norm": 1.0406958245289468, + "learning_rate": 1.750299777320783e-05, + "loss": 0.30616605281829834, + "step": 2023 + }, + { + "epoch": 0.5375116186429425, + "grad_norm": 1.074902411593199, + "learning_rate": 1.7500094324890294e-05, + "loss": 0.3007400333881378, + "step": 2024 + }, + { + "epoch": 0.5377771876244855, + "grad_norm": 1.1883491645763606, + "learning_rate": 1.7497189430643025e-05, + "loss": 0.35409432649612427, + "step": 2025 + }, + { + "epoch": 0.5380427566060284, + "grad_norm": 1.6951314154408594, + "learning_rate": 1.7494283091026053e-05, + "loss": 0.33718281984329224, + "step": 2026 + }, + { + "epoch": 0.5383083255875714, + "grad_norm": 1.0940933435725269, + "learning_rate": 1.749137530659969e-05, + "loss": 0.3589650094509125, + "step": 2027 + }, + { + "epoch": 0.5385738945691143, + "grad_norm": 1.1114345705753812, + "learning_rate": 1.7488466077924525e-05, + "loss": 0.35314273834228516, + "step": 2028 + }, + { + "epoch": 0.5388394635506573, + "grad_norm": 1.017869922891923, + "learning_rate": 1.7485555405561412e-05, + "loss": 0.28393587470054626, + "step": 2029 + }, + { + "epoch": 0.5391050325322002, + "grad_norm": 1.0276825009259218, + "learning_rate": 1.7482643290071503e-05, + "loss": 0.3262496292591095, + "step": 2030 + }, + { + "epoch": 0.5393706015137432, + "grad_norm": 1.122887144479208, + "learning_rate": 1.7479729732016218e-05, + "loss": 0.3549670875072479, + "step": 2031 + }, + { + "epoch": 0.5396361704952861, + "grad_norm": 1.0211791251004596, + "learning_rate": 1.7476814731957253e-05, + "loss": 0.30668947100639343, + "step": 2032 + }, + { + "epoch": 0.5399017394768291, + "grad_norm": 0.9278865240006526, + 
"learning_rate": 1.747389829045659e-05, + "loss": 0.2942228317260742, + "step": 2033 + }, + { + "epoch": 0.540167308458372, + "grad_norm": 1.023956047651912, + "learning_rate": 1.7470980408076484e-05, + "loss": 0.3166583478450775, + "step": 2034 + }, + { + "epoch": 0.540432877439915, + "grad_norm": 1.1503051826481139, + "learning_rate": 1.7468061085379467e-05, + "loss": 0.35149675607681274, + "step": 2035 + }, + { + "epoch": 0.5406984464214579, + "grad_norm": 1.1081467050264138, + "learning_rate": 1.7465140322928353e-05, + "loss": 0.32645004987716675, + "step": 2036 + }, + { + "epoch": 0.5409640154030009, + "grad_norm": 1.1656339653416823, + "learning_rate": 1.7462218121286224e-05, + "loss": 0.3078027367591858, + "step": 2037 + }, + { + "epoch": 0.5412295843845439, + "grad_norm": 1.0310810248927436, + "learning_rate": 1.7459294481016452e-05, + "loss": 0.28726300597190857, + "step": 2038 + }, + { + "epoch": 0.5414951533660869, + "grad_norm": 1.028103971871598, + "learning_rate": 1.7456369402682675e-05, + "loss": 0.29330572485923767, + "step": 2039 + }, + { + "epoch": 0.5417607223476298, + "grad_norm": 1.176742297493161, + "learning_rate": 1.7453442886848818e-05, + "loss": 0.3151019215583801, + "step": 2040 + }, + { + "epoch": 0.5420262913291728, + "grad_norm": 1.0830810759861134, + "learning_rate": 1.745051493407908e-05, + "loss": 0.3267561197280884, + "step": 2041 + }, + { + "epoch": 0.5422918603107157, + "grad_norm": 1.0462822233377385, + "learning_rate": 1.7447585544937933e-05, + "loss": 0.2834410071372986, + "step": 2042 + }, + { + "epoch": 0.5425574292922587, + "grad_norm": 0.9922210453154783, + "learning_rate": 1.7444654719990128e-05, + "loss": 0.29896080493927, + "step": 2043 + }, + { + "epoch": 0.5428229982738016, + "grad_norm": 1.0716195406510356, + "learning_rate": 1.7441722459800695e-05, + "loss": 0.3084600865840912, + "step": 2044 + }, + { + "epoch": 0.5430885672553446, + "grad_norm": 1.100381998832612, + "learning_rate": 1.743878876493494e-05, + "loss": 
0.3178163170814514, + "step": 2045 + }, + { + "epoch": 0.5433541362368876, + "grad_norm": 1.1512124937535644, + "learning_rate": 1.743585363595844e-05, + "loss": 0.32886385917663574, + "step": 2046 + }, + { + "epoch": 0.5436197052184305, + "grad_norm": 1.0499932799675828, + "learning_rate": 1.743291707343706e-05, + "loss": 0.31810784339904785, + "step": 2047 + }, + { + "epoch": 0.5438852741999735, + "grad_norm": 0.994229574171737, + "learning_rate": 1.7429979077936928e-05, + "loss": 0.3003198504447937, + "step": 2048 + }, + { + "epoch": 0.5441508431815164, + "grad_norm": 1.1622503660754158, + "learning_rate": 1.7427039650024462e-05, + "loss": 0.33889323472976685, + "step": 2049 + }, + { + "epoch": 0.5444164121630594, + "grad_norm": 1.062972427778211, + "learning_rate": 1.7424098790266343e-05, + "loss": 0.3238763213157654, + "step": 2050 + }, + { + "epoch": 0.5446819811446023, + "grad_norm": 1.3651581380225686, + "learning_rate": 1.742115649922954e-05, + "loss": 0.34304776787757874, + "step": 2051 + }, + { + "epoch": 0.5449475501261453, + "grad_norm": 1.1192647204238841, + "learning_rate": 1.741821277748128e-05, + "loss": 0.31528347730636597, + "step": 2052 + }, + { + "epoch": 0.5452131191076882, + "grad_norm": 1.0728286121769783, + "learning_rate": 1.7415267625589094e-05, + "loss": 0.2992726266384125, + "step": 2053 + }, + { + "epoch": 0.5454786880892312, + "grad_norm": 1.0217638219637288, + "learning_rate": 1.741232104412076e-05, + "loss": 0.31706419587135315, + "step": 2054 + }, + { + "epoch": 0.5457442570707741, + "grad_norm": 1.8373163603702176, + "learning_rate": 1.7409373033644355e-05, + "loss": 0.2887676954269409, + "step": 2055 + }, + { + "epoch": 0.5460098260523171, + "grad_norm": 1.1434290988558236, + "learning_rate": 1.740642359472821e-05, + "loss": 0.3410964906215668, + "step": 2056 + }, + { + "epoch": 0.54627539503386, + "grad_norm": 1.0501323660770627, + "learning_rate": 1.740347272794095e-05, + "loss": 0.3711693286895752, + "step": 2057 + }, + { + 
"epoch": 0.546540964015403, + "grad_norm": 1.10922453334831, + "learning_rate": 1.7400520433851457e-05, + "loss": 0.3512499928474426, + "step": 2058 + }, + { + "epoch": 0.5468065329969459, + "grad_norm": 1.0790222544341648, + "learning_rate": 1.739756671302891e-05, + "loss": 0.3136678636074066, + "step": 2059 + }, + { + "epoch": 0.5470721019784889, + "grad_norm": 1.0417668658369865, + "learning_rate": 1.7394611566042748e-05, + "loss": 0.2983730435371399, + "step": 2060 + }, + { + "epoch": 0.5473376709600318, + "grad_norm": 1.1233530419836393, + "learning_rate": 1.7391654993462686e-05, + "loss": 0.36603933572769165, + "step": 2061 + }, + { + "epoch": 0.5476032399415748, + "grad_norm": 1.1758952832381078, + "learning_rate": 1.7388696995858717e-05, + "loss": 0.3651789128780365, + "step": 2062 + }, + { + "epoch": 0.5478688089231177, + "grad_norm": 1.2065493864331982, + "learning_rate": 1.7385737573801108e-05, + "loss": 0.30580615997314453, + "step": 2063 + }, + { + "epoch": 0.5481343779046607, + "grad_norm": 0.981372496476623, + "learning_rate": 1.7382776727860406e-05, + "loss": 0.2630755305290222, + "step": 2064 + }, + { + "epoch": 0.5483999468862036, + "grad_norm": 1.0020540486713174, + "learning_rate": 1.7379814458607416e-05, + "loss": 0.2947537899017334, + "step": 2065 + }, + { + "epoch": 0.5486655158677467, + "grad_norm": 1.034048631807644, + "learning_rate": 1.737685076661324e-05, + "loss": 0.3119455873966217, + "step": 2066 + }, + { + "epoch": 0.5489310848492897, + "grad_norm": 1.052273536899897, + "learning_rate": 1.7373885652449237e-05, + "loss": 0.3162347972393036, + "step": 2067 + }, + { + "epoch": 0.5491966538308326, + "grad_norm": 1.2320011234530202, + "learning_rate": 1.7370919116687047e-05, + "loss": 0.34120452404022217, + "step": 2068 + }, + { + "epoch": 0.5494622228123756, + "grad_norm": 1.095244169583748, + "learning_rate": 1.7367951159898583e-05, + "loss": 0.3126780092716217, + "step": 2069 + }, + { + "epoch": 0.5497277917939185, + "grad_norm": 
0.9591128480333501, + "learning_rate": 1.7364981782656033e-05, + "loss": 0.2833349406719208, + "step": 2070 + }, + { + "epoch": 0.5499933607754615, + "grad_norm": 1.0921809927618633, + "learning_rate": 1.7362010985531855e-05, + "loss": 0.31617453694343567, + "step": 2071 + }, + { + "epoch": 0.5502589297570044, + "grad_norm": 1.0809700153666713, + "learning_rate": 1.735903876909879e-05, + "loss": 0.31372442841529846, + "step": 2072 + }, + { + "epoch": 0.5505244987385474, + "grad_norm": 1.1616077591637106, + "learning_rate": 1.735606513392984e-05, + "loss": 0.3500489592552185, + "step": 2073 + }, + { + "epoch": 0.5507900677200903, + "grad_norm": 1.0373404262028456, + "learning_rate": 1.735309008059829e-05, + "loss": 0.3219031095504761, + "step": 2074 + }, + { + "epoch": 0.5510556367016333, + "grad_norm": 1.0701365395287485, + "learning_rate": 1.7350113609677694e-05, + "loss": 0.32419610023498535, + "step": 2075 + }, + { + "epoch": 0.5513212056831762, + "grad_norm": 1.1054492395059694, + "learning_rate": 1.7347135721741874e-05, + "loss": 0.34804612398147583, + "step": 2076 + }, + { + "epoch": 0.5515867746647192, + "grad_norm": 1.09814942010155, + "learning_rate": 1.7344156417364946e-05, + "loss": 0.33105939626693726, + "step": 2077 + }, + { + "epoch": 0.5518523436462621, + "grad_norm": 1.0139790776190714, + "learning_rate": 1.7341175697121273e-05, + "loss": 0.3426011800765991, + "step": 2078 + }, + { + "epoch": 0.5521179126278051, + "grad_norm": 1.1120942872149455, + "learning_rate": 1.7338193561585507e-05, + "loss": 0.33207643032073975, + "step": 2079 + }, + { + "epoch": 0.552383481609348, + "grad_norm": 0.9807946500665143, + "learning_rate": 1.7335210011332573e-05, + "loss": 0.31849467754364014, + "step": 2080 + }, + { + "epoch": 0.552649050590891, + "grad_norm": 1.081622565959563, + "learning_rate": 1.7332225046937655e-05, + "loss": 0.3549337685108185, + "step": 2081 + }, + { + "epoch": 0.5529146195724339, + "grad_norm": 0.9652343930669623, + "learning_rate": 
1.7329238668976224e-05, + "loss": 0.2850857377052307, + "step": 2082 + }, + { + "epoch": 0.5531801885539769, + "grad_norm": 1.1370461672740964, + "learning_rate": 1.732625087802402e-05, + "loss": 0.3277609348297119, + "step": 2083 + }, + { + "epoch": 0.5534457575355198, + "grad_norm": 1.0712095451099939, + "learning_rate": 1.732326167465705e-05, + "loss": 0.2951444983482361, + "step": 2084 + }, + { + "epoch": 0.5537113265170628, + "grad_norm": 1.0893938459197319, + "learning_rate": 1.7320271059451597e-05, + "loss": 0.36634138226509094, + "step": 2085 + }, + { + "epoch": 0.5539768954986057, + "grad_norm": 1.060256238160636, + "learning_rate": 1.7317279032984222e-05, + "loss": 0.3407907783985138, + "step": 2086 + }, + { + "epoch": 0.5542424644801487, + "grad_norm": 1.0563310141876696, + "learning_rate": 1.7314285595831747e-05, + "loss": 0.34038978815078735, + "step": 2087 + }, + { + "epoch": 0.5545080334616916, + "grad_norm": 1.0558109709205228, + "learning_rate": 1.7311290748571273e-05, + "loss": 0.337898313999176, + "step": 2088 + }, + { + "epoch": 0.5547736024432346, + "grad_norm": 1.1543867929059073, + "learning_rate": 1.7308294491780175e-05, + "loss": 0.3250765800476074, + "step": 2089 + }, + { + "epoch": 0.5550391714247775, + "grad_norm": 1.101568217376945, + "learning_rate": 1.730529682603609e-05, + "loss": 0.31562721729278564, + "step": 2090 + }, + { + "epoch": 0.5553047404063205, + "grad_norm": 1.2678079753749867, + "learning_rate": 1.730229775191693e-05, + "loss": 0.32757896184921265, + "step": 2091 + }, + { + "epoch": 0.5555703093878634, + "grad_norm": 1.1010819086774664, + "learning_rate": 1.7299297270000894e-05, + "loss": 0.35861605405807495, + "step": 2092 + }, + { + "epoch": 0.5558358783694064, + "grad_norm": 1.0999873688088635, + "learning_rate": 1.7296295380866425e-05, + "loss": 0.3383220434188843, + "step": 2093 + }, + { + "epoch": 0.5561014473509495, + "grad_norm": 1.1431134206724336, + "learning_rate": 1.7293292085092263e-05, + "loss": 
0.30144187808036804, + "step": 2094 + }, + { + "epoch": 0.5563670163324924, + "grad_norm": 1.0354659821546437, + "learning_rate": 1.72902873832574e-05, + "loss": 0.2626546323299408, + "step": 2095 + }, + { + "epoch": 0.5566325853140354, + "grad_norm": 1.0939710377386638, + "learning_rate": 1.7287281275941112e-05, + "loss": 0.3289363980293274, + "step": 2096 + }, + { + "epoch": 0.5568981542955783, + "grad_norm": 0.9797533003070389, + "learning_rate": 1.7284273763722943e-05, + "loss": 0.26631784439086914, + "step": 2097 + }, + { + "epoch": 0.5571637232771213, + "grad_norm": 1.0035421194069876, + "learning_rate": 1.7281264847182697e-05, + "loss": 0.3051939606666565, + "step": 2098 + }, + { + "epoch": 0.5574292922586642, + "grad_norm": 1.0515034870910809, + "learning_rate": 1.7278254526900468e-05, + "loss": 0.34456121921539307, + "step": 2099 + }, + { + "epoch": 0.5576948612402072, + "grad_norm": 1.2038994359149542, + "learning_rate": 1.72752428034566e-05, + "loss": 0.2747807502746582, + "step": 2100 + }, + { + "epoch": 0.5579604302217501, + "grad_norm": 2.186270123050143, + "learning_rate": 1.7272229677431723e-05, + "loss": 0.31111812591552734, + "step": 2101 + }, + { + "epoch": 0.5582259992032931, + "grad_norm": 1.0150701360001215, + "learning_rate": 1.7269215149406737e-05, + "loss": 0.29648226499557495, + "step": 2102 + }, + { + "epoch": 0.558491568184836, + "grad_norm": 0.9846402594569152, + "learning_rate": 1.72661992199628e-05, + "loss": 0.28303876519203186, + "step": 2103 + }, + { + "epoch": 0.558757137166379, + "grad_norm": 1.1069492435421613, + "learning_rate": 1.726318188968135e-05, + "loss": 0.30540165305137634, + "step": 2104 + }, + { + "epoch": 0.5590227061479219, + "grad_norm": 1.2177152582591586, + "learning_rate": 1.726016315914409e-05, + "loss": 0.31810393929481506, + "step": 2105 + }, + { + "epoch": 0.5592882751294649, + "grad_norm": 1.134577587954556, + "learning_rate": 1.7257143028933004e-05, + "loss": 0.33605068922042847, + "step": 2106 + }, + { + 
"epoch": 0.5595538441110078, + "grad_norm": 1.089019585879268, + "learning_rate": 1.725412149963033e-05, + "loss": 0.3340590298175812, + "step": 2107 + }, + { + "epoch": 0.5598194130925508, + "grad_norm": 0.9872121137775324, + "learning_rate": 1.7251098571818586e-05, + "loss": 0.29560500383377075, + "step": 2108 + }, + { + "epoch": 0.5600849820740937, + "grad_norm": 1.0964006197085026, + "learning_rate": 1.7248074246080555e-05, + "loss": 0.30100107192993164, + "step": 2109 + }, + { + "epoch": 0.5603505510556367, + "grad_norm": 1.1506338140671328, + "learning_rate": 1.7245048522999294e-05, + "loss": 0.35551172494888306, + "step": 2110 + }, + { + "epoch": 0.5606161200371796, + "grad_norm": 1.0513397818607815, + "learning_rate": 1.724202140315812e-05, + "loss": 0.3182663023471832, + "step": 2111 + }, + { + "epoch": 0.5608816890187226, + "grad_norm": 1.092960095111009, + "learning_rate": 1.723899288714064e-05, + "loss": 0.3160201609134674, + "step": 2112 + }, + { + "epoch": 0.5611472580002655, + "grad_norm": 1.0656744789709975, + "learning_rate": 1.72359629755307e-05, + "loss": 0.3126063942909241, + "step": 2113 + }, + { + "epoch": 0.5614128269818085, + "grad_norm": 1.0376603045942787, + "learning_rate": 1.723293166891244e-05, + "loss": 0.3222552239894867, + "step": 2114 + }, + { + "epoch": 0.5616783959633515, + "grad_norm": 1.1154320347150413, + "learning_rate": 1.722989896787026e-05, + "loss": 0.33601805567741394, + "step": 2115 + }, + { + "epoch": 0.5619439649448944, + "grad_norm": 1.0241046952841495, + "learning_rate": 1.722686487298883e-05, + "loss": 0.28679755330085754, + "step": 2116 + }, + { + "epoch": 0.5622095339264374, + "grad_norm": 0.9498185678215705, + "learning_rate": 1.722382938485308e-05, + "loss": 0.2895340323448181, + "step": 2117 + }, + { + "epoch": 0.5624751029079803, + "grad_norm": 1.3753225282493697, + "learning_rate": 1.7220792504048227e-05, + "loss": 0.310183048248291, + "step": 2118 + }, + { + "epoch": 0.5627406718895233, + "grad_norm": 
0.9776305745351022, + "learning_rate": 1.7217754231159737e-05, + "loss": 0.2768586277961731, + "step": 2119 + }, + { + "epoch": 0.5630062408710662, + "grad_norm": 0.9838874956474448, + "learning_rate": 1.7214714566773358e-05, + "loss": 0.2785574793815613, + "step": 2120 + }, + { + "epoch": 0.5632718098526092, + "grad_norm": 1.1815363465765012, + "learning_rate": 1.72116735114751e-05, + "loss": 0.30544358491897583, + "step": 2121 + }, + { + "epoch": 0.5635373788341522, + "grad_norm": 1.0704755380783626, + "learning_rate": 1.7208631065851243e-05, + "loss": 0.31662559509277344, + "step": 2122 + }, + { + "epoch": 0.5638029478156952, + "grad_norm": 0.9893085866675072, + "learning_rate": 1.7205587230488335e-05, + "loss": 0.31466105580329895, + "step": 2123 + }, + { + "epoch": 0.5640685167972381, + "grad_norm": 1.1520731756820097, + "learning_rate": 1.720254200597319e-05, + "loss": 0.3471367359161377, + "step": 2124 + }, + { + "epoch": 0.5643340857787811, + "grad_norm": 1.056530578075146, + "learning_rate": 1.7199495392892892e-05, + "loss": 0.3325269818305969, + "step": 2125 + }, + { + "epoch": 0.564599654760324, + "grad_norm": 1.1040662937900534, + "learning_rate": 1.7196447391834797e-05, + "loss": 0.32423460483551025, + "step": 2126 + }, + { + "epoch": 0.564865223741867, + "grad_norm": 1.0403895710374138, + "learning_rate": 1.7193398003386514e-05, + "loss": 0.3083527088165283, + "step": 2127 + }, + { + "epoch": 0.5651307927234099, + "grad_norm": 1.1794029606730059, + "learning_rate": 1.7190347228135933e-05, + "loss": 0.3418716490268707, + "step": 2128 + }, + { + "epoch": 0.5653963617049529, + "grad_norm": 1.0509473075306943, + "learning_rate": 1.7187295066671214e-05, + "loss": 0.33037957549095154, + "step": 2129 + }, + { + "epoch": 0.5656619306864958, + "grad_norm": 1.229094630243538, + "learning_rate": 1.7184241519580767e-05, + "loss": 0.3383673131465912, + "step": 2130 + }, + { + "epoch": 0.5659274996680388, + "grad_norm": 0.9364933789266218, + "learning_rate": 
1.718118658745329e-05, + "loss": 0.27756133675575256, + "step": 2131 + }, + { + "epoch": 0.5661930686495817, + "grad_norm": 1.1307081535546069, + "learning_rate": 1.717813027087773e-05, + "loss": 0.2987852692604065, + "step": 2132 + }, + { + "epoch": 0.5664586376311247, + "grad_norm": 1.0924971268375117, + "learning_rate": 1.717507257044331e-05, + "loss": 0.30016621947288513, + "step": 2133 + }, + { + "epoch": 0.5667242066126676, + "grad_norm": 1.0923612277165435, + "learning_rate": 1.7172013486739528e-05, + "loss": 0.31592345237731934, + "step": 2134 + }, + { + "epoch": 0.5669897755942106, + "grad_norm": 1.0932899901018698, + "learning_rate": 1.716895302035613e-05, + "loss": 0.3500048816204071, + "step": 2135 + }, + { + "epoch": 0.5672553445757536, + "grad_norm": 1.0529476139624208, + "learning_rate": 1.7165891171883134e-05, + "loss": 0.32069307565689087, + "step": 2136 + }, + { + "epoch": 0.5675209135572965, + "grad_norm": 1.10329279559138, + "learning_rate": 1.7162827941910837e-05, + "loss": 0.3100130558013916, + "step": 2137 + }, + { + "epoch": 0.5677864825388395, + "grad_norm": 1.080836142172887, + "learning_rate": 1.715976333102979e-05, + "loss": 0.3205985128879547, + "step": 2138 + }, + { + "epoch": 0.5680520515203824, + "grad_norm": 1.0861679281182697, + "learning_rate": 1.715669733983081e-05, + "loss": 0.3243224024772644, + "step": 2139 + }, + { + "epoch": 0.5683176205019254, + "grad_norm": 1.0818895017967487, + "learning_rate": 1.7153629968904997e-05, + "loss": 0.3278832733631134, + "step": 2140 + }, + { + "epoch": 0.5685831894834683, + "grad_norm": 0.9949896264020713, + "learning_rate": 1.7150561218843693e-05, + "loss": 0.29137033224105835, + "step": 2141 + }, + { + "epoch": 0.5688487584650113, + "grad_norm": 1.0470808838345107, + "learning_rate": 1.7147491090238516e-05, + "loss": 0.3065168857574463, + "step": 2142 + }, + { + "epoch": 0.5691143274465542, + "grad_norm": 1.0368441449557109, + "learning_rate": 1.7144419583681354e-05, + "loss": 
0.3367912173271179, + "step": 2143 + }, + { + "epoch": 0.5693798964280972, + "grad_norm": 1.086220090850542, + "learning_rate": 1.7141346699764357e-05, + "loss": 0.32278239727020264, + "step": 2144 + }, + { + "epoch": 0.5696454654096401, + "grad_norm": 1.080765529331453, + "learning_rate": 1.713827243907994e-05, + "loss": 0.2887166440486908, + "step": 2145 + }, + { + "epoch": 0.5699110343911831, + "grad_norm": 1.1353258061614586, + "learning_rate": 1.713519680222079e-05, + "loss": 0.33214619755744934, + "step": 2146 + }, + { + "epoch": 0.570176603372726, + "grad_norm": 1.1145274058321384, + "learning_rate": 1.7132119789779846e-05, + "loss": 0.2865470051765442, + "step": 2147 + }, + { + "epoch": 0.570442172354269, + "grad_norm": 1.1145678631141913, + "learning_rate": 1.7129041402350317e-05, + "loss": 0.32746967673301697, + "step": 2148 + }, + { + "epoch": 0.5707077413358119, + "grad_norm": 1.0454330804264187, + "learning_rate": 1.712596164052569e-05, + "loss": 0.3029513359069824, + "step": 2149 + }, + { + "epoch": 0.570973310317355, + "grad_norm": 0.9779058393705973, + "learning_rate": 1.7122880504899698e-05, + "loss": 0.3052698075771332, + "step": 2150 + }, + { + "epoch": 0.5712388792988979, + "grad_norm": 1.055591157713499, + "learning_rate": 1.7119797996066355e-05, + "loss": 0.29221272468566895, + "step": 2151 + }, + { + "epoch": 0.5715044482804409, + "grad_norm": 1.0014263274293047, + "learning_rate": 1.711671411461993e-05, + "loss": 0.3165368139743805, + "step": 2152 + }, + { + "epoch": 0.5717700172619838, + "grad_norm": 1.0763149059705845, + "learning_rate": 1.7113628861154953e-05, + "loss": 0.30877187848091125, + "step": 2153 + }, + { + "epoch": 0.5720355862435268, + "grad_norm": 1.0826550246568385, + "learning_rate": 1.711054223626623e-05, + "loss": 0.2985781729221344, + "step": 2154 + }, + { + "epoch": 0.5723011552250697, + "grad_norm": 1.1063225967671673, + "learning_rate": 1.7107454240548825e-05, + "loss": 0.3449699878692627, + "step": 2155 + }, + { + 
"epoch": 0.5725667242066127, + "grad_norm": 1.0430022801820942, + "learning_rate": 1.7104364874598066e-05, + "loss": 0.3219606578350067, + "step": 2156 + }, + { + "epoch": 0.5728322931881557, + "grad_norm": 1.0017795464639185, + "learning_rate": 1.710127413900955e-05, + "loss": 0.3059350550174713, + "step": 2157 + }, + { + "epoch": 0.5730978621696986, + "grad_norm": 1.0027463566346577, + "learning_rate": 1.7098182034379132e-05, + "loss": 0.29461371898651123, + "step": 2158 + }, + { + "epoch": 0.5733634311512416, + "grad_norm": 1.0159484116581767, + "learning_rate": 1.709508856130293e-05, + "loss": 0.2998795509338379, + "step": 2159 + }, + { + "epoch": 0.5736290001327845, + "grad_norm": 1.0092216110834475, + "learning_rate": 1.7091993720377336e-05, + "loss": 0.28214582800865173, + "step": 2160 + }, + { + "epoch": 0.5738945691143275, + "grad_norm": 1.2106483053766084, + "learning_rate": 1.708889751219899e-05, + "loss": 0.3036864697933197, + "step": 2161 + }, + { + "epoch": 0.5741601380958704, + "grad_norm": 1.1139097359759478, + "learning_rate": 1.7085799937364815e-05, + "loss": 0.34146320819854736, + "step": 2162 + }, + { + "epoch": 0.5744257070774134, + "grad_norm": 1.0631963944232283, + "learning_rate": 1.708270099647198e-05, + "loss": 0.33996909856796265, + "step": 2163 + }, + { + "epoch": 0.5746912760589563, + "grad_norm": 1.0779467399705778, + "learning_rate": 1.7079600690117924e-05, + "loss": 0.3308744728565216, + "step": 2164 + }, + { + "epoch": 0.5749568450404993, + "grad_norm": 1.0447240453690412, + "learning_rate": 1.707649901890035e-05, + "loss": 0.2945587933063507, + "step": 2165 + }, + { + "epoch": 0.5752224140220422, + "grad_norm": 1.0321317558144223, + "learning_rate": 1.7073395983417227e-05, + "loss": 0.30348697304725647, + "step": 2166 + }, + { + "epoch": 0.5754879830035852, + "grad_norm": 1.025806147580304, + "learning_rate": 1.707029158426678e-05, + "loss": 0.28789055347442627, + "step": 2167 + }, + { + "epoch": 0.5757535519851281, + "grad_norm": 
1.168965754707192, + "learning_rate": 1.7067185822047502e-05, + "loss": 0.3026643693447113, + "step": 2168 + }, + { + "epoch": 0.5760191209666711, + "grad_norm": 1.1108861255752682, + "learning_rate": 1.7064078697358147e-05, + "loss": 0.34021061658859253, + "step": 2169 + }, + { + "epoch": 0.576284689948214, + "grad_norm": 1.1062563353075296, + "learning_rate": 1.7060970210797735e-05, + "loss": 0.32793867588043213, + "step": 2170 + }, + { + "epoch": 0.576550258929757, + "grad_norm": 1.1692826638365306, + "learning_rate": 1.705786036296554e-05, + "loss": 0.36144691705703735, + "step": 2171 + }, + { + "epoch": 0.5768158279112999, + "grad_norm": 1.1177501875227254, + "learning_rate": 1.7054749154461105e-05, + "loss": 0.3630291223526001, + "step": 2172 + }, + { + "epoch": 0.5770813968928429, + "grad_norm": 1.144365708172633, + "learning_rate": 1.705163658588424e-05, + "loss": 0.34964969754219055, + "step": 2173 + }, + { + "epoch": 0.5773469658743858, + "grad_norm": 1.0298961015626151, + "learning_rate": 1.7048522657835004e-05, + "loss": 0.2877815067768097, + "step": 2174 + }, + { + "epoch": 0.5776125348559288, + "grad_norm": 1.1148926749607628, + "learning_rate": 1.7045407370913732e-05, + "loss": 0.3185664713382721, + "step": 2175 + }, + { + "epoch": 0.5778781038374717, + "grad_norm": 1.0393243287048395, + "learning_rate": 1.704229072572101e-05, + "loss": 0.3035257160663605, + "step": 2176 + }, + { + "epoch": 0.5781436728190147, + "grad_norm": 1.048139429574759, + "learning_rate": 1.7039172722857695e-05, + "loss": 0.325702965259552, + "step": 2177 + }, + { + "epoch": 0.5784092418005577, + "grad_norm": 1.1046410504333486, + "learning_rate": 1.7036053362924896e-05, + "loss": 0.32837462425231934, + "step": 2178 + }, + { + "epoch": 0.5786748107821007, + "grad_norm": 1.066094854816524, + "learning_rate": 1.703293264652399e-05, + "loss": 0.3430028259754181, + "step": 2179 + }, + { + "epoch": 0.5789403797636437, + "grad_norm": 1.1007701198247044, + "learning_rate": 
1.702981057425662e-05, + "loss": 0.32792964577674866, + "step": 2180 + }, + { + "epoch": 0.5792059487451866, + "grad_norm": 0.9964902607677808, + "learning_rate": 1.7026687146724675e-05, + "loss": 0.3037140965461731, + "step": 2181 + }, + { + "epoch": 0.5794715177267296, + "grad_norm": 0.9962684392556416, + "learning_rate": 1.7023562364530322e-05, + "loss": 0.33083540201187134, + "step": 2182 + }, + { + "epoch": 0.5797370867082725, + "grad_norm": 0.9979777099745417, + "learning_rate": 1.702043622827598e-05, + "loss": 0.3108663260936737, + "step": 2183 + }, + { + "epoch": 0.5800026556898155, + "grad_norm": 0.9618495492417584, + "learning_rate": 1.7017308738564336e-05, + "loss": 0.2939792573451996, + "step": 2184 + }, + { + "epoch": 0.5802682246713584, + "grad_norm": 1.1315656989934186, + "learning_rate": 1.7014179895998322e-05, + "loss": 0.3686106503009796, + "step": 2185 + }, + { + "epoch": 0.5805337936529014, + "grad_norm": 1.0524191997810952, + "learning_rate": 1.7011049701181152e-05, + "loss": 0.3497159779071808, + "step": 2186 + }, + { + "epoch": 0.5807993626344443, + "grad_norm": 1.0989364128809138, + "learning_rate": 1.7007918154716286e-05, + "loss": 0.31730401515960693, + "step": 2187 + }, + { + "epoch": 0.5810649316159873, + "grad_norm": 1.0000330799865447, + "learning_rate": 1.7004785257207456e-05, + "loss": 0.3064701557159424, + "step": 2188 + }, + { + "epoch": 0.5813305005975302, + "grad_norm": 1.1111458283716926, + "learning_rate": 1.7001651009258635e-05, + "loss": 0.37174129486083984, + "step": 2189 + }, + { + "epoch": 0.5815960695790732, + "grad_norm": 1.068050904458805, + "learning_rate": 1.699851541147408e-05, + "loss": 0.3548140823841095, + "step": 2190 + }, + { + "epoch": 0.5818616385606161, + "grad_norm": 1.2340650081251097, + "learning_rate": 1.6995378464458292e-05, + "loss": 0.3486049473285675, + "step": 2191 + }, + { + "epoch": 0.5821272075421591, + "grad_norm": 1.996025853729682, + "learning_rate": 1.6992240168816037e-05, + "loss": 
0.3083210587501526, + "step": 2192 + }, + { + "epoch": 0.582392776523702, + "grad_norm": 1.0284637251594817, + "learning_rate": 1.6989100525152346e-05, + "loss": 0.3006829619407654, + "step": 2193 + }, + { + "epoch": 0.582658345505245, + "grad_norm": 1.103386023825705, + "learning_rate": 1.6985959534072502e-05, + "loss": 0.32856425642967224, + "step": 2194 + }, + { + "epoch": 0.5829239144867879, + "grad_norm": 1.1293873964177752, + "learning_rate": 1.6982817196182052e-05, + "loss": 0.3382526934146881, + "step": 2195 + }, + { + "epoch": 0.5831894834683309, + "grad_norm": 1.0326113865244562, + "learning_rate": 1.69796735120868e-05, + "loss": 0.3311583399772644, + "step": 2196 + }, + { + "epoch": 0.5834550524498738, + "grad_norm": 1.0267321140886136, + "learning_rate": 1.6976528482392815e-05, + "loss": 0.312778115272522, + "step": 2197 + }, + { + "epoch": 0.5837206214314168, + "grad_norm": 1.0148067463802801, + "learning_rate": 1.697338210770642e-05, + "loss": 0.2996736466884613, + "step": 2198 + }, + { + "epoch": 0.5839861904129597, + "grad_norm": 1.1885772355333009, + "learning_rate": 1.6970234388634192e-05, + "loss": 0.344571590423584, + "step": 2199 + }, + { + "epoch": 0.5842517593945027, + "grad_norm": 0.9183671512098872, + "learning_rate": 1.6967085325782984e-05, + "loss": 0.25299468636512756, + "step": 2200 + }, + { + "epoch": 0.5845173283760456, + "grad_norm": 1.042142544774348, + "learning_rate": 1.6963934919759896e-05, + "loss": 0.3080691695213318, + "step": 2201 + }, + { + "epoch": 0.5847828973575886, + "grad_norm": 1.0216299822000434, + "learning_rate": 1.6960783171172286e-05, + "loss": 0.27491697669029236, + "step": 2202 + }, + { + "epoch": 0.5850484663391315, + "grad_norm": 1.1629234714983534, + "learning_rate": 1.6957630080627772e-05, + "loss": 0.3422500193119049, + "step": 2203 + }, + { + "epoch": 0.5853140353206745, + "grad_norm": 1.0832524871656921, + "learning_rate": 1.695447564873424e-05, + "loss": 0.27703234553337097, + "step": 2204 + }, + { + 
"epoch": 0.5855796043022174, + "grad_norm": 1.0275000328668338, + "learning_rate": 1.6951319876099825e-05, + "loss": 0.3088543117046356, + "step": 2205 + }, + { + "epoch": 0.5858451732837605, + "grad_norm": 1.0671359142705343, + "learning_rate": 1.694816276333292e-05, + "loss": 0.29875609278678894, + "step": 2206 + }, + { + "epoch": 0.5861107422653035, + "grad_norm": 1.0185982306074886, + "learning_rate": 1.6945004311042176e-05, + "loss": 0.30804386734962463, + "step": 2207 + }, + { + "epoch": 0.5863763112468464, + "grad_norm": 1.081134235929082, + "learning_rate": 1.694184451983651e-05, + "loss": 0.3324572741985321, + "step": 2208 + }, + { + "epoch": 0.5866418802283894, + "grad_norm": 1.0822730402391103, + "learning_rate": 1.6938683390325096e-05, + "loss": 0.30302488803863525, + "step": 2209 + }, + { + "epoch": 0.5869074492099323, + "grad_norm": 1.1499037543983048, + "learning_rate": 1.6935520923117355e-05, + "loss": 0.3264358341693878, + "step": 2210 + }, + { + "epoch": 0.5871730181914753, + "grad_norm": 1.1305858167915457, + "learning_rate": 1.693235711882298e-05, + "loss": 0.3172164261341095, + "step": 2211 + }, + { + "epoch": 0.5874385871730182, + "grad_norm": 0.9910314790510931, + "learning_rate": 1.6929191978051908e-05, + "loss": 0.300851047039032, + "step": 2212 + }, + { + "epoch": 0.5877041561545612, + "grad_norm": 1.1122516205102002, + "learning_rate": 1.6926025501414352e-05, + "loss": 0.2887764871120453, + "step": 2213 + }, + { + "epoch": 0.5879697251361041, + "grad_norm": 1.0991421920944897, + "learning_rate": 1.692285768952076e-05, + "loss": 0.3246796727180481, + "step": 2214 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 1.1069795382063548, + "learning_rate": 1.6919688542981852e-05, + "loss": 0.30595412850379944, + "step": 2215 + }, + { + "epoch": 0.58850086309919, + "grad_norm": 1.068918741300791, + "learning_rate": 1.6916518062408604e-05, + "loss": 0.2885501980781555, + "step": 2216 + }, + { + "epoch": 0.588766432080733, + "grad_norm": 
1.066918066226772, + "learning_rate": 1.6913346248412245e-05, + "loss": 0.34449082612991333, + "step": 2217 + }, + { + "epoch": 0.5890320010622759, + "grad_norm": 1.0585511422631098, + "learning_rate": 1.6910173101604267e-05, + "loss": 0.29410409927368164, + "step": 2218 + }, + { + "epoch": 0.5892975700438189, + "grad_norm": 1.1710793080996782, + "learning_rate": 1.690699862259641e-05, + "loss": 0.3250378370285034, + "step": 2219 + }, + { + "epoch": 0.5895631390253618, + "grad_norm": 1.3327292763951073, + "learning_rate": 1.690382281200068e-05, + "loss": 0.34420648217201233, + "step": 2220 + }, + { + "epoch": 0.5898287080069048, + "grad_norm": 1.1196949637967406, + "learning_rate": 1.6900645670429338e-05, + "loss": 0.33951860666275024, + "step": 2221 + }, + { + "epoch": 0.5900942769884477, + "grad_norm": 1.064177847952839, + "learning_rate": 1.6897467198494892e-05, + "loss": 0.35045644640922546, + "step": 2222 + }, + { + "epoch": 0.5903598459699907, + "grad_norm": 1.0378256375427404, + "learning_rate": 1.689428739681012e-05, + "loss": 0.3262789845466614, + "step": 2223 + }, + { + "epoch": 0.5906254149515336, + "grad_norm": 1.0662878016953237, + "learning_rate": 1.689110626598805e-05, + "loss": 0.2959234118461609, + "step": 2224 + }, + { + "epoch": 0.5908909839330766, + "grad_norm": 1.040953230887288, + "learning_rate": 1.6887923806641965e-05, + "loss": 0.3185187876224518, + "step": 2225 + }, + { + "epoch": 0.5911565529146195, + "grad_norm": 0.9754385668000993, + "learning_rate": 1.6884740019385403e-05, + "loss": 0.2861860692501068, + "step": 2226 + }, + { + "epoch": 0.5914221218961625, + "grad_norm": 1.0067160421449919, + "learning_rate": 1.6881554904832163e-05, + "loss": 0.28718897700309753, + "step": 2227 + }, + { + "epoch": 0.5916876908777055, + "grad_norm": 1.0412433017248806, + "learning_rate": 1.68783684635963e-05, + "loss": 0.2919235825538635, + "step": 2228 + }, + { + "epoch": 0.5919532598592484, + "grad_norm": 0.9981457951279066, + "learning_rate": 
1.687518069629212e-05, + "loss": 0.29265689849853516, + "step": 2229 + }, + { + "epoch": 0.5922188288407914, + "grad_norm": 1.105624159979672, + "learning_rate": 1.6871991603534183e-05, + "loss": 0.3257937431335449, + "step": 2230 + }, + { + "epoch": 0.5924843978223343, + "grad_norm": 0.9776528734928177, + "learning_rate": 1.6868801185937318e-05, + "loss": 0.30709922313690186, + "step": 2231 + }, + { + "epoch": 0.5927499668038773, + "grad_norm": 1.0470693079191735, + "learning_rate": 1.6865609444116594e-05, + "loss": 0.34016695618629456, + "step": 2232 + }, + { + "epoch": 0.5930155357854202, + "grad_norm": 3.119158292180646, + "learning_rate": 1.686241637868734e-05, + "loss": 0.27988332509994507, + "step": 2233 + }, + { + "epoch": 0.5932811047669632, + "grad_norm": 1.0478488923431404, + "learning_rate": 1.685922199026514e-05, + "loss": 0.33241748809814453, + "step": 2234 + }, + { + "epoch": 0.5935466737485062, + "grad_norm": 1.131470783603603, + "learning_rate": 1.685602627946584e-05, + "loss": 0.29636645317077637, + "step": 2235 + }, + { + "epoch": 0.5938122427300492, + "grad_norm": 1.0270882549188534, + "learning_rate": 1.6852829246905532e-05, + "loss": 0.32173705101013184, + "step": 2236 + }, + { + "epoch": 0.5940778117115921, + "grad_norm": 1.0825392737706068, + "learning_rate": 1.6849630893200567e-05, + "loss": 0.318726122379303, + "step": 2237 + }, + { + "epoch": 0.5943433806931351, + "grad_norm": 1.0382165285294276, + "learning_rate": 1.684643121896755e-05, + "loss": 0.3085494339466095, + "step": 2238 + }, + { + "epoch": 0.594608949674678, + "grad_norm": 1.0527313536489507, + "learning_rate": 1.684323022482334e-05, + "loss": 0.3402160406112671, + "step": 2239 + }, + { + "epoch": 0.594874518656221, + "grad_norm": 1.0380085019224927, + "learning_rate": 1.684002791138505e-05, + "loss": 0.28099578619003296, + "step": 2240 + }, + { + "epoch": 0.5951400876377639, + "grad_norm": 1.0821564922133853, + "learning_rate": 1.6836824279270053e-05, + "loss": 
0.3049670159816742, + "step": 2241 + }, + { + "epoch": 0.5954056566193069, + "grad_norm": 1.0644252940512267, + "learning_rate": 1.6833619329095966e-05, + "loss": 0.2999834716320038, + "step": 2242 + }, + { + "epoch": 0.5956712256008498, + "grad_norm": 1.0828247808996563, + "learning_rate": 1.6830413061480663e-05, + "loss": 0.2976648509502411, + "step": 2243 + }, + { + "epoch": 0.5959367945823928, + "grad_norm": 0.9516700397999099, + "learning_rate": 1.6827205477042282e-05, + "loss": 0.2937200963497162, + "step": 2244 + }, + { + "epoch": 0.5962023635639357, + "grad_norm": 0.9800041770842799, + "learning_rate": 1.6823996576399208e-05, + "loss": 0.27944231033325195, + "step": 2245 + }, + { + "epoch": 0.5964679325454787, + "grad_norm": 1.2497901059935828, + "learning_rate": 1.6820786360170073e-05, + "loss": 0.37821248173713684, + "step": 2246 + }, + { + "epoch": 0.5967335015270216, + "grad_norm": 1.0764913922139379, + "learning_rate": 1.681757482897377e-05, + "loss": 0.31929296255111694, + "step": 2247 + }, + { + "epoch": 0.5969990705085646, + "grad_norm": 1.0997353700477965, + "learning_rate": 1.6814361983429446e-05, + "loss": 0.29905542731285095, + "step": 2248 + }, + { + "epoch": 0.5972646394901076, + "grad_norm": 1.1012066663218303, + "learning_rate": 1.6811147824156503e-05, + "loss": 0.31056714057922363, + "step": 2249 + }, + { + "epoch": 0.5975302084716505, + "grad_norm": 1.0740873036211436, + "learning_rate": 1.6807932351774585e-05, + "loss": 0.3311445415019989, + "step": 2250 + }, + { + "epoch": 0.5977957774531935, + "grad_norm": 0.9539008733822649, + "learning_rate": 1.6804715566903603e-05, + "loss": 0.28413334488868713, + "step": 2251 + }, + { + "epoch": 0.5980613464347364, + "grad_norm": 1.068533794622215, + "learning_rate": 1.6801497470163717e-05, + "loss": 0.27681154012680054, + "step": 2252 + }, + { + "epoch": 0.5983269154162794, + "grad_norm": 1.0654200190327086, + "learning_rate": 1.679827806217533e-05, + "loss": 0.290216863155365, + "step": 2253 + }, 
+ { + "epoch": 0.5985924843978223, + "grad_norm": 1.1041469834048565, + "learning_rate": 1.6795057343559115e-05, + "loss": 0.31263259053230286, + "step": 2254 + }, + { + "epoch": 0.5988580533793653, + "grad_norm": 1.126601485756597, + "learning_rate": 1.6791835314935984e-05, + "loss": 0.31527474522590637, + "step": 2255 + }, + { + "epoch": 0.5991236223609082, + "grad_norm": 1.078203294441185, + "learning_rate": 1.6788611976927104e-05, + "loss": 0.308803915977478, + "step": 2256 + }, + { + "epoch": 0.5993891913424512, + "grad_norm": 1.0503773076355036, + "learning_rate": 1.6785387330153898e-05, + "loss": 0.3038686215877533, + "step": 2257 + }, + { + "epoch": 0.5996547603239941, + "grad_norm": 1.0216209005739547, + "learning_rate": 1.6782161375238045e-05, + "loss": 0.32485973834991455, + "step": 2258 + }, + { + "epoch": 0.5999203293055371, + "grad_norm": 1.182450532742011, + "learning_rate": 1.6778934112801467e-05, + "loss": 0.32350587844848633, + "step": 2259 + }, + { + "epoch": 0.60018589828708, + "grad_norm": 1.0888151703509321, + "learning_rate": 1.6775705543466337e-05, + "loss": 0.31593745946884155, + "step": 2260 + }, + { + "epoch": 0.600451467268623, + "grad_norm": 1.0882766479814592, + "learning_rate": 1.6772475667855098e-05, + "loss": 0.3266843855381012, + "step": 2261 + }, + { + "epoch": 0.6007170362501659, + "grad_norm": 1.1815872316974045, + "learning_rate": 1.676924448659042e-05, + "loss": 0.3334394693374634, + "step": 2262 + }, + { + "epoch": 0.600982605231709, + "grad_norm": 1.1019346354795203, + "learning_rate": 1.676601200029524e-05, + "loss": 0.29688704013824463, + "step": 2263 + }, + { + "epoch": 0.6012481742132519, + "grad_norm": 1.0675092497220116, + "learning_rate": 1.6762778209592744e-05, + "loss": 0.3163599967956543, + "step": 2264 + }, + { + "epoch": 0.6015137431947949, + "grad_norm": 3.310146638883422, + "learning_rate": 1.675954311510637e-05, + "loss": 0.3001909554004669, + "step": 2265 + }, + { + "epoch": 0.6017793121763378, + "grad_norm": 
1.052342150287052, + "learning_rate": 1.6756306717459804e-05, + "loss": 0.306442528963089, + "step": 2266 + }, + { + "epoch": 0.6020448811578808, + "grad_norm": 1.0462245388504205, + "learning_rate": 1.6753069017276988e-05, + "loss": 0.32714736461639404, + "step": 2267 + }, + { + "epoch": 0.6023104501394237, + "grad_norm": 1.1462408299032063, + "learning_rate": 1.6749830015182106e-05, + "loss": 0.3276352286338806, + "step": 2268 + }, + { + "epoch": 0.6025760191209667, + "grad_norm": 1.196238497855594, + "learning_rate": 1.6746589711799607e-05, + "loss": 0.3151017427444458, + "step": 2269 + }, + { + "epoch": 0.6028415881025097, + "grad_norm": 1.0342963680315473, + "learning_rate": 1.674334810775418e-05, + "loss": 0.30252715945243835, + "step": 2270 + }, + { + "epoch": 0.6031071570840526, + "grad_norm": 1.013150034994447, + "learning_rate": 1.674010520367077e-05, + "loss": 0.28994205594062805, + "step": 2271 + }, + { + "epoch": 0.6033727260655956, + "grad_norm": 1.060884408167446, + "learning_rate": 1.6736861000174566e-05, + "loss": 0.31821542978286743, + "step": 2272 + }, + { + "epoch": 0.6036382950471385, + "grad_norm": 1.0745731746159097, + "learning_rate": 1.6733615497891018e-05, + "loss": 0.33488404750823975, + "step": 2273 + }, + { + "epoch": 0.6039038640286815, + "grad_norm": 1.1687722013665731, + "learning_rate": 1.6730368697445815e-05, + "loss": 0.32545825839042664, + "step": 2274 + }, + { + "epoch": 0.6041694330102244, + "grad_norm": 1.0959659967153625, + "learning_rate": 1.6727120599464904e-05, + "loss": 0.3229105770587921, + "step": 2275 + }, + { + "epoch": 0.6044350019917674, + "grad_norm": 1.0190980223229251, + "learning_rate": 1.672387120457448e-05, + "loss": 0.29090648889541626, + "step": 2276 + }, + { + "epoch": 0.6047005709733103, + "grad_norm": 1.0135966931724694, + "learning_rate": 1.6720620513400993e-05, + "loss": 0.3102695345878601, + "step": 2277 + }, + { + "epoch": 0.6049661399548533, + "grad_norm": 0.9853472262099896, + "learning_rate": 
1.6717368526571133e-05, + "loss": 0.3104533851146698, + "step": 2278 + }, + { + "epoch": 0.6052317089363962, + "grad_norm": 1.0624907138843722, + "learning_rate": 1.671411524471184e-05, + "loss": 0.3340798616409302, + "step": 2279 + }, + { + "epoch": 0.6054972779179392, + "grad_norm": 0.9362556276145145, + "learning_rate": 1.6710860668450318e-05, + "loss": 0.2807982563972473, + "step": 2280 + }, + { + "epoch": 0.6057628468994821, + "grad_norm": 1.0604829312359818, + "learning_rate": 1.6707604798414005e-05, + "loss": 0.28892064094543457, + "step": 2281 + }, + { + "epoch": 0.6060284158810251, + "grad_norm": 1.1005771261022437, + "learning_rate": 1.6704347635230594e-05, + "loss": 0.29660698771476746, + "step": 2282 + }, + { + "epoch": 0.606293984862568, + "grad_norm": 1.0826898129560842, + "learning_rate": 1.6701089179528032e-05, + "loss": 0.32079893350601196, + "step": 2283 + }, + { + "epoch": 0.606559553844111, + "grad_norm": 1.0711524337358722, + "learning_rate": 1.6697829431934508e-05, + "loss": 0.3464012145996094, + "step": 2284 + }, + { + "epoch": 0.6068251228256539, + "grad_norm": 1.113831391037599, + "learning_rate": 1.669456839307846e-05, + "loss": 0.3378494381904602, + "step": 2285 + }, + { + "epoch": 0.6070906918071969, + "grad_norm": 1.1314381443012484, + "learning_rate": 1.6691306063588583e-05, + "loss": 0.2856704294681549, + "step": 2286 + }, + { + "epoch": 0.6073562607887398, + "grad_norm": 1.117095467957477, + "learning_rate": 1.6688042444093816e-05, + "loss": 0.317970871925354, + "step": 2287 + }, + { + "epoch": 0.6076218297702828, + "grad_norm": 0.9765740214705895, + "learning_rate": 1.6684777535223338e-05, + "loss": 0.3067381978034973, + "step": 2288 + }, + { + "epoch": 0.6078873987518257, + "grad_norm": 0.9795122588790717, + "learning_rate": 1.6681511337606594e-05, + "loss": 0.28682243824005127, + "step": 2289 + }, + { + "epoch": 0.6081529677333687, + "grad_norm": 1.0967806384391572, + "learning_rate": 1.667824385187327e-05, + "loss": 
0.30516478419303894, + "step": 2290 + }, + { + "epoch": 0.6084185367149118, + "grad_norm": 1.2090889717256932, + "learning_rate": 1.6674975078653284e-05, + "loss": 0.3114034831523895, + "step": 2291 + }, + { + "epoch": 0.6086841056964547, + "grad_norm": 1.045779035897072, + "learning_rate": 1.6671705018576837e-05, + "loss": 0.3119916617870331, + "step": 2292 + }, + { + "epoch": 0.6089496746779977, + "grad_norm": 1.0110290976394836, + "learning_rate": 1.666843367227434e-05, + "loss": 0.2695278823375702, + "step": 2293 + }, + { + "epoch": 0.6092152436595406, + "grad_norm": 1.1042693591067085, + "learning_rate": 1.6665161040376483e-05, + "loss": 0.32162508368492126, + "step": 2294 + }, + { + "epoch": 0.6094808126410836, + "grad_norm": 1.1533266295102853, + "learning_rate": 1.6661887123514183e-05, + "loss": 0.3115222752094269, + "step": 2295 + }, + { + "epoch": 0.6097463816226265, + "grad_norm": 1.1903173397636237, + "learning_rate": 1.6658611922318618e-05, + "loss": 0.3239362835884094, + "step": 2296 + }, + { + "epoch": 0.6100119506041695, + "grad_norm": 1.0224008240467277, + "learning_rate": 1.66553354374212e-05, + "loss": 0.29716256260871887, + "step": 2297 + }, + { + "epoch": 0.6102775195857124, + "grad_norm": 1.1579823586849616, + "learning_rate": 1.6652057669453606e-05, + "loss": 0.3337557911872864, + "step": 2298 + }, + { + "epoch": 0.6105430885672554, + "grad_norm": 1.0726602627394455, + "learning_rate": 1.6648778619047747e-05, + "loss": 0.30258649587631226, + "step": 2299 + }, + { + "epoch": 0.6108086575487983, + "grad_norm": 1.0836532202857172, + "learning_rate": 1.6645498286835784e-05, + "loss": 0.3151426315307617, + "step": 2300 + }, + { + "epoch": 0.6110742265303413, + "grad_norm": 0.9639622977001232, + "learning_rate": 1.664221667345013e-05, + "loss": 0.274954617023468, + "step": 2301 + }, + { + "epoch": 0.6113397955118842, + "grad_norm": 1.0454921478368049, + "learning_rate": 1.6638933779523437e-05, + "loss": 0.3055363893508911, + "step": 2302 + }, + { + 
"epoch": 0.6116053644934272, + "grad_norm": 1.0132221767482874, + "learning_rate": 1.663564960568861e-05, + "loss": 0.30296921730041504, + "step": 2303 + }, + { + "epoch": 0.6118709334749701, + "grad_norm": 1.0766188111034134, + "learning_rate": 1.66323641525788e-05, + "loss": 0.3118343651294708, + "step": 2304 + }, + { + "epoch": 0.6121365024565131, + "grad_norm": 1.164685781665666, + "learning_rate": 1.6629077420827405e-05, + "loss": 0.3277447819709778, + "step": 2305 + }, + { + "epoch": 0.612402071438056, + "grad_norm": 1.11996036014055, + "learning_rate": 1.6625789411068063e-05, + "loss": 0.307643860578537, + "step": 2306 + }, + { + "epoch": 0.612667640419599, + "grad_norm": 1.0752891079202938, + "learning_rate": 1.6622500123934665e-05, + "loss": 0.3043777346611023, + "step": 2307 + }, + { + "epoch": 0.6129332094011419, + "grad_norm": 1.1229566611504027, + "learning_rate": 1.6619209560061352e-05, + "loss": 0.28634852170944214, + "step": 2308 + }, + { + "epoch": 0.6131987783826849, + "grad_norm": 1.1746890844036781, + "learning_rate": 1.6615917720082503e-05, + "loss": 0.33200016617774963, + "step": 2309 + }, + { + "epoch": 0.6134643473642278, + "grad_norm": 1.0620493011215435, + "learning_rate": 1.661262460463274e-05, + "loss": 0.26568055152893066, + "step": 2310 + }, + { + "epoch": 0.6137299163457708, + "grad_norm": 1.0408157138123326, + "learning_rate": 1.6609330214346945e-05, + "loss": 0.2772855758666992, + "step": 2311 + }, + { + "epoch": 0.6139954853273137, + "grad_norm": 1.2060076126932109, + "learning_rate": 1.6606034549860236e-05, + "loss": 0.3330409824848175, + "step": 2312 + }, + { + "epoch": 0.6142610543088567, + "grad_norm": 1.0235644562455184, + "learning_rate": 1.6602737611807975e-05, + "loss": 0.27702978253364563, + "step": 2313 + }, + { + "epoch": 0.6145266232903996, + "grad_norm": 1.1266755606893777, + "learning_rate": 1.6599439400825775e-05, + "loss": 0.29985183477401733, + "step": 2314 + }, + { + "epoch": 0.6147921922719426, + "grad_norm": 
1.0266522277907775, + "learning_rate": 1.659613991754949e-05, + "loss": 0.2666100859642029, + "step": 2315 + }, + { + "epoch": 0.6150577612534855, + "grad_norm": 1.0676553477298287, + "learning_rate": 1.6592839162615223e-05, + "loss": 0.2968613803386688, + "step": 2316 + }, + { + "epoch": 0.6153233302350285, + "grad_norm": 1.26155090118547, + "learning_rate": 1.6589537136659326e-05, + "loss": 0.2693714499473572, + "step": 2317 + }, + { + "epoch": 0.6155888992165715, + "grad_norm": 1.1411779960646509, + "learning_rate": 1.658623384031838e-05, + "loss": 0.3192713260650635, + "step": 2318 + }, + { + "epoch": 0.6158544681981145, + "grad_norm": 1.099028639770974, + "learning_rate": 1.658292927422923e-05, + "loss": 0.2958469092845917, + "step": 2319 + }, + { + "epoch": 0.6161200371796575, + "grad_norm": 1.0613129939040433, + "learning_rate": 1.657962343902895e-05, + "loss": 0.28580743074417114, + "step": 2320 + }, + { + "epoch": 0.6163856061612004, + "grad_norm": 1.2105545865052383, + "learning_rate": 1.6576316335354875e-05, + "loss": 0.34325680136680603, + "step": 2321 + }, + { + "epoch": 0.6166511751427434, + "grad_norm": 1.076014963599046, + "learning_rate": 1.657300796384457e-05, + "loss": 0.3220894932746887, + "step": 2322 + }, + { + "epoch": 0.6169167441242863, + "grad_norm": 1.003861259990267, + "learning_rate": 1.656969832513585e-05, + "loss": 0.2934642434120178, + "step": 2323 + }, + { + "epoch": 0.6171823131058293, + "grad_norm": 1.0182182491222724, + "learning_rate": 1.656638741986677e-05, + "loss": 0.3066999912261963, + "step": 2324 + }, + { + "epoch": 0.6174478820873722, + "grad_norm": 1.0780285957414313, + "learning_rate": 1.6563075248675645e-05, + "loss": 0.2947896122932434, + "step": 2325 + }, + { + "epoch": 0.6177134510689152, + "grad_norm": 1.1567241875430703, + "learning_rate": 1.6559761812201018e-05, + "loss": 0.33616161346435547, + "step": 2326 + }, + { + "epoch": 0.6179790200504581, + "grad_norm": 1.0754490235924812, + "learning_rate": 
1.6556447111081678e-05, + "loss": 0.29555875062942505, + "step": 2327 + }, + { + "epoch": 0.6182445890320011, + "grad_norm": 1.0070791342344025, + "learning_rate": 1.655313114595666e-05, + "loss": 0.276498019695282, + "step": 2328 + }, + { + "epoch": 0.618510158013544, + "grad_norm": 1.0894248364537533, + "learning_rate": 1.6549813917465242e-05, + "loss": 0.3081165552139282, + "step": 2329 + }, + { + "epoch": 0.618775726995087, + "grad_norm": 1.2153046006588315, + "learning_rate": 1.654649542624695e-05, + "loss": 0.3610053062438965, + "step": 2330 + }, + { + "epoch": 0.6190412959766299, + "grad_norm": 1.0676492266011808, + "learning_rate": 1.654317567294155e-05, + "loss": 0.2775106430053711, + "step": 2331 + }, + { + "epoch": 0.6193068649581729, + "grad_norm": 4.371469554540211, + "learning_rate": 1.653985465818905e-05, + "loss": 0.2915893793106079, + "step": 2332 + }, + { + "epoch": 0.6195724339397158, + "grad_norm": 1.0032536414224313, + "learning_rate": 1.6536532382629696e-05, + "loss": 0.30868977308273315, + "step": 2333 + }, + { + "epoch": 0.6198380029212588, + "grad_norm": 1.1011191125099704, + "learning_rate": 1.6533208846903996e-05, + "loss": 0.3083038330078125, + "step": 2334 + }, + { + "epoch": 0.6201035719028017, + "grad_norm": 0.9895882037041855, + "learning_rate": 1.652988405165268e-05, + "loss": 0.25192466378211975, + "step": 2335 + }, + { + "epoch": 0.6203691408843447, + "grad_norm": 1.1020677364796136, + "learning_rate": 1.6526557997516737e-05, + "loss": 0.32154130935668945, + "step": 2336 + }, + { + "epoch": 0.6206347098658876, + "grad_norm": 1.1174587266065723, + "learning_rate": 1.6523230685137382e-05, + "loss": 0.2860945165157318, + "step": 2337 + }, + { + "epoch": 0.6209002788474306, + "grad_norm": 1.1647384960602913, + "learning_rate": 1.6519902115156084e-05, + "loss": 0.3279789984226227, + "step": 2338 + }, + { + "epoch": 0.6211658478289735, + "grad_norm": 1.062678685453679, + "learning_rate": 1.6516572288214555e-05, + "loss": 
0.3082200884819031, + "step": 2339 + }, + { + "epoch": 0.6214314168105165, + "grad_norm": 1.1253285275737313, + "learning_rate": 1.6513241204954745e-05, + "loss": 0.29032304883003235, + "step": 2340 + }, + { + "epoch": 0.6216969857920595, + "grad_norm": 1.004918906125766, + "learning_rate": 1.6509908866018843e-05, + "loss": 0.3096848130226135, + "step": 2341 + }, + { + "epoch": 0.6219625547736024, + "grad_norm": 1.021047856460921, + "learning_rate": 1.6506575272049294e-05, + "loss": 0.309989333152771, + "step": 2342 + }, + { + "epoch": 0.6222281237551454, + "grad_norm": 1.119097166323709, + "learning_rate": 1.6503240423688768e-05, + "loss": 0.311350554227829, + "step": 2343 + }, + { + "epoch": 0.6224936927366883, + "grad_norm": 1.0659510240862446, + "learning_rate": 1.6499904321580187e-05, + "loss": 0.3313952386379242, + "step": 2344 + }, + { + "epoch": 0.6227592617182313, + "grad_norm": 1.0702797293760455, + "learning_rate": 1.649656696636671e-05, + "loss": 0.2984781265258789, + "step": 2345 + }, + { + "epoch": 0.6230248306997742, + "grad_norm": 1.0312282361562104, + "learning_rate": 1.6493228358691748e-05, + "loss": 0.3058238625526428, + "step": 2346 + }, + { + "epoch": 0.6232903996813173, + "grad_norm": 1.0462474005488736, + "learning_rate": 1.6489888499198935e-05, + "loss": 0.33439138531684875, + "step": 2347 + }, + { + "epoch": 0.6235559686628602, + "grad_norm": 1.0386002000588619, + "learning_rate": 1.6486547388532157e-05, + "loss": 0.2883133292198181, + "step": 2348 + }, + { + "epoch": 0.6238215376444032, + "grad_norm": 0.9997410916606129, + "learning_rate": 1.648320502733555e-05, + "loss": 0.30258435010910034, + "step": 2349 + }, + { + "epoch": 0.6240871066259461, + "grad_norm": 1.0226158069339855, + "learning_rate": 1.6479861416253476e-05, + "loss": 0.316353440284729, + "step": 2350 + }, + { + "epoch": 0.6243526756074891, + "grad_norm": 1.0638089423798769, + "learning_rate": 1.647651655593054e-05, + "loss": 0.3230556547641754, + "step": 2351 + }, + { + 
"epoch": 0.624618244589032, + "grad_norm": 1.2043111611037318, + "learning_rate": 1.6473170447011593e-05, + "loss": 0.3327128291130066, + "step": 2352 + }, + { + "epoch": 0.624883813570575, + "grad_norm": 1.081123131766037, + "learning_rate": 1.6469823090141733e-05, + "loss": 0.3152993619441986, + "step": 2353 + }, + { + "epoch": 0.6251493825521179, + "grad_norm": 1.0655193061859811, + "learning_rate": 1.6466474485966286e-05, + "loss": 0.26792511343955994, + "step": 2354 + }, + { + "epoch": 0.6254149515336609, + "grad_norm": 1.121022507517606, + "learning_rate": 1.6463124635130824e-05, + "loss": 0.31665652990341187, + "step": 2355 + }, + { + "epoch": 0.6256805205152038, + "grad_norm": 1.0108098757868682, + "learning_rate": 1.645977353828115e-05, + "loss": 0.29573655128479004, + "step": 2356 + }, + { + "epoch": 0.6259460894967468, + "grad_norm": 1.0973823257435635, + "learning_rate": 1.6456421196063334e-05, + "loss": 0.3210436999797821, + "step": 2357 + }, + { + "epoch": 0.6262116584782897, + "grad_norm": 1.2424369194288305, + "learning_rate": 1.6453067609123656e-05, + "loss": 0.2837316691875458, + "step": 2358 + }, + { + "epoch": 0.6264772274598327, + "grad_norm": 1.0217734190114693, + "learning_rate": 1.6449712778108645e-05, + "loss": 0.2885812520980835, + "step": 2359 + }, + { + "epoch": 0.6267427964413756, + "grad_norm": 1.1369177274860889, + "learning_rate": 1.6446356703665078e-05, + "loss": 0.34908249974250793, + "step": 2360 + }, + { + "epoch": 0.6270083654229186, + "grad_norm": 0.9942151080492051, + "learning_rate": 1.6442999386439967e-05, + "loss": 0.30398470163345337, + "step": 2361 + }, + { + "epoch": 0.6272739344044616, + "grad_norm": 0.9838105681310805, + "learning_rate": 1.6439640827080565e-05, + "loss": 0.2780487537384033, + "step": 2362 + }, + { + "epoch": 0.6275395033860045, + "grad_norm": 0.956534505955689, + "learning_rate": 1.6436281026234357e-05, + "loss": 0.2575770616531372, + "step": 2363 + }, + { + "epoch": 0.6278050723675475, + "grad_norm": 
0.9675911826739493, + "learning_rate": 1.6432919984549077e-05, + "loss": 0.2888547480106354, + "step": 2364 + }, + { + "epoch": 0.6280706413490904, + "grad_norm": 1.2303845977564731, + "learning_rate": 1.6429557702672694e-05, + "loss": 0.3259009122848511, + "step": 2365 + }, + { + "epoch": 0.6283362103306334, + "grad_norm": 1.3923197622537806, + "learning_rate": 1.6426194181253415e-05, + "loss": 0.2899959683418274, + "step": 2366 + }, + { + "epoch": 0.6286017793121763, + "grad_norm": 1.058685915432802, + "learning_rate": 1.6422829420939688e-05, + "loss": 0.28471851348876953, + "step": 2367 + }, + { + "epoch": 0.6288673482937193, + "grad_norm": 1.0822140266216713, + "learning_rate": 1.64194634223802e-05, + "loss": 0.2958947420120239, + "step": 2368 + }, + { + "epoch": 0.6291329172752622, + "grad_norm": 1.1251439755337522, + "learning_rate": 1.6416096186223872e-05, + "loss": 0.3089750111103058, + "step": 2369 + }, + { + "epoch": 0.6293984862568052, + "grad_norm": 1.0517657351777636, + "learning_rate": 1.641272771311987e-05, + "loss": 0.31597089767456055, + "step": 2370 + }, + { + "epoch": 0.6296640552383481, + "grad_norm": 1.237586073778816, + "learning_rate": 1.6409358003717598e-05, + "loss": 0.2968488931655884, + "step": 2371 + }, + { + "epoch": 0.6299296242198911, + "grad_norm": 1.0062603647307793, + "learning_rate": 1.6405987058666694e-05, + "loss": 0.27532660961151123, + "step": 2372 + }, + { + "epoch": 0.630195193201434, + "grad_norm": 1.0061271713511417, + "learning_rate": 1.6402614878617037e-05, + "loss": 0.2800731956958771, + "step": 2373 + }, + { + "epoch": 0.630460762182977, + "grad_norm": 1.0867786948587836, + "learning_rate": 1.6399241464218744e-05, + "loss": 0.31728652119636536, + "step": 2374 + }, + { + "epoch": 0.63072633116452, + "grad_norm": 1.0634834793994077, + "learning_rate": 1.6395866816122167e-05, + "loss": 0.2776367664337158, + "step": 2375 + }, + { + "epoch": 0.630991900146063, + "grad_norm": 1.2696308030410766, + "learning_rate": 
1.63924909349779e-05, + "loss": 0.3308418095111847, + "step": 2376 + }, + { + "epoch": 0.6312574691276059, + "grad_norm": 1.027144235831433, + "learning_rate": 1.6389113821436775e-05, + "loss": 0.31589487195014954, + "step": 2377 + }, + { + "epoch": 0.6315230381091489, + "grad_norm": 0.9983142729953255, + "learning_rate": 1.6385735476149855e-05, + "loss": 0.27181899547576904, + "step": 2378 + }, + { + "epoch": 0.6317886070906918, + "grad_norm": 1.0656862561919935, + "learning_rate": 1.638235589976845e-05, + "loss": 0.2603747546672821, + "step": 2379 + }, + { + "epoch": 0.6320541760722348, + "grad_norm": 1.0543823342651422, + "learning_rate": 1.63789750929441e-05, + "loss": 0.29050707817077637, + "step": 2380 + }, + { + "epoch": 0.6323197450537777, + "grad_norm": 1.0310549396867945, + "learning_rate": 1.6375593056328586e-05, + "loss": 0.2979413866996765, + "step": 2381 + }, + { + "epoch": 0.6325853140353207, + "grad_norm": 1.0460005843129836, + "learning_rate": 1.6372209790573926e-05, + "loss": 0.30875420570373535, + "step": 2382 + }, + { + "epoch": 0.6328508830168637, + "grad_norm": 0.9698416111844145, + "learning_rate": 1.6368825296332366e-05, + "loss": 0.2755935788154602, + "step": 2383 + }, + { + "epoch": 0.6331164519984066, + "grad_norm": 1.1336778567410772, + "learning_rate": 1.6365439574256406e-05, + "loss": 0.3459136486053467, + "step": 2384 + }, + { + "epoch": 0.6333820209799496, + "grad_norm": 1.116018329054477, + "learning_rate": 1.6362052624998767e-05, + "loss": 0.29043829441070557, + "step": 2385 + }, + { + "epoch": 0.6336475899614925, + "grad_norm": 1.123039696178655, + "learning_rate": 1.635866444921242e-05, + "loss": 0.321551114320755, + "step": 2386 + }, + { + "epoch": 0.6339131589430355, + "grad_norm": 1.0451682936950502, + "learning_rate": 1.6355275047550553e-05, + "loss": 0.28478139638900757, + "step": 2387 + }, + { + "epoch": 0.6341787279245784, + "grad_norm": 1.060617338056141, + "learning_rate": 1.6351884420666616e-05, + "loss": 
0.30913087725639343, + "step": 2388 + }, + { + "epoch": 0.6344442969061214, + "grad_norm": 1.0996519301974148, + "learning_rate": 1.6348492569214275e-05, + "loss": 0.328342467546463, + "step": 2389 + }, + { + "epoch": 0.6347098658876643, + "grad_norm": 1.0657562962668374, + "learning_rate": 1.634509949384744e-05, + "loss": 0.3291119933128357, + "step": 2390 + }, + { + "epoch": 0.6349754348692073, + "grad_norm": 1.0805286951038287, + "learning_rate": 1.6341705195220257e-05, + "loss": 0.3542378544807434, + "step": 2391 + }, + { + "epoch": 0.6352410038507502, + "grad_norm": 1.1387422668526126, + "learning_rate": 1.63383096739871e-05, + "loss": 0.3167935609817505, + "step": 2392 + }, + { + "epoch": 0.6355065728322932, + "grad_norm": 0.9614211236141011, + "learning_rate": 1.63349129308026e-05, + "loss": 0.27623263001441956, + "step": 2393 + }, + { + "epoch": 0.6357721418138361, + "grad_norm": 1.1351525352268206, + "learning_rate": 1.6331514966321596e-05, + "loss": 0.3615761399269104, + "step": 2394 + }, + { + "epoch": 0.6360377107953791, + "grad_norm": 1.1430561223010627, + "learning_rate": 1.632811578119918e-05, + "loss": 0.3503292500972748, + "step": 2395 + }, + { + "epoch": 0.636303279776922, + "grad_norm": 1.0400637290516392, + "learning_rate": 1.6324715376090673e-05, + "loss": 0.2994767129421234, + "step": 2396 + }, + { + "epoch": 0.636568848758465, + "grad_norm": 1.2836743734514182, + "learning_rate": 1.6321313751651638e-05, + "loss": 0.29903143644332886, + "step": 2397 + }, + { + "epoch": 0.6368344177400079, + "grad_norm": 1.0273086079776361, + "learning_rate": 1.6317910908537865e-05, + "loss": 0.310536652803421, + "step": 2398 + }, + { + "epoch": 0.6370999867215509, + "grad_norm": 1.2820707601171073, + "learning_rate": 1.6314506847405382e-05, + "loss": 0.32584354281425476, + "step": 2399 + }, + { + "epoch": 0.6373655557030938, + "grad_norm": 1.186095937719991, + "learning_rate": 1.6311101568910448e-05, + "loss": 0.3536352217197418, + "step": 2400 + }, + { + 
"epoch": 0.6376311246846368, + "grad_norm": 1.0361661707144088, + "learning_rate": 1.6307695073709565e-05, + "loss": 0.3198434114456177, + "step": 2401 + }, + { + "epoch": 0.6378966936661797, + "grad_norm": 0.8809138916670839, + "learning_rate": 1.6304287362459462e-05, + "loss": 0.264182448387146, + "step": 2402 + }, + { + "epoch": 0.6381622626477228, + "grad_norm": 1.0526335869529386, + "learning_rate": 1.6300878435817115e-05, + "loss": 0.31182044744491577, + "step": 2403 + }, + { + "epoch": 0.6384278316292658, + "grad_norm": 1.0495886453587215, + "learning_rate": 1.6297468294439708e-05, + "loss": 0.28221404552459717, + "step": 2404 + }, + { + "epoch": 0.6386934006108087, + "grad_norm": 1.0211141314743026, + "learning_rate": 1.6294056938984693e-05, + "loss": 0.27788785099983215, + "step": 2405 + }, + { + "epoch": 0.6389589695923517, + "grad_norm": 1.068610455564362, + "learning_rate": 1.6290644370109728e-05, + "loss": 0.3300796151161194, + "step": 2406 + }, + { + "epoch": 0.6392245385738946, + "grad_norm": 1.0949996094795582, + "learning_rate": 1.628723058847272e-05, + "loss": 0.32170963287353516, + "step": 2407 + }, + { + "epoch": 0.6394901075554376, + "grad_norm": 1.1320309851276869, + "learning_rate": 1.628381559473181e-05, + "loss": 0.3243589997291565, + "step": 2408 + }, + { + "epoch": 0.6397556765369805, + "grad_norm": 1.4458945786524546, + "learning_rate": 1.6280399389545358e-05, + "loss": 0.311046838760376, + "step": 2409 + }, + { + "epoch": 0.6400212455185235, + "grad_norm": 1.0237689913585555, + "learning_rate": 1.6276981973571973e-05, + "loss": 0.2642543911933899, + "step": 2410 + }, + { + "epoch": 0.6402868145000664, + "grad_norm": 1.1424399755044237, + "learning_rate": 1.62735633474705e-05, + "loss": 0.3593730926513672, + "step": 2411 + }, + { + "epoch": 0.6405523834816094, + "grad_norm": 1.1145611429504636, + "learning_rate": 1.62701435119e-05, + "loss": 0.3147425353527069, + "step": 2412 + }, + { + "epoch": 0.6408179524631523, + "grad_norm": 
1.1400749315540035, + "learning_rate": 1.6266722467519783e-05, + "loss": 0.32639142870903015, + "step": 2413 + }, + { + "epoch": 0.6410835214446953, + "grad_norm": 1.1011849489387644, + "learning_rate": 1.626330021498938e-05, + "loss": 0.32113659381866455, + "step": 2414 + }, + { + "epoch": 0.6413490904262382, + "grad_norm": 1.0371621680767618, + "learning_rate": 1.6259876754968568e-05, + "loss": 0.3188290297985077, + "step": 2415 + }, + { + "epoch": 0.6416146594077812, + "grad_norm": 1.076893351246201, + "learning_rate": 1.625645208811734e-05, + "loss": 0.3145543932914734, + "step": 2416 + }, + { + "epoch": 0.6418802283893241, + "grad_norm": 1.1368093372185335, + "learning_rate": 1.6253026215095943e-05, + "loss": 0.30433323979377747, + "step": 2417 + }, + { + "epoch": 0.6421457973708671, + "grad_norm": 1.1042321396184265, + "learning_rate": 1.6249599136564837e-05, + "loss": 0.30946728587150574, + "step": 2418 + }, + { + "epoch": 0.64241136635241, + "grad_norm": 0.991248414026241, + "learning_rate": 1.6246170853184726e-05, + "loss": 0.26245906949043274, + "step": 2419 + }, + { + "epoch": 0.642676935333953, + "grad_norm": 1.1213671588278835, + "learning_rate": 1.624274136561654e-05, + "loss": 0.31468862295150757, + "step": 2420 + }, + { + "epoch": 0.6429425043154959, + "grad_norm": 1.0200744973975597, + "learning_rate": 1.6239310674521443e-05, + "loss": 0.28946155309677124, + "step": 2421 + }, + { + "epoch": 0.6432080732970389, + "grad_norm": 1.1088143851501708, + "learning_rate": 1.6235878780560835e-05, + "loss": 0.26272106170654297, + "step": 2422 + }, + { + "epoch": 0.6434736422785818, + "grad_norm": 1.1185700160494145, + "learning_rate": 1.6232445684396347e-05, + "loss": 0.3094574213027954, + "step": 2423 + }, + { + "epoch": 0.6437392112601248, + "grad_norm": 0.9377280048944331, + "learning_rate": 1.6229011386689832e-05, + "loss": 0.2503833770751953, + "step": 2424 + }, + { + "epoch": 0.6440047802416677, + "grad_norm": 0.9657663244207705, + "learning_rate": 
1.6225575888103387e-05, + "loss": 0.2655009627342224, + "step": 2425 + }, + { + "epoch": 0.6442703492232107, + "grad_norm": 1.123117061290067, + "learning_rate": 1.6222139189299336e-05, + "loss": 0.2819611728191376, + "step": 2426 + }, + { + "epoch": 0.6445359182047536, + "grad_norm": 1.0859641118248262, + "learning_rate": 1.6218701290940232e-05, + "loss": 0.2956068217754364, + "step": 2427 + }, + { + "epoch": 0.6448014871862966, + "grad_norm": 1.2445728810553593, + "learning_rate": 1.6215262193688862e-05, + "loss": 0.3330997824668884, + "step": 2428 + }, + { + "epoch": 0.6450670561678395, + "grad_norm": 1.0073602881165937, + "learning_rate": 1.6211821898208242e-05, + "loss": 0.25897055864334106, + "step": 2429 + }, + { + "epoch": 0.6453326251493825, + "grad_norm": 1.1228221759016932, + "learning_rate": 1.6208380405161623e-05, + "loss": 0.3119947016239166, + "step": 2430 + }, + { + "epoch": 0.6455981941309256, + "grad_norm": 1.143631742936843, + "learning_rate": 1.6204937715212482e-05, + "loss": 0.30833956599235535, + "step": 2431 + }, + { + "epoch": 0.6458637631124685, + "grad_norm": 1.1584271404994573, + "learning_rate": 1.620149382902453e-05, + "loss": 0.2935214638710022, + "step": 2432 + }, + { + "epoch": 0.6461293320940115, + "grad_norm": 1.6063755788258844, + "learning_rate": 1.619804874726171e-05, + "loss": 0.24297356605529785, + "step": 2433 + }, + { + "epoch": 0.6463949010755544, + "grad_norm": 1.14218339304969, + "learning_rate": 1.6194602470588186e-05, + "loss": 0.319774866104126, + "step": 2434 + }, + { + "epoch": 0.6466604700570974, + "grad_norm": 1.1751618225153557, + "learning_rate": 1.6191154999668368e-05, + "loss": 0.29197463393211365, + "step": 2435 + }, + { + "epoch": 0.6469260390386403, + "grad_norm": 1.1008916130088804, + "learning_rate": 1.6187706335166882e-05, + "loss": 0.2939727306365967, + "step": 2436 + }, + { + "epoch": 0.6471916080201833, + "grad_norm": 1.0935449463761302, + "learning_rate": 1.6184256477748595e-05, + "loss": 
0.2941162586212158, + "step": 2437 + }, + { + "epoch": 0.6474571770017262, + "grad_norm": 1.1336931987797143, + "learning_rate": 1.6180805428078593e-05, + "loss": 0.2823144197463989, + "step": 2438 + }, + { + "epoch": 0.6477227459832692, + "grad_norm": 1.0912252779984561, + "learning_rate": 1.61773531868222e-05, + "loss": 0.30048274993896484, + "step": 2439 + }, + { + "epoch": 0.6479883149648121, + "grad_norm": 1.183044095349839, + "learning_rate": 1.617389975464497e-05, + "loss": 0.30927354097366333, + "step": 2440 + }, + { + "epoch": 0.6482538839463551, + "grad_norm": 1.166570736507726, + "learning_rate": 1.6170445132212678e-05, + "loss": 0.34835004806518555, + "step": 2441 + }, + { + "epoch": 0.648519452927898, + "grad_norm": 1.0325781129961564, + "learning_rate": 1.616698932019134e-05, + "loss": 0.2890225648880005, + "step": 2442 + }, + { + "epoch": 0.648785021909441, + "grad_norm": 1.1182329319338478, + "learning_rate": 1.6163532319247195e-05, + "loss": 0.31410521268844604, + "step": 2443 + }, + { + "epoch": 0.6490505908909839, + "grad_norm": 0.9213656240638256, + "learning_rate": 1.616007413004671e-05, + "loss": 0.267375111579895, + "step": 2444 + }, + { + "epoch": 0.6493161598725269, + "grad_norm": 1.1587177777274813, + "learning_rate": 1.6156614753256583e-05, + "loss": 0.3300023376941681, + "step": 2445 + }, + { + "epoch": 0.6495817288540698, + "grad_norm": 1.0295072511714587, + "learning_rate": 1.615315418954374e-05, + "loss": 0.2822847366333008, + "step": 2446 + }, + { + "epoch": 0.6498472978356128, + "grad_norm": 1.1626615137060834, + "learning_rate": 1.6149692439575348e-05, + "loss": 0.3093401789665222, + "step": 2447 + }, + { + "epoch": 0.6501128668171557, + "grad_norm": 1.0475923101386018, + "learning_rate": 1.6146229504018777e-05, + "loss": 0.2892506718635559, + "step": 2448 + }, + { + "epoch": 0.6503784357986987, + "grad_norm": 0.9972012319936079, + "learning_rate": 1.6142765383541643e-05, + "loss": 0.2805558741092682, + "step": 2449 + }, + { + 
"epoch": 0.6506440047802416, + "grad_norm": 1.0535842654025462, + "learning_rate": 1.6139300078811794e-05, + "loss": 0.29852935671806335, + "step": 2450 + }, + { + "epoch": 0.6509095737617846, + "grad_norm": 1.193949473615032, + "learning_rate": 1.6135833590497295e-05, + "loss": 0.3567991256713867, + "step": 2451 + }, + { + "epoch": 0.6511751427433276, + "grad_norm": 1.1265709697559396, + "learning_rate": 1.6132365919266442e-05, + "loss": 0.29564782977104187, + "step": 2452 + }, + { + "epoch": 0.6514407117248705, + "grad_norm": 1.011180050217134, + "learning_rate": 1.612889706578777e-05, + "loss": 0.30027297139167786, + "step": 2453 + }, + { + "epoch": 0.6517062807064135, + "grad_norm": 1.0908136110597069, + "learning_rate": 1.6125427030730027e-05, + "loss": 0.3318096697330475, + "step": 2454 + }, + { + "epoch": 0.6519718496879564, + "grad_norm": 1.0728958387824694, + "learning_rate": 1.612195581476219e-05, + "loss": 0.30962997674942017, + "step": 2455 + }, + { + "epoch": 0.6522374186694994, + "grad_norm": 1.2969539714019946, + "learning_rate": 1.6118483418553476e-05, + "loss": 0.3152836859226227, + "step": 2456 + }, + { + "epoch": 0.6525029876510423, + "grad_norm": 1.0160215490589632, + "learning_rate": 1.6115009842773322e-05, + "loss": 0.26117920875549316, + "step": 2457 + }, + { + "epoch": 0.6527685566325853, + "grad_norm": 0.9780826840488046, + "learning_rate": 1.6111535088091388e-05, + "loss": 0.2705717384815216, + "step": 2458 + }, + { + "epoch": 0.6530341256141283, + "grad_norm": 1.112935626593024, + "learning_rate": 1.6108059155177568e-05, + "loss": 0.3281205892562866, + "step": 2459 + }, + { + "epoch": 0.6532996945956713, + "grad_norm": 1.0805050021999307, + "learning_rate": 1.6104582044701983e-05, + "loss": 0.3300125002861023, + "step": 2460 + }, + { + "epoch": 0.6535652635772142, + "grad_norm": 1.0596352955938992, + "learning_rate": 1.6101103757334973e-05, + "loss": 0.29286977648735046, + "step": 2461 + }, + { + "epoch": 0.6538308325587572, + 
"grad_norm": 1.114611766363321, + "learning_rate": 1.6097624293747115e-05, + "loss": 0.2920498847961426, + "step": 2462 + }, + { + "epoch": 0.6540964015403001, + "grad_norm": 1.0455118881549736, + "learning_rate": 1.609414365460921e-05, + "loss": 0.31018689274787903, + "step": 2463 + }, + { + "epoch": 0.6543619705218431, + "grad_norm": 1.0028130278859915, + "learning_rate": 1.609066184059228e-05, + "loss": 0.26806512475013733, + "step": 2464 + }, + { + "epoch": 0.654627539503386, + "grad_norm": 1.0385768164913443, + "learning_rate": 1.608717885236758e-05, + "loss": 0.29770639538764954, + "step": 2465 + }, + { + "epoch": 0.654893108484929, + "grad_norm": 1.0811683391440958, + "learning_rate": 1.6083694690606592e-05, + "loss": 0.36161965131759644, + "step": 2466 + }, + { + "epoch": 0.6551586774664719, + "grad_norm": 1.1455214370068598, + "learning_rate": 1.6080209355981016e-05, + "loss": 0.36114081740379333, + "step": 2467 + }, + { + "epoch": 0.6554242464480149, + "grad_norm": 0.9911085328884063, + "learning_rate": 1.6076722849162786e-05, + "loss": 0.28924882411956787, + "step": 2468 + }, + { + "epoch": 0.6556898154295578, + "grad_norm": 1.1198872767040324, + "learning_rate": 1.6073235170824058e-05, + "loss": 0.3088049292564392, + "step": 2469 + }, + { + "epoch": 0.6559553844111008, + "grad_norm": 1.062389027957873, + "learning_rate": 1.6069746321637216e-05, + "loss": 0.2684907615184784, + "step": 2470 + }, + { + "epoch": 0.6562209533926437, + "grad_norm": 0.9850175058697045, + "learning_rate": 1.6066256302274873e-05, + "loss": 0.2674641013145447, + "step": 2471 + }, + { + "epoch": 0.6564865223741867, + "grad_norm": 1.0658104164235327, + "learning_rate": 1.6062765113409854e-05, + "loss": 0.2865106165409088, + "step": 2472 + }, + { + "epoch": 0.6567520913557297, + "grad_norm": 1.1117203943537428, + "learning_rate": 1.605927275571523e-05, + "loss": 0.33163607120513916, + "step": 2473 + }, + { + "epoch": 0.6570176603372726, + "grad_norm": 1.1177244627769223, + 
"learning_rate": 1.6055779229864276e-05, + "loss": 0.32725927233695984, + "step": 2474 + }, + { + "epoch": 0.6572832293188156, + "grad_norm": 1.171322314473831, + "learning_rate": 1.605228453653051e-05, + "loss": 0.31537747383117676, + "step": 2475 + }, + { + "epoch": 0.6575487983003585, + "grad_norm": 1.0855461390356589, + "learning_rate": 1.604878867638767e-05, + "loss": 0.29331761598587036, + "step": 2476 + }, + { + "epoch": 0.6578143672819015, + "grad_norm": 1.0342424424241736, + "learning_rate": 1.6045291650109706e-05, + "loss": 0.315193772315979, + "step": 2477 + }, + { + "epoch": 0.6580799362634444, + "grad_norm": 1.2286540067411784, + "learning_rate": 1.6041793458370812e-05, + "loss": 0.3595796227455139, + "step": 2478 + }, + { + "epoch": 0.6583455052449874, + "grad_norm": 1.0251892797499218, + "learning_rate": 1.6038294101845394e-05, + "loss": 0.3069949150085449, + "step": 2479 + }, + { + "epoch": 0.6586110742265303, + "grad_norm": 1.1576253586981062, + "learning_rate": 1.603479358120809e-05, + "loss": 0.3154812455177307, + "step": 2480 + }, + { + "epoch": 0.6588766432080733, + "grad_norm": 1.1008921076459075, + "learning_rate": 1.6031291897133756e-05, + "loss": 0.3005039691925049, + "step": 2481 + }, + { + "epoch": 0.6591422121896162, + "grad_norm": 1.1463594149599334, + "learning_rate": 1.6027789050297476e-05, + "loss": 0.2885095775127411, + "step": 2482 + }, + { + "epoch": 0.6594077811711592, + "grad_norm": 1.002066881102099, + "learning_rate": 1.602428504137456e-05, + "loss": 0.291950523853302, + "step": 2483 + }, + { + "epoch": 0.6596733501527021, + "grad_norm": 1.0919380790727968, + "learning_rate": 1.6020779871040538e-05, + "loss": 0.31630760431289673, + "step": 2484 + }, + { + "epoch": 0.6599389191342451, + "grad_norm": 1.0827567425634856, + "learning_rate": 1.6017273539971167e-05, + "loss": 0.29767507314682007, + "step": 2485 + }, + { + "epoch": 0.660204488115788, + "grad_norm": 1.036820980968177, + "learning_rate": 1.601376604884242e-05, + 
"loss": 0.2882775664329529, + "step": 2486 + }, + { + "epoch": 0.6604700570973311, + "grad_norm": 1.0885135950320362, + "learning_rate": 1.601025739833051e-05, + "loss": 0.325736403465271, + "step": 2487 + }, + { + "epoch": 0.660735626078874, + "grad_norm": 1.048580856774253, + "learning_rate": 1.6006747589111854e-05, + "loss": 0.3007255792617798, + "step": 2488 + }, + { + "epoch": 0.661001195060417, + "grad_norm": 1.146836506523448, + "learning_rate": 1.6003236621863107e-05, + "loss": 0.33199968934059143, + "step": 2489 + }, + { + "epoch": 0.6612667640419599, + "grad_norm": 1.1430196866694278, + "learning_rate": 1.5999724497261138e-05, + "loss": 0.3784569799900055, + "step": 2490 + }, + { + "epoch": 0.6615323330235029, + "grad_norm": 1.0506667031587968, + "learning_rate": 1.5996211215983052e-05, + "loss": 0.28146931529045105, + "step": 2491 + }, + { + "epoch": 0.6617979020050458, + "grad_norm": 1.0621415260673002, + "learning_rate": 1.599269677870616e-05, + "loss": 0.32187730073928833, + "step": 2492 + }, + { + "epoch": 0.6620634709865888, + "grad_norm": 1.0631524880676668, + "learning_rate": 1.5989181186108003e-05, + "loss": 0.3021823465824127, + "step": 2493 + }, + { + "epoch": 0.6623290399681317, + "grad_norm": 1.0248198480240434, + "learning_rate": 1.5985664438866354e-05, + "loss": 0.3309648334980011, + "step": 2494 + }, + { + "epoch": 0.6625946089496747, + "grad_norm": 1.0183038789118495, + "learning_rate": 1.598214653765919e-05, + "loss": 0.2939694821834564, + "step": 2495 + }, + { + "epoch": 0.6628601779312177, + "grad_norm": 1.0091208408649601, + "learning_rate": 1.597862748316473e-05, + "loss": 0.31219810247421265, + "step": 2496 + }, + { + "epoch": 0.6631257469127606, + "grad_norm": 1.3669850946739606, + "learning_rate": 1.5975107276061405e-05, + "loss": 0.29435622692108154, + "step": 2497 + }, + { + "epoch": 0.6633913158943036, + "grad_norm": 1.0359724885535866, + "learning_rate": 1.5971585917027864e-05, + "loss": 0.27167004346847534, + "step": 2498 + 
}, + { + "epoch": 0.6636568848758465, + "grad_norm": 1.121619558624798, + "learning_rate": 1.5968063406742988e-05, + "loss": 0.3360658884048462, + "step": 2499 + }, + { + "epoch": 0.6639224538573895, + "grad_norm": 1.0767207810238415, + "learning_rate": 1.596453974588587e-05, + "loss": 0.2994089424610138, + "step": 2500 + }, + { + "epoch": 0.6641880228389324, + "grad_norm": 1.0997593865705806, + "learning_rate": 1.596101493513584e-05, + "loss": 0.32302889227867126, + "step": 2501 + }, + { + "epoch": 0.6644535918204754, + "grad_norm": 1.1249891187970829, + "learning_rate": 1.595748897517243e-05, + "loss": 0.3122987747192383, + "step": 2502 + }, + { + "epoch": 0.6647191608020183, + "grad_norm": 1.014108779554691, + "learning_rate": 1.5953961866675408e-05, + "loss": 0.2746438980102539, + "step": 2503 + }, + { + "epoch": 0.6649847297835613, + "grad_norm": 1.0758059481680302, + "learning_rate": 1.5950433610324758e-05, + "loss": 0.3043097257614136, + "step": 2504 + }, + { + "epoch": 0.6652502987651042, + "grad_norm": 1.2204942135197403, + "learning_rate": 1.594690420680069e-05, + "loss": 0.3208698332309723, + "step": 2505 + }, + { + "epoch": 0.6655158677466472, + "grad_norm": 1.1502218188727449, + "learning_rate": 1.5943373656783628e-05, + "loss": 0.317341148853302, + "step": 2506 + }, + { + "epoch": 0.6657814367281901, + "grad_norm": 1.1223078751349502, + "learning_rate": 1.5939841960954218e-05, + "loss": 0.3250347673892975, + "step": 2507 + }, + { + "epoch": 0.6660470057097331, + "grad_norm": 1.066903715567463, + "learning_rate": 1.5936309119993333e-05, + "loss": 0.32255828380584717, + "step": 2508 + }, + { + "epoch": 0.666312574691276, + "grad_norm": 1.0591506680476068, + "learning_rate": 1.593277513458206e-05, + "loss": 0.3247614800930023, + "step": 2509 + }, + { + "epoch": 0.666578143672819, + "grad_norm": 1.087253896768941, + "learning_rate": 1.5929240005401715e-05, + "loss": 0.34171730279922485, + "step": 2510 + }, + { + "epoch": 0.6668437126543619, + "grad_norm": 
1.092874100004657, + "learning_rate": 1.5925703733133823e-05, + "loss": 0.30671584606170654, + "step": 2511 + }, + { + "epoch": 0.6671092816359049, + "grad_norm": 1.1250075389065, + "learning_rate": 1.5922166318460138e-05, + "loss": 0.3387908339500427, + "step": 2512 + }, + { + "epoch": 0.6673748506174478, + "grad_norm": 1.0272141820522305, + "learning_rate": 1.5918627762062635e-05, + "loss": 0.2772873044013977, + "step": 2513 + }, + { + "epoch": 0.6676404195989908, + "grad_norm": 1.0802689739154336, + "learning_rate": 1.59150880646235e-05, + "loss": 0.31555238366127014, + "step": 2514 + }, + { + "epoch": 0.6679059885805337, + "grad_norm": 0.9930963010924009, + "learning_rate": 1.5911547226825154e-05, + "loss": 0.2821594476699829, + "step": 2515 + }, + { + "epoch": 0.6681715575620768, + "grad_norm": 1.098936156337469, + "learning_rate": 1.5908005249350217e-05, + "loss": 0.3176054358482361, + "step": 2516 + }, + { + "epoch": 0.6684371265436198, + "grad_norm": 1.083365844116071, + "learning_rate": 1.590446213288155e-05, + "loss": 0.28484907746315, + "step": 2517 + }, + { + "epoch": 0.6687026955251627, + "grad_norm": 1.0028500327966023, + "learning_rate": 1.590091787810222e-05, + "loss": 0.25227850675582886, + "step": 2518 + }, + { + "epoch": 0.6689682645067057, + "grad_norm": 0.993931866088294, + "learning_rate": 1.5897372485695514e-05, + "loss": 0.276819109916687, + "step": 2519 + }, + { + "epoch": 0.6692338334882486, + "grad_norm": 1.1883846939575156, + "learning_rate": 1.589382595634495e-05, + "loss": 0.27944183349609375, + "step": 2520 + }, + { + "epoch": 0.6694994024697916, + "grad_norm": 1.0217591474349375, + "learning_rate": 1.589027829073425e-05, + "loss": 0.295337975025177, + "step": 2521 + }, + { + "epoch": 0.6697649714513345, + "grad_norm": 1.0940479681497102, + "learning_rate": 1.5886729489547365e-05, + "loss": 0.31168580055236816, + "step": 2522 + }, + { + "epoch": 0.6700305404328775, + "grad_norm": 1.0847233646991081, + "learning_rate": 
1.5883179553468465e-05, + "loss": 0.34520941972732544, + "step": 2523 + }, + { + "epoch": 0.6702961094144204, + "grad_norm": 1.0941539012056998, + "learning_rate": 1.587962848318193e-05, + "loss": 0.3121863901615143, + "step": 2524 + }, + { + "epoch": 0.6705616783959634, + "grad_norm": 1.2414605611463847, + "learning_rate": 1.587607627937237e-05, + "loss": 0.3450377583503723, + "step": 2525 + }, + { + "epoch": 0.6708272473775063, + "grad_norm": 1.0575484463097053, + "learning_rate": 1.58725229427246e-05, + "loss": 0.33431196212768555, + "step": 2526 + }, + { + "epoch": 0.6710928163590493, + "grad_norm": 2.8101197900274433, + "learning_rate": 1.5868968473923675e-05, + "loss": 0.2753226161003113, + "step": 2527 + }, + { + "epoch": 0.6713583853405922, + "grad_norm": 1.1171540013343635, + "learning_rate": 1.586541287365484e-05, + "loss": 0.31394219398498535, + "step": 2528 + }, + { + "epoch": 0.6716239543221352, + "grad_norm": 1.0940027543433968, + "learning_rate": 1.586185614260358e-05, + "loss": 0.352859765291214, + "step": 2529 + }, + { + "epoch": 0.6718895233036781, + "grad_norm": 1.158790754412002, + "learning_rate": 1.5858298281455592e-05, + "loss": 0.3182204067707062, + "step": 2530 + }, + { + "epoch": 0.6721550922852211, + "grad_norm": 1.0901686159979078, + "learning_rate": 1.5854739290896785e-05, + "loss": 0.3107008934020996, + "step": 2531 + }, + { + "epoch": 0.672420661266764, + "grad_norm": 1.0367853416177613, + "learning_rate": 1.5851179171613294e-05, + "loss": 0.2737328112125397, + "step": 2532 + }, + { + "epoch": 0.672686230248307, + "grad_norm": 1.070700914663809, + "learning_rate": 1.5847617924291466e-05, + "loss": 0.2744509279727936, + "step": 2533 + }, + { + "epoch": 0.6729517992298499, + "grad_norm": 1.0763385778363233, + "learning_rate": 1.584405554961787e-05, + "loss": 0.3149082660675049, + "step": 2534 + }, + { + "epoch": 0.6732173682113929, + "grad_norm": 1.1199335422347676, + "learning_rate": 1.584049204827929e-05, + "loss": 
0.32643741369247437, + "step": 2535 + }, + { + "epoch": 0.6734829371929358, + "grad_norm": 1.1153920819002263, + "learning_rate": 1.583692742096272e-05, + "loss": 0.31901559233665466, + "step": 2536 + }, + { + "epoch": 0.6737485061744788, + "grad_norm": 1.037012713250851, + "learning_rate": 1.583336166835539e-05, + "loss": 0.3020802140235901, + "step": 2537 + }, + { + "epoch": 0.6740140751560217, + "grad_norm": 0.9884255382698084, + "learning_rate": 1.5829794791144723e-05, + "loss": 0.29683804512023926, + "step": 2538 + }, + { + "epoch": 0.6742796441375647, + "grad_norm": 1.0549080502640127, + "learning_rate": 1.582622679001838e-05, + "loss": 0.2898966073989868, + "step": 2539 + }, + { + "epoch": 0.6745452131191076, + "grad_norm": 1.0628349250468347, + "learning_rate": 1.582265766566422e-05, + "loss": 0.2665000855922699, + "step": 2540 + }, + { + "epoch": 0.6748107821006506, + "grad_norm": 1.1059852721256176, + "learning_rate": 1.581908741877034e-05, + "loss": 0.2987207770347595, + "step": 2541 + }, + { + "epoch": 0.6750763510821935, + "grad_norm": 1.1051901132495052, + "learning_rate": 1.5815516050025032e-05, + "loss": 0.32591086626052856, + "step": 2542 + }, + { + "epoch": 0.6753419200637365, + "grad_norm": 0.9752097662975195, + "learning_rate": 1.581194356011682e-05, + "loss": 0.28181299567222595, + "step": 2543 + }, + { + "epoch": 0.6756074890452796, + "grad_norm": 1.0983389872703522, + "learning_rate": 1.5808369949734433e-05, + "loss": 0.3256041407585144, + "step": 2544 + }, + { + "epoch": 0.6758730580268225, + "grad_norm": 1.1228012917357884, + "learning_rate": 1.5804795219566825e-05, + "loss": 0.3079703152179718, + "step": 2545 + }, + { + "epoch": 0.6761386270083655, + "grad_norm": 1.1504916593616519, + "learning_rate": 1.580121937030316e-05, + "loss": 0.3364162743091583, + "step": 2546 + }, + { + "epoch": 0.6764041959899084, + "grad_norm": 1.046870504650359, + "learning_rate": 1.5797642402632816e-05, + "loss": 0.2774898111820221, + "step": 2547 + }, + { + 
"epoch": 0.6766697649714514, + "grad_norm": 1.1108782100380157, + "learning_rate": 1.5794064317245396e-05, + "loss": 0.33260244131088257, + "step": 2548 + }, + { + "epoch": 0.6769353339529943, + "grad_norm": 1.16229568793775, + "learning_rate": 1.5790485114830708e-05, + "loss": 0.3327571153640747, + "step": 2549 + }, + { + "epoch": 0.6772009029345373, + "grad_norm": 1.1256526679188055, + "learning_rate": 1.5786904796078783e-05, + "loss": 0.28527912497520447, + "step": 2550 + }, + { + "epoch": 0.6774664719160802, + "grad_norm": 1.1757868172389025, + "learning_rate": 1.5783323361679865e-05, + "loss": 0.3100908100605011, + "step": 2551 + }, + { + "epoch": 0.6777320408976232, + "grad_norm": 1.1187226402475792, + "learning_rate": 1.577974081232441e-05, + "loss": 0.3434574007987976, + "step": 2552 + }, + { + "epoch": 0.6779976098791661, + "grad_norm": 1.0691671390255433, + "learning_rate": 1.5776157148703094e-05, + "loss": 0.3151341676712036, + "step": 2553 + }, + { + "epoch": 0.6782631788607091, + "grad_norm": 1.1432839314923735, + "learning_rate": 1.5772572371506803e-05, + "loss": 0.33334124088287354, + "step": 2554 + }, + { + "epoch": 0.678528747842252, + "grad_norm": 0.9718187941404679, + "learning_rate": 1.576898648142664e-05, + "loss": 0.26933547854423523, + "step": 2555 + }, + { + "epoch": 0.678794316823795, + "grad_norm": 1.0146251280063243, + "learning_rate": 1.576539947915392e-05, + "loss": 0.3087029755115509, + "step": 2556 + }, + { + "epoch": 0.6790598858053379, + "grad_norm": 2.0746649121309244, + "learning_rate": 1.576181136538018e-05, + "loss": 0.32620540261268616, + "step": 2557 + }, + { + "epoch": 0.6793254547868809, + "grad_norm": 1.0462752825892652, + "learning_rate": 1.575822214079716e-05, + "loss": 0.29112139344215393, + "step": 2558 + }, + { + "epoch": 0.6795910237684238, + "grad_norm": 1.108770761520566, + "learning_rate": 1.5754631806096822e-05, + "loss": 0.3394843339920044, + "step": 2559 + }, + { + "epoch": 0.6798565927499668, + "grad_norm": 
1.0789431162979184, + "learning_rate": 1.5751040361971342e-05, + "loss": 0.32754629850387573, + "step": 2560 + }, + { + "epoch": 0.6801221617315097, + "grad_norm": 1.055729440740922, + "learning_rate": 1.574744780911311e-05, + "loss": 0.2829592823982239, + "step": 2561 + }, + { + "epoch": 0.6803877307130527, + "grad_norm": 3.1916720491195423, + "learning_rate": 1.5743854148214724e-05, + "loss": 0.2718046307563782, + "step": 2562 + }, + { + "epoch": 0.6806532996945956, + "grad_norm": 1.0355755791413483, + "learning_rate": 1.5740259379969002e-05, + "loss": 0.29244256019592285, + "step": 2563 + }, + { + "epoch": 0.6809188686761386, + "grad_norm": 1.0678189150114252, + "learning_rate": 1.5736663505068972e-05, + "loss": 0.2925388514995575, + "step": 2564 + }, + { + "epoch": 0.6811844376576816, + "grad_norm": 1.109826571766002, + "learning_rate": 1.5733066524207875e-05, + "loss": 0.26742440462112427, + "step": 2565 + }, + { + "epoch": 0.6814500066392245, + "grad_norm": 1.0365586719986022, + "learning_rate": 1.5729468438079167e-05, + "loss": 0.33688807487487793, + "step": 2566 + }, + { + "epoch": 0.6817155756207675, + "grad_norm": 1.0939355325909954, + "learning_rate": 1.5725869247376514e-05, + "loss": 0.2953096330165863, + "step": 2567 + }, + { + "epoch": 0.6819811446023104, + "grad_norm": 1.081510188555139, + "learning_rate": 1.5722268952793806e-05, + "loss": 0.321500301361084, + "step": 2568 + }, + { + "epoch": 0.6822467135838534, + "grad_norm": 1.1427798210793014, + "learning_rate": 1.5718667555025127e-05, + "loss": 0.29148590564727783, + "step": 2569 + }, + { + "epoch": 0.6825122825653963, + "grad_norm": 1.0849106130015975, + "learning_rate": 1.5715065054764792e-05, + "loss": 0.26887139678001404, + "step": 2570 + }, + { + "epoch": 0.6827778515469393, + "grad_norm": 0.9118900514894542, + "learning_rate": 1.5711461452707316e-05, + "loss": 0.2698139250278473, + "step": 2571 + }, + { + "epoch": 0.6830434205284823, + "grad_norm": 0.9420578172190551, + "learning_rate": 
1.5707856749547433e-05, + "loss": 0.264956533908844, + "step": 2572 + }, + { + "epoch": 0.6833089895100253, + "grad_norm": 1.0786584040903482, + "learning_rate": 1.5704250945980085e-05, + "loss": 0.32535314559936523, + "step": 2573 + }, + { + "epoch": 0.6835745584915682, + "grad_norm": 1.1132312438200667, + "learning_rate": 1.5700644042700432e-05, + "loss": 0.30529654026031494, + "step": 2574 + }, + { + "epoch": 0.6838401274731112, + "grad_norm": 0.9518994724553314, + "learning_rate": 1.569703604040384e-05, + "loss": 0.27253150939941406, + "step": 2575 + }, + { + "epoch": 0.6841056964546541, + "grad_norm": 1.0559070796873817, + "learning_rate": 1.5693426939785886e-05, + "loss": 0.27451053261756897, + "step": 2576 + }, + { + "epoch": 0.6843712654361971, + "grad_norm": 1.1393124405849042, + "learning_rate": 1.5689816741542374e-05, + "loss": 0.33280283212661743, + "step": 2577 + }, + { + "epoch": 0.68463683441774, + "grad_norm": 1.1306113061745138, + "learning_rate": 1.5686205446369293e-05, + "loss": 0.2911887764930725, + "step": 2578 + }, + { + "epoch": 0.684902403399283, + "grad_norm": 1.0940465986734231, + "learning_rate": 1.5682593054962866e-05, + "loss": 0.2950279116630554, + "step": 2579 + }, + { + "epoch": 0.6851679723808259, + "grad_norm": 1.0911163136563768, + "learning_rate": 1.5678979568019518e-05, + "loss": 0.3267458975315094, + "step": 2580 + }, + { + "epoch": 0.6854335413623689, + "grad_norm": 1.2739312763430675, + "learning_rate": 1.5675364986235887e-05, + "loss": 0.3209132254123688, + "step": 2581 + }, + { + "epoch": 0.6856991103439118, + "grad_norm": 1.1101887519376679, + "learning_rate": 1.5671749310308818e-05, + "loss": 0.3186662197113037, + "step": 2582 + }, + { + "epoch": 0.6859646793254548, + "grad_norm": 0.9652854961372175, + "learning_rate": 1.566813254093538e-05, + "loss": 0.24875827133655548, + "step": 2583 + }, + { + "epoch": 0.6862302483069977, + "grad_norm": 1.0684425959326884, + "learning_rate": 1.5664514678812835e-05, + "loss": 
0.26657983660697937, + "step": 2584 + }, + { + "epoch": 0.6864958172885407, + "grad_norm": 1.0670123202559558, + "learning_rate": 1.5660895724638666e-05, + "loss": 0.2889682650566101, + "step": 2585 + }, + { + "epoch": 0.6867613862700837, + "grad_norm": 1.2310590689373582, + "learning_rate": 1.5657275679110564e-05, + "loss": 0.32035061717033386, + "step": 2586 + }, + { + "epoch": 0.6870269552516266, + "grad_norm": 0.9946580402808185, + "learning_rate": 1.5653654542926435e-05, + "loss": 0.2844264507293701, + "step": 2587 + }, + { + "epoch": 0.6872925242331696, + "grad_norm": 1.0738818938413612, + "learning_rate": 1.5650032316784388e-05, + "loss": 0.27645713090896606, + "step": 2588 + }, + { + "epoch": 0.6875580932147125, + "grad_norm": 1.0078062598096618, + "learning_rate": 1.5646409001382745e-05, + "loss": 0.29902809858322144, + "step": 2589 + }, + { + "epoch": 0.6878236621962555, + "grad_norm": 1.0662439819494403, + "learning_rate": 1.564278459742004e-05, + "loss": 0.28179824352264404, + "step": 2590 + }, + { + "epoch": 0.6880892311777984, + "grad_norm": 0.9959782320912598, + "learning_rate": 1.563915910559502e-05, + "loss": 0.30527305603027344, + "step": 2591 + }, + { + "epoch": 0.6883548001593414, + "grad_norm": 0.9640464455731136, + "learning_rate": 1.5635532526606625e-05, + "loss": 0.29411792755126953, + "step": 2592 + }, + { + "epoch": 0.6886203691408843, + "grad_norm": 1.0659796212639145, + "learning_rate": 1.563190486115403e-05, + "loss": 0.32294154167175293, + "step": 2593 + }, + { + "epoch": 0.6888859381224273, + "grad_norm": 1.0983041505312465, + "learning_rate": 1.5628276109936594e-05, + "loss": 0.31873172521591187, + "step": 2594 + }, + { + "epoch": 0.6891515071039702, + "grad_norm": 1.2163401358885952, + "learning_rate": 1.5624646273653908e-05, + "loss": 0.37790048122406006, + "step": 2595 + }, + { + "epoch": 0.6894170760855132, + "grad_norm": 1.0271206309222516, + "learning_rate": 1.5621015353005754e-05, + "loss": 0.27596205472946167, + "step": 2596 
+ }, + { + "epoch": 0.6896826450670561, + "grad_norm": 1.2915034278595348, + "learning_rate": 1.5617383348692135e-05, + "loss": 0.30952686071395874, + "step": 2597 + }, + { + "epoch": 0.6899482140485991, + "grad_norm": 1.089414433310086, + "learning_rate": 1.5613750261413256e-05, + "loss": 0.2933235764503479, + "step": 2598 + }, + { + "epoch": 0.690213783030142, + "grad_norm": 1.1151043496896997, + "learning_rate": 1.5610116091869538e-05, + "loss": 0.2961776554584503, + "step": 2599 + }, + { + "epoch": 0.6904793520116851, + "grad_norm": 1.0596230408388436, + "learning_rate": 1.56064808407616e-05, + "loss": 0.2843313217163086, + "step": 2600 + }, + { + "epoch": 0.690744920993228, + "grad_norm": 1.0545406618996236, + "learning_rate": 1.560284450879028e-05, + "loss": 0.29366564750671387, + "step": 2601 + }, + { + "epoch": 0.691010489974771, + "grad_norm": 1.028254286030692, + "learning_rate": 1.5599207096656614e-05, + "loss": 0.32668614387512207, + "step": 2602 + }, + { + "epoch": 0.6912760589563139, + "grad_norm": 1.1962201821774399, + "learning_rate": 1.5595568605061858e-05, + "loss": 0.344367653131485, + "step": 2603 + }, + { + "epoch": 0.6915416279378569, + "grad_norm": 1.2250839657368426, + "learning_rate": 1.5591929034707468e-05, + "loss": 0.2875809371471405, + "step": 2604 + }, + { + "epoch": 0.6918071969193998, + "grad_norm": 0.9717157700868733, + "learning_rate": 1.5588288386295113e-05, + "loss": 0.2688799202442169, + "step": 2605 + }, + { + "epoch": 0.6920727659009428, + "grad_norm": 1.2520016236289049, + "learning_rate": 1.558464666052667e-05, + "loss": 0.28575828671455383, + "step": 2606 + }, + { + "epoch": 0.6923383348824858, + "grad_norm": 1.0741907315089707, + "learning_rate": 1.5581003858104203e-05, + "loss": 0.2800632119178772, + "step": 2607 + }, + { + "epoch": 0.6926039038640287, + "grad_norm": 1.096176752690496, + "learning_rate": 1.5577359979730022e-05, + "loss": 0.3066416382789612, + "step": 2608 + }, + { + "epoch": 0.6928694728455717, + 
"grad_norm": 1.0146792499875503, + "learning_rate": 1.5573715026106617e-05, + "loss": 0.3164110779762268, + "step": 2609 + }, + { + "epoch": 0.6931350418271146, + "grad_norm": 1.0292100354922897, + "learning_rate": 1.5570068997936686e-05, + "loss": 0.2908422350883484, + "step": 2610 + }, + { + "epoch": 0.6934006108086576, + "grad_norm": 0.9996966110923509, + "learning_rate": 1.5566421895923148e-05, + "loss": 0.29055240750312805, + "step": 2611 + }, + { + "epoch": 0.6936661797902005, + "grad_norm": 1.1296077877181152, + "learning_rate": 1.556277372076912e-05, + "loss": 0.3247227370738983, + "step": 2612 + }, + { + "epoch": 0.6939317487717435, + "grad_norm": 1.0869397458201258, + "learning_rate": 1.555912447317792e-05, + "loss": 0.29944315552711487, + "step": 2613 + }, + { + "epoch": 0.6941973177532864, + "grad_norm": 1.140637727836958, + "learning_rate": 1.5555474153853092e-05, + "loss": 0.2984931170940399, + "step": 2614 + }, + { + "epoch": 0.6944628867348294, + "grad_norm": 1.0644561032518303, + "learning_rate": 1.5551822763498364e-05, + "loss": 0.301285982131958, + "step": 2615 + }, + { + "epoch": 0.6947284557163723, + "grad_norm": 1.0271314049069311, + "learning_rate": 1.5548170302817683e-05, + "loss": 0.2862967252731323, + "step": 2616 + }, + { + "epoch": 0.6949940246979153, + "grad_norm": 1.0216494335731472, + "learning_rate": 1.5544516772515207e-05, + "loss": 0.3071482181549072, + "step": 2617 + }, + { + "epoch": 0.6952595936794582, + "grad_norm": 1.153798162838472, + "learning_rate": 1.5540862173295285e-05, + "loss": 0.33668914437294006, + "step": 2618 + }, + { + "epoch": 0.6955251626610012, + "grad_norm": 1.0451730984690786, + "learning_rate": 1.5537206505862486e-05, + "loss": 0.32204627990722656, + "step": 2619 + }, + { + "epoch": 0.6957907316425441, + "grad_norm": 1.083101648134336, + "learning_rate": 1.5533549770921576e-05, + "loss": 0.30210041999816895, + "step": 2620 + }, + { + "epoch": 0.6960563006240871, + "grad_norm": 1.1518417167078652, + 
"learning_rate": 1.5529891969177535e-05, + "loss": 0.3116886019706726, + "step": 2621 + }, + { + "epoch": 0.69632186960563, + "grad_norm": 1.1473344970327815, + "learning_rate": 1.5526233101335543e-05, + "loss": 0.3460058867931366, + "step": 2622 + }, + { + "epoch": 0.696587438587173, + "grad_norm": 1.0477810576486106, + "learning_rate": 1.552257316810098e-05, + "loss": 0.30080512166023254, + "step": 2623 + }, + { + "epoch": 0.6968530075687159, + "grad_norm": 1.1107090823955428, + "learning_rate": 1.5518912170179447e-05, + "loss": 0.3381347954273224, + "step": 2624 + }, + { + "epoch": 0.6971185765502589, + "grad_norm": 1.0737064011248665, + "learning_rate": 1.5515250108276733e-05, + "loss": 0.30345672369003296, + "step": 2625 + }, + { + "epoch": 0.6973841455318018, + "grad_norm": 1.1809134250993814, + "learning_rate": 1.5511586983098847e-05, + "loss": 0.3002641797065735, + "step": 2626 + }, + { + "epoch": 0.6976497145133448, + "grad_norm": 0.9975793486319376, + "learning_rate": 1.5507922795351992e-05, + "loss": 0.2848126292228699, + "step": 2627 + }, + { + "epoch": 0.6979152834948879, + "grad_norm": 1.1203755244922207, + "learning_rate": 1.5504257545742585e-05, + "loss": 0.32360371947288513, + "step": 2628 + }, + { + "epoch": 0.6981808524764308, + "grad_norm": 1.0674295201271842, + "learning_rate": 1.5500591234977237e-05, + "loss": 0.2970595955848694, + "step": 2629 + }, + { + "epoch": 0.6984464214579738, + "grad_norm": 1.1343972682519483, + "learning_rate": 1.5496923863762773e-05, + "loss": 0.35431474447250366, + "step": 2630 + }, + { + "epoch": 0.6987119904395167, + "grad_norm": 1.027377246814574, + "learning_rate": 1.549325543280622e-05, + "loss": 0.30133551359176636, + "step": 2631 + }, + { + "epoch": 0.6989775594210597, + "grad_norm": 1.066148832325447, + "learning_rate": 1.5489585942814807e-05, + "loss": 0.3013160824775696, + "step": 2632 + }, + { + "epoch": 0.6992431284026026, + "grad_norm": 1.1981871164483473, + "learning_rate": 1.5485915394495967e-05, + 
"loss": 0.3291313052177429, + "step": 2633 + }, + { + "epoch": 0.6995086973841456, + "grad_norm": 1.3083774012082008, + "learning_rate": 1.5482243788557336e-05, + "loss": 0.32308053970336914, + "step": 2634 + }, + { + "epoch": 0.6997742663656885, + "grad_norm": 1.0802428984314951, + "learning_rate": 1.5478571125706762e-05, + "loss": 0.321450412273407, + "step": 2635 + }, + { + "epoch": 0.7000398353472315, + "grad_norm": 1.1144035500723286, + "learning_rate": 1.547489740665229e-05, + "loss": 0.30871254205703735, + "step": 2636 + }, + { + "epoch": 0.7003054043287744, + "grad_norm": 1.1599776854022048, + "learning_rate": 1.5471222632102168e-05, + "loss": 0.29414835572242737, + "step": 2637 + }, + { + "epoch": 0.7005709733103174, + "grad_norm": 1.019484878273918, + "learning_rate": 1.546754680276485e-05, + "loss": 0.2841604948043823, + "step": 2638 + }, + { + "epoch": 0.7008365422918603, + "grad_norm": 1.039625714192533, + "learning_rate": 1.546386991934899e-05, + "loss": 0.2895316183567047, + "step": 2639 + }, + { + "epoch": 0.7011021112734033, + "grad_norm": 1.0418724746200432, + "learning_rate": 1.546019198256345e-05, + "loss": 0.310278058052063, + "step": 2640 + }, + { + "epoch": 0.7013676802549462, + "grad_norm": 1.1737622034955963, + "learning_rate": 1.5456512993117297e-05, + "loss": 0.3000732660293579, + "step": 2641 + }, + { + "epoch": 0.7016332492364892, + "grad_norm": 1.034060473081883, + "learning_rate": 1.545283295171979e-05, + "loss": 0.2650133967399597, + "step": 2642 + }, + { + "epoch": 0.7018988182180321, + "grad_norm": 1.1833814596994714, + "learning_rate": 1.5449151859080395e-05, + "loss": 0.3414345681667328, + "step": 2643 + }, + { + "epoch": 0.7021643871995751, + "grad_norm": 0.9407765615747015, + "learning_rate": 1.5445469715908793e-05, + "loss": 0.26955321431159973, + "step": 2644 + }, + { + "epoch": 0.702429956181118, + "grad_norm": 1.0775826100815478, + "learning_rate": 1.5441786522914855e-05, + "loss": 0.3028743863105774, + "step": 2645 + }, + 
{ + "epoch": 0.702695525162661, + "grad_norm": 1.1630883359211883, + "learning_rate": 1.5438102280808653e-05, + "loss": 0.28710106015205383, + "step": 2646 + }, + { + "epoch": 0.7029610941442039, + "grad_norm": 1.0828201415955274, + "learning_rate": 1.543441699030047e-05, + "loss": 0.33343076705932617, + "step": 2647 + }, + { + "epoch": 0.7032266631257469, + "grad_norm": 2.8774903725783445, + "learning_rate": 1.543073065210078e-05, + "loss": 0.27760642766952515, + "step": 2648 + }, + { + "epoch": 0.7034922321072898, + "grad_norm": 1.0939125975780095, + "learning_rate": 1.5427043266920276e-05, + "loss": 0.2844334840774536, + "step": 2649 + }, + { + "epoch": 0.7037578010888328, + "grad_norm": 1.0671776711844796, + "learning_rate": 1.542335483546983e-05, + "loss": 0.28979432582855225, + "step": 2650 + }, + { + "epoch": 0.7040233700703757, + "grad_norm": 1.1018820862649594, + "learning_rate": 1.5419665358460537e-05, + "loss": 0.313267320394516, + "step": 2651 + }, + { + "epoch": 0.7042889390519187, + "grad_norm": 1.122792570050495, + "learning_rate": 1.5415974836603676e-05, + "loss": 0.26702141761779785, + "step": 2652 + }, + { + "epoch": 0.7045545080334616, + "grad_norm": 1.084104909381419, + "learning_rate": 1.5412283270610752e-05, + "loss": 0.3256012499332428, + "step": 2653 + }, + { + "epoch": 0.7048200770150046, + "grad_norm": 1.1096374178765924, + "learning_rate": 1.540859066119344e-05, + "loss": 0.3035642206668854, + "step": 2654 + }, + { + "epoch": 0.7050856459965475, + "grad_norm": 1.1410920430169775, + "learning_rate": 1.5404897009063636e-05, + "loss": 0.32206645607948303, + "step": 2655 + }, + { + "epoch": 0.7053512149780906, + "grad_norm": 0.9596610334229038, + "learning_rate": 1.5401202314933436e-05, + "loss": 0.3023940920829773, + "step": 2656 + }, + { + "epoch": 0.7056167839596336, + "grad_norm": 0.9678878502259071, + "learning_rate": 1.539750657951513e-05, + "loss": 0.2839987277984619, + "step": 2657 + }, + { + "epoch": 0.7058823529411765, + 
"grad_norm": 0.9744312269236198, + "learning_rate": 1.5393809803521213e-05, + "loss": 0.2488149106502533, + "step": 2658 + }, + { + "epoch": 0.7061479219227195, + "grad_norm": 1.0311988168007409, + "learning_rate": 1.539011198766438e-05, + "loss": 0.27156201004981995, + "step": 2659 + }, + { + "epoch": 0.7064134909042624, + "grad_norm": 1.0925039664890526, + "learning_rate": 1.5386413132657528e-05, + "loss": 0.3038437068462372, + "step": 2660 + }, + { + "epoch": 0.7066790598858054, + "grad_norm": 0.9713190505037098, + "learning_rate": 1.5382713239213746e-05, + "loss": 0.27626922726631165, + "step": 2661 + }, + { + "epoch": 0.7069446288673483, + "grad_norm": 1.9675808121081846, + "learning_rate": 1.537901230804634e-05, + "loss": 0.27338162064552307, + "step": 2662 + }, + { + "epoch": 0.7072101978488913, + "grad_norm": 0.9540020890839573, + "learning_rate": 1.5375310339868798e-05, + "loss": 0.2635098099708557, + "step": 2663 + }, + { + "epoch": 0.7074757668304342, + "grad_norm": 1.1274430903932144, + "learning_rate": 1.537160733539482e-05, + "loss": 0.3245551288127899, + "step": 2664 + }, + { + "epoch": 0.7077413358119772, + "grad_norm": 1.1100804783644485, + "learning_rate": 1.53679032953383e-05, + "loss": 0.3226238787174225, + "step": 2665 + }, + { + "epoch": 0.7080069047935201, + "grad_norm": 1.0972084780717322, + "learning_rate": 1.536419822041333e-05, + "loss": 0.31588318943977356, + "step": 2666 + }, + { + "epoch": 0.7082724737750631, + "grad_norm": 1.031778059845932, + "learning_rate": 1.536049211133421e-05, + "loss": 0.2494429647922516, + "step": 2667 + }, + { + "epoch": 0.708538042756606, + "grad_norm": 1.1110915785079796, + "learning_rate": 1.5356784968815436e-05, + "loss": 0.30966901779174805, + "step": 2668 + }, + { + "epoch": 0.708803611738149, + "grad_norm": 1.1803956993815392, + "learning_rate": 1.5353076793571692e-05, + "loss": 0.29383328557014465, + "step": 2669 + }, + { + "epoch": 0.7090691807196919, + "grad_norm": 1.086625008831518, + 
"learning_rate": 1.5349367586317875e-05, + "loss": 0.30337825417518616, + "step": 2670 + }, + { + "epoch": 0.7093347497012349, + "grad_norm": 1.0049086741144315, + "learning_rate": 1.5345657347769082e-05, + "loss": 0.28128665685653687, + "step": 2671 + }, + { + "epoch": 0.7096003186827778, + "grad_norm": 1.1819105498956106, + "learning_rate": 1.5341946078640594e-05, + "loss": 0.35167062282562256, + "step": 2672 + }, + { + "epoch": 0.7098658876643208, + "grad_norm": 1.0441531577784944, + "learning_rate": 1.533823377964791e-05, + "loss": 0.30409517884254456, + "step": 2673 + }, + { + "epoch": 0.7101314566458637, + "grad_norm": 1.013441954819978, + "learning_rate": 1.5334520451506706e-05, + "loss": 0.2667735815048218, + "step": 2674 + }, + { + "epoch": 0.7103970256274067, + "grad_norm": 1.130854753100919, + "learning_rate": 1.5330806094932876e-05, + "loss": 0.290219247341156, + "step": 2675 + }, + { + "epoch": 0.7106625946089496, + "grad_norm": 1.120803532670259, + "learning_rate": 1.5327090710642503e-05, + "loss": 0.33118927478790283, + "step": 2676 + }, + { + "epoch": 0.7109281635904926, + "grad_norm": 1.2896959817209073, + "learning_rate": 1.5323374299351867e-05, + "loss": 0.34287041425704956, + "step": 2677 + }, + { + "epoch": 0.7111937325720356, + "grad_norm": 1.0183367847991263, + "learning_rate": 1.531965686177745e-05, + "loss": 0.27093711495399475, + "step": 2678 + }, + { + "epoch": 0.7114593015535785, + "grad_norm": 1.0913550671130643, + "learning_rate": 1.531593839863593e-05, + "loss": 0.2987911105155945, + "step": 2679 + }, + { + "epoch": 0.7117248705351215, + "grad_norm": 1.0145664449432468, + "learning_rate": 1.5312218910644185e-05, + "loss": 0.2914583086967468, + "step": 2680 + }, + { + "epoch": 0.7119904395166644, + "grad_norm": 1.0712171950199525, + "learning_rate": 1.530849839851928e-05, + "loss": 0.34159964323043823, + "step": 2681 + }, + { + "epoch": 0.7122560084982074, + "grad_norm": 1.0132523095253043, + "learning_rate": 1.5304776862978496e-05, + 
"loss": 0.28327372670173645, + "step": 2682 + }, + { + "epoch": 0.7125215774797503, + "grad_norm": 1.0473430655235008, + "learning_rate": 1.5301054304739292e-05, + "loss": 0.2902851104736328, + "step": 2683 + }, + { + "epoch": 0.7127871464612934, + "grad_norm": 1.106440530120003, + "learning_rate": 1.5297330724519344e-05, + "loss": 0.3192726969718933, + "step": 2684 + }, + { + "epoch": 0.7130527154428363, + "grad_norm": 1.0682705697817987, + "learning_rate": 1.5293606123036508e-05, + "loss": 0.30242764949798584, + "step": 2685 + }, + { + "epoch": 0.7133182844243793, + "grad_norm": 1.0059439200202651, + "learning_rate": 1.528988050100884e-05, + "loss": 0.2718653082847595, + "step": 2686 + }, + { + "epoch": 0.7135838534059222, + "grad_norm": 1.019566462631627, + "learning_rate": 1.52861538591546e-05, + "loss": 0.3014821708202362, + "step": 2687 + }, + { + "epoch": 0.7138494223874652, + "grad_norm": 1.1473508187880241, + "learning_rate": 1.528242619819224e-05, + "loss": 0.3378177881240845, + "step": 2688 + }, + { + "epoch": 0.7141149913690081, + "grad_norm": 1.0632179838195628, + "learning_rate": 1.5278697518840415e-05, + "loss": 0.29286471009254456, + "step": 2689 + }, + { + "epoch": 0.7143805603505511, + "grad_norm": 1.1140242619678895, + "learning_rate": 1.527496782181796e-05, + "loss": 0.3371768593788147, + "step": 2690 + }, + { + "epoch": 0.714646129332094, + "grad_norm": 1.0421377750374783, + "learning_rate": 1.5271237107843925e-05, + "loss": 0.30571556091308594, + "step": 2691 + }, + { + "epoch": 0.714911698313637, + "grad_norm": 1.0650624138184501, + "learning_rate": 1.526750537763754e-05, + "loss": 0.33064618706703186, + "step": 2692 + }, + { + "epoch": 0.7151772672951799, + "grad_norm": 1.0787164498543842, + "learning_rate": 1.5263772631918242e-05, + "loss": 0.3369274139404297, + "step": 2693 + }, + { + "epoch": 0.7154428362767229, + "grad_norm": 1.079249778019668, + "learning_rate": 1.5260038871405663e-05, + "loss": 0.2422705739736557, + "step": 2694 + }, + 
{ + "epoch": 0.7157084052582658, + "grad_norm": 1.3990281605221084, + "learning_rate": 1.5256304096819628e-05, + "loss": 0.35786008834838867, + "step": 2695 + }, + { + "epoch": 0.7159739742398088, + "grad_norm": 1.0368618301698236, + "learning_rate": 1.5252568308880155e-05, + "loss": 0.2853243052959442, + "step": 2696 + }, + { + "epoch": 0.7162395432213517, + "grad_norm": 1.1300838792843926, + "learning_rate": 1.5248831508307459e-05, + "loss": 0.2903040051460266, + "step": 2697 + }, + { + "epoch": 0.7165051122028947, + "grad_norm": 1.0779989148221412, + "learning_rate": 1.5245093695821954e-05, + "loss": 0.3375359773635864, + "step": 2698 + }, + { + "epoch": 0.7167706811844377, + "grad_norm": 0.9828776196369989, + "learning_rate": 1.5241354872144242e-05, + "loss": 0.27855974435806274, + "step": 2699 + }, + { + "epoch": 0.7170362501659806, + "grad_norm": 1.0672391327565405, + "learning_rate": 1.5237615037995129e-05, + "loss": 0.32226768136024475, + "step": 2700 + }, + { + "epoch": 0.7173018191475236, + "grad_norm": 1.1089458515112456, + "learning_rate": 1.5233874194095606e-05, + "loss": 0.32856303453445435, + "step": 2701 + }, + { + "epoch": 0.7175673881290665, + "grad_norm": 1.15556869357308, + "learning_rate": 1.5230132341166868e-05, + "loss": 0.31619006395339966, + "step": 2702 + }, + { + "epoch": 0.7178329571106095, + "grad_norm": 1.09474796019269, + "learning_rate": 1.5226389479930296e-05, + "loss": 0.29736411571502686, + "step": 2703 + }, + { + "epoch": 0.7180985260921524, + "grad_norm": 1.0969127487202406, + "learning_rate": 1.5222645611107477e-05, + "loss": 0.2767728865146637, + "step": 2704 + }, + { + "epoch": 0.7183640950736954, + "grad_norm": 1.054074095850648, + "learning_rate": 1.5218900735420174e-05, + "loss": 0.30994221568107605, + "step": 2705 + }, + { + "epoch": 0.7186296640552383, + "grad_norm": 1.0931807335310835, + "learning_rate": 1.5215154853590362e-05, + "loss": 0.3419484496116638, + "step": 2706 + }, + { + "epoch": 0.7188952330367813, + 
"grad_norm": 1.0503021732812985, + "learning_rate": 1.5211407966340203e-05, + "loss": 0.3063664436340332, + "step": 2707 + }, + { + "epoch": 0.7191608020183242, + "grad_norm": 1.0345938706194526, + "learning_rate": 1.520766007439205e-05, + "loss": 0.2856604754924774, + "step": 2708 + }, + { + "epoch": 0.7194263709998672, + "grad_norm": 0.9757823992785323, + "learning_rate": 1.5203911178468453e-05, + "loss": 0.23257851600646973, + "step": 2709 + }, + { + "epoch": 0.7196919399814101, + "grad_norm": 1.0292145399058534, + "learning_rate": 1.5200161279292154e-05, + "loss": 0.31451839208602905, + "step": 2710 + }, + { + "epoch": 0.7199575089629531, + "grad_norm": 1.1017577588578753, + "learning_rate": 1.5196410377586095e-05, + "loss": 0.30298277735710144, + "step": 2711 + }, + { + "epoch": 0.7202230779444961, + "grad_norm": 1.0759590578514124, + "learning_rate": 1.5192658474073398e-05, + "loss": 0.28654640913009644, + "step": 2712 + }, + { + "epoch": 0.7204886469260391, + "grad_norm": 1.1189221983197806, + "learning_rate": 1.5188905569477391e-05, + "loss": 0.3148455023765564, + "step": 2713 + }, + { + "epoch": 0.720754215907582, + "grad_norm": 1.079970608729249, + "learning_rate": 1.5185151664521585e-05, + "loss": 0.3004840612411499, + "step": 2714 + }, + { + "epoch": 0.721019784889125, + "grad_norm": 1.206470642332625, + "learning_rate": 1.518139675992969e-05, + "loss": 0.3378010392189026, + "step": 2715 + }, + { + "epoch": 0.721285353870668, + "grad_norm": 1.0802971688897103, + "learning_rate": 1.517764085642561e-05, + "loss": 0.3084215223789215, + "step": 2716 + }, + { + "epoch": 0.7215509228522109, + "grad_norm": 1.1196175790564493, + "learning_rate": 1.517388395473344e-05, + "loss": 0.3434324264526367, + "step": 2717 + }, + { + "epoch": 0.7218164918337538, + "grad_norm": 1.2084125695848371, + "learning_rate": 1.517012605557746e-05, + "loss": 0.2862265706062317, + "step": 2718 + }, + { + "epoch": 0.7220820608152968, + "grad_norm": 0.9574562560549519, + 
"learning_rate": 1.5166367159682156e-05, + "loss": 0.2760370671749115, + "step": 2719 + }, + { + "epoch": 0.7223476297968398, + "grad_norm": 1.0623260792686084, + "learning_rate": 1.5162607267772194e-05, + "loss": 0.26659202575683594, + "step": 2720 + }, + { + "epoch": 0.7226131987783827, + "grad_norm": 1.069380288412464, + "learning_rate": 1.5158846380572439e-05, + "loss": 0.31900978088378906, + "step": 2721 + }, + { + "epoch": 0.7228787677599257, + "grad_norm": 0.9775730121294547, + "learning_rate": 1.5155084498807941e-05, + "loss": 0.2983658015727997, + "step": 2722 + }, + { + "epoch": 0.7231443367414686, + "grad_norm": 1.0202126383266699, + "learning_rate": 1.5151321623203953e-05, + "loss": 0.3086162805557251, + "step": 2723 + }, + { + "epoch": 0.7234099057230116, + "grad_norm": 1.2685875339489936, + "learning_rate": 1.5147557754485908e-05, + "loss": 0.3233461380004883, + "step": 2724 + }, + { + "epoch": 0.7236754747045545, + "grad_norm": 1.1386667332230644, + "learning_rate": 1.5143792893379441e-05, + "loss": 0.2979195713996887, + "step": 2725 + }, + { + "epoch": 0.7239410436860975, + "grad_norm": 0.9598628443474388, + "learning_rate": 1.5140027040610367e-05, + "loss": 0.27854713797569275, + "step": 2726 + }, + { + "epoch": 0.7242066126676404, + "grad_norm": 1.0735596908703036, + "learning_rate": 1.5136260196904704e-05, + "loss": 0.293560266494751, + "step": 2727 + }, + { + "epoch": 0.7244721816491834, + "grad_norm": 1.1273149809893865, + "learning_rate": 1.513249236298865e-05, + "loss": 0.3033742308616638, + "step": 2728 + }, + { + "epoch": 0.7247377506307263, + "grad_norm": 1.1425183002588892, + "learning_rate": 1.51287235395886e-05, + "loss": 0.27958324551582336, + "step": 2729 + }, + { + "epoch": 0.7250033196122693, + "grad_norm": 1.022839475112705, + "learning_rate": 1.512495372743114e-05, + "loss": 0.3063122034072876, + "step": 2730 + }, + { + "epoch": 0.7252688885938122, + "grad_norm": 1.0524007495354166, + "learning_rate": 1.5121182927243043e-05, + 
"loss": 0.29126864671707153, + "step": 2731 + }, + { + "epoch": 0.7255344575753552, + "grad_norm": 1.0517432179455284, + "learning_rate": 1.5117411139751279e-05, + "loss": 0.27507084608078003, + "step": 2732 + }, + { + "epoch": 0.7258000265568981, + "grad_norm": 1.1167955582078537, + "learning_rate": 1.5113638365682996e-05, + "loss": 0.3432404398918152, + "step": 2733 + }, + { + "epoch": 0.7260655955384411, + "grad_norm": 1.0687371329401973, + "learning_rate": 1.5109864605765552e-05, + "loss": 0.27633196115493774, + "step": 2734 + }, + { + "epoch": 0.726331164519984, + "grad_norm": 1.0811244514830984, + "learning_rate": 1.5106089860726474e-05, + "loss": 0.274509072303772, + "step": 2735 + }, + { + "epoch": 0.726596733501527, + "grad_norm": 0.97012581020674, + "learning_rate": 1.5102314131293494e-05, + "loss": 0.26650723814964294, + "step": 2736 + }, + { + "epoch": 0.7268623024830699, + "grad_norm": 0.9681782432226156, + "learning_rate": 1.5098537418194524e-05, + "loss": 0.24476298689842224, + "step": 2737 + }, + { + "epoch": 0.7271278714646129, + "grad_norm": 1.1154772400244737, + "learning_rate": 1.5094759722157671e-05, + "loss": 0.3337150812149048, + "step": 2738 + }, + { + "epoch": 0.7273934404461558, + "grad_norm": 1.0187825093211873, + "learning_rate": 1.509098104391123e-05, + "loss": 0.3147660195827484, + "step": 2739 + }, + { + "epoch": 0.7276590094276989, + "grad_norm": 0.969229068573487, + "learning_rate": 1.5087201384183687e-05, + "loss": 0.2613281309604645, + "step": 2740 + }, + { + "epoch": 0.7279245784092419, + "grad_norm": 1.0641712204852296, + "learning_rate": 1.5083420743703717e-05, + "loss": 0.2773926854133606, + "step": 2741 + }, + { + "epoch": 0.7281901473907848, + "grad_norm": 1.0826759541494775, + "learning_rate": 1.5079639123200179e-05, + "loss": 0.30515575408935547, + "step": 2742 + }, + { + "epoch": 0.7284557163723278, + "grad_norm": 1.0619554532285063, + "learning_rate": 1.5075856523402128e-05, + "loss": 0.3174355626106262, + "step": 2743 + 
}, + { + "epoch": 0.7287212853538707, + "grad_norm": 0.9676487172589012, + "learning_rate": 1.5072072945038802e-05, + "loss": 0.25163760781288147, + "step": 2744 + }, + { + "epoch": 0.7289868543354137, + "grad_norm": 1.009992458232401, + "learning_rate": 1.5068288388839634e-05, + "loss": 0.28822118043899536, + "step": 2745 + }, + { + "epoch": 0.7292524233169566, + "grad_norm": 1.1623698216562623, + "learning_rate": 1.5064502855534237e-05, + "loss": 0.3129134476184845, + "step": 2746 + }, + { + "epoch": 0.7295179922984996, + "grad_norm": 1.0993962878508883, + "learning_rate": 1.5060716345852423e-05, + "loss": 0.332313597202301, + "step": 2747 + }, + { + "epoch": 0.7297835612800425, + "grad_norm": 1.1989932540466257, + "learning_rate": 1.5056928860524181e-05, + "loss": 0.3425176739692688, + "step": 2748 + }, + { + "epoch": 0.7300491302615855, + "grad_norm": 1.006044605592889, + "learning_rate": 1.5053140400279693e-05, + "loss": 0.2737991511821747, + "step": 2749 + }, + { + "epoch": 0.7303146992431284, + "grad_norm": 0.963162900300573, + "learning_rate": 1.5049350965849337e-05, + "loss": 0.27506589889526367, + "step": 2750 + }, + { + "epoch": 0.7305802682246714, + "grad_norm": 0.9901021314780329, + "learning_rate": 1.5045560557963663e-05, + "loss": 0.25581830739974976, + "step": 2751 + }, + { + "epoch": 0.7308458372062143, + "grad_norm": 1.0977147554610498, + "learning_rate": 1.5041769177353423e-05, + "loss": 0.31746333837509155, + "step": 2752 + }, + { + "epoch": 0.7311114061877573, + "grad_norm": 1.142455577048558, + "learning_rate": 1.5037976824749545e-05, + "loss": 0.3119337260723114, + "step": 2753 + }, + { + "epoch": 0.7313769751693002, + "grad_norm": 1.0824713857839723, + "learning_rate": 1.5034183500883153e-05, + "loss": 0.3330266773700714, + "step": 2754 + }, + { + "epoch": 0.7316425441508432, + "grad_norm": 1.1870819737785345, + "learning_rate": 1.5030389206485554e-05, + "loss": 0.2794867753982544, + "step": 2755 + }, + { + "epoch": 0.7319081131323861, + 
"grad_norm": 1.0826714009199063, + "learning_rate": 1.5026593942288248e-05, + "loss": 0.33273079991340637, + "step": 2756 + }, + { + "epoch": 0.7321736821139291, + "grad_norm": 1.1000195904608074, + "learning_rate": 1.502279770902291e-05, + "loss": 0.30673256516456604, + "step": 2757 + }, + { + "epoch": 0.732439251095472, + "grad_norm": 1.1311236734843304, + "learning_rate": 1.5019000507421412e-05, + "loss": 0.3126910924911499, + "step": 2758 + }, + { + "epoch": 0.732704820077015, + "grad_norm": 1.1665747930638253, + "learning_rate": 1.5015202338215811e-05, + "loss": 0.35423290729522705, + "step": 2759 + }, + { + "epoch": 0.7329703890585579, + "grad_norm": 1.0691634248957984, + "learning_rate": 1.5011403202138346e-05, + "loss": 0.31541377305984497, + "step": 2760 + }, + { + "epoch": 0.7332359580401009, + "grad_norm": 3.4446251175420257, + "learning_rate": 1.5007603099921451e-05, + "loss": 0.31460440158843994, + "step": 2761 + }, + { + "epoch": 0.7335015270216438, + "grad_norm": 1.0828016056563536, + "learning_rate": 1.5003802032297735e-05, + "loss": 0.2786293923854828, + "step": 2762 + }, + { + "epoch": 0.7337670960031868, + "grad_norm": 1.1025311021139896, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.27977997064590454, + "step": 2763 + }, + { + "epoch": 0.7340326649847297, + "grad_norm": 1.1136339551828278, + "learning_rate": 1.4996197003761237e-05, + "loss": 0.2933383584022522, + "step": 2764 + }, + { + "epoch": 0.7342982339662727, + "grad_norm": 1.0743056930311463, + "learning_rate": 1.4992393044314617e-05, + "loss": 0.30623573064804077, + "step": 2765 + }, + { + "epoch": 0.7345638029478156, + "grad_norm": 1.112681662128017, + "learning_rate": 1.4988588122393497e-05, + "loss": 0.28665077686309814, + "step": 2766 + }, + { + "epoch": 0.7348293719293586, + "grad_norm": 1.0268941907147413, + "learning_rate": 1.4984782238731422e-05, + "loss": 0.3245697021484375, + "step": 2767 + }, + { + "epoch": 0.7350949409109017, + "grad_norm": 1.118864717612721, + 
"learning_rate": 1.4980975394062122e-05, + "loss": 0.29477447271347046, + "step": 2768 + }, + { + "epoch": 0.7353605098924446, + "grad_norm": 1.009879072463833, + "learning_rate": 1.4977167589119508e-05, + "loss": 0.29174134135246277, + "step": 2769 + }, + { + "epoch": 0.7356260788739876, + "grad_norm": 1.010733766191454, + "learning_rate": 1.4973358824637687e-05, + "loss": 0.29473474621772766, + "step": 2770 + }, + { + "epoch": 0.7358916478555305, + "grad_norm": 1.3454647120520804, + "learning_rate": 1.4969549101350938e-05, + "loss": 0.3095156252384186, + "step": 2771 + }, + { + "epoch": 0.7361572168370735, + "grad_norm": 1.0578448721867733, + "learning_rate": 1.4965738419993733e-05, + "loss": 0.26295265555381775, + "step": 2772 + }, + { + "epoch": 0.7364227858186164, + "grad_norm": 1.0590497560307077, + "learning_rate": 1.4961926781300723e-05, + "loss": 0.2989509701728821, + "step": 2773 + }, + { + "epoch": 0.7366883548001594, + "grad_norm": 1.0783454816561941, + "learning_rate": 1.4958114186006756e-05, + "loss": 0.31087079644203186, + "step": 2774 + }, + { + "epoch": 0.7369539237817023, + "grad_norm": 1.0953647378016445, + "learning_rate": 1.4954300634846845e-05, + "loss": 0.3063197433948517, + "step": 2775 + }, + { + "epoch": 0.7372194927632453, + "grad_norm": 1.0858506486148067, + "learning_rate": 1.4950486128556208e-05, + "loss": 0.3149424195289612, + "step": 2776 + }, + { + "epoch": 0.7374850617447882, + "grad_norm": 1.0199984929310564, + "learning_rate": 1.4946670667870224e-05, + "loss": 0.2724878191947937, + "step": 2777 + }, + { + "epoch": 0.7377506307263312, + "grad_norm": 1.0033150283887489, + "learning_rate": 1.4942854253524479e-05, + "loss": 0.2556690275669098, + "step": 2778 + }, + { + "epoch": 0.7380161997078741, + "grad_norm": 1.0594159401263619, + "learning_rate": 1.4939036886254727e-05, + "loss": 0.2704542875289917, + "step": 2779 + }, + { + "epoch": 0.7382817686894171, + "grad_norm": 1.052456117640013, + "learning_rate": 1.4935218566796918e-05, 
+ "loss": 0.26762163639068604, + "step": 2780 + }, + { + "epoch": 0.73854733767096, + "grad_norm": 1.1328164222449624, + "learning_rate": 1.4931399295887172e-05, + "loss": 0.3376831114292145, + "step": 2781 + }, + { + "epoch": 0.738812906652503, + "grad_norm": 1.0695003562166123, + "learning_rate": 1.4927579074261803e-05, + "loss": 0.2980082631111145, + "step": 2782 + }, + { + "epoch": 0.7390784756340459, + "grad_norm": 1.0340858480290613, + "learning_rate": 1.4923757902657306e-05, + "loss": 0.27693796157836914, + "step": 2783 + }, + { + "epoch": 0.7393440446155889, + "grad_norm": 1.0204290883803, + "learning_rate": 1.4919935781810353e-05, + "loss": 0.3109282851219177, + "step": 2784 + }, + { + "epoch": 0.7396096135971318, + "grad_norm": 1.12631585013599, + "learning_rate": 1.4916112712457807e-05, + "loss": 0.3123949468135834, + "step": 2785 + }, + { + "epoch": 0.7398751825786748, + "grad_norm": 1.143039341014623, + "learning_rate": 1.4912288695336709e-05, + "loss": 0.3232062757015228, + "step": 2786 + }, + { + "epoch": 0.7401407515602177, + "grad_norm": 1.0315778016896975, + "learning_rate": 1.4908463731184287e-05, + "loss": 0.2685563862323761, + "step": 2787 + }, + { + "epoch": 0.7404063205417607, + "grad_norm": 1.076569860938466, + "learning_rate": 1.4904637820737945e-05, + "loss": 0.25752881169319153, + "step": 2788 + }, + { + "epoch": 0.7406718895233037, + "grad_norm": 1.2236263687690485, + "learning_rate": 1.4900810964735279e-05, + "loss": 0.2887497544288635, + "step": 2789 + }, + { + "epoch": 0.7409374585048466, + "grad_norm": 1.126755867019387, + "learning_rate": 1.489698316391406e-05, + "loss": 0.28804779052734375, + "step": 2790 + }, + { + "epoch": 0.7412030274863896, + "grad_norm": 1.0931262335064922, + "learning_rate": 1.489315441901224e-05, + "loss": 0.2684408724308014, + "step": 2791 + }, + { + "epoch": 0.7414685964679325, + "grad_norm": 1.0509233991385625, + "learning_rate": 1.4889324730767959e-05, + "loss": 0.31945526599884033, + "step": 2792 + }, + 
{ + "epoch": 0.7417341654494755, + "grad_norm": 1.3391113530092205, + "learning_rate": 1.488549409991953e-05, + "loss": 0.34446024894714355, + "step": 2793 + }, + { + "epoch": 0.7419997344310184, + "grad_norm": 1.094751814978447, + "learning_rate": 1.488166252720546e-05, + "loss": 0.28849151730537415, + "step": 2794 + }, + { + "epoch": 0.7422653034125614, + "grad_norm": 1.0431424597135226, + "learning_rate": 1.4877830013364429e-05, + "loss": 0.2793633043766022, + "step": 2795 + }, + { + "epoch": 0.7425308723941043, + "grad_norm": 1.1811188011136542, + "learning_rate": 1.4873996559135298e-05, + "loss": 0.3211687505245209, + "step": 2796 + }, + { + "epoch": 0.7427964413756474, + "grad_norm": 1.004634818722801, + "learning_rate": 1.4870162165257114e-05, + "loss": 0.26225876808166504, + "step": 2797 + }, + { + "epoch": 0.7430620103571903, + "grad_norm": 1.7885293848946355, + "learning_rate": 1.4866326832469105e-05, + "loss": 0.3100029528141022, + "step": 2798 + }, + { + "epoch": 0.7433275793387333, + "grad_norm": 1.0428487423040855, + "learning_rate": 1.4862490561510675e-05, + "loss": 0.29399827122688293, + "step": 2799 + }, + { + "epoch": 0.7435931483202762, + "grad_norm": 0.9886298200418341, + "learning_rate": 1.4858653353121412e-05, + "loss": 0.27357399463653564, + "step": 2800 + }, + { + "epoch": 0.7438587173018192, + "grad_norm": 1.1101962385134683, + "learning_rate": 1.4854815208041087e-05, + "loss": 0.34575730562210083, + "step": 2801 + }, + { + "epoch": 0.7441242862833621, + "grad_norm": 1.0351474931606812, + "learning_rate": 1.4850976127009644e-05, + "loss": 0.28487247228622437, + "step": 2802 + }, + { + "epoch": 0.7443898552649051, + "grad_norm": 1.0283492066128257, + "learning_rate": 1.484713611076722e-05, + "loss": 0.264443576335907, + "step": 2803 + }, + { + "epoch": 0.744655424246448, + "grad_norm": 1.085429543255666, + "learning_rate": 1.4843295160054116e-05, + "loss": 0.32750973105430603, + "step": 2804 + }, + { + "epoch": 0.744920993227991, + 
"grad_norm": 1.0136013055294886, + "learning_rate": 1.4839453275610827e-05, + "loss": 0.24080191552639008, + "step": 2805 + }, + { + "epoch": 0.7451865622095339, + "grad_norm": 1.1486643921382949, + "learning_rate": 1.4835610458178025e-05, + "loss": 0.31667011976242065, + "step": 2806 + }, + { + "epoch": 0.7454521311910769, + "grad_norm": 1.0103490185384167, + "learning_rate": 1.4831766708496553e-05, + "loss": 0.2754175066947937, + "step": 2807 + }, + { + "epoch": 0.7457177001726198, + "grad_norm": 1.0607394107689443, + "learning_rate": 1.482792202730745e-05, + "loss": 0.2890132963657379, + "step": 2808 + }, + { + "epoch": 0.7459832691541628, + "grad_norm": 1.049970305589495, + "learning_rate": 1.4824076415351918e-05, + "loss": 0.3402877748012543, + "step": 2809 + }, + { + "epoch": 0.7462488381357057, + "grad_norm": 1.0879104018503691, + "learning_rate": 1.4820229873371347e-05, + "loss": 0.3167210519313812, + "step": 2810 + }, + { + "epoch": 0.7465144071172487, + "grad_norm": 0.9983910427341833, + "learning_rate": 1.4816382402107308e-05, + "loss": 0.2653643786907196, + "step": 2811 + }, + { + "epoch": 0.7467799760987917, + "grad_norm": 1.2191167585139304, + "learning_rate": 1.4812534002301547e-05, + "loss": 0.3202674984931946, + "step": 2812 + }, + { + "epoch": 0.7470455450803346, + "grad_norm": 1.0461975743299208, + "learning_rate": 1.4808684674695985e-05, + "loss": 0.2942724823951721, + "step": 2813 + }, + { + "epoch": 0.7473111140618776, + "grad_norm": 1.0581736193326858, + "learning_rate": 1.480483442003273e-05, + "loss": 0.28640663623809814, + "step": 2814 + }, + { + "epoch": 0.7475766830434205, + "grad_norm": 0.9932743335315769, + "learning_rate": 1.4800983239054071e-05, + "loss": 0.26214420795440674, + "step": 2815 + }, + { + "epoch": 0.7478422520249635, + "grad_norm": 1.0324489729554576, + "learning_rate": 1.4797131132502464e-05, + "loss": 0.3288992643356323, + "step": 2816 + }, + { + "epoch": 0.7481078210065064, + "grad_norm": 0.9775792939666473, + 
"learning_rate": 1.4793278101120551e-05, + "loss": 0.2622208297252655, + "step": 2817 + }, + { + "epoch": 0.7483733899880494, + "grad_norm": 1.0856486279870832, + "learning_rate": 1.4789424145651152e-05, + "loss": 0.3223533034324646, + "step": 2818 + }, + { + "epoch": 0.7486389589695923, + "grad_norm": 0.9640735701611682, + "learning_rate": 1.4785569266837264e-05, + "loss": 0.25849875807762146, + "step": 2819 + }, + { + "epoch": 0.7489045279511353, + "grad_norm": 1.20204465384733, + "learning_rate": 1.478171346542206e-05, + "loss": 0.3477833569049835, + "step": 2820 + }, + { + "epoch": 0.7491700969326782, + "grad_norm": 1.0577809669167442, + "learning_rate": 1.4777856742148897e-05, + "loss": 0.2799205780029297, + "step": 2821 + }, + { + "epoch": 0.7494356659142212, + "grad_norm": 1.624939710599736, + "learning_rate": 1.4773999097761304e-05, + "loss": 0.2591988444328308, + "step": 2822 + }, + { + "epoch": 0.7497012348957641, + "grad_norm": 1.2869478314125868, + "learning_rate": 1.477014053300299e-05, + "loss": 0.30161747336387634, + "step": 2823 + }, + { + "epoch": 0.7499668038773071, + "grad_norm": 1.0738509532979332, + "learning_rate": 1.4766281048617837e-05, + "loss": 0.28202176094055176, + "step": 2824 + }, + { + "epoch": 0.7502323728588501, + "grad_norm": 1.0042946509670743, + "learning_rate": 1.4762420645349912e-05, + "loss": 0.26074907183647156, + "step": 2825 + }, + { + "epoch": 0.7504979418403931, + "grad_norm": 1.1385436298617553, + "learning_rate": 1.4758559323943455e-05, + "loss": 0.2822819948196411, + "step": 2826 + }, + { + "epoch": 0.750763510821936, + "grad_norm": 1.1069166183989807, + "learning_rate": 1.4754697085142879e-05, + "loss": 0.2704991102218628, + "step": 2827 + }, + { + "epoch": 0.751029079803479, + "grad_norm": 1.1005590878466516, + "learning_rate": 1.4750833929692785e-05, + "loss": 0.2627401053905487, + "step": 2828 + }, + { + "epoch": 0.751294648785022, + "grad_norm": 1.0886740028659867, + "learning_rate": 1.474696985833794e-05, + 
"loss": 0.2898240089416504, + "step": 2829 + }, + { + "epoch": 0.7515602177665649, + "grad_norm": 1.0291450176805186, + "learning_rate": 1.4743104871823291e-05, + "loss": 0.30080029368400574, + "step": 2830 + }, + { + "epoch": 0.7518257867481078, + "grad_norm": 1.0953597523125502, + "learning_rate": 1.473923897089396e-05, + "loss": 0.2950359284877777, + "step": 2831 + }, + { + "epoch": 0.7520913557296508, + "grad_norm": 1.1129882579718784, + "learning_rate": 1.4735372156295253e-05, + "loss": 0.31936827301979065, + "step": 2832 + }, + { + "epoch": 0.7523569247111938, + "grad_norm": 1.1117484749822675, + "learning_rate": 1.4731504428772642e-05, + "loss": 0.2771468460559845, + "step": 2833 + }, + { + "epoch": 0.7526224936927367, + "grad_norm": 1.1332551367729735, + "learning_rate": 1.4727635789071779e-05, + "loss": 0.3135997951030731, + "step": 2834 + }, + { + "epoch": 0.7528880626742797, + "grad_norm": 1.1215560189558773, + "learning_rate": 1.4723766237938495e-05, + "loss": 0.29874372482299805, + "step": 2835 + }, + { + "epoch": 0.7531536316558226, + "grad_norm": 1.0292177835845961, + "learning_rate": 1.4719895776118789e-05, + "loss": 0.249681293964386, + "step": 2836 + }, + { + "epoch": 0.7534192006373656, + "grad_norm": 1.0567186687732057, + "learning_rate": 1.4716024404358847e-05, + "loss": 0.28544771671295166, + "step": 2837 + }, + { + "epoch": 0.7536847696189085, + "grad_norm": 1.1290911495331684, + "learning_rate": 1.4712152123405018e-05, + "loss": 0.32532355189323425, + "step": 2838 + }, + { + "epoch": 0.7539503386004515, + "grad_norm": 1.1212187873017119, + "learning_rate": 1.4708278934003835e-05, + "loss": 0.31663140654563904, + "step": 2839 + }, + { + "epoch": 0.7542159075819944, + "grad_norm": 1.123142254862964, + "learning_rate": 1.4704404836902005e-05, + "loss": 0.30552318692207336, + "step": 2840 + }, + { + "epoch": 0.7544814765635374, + "grad_norm": 1.1574657252500693, + "learning_rate": 1.47005298328464e-05, + "loss": 0.3019601106643677, + "step": 
2841 + }, + { + "epoch": 0.7547470455450803, + "grad_norm": 1.0814580547673966, + "learning_rate": 1.4696653922584084e-05, + "loss": 0.321606308221817, + "step": 2842 + }, + { + "epoch": 0.7550126145266233, + "grad_norm": 1.138590953455986, + "learning_rate": 1.4692777106862281e-05, + "loss": 0.2709462642669678, + "step": 2843 + }, + { + "epoch": 0.7552781835081662, + "grad_norm": 1.1366302949330385, + "learning_rate": 1.46888993864284e-05, + "loss": 0.2882609963417053, + "step": 2844 + }, + { + "epoch": 0.7555437524897092, + "grad_norm": 0.9948609987035232, + "learning_rate": 1.4685020762030019e-05, + "loss": 0.25843000411987305, + "step": 2845 + }, + { + "epoch": 0.7558093214712521, + "grad_norm": 1.1002004205654323, + "learning_rate": 1.4681141234414889e-05, + "loss": 0.30962038040161133, + "step": 2846 + }, + { + "epoch": 0.7560748904527951, + "grad_norm": 1.2025960097123465, + "learning_rate": 1.4677260804330938e-05, + "loss": 0.304874062538147, + "step": 2847 + }, + { + "epoch": 0.756340459434338, + "grad_norm": 1.2287867091921092, + "learning_rate": 1.4673379472526268e-05, + "loss": 0.3425619602203369, + "step": 2848 + }, + { + "epoch": 0.756606028415881, + "grad_norm": 1.0701256182117689, + "learning_rate": 1.4669497239749153e-05, + "loss": 0.3002302050590515, + "step": 2849 + }, + { + "epoch": 0.7568715973974239, + "grad_norm": 1.1005370830207322, + "learning_rate": 1.4665614106748038e-05, + "loss": 0.31008803844451904, + "step": 2850 + }, + { + "epoch": 0.7571371663789669, + "grad_norm": 1.0175712407141912, + "learning_rate": 1.4661730074271551e-05, + "loss": 0.27829408645629883, + "step": 2851 + }, + { + "epoch": 0.7574027353605098, + "grad_norm": 1.0501959661073665, + "learning_rate": 1.4657845143068488e-05, + "loss": 0.25915467739105225, + "step": 2852 + }, + { + "epoch": 0.7576683043420529, + "grad_norm": 1.0719536636155031, + "learning_rate": 1.4653959313887813e-05, + "loss": 0.2843416929244995, + "step": 2853 + }, + { + "epoch": 0.7579338733235959, 
+ "grad_norm": 1.0489373710223147, + "learning_rate": 1.465007258747867e-05, + "loss": 0.2851647138595581, + "step": 2854 + }, + { + "epoch": 0.7581994423051388, + "grad_norm": 1.085754694338766, + "learning_rate": 1.4646184964590378e-05, + "loss": 0.266017884016037, + "step": 2855 + }, + { + "epoch": 0.7584650112866818, + "grad_norm": 1.0789098348141843, + "learning_rate": 1.4642296445972421e-05, + "loss": 0.30142179131507874, + "step": 2856 + }, + { + "epoch": 0.7587305802682247, + "grad_norm": 0.9904299934324251, + "learning_rate": 1.463840703237446e-05, + "loss": 0.2878327965736389, + "step": 2857 + }, + { + "epoch": 0.7589961492497677, + "grad_norm": 1.114310168260114, + "learning_rate": 1.4634516724546326e-05, + "loss": 0.2919169068336487, + "step": 2858 + }, + { + "epoch": 0.7592617182313106, + "grad_norm": 0.9954308342175644, + "learning_rate": 1.4630625523238027e-05, + "loss": 0.2530924081802368, + "step": 2859 + }, + { + "epoch": 0.7595272872128536, + "grad_norm": 1.0858688189416337, + "learning_rate": 1.462673342919974e-05, + "loss": 0.3009106516838074, + "step": 2860 + }, + { + "epoch": 0.7597928561943965, + "grad_norm": 1.1572533440881312, + "learning_rate": 1.4622840443181817e-05, + "loss": 0.3114222288131714, + "step": 2861 + }, + { + "epoch": 0.7600584251759395, + "grad_norm": 1.2224434370177688, + "learning_rate": 1.4618946565934775e-05, + "loss": 0.344540536403656, + "step": 2862 + }, + { + "epoch": 0.7603239941574824, + "grad_norm": 1.0685722656113568, + "learning_rate": 1.4615051798209312e-05, + "loss": 0.263607919216156, + "step": 2863 + }, + { + "epoch": 0.7605895631390254, + "grad_norm": 1.018611353798299, + "learning_rate": 1.4611156140756293e-05, + "loss": 0.2685706317424774, + "step": 2864 + }, + { + "epoch": 0.7608551321205683, + "grad_norm": 1.1431197890714058, + "learning_rate": 1.4607259594326752e-05, + "loss": 0.32342326641082764, + "step": 2865 + }, + { + "epoch": 0.7611207011021113, + "grad_norm": 1.182050624874759, + 
"learning_rate": 1.4603362159671902e-05, + "loss": 0.3088849186897278, + "step": 2866 + }, + { + "epoch": 0.7613862700836542, + "grad_norm": 1.0482348167122462, + "learning_rate": 1.4599463837543114e-05, + "loss": 0.26718589663505554, + "step": 2867 + }, + { + "epoch": 0.7616518390651972, + "grad_norm": 1.0051992534296357, + "learning_rate": 1.4595564628691944e-05, + "loss": 0.29511263966560364, + "step": 2868 + }, + { + "epoch": 0.7619174080467401, + "grad_norm": 1.0974088254649037, + "learning_rate": 1.4591664533870118e-05, + "loss": 0.2940484285354614, + "step": 2869 + }, + { + "epoch": 0.7621829770282831, + "grad_norm": 1.1564456059915547, + "learning_rate": 1.4587763553829521e-05, + "loss": 0.28167295455932617, + "step": 2870 + }, + { + "epoch": 0.762448546009826, + "grad_norm": 1.0590804851451585, + "learning_rate": 1.4583861689322219e-05, + "loss": 0.3362962007522583, + "step": 2871 + }, + { + "epoch": 0.762714114991369, + "grad_norm": 1.1206777555300773, + "learning_rate": 1.4579958941100445e-05, + "loss": 0.3003339171409607, + "step": 2872 + }, + { + "epoch": 0.7629796839729119, + "grad_norm": 1.0572512051509857, + "learning_rate": 1.4576055309916602e-05, + "loss": 0.3191443979740143, + "step": 2873 + }, + { + "epoch": 0.7632452529544549, + "grad_norm": 1.0684782615871369, + "learning_rate": 1.4572150796523265e-05, + "loss": 0.30804574489593506, + "step": 2874 + }, + { + "epoch": 0.7635108219359978, + "grad_norm": 1.0214046475154577, + "learning_rate": 1.4568245401673178e-05, + "loss": 0.32462549209594727, + "step": 2875 + }, + { + "epoch": 0.7637763909175408, + "grad_norm": 1.1357318078490404, + "learning_rate": 1.4564339126119254e-05, + "loss": 0.27751386165618896, + "step": 2876 + }, + { + "epoch": 0.7640419598990837, + "grad_norm": 1.0701221152994065, + "learning_rate": 1.4560431970614578e-05, + "loss": 0.27194011211395264, + "step": 2877 + }, + { + "epoch": 0.7643075288806267, + "grad_norm": 1.134082938487784, + "learning_rate": 
1.4556523935912406e-05, + "loss": 0.28701072931289673, + "step": 2878 + }, + { + "epoch": 0.7645730978621696, + "grad_norm": 1.0814539768930527, + "learning_rate": 1.4552615022766156e-05, + "loss": 0.3278783857822418, + "step": 2879 + }, + { + "epoch": 0.7648386668437126, + "grad_norm": 1.096499511679905, + "learning_rate": 1.4548705231929426e-05, + "loss": 0.3292006254196167, + "step": 2880 + }, + { + "epoch": 0.7651042358252557, + "grad_norm": 1.30563906707581, + "learning_rate": 1.4544794564155971e-05, + "loss": 0.33038759231567383, + "step": 2881 + }, + { + "epoch": 0.7653698048067986, + "grad_norm": 1.0799053745016685, + "learning_rate": 1.4540883020199725e-05, + "loss": 0.29183000326156616, + "step": 2882 + }, + { + "epoch": 0.7656353737883416, + "grad_norm": 1.049945067498866, + "learning_rate": 1.4536970600814789e-05, + "loss": 0.28066399693489075, + "step": 2883 + }, + { + "epoch": 0.7659009427698845, + "grad_norm": 1.0673215015420034, + "learning_rate": 1.4533057306755427e-05, + "loss": 0.2832046151161194, + "step": 2884 + }, + { + "epoch": 0.7661665117514275, + "grad_norm": 1.0799218487874103, + "learning_rate": 1.4529143138776078e-05, + "loss": 0.3006540834903717, + "step": 2885 + }, + { + "epoch": 0.7664320807329704, + "grad_norm": 0.965945374746046, + "learning_rate": 1.4525228097631351e-05, + "loss": 0.2793240547180176, + "step": 2886 + }, + { + "epoch": 0.7666976497145134, + "grad_norm": 1.0791298696355873, + "learning_rate": 1.452131218407602e-05, + "loss": 0.2895192503929138, + "step": 2887 + }, + { + "epoch": 0.7669632186960563, + "grad_norm": 1.1085071656285739, + "learning_rate": 1.4517395398865022e-05, + "loss": 0.27707618474960327, + "step": 2888 + }, + { + "epoch": 0.7672287876775993, + "grad_norm": 0.9801959170871006, + "learning_rate": 1.4513477742753465e-05, + "loss": 0.29167065024375916, + "step": 2889 + }, + { + "epoch": 0.7674943566591422, + "grad_norm": 0.9760628575291594, + "learning_rate": 1.4509559216496631e-05, + "loss": 
0.2670987844467163, + "step": 2890 + }, + { + "epoch": 0.7677599256406852, + "grad_norm": 1.0541213606202946, + "learning_rate": 1.4505639820849968e-05, + "loss": 0.3025206923484802, + "step": 2891 + }, + { + "epoch": 0.7680254946222281, + "grad_norm": 1.0721054101606857, + "learning_rate": 1.4501719556569087e-05, + "loss": 0.3104705512523651, + "step": 2892 + }, + { + "epoch": 0.7682910636037711, + "grad_norm": 1.1715745485021363, + "learning_rate": 1.4497798424409766e-05, + "loss": 0.2972267270088196, + "step": 2893 + }, + { + "epoch": 0.768556632585314, + "grad_norm": 1.3084992927105763, + "learning_rate": 1.4493876425127957e-05, + "loss": 0.34956347942352295, + "step": 2894 + }, + { + "epoch": 0.768822201566857, + "grad_norm": 1.0910589486872886, + "learning_rate": 1.4489953559479775e-05, + "loss": 0.3122873902320862, + "step": 2895 + }, + { + "epoch": 0.7690877705483999, + "grad_norm": 1.0070263080445798, + "learning_rate": 1.4486029828221497e-05, + "loss": 0.29645755887031555, + "step": 2896 + }, + { + "epoch": 0.7693533395299429, + "grad_norm": 1.1312479199974272, + "learning_rate": 1.448210523210958e-05, + "loss": 0.33357223868370056, + "step": 2897 + }, + { + "epoch": 0.7696189085114858, + "grad_norm": 1.0807209302083978, + "learning_rate": 1.4478179771900634e-05, + "loss": 0.2780191898345947, + "step": 2898 + }, + { + "epoch": 0.7698844774930288, + "grad_norm": 1.098992372480737, + "learning_rate": 1.447425344835144e-05, + "loss": 0.31503236293792725, + "step": 2899 + }, + { + "epoch": 0.7701500464745717, + "grad_norm": 1.0152023365250116, + "learning_rate": 1.4470326262218955e-05, + "loss": 0.2843332290649414, + "step": 2900 + }, + { + "epoch": 0.7704156154561147, + "grad_norm": 1.1041753681410225, + "learning_rate": 1.4466398214260286e-05, + "loss": 0.305475652217865, + "step": 2901 + }, + { + "epoch": 0.7706811844376577, + "grad_norm": 1.0159008972115877, + "learning_rate": 1.446246930523272e-05, + "loss": 0.28418007493019104, + "step": 2902 + }, + { + 
"epoch": 0.7709467534192006, + "grad_norm": 2.0289726917266027, + "learning_rate": 1.44585395358937e-05, + "loss": 0.28237032890319824, + "step": 2903 + }, + { + "epoch": 0.7712123224007436, + "grad_norm": 1.1334683720848762, + "learning_rate": 1.4454608907000843e-05, + "loss": 0.33727777004241943, + "step": 2904 + }, + { + "epoch": 0.7714778913822865, + "grad_norm": 1.1393257541232447, + "learning_rate": 1.4450677419311925e-05, + "loss": 0.2977198660373688, + "step": 2905 + }, + { + "epoch": 0.7717434603638295, + "grad_norm": 1.0793508547506123, + "learning_rate": 1.4446745073584891e-05, + "loss": 0.3095981776714325, + "step": 2906 + }, + { + "epoch": 0.7720090293453724, + "grad_norm": 1.138471500425881, + "learning_rate": 1.4442811870577851e-05, + "loss": 0.29808440804481506, + "step": 2907 + }, + { + "epoch": 0.7722745983269154, + "grad_norm": 1.2668271633221484, + "learning_rate": 1.4438877811049079e-05, + "loss": 0.32444530725479126, + "step": 2908 + }, + { + "epoch": 0.7725401673084584, + "grad_norm": 1.0229226464155372, + "learning_rate": 1.443494289575702e-05, + "loss": 0.24782602488994598, + "step": 2909 + }, + { + "epoch": 0.7728057362900014, + "grad_norm": 1.079755307057506, + "learning_rate": 1.4431007125460274e-05, + "loss": 0.31289762258529663, + "step": 2910 + }, + { + "epoch": 0.7730713052715443, + "grad_norm": 1.0928540626872372, + "learning_rate": 1.4427070500917615e-05, + "loss": 0.31444042921066284, + "step": 2911 + }, + { + "epoch": 0.7733368742530873, + "grad_norm": 1.1235251868548595, + "learning_rate": 1.4423133022887973e-05, + "loss": 0.31347882747650146, + "step": 2912 + }, + { + "epoch": 0.7736024432346302, + "grad_norm": 1.1449169077961199, + "learning_rate": 1.4419194692130453e-05, + "loss": 0.3025411367416382, + "step": 2913 + }, + { + "epoch": 0.7738680122161732, + "grad_norm": 0.9734590933720824, + "learning_rate": 1.4415255509404316e-05, + "loss": 0.2954581081867218, + "step": 2914 + }, + { + "epoch": 0.7741335811977161, + 
"grad_norm": 1.051295802747811, + "learning_rate": 1.4411315475468988e-05, + "loss": 0.2675531506538391, + "step": 2915 + }, + { + "epoch": 0.7743991501792591, + "grad_norm": 1.0207923958770302, + "learning_rate": 1.4407374591084064e-05, + "loss": 0.29307854175567627, + "step": 2916 + }, + { + "epoch": 0.774664719160802, + "grad_norm": 0.9134258889524259, + "learning_rate": 1.4403432857009295e-05, + "loss": 0.2805953025817871, + "step": 2917 + }, + { + "epoch": 0.774930288142345, + "grad_norm": 1.1114518211112974, + "learning_rate": 1.439949027400461e-05, + "loss": 0.30805838108062744, + "step": 2918 + }, + { + "epoch": 0.7751958571238879, + "grad_norm": 1.063187320260136, + "learning_rate": 1.4395546842830085e-05, + "loss": 0.31501835584640503, + "step": 2919 + }, + { + "epoch": 0.7754614261054309, + "grad_norm": 1.025310766436644, + "learning_rate": 1.4391602564245975e-05, + "loss": 0.2719186246395111, + "step": 2920 + }, + { + "epoch": 0.7757269950869738, + "grad_norm": 1.0474571998069828, + "learning_rate": 1.4387657439012677e-05, + "loss": 0.29554325342178345, + "step": 2921 + }, + { + "epoch": 0.7759925640685168, + "grad_norm": 1.0103166752174864, + "learning_rate": 1.4383711467890776e-05, + "loss": 0.2993816137313843, + "step": 2922 + }, + { + "epoch": 0.7762581330500598, + "grad_norm": 1.087143911717871, + "learning_rate": 1.4379764651641004e-05, + "loss": 0.3412264883518219, + "step": 2923 + }, + { + "epoch": 0.7765237020316027, + "grad_norm": 1.3163055539647115, + "learning_rate": 1.4375816991024263e-05, + "loss": 0.3137913942337036, + "step": 2924 + }, + { + "epoch": 0.7767892710131457, + "grad_norm": 1.0026858390591848, + "learning_rate": 1.4371868486801611e-05, + "loss": 0.2710151672363281, + "step": 2925 + }, + { + "epoch": 0.7770548399946886, + "grad_norm": 1.060508746597415, + "learning_rate": 1.4367919139734279e-05, + "loss": 0.28521692752838135, + "step": 2926 + }, + { + "epoch": 0.7773204089762316, + "grad_norm": 0.9938687291505847, + 
"learning_rate": 1.4363968950583651e-05, + "loss": 0.2889919579029083, + "step": 2927 + }, + { + "epoch": 0.7775859779577745, + "grad_norm": 1.0641534591195945, + "learning_rate": 1.436001792011128e-05, + "loss": 0.31562381982803345, + "step": 2928 + }, + { + "epoch": 0.7778515469393175, + "grad_norm": 0.980719397790632, + "learning_rate": 1.4356066049078871e-05, + "loss": 0.2747528553009033, + "step": 2929 + }, + { + "epoch": 0.7781171159208604, + "grad_norm": 1.0890864939874727, + "learning_rate": 1.4352113338248303e-05, + "loss": 0.2918938398361206, + "step": 2930 + }, + { + "epoch": 0.7783826849024034, + "grad_norm": 1.1375978489291394, + "learning_rate": 1.4348159788381615e-05, + "loss": 0.3348507285118103, + "step": 2931 + }, + { + "epoch": 0.7786482538839463, + "grad_norm": 1.049930284325584, + "learning_rate": 1.4344205400241e-05, + "loss": 0.27206242084503174, + "step": 2932 + }, + { + "epoch": 0.7789138228654893, + "grad_norm": 1.0635705360778813, + "learning_rate": 1.434025017458882e-05, + "loss": 0.28496092557907104, + "step": 2933 + }, + { + "epoch": 0.7791793918470322, + "grad_norm": 1.1207237235097192, + "learning_rate": 1.4336294112187595e-05, + "loss": 0.3080131411552429, + "step": 2934 + }, + { + "epoch": 0.7794449608285752, + "grad_norm": 1.1562549835000784, + "learning_rate": 1.4332337213800008e-05, + "loss": 0.3116779029369354, + "step": 2935 + }, + { + "epoch": 0.7797105298101181, + "grad_norm": 1.0230593279992428, + "learning_rate": 1.43283794801889e-05, + "loss": 0.26526543498039246, + "step": 2936 + }, + { + "epoch": 0.7799760987916612, + "grad_norm": 1.0768548459396885, + "learning_rate": 1.4324420912117274e-05, + "loss": 0.2829325497150421, + "step": 2937 + }, + { + "epoch": 0.7802416677732041, + "grad_norm": 1.197165846783245, + "learning_rate": 1.43204615103483e-05, + "loss": 0.34146445989608765, + "step": 2938 + }, + { + "epoch": 0.7805072367547471, + "grad_norm": 1.1418950254878286, + "learning_rate": 1.43165012756453e-05, + "loss": 
0.316609650850296, + "step": 2939 + }, + { + "epoch": 0.78077280573629, + "grad_norm": 1.119861281862994, + "learning_rate": 1.4312540208771766e-05, + "loss": 0.3215107321739197, + "step": 2940 + }, + { + "epoch": 0.781038374717833, + "grad_norm": 1.0591732101512668, + "learning_rate": 1.4308578310491342e-05, + "loss": 0.2834000587463379, + "step": 2941 + }, + { + "epoch": 0.781303943699376, + "grad_norm": 1.1186376453102755, + "learning_rate": 1.430461558156783e-05, + "loss": 0.30184993147850037, + "step": 2942 + }, + { + "epoch": 0.7815695126809189, + "grad_norm": 1.1319557052801907, + "learning_rate": 1.4300652022765207e-05, + "loss": 0.3299996256828308, + "step": 2943 + }, + { + "epoch": 0.7818350816624619, + "grad_norm": 1.1269288601015153, + "learning_rate": 1.4296687634847592e-05, + "loss": 0.27565228939056396, + "step": 2944 + }, + { + "epoch": 0.7821006506440048, + "grad_norm": 1.1019395409868211, + "learning_rate": 1.4292722418579278e-05, + "loss": 0.30347493290901184, + "step": 2945 + }, + { + "epoch": 0.7823662196255478, + "grad_norm": 1.125677517693181, + "learning_rate": 1.4288756374724709e-05, + "loss": 0.31469428539276123, + "step": 2946 + }, + { + "epoch": 0.7826317886070907, + "grad_norm": 1.0500101449680372, + "learning_rate": 1.4284789504048493e-05, + "loss": 0.27361029386520386, + "step": 2947 + }, + { + "epoch": 0.7828973575886337, + "grad_norm": 1.057442611584268, + "learning_rate": 1.428082180731539e-05, + "loss": 0.29180705547332764, + "step": 2948 + }, + { + "epoch": 0.7831629265701766, + "grad_norm": 1.0218659697209738, + "learning_rate": 1.4276853285290334e-05, + "loss": 0.281120628118515, + "step": 2949 + }, + { + "epoch": 0.7834284955517196, + "grad_norm": 1.0029783457826962, + "learning_rate": 1.4272883938738406e-05, + "loss": 0.26144471764564514, + "step": 2950 + }, + { + "epoch": 0.7836940645332625, + "grad_norm": 1.0904458839940374, + "learning_rate": 1.4268913768424848e-05, + "loss": 0.3118991255760193, + "step": 2951 + }, + { + 
"epoch": 0.7839596335148055, + "grad_norm": 1.0581869365443632, + "learning_rate": 1.4264942775115065e-05, + "loss": 0.29352328181266785, + "step": 2952 + }, + { + "epoch": 0.7842252024963484, + "grad_norm": 1.025234952757571, + "learning_rate": 1.426097095957461e-05, + "loss": 0.2687748968601227, + "step": 2953 + }, + { + "epoch": 0.7844907714778914, + "grad_norm": 1.0817782920006436, + "learning_rate": 1.4256998322569212e-05, + "loss": 0.3106890916824341, + "step": 2954 + }, + { + "epoch": 0.7847563404594343, + "grad_norm": 1.0039841255701216, + "learning_rate": 1.4253024864864742e-05, + "loss": 0.2522161304950714, + "step": 2955 + }, + { + "epoch": 0.7850219094409773, + "grad_norm": 1.031799618380073, + "learning_rate": 1.424905058722724e-05, + "loss": 0.2994377613067627, + "step": 2956 + }, + { + "epoch": 0.7852874784225202, + "grad_norm": 1.295564211303899, + "learning_rate": 1.4245075490422893e-05, + "loss": 0.3753565549850464, + "step": 2957 + }, + { + "epoch": 0.7855530474040632, + "grad_norm": 1.2386689798654595, + "learning_rate": 1.424109957521806e-05, + "loss": 0.29544737935066223, + "step": 2958 + }, + { + "epoch": 0.7858186163856061, + "grad_norm": 1.0381164701705432, + "learning_rate": 1.423712284237925e-05, + "loss": 0.307847797870636, + "step": 2959 + }, + { + "epoch": 0.7860841853671491, + "grad_norm": 1.1107576873332587, + "learning_rate": 1.4233145292673127e-05, + "loss": 0.31758183240890503, + "step": 2960 + }, + { + "epoch": 0.786349754348692, + "grad_norm": 1.0358601319268448, + "learning_rate": 1.4229166926866517e-05, + "loss": 0.307254433631897, + "step": 2961 + }, + { + "epoch": 0.786615323330235, + "grad_norm": 1.2228062733167704, + "learning_rate": 1.42251877457264e-05, + "loss": 0.3513748049736023, + "step": 2962 + }, + { + "epoch": 0.7868808923117779, + "grad_norm": 1.1359729522705007, + "learning_rate": 1.422120775001992e-05, + "loss": 0.3025718629360199, + "step": 2963 + }, + { + "epoch": 0.7871464612933209, + "grad_norm": 
1.076503168390535, + "learning_rate": 1.4217226940514367e-05, + "loss": 0.2922811508178711, + "step": 2964 + }, + { + "epoch": 0.787412030274864, + "grad_norm": 1.07297262661661, + "learning_rate": 1.42132453179772e-05, + "loss": 0.29599297046661377, + "step": 2965 + }, + { + "epoch": 0.7876775992564069, + "grad_norm": 0.992121967255531, + "learning_rate": 1.4209262883176025e-05, + "loss": 0.28336548805236816, + "step": 2966 + }, + { + "epoch": 0.7879431682379499, + "grad_norm": 1.0655541697156172, + "learning_rate": 1.4205279636878613e-05, + "loss": 0.3100801110267639, + "step": 2967 + }, + { + "epoch": 0.7882087372194928, + "grad_norm": 1.165527486411767, + "learning_rate": 1.4201295579852881e-05, + "loss": 0.33067989349365234, + "step": 2968 + }, + { + "epoch": 0.7884743062010358, + "grad_norm": 1.1896877635723886, + "learning_rate": 1.4197310712866909e-05, + "loss": 0.282347172498703, + "step": 2969 + }, + { + "epoch": 0.7887398751825787, + "grad_norm": 1.0769183433483809, + "learning_rate": 1.419332503668894e-05, + "loss": 0.30585426092147827, + "step": 2970 + }, + { + "epoch": 0.7890054441641217, + "grad_norm": 1.0616062054836604, + "learning_rate": 1.4189338552087351e-05, + "loss": 0.3011561632156372, + "step": 2971 + }, + { + "epoch": 0.7892710131456646, + "grad_norm": 0.9722574451184507, + "learning_rate": 1.4185351259830705e-05, + "loss": 0.2700524926185608, + "step": 2972 + }, + { + "epoch": 0.7895365821272076, + "grad_norm": 1.0849811262666431, + "learning_rate": 1.4181363160687693e-05, + "loss": 0.2963382303714752, + "step": 2973 + }, + { + "epoch": 0.7898021511087505, + "grad_norm": 1.0388990841328773, + "learning_rate": 1.4177374255427183e-05, + "loss": 0.27132824063301086, + "step": 2974 + }, + { + "epoch": 0.7900677200902935, + "grad_norm": 0.9602477794817199, + "learning_rate": 1.417338454481818e-05, + "loss": 0.2539706826210022, + "step": 2975 + }, + { + "epoch": 0.7903332890718364, + "grad_norm": 1.0972216427869486, + "learning_rate": 
1.416939402962986e-05, + "loss": 0.28465601801872253, + "step": 2976 + }, + { + "epoch": 0.7905988580533794, + "grad_norm": 1.1885027397372414, + "learning_rate": 1.4165402710631544e-05, + "loss": 0.3020748198032379, + "step": 2977 + }, + { + "epoch": 0.7908644270349223, + "grad_norm": 1.0709231597298363, + "learning_rate": 1.416141058859271e-05, + "loss": 0.3157690465450287, + "step": 2978 + }, + { + "epoch": 0.7911299960164653, + "grad_norm": 1.0874979641604023, + "learning_rate": 1.4157417664282994e-05, + "loss": 0.2720191776752472, + "step": 2979 + }, + { + "epoch": 0.7913955649980082, + "grad_norm": 1.0670143355557837, + "learning_rate": 1.4153423938472185e-05, + "loss": 0.2931746542453766, + "step": 2980 + }, + { + "epoch": 0.7916611339795512, + "grad_norm": 1.0836941185599118, + "learning_rate": 1.4149429411930226e-05, + "loss": 0.2683875560760498, + "step": 2981 + }, + { + "epoch": 0.7919267029610941, + "grad_norm": 1.0454189872619364, + "learning_rate": 1.4145434085427216e-05, + "loss": 0.2559819519519806, + "step": 2982 + }, + { + "epoch": 0.7921922719426371, + "grad_norm": 1.1028368657772893, + "learning_rate": 1.4141437959733404e-05, + "loss": 0.2845582365989685, + "step": 2983 + }, + { + "epoch": 0.79245784092418, + "grad_norm": 1.05827279827959, + "learning_rate": 1.4137441035619197e-05, + "loss": 0.26766544580459595, + "step": 2984 + }, + { + "epoch": 0.792723409905723, + "grad_norm": 1.2459472391823172, + "learning_rate": 1.4133443313855155e-05, + "loss": 0.32089024782180786, + "step": 2985 + }, + { + "epoch": 0.7929889788872659, + "grad_norm": 1.053106908199776, + "learning_rate": 1.4129444795211993e-05, + "loss": 0.2756182551383972, + "step": 2986 + }, + { + "epoch": 0.7932545478688089, + "grad_norm": 1.231241306668284, + "learning_rate": 1.4125445480460573e-05, + "loss": 0.29487302899360657, + "step": 2987 + }, + { + "epoch": 0.7935201168503518, + "grad_norm": 1.1738297230948855, + "learning_rate": 1.4121445370371922e-05, + "loss": 
0.3362561762332916, + "step": 2988 + }, + { + "epoch": 0.7937856858318948, + "grad_norm": 1.1591988507026376, + "learning_rate": 1.4117444465717209e-05, + "loss": 0.2986692488193512, + "step": 2989 + }, + { + "epoch": 0.7940512548134377, + "grad_norm": 1.0341012671875776, + "learning_rate": 1.4113442767267766e-05, + "loss": 0.2725266218185425, + "step": 2990 + }, + { + "epoch": 0.7943168237949807, + "grad_norm": 1.1125466640148414, + "learning_rate": 1.4109440275795071e-05, + "loss": 0.29827257990837097, + "step": 2991 + }, + { + "epoch": 0.7945823927765236, + "grad_norm": 1.0512885973195232, + "learning_rate": 1.410543699207076e-05, + "loss": 0.2506203055381775, + "step": 2992 + }, + { + "epoch": 0.7948479617580667, + "grad_norm": 0.9867416114744889, + "learning_rate": 1.410143291686661e-05, + "loss": 0.2675034701824188, + "step": 2993 + }, + { + "epoch": 0.7951135307396097, + "grad_norm": 1.1763547306282318, + "learning_rate": 1.4097428050954571e-05, + "loss": 0.34528690576553345, + "step": 2994 + }, + { + "epoch": 0.7953790997211526, + "grad_norm": 1.1374135219725177, + "learning_rate": 1.4093422395106726e-05, + "loss": 0.27551063895225525, + "step": 2995 + }, + { + "epoch": 0.7956446687026956, + "grad_norm": 1.1195982376159075, + "learning_rate": 1.408941595009532e-05, + "loss": 0.3176268935203552, + "step": 2996 + }, + { + "epoch": 0.7959102376842385, + "grad_norm": 1.1804373403956752, + "learning_rate": 1.408540871669275e-05, + "loss": 0.30056723952293396, + "step": 2997 + }, + { + "epoch": 0.7961758066657815, + "grad_norm": 1.124570387942151, + "learning_rate": 1.4081400695671562e-05, + "loss": 0.32109886407852173, + "step": 2998 + }, + { + "epoch": 0.7964413756473244, + "grad_norm": 1.1262740571855958, + "learning_rate": 1.4077391887804457e-05, + "loss": 0.33622005581855774, + "step": 2999 + }, + { + "epoch": 0.7967069446288674, + "grad_norm": 1.1195153536613822, + "learning_rate": 1.4073382293864283e-05, + "loss": 0.3054961860179901, + "step": 3000 + }, + 
{ + "epoch": 0.7969725136104103, + "grad_norm": 1.1210721039096916, + "learning_rate": 1.4069371914624044e-05, + "loss": 0.3022462725639343, + "step": 3001 + }, + { + "epoch": 0.7972380825919533, + "grad_norm": 1.0116555063320039, + "learning_rate": 1.4065360750856891e-05, + "loss": 0.2500512897968292, + "step": 3002 + }, + { + "epoch": 0.7975036515734962, + "grad_norm": 1.233947002119444, + "learning_rate": 1.4061348803336135e-05, + "loss": 0.2960171699523926, + "step": 3003 + }, + { + "epoch": 0.7977692205550392, + "grad_norm": 3.53476121579318, + "learning_rate": 1.4057336072835228e-05, + "loss": 0.2941724359989166, + "step": 3004 + }, + { + "epoch": 0.7980347895365821, + "grad_norm": 1.0143157952003843, + "learning_rate": 1.4053322560127779e-05, + "loss": 0.2827858328819275, + "step": 3005 + }, + { + "epoch": 0.7983003585181251, + "grad_norm": 1.34417890867956, + "learning_rate": 1.4049308265987544e-05, + "loss": 0.32525116205215454, + "step": 3006 + }, + { + "epoch": 0.798565927499668, + "grad_norm": 1.1622605286979444, + "learning_rate": 1.4045293191188431e-05, + "loss": 0.26509979367256165, + "step": 3007 + }, + { + "epoch": 0.798831496481211, + "grad_norm": 1.1649049829769997, + "learning_rate": 1.4041277336504503e-05, + "loss": 0.3462742567062378, + "step": 3008 + }, + { + "epoch": 0.7990970654627539, + "grad_norm": 1.118975693723979, + "learning_rate": 1.4037260702709967e-05, + "loss": 0.2971092164516449, + "step": 3009 + }, + { + "epoch": 0.7993626344442969, + "grad_norm": 1.0541078602131526, + "learning_rate": 1.4033243290579182e-05, + "loss": 0.32359808683395386, + "step": 3010 + }, + { + "epoch": 0.7996282034258398, + "grad_norm": 0.9819968107477214, + "learning_rate": 1.4029225100886657e-05, + "loss": 0.2949031591415405, + "step": 3011 + }, + { + "epoch": 0.7998937724073828, + "grad_norm": 0.9639154080405838, + "learning_rate": 1.4025206134407051e-05, + "loss": 0.29888901114463806, + "step": 3012 + }, + { + "epoch": 0.8001593413889257, + "grad_norm": 
1.0921369087209054, + "learning_rate": 1.4021186391915181e-05, + "loss": 0.2999705672264099, + "step": 3013 + }, + { + "epoch": 0.8004249103704687, + "grad_norm": 1.027092536189555, + "learning_rate": 1.4017165874185996e-05, + "loss": 0.2725638449192047, + "step": 3014 + }, + { + "epoch": 0.8006904793520117, + "grad_norm": 1.6251260873819724, + "learning_rate": 1.4013144581994609e-05, + "loss": 0.2809314727783203, + "step": 3015 + }, + { + "epoch": 0.8009560483335546, + "grad_norm": 1.194026798460289, + "learning_rate": 1.400912251611628e-05, + "loss": 0.30335327982902527, + "step": 3016 + }, + { + "epoch": 0.8012216173150976, + "grad_norm": 1.0526756572542106, + "learning_rate": 1.400509967732641e-05, + "loss": 0.27780598402023315, + "step": 3017 + }, + { + "epoch": 0.8014871862966405, + "grad_norm": 1.0036615790617616, + "learning_rate": 1.400107606640056e-05, + "loss": 0.2865309715270996, + "step": 3018 + }, + { + "epoch": 0.8017527552781835, + "grad_norm": 1.067182271229665, + "learning_rate": 1.3997051684114431e-05, + "loss": 0.2691546082496643, + "step": 3019 + }, + { + "epoch": 0.8020183242597264, + "grad_norm": 1.0174199108878024, + "learning_rate": 1.3993026531243876e-05, + "loss": 0.30289226770401, + "step": 3020 + }, + { + "epoch": 0.8022838932412695, + "grad_norm": 1.1180967643802684, + "learning_rate": 1.3989000608564905e-05, + "loss": 0.2767682671546936, + "step": 3021 + }, + { + "epoch": 0.8025494622228124, + "grad_norm": 1.1982508587685934, + "learning_rate": 1.3984973916853657e-05, + "loss": 0.3423742353916168, + "step": 3022 + }, + { + "epoch": 0.8028150312043554, + "grad_norm": 1.1718790013716964, + "learning_rate": 1.3980946456886439e-05, + "loss": 0.3000536561012268, + "step": 3023 + }, + { + "epoch": 0.8030806001858983, + "grad_norm": 1.1431161282459077, + "learning_rate": 1.3976918229439698e-05, + "loss": 0.3071063756942749, + "step": 3024 + }, + { + "epoch": 0.8033461691674413, + "grad_norm": 1.6885640285561154, + "learning_rate": 
1.397288923529002e-05, + "loss": 0.31261157989501953, + "step": 3025 + }, + { + "epoch": 0.8036117381489842, + "grad_norm": 1.0076153318556622, + "learning_rate": 1.3968859475214156e-05, + "loss": 0.2658939063549042, + "step": 3026 + }, + { + "epoch": 0.8038773071305272, + "grad_norm": 1.0309089161631302, + "learning_rate": 1.3964828949988993e-05, + "loss": 0.2772905230522156, + "step": 3027 + }, + { + "epoch": 0.8041428761120701, + "grad_norm": 1.1271894525974708, + "learning_rate": 1.396079766039157e-05, + "loss": 0.2903479337692261, + "step": 3028 + }, + { + "epoch": 0.8044084450936131, + "grad_norm": 1.2165332424367126, + "learning_rate": 1.3956765607199069e-05, + "loss": 0.35709524154663086, + "step": 3029 + }, + { + "epoch": 0.804674014075156, + "grad_norm": 1.0863328323430816, + "learning_rate": 1.3952732791188828e-05, + "loss": 0.2929389774799347, + "step": 3030 + }, + { + "epoch": 0.804939583056699, + "grad_norm": 0.999480167032172, + "learning_rate": 1.3948699213138321e-05, + "loss": 0.2609884440898895, + "step": 3031 + }, + { + "epoch": 0.805205152038242, + "grad_norm": 1.0946442757602284, + "learning_rate": 1.394466487382518e-05, + "loss": 0.3026544749736786, + "step": 3032 + }, + { + "epoch": 0.8054707210197849, + "grad_norm": 1.0415601836945267, + "learning_rate": 1.394062977402717e-05, + "loss": 0.28281137347221375, + "step": 3033 + }, + { + "epoch": 0.8057362900013278, + "grad_norm": 0.9908513124522437, + "learning_rate": 1.3936593914522214e-05, + "loss": 0.26189178228378296, + "step": 3034 + }, + { + "epoch": 0.8060018589828708, + "grad_norm": 1.0541854732158313, + "learning_rate": 1.3932557296088383e-05, + "loss": 0.27987509965896606, + "step": 3035 + }, + { + "epoch": 0.8062674279644138, + "grad_norm": 0.9961129101435677, + "learning_rate": 1.3928519919503884e-05, + "loss": 0.2857724130153656, + "step": 3036 + }, + { + "epoch": 0.8065329969459567, + "grad_norm": 0.9752377302684325, + "learning_rate": 1.3924481785547076e-05, + "loss": 
0.28102418780326843, + "step": 3037 + }, + { + "epoch": 0.8067985659274997, + "grad_norm": 1.06882045524996, + "learning_rate": 1.3920442894996464e-05, + "loss": 0.30250412225723267, + "step": 3038 + }, + { + "epoch": 0.8070641349090426, + "grad_norm": 0.9854538363943691, + "learning_rate": 1.3916403248630703e-05, + "loss": 0.28951483964920044, + "step": 3039 + }, + { + "epoch": 0.8073297038905856, + "grad_norm": 0.990016753911339, + "learning_rate": 1.3912362847228585e-05, + "loss": 0.28455328941345215, + "step": 3040 + }, + { + "epoch": 0.8075952728721285, + "grad_norm": 1.0887176497400486, + "learning_rate": 1.3908321691569048e-05, + "loss": 0.29541105031967163, + "step": 3041 + }, + { + "epoch": 0.8078608418536715, + "grad_norm": 1.162648796815669, + "learning_rate": 1.3904279782431187e-05, + "loss": 0.3057629466056824, + "step": 3042 + }, + { + "epoch": 0.8081264108352144, + "grad_norm": 1.0909846424659564, + "learning_rate": 1.3900237120594226e-05, + "loss": 0.3204082250595093, + "step": 3043 + }, + { + "epoch": 0.8083919798167574, + "grad_norm": 0.9793203113476959, + "learning_rate": 1.3896193706837551e-05, + "loss": 0.28629523515701294, + "step": 3044 + }, + { + "epoch": 0.8086575487983003, + "grad_norm": 1.1874958252714642, + "learning_rate": 1.389214954194068e-05, + "loss": 0.298164427280426, + "step": 3045 + }, + { + "epoch": 0.8089231177798433, + "grad_norm": 1.005892758898695, + "learning_rate": 1.3888104626683282e-05, + "loss": 0.27309298515319824, + "step": 3046 + }, + { + "epoch": 0.8091886867613862, + "grad_norm": 0.9950263488620656, + "learning_rate": 1.3884058961845166e-05, + "loss": 0.25635263323783875, + "step": 3047 + }, + { + "epoch": 0.8094542557429292, + "grad_norm": 1.002808171969614, + "learning_rate": 1.3880012548206292e-05, + "loss": 0.29926127195358276, + "step": 3048 + }, + { + "epoch": 0.8097198247244722, + "grad_norm": 0.9867331912864394, + "learning_rate": 1.387596538654676e-05, + "loss": 0.26633137464523315, + "step": 3049 + }, + 
{ + "epoch": 0.8099853937060152, + "grad_norm": 1.0757993931692869, + "learning_rate": 1.387191747764681e-05, + "loss": 0.28725534677505493, + "step": 3050 + }, + { + "epoch": 0.8102509626875581, + "grad_norm": 1.4955713597704303, + "learning_rate": 1.3867868822286838e-05, + "loss": 0.3015314042568207, + "step": 3051 + }, + { + "epoch": 0.8105165316691011, + "grad_norm": 1.048643971484194, + "learning_rate": 1.3863819421247375e-05, + "loss": 0.3054691553115845, + "step": 3052 + }, + { + "epoch": 0.810782100650644, + "grad_norm": 1.1596568650600225, + "learning_rate": 1.3859769275309097e-05, + "loss": 0.26315444707870483, + "step": 3053 + }, + { + "epoch": 0.811047669632187, + "grad_norm": 1.024319547072995, + "learning_rate": 1.3855718385252824e-05, + "loss": 0.2973077595233917, + "step": 3054 + }, + { + "epoch": 0.81131323861373, + "grad_norm": 1.1845129171721744, + "learning_rate": 1.385166675185952e-05, + "loss": 0.32824432849884033, + "step": 3055 + }, + { + "epoch": 0.8115788075952729, + "grad_norm": 1.2351976774044444, + "learning_rate": 1.3847614375910292e-05, + "loss": 0.3127811849117279, + "step": 3056 + }, + { + "epoch": 0.8118443765768159, + "grad_norm": 1.0840317870226388, + "learning_rate": 1.384356125818639e-05, + "loss": 0.2631932497024536, + "step": 3057 + }, + { + "epoch": 0.8121099455583588, + "grad_norm": 1.0251225163823416, + "learning_rate": 1.3839507399469213e-05, + "loss": 0.2856106162071228, + "step": 3058 + }, + { + "epoch": 0.8123755145399018, + "grad_norm": 1.2604810760435325, + "learning_rate": 1.3835452800540288e-05, + "loss": 0.28986629843711853, + "step": 3059 + }, + { + "epoch": 0.8126410835214447, + "grad_norm": 1.0804422287227695, + "learning_rate": 1.3831397462181298e-05, + "loss": 0.28411972522735596, + "step": 3060 + }, + { + "epoch": 0.8129066525029877, + "grad_norm": 1.117697190248139, + "learning_rate": 1.3827341385174063e-05, + "loss": 0.3234354853630066, + "step": 3061 + }, + { + "epoch": 0.8131722214845306, + "grad_norm": 
0.9917598533716923, + "learning_rate": 1.3823284570300551e-05, + "loss": 0.24779736995697021, + "step": 3062 + }, + { + "epoch": 0.8134377904660736, + "grad_norm": 1.1743500466494587, + "learning_rate": 1.3819227018342865e-05, + "loss": 0.3306904137134552, + "step": 3063 + }, + { + "epoch": 0.8137033594476165, + "grad_norm": 1.1120224667451313, + "learning_rate": 1.3815168730083254e-05, + "loss": 0.31705451011657715, + "step": 3064 + }, + { + "epoch": 0.8139689284291595, + "grad_norm": 1.1351768868234977, + "learning_rate": 1.3811109706304105e-05, + "loss": 0.29830047488212585, + "step": 3065 + }, + { + "epoch": 0.8142344974107024, + "grad_norm": 1.1496885073051233, + "learning_rate": 1.3807049947787954e-05, + "loss": 0.30605942010879517, + "step": 3066 + }, + { + "epoch": 0.8145000663922454, + "grad_norm": 1.0745429008877887, + "learning_rate": 1.3802989455317475e-05, + "loss": 0.3139193058013916, + "step": 3067 + }, + { + "epoch": 0.8147656353737883, + "grad_norm": 1.0541430221228831, + "learning_rate": 1.3798928229675478e-05, + "loss": 0.3175879716873169, + "step": 3068 + }, + { + "epoch": 0.8150312043553313, + "grad_norm": 1.0450888698469754, + "learning_rate": 1.3794866271644922e-05, + "loss": 0.26391106843948364, + "step": 3069 + }, + { + "epoch": 0.8152967733368742, + "grad_norm": 0.945534402365018, + "learning_rate": 1.3790803582008906e-05, + "loss": 0.24128863215446472, + "step": 3070 + }, + { + "epoch": 0.8155623423184172, + "grad_norm": 1.1627322372772537, + "learning_rate": 1.378674016155067e-05, + "loss": 0.3249368965625763, + "step": 3071 + }, + { + "epoch": 0.8158279112999601, + "grad_norm": 1.0060562228451158, + "learning_rate": 1.3782676011053592e-05, + "loss": 0.2871986925601959, + "step": 3072 + }, + { + "epoch": 0.8160934802815031, + "grad_norm": 1.1624248444882197, + "learning_rate": 1.377861113130119e-05, + "loss": 0.29047372937202454, + "step": 3073 + }, + { + "epoch": 0.816359049263046, + "grad_norm": 1.0925698386610025, + "learning_rate": 
1.3774545523077122e-05, + "loss": 0.3055281341075897, + "step": 3074 + }, + { + "epoch": 0.816624618244589, + "grad_norm": 0.9197098274775629, + "learning_rate": 1.37704791871652e-05, + "loss": 0.2565494179725647, + "step": 3075 + }, + { + "epoch": 0.8168901872261319, + "grad_norm": 1.0377185359248249, + "learning_rate": 1.3766412124349358e-05, + "loss": 0.3016049861907959, + "step": 3076 + }, + { + "epoch": 0.8171557562076749, + "grad_norm": 1.0790995041055653, + "learning_rate": 1.3762344335413677e-05, + "loss": 0.3021200895309448, + "step": 3077 + }, + { + "epoch": 0.817421325189218, + "grad_norm": 1.0643017770253544, + "learning_rate": 1.3758275821142382e-05, + "loss": 0.3024774193763733, + "step": 3078 + }, + { + "epoch": 0.8176868941707609, + "grad_norm": 1.0591328005001268, + "learning_rate": 1.3754206582319836e-05, + "loss": 0.33114269375801086, + "step": 3079 + }, + { + "epoch": 0.8179524631523039, + "grad_norm": 1.0815809107319383, + "learning_rate": 1.3750136619730534e-05, + "loss": 0.27339494228363037, + "step": 3080 + }, + { + "epoch": 0.8182180321338468, + "grad_norm": 1.170674128986789, + "learning_rate": 1.3746065934159123e-05, + "loss": 0.2827128767967224, + "step": 3081 + }, + { + "epoch": 0.8184836011153898, + "grad_norm": 1.1064880736532463, + "learning_rate": 1.3741994526390379e-05, + "loss": 0.2972746193408966, + "step": 3082 + }, + { + "epoch": 0.8187491700969327, + "grad_norm": 1.143548636761381, + "learning_rate": 1.3737922397209222e-05, + "loss": 0.29932117462158203, + "step": 3083 + }, + { + "epoch": 0.8190147390784757, + "grad_norm": 1.0415876434255473, + "learning_rate": 1.3733849547400713e-05, + "loss": 0.28307998180389404, + "step": 3084 + }, + { + "epoch": 0.8192803080600186, + "grad_norm": 1.1070561443231863, + "learning_rate": 1.3729775977750048e-05, + "loss": 0.2885883152484894, + "step": 3085 + }, + { + "epoch": 0.8195458770415616, + "grad_norm": 1.1106477390667713, + "learning_rate": 1.3725701689042564e-05, + "loss": 
0.28837913274765015, + "step": 3086 + }, + { + "epoch": 0.8198114460231045, + "grad_norm": 1.0553526039271008, + "learning_rate": 1.3721626682063733e-05, + "loss": 0.2775058150291443, + "step": 3087 + }, + { + "epoch": 0.8200770150046475, + "grad_norm": 1.153176622627066, + "learning_rate": 1.3717550957599172e-05, + "loss": 0.2813493609428406, + "step": 3088 + }, + { + "epoch": 0.8203425839861904, + "grad_norm": 1.1477738573738745, + "learning_rate": 1.371347451643463e-05, + "loss": 0.2677592933177948, + "step": 3089 + }, + { + "epoch": 0.8206081529677334, + "grad_norm": 1.184705398593534, + "learning_rate": 1.3709397359355998e-05, + "loss": 0.3104957938194275, + "step": 3090 + }, + { + "epoch": 0.8208737219492763, + "grad_norm": 1.1714327280441006, + "learning_rate": 1.3705319487149303e-05, + "loss": 0.29315799474716187, + "step": 3091 + }, + { + "epoch": 0.8211392909308193, + "grad_norm": 1.1179168081295616, + "learning_rate": 1.370124090060071e-05, + "loss": 0.3044348657131195, + "step": 3092 + }, + { + "epoch": 0.8214048599123622, + "grad_norm": 1.1122209585212142, + "learning_rate": 1.3697161600496525e-05, + "loss": 0.2918691635131836, + "step": 3093 + }, + { + "epoch": 0.8216704288939052, + "grad_norm": 1.0702091422822353, + "learning_rate": 1.3693081587623187e-05, + "loss": 0.2887750267982483, + "step": 3094 + }, + { + "epoch": 0.8219359978754481, + "grad_norm": 1.1155429990394359, + "learning_rate": 1.3689000862767274e-05, + "loss": 0.3055661916732788, + "step": 3095 + }, + { + "epoch": 0.8222015668569911, + "grad_norm": 1.0251756704247361, + "learning_rate": 1.3684919426715504e-05, + "loss": 0.271525114774704, + "step": 3096 + }, + { + "epoch": 0.822467135838534, + "grad_norm": 1.1269584199088303, + "learning_rate": 1.3680837280254726e-05, + "loss": 0.3220426142215729, + "step": 3097 + }, + { + "epoch": 0.822732704820077, + "grad_norm": 1.0149552227204566, + "learning_rate": 1.3676754424171935e-05, + "loss": 0.29091203212738037, + "step": 3098 + }, + { + 
"epoch": 0.8229982738016199, + "grad_norm": 1.051328362150218, + "learning_rate": 1.3672670859254252e-05, + "loss": 0.2928692102432251, + "step": 3099 + }, + { + "epoch": 0.8232638427831629, + "grad_norm": 1.0366528987524315, + "learning_rate": 1.3668586586288942e-05, + "loss": 0.28635919094085693, + "step": 3100 + }, + { + "epoch": 0.8235294117647058, + "grad_norm": 1.0374876833794577, + "learning_rate": 1.3664501606063402e-05, + "loss": 0.2912571430206299, + "step": 3101 + }, + { + "epoch": 0.8237949807462488, + "grad_norm": 1.051516198651511, + "learning_rate": 1.3660415919365178e-05, + "loss": 0.2783615291118622, + "step": 3102 + }, + { + "epoch": 0.8240605497277917, + "grad_norm": 1.088921494432588, + "learning_rate": 1.365632952698193e-05, + "loss": 0.3064395785331726, + "step": 3103 + }, + { + "epoch": 0.8243261187093347, + "grad_norm": 1.023130230207284, + "learning_rate": 1.3652242429701477e-05, + "loss": 0.2528907358646393, + "step": 3104 + }, + { + "epoch": 0.8245916876908777, + "grad_norm": 1.0503421945431453, + "learning_rate": 1.3648154628311754e-05, + "loss": 0.2648676633834839, + "step": 3105 + }, + { + "epoch": 0.8248572566724207, + "grad_norm": 1.2732480631249905, + "learning_rate": 1.3644066123600846e-05, + "loss": 0.33425620198249817, + "step": 3106 + }, + { + "epoch": 0.8251228256539637, + "grad_norm": 1.0925062122156084, + "learning_rate": 1.3639976916356965e-05, + "loss": 0.3108072280883789, + "step": 3107 + }, + { + "epoch": 0.8253883946355066, + "grad_norm": 1.0815679409684162, + "learning_rate": 1.3635887007368467e-05, + "loss": 0.2860543131828308, + "step": 3108 + }, + { + "epoch": 0.8256539636170496, + "grad_norm": 1.0711932859903586, + "learning_rate": 1.3631796397423833e-05, + "loss": 0.25440749526023865, + "step": 3109 + }, + { + "epoch": 0.8259195325985925, + "grad_norm": 1.1006663978120534, + "learning_rate": 1.3627705087311687e-05, + "loss": 0.2676115334033966, + "step": 3110 + }, + { + "epoch": 0.8261851015801355, + "grad_norm": 
1.1597529133358384, + "learning_rate": 1.3623613077820788e-05, + "loss": 0.28977078199386597, + "step": 3111 + }, + { + "epoch": 0.8264506705616784, + "grad_norm": 1.1046761011596355, + "learning_rate": 1.361952036974002e-05, + "loss": 0.30161401629447937, + "step": 3112 + }, + { + "epoch": 0.8267162395432214, + "grad_norm": 1.135120464396266, + "learning_rate": 1.3615426963858416e-05, + "loss": 0.28676310181617737, + "step": 3113 + }, + { + "epoch": 0.8269818085247643, + "grad_norm": 1.100109147839879, + "learning_rate": 1.361133286096513e-05, + "loss": 0.2957243323326111, + "step": 3114 + }, + { + "epoch": 0.8272473775063073, + "grad_norm": 1.0691905028493969, + "learning_rate": 1.3607238061849461e-05, + "loss": 0.3036375343799591, + "step": 3115 + }, + { + "epoch": 0.8275129464878502, + "grad_norm": 1.1142331461612014, + "learning_rate": 1.360314256730084e-05, + "loss": 0.31175294518470764, + "step": 3116 + }, + { + "epoch": 0.8277785154693932, + "grad_norm": 1.0665802680669934, + "learning_rate": 1.3599046378108825e-05, + "loss": 0.30212485790252686, + "step": 3117 + }, + { + "epoch": 0.8280440844509361, + "grad_norm": 1.1992776426845386, + "learning_rate": 1.3594949495063117e-05, + "loss": 0.3290692865848541, + "step": 3118 + }, + { + "epoch": 0.8283096534324791, + "grad_norm": 1.007005509411099, + "learning_rate": 1.3590851918953542e-05, + "loss": 0.25952839851379395, + "step": 3119 + }, + { + "epoch": 0.828575222414022, + "grad_norm": 1.0949064818424232, + "learning_rate": 1.3586753650570069e-05, + "loss": 0.27737247943878174, + "step": 3120 + }, + { + "epoch": 0.828840791395565, + "grad_norm": 1.0156990629875267, + "learning_rate": 1.3582654690702795e-05, + "loss": 0.29415374994277954, + "step": 3121 + }, + { + "epoch": 0.8291063603771079, + "grad_norm": 1.066804105313739, + "learning_rate": 1.3578555040141948e-05, + "loss": 0.29197627305984497, + "step": 3122 + }, + { + "epoch": 0.8293719293586509, + "grad_norm": 1.1089730397237387, + "learning_rate": 
1.3574454699677893e-05, + "loss": 0.30318522453308105, + "step": 3123 + }, + { + "epoch": 0.8296374983401938, + "grad_norm": 1.0916871079120407, + "learning_rate": 1.357035367010113e-05, + "loss": 0.3184241056442261, + "step": 3124 + }, + { + "epoch": 0.8299030673217368, + "grad_norm": 1.3286365770942894, + "learning_rate": 1.3566251952202288e-05, + "loss": 0.30330199003219604, + "step": 3125 + }, + { + "epoch": 0.8301686363032797, + "grad_norm": 1.1117453782986153, + "learning_rate": 1.356214954677213e-05, + "loss": 0.25366994738578796, + "step": 3126 + }, + { + "epoch": 0.8304342052848227, + "grad_norm": 1.109752753436135, + "learning_rate": 1.3558046454601552e-05, + "loss": 0.3213343918323517, + "step": 3127 + }, + { + "epoch": 0.8306997742663657, + "grad_norm": 1.0918389418395038, + "learning_rate": 1.355394267648158e-05, + "loss": 0.3012468218803406, + "step": 3128 + }, + { + "epoch": 0.8309653432479086, + "grad_norm": 1.1319633441718049, + "learning_rate": 1.3549838213203374e-05, + "loss": 0.3272971510887146, + "step": 3129 + }, + { + "epoch": 0.8312309122294516, + "grad_norm": 1.0778057413430624, + "learning_rate": 1.354573306555823e-05, + "loss": 0.30032482743263245, + "step": 3130 + }, + { + "epoch": 0.8314964812109945, + "grad_norm": 1.0778331818873157, + "learning_rate": 1.3541627234337567e-05, + "loss": 0.2820669412612915, + "step": 3131 + }, + { + "epoch": 0.8317620501925375, + "grad_norm": 1.0187129279356677, + "learning_rate": 1.3537520720332943e-05, + "loss": 0.2638673782348633, + "step": 3132 + }, + { + "epoch": 0.8320276191740804, + "grad_norm": 1.0843507637886551, + "learning_rate": 1.3533413524336043e-05, + "loss": 0.2766842246055603, + "step": 3133 + }, + { + "epoch": 0.8322931881556235, + "grad_norm": 1.2660530642163288, + "learning_rate": 1.3529305647138689e-05, + "loss": 0.330536425113678, + "step": 3134 + }, + { + "epoch": 0.8325587571371664, + "grad_norm": 1.0925834195413107, + "learning_rate": 1.3525197089532833e-05, + "loss": 
0.30375364422798157, + "step": 3135 + }, + { + "epoch": 0.8328243261187094, + "grad_norm": 1.1657669106128519, + "learning_rate": 1.3521087852310555e-05, + "loss": 0.3092171549797058, + "step": 3136 + }, + { + "epoch": 0.8330898951002523, + "grad_norm": 1.1686338102407274, + "learning_rate": 1.3516977936264062e-05, + "loss": 0.28651195764541626, + "step": 3137 + }, + { + "epoch": 0.8333554640817953, + "grad_norm": 1.0845327487717817, + "learning_rate": 1.3512867342185705e-05, + "loss": 0.2882133722305298, + "step": 3138 + }, + { + "epoch": 0.8336210330633382, + "grad_norm": 1.1325019700739036, + "learning_rate": 1.3508756070867955e-05, + "loss": 0.30633628368377686, + "step": 3139 + }, + { + "epoch": 0.8338866020448812, + "grad_norm": 1.090943303162736, + "learning_rate": 1.3504644123103415e-05, + "loss": 0.2819565236568451, + "step": 3140 + }, + { + "epoch": 0.8341521710264241, + "grad_norm": 1.0804420637943886, + "learning_rate": 1.3500531499684819e-05, + "loss": 0.29544374346733093, + "step": 3141 + }, + { + "epoch": 0.8344177400079671, + "grad_norm": 1.10400689114043, + "learning_rate": 1.3496418201405037e-05, + "loss": 0.29383376240730286, + "step": 3142 + }, + { + "epoch": 0.83468330898951, + "grad_norm": 0.9862964562028984, + "learning_rate": 1.3492304229057062e-05, + "loss": 0.24945983290672302, + "step": 3143 + }, + { + "epoch": 0.834948877971053, + "grad_norm": 1.2055608503616826, + "learning_rate": 1.3488189583434023e-05, + "loss": 0.338919997215271, + "step": 3144 + }, + { + "epoch": 0.835214446952596, + "grad_norm": 1.071166648249549, + "learning_rate": 1.348407426532917e-05, + "loss": 0.29555821418762207, + "step": 3145 + }, + { + "epoch": 0.8354800159341389, + "grad_norm": 1.0650010322896095, + "learning_rate": 1.3479958275535887e-05, + "loss": 0.31038299202919006, + "step": 3146 + }, + { + "epoch": 0.8357455849156818, + "grad_norm": 1.021351909092412, + "learning_rate": 1.347584161484769e-05, + "loss": 0.2595089077949524, + "step": 3147 + }, + { + 
"epoch": 0.8360111538972248, + "grad_norm": 1.1885926674667484, + "learning_rate": 1.3471724284058227e-05, + "loss": 0.3287338614463806, + "step": 3148 + }, + { + "epoch": 0.8362767228787678, + "grad_norm": 1.1997618392346763, + "learning_rate": 1.3467606283961268e-05, + "loss": 0.3109680414199829, + "step": 3149 + }, + { + "epoch": 0.8365422918603107, + "grad_norm": 1.0762954067078139, + "learning_rate": 1.346348761535071e-05, + "loss": 0.2584227919578552, + "step": 3150 + }, + { + "epoch": 0.8368078608418537, + "grad_norm": 1.137771769139511, + "learning_rate": 1.345936827902059e-05, + "loss": 0.3038554787635803, + "step": 3151 + }, + { + "epoch": 0.8370734298233966, + "grad_norm": 1.029659281383911, + "learning_rate": 1.3455248275765067e-05, + "loss": 0.28267812728881836, + "step": 3152 + }, + { + "epoch": 0.8373389988049396, + "grad_norm": 1.163661242492436, + "learning_rate": 1.3451127606378425e-05, + "loss": 0.3328094184398651, + "step": 3153 + }, + { + "epoch": 0.8376045677864825, + "grad_norm": 1.084045978606854, + "learning_rate": 1.3447006271655082e-05, + "loss": 0.3235865533351898, + "step": 3154 + }, + { + "epoch": 0.8378701367680255, + "grad_norm": 1.037100355990568, + "learning_rate": 1.3442884272389583e-05, + "loss": 0.25394493341445923, + "step": 3155 + }, + { + "epoch": 0.8381357057495684, + "grad_norm": 1.1250984496593863, + "learning_rate": 1.3438761609376604e-05, + "loss": 0.29841768741607666, + "step": 3156 + }, + { + "epoch": 0.8384012747311114, + "grad_norm": 1.1999100818775306, + "learning_rate": 1.3434638283410942e-05, + "loss": 0.3161924183368683, + "step": 3157 + }, + { + "epoch": 0.8386668437126543, + "grad_norm": 0.9017579941601053, + "learning_rate": 1.3430514295287526e-05, + "loss": 0.22781039774417877, + "step": 3158 + }, + { + "epoch": 0.8389324126941973, + "grad_norm": 1.0534948555265085, + "learning_rate": 1.3426389645801415e-05, + "loss": 0.2947984039783478, + "step": 3159 + }, + { + "epoch": 0.8391979816757402, + "grad_norm": 
1.0286789238265646, + "learning_rate": 1.342226433574779e-05, + "loss": 0.2827467918395996, + "step": 3160 + }, + { + "epoch": 0.8394635506572832, + "grad_norm": 1.0453932660244052, + "learning_rate": 1.3418138365921962e-05, + "loss": 0.3149232268333435, + "step": 3161 + }, + { + "epoch": 0.8397291196388262, + "grad_norm": 1.2487567497076437, + "learning_rate": 1.3414011737119373e-05, + "loss": 0.33154603838920593, + "step": 3162 + }, + { + "epoch": 0.8399946886203692, + "grad_norm": 1.074983718750332, + "learning_rate": 1.3409884450135581e-05, + "loss": 0.28532034158706665, + "step": 3163 + }, + { + "epoch": 0.8402602576019121, + "grad_norm": 1.0695327636228384, + "learning_rate": 1.3405756505766286e-05, + "loss": 0.2539500892162323, + "step": 3164 + }, + { + "epoch": 0.8405258265834551, + "grad_norm": 1.0653532722719707, + "learning_rate": 1.3401627904807302e-05, + "loss": 0.3023888170719147, + "step": 3165 + }, + { + "epoch": 0.840791395564998, + "grad_norm": 1.0811844194203637, + "learning_rate": 1.3397498648054579e-05, + "loss": 0.3088506758213043, + "step": 3166 + }, + { + "epoch": 0.841056964546541, + "grad_norm": 1.2249048833028835, + "learning_rate": 1.3393368736304184e-05, + "loss": 0.3223467469215393, + "step": 3167 + }, + { + "epoch": 0.841322533528084, + "grad_norm": 1.0772937869709083, + "learning_rate": 1.3389238170352318e-05, + "loss": 0.2541419565677643, + "step": 3168 + }, + { + "epoch": 0.8415881025096269, + "grad_norm": 1.0463826735598363, + "learning_rate": 1.3385106950995308e-05, + "loss": 0.2915497422218323, + "step": 3169 + }, + { + "epoch": 0.8418536714911699, + "grad_norm": 1.1726858597591174, + "learning_rate": 1.3380975079029598e-05, + "loss": 0.2907465994358063, + "step": 3170 + }, + { + "epoch": 0.8421192404727128, + "grad_norm": 1.0581221380369799, + "learning_rate": 1.337684255525177e-05, + "loss": 0.2587417960166931, + "step": 3171 + }, + { + "epoch": 0.8423848094542558, + "grad_norm": 1.1080472137531636, + "learning_rate": 
1.3372709380458522e-05, + "loss": 0.2932469844818115, + "step": 3172 + }, + { + "epoch": 0.8426503784357987, + "grad_norm": 1.2359417241278925, + "learning_rate": 1.3368575555446681e-05, + "loss": 0.31451860070228577, + "step": 3173 + }, + { + "epoch": 0.8429159474173417, + "grad_norm": 1.067745190297883, + "learning_rate": 1.3364441081013205e-05, + "loss": 0.24513742327690125, + "step": 3174 + }, + { + "epoch": 0.8431815163988846, + "grad_norm": 1.0795526820997523, + "learning_rate": 1.3360305957955166e-05, + "loss": 0.29781201481819153, + "step": 3175 + }, + { + "epoch": 0.8434470853804276, + "grad_norm": 1.3176130252584213, + "learning_rate": 1.3356170187069775e-05, + "loss": 0.30925726890563965, + "step": 3176 + }, + { + "epoch": 0.8437126543619705, + "grad_norm": 1.1110632932678028, + "learning_rate": 1.3352033769154347e-05, + "loss": 0.2822851538658142, + "step": 3177 + }, + { + "epoch": 0.8439782233435135, + "grad_norm": 1.0033731418220575, + "learning_rate": 1.3347896705006344e-05, + "loss": 0.2511071264743805, + "step": 3178 + }, + { + "epoch": 0.8442437923250564, + "grad_norm": 1.1921629041957855, + "learning_rate": 1.3343758995423344e-05, + "loss": 0.3002505302429199, + "step": 3179 + }, + { + "epoch": 0.8445093613065994, + "grad_norm": 0.9942107511416755, + "learning_rate": 1.3339620641203043e-05, + "loss": 0.285504549741745, + "step": 3180 + }, + { + "epoch": 0.8447749302881423, + "grad_norm": 1.1880306222164103, + "learning_rate": 1.3335481643143271e-05, + "loss": 0.31988856196403503, + "step": 3181 + }, + { + "epoch": 0.8450404992696853, + "grad_norm": 1.0905691447057935, + "learning_rate": 1.3331342002041973e-05, + "loss": 0.29330819845199585, + "step": 3182 + }, + { + "epoch": 0.8453060682512282, + "grad_norm": 1.049547579497453, + "learning_rate": 1.3327201718697232e-05, + "loss": 0.28694427013397217, + "step": 3183 + }, + { + "epoch": 0.8455716372327712, + "grad_norm": 1.0561569710297949, + "learning_rate": 1.3323060793907239e-05, + "loss": 
0.24912211298942566, + "step": 3184 + }, + { + "epoch": 0.8458372062143141, + "grad_norm": 1.1346018526864223, + "learning_rate": 1.3318919228470315e-05, + "loss": 0.28117647767066956, + "step": 3185 + }, + { + "epoch": 0.8461027751958571, + "grad_norm": 1.2524387900920857, + "learning_rate": 1.3314777023184907e-05, + "loss": 0.3176446557044983, + "step": 3186 + }, + { + "epoch": 0.8463683441774, + "grad_norm": 1.0728463380702977, + "learning_rate": 1.3310634178849583e-05, + "loss": 0.31205689907073975, + "step": 3187 + }, + { + "epoch": 0.846633913158943, + "grad_norm": 1.1500545538779043, + "learning_rate": 1.3306490696263034e-05, + "loss": 0.29942232370376587, + "step": 3188 + }, + { + "epoch": 0.8468994821404859, + "grad_norm": 1.161750107962421, + "learning_rate": 1.3302346576224077e-05, + "loss": 0.3149508833885193, + "step": 3189 + }, + { + "epoch": 0.847165051122029, + "grad_norm": 1.0924626607758976, + "learning_rate": 1.3298201819531646e-05, + "loss": 0.2930619418621063, + "step": 3190 + }, + { + "epoch": 0.847430620103572, + "grad_norm": 1.0958680594537196, + "learning_rate": 1.3294056426984804e-05, + "loss": 0.3089582920074463, + "step": 3191 + }, + { + "epoch": 0.8476961890851149, + "grad_norm": 1.2175163313381927, + "learning_rate": 1.3289910399382733e-05, + "loss": 0.3120991587638855, + "step": 3192 + }, + { + "epoch": 0.8479617580666579, + "grad_norm": 1.0535688994558223, + "learning_rate": 1.3285763737524738e-05, + "loss": 0.2728833258152008, + "step": 3193 + }, + { + "epoch": 0.8482273270482008, + "grad_norm": 1.0457465617551238, + "learning_rate": 1.3281616442210246e-05, + "loss": 0.2833358347415924, + "step": 3194 + }, + { + "epoch": 0.8484928960297438, + "grad_norm": 1.0714039101779447, + "learning_rate": 1.3277468514238803e-05, + "loss": 0.26218950748443604, + "step": 3195 + }, + { + "epoch": 0.8487584650112867, + "grad_norm": 1.0938436245702892, + "learning_rate": 1.3273319954410088e-05, + "loss": 0.3120720386505127, + "step": 3196 + }, + { + 
"epoch": 0.8490240339928297, + "grad_norm": 1.0412833763909957, + "learning_rate": 1.3269170763523892e-05, + "loss": 0.2748696208000183, + "step": 3197 + }, + { + "epoch": 0.8492896029743726, + "grad_norm": 1.0148051769031237, + "learning_rate": 1.326502094238013e-05, + "loss": 0.2892690598964691, + "step": 3198 + }, + { + "epoch": 0.8495551719559156, + "grad_norm": 1.068648430192615, + "learning_rate": 1.3260870491778835e-05, + "loss": 0.26583510637283325, + "step": 3199 + }, + { + "epoch": 0.8498207409374585, + "grad_norm": 1.105620955007001, + "learning_rate": 1.325671941252017e-05, + "loss": 0.31602388620376587, + "step": 3200 + }, + { + "epoch": 0.8500863099190015, + "grad_norm": 1.068517421778971, + "learning_rate": 1.3252567705404409e-05, + "loss": 0.2980017364025116, + "step": 3201 + }, + { + "epoch": 0.8503518789005444, + "grad_norm": 1.0740685936810315, + "learning_rate": 1.3248415371231957e-05, + "loss": 0.27081727981567383, + "step": 3202 + }, + { + "epoch": 0.8506174478820874, + "grad_norm": 1.2590520587844396, + "learning_rate": 1.3244262410803333e-05, + "loss": 0.28895002603530884, + "step": 3203 + }, + { + "epoch": 0.8508830168636303, + "grad_norm": 1.1373552047630993, + "learning_rate": 1.3240108824919176e-05, + "loss": 0.30804315209388733, + "step": 3204 + }, + { + "epoch": 0.8511485858451733, + "grad_norm": 1.1074447190812993, + "learning_rate": 1.3235954614380253e-05, + "loss": 0.28173667192459106, + "step": 3205 + }, + { + "epoch": 0.8514141548267162, + "grad_norm": 1.097058715769224, + "learning_rate": 1.3231799779987445e-05, + "loss": 0.3113047778606415, + "step": 3206 + }, + { + "epoch": 0.8516797238082592, + "grad_norm": 1.0285862677327642, + "learning_rate": 1.3227644322541754e-05, + "loss": 0.247248113155365, + "step": 3207 + }, + { + "epoch": 0.8519452927898021, + "grad_norm": 1.1032823581833329, + "learning_rate": 1.3223488242844309e-05, + "loss": 0.27078187465667725, + "step": 3208 + }, + { + "epoch": 0.8522108617713451, + "grad_norm": 
1.0635139884249352, + "learning_rate": 1.321933154169634e-05, + "loss": 0.2749357223510742, + "step": 3209 + }, + { + "epoch": 0.852476430752888, + "grad_norm": 1.0129100217319345, + "learning_rate": 1.3215174219899224e-05, + "loss": 0.25382956862449646, + "step": 3210 + }, + { + "epoch": 0.852741999734431, + "grad_norm": 1.0528151094235563, + "learning_rate": 1.3211016278254436e-05, + "loss": 0.3237685263156891, + "step": 3211 + }, + { + "epoch": 0.8530075687159739, + "grad_norm": 1.273911241149791, + "learning_rate": 1.3206857717563581e-05, + "loss": 0.2899032235145569, + "step": 3212 + }, + { + "epoch": 0.8532731376975169, + "grad_norm": 1.040323856520164, + "learning_rate": 1.3202698538628376e-05, + "loss": 0.25997933745384216, + "step": 3213 + }, + { + "epoch": 0.8535387066790598, + "grad_norm": 1.121125084608177, + "learning_rate": 1.3198538742250668e-05, + "loss": 0.3228183090686798, + "step": 3214 + }, + { + "epoch": 0.8538042756606028, + "grad_norm": 1.1002230220524851, + "learning_rate": 1.3194378329232413e-05, + "loss": 0.31993368268013, + "step": 3215 + }, + { + "epoch": 0.8540698446421457, + "grad_norm": 1.157115702913611, + "learning_rate": 1.3190217300375694e-05, + "loss": 0.29520007967948914, + "step": 3216 + }, + { + "epoch": 0.8543354136236887, + "grad_norm": 1.0898926058638614, + "learning_rate": 1.3186055656482702e-05, + "loss": 0.31073522567749023, + "step": 3217 + }, + { + "epoch": 0.8546009826052318, + "grad_norm": 1.1465583376043518, + "learning_rate": 1.3181893398355752e-05, + "loss": 0.34354183077812195, + "step": 3218 + }, + { + "epoch": 0.8548665515867747, + "grad_norm": 1.179928846812524, + "learning_rate": 1.3177730526797286e-05, + "loss": 0.27676698565483093, + "step": 3219 + }, + { + "epoch": 0.8551321205683177, + "grad_norm": 1.0792983255501365, + "learning_rate": 1.3173567042609852e-05, + "loss": 0.27313530445098877, + "step": 3220 + }, + { + "epoch": 0.8553976895498606, + "grad_norm": 0.9249374113484707, + "learning_rate": 
1.3169402946596119e-05, + "loss": 0.2517555058002472, + "step": 3221 + }, + { + "epoch": 0.8556632585314036, + "grad_norm": 1.0684778793194236, + "learning_rate": 1.3165238239558878e-05, + "loss": 0.29700207710266113, + "step": 3222 + }, + { + "epoch": 0.8559288275129465, + "grad_norm": 1.1262235464302217, + "learning_rate": 1.3161072922301037e-05, + "loss": 0.3182620704174042, + "step": 3223 + }, + { + "epoch": 0.8561943964944895, + "grad_norm": 1.123570804553303, + "learning_rate": 1.3156906995625615e-05, + "loss": 0.3112961947917938, + "step": 3224 + }, + { + "epoch": 0.8564599654760324, + "grad_norm": 1.1746597736734636, + "learning_rate": 1.3152740460335757e-05, + "loss": 0.3080563545227051, + "step": 3225 + }, + { + "epoch": 0.8567255344575754, + "grad_norm": 1.1646363575237453, + "learning_rate": 1.3148573317234726e-05, + "loss": 0.31197935342788696, + "step": 3226 + }, + { + "epoch": 0.8569911034391183, + "grad_norm": 1.0455051980244612, + "learning_rate": 1.3144405567125886e-05, + "loss": 0.27377086877822876, + "step": 3227 + }, + { + "epoch": 0.8572566724206613, + "grad_norm": 1.050528412475655, + "learning_rate": 1.3140237210812741e-05, + "loss": 0.25303182005882263, + "step": 3228 + }, + { + "epoch": 0.8575222414022042, + "grad_norm": 1.0664458431943622, + "learning_rate": 1.3136068249098899e-05, + "loss": 0.27949726581573486, + "step": 3229 + }, + { + "epoch": 0.8577878103837472, + "grad_norm": 1.0907347405782384, + "learning_rate": 1.3131898682788082e-05, + "loss": 0.278359055519104, + "step": 3230 + }, + { + "epoch": 0.8580533793652901, + "grad_norm": 1.081462335761227, + "learning_rate": 1.312772851268414e-05, + "loss": 0.28507643938064575, + "step": 3231 + }, + { + "epoch": 0.8583189483468331, + "grad_norm": 1.0256133822907842, + "learning_rate": 1.3123557739591026e-05, + "loss": 0.2689790427684784, + "step": 3232 + }, + { + "epoch": 0.858584517328376, + "grad_norm": 1.1569049456144243, + "learning_rate": 1.3119386364312821e-05, + "loss": 
0.31956973671913147, + "step": 3233 + }, + { + "epoch": 0.858850086309919, + "grad_norm": 1.0914807974802394, + "learning_rate": 1.3115214387653711e-05, + "loss": 0.2837323546409607, + "step": 3234 + }, + { + "epoch": 0.8591156552914619, + "grad_norm": 1.0015578039784754, + "learning_rate": 1.3111041810418011e-05, + "loss": 0.2756272554397583, + "step": 3235 + }, + { + "epoch": 0.8593812242730049, + "grad_norm": 1.0283979772106548, + "learning_rate": 1.3106868633410139e-05, + "loss": 0.2664923369884491, + "step": 3236 + }, + { + "epoch": 0.8596467932545478, + "grad_norm": 1.2217960050611696, + "learning_rate": 1.3102694857434637e-05, + "loss": 0.2842246890068054, + "step": 3237 + }, + { + "epoch": 0.8599123622360908, + "grad_norm": 1.0632739499737671, + "learning_rate": 1.3098520483296159e-05, + "loss": 0.3066467344760895, + "step": 3238 + }, + { + "epoch": 0.8601779312176338, + "grad_norm": 1.148754786147734, + "learning_rate": 1.3094345511799478e-05, + "loss": 0.3042510151863098, + "step": 3239 + }, + { + "epoch": 0.8604435001991767, + "grad_norm": 0.9995895975923785, + "learning_rate": 1.3090169943749475e-05, + "loss": 0.2753696143627167, + "step": 3240 + }, + { + "epoch": 0.8607090691807197, + "grad_norm": 1.0325788591675433, + "learning_rate": 1.3085993779951154e-05, + "loss": 0.2561766803264618, + "step": 3241 + }, + { + "epoch": 0.8609746381622626, + "grad_norm": 1.2136300404308455, + "learning_rate": 1.3081817021209626e-05, + "loss": 0.297982782125473, + "step": 3242 + }, + { + "epoch": 0.8612402071438056, + "grad_norm": 1.0615498924909679, + "learning_rate": 1.3077639668330124e-05, + "loss": 0.2961920499801636, + "step": 3243 + }, + { + "epoch": 0.8615057761253485, + "grad_norm": 1.1445145037694135, + "learning_rate": 1.3073461722117991e-05, + "loss": 0.2868857979774475, + "step": 3244 + }, + { + "epoch": 0.8617713451068915, + "grad_norm": 0.9475657969770804, + "learning_rate": 1.3069283183378683e-05, + "loss": 0.22930951416492462, + "step": 3245 + }, + { 
+ "epoch": 0.8620369140884345, + "grad_norm": 1.1416904771862697, + "learning_rate": 1.306510405291778e-05, + "loss": 0.29737964272499084, + "step": 3246 + }, + { + "epoch": 0.8623024830699775, + "grad_norm": 1.0401904023883137, + "learning_rate": 1.3060924331540964e-05, + "loss": 0.2764522433280945, + "step": 3247 + }, + { + "epoch": 0.8625680520515204, + "grad_norm": 0.9863739655208709, + "learning_rate": 1.3056744020054039e-05, + "loss": 0.27608832716941833, + "step": 3248 + }, + { + "epoch": 0.8628336210330634, + "grad_norm": 1.0115944755696356, + "learning_rate": 1.3052563119262915e-05, + "loss": 0.25667035579681396, + "step": 3249 + }, + { + "epoch": 0.8630991900146063, + "grad_norm": 1.1289498412687866, + "learning_rate": 1.3048381629973622e-05, + "loss": 0.3015863597393036, + "step": 3250 + }, + { + "epoch": 0.8633647589961493, + "grad_norm": 1.123802742380982, + "learning_rate": 1.3044199552992307e-05, + "loss": 0.2798422873020172, + "step": 3251 + }, + { + "epoch": 0.8636303279776922, + "grad_norm": 1.1385670465264601, + "learning_rate": 1.304001688912522e-05, + "loss": 0.2856596112251282, + "step": 3252 + }, + { + "epoch": 0.8638958969592352, + "grad_norm": 1.2094473565150297, + "learning_rate": 1.303583363917873e-05, + "loss": 0.30247554183006287, + "step": 3253 + }, + { + "epoch": 0.8641614659407781, + "grad_norm": 1.1517937069448307, + "learning_rate": 1.303164980395932e-05, + "loss": 0.26817965507507324, + "step": 3254 + }, + { + "epoch": 0.8644270349223211, + "grad_norm": 1.197653632931973, + "learning_rate": 1.3027465384273579e-05, + "loss": 0.26919034123420715, + "step": 3255 + }, + { + "epoch": 0.864692603903864, + "grad_norm": 1.1206851183742237, + "learning_rate": 1.3023280380928223e-05, + "loss": 0.29495447874069214, + "step": 3256 + }, + { + "epoch": 0.864958172885407, + "grad_norm": 1.0428738517831404, + "learning_rate": 1.3019094794730063e-05, + "loss": 0.26766717433929443, + "step": 3257 + }, + { + "epoch": 0.86522374186695, + "grad_norm": 
0.9998039586765358, + "learning_rate": 1.3014908626486032e-05, + "loss": 0.2573341131210327, + "step": 3258 + }, + { + "epoch": 0.8654893108484929, + "grad_norm": 1.226366277313196, + "learning_rate": 1.3010721877003177e-05, + "loss": 0.32776498794555664, + "step": 3259 + }, + { + "epoch": 0.8657548798300359, + "grad_norm": 1.1631189448763641, + "learning_rate": 1.3006534547088651e-05, + "loss": 0.3107950687408447, + "step": 3260 + }, + { + "epoch": 0.8660204488115788, + "grad_norm": 1.0476224109192296, + "learning_rate": 1.3002346637549726e-05, + "loss": 0.26143360137939453, + "step": 3261 + }, + { + "epoch": 0.8662860177931218, + "grad_norm": 1.035123297672666, + "learning_rate": 1.2998158149193773e-05, + "loss": 0.25666722655296326, + "step": 3262 + }, + { + "epoch": 0.8665515867746647, + "grad_norm": 1.1492097701405037, + "learning_rate": 1.2993969082828296e-05, + "loss": 0.2982695698738098, + "step": 3263 + }, + { + "epoch": 0.8668171557562077, + "grad_norm": 1.0937256102841277, + "learning_rate": 1.2989779439260888e-05, + "loss": 0.30144304037094116, + "step": 3264 + }, + { + "epoch": 0.8670827247377506, + "grad_norm": 1.0563159913050848, + "learning_rate": 1.2985589219299264e-05, + "loss": 0.30421534180641174, + "step": 3265 + }, + { + "epoch": 0.8673482937192936, + "grad_norm": 1.0698350081311019, + "learning_rate": 1.298139842375125e-05, + "loss": 0.23653842508792877, + "step": 3266 + }, + { + "epoch": 0.8676138627008365, + "grad_norm": 1.2059661362441823, + "learning_rate": 1.2977207053424781e-05, + "loss": 0.284118115901947, + "step": 3267 + }, + { + "epoch": 0.8678794316823795, + "grad_norm": 1.0387152548948486, + "learning_rate": 1.2973015109127907e-05, + "loss": 0.30857348442077637, + "step": 3268 + }, + { + "epoch": 0.8681450006639224, + "grad_norm": 1.0987728632322369, + "learning_rate": 1.2968822591668784e-05, + "loss": 0.2826589047908783, + "step": 3269 + }, + { + "epoch": 0.8684105696454654, + "grad_norm": 1.109218087764862, + "learning_rate": 
1.2964629501855678e-05, + "loss": 0.27634552121162415, + "step": 3270 + }, + { + "epoch": 0.8686761386270083, + "grad_norm": 1.0217259699141916, + "learning_rate": 1.296043584049697e-05, + "loss": 0.25823545455932617, + "step": 3271 + }, + { + "epoch": 0.8689417076085513, + "grad_norm": 1.148249635090711, + "learning_rate": 1.2956241608401145e-05, + "loss": 0.28939294815063477, + "step": 3272 + }, + { + "epoch": 0.8692072765900942, + "grad_norm": 1.0622455952024017, + "learning_rate": 1.2952046806376806e-05, + "loss": 0.3042459785938263, + "step": 3273 + }, + { + "epoch": 0.8694728455716373, + "grad_norm": 1.042505415392428, + "learning_rate": 1.2947851435232658e-05, + "loss": 0.2834415137767792, + "step": 3274 + }, + { + "epoch": 0.8697384145531802, + "grad_norm": 1.144903021800522, + "learning_rate": 1.2943655495777518e-05, + "loss": 0.28226330876350403, + "step": 3275 + }, + { + "epoch": 0.8700039835347232, + "grad_norm": 1.023547316743189, + "learning_rate": 1.2939458988820317e-05, + "loss": 0.2796105742454529, + "step": 3276 + }, + { + "epoch": 0.8702695525162661, + "grad_norm": 0.9903193313068561, + "learning_rate": 1.2935261915170091e-05, + "loss": 0.24790553748607635, + "step": 3277 + }, + { + "epoch": 0.8705351214978091, + "grad_norm": 1.0279177898991045, + "learning_rate": 1.2931064275635987e-05, + "loss": 0.25101587176322937, + "step": 3278 + }, + { + "epoch": 0.870800690479352, + "grad_norm": 1.1728597267839225, + "learning_rate": 1.2926866071027257e-05, + "loss": 0.3060816526412964, + "step": 3279 + }, + { + "epoch": 0.871066259460895, + "grad_norm": 1.1510511467115991, + "learning_rate": 1.2922667302153268e-05, + "loss": 0.3137212097644806, + "step": 3280 + }, + { + "epoch": 0.871331828442438, + "grad_norm": 0.9977159840643061, + "learning_rate": 1.2918467969823497e-05, + "loss": 0.2391548752784729, + "step": 3281 + }, + { + "epoch": 0.8715973974239809, + "grad_norm": 1.2003880700717509, + "learning_rate": 1.2914268074847516e-05, + "loss": 
0.3219330608844757, + "step": 3282 + }, + { + "epoch": 0.8718629664055239, + "grad_norm": 1.126134187698585, + "learning_rate": 1.2910067618035025e-05, + "loss": 0.2934436798095703, + "step": 3283 + }, + { + "epoch": 0.8721285353870668, + "grad_norm": 1.2016016844780073, + "learning_rate": 1.2905866600195815e-05, + "loss": 0.2919486165046692, + "step": 3284 + }, + { + "epoch": 0.8723941043686098, + "grad_norm": 1.1895929482131946, + "learning_rate": 1.2901665022139796e-05, + "loss": 0.2840641438961029, + "step": 3285 + }, + { + "epoch": 0.8726596733501527, + "grad_norm": 1.0215741253911979, + "learning_rate": 1.2897462884676983e-05, + "loss": 0.24151530861854553, + "step": 3286 + }, + { + "epoch": 0.8729252423316957, + "grad_norm": 1.0040194757671277, + "learning_rate": 1.28932601886175e-05, + "loss": 0.24515505135059357, + "step": 3287 + }, + { + "epoch": 0.8731908113132386, + "grad_norm": 1.2173512735867882, + "learning_rate": 1.2889056934771577e-05, + "loss": 0.2561264634132385, + "step": 3288 + }, + { + "epoch": 0.8734563802947816, + "grad_norm": 1.1645401251165897, + "learning_rate": 1.2884853123949547e-05, + "loss": 0.2798641622066498, + "step": 3289 + }, + { + "epoch": 0.8737219492763245, + "grad_norm": 1.2693161910394721, + "learning_rate": 1.288064875696186e-05, + "loss": 0.35207298398017883, + "step": 3290 + }, + { + "epoch": 0.8739875182578675, + "grad_norm": 1.0184365377421387, + "learning_rate": 1.2876443834619066e-05, + "loss": 0.2778821289539337, + "step": 3291 + }, + { + "epoch": 0.8742530872394104, + "grad_norm": 1.044209880952949, + "learning_rate": 1.2872238357731825e-05, + "loss": 0.2691737413406372, + "step": 3292 + }, + { + "epoch": 0.8745186562209534, + "grad_norm": 1.1392637940929287, + "learning_rate": 1.2868032327110904e-05, + "loss": 0.25476595759391785, + "step": 3293 + }, + { + "epoch": 0.8747842252024963, + "grad_norm": 1.012064080488804, + "learning_rate": 1.2863825743567174e-05, + "loss": 0.258474737405777, + "step": 3294 + }, + { + 
"epoch": 0.8750497941840393, + "grad_norm": 1.17733236715245, + "learning_rate": 1.285961860791162e-05, + "loss": 0.32421568036079407, + "step": 3295 + }, + { + "epoch": 0.8753153631655822, + "grad_norm": 1.0747747984737868, + "learning_rate": 1.2855410920955323e-05, + "loss": 0.3090333342552185, + "step": 3296 + }, + { + "epoch": 0.8755809321471252, + "grad_norm": 1.1729934635240566, + "learning_rate": 1.2851202683509476e-05, + "loss": 0.26548707485198975, + "step": 3297 + }, + { + "epoch": 0.8758465011286681, + "grad_norm": 2.497627852681845, + "learning_rate": 1.2846993896385378e-05, + "loss": 0.3002355098724365, + "step": 3298 + }, + { + "epoch": 0.8761120701102111, + "grad_norm": 1.1706582997439863, + "learning_rate": 1.2842784560394433e-05, + "loss": 0.2924933135509491, + "step": 3299 + }, + { + "epoch": 0.876377639091754, + "grad_norm": 1.1544391256229967, + "learning_rate": 1.2838574676348155e-05, + "loss": 0.2886514663696289, + "step": 3300 + }, + { + "epoch": 0.876643208073297, + "grad_norm": 1.1131138367993383, + "learning_rate": 1.2834364245058155e-05, + "loss": 0.29821154475212097, + "step": 3301 + }, + { + "epoch": 0.87690877705484, + "grad_norm": 1.0278540671542709, + "learning_rate": 1.2830153267336159e-05, + "loss": 0.2656530737876892, + "step": 3302 + }, + { + "epoch": 0.877174346036383, + "grad_norm": 1.2018449655833119, + "learning_rate": 1.282594174399399e-05, + "loss": 0.3437826633453369, + "step": 3303 + }, + { + "epoch": 0.877439915017926, + "grad_norm": 1.0564301800372577, + "learning_rate": 1.2821729675843581e-05, + "loss": 0.29773175716400146, + "step": 3304 + }, + { + "epoch": 0.8777054839994689, + "grad_norm": 1.0707167209814024, + "learning_rate": 1.2817517063696973e-05, + "loss": 0.29772818088531494, + "step": 3305 + }, + { + "epoch": 0.8779710529810119, + "grad_norm": 1.1530012432828134, + "learning_rate": 1.2813303908366303e-05, + "loss": 0.3266611099243164, + "step": 3306 + }, + { + "epoch": 0.8782366219625548, + "grad_norm": 
1.0044541774243023, + "learning_rate": 1.2809090210663818e-05, + "loss": 0.26599690318107605, + "step": 3307 + }, + { + "epoch": 0.8785021909440978, + "grad_norm": 1.0142651525790767, + "learning_rate": 1.2804875971401872e-05, + "loss": 0.27988117933273315, + "step": 3308 + }, + { + "epoch": 0.8787677599256407, + "grad_norm": 1.0221522532224918, + "learning_rate": 1.2800661191392916e-05, + "loss": 0.2630334496498108, + "step": 3309 + }, + { + "epoch": 0.8790333289071837, + "grad_norm": 1.022950247187023, + "learning_rate": 1.2796445871449517e-05, + "loss": 0.2628091871738434, + "step": 3310 + }, + { + "epoch": 0.8792988978887266, + "grad_norm": 1.1994310454875075, + "learning_rate": 1.2792230012384333e-05, + "loss": 0.3443898558616638, + "step": 3311 + }, + { + "epoch": 0.8795644668702696, + "grad_norm": 1.0673533832636588, + "learning_rate": 1.2788013615010136e-05, + "loss": 0.2966022491455078, + "step": 3312 + }, + { + "epoch": 0.8798300358518125, + "grad_norm": 1.1030087744198647, + "learning_rate": 1.2783796680139793e-05, + "loss": 0.2995494604110718, + "step": 3313 + }, + { + "epoch": 0.8800956048333555, + "grad_norm": 1.0504434000468303, + "learning_rate": 1.2779579208586283e-05, + "loss": 0.2652590870857239, + "step": 3314 + }, + { + "epoch": 0.8803611738148984, + "grad_norm": 1.1388460976467547, + "learning_rate": 1.2775361201162684e-05, + "loss": 0.3145690858364105, + "step": 3315 + }, + { + "epoch": 0.8806267427964414, + "grad_norm": 1.040210802651612, + "learning_rate": 1.2771142658682175e-05, + "loss": 0.25744086503982544, + "step": 3316 + }, + { + "epoch": 0.8808923117779843, + "grad_norm": 1.1618029117732733, + "learning_rate": 1.2766923581958046e-05, + "loss": 0.3129793405532837, + "step": 3317 + }, + { + "epoch": 0.8811578807595273, + "grad_norm": 1.166975234876197, + "learning_rate": 1.2762703971803684e-05, + "loss": 0.233384907245636, + "step": 3318 + }, + { + "epoch": 0.8814234497410702, + "grad_norm": 0.9242808009438505, + "learning_rate": 
1.2758483829032579e-05, + "loss": 0.2422962635755539, + "step": 3319 + }, + { + "epoch": 0.8816890187226132, + "grad_norm": 1.0844595421589949, + "learning_rate": 1.2754263154458328e-05, + "loss": 0.2801973819732666, + "step": 3320 + }, + { + "epoch": 0.8819545877041561, + "grad_norm": 1.294346594070355, + "learning_rate": 1.2750041948894621e-05, + "loss": 0.30659937858581543, + "step": 3321 + }, + { + "epoch": 0.8822201566856991, + "grad_norm": 1.0921019252616484, + "learning_rate": 1.274582021315526e-05, + "loss": 0.28527066111564636, + "step": 3322 + }, + { + "epoch": 0.882485725667242, + "grad_norm": 1.0598264473011552, + "learning_rate": 1.2741597948054146e-05, + "loss": 0.23065675795078278, + "step": 3323 + }, + { + "epoch": 0.882751294648785, + "grad_norm": 1.0918730747592962, + "learning_rate": 1.2737375154405283e-05, + "loss": 0.2727832794189453, + "step": 3324 + }, + { + "epoch": 0.8830168636303279, + "grad_norm": 1.0789259788038712, + "learning_rate": 1.273315183302277e-05, + "loss": 0.26809507608413696, + "step": 3325 + }, + { + "epoch": 0.8832824326118709, + "grad_norm": 1.1647625824499415, + "learning_rate": 1.2728927984720823e-05, + "loss": 0.3250407576560974, + "step": 3326 + }, + { + "epoch": 0.8835480015934138, + "grad_norm": 1.0915300736309757, + "learning_rate": 1.2724703610313742e-05, + "loss": 0.2651330232620239, + "step": 3327 + }, + { + "epoch": 0.8838135705749568, + "grad_norm": 1.206298710080754, + "learning_rate": 1.2720478710615944e-05, + "loss": 0.27337920665740967, + "step": 3328 + }, + { + "epoch": 0.8840791395564997, + "grad_norm": 1.0282478968996285, + "learning_rate": 1.2716253286441935e-05, + "loss": 0.2664092183113098, + "step": 3329 + }, + { + "epoch": 0.8843447085380428, + "grad_norm": 1.1354570950284573, + "learning_rate": 1.2712027338606323e-05, + "loss": 0.27927765250205994, + "step": 3330 + }, + { + "epoch": 0.8846102775195858, + "grad_norm": 1.1204979208217445, + "learning_rate": 1.270780086792383e-05, + "loss": 
0.27241113781929016, + "step": 3331 + }, + { + "epoch": 0.8848758465011287, + "grad_norm": 1.0795162414965664, + "learning_rate": 1.2703573875209264e-05, + "loss": 0.28279373049736023, + "step": 3332 + }, + { + "epoch": 0.8851414154826717, + "grad_norm": 1.1634487658284207, + "learning_rate": 1.2699346361277538e-05, + "loss": 0.3011108934879303, + "step": 3333 + }, + { + "epoch": 0.8854069844642146, + "grad_norm": 2.772716513531517, + "learning_rate": 1.2695118326943671e-05, + "loss": 0.3071288764476776, + "step": 3334 + }, + { + "epoch": 0.8856725534457576, + "grad_norm": 1.0969950934626527, + "learning_rate": 1.2690889773022778e-05, + "loss": 0.2688761353492737, + "step": 3335 + }, + { + "epoch": 0.8859381224273005, + "grad_norm": 1.1363327585955358, + "learning_rate": 1.2686660700330074e-05, + "loss": 0.2788669466972351, + "step": 3336 + }, + { + "epoch": 0.8862036914088435, + "grad_norm": 1.0884694079711634, + "learning_rate": 1.268243110968087e-05, + "loss": 0.2801516652107239, + "step": 3337 + }, + { + "epoch": 0.8864692603903864, + "grad_norm": 1.0414904749451368, + "learning_rate": 1.2678201001890587e-05, + "loss": 0.2876908779144287, + "step": 3338 + }, + { + "epoch": 0.8867348293719294, + "grad_norm": 1.1731879069090343, + "learning_rate": 1.2673970377774733e-05, + "loss": 0.27709734439849854, + "step": 3339 + }, + { + "epoch": 0.8870003983534723, + "grad_norm": 1.2053408848372587, + "learning_rate": 1.266973923814893e-05, + "loss": 0.3191622793674469, + "step": 3340 + }, + { + "epoch": 0.8872659673350153, + "grad_norm": 1.098682297791164, + "learning_rate": 1.2665507583828889e-05, + "loss": 0.2873385548591614, + "step": 3341 + }, + { + "epoch": 0.8875315363165582, + "grad_norm": 1.1730973936717166, + "learning_rate": 1.2661275415630421e-05, + "loss": 0.2922922372817993, + "step": 3342 + }, + { + "epoch": 0.8877971052981012, + "grad_norm": 1.1127017834272521, + "learning_rate": 1.2657042734369443e-05, + "loss": 0.305694043636322, + "step": 3343 + }, + { + 
"epoch": 0.8880626742796441, + "grad_norm": 1.120364019457983, + "learning_rate": 1.2652809540861958e-05, + "loss": 0.29108062386512756, + "step": 3344 + }, + { + "epoch": 0.8883282432611871, + "grad_norm": 1.076655765525218, + "learning_rate": 1.2648575835924084e-05, + "loss": 0.24170495569705963, + "step": 3345 + }, + { + "epoch": 0.88859381224273, + "grad_norm": 1.4853370236272063, + "learning_rate": 1.2644341620372025e-05, + "loss": 0.2987719476222992, + "step": 3346 + }, + { + "epoch": 0.888859381224273, + "grad_norm": 0.9743774864126274, + "learning_rate": 1.2640106895022088e-05, + "loss": 0.21037599444389343, + "step": 3347 + }, + { + "epoch": 0.889124950205816, + "grad_norm": 1.034527053965976, + "learning_rate": 1.2635871660690677e-05, + "loss": 0.25263655185699463, + "step": 3348 + }, + { + "epoch": 0.8893905191873589, + "grad_norm": 1.2196740502064325, + "learning_rate": 1.2631635918194301e-05, + "loss": 0.30169543623924255, + "step": 3349 + }, + { + "epoch": 0.8896560881689018, + "grad_norm": 1.0624381650731511, + "learning_rate": 1.2627399668349554e-05, + "loss": 0.26982420682907104, + "step": 3350 + }, + { + "epoch": 0.8899216571504448, + "grad_norm": 1.1785068724165282, + "learning_rate": 1.262316291197314e-05, + "loss": 0.3281899690628052, + "step": 3351 + }, + { + "epoch": 0.8901872261319878, + "grad_norm": 1.1157278400935415, + "learning_rate": 1.2618925649881852e-05, + "loss": 0.30140435695648193, + "step": 3352 + }, + { + "epoch": 0.8904527951135307, + "grad_norm": 0.9928732296573972, + "learning_rate": 1.261468788289259e-05, + "loss": 0.22343885898590088, + "step": 3353 + }, + { + "epoch": 0.8907183640950737, + "grad_norm": 1.0410264886026745, + "learning_rate": 1.261044961182234e-05, + "loss": 0.2889901399612427, + "step": 3354 + }, + { + "epoch": 0.8909839330766166, + "grad_norm": 1.0933214790144683, + "learning_rate": 1.260621083748819e-05, + "loss": 0.27896153926849365, + "step": 3355 + }, + { + "epoch": 0.8912495020581596, + "grad_norm": 
1.077111437166839, + "learning_rate": 1.2601971560707328e-05, + "loss": 0.29390811920166016, + "step": 3356 + }, + { + "epoch": 0.8915150710397025, + "grad_norm": 1.0468332572471015, + "learning_rate": 1.2597731782297036e-05, + "loss": 0.2872384190559387, + "step": 3357 + }, + { + "epoch": 0.8917806400212455, + "grad_norm": 1.3094137802442116, + "learning_rate": 1.2593491503074698e-05, + "loss": 0.29753726720809937, + "step": 3358 + }, + { + "epoch": 0.8920462090027885, + "grad_norm": 1.1441306843080605, + "learning_rate": 1.2589250723857782e-05, + "loss": 0.31631946563720703, + "step": 3359 + }, + { + "epoch": 0.8923117779843315, + "grad_norm": 1.1374138683367387, + "learning_rate": 1.2585009445463867e-05, + "loss": 0.2932048738002777, + "step": 3360 + }, + { + "epoch": 0.8925773469658744, + "grad_norm": 1.0483655110874528, + "learning_rate": 1.2580767668710614e-05, + "loss": 0.2902034521102905, + "step": 3361 + }, + { + "epoch": 0.8928429159474174, + "grad_norm": 1.0712531988705474, + "learning_rate": 1.2576525394415795e-05, + "loss": 0.2596299648284912, + "step": 3362 + }, + { + "epoch": 0.8931084849289603, + "grad_norm": 1.1916540375753872, + "learning_rate": 1.2572282623397268e-05, + "loss": 0.29102641344070435, + "step": 3363 + }, + { + "epoch": 0.8933740539105033, + "grad_norm": 1.236954620143465, + "learning_rate": 1.2568039356472985e-05, + "loss": 0.2970406711101532, + "step": 3364 + }, + { + "epoch": 0.8936396228920462, + "grad_norm": 1.1384210267422126, + "learning_rate": 1.2563795594461003e-05, + "loss": 0.2916618585586548, + "step": 3365 + }, + { + "epoch": 0.8939051918735892, + "grad_norm": 1.1769911575713834, + "learning_rate": 1.2559551338179468e-05, + "loss": 0.3217374086380005, + "step": 3366 + }, + { + "epoch": 0.8941707608551321, + "grad_norm": 1.1228623922561494, + "learning_rate": 1.255530658844662e-05, + "loss": 0.3000059425830841, + "step": 3367 + }, + { + "epoch": 0.8944363298366751, + "grad_norm": 1.2170346898517979, + "learning_rate": 
1.2551061346080804e-05, + "loss": 0.2848728895187378, + "step": 3368 + }, + { + "epoch": 0.894701898818218, + "grad_norm": 1.3197542136745113, + "learning_rate": 1.2546815611900442e-05, + "loss": 0.3328903317451477, + "step": 3369 + }, + { + "epoch": 0.894967467799761, + "grad_norm": 1.0838958961687528, + "learning_rate": 1.2542569386724069e-05, + "loss": 0.2920045256614685, + "step": 3370 + }, + { + "epoch": 0.895233036781304, + "grad_norm": 1.0679716869166582, + "learning_rate": 1.2538322671370305e-05, + "loss": 0.30370092391967773, + "step": 3371 + }, + { + "epoch": 0.8954986057628469, + "grad_norm": 1.069215534600395, + "learning_rate": 1.2534075466657866e-05, + "loss": 0.24454624950885773, + "step": 3372 + }, + { + "epoch": 0.8957641747443899, + "grad_norm": 1.172481734803523, + "learning_rate": 1.2529827773405566e-05, + "loss": 0.30908581614494324, + "step": 3373 + }, + { + "epoch": 0.8960297437259328, + "grad_norm": 1.1095939186212227, + "learning_rate": 1.2525579592432304e-05, + "loss": 0.2792360782623291, + "step": 3374 + }, + { + "epoch": 0.8962953127074758, + "grad_norm": 1.0658472517819026, + "learning_rate": 1.2521330924557087e-05, + "loss": 0.285555362701416, + "step": 3375 + }, + { + "epoch": 0.8965608816890187, + "grad_norm": 1.1649386203925687, + "learning_rate": 1.2517081770599002e-05, + "loss": 0.3159451484680176, + "step": 3376 + }, + { + "epoch": 0.8968264506705617, + "grad_norm": 1.2867424735092035, + "learning_rate": 1.2512832131377237e-05, + "loss": 0.35929200053215027, + "step": 3377 + }, + { + "epoch": 0.8970920196521046, + "grad_norm": 1.0781651079446009, + "learning_rate": 1.2508582007711074e-05, + "loss": 0.28624874353408813, + "step": 3378 + }, + { + "epoch": 0.8973575886336476, + "grad_norm": 1.0156684050998903, + "learning_rate": 1.2504331400419884e-05, + "loss": 0.27670109272003174, + "step": 3379 + }, + { + "epoch": 0.8976231576151905, + "grad_norm": 1.0786636895703534, + "learning_rate": 1.2500080310323139e-05, + "loss": 
0.2894589304924011, + "step": 3380 + }, + { + "epoch": 0.8978887265967335, + "grad_norm": 1.1385795160382524, + "learning_rate": 1.2495828738240396e-05, + "loss": 0.31378716230392456, + "step": 3381 + }, + { + "epoch": 0.8981542955782764, + "grad_norm": 1.3149597134232174, + "learning_rate": 1.2491576684991306e-05, + "loss": 0.33676713705062866, + "step": 3382 + }, + { + "epoch": 0.8984198645598194, + "grad_norm": 0.9814689350619926, + "learning_rate": 1.2487324151395618e-05, + "loss": 0.2875351011753082, + "step": 3383 + }, + { + "epoch": 0.8986854335413623, + "grad_norm": 1.1646557221945626, + "learning_rate": 1.2483071138273168e-05, + "loss": 0.29729989171028137, + "step": 3384 + }, + { + "epoch": 0.8989510025229053, + "grad_norm": 1.0864970585536224, + "learning_rate": 1.2478817646443888e-05, + "loss": 0.3227398991584778, + "step": 3385 + }, + { + "epoch": 0.8992165715044482, + "grad_norm": 1.1586445900518523, + "learning_rate": 1.2474563676727803e-05, + "loss": 0.2664690315723419, + "step": 3386 + }, + { + "epoch": 0.8994821404859913, + "grad_norm": 1.1748792923054732, + "learning_rate": 1.2470309229945021e-05, + "loss": 0.29543352127075195, + "step": 3387 + }, + { + "epoch": 0.8997477094675342, + "grad_norm": 0.9899792334789409, + "learning_rate": 1.2466054306915756e-05, + "loss": 0.26658856868743896, + "step": 3388 + }, + { + "epoch": 0.9000132784490772, + "grad_norm": 1.123207894421506, + "learning_rate": 1.2461798908460305e-05, + "loss": 0.2899627387523651, + "step": 3389 + }, + { + "epoch": 0.9002788474306201, + "grad_norm": 1.1137567335053833, + "learning_rate": 1.245754303539906e-05, + "loss": 0.2708336114883423, + "step": 3390 + }, + { + "epoch": 0.9005444164121631, + "grad_norm": 1.1459655330577214, + "learning_rate": 1.2453286688552502e-05, + "loss": 0.28124746680259705, + "step": 3391 + }, + { + "epoch": 0.900809985393706, + "grad_norm": 1.0470005335558448, + "learning_rate": 1.2449029868741202e-05, + "loss": 0.2599399983882904, + "step": 3392 + }, 
+ { + "epoch": 0.901075554375249, + "grad_norm": 0.9576026734877732, + "learning_rate": 1.2444772576785828e-05, + "loss": 0.25035667419433594, + "step": 3393 + }, + { + "epoch": 0.901341123356792, + "grad_norm": 1.1148471766082222, + "learning_rate": 1.2440514813507136e-05, + "loss": 0.2772521376609802, + "step": 3394 + }, + { + "epoch": 0.9016066923383349, + "grad_norm": 1.103787889433512, + "learning_rate": 1.2436256579725969e-05, + "loss": 0.3282839357852936, + "step": 3395 + }, + { + "epoch": 0.9018722613198779, + "grad_norm": 1.080988888326222, + "learning_rate": 1.2431997876263269e-05, + "loss": 0.2507914900779724, + "step": 3396 + }, + { + "epoch": 0.9021378303014208, + "grad_norm": 1.1123927965933749, + "learning_rate": 1.2427738703940055e-05, + "loss": 0.2620914876461029, + "step": 3397 + }, + { + "epoch": 0.9024033992829638, + "grad_norm": 1.0713438905056172, + "learning_rate": 1.2423479063577458e-05, + "loss": 0.26561641693115234, + "step": 3398 + }, + { + "epoch": 0.9026689682645067, + "grad_norm": 1.151582271756571, + "learning_rate": 1.2419218955996677e-05, + "loss": 0.2998678386211395, + "step": 3399 + }, + { + "epoch": 0.9029345372460497, + "grad_norm": 1.0484454707225395, + "learning_rate": 1.2414958382019017e-05, + "loss": 0.2368398755788803, + "step": 3400 + }, + { + "epoch": 0.9032001062275926, + "grad_norm": 1.0429929570241405, + "learning_rate": 1.241069734246586e-05, + "loss": 0.2623558044433594, + "step": 3401 + }, + { + "epoch": 0.9034656752091356, + "grad_norm": 1.0283944167565489, + "learning_rate": 1.2406435838158686e-05, + "loss": 0.2693074941635132, + "step": 3402 + }, + { + "epoch": 0.9037312441906785, + "grad_norm": 1.1211950634171715, + "learning_rate": 1.2402173869919063e-05, + "loss": 0.2933652698993683, + "step": 3403 + }, + { + "epoch": 0.9039968131722215, + "grad_norm": 1.0858313001207585, + "learning_rate": 1.2397911438568651e-05, + "loss": 0.28515487909317017, + "step": 3404 + }, + { + "epoch": 0.9042623821537644, + 
"grad_norm": 1.1243916508543286, + "learning_rate": 1.2393648544929193e-05, + "loss": 0.282942533493042, + "step": 3405 + }, + { + "epoch": 0.9045279511353074, + "grad_norm": 1.112018853789466, + "learning_rate": 1.2389385189822526e-05, + "loss": 0.28300392627716064, + "step": 3406 + }, + { + "epoch": 0.9047935201168503, + "grad_norm": 1.0490322847853841, + "learning_rate": 1.2385121374070577e-05, + "loss": 0.25697019696235657, + "step": 3407 + }, + { + "epoch": 0.9050590890983933, + "grad_norm": 1.15038978087342, + "learning_rate": 1.2380857098495355e-05, + "loss": 0.31156057119369507, + "step": 3408 + }, + { + "epoch": 0.9053246580799362, + "grad_norm": 1.1544066045654053, + "learning_rate": 1.2376592363918967e-05, + "loss": 0.2943422794342041, + "step": 3409 + }, + { + "epoch": 0.9055902270614792, + "grad_norm": 0.9968457114080438, + "learning_rate": 1.2372327171163596e-05, + "loss": 0.2792074680328369, + "step": 3410 + }, + { + "epoch": 0.9058557960430221, + "grad_norm": 1.0328662447203703, + "learning_rate": 1.2368061521051526e-05, + "loss": 0.2547443211078644, + "step": 3411 + }, + { + "epoch": 0.9061213650245651, + "grad_norm": 1.068901181257851, + "learning_rate": 1.2363795414405125e-05, + "loss": 0.25637373328208923, + "step": 3412 + }, + { + "epoch": 0.906386934006108, + "grad_norm": 1.1660475318941728, + "learning_rate": 1.2359528852046844e-05, + "loss": 0.3269123435020447, + "step": 3413 + }, + { + "epoch": 0.906652502987651, + "grad_norm": 1.0197427295072394, + "learning_rate": 1.2355261834799232e-05, + "loss": 0.28538423776626587, + "step": 3414 + }, + { + "epoch": 0.906918071969194, + "grad_norm": 1.1343354993973966, + "learning_rate": 1.2350994363484915e-05, + "loss": 0.2961096167564392, + "step": 3415 + }, + { + "epoch": 0.907183640950737, + "grad_norm": 1.0930595123597455, + "learning_rate": 1.2346726438926613e-05, + "loss": 0.3134537935256958, + "step": 3416 + }, + { + "epoch": 0.90744920993228, + "grad_norm": 1.018679268761631, + 
"learning_rate": 1.2342458061947129e-05, + "loss": 0.2614031434059143, + "step": 3417 + }, + { + "epoch": 0.9077147789138229, + "grad_norm": 1.0403373381004117, + "learning_rate": 1.2338189233369357e-05, + "loss": 0.27166056632995605, + "step": 3418 + }, + { + "epoch": 0.9079803478953659, + "grad_norm": 1.0735839504787106, + "learning_rate": 1.2333919954016277e-05, + "loss": 0.26053497195243835, + "step": 3419 + }, + { + "epoch": 0.9082459168769088, + "grad_norm": 1.1112591016079632, + "learning_rate": 1.2329650224710956e-05, + "loss": 0.3109636902809143, + "step": 3420 + }, + { + "epoch": 0.9085114858584518, + "grad_norm": 1.081828404421451, + "learning_rate": 1.232538004627655e-05, + "loss": 0.2576507329940796, + "step": 3421 + }, + { + "epoch": 0.9087770548399947, + "grad_norm": 1.0981308884589311, + "learning_rate": 1.2321109419536292e-05, + "loss": 0.2525216341018677, + "step": 3422 + }, + { + "epoch": 0.9090426238215377, + "grad_norm": 1.0732531844020532, + "learning_rate": 1.2316838345313517e-05, + "loss": 0.2483336180448532, + "step": 3423 + }, + { + "epoch": 0.9093081928030806, + "grad_norm": 1.1592146270526706, + "learning_rate": 1.2312566824431631e-05, + "loss": 0.26372796297073364, + "step": 3424 + }, + { + "epoch": 0.9095737617846236, + "grad_norm": 1.1537675520237485, + "learning_rate": 1.2308294857714138e-05, + "loss": 0.2933644950389862, + "step": 3425 + }, + { + "epoch": 0.9098393307661665, + "grad_norm": 1.0330883162146767, + "learning_rate": 1.2304022445984618e-05, + "loss": 0.2543371915817261, + "step": 3426 + }, + { + "epoch": 0.9101048997477095, + "grad_norm": 1.1689002717846686, + "learning_rate": 1.2299749590066745e-05, + "loss": 0.29246431589126587, + "step": 3427 + }, + { + "epoch": 0.9103704687292524, + "grad_norm": 1.0141798843769114, + "learning_rate": 1.2295476290784273e-05, + "loss": 0.2475431263446808, + "step": 3428 + }, + { + "epoch": 0.9106360377107954, + "grad_norm": 1.1845034794986053, + "learning_rate": 1.2291202548961042e-05, 
+ "loss": 0.3312363624572754, + "step": 3429 + }, + { + "epoch": 0.9109016066923383, + "grad_norm": 1.0459618447051044, + "learning_rate": 1.2286928365420987e-05, + "loss": 0.25192639231681824, + "step": 3430 + }, + { + "epoch": 0.9111671756738813, + "grad_norm": 1.2038671566275931, + "learning_rate": 1.2282653740988114e-05, + "loss": 0.23189345002174377, + "step": 3431 + }, + { + "epoch": 0.9114327446554242, + "grad_norm": 1.17767221221897, + "learning_rate": 1.2278378676486522e-05, + "loss": 0.2888398766517639, + "step": 3432 + }, + { + "epoch": 0.9116983136369672, + "grad_norm": 1.1295595703903276, + "learning_rate": 1.2274103172740387e-05, + "loss": 0.2857785224914551, + "step": 3433 + }, + { + "epoch": 0.9119638826185101, + "grad_norm": 1.039533312390003, + "learning_rate": 1.2269827230573986e-05, + "loss": 0.23961025476455688, + "step": 3434 + }, + { + "epoch": 0.9122294516000531, + "grad_norm": 1.1192521835175562, + "learning_rate": 1.2265550850811663e-05, + "loss": 0.2791004478931427, + "step": 3435 + }, + { + "epoch": 0.912495020581596, + "grad_norm": 1.052040685054951, + "learning_rate": 1.2261274034277858e-05, + "loss": 0.2875480651855469, + "step": 3436 + }, + { + "epoch": 0.912760589563139, + "grad_norm": 1.12188070500717, + "learning_rate": 1.2256996781797086e-05, + "loss": 0.29422929883003235, + "step": 3437 + }, + { + "epoch": 0.9130261585446819, + "grad_norm": 1.2976046274469295, + "learning_rate": 1.225271909419395e-05, + "loss": 0.27114444971084595, + "step": 3438 + }, + { + "epoch": 0.9132917275262249, + "grad_norm": 1.0684416452719028, + "learning_rate": 1.2248440972293146e-05, + "loss": 0.3007166385650635, + "step": 3439 + }, + { + "epoch": 0.9135572965077678, + "grad_norm": 1.1408150577224654, + "learning_rate": 1.224416241691944e-05, + "loss": 0.28550055623054504, + "step": 3440 + }, + { + "epoch": 0.9138228654893108, + "grad_norm": 1.1159473328967766, + "learning_rate": 1.2239883428897687e-05, + "loss": 0.2861761450767517, + "step": 3441 + 
}, + { + "epoch": 0.9140884344708538, + "grad_norm": 1.1186358936011263, + "learning_rate": 1.2235604009052823e-05, + "loss": 0.3288506865501404, + "step": 3442 + }, + { + "epoch": 0.9143540034523968, + "grad_norm": 1.2101661293343442, + "learning_rate": 1.2231324158209876e-05, + "loss": 0.33189019560813904, + "step": 3443 + }, + { + "epoch": 0.9146195724339398, + "grad_norm": 0.9931883995236199, + "learning_rate": 1.2227043877193947e-05, + "loss": 0.20846885442733765, + "step": 3444 + }, + { + "epoch": 0.9148851414154827, + "grad_norm": 0.9579263575635046, + "learning_rate": 1.2222763166830223e-05, + "loss": 0.25184741616249084, + "step": 3445 + }, + { + "epoch": 0.9151507103970257, + "grad_norm": 1.0775642304955, + "learning_rate": 1.2218482027943977e-05, + "loss": 0.2954701781272888, + "step": 3446 + }, + { + "epoch": 0.9154162793785686, + "grad_norm": 1.055908963813806, + "learning_rate": 1.221420046136056e-05, + "loss": 0.263336718082428, + "step": 3447 + }, + { + "epoch": 0.9156818483601116, + "grad_norm": 1.2181481624195412, + "learning_rate": 1.2209918467905405e-05, + "loss": 0.31178128719329834, + "step": 3448 + }, + { + "epoch": 0.9159474173416545, + "grad_norm": 1.1248939907914326, + "learning_rate": 1.2205636048404037e-05, + "loss": 0.30373090505599976, + "step": 3449 + }, + { + "epoch": 0.9162129863231975, + "grad_norm": 1.1316476755108689, + "learning_rate": 1.2201353203682052e-05, + "loss": 0.31057459115982056, + "step": 3450 + }, + { + "epoch": 0.9164785553047404, + "grad_norm": 1.0432699213656527, + "learning_rate": 1.2197069934565126e-05, + "loss": 0.26834744215011597, + "step": 3451 + }, + { + "epoch": 0.9167441242862834, + "grad_norm": 1.0235490532622333, + "learning_rate": 1.2192786241879033e-05, + "loss": 0.30224066972732544, + "step": 3452 + }, + { + "epoch": 0.9170096932678263, + "grad_norm": 1.1136690118430506, + "learning_rate": 1.2188502126449616e-05, + "loss": 0.28249508142471313, + "step": 3453 + }, + { + "epoch": 0.9172752622493693, + 
"grad_norm": 1.0210144972314754, + "learning_rate": 1.2184217589102798e-05, + "loss": 0.24823793768882751, + "step": 3454 + }, + { + "epoch": 0.9175408312309122, + "grad_norm": 1.1878687209379464, + "learning_rate": 1.2179932630664589e-05, + "loss": 0.32556289434432983, + "step": 3455 + }, + { + "epoch": 0.9178064002124552, + "grad_norm": 1.0899520670240972, + "learning_rate": 1.217564725196108e-05, + "loss": 0.29420584440231323, + "step": 3456 + }, + { + "epoch": 0.9180719691939981, + "grad_norm": 1.028247015068141, + "learning_rate": 1.2171361453818437e-05, + "loss": 0.29294469952583313, + "step": 3457 + }, + { + "epoch": 0.9183375381755411, + "grad_norm": 1.0399893903415627, + "learning_rate": 1.2167075237062918e-05, + "loss": 0.3173823952674866, + "step": 3458 + }, + { + "epoch": 0.918603107157084, + "grad_norm": 1.1571492956528482, + "learning_rate": 1.2162788602520851e-05, + "loss": 0.32950159907341003, + "step": 3459 + }, + { + "epoch": 0.918868676138627, + "grad_norm": 1.0478118037587627, + "learning_rate": 1.2158501551018647e-05, + "loss": 0.3011544942855835, + "step": 3460 + }, + { + "epoch": 0.91913424512017, + "grad_norm": 1.0135067760604335, + "learning_rate": 1.2154214083382802e-05, + "loss": 0.25775954127311707, + "step": 3461 + }, + { + "epoch": 0.9193998141017129, + "grad_norm": 1.0514508898774713, + "learning_rate": 1.214992620043989e-05, + "loss": 0.286748468875885, + "step": 3462 + }, + { + "epoch": 0.9196653830832558, + "grad_norm": 1.1050004366949897, + "learning_rate": 1.214563790301656e-05, + "loss": 0.30588221549987793, + "step": 3463 + }, + { + "epoch": 0.9199309520647988, + "grad_norm": 1.0079666808538812, + "learning_rate": 1.214134919193955e-05, + "loss": 0.23506608605384827, + "step": 3464 + }, + { + "epoch": 0.9201965210463418, + "grad_norm": 1.037364536446331, + "learning_rate": 1.2137060068035672e-05, + "loss": 0.2612350285053253, + "step": 3465 + }, + { + "epoch": 0.9204620900278847, + "grad_norm": 1.0810309706979688, + 
"learning_rate": 1.2132770532131815e-05, + "loss": 0.3268318772315979, + "step": 3466 + }, + { + "epoch": 0.9207276590094277, + "grad_norm": 1.0723394192428657, + "learning_rate": 1.2128480585054951e-05, + "loss": 0.2970179319381714, + "step": 3467 + }, + { + "epoch": 0.9209932279909706, + "grad_norm": 1.0036147426745694, + "learning_rate": 1.2124190227632138e-05, + "loss": 0.2910206615924835, + "step": 3468 + }, + { + "epoch": 0.9212587969725136, + "grad_norm": 1.1089890742219906, + "learning_rate": 1.2119899460690496e-05, + "loss": 0.3000222444534302, + "step": 3469 + }, + { + "epoch": 0.9215243659540565, + "grad_norm": 1.1166450826016983, + "learning_rate": 1.2115608285057242e-05, + "loss": 0.30304765701293945, + "step": 3470 + }, + { + "epoch": 0.9217899349355996, + "grad_norm": 0.9893826238823328, + "learning_rate": 1.2111316701559663e-05, + "loss": 0.26393038034439087, + "step": 3471 + }, + { + "epoch": 0.9220555039171425, + "grad_norm": 1.1384217438340345, + "learning_rate": 1.2107024711025128e-05, + "loss": 0.3111063838005066, + "step": 3472 + }, + { + "epoch": 0.9223210728986855, + "grad_norm": 0.9599961450252364, + "learning_rate": 1.2102732314281073e-05, + "loss": 0.2897321581840515, + "step": 3473 + }, + { + "epoch": 0.9225866418802284, + "grad_norm": 1.1396280258666305, + "learning_rate": 1.2098439512155028e-05, + "loss": 0.2835896611213684, + "step": 3474 + }, + { + "epoch": 0.9228522108617714, + "grad_norm": 1.0165194494005183, + "learning_rate": 1.2094146305474596e-05, + "loss": 0.27648821473121643, + "step": 3475 + }, + { + "epoch": 0.9231177798433143, + "grad_norm": 1.1221504506656363, + "learning_rate": 1.2089852695067457e-05, + "loss": 0.2528097629547119, + "step": 3476 + }, + { + "epoch": 0.9233833488248573, + "grad_norm": 1.1105562286202324, + "learning_rate": 1.2085558681761361e-05, + "loss": 0.2750067412853241, + "step": 3477 + }, + { + "epoch": 0.9236489178064002, + "grad_norm": 1.1199967050670125, + "learning_rate": 1.2081264266384148e-05, 
+ "loss": 0.3115938901901245, + "step": 3478 + }, + { + "epoch": 0.9239144867879432, + "grad_norm": 1.1203071431737686, + "learning_rate": 1.2076969449763734e-05, + "loss": 0.2858419418334961, + "step": 3479 + }, + { + "epoch": 0.9241800557694861, + "grad_norm": 1.051118385350032, + "learning_rate": 1.2072674232728105e-05, + "loss": 0.24990032613277435, + "step": 3480 + }, + { + "epoch": 0.9244456247510291, + "grad_norm": 1.2991104394876676, + "learning_rate": 1.206837861610533e-05, + "loss": 0.23106999695301056, + "step": 3481 + }, + { + "epoch": 0.924711193732572, + "grad_norm": 1.0396779513824141, + "learning_rate": 1.2064082600723546e-05, + "loss": 0.2737967371940613, + "step": 3482 + }, + { + "epoch": 0.924976762714115, + "grad_norm": 1.1890061925781694, + "learning_rate": 1.2059786187410984e-05, + "loss": 0.2810317873954773, + "step": 3483 + }, + { + "epoch": 0.925242331695658, + "grad_norm": 1.1358698893490913, + "learning_rate": 1.2055489376995938e-05, + "loss": 0.30852559208869934, + "step": 3484 + }, + { + "epoch": 0.9255079006772009, + "grad_norm": 1.1003932874354148, + "learning_rate": 1.2051192170306784e-05, + "loss": 0.2956348657608032, + "step": 3485 + }, + { + "epoch": 0.9257734696587439, + "grad_norm": 1.18261367067389, + "learning_rate": 1.204689456817197e-05, + "loss": 0.2825953960418701, + "step": 3486 + }, + { + "epoch": 0.9260390386402868, + "grad_norm": 1.2502616697865143, + "learning_rate": 1.2042596571420025e-05, + "loss": 0.3351168632507324, + "step": 3487 + }, + { + "epoch": 0.9263046076218298, + "grad_norm": 1.2354469073344645, + "learning_rate": 1.2038298180879548e-05, + "loss": 0.2718926668167114, + "step": 3488 + }, + { + "epoch": 0.9265701766033727, + "grad_norm": 1.1387239259181285, + "learning_rate": 1.2033999397379223e-05, + "loss": 0.29036587476730347, + "step": 3489 + }, + { + "epoch": 0.9268357455849157, + "grad_norm": 0.9499049433325992, + "learning_rate": 1.2029700221747804e-05, + "loss": 0.22917689383029938, + "step": 3490 + 
}, + { + "epoch": 0.9271013145664586, + "grad_norm": 1.2322966399012754, + "learning_rate": 1.2025400654814119e-05, + "loss": 0.2963443398475647, + "step": 3491 + }, + { + "epoch": 0.9273668835480016, + "grad_norm": 1.100231072465541, + "learning_rate": 1.2021100697407075e-05, + "loss": 0.2866464853286743, + "step": 3492 + }, + { + "epoch": 0.9276324525295445, + "grad_norm": 1.1717529025248212, + "learning_rate": 1.2016800350355654e-05, + "loss": 0.3069216012954712, + "step": 3493 + }, + { + "epoch": 0.9278980215110875, + "grad_norm": 1.0745448017128252, + "learning_rate": 1.2012499614488913e-05, + "loss": 0.27206870913505554, + "step": 3494 + }, + { + "epoch": 0.9281635904926304, + "grad_norm": 1.0995365532444106, + "learning_rate": 1.2008198490635978e-05, + "loss": 0.32130372524261475, + "step": 3495 + }, + { + "epoch": 0.9284291594741734, + "grad_norm": 1.151015013814654, + "learning_rate": 1.2003896979626061e-05, + "loss": 0.30631259083747864, + "step": 3496 + }, + { + "epoch": 0.9286947284557163, + "grad_norm": 1.125856079122124, + "learning_rate": 1.199959508228844e-05, + "loss": 0.3005716800689697, + "step": 3497 + }, + { + "epoch": 0.9289602974372593, + "grad_norm": 0.9983757548693274, + "learning_rate": 1.1995292799452472e-05, + "loss": 0.2381039410829544, + "step": 3498 + }, + { + "epoch": 0.9292258664188023, + "grad_norm": 1.1338580261514946, + "learning_rate": 1.1990990131947582e-05, + "loss": 0.31764286756515503, + "step": 3499 + }, + { + "epoch": 0.9294914354003453, + "grad_norm": 1.1445030838538803, + "learning_rate": 1.1986687080603273e-05, + "loss": 0.3029370903968811, + "step": 3500 + }, + { + "epoch": 0.9297570043818882, + "grad_norm": 1.0814133109661386, + "learning_rate": 1.198238364624913e-05, + "loss": 0.30967646837234497, + "step": 3501 + }, + { + "epoch": 0.9300225733634312, + "grad_norm": 1.0376796287878236, + "learning_rate": 1.1978079829714799e-05, + "loss": 0.24687506258487701, + "step": 3502 + }, + { + "epoch": 0.9302881423449741, + 
"grad_norm": 1.0529899744692286, + "learning_rate": 1.1973775631830007e-05, + "loss": 0.25909408926963806, + "step": 3503 + }, + { + "epoch": 0.9305537113265171, + "grad_norm": 1.1136411983367804, + "learning_rate": 1.196947105342455e-05, + "loss": 0.281025230884552, + "step": 3504 + }, + { + "epoch": 0.93081928030806, + "grad_norm": 1.2858712177395888, + "learning_rate": 1.1965166095328302e-05, + "loss": 0.33401811122894287, + "step": 3505 + }, + { + "epoch": 0.931084849289603, + "grad_norm": 0.9732764276792689, + "learning_rate": 1.1960860758371208e-05, + "loss": 0.25839388370513916, + "step": 3506 + }, + { + "epoch": 0.931350418271146, + "grad_norm": 0.954364218435113, + "learning_rate": 1.1956555043383286e-05, + "loss": 0.23343560099601746, + "step": 3507 + }, + { + "epoch": 0.9316159872526889, + "grad_norm": 1.176408931412559, + "learning_rate": 1.1952248951194629e-05, + "loss": 0.31106436252593994, + "step": 3508 + }, + { + "epoch": 0.9318815562342319, + "grad_norm": 1.108418204277134, + "learning_rate": 1.1947942482635395e-05, + "loss": 0.29152095317840576, + "step": 3509 + }, + { + "epoch": 0.9321471252157748, + "grad_norm": 1.2651732065185788, + "learning_rate": 1.1943635638535827e-05, + "loss": 0.31517675518989563, + "step": 3510 + }, + { + "epoch": 0.9324126941973178, + "grad_norm": 1.2309480505410157, + "learning_rate": 1.1939328419726231e-05, + "loss": 0.33221137523651123, + "step": 3511 + }, + { + "epoch": 0.9326782631788607, + "grad_norm": 1.2277892053470791, + "learning_rate": 1.193502082703699e-05, + "loss": 0.314359575510025, + "step": 3512 + }, + { + "epoch": 0.9329438321604037, + "grad_norm": 1.129757464324541, + "learning_rate": 1.1930712861298553e-05, + "loss": 0.2879924178123474, + "step": 3513 + }, + { + "epoch": 0.9332094011419466, + "grad_norm": 1.1622909402406336, + "learning_rate": 1.1926404523341443e-05, + "loss": 0.2732955515384674, + "step": 3514 + }, + { + "epoch": 0.9334749701234896, + "grad_norm": 1.1586501434218468, + 
"learning_rate": 1.1922095813996264e-05, + "loss": 0.32156097888946533, + "step": 3515 + }, + { + "epoch": 0.9337405391050325, + "grad_norm": 1.110486475282156, + "learning_rate": 1.1917786734093682e-05, + "loss": 0.2694319486618042, + "step": 3516 + }, + { + "epoch": 0.9340061080865755, + "grad_norm": 1.0871387001943549, + "learning_rate": 1.1913477284464434e-05, + "loss": 0.3049655258655548, + "step": 3517 + }, + { + "epoch": 0.9342716770681184, + "grad_norm": 1.0962864613999421, + "learning_rate": 1.1909167465939334e-05, + "loss": 0.30053725838661194, + "step": 3518 + }, + { + "epoch": 0.9345372460496614, + "grad_norm": 1.0261517334123498, + "learning_rate": 1.1904857279349265e-05, + "loss": 0.2611788809299469, + "step": 3519 + }, + { + "epoch": 0.9348028150312043, + "grad_norm": 1.1400957154071245, + "learning_rate": 1.1900546725525175e-05, + "loss": 0.28344646096229553, + "step": 3520 + }, + { + "epoch": 0.9350683840127473, + "grad_norm": 1.067093022484818, + "learning_rate": 1.1896235805298093e-05, + "loss": 0.2504042685031891, + "step": 3521 + }, + { + "epoch": 0.9353339529942902, + "grad_norm": 1.0534608212516616, + "learning_rate": 1.1891924519499113e-05, + "loss": 0.27877938747406006, + "step": 3522 + }, + { + "epoch": 0.9355995219758332, + "grad_norm": 1.046331705593262, + "learning_rate": 1.1887612868959394e-05, + "loss": 0.28176525235176086, + "step": 3523 + }, + { + "epoch": 0.9358650909573761, + "grad_norm": 1.1750063194789062, + "learning_rate": 1.1883300854510178e-05, + "loss": 0.32376354932785034, + "step": 3524 + }, + { + "epoch": 0.9361306599389191, + "grad_norm": 1.0908366283033504, + "learning_rate": 1.1878988476982772e-05, + "loss": 0.2846054434776306, + "step": 3525 + }, + { + "epoch": 0.936396228920462, + "grad_norm": 1.0507783491664777, + "learning_rate": 1.1874675737208546e-05, + "loss": 0.25711044669151306, + "step": 3526 + }, + { + "epoch": 0.9366617979020051, + "grad_norm": 1.078360429057703, + "learning_rate": 1.1870362636018946e-05, 
+ "loss": 0.2810837924480438, + "step": 3527 + }, + { + "epoch": 0.936927366883548, + "grad_norm": 1.2088151262046463, + "learning_rate": 1.186604917424549e-05, + "loss": 0.3090322017669678, + "step": 3528 + }, + { + "epoch": 0.937192935865091, + "grad_norm": 1.061646146170892, + "learning_rate": 1.1861735352719763e-05, + "loss": 0.2797972559928894, + "step": 3529 + }, + { + "epoch": 0.937458504846634, + "grad_norm": 1.3937474116807773, + "learning_rate": 1.1857421172273415e-05, + "loss": 0.3124893605709076, + "step": 3530 + }, + { + "epoch": 0.9377240738281769, + "grad_norm": 1.1043040217194096, + "learning_rate": 1.1853106633738174e-05, + "loss": 0.28317195177078247, + "step": 3531 + }, + { + "epoch": 0.9379896428097199, + "grad_norm": 1.0483798154842934, + "learning_rate": 1.1848791737945823e-05, + "loss": 0.27804574370384216, + "step": 3532 + }, + { + "epoch": 0.9382552117912628, + "grad_norm": 1.1007797171562173, + "learning_rate": 1.1844476485728236e-05, + "loss": 0.24936731159687042, + "step": 3533 + }, + { + "epoch": 0.9385207807728058, + "grad_norm": 1.16922301793574, + "learning_rate": 1.1840160877917335e-05, + "loss": 0.296974778175354, + "step": 3534 + }, + { + "epoch": 0.9387863497543487, + "grad_norm": 1.1172266681075624, + "learning_rate": 1.1835844915345117e-05, + "loss": 0.3048890233039856, + "step": 3535 + }, + { + "epoch": 0.9390519187358917, + "grad_norm": 1.0372698095624082, + "learning_rate": 1.1831528598843654e-05, + "loss": 0.2703601121902466, + "step": 3536 + }, + { + "epoch": 0.9393174877174346, + "grad_norm": 1.123009081238491, + "learning_rate": 1.1827211929245075e-05, + "loss": 0.30738013982772827, + "step": 3537 + }, + { + "epoch": 0.9395830566989776, + "grad_norm": 1.0660333251952498, + "learning_rate": 1.1822894907381589e-05, + "loss": 0.26538529992103577, + "step": 3538 + }, + { + "epoch": 0.9398486256805205, + "grad_norm": 1.1050453871275616, + "learning_rate": 1.1818577534085462e-05, + "loss": 0.26795464754104614, + "step": 3539 + 
}, + { + "epoch": 0.9401141946620635, + "grad_norm": 1.1533311536850575, + "learning_rate": 1.1814259810189034e-05, + "loss": 0.30891868472099304, + "step": 3540 + }, + { + "epoch": 0.9403797636436064, + "grad_norm": 1.8167204702159565, + "learning_rate": 1.1809941736524713e-05, + "loss": 0.29164037108421326, + "step": 3541 + }, + { + "epoch": 0.9406453326251494, + "grad_norm": 1.0875424396631934, + "learning_rate": 1.180562331392497e-05, + "loss": 0.30322739481925964, + "step": 3542 + }, + { + "epoch": 0.9409109016066923, + "grad_norm": 1.0765622649066557, + "learning_rate": 1.1801304543222349e-05, + "loss": 0.275432288646698, + "step": 3543 + }, + { + "epoch": 0.9411764705882353, + "grad_norm": 1.1566847425916267, + "learning_rate": 1.1796985425249459e-05, + "loss": 0.2788141965866089, + "step": 3544 + }, + { + "epoch": 0.9414420395697782, + "grad_norm": 1.203313197377309, + "learning_rate": 1.1792665960838967e-05, + "loss": 0.24254676699638367, + "step": 3545 + }, + { + "epoch": 0.9417076085513212, + "grad_norm": 1.1050026210111878, + "learning_rate": 1.1788346150823625e-05, + "loss": 0.2803058326244354, + "step": 3546 + }, + { + "epoch": 0.9419731775328641, + "grad_norm": 1.0993090963339842, + "learning_rate": 1.1784025996036232e-05, + "loss": 0.3068317174911499, + "step": 3547 + }, + { + "epoch": 0.9422387465144071, + "grad_norm": 0.9977731134117688, + "learning_rate": 1.1779705497309673e-05, + "loss": 0.23124024271965027, + "step": 3548 + }, + { + "epoch": 0.94250431549595, + "grad_norm": 1.080710306089679, + "learning_rate": 1.177538465547688e-05, + "loss": 0.2815462648868561, + "step": 3549 + }, + { + "epoch": 0.942769884477493, + "grad_norm": 1.1118952137889662, + "learning_rate": 1.1771063471370862e-05, + "loss": 0.29448196291923523, + "step": 3550 + }, + { + "epoch": 0.9430354534590359, + "grad_norm": 1.2691077751501818, + "learning_rate": 1.1766741945824698e-05, + "loss": 0.3176615834236145, + "step": 3551 + }, + { + "epoch": 0.9433010224405789, + 
"grad_norm": 1.1390071879475103, + "learning_rate": 1.1762420079671527e-05, + "loss": 0.29126274585723877, + "step": 3552 + }, + { + "epoch": 0.9435665914221218, + "grad_norm": 1.084504171285626, + "learning_rate": 1.1758097873744547e-05, + "loss": 0.27074337005615234, + "step": 3553 + }, + { + "epoch": 0.9438321604036648, + "grad_norm": 1.0495499557301764, + "learning_rate": 1.175377532887703e-05, + "loss": 0.2756083011627197, + "step": 3554 + }, + { + "epoch": 0.9440977293852079, + "grad_norm": 1.1028881447166687, + "learning_rate": 1.1749452445902315e-05, + "loss": 0.26918384432792664, + "step": 3555 + }, + { + "epoch": 0.9443632983667508, + "grad_norm": 1.0856468025535497, + "learning_rate": 1.17451292256538e-05, + "loss": 0.2550349235534668, + "step": 3556 + }, + { + "epoch": 0.9446288673482938, + "grad_norm": 1.0791996633460945, + "learning_rate": 1.1740805668964954e-05, + "loss": 0.2601481080055237, + "step": 3557 + }, + { + "epoch": 0.9448944363298367, + "grad_norm": 1.1367109564667788, + "learning_rate": 1.1736481776669307e-05, + "loss": 0.2848352789878845, + "step": 3558 + }, + { + "epoch": 0.9451600053113797, + "grad_norm": 1.1168278064757895, + "learning_rate": 1.173215754960045e-05, + "loss": 0.266584575176239, + "step": 3559 + }, + { + "epoch": 0.9454255742929226, + "grad_norm": 0.9979692557530664, + "learning_rate": 1.172783298859205e-05, + "loss": 0.25037410855293274, + "step": 3560 + }, + { + "epoch": 0.9456911432744656, + "grad_norm": 1.1049326363207628, + "learning_rate": 1.1723508094477825e-05, + "loss": 0.30239278078079224, + "step": 3561 + }, + { + "epoch": 0.9459567122560085, + "grad_norm": 1.0413977608943958, + "learning_rate": 1.1719182868091567e-05, + "loss": 0.2893553078174591, + "step": 3562 + }, + { + "epoch": 0.9462222812375515, + "grad_norm": 1.215187947788902, + "learning_rate": 1.1714857310267124e-05, + "loss": 0.2840202748775482, + "step": 3563 + }, + { + "epoch": 0.9464878502190944, + "grad_norm": 1.0615180068139964, + 
"learning_rate": 1.1710531421838422e-05, + "loss": 0.2614031732082367, + "step": 3564 + }, + { + "epoch": 0.9467534192006374, + "grad_norm": 1.0290230331800772, + "learning_rate": 1.1706205203639433e-05, + "loss": 0.267095148563385, + "step": 3565 + }, + { + "epoch": 0.9470189881821803, + "grad_norm": 1.2397291626994196, + "learning_rate": 1.1701878656504206e-05, + "loss": 0.25835227966308594, + "step": 3566 + }, + { + "epoch": 0.9472845571637233, + "grad_norm": 1.1319162410146095, + "learning_rate": 1.1697551781266845e-05, + "loss": 0.27547580003738403, + "step": 3567 + }, + { + "epoch": 0.9475501261452662, + "grad_norm": 1.089656044815204, + "learning_rate": 1.169322457876152e-05, + "loss": 0.251165509223938, + "step": 3568 + }, + { + "epoch": 0.9478156951268092, + "grad_norm": 1.2350323802819905, + "learning_rate": 1.1688897049822467e-05, + "loss": 0.2738516926765442, + "step": 3569 + }, + { + "epoch": 0.9480812641083521, + "grad_norm": 1.0315369616879289, + "learning_rate": 1.1684569195283981e-05, + "loss": 0.2745274305343628, + "step": 3570 + }, + { + "epoch": 0.9483468330898951, + "grad_norm": 1.180099592022995, + "learning_rate": 1.1680241015980423e-05, + "loss": 0.28586819767951965, + "step": 3571 + }, + { + "epoch": 0.948612402071438, + "grad_norm": 1.2233918967574897, + "learning_rate": 1.167591251274621e-05, + "loss": 0.2559577524662018, + "step": 3572 + }, + { + "epoch": 0.948877971052981, + "grad_norm": 1.155824963337958, + "learning_rate": 1.1671583686415833e-05, + "loss": 0.26069143414497375, + "step": 3573 + }, + { + "epoch": 0.949143540034524, + "grad_norm": 1.078529730225554, + "learning_rate": 1.1667254537823838e-05, + "loss": 0.26866453886032104, + "step": 3574 + }, + { + "epoch": 0.9494091090160669, + "grad_norm": 1.0772599867154102, + "learning_rate": 1.166292506780483e-05, + "loss": 0.25285348296165466, + "step": 3575 + }, + { + "epoch": 0.9496746779976099, + "grad_norm": 1.1335172942215501, + "learning_rate": 1.1658595277193479e-05, + 
"loss": 0.3330434262752533, + "step": 3576 + }, + { + "epoch": 0.9499402469791528, + "grad_norm": 1.076438251163932, + "learning_rate": 1.1654265166824522e-05, + "loss": 0.2789473533630371, + "step": 3577 + }, + { + "epoch": 0.9502058159606958, + "grad_norm": 1.2746037306212283, + "learning_rate": 1.164993473753275e-05, + "loss": 0.30984824895858765, + "step": 3578 + }, + { + "epoch": 0.9504713849422387, + "grad_norm": 1.0517088315750878, + "learning_rate": 1.164560399015302e-05, + "loss": 0.23881833255290985, + "step": 3579 + }, + { + "epoch": 0.9507369539237817, + "grad_norm": 1.1012484750770577, + "learning_rate": 1.164127292552025e-05, + "loss": 0.3027937114238739, + "step": 3580 + }, + { + "epoch": 0.9510025229053246, + "grad_norm": 1.1998484228117954, + "learning_rate": 1.1636941544469413e-05, + "loss": 0.2901906371116638, + "step": 3581 + }, + { + "epoch": 0.9512680918868676, + "grad_norm": 1.069491787313744, + "learning_rate": 1.1632609847835556e-05, + "loss": 0.28961148858070374, + "step": 3582 + }, + { + "epoch": 0.9515336608684106, + "grad_norm": 1.0782542825887276, + "learning_rate": 1.1628277836453774e-05, + "loss": 0.2730783224105835, + "step": 3583 + }, + { + "epoch": 0.9517992298499536, + "grad_norm": 1.0952017771476839, + "learning_rate": 1.1623945511159232e-05, + "loss": 0.3195485770702362, + "step": 3584 + }, + { + "epoch": 0.9520647988314965, + "grad_norm": 1.1514370971708257, + "learning_rate": 1.1619612872787144e-05, + "loss": 0.3097516894340515, + "step": 3585 + }, + { + "epoch": 0.9523303678130395, + "grad_norm": 1.0422990071728377, + "learning_rate": 1.1615279922172796e-05, + "loss": 0.2716284692287445, + "step": 3586 + }, + { + "epoch": 0.9525959367945824, + "grad_norm": 0.9669355988334725, + "learning_rate": 1.1610946660151531e-05, + "loss": 0.2601209878921509, + "step": 3587 + }, + { + "epoch": 0.9528615057761254, + "grad_norm": 1.1027425019898653, + "learning_rate": 1.1606613087558748e-05, + "loss": 0.28665289282798767, + "step": 3588 + 
}, + { + "epoch": 0.9531270747576683, + "grad_norm": 1.082078861677668, + "learning_rate": 1.1602279205229912e-05, + "loss": 0.3019893765449524, + "step": 3589 + }, + { + "epoch": 0.9533926437392113, + "grad_norm": 0.9778282797717269, + "learning_rate": 1.1597945014000537e-05, + "loss": 0.2635146677494049, + "step": 3590 + }, + { + "epoch": 0.9536582127207542, + "grad_norm": 1.0527782897227813, + "learning_rate": 1.1593610514706217e-05, + "loss": 0.2704858183860779, + "step": 3591 + }, + { + "epoch": 0.9539237817022972, + "grad_norm": 1.2295509988273574, + "learning_rate": 1.1589275708182581e-05, + "loss": 0.31997931003570557, + "step": 3592 + }, + { + "epoch": 0.9541893506838401, + "grad_norm": 1.1529907760165448, + "learning_rate": 1.1584940595265332e-05, + "loss": 0.2308788150548935, + "step": 3593 + }, + { + "epoch": 0.9544549196653831, + "grad_norm": 1.0980235303762964, + "learning_rate": 1.1580605176790229e-05, + "loss": 0.28886470198631287, + "step": 3594 + }, + { + "epoch": 0.954720488646926, + "grad_norm": 1.313883667721807, + "learning_rate": 1.157626945359309e-05, + "loss": 0.30698686838150024, + "step": 3595 + }, + { + "epoch": 0.954986057628469, + "grad_norm": 1.1087251273709688, + "learning_rate": 1.1571933426509789e-05, + "loss": 0.27475905418395996, + "step": 3596 + }, + { + "epoch": 0.955251626610012, + "grad_norm": 1.1064883207545173, + "learning_rate": 1.1567597096376264e-05, + "loss": 0.2568071484565735, + "step": 3597 + }, + { + "epoch": 0.9555171955915549, + "grad_norm": 1.28706485993144, + "learning_rate": 1.1563260464028507e-05, + "loss": 0.2574060261249542, + "step": 3598 + }, + { + "epoch": 0.9557827645730979, + "grad_norm": 1.193494963897618, + "learning_rate": 1.1558923530302571e-05, + "loss": 0.2847997546195984, + "step": 3599 + }, + { + "epoch": 0.9560483335546408, + "grad_norm": 1.0723094070831873, + "learning_rate": 1.155458629603456e-05, + "loss": 0.2594734728336334, + "step": 3600 + }, + { + "epoch": 0.9563139025361838, + 
"grad_norm": 1.0020160427681732, + "learning_rate": 1.155024876206065e-05, + "loss": 0.2300589680671692, + "step": 3601 + }, + { + "epoch": 0.9565794715177267, + "grad_norm": 1.1475438454718678, + "learning_rate": 1.1545910929217059e-05, + "loss": 0.29174795746803284, + "step": 3602 + }, + { + "epoch": 0.9568450404992697, + "grad_norm": 1.0425930414114217, + "learning_rate": 1.1541572798340076e-05, + "loss": 0.2666400074958801, + "step": 3603 + }, + { + "epoch": 0.9571106094808126, + "grad_norm": 1.0067559469755134, + "learning_rate": 1.1537234370266035e-05, + "loss": 0.24651308357715607, + "step": 3604 + }, + { + "epoch": 0.9573761784623556, + "grad_norm": 1.1542471481522265, + "learning_rate": 1.1532895645831339e-05, + "loss": 0.29991376399993896, + "step": 3605 + }, + { + "epoch": 0.9576417474438985, + "grad_norm": 1.0631305192934537, + "learning_rate": 1.1528556625872443e-05, + "loss": 0.27713578939437866, + "step": 3606 + }, + { + "epoch": 0.9579073164254415, + "grad_norm": 1.0497999275546905, + "learning_rate": 1.1524217311225857e-05, + "loss": 0.26503294706344604, + "step": 3607 + }, + { + "epoch": 0.9581728854069844, + "grad_norm": 1.1479000180189152, + "learning_rate": 1.1519877702728149e-05, + "loss": 0.28627675771713257, + "step": 3608 + }, + { + "epoch": 0.9584384543885274, + "grad_norm": 1.0333891142616893, + "learning_rate": 1.1515537801215944e-05, + "loss": 0.26862916350364685, + "step": 3609 + }, + { + "epoch": 0.9587040233700703, + "grad_norm": 1.2518522451268181, + "learning_rate": 1.1511197607525926e-05, + "loss": 0.29697147011756897, + "step": 3610 + }, + { + "epoch": 0.9589695923516134, + "grad_norm": 1.0668919106736792, + "learning_rate": 1.1506857122494832e-05, + "loss": 0.2980155944824219, + "step": 3611 + }, + { + "epoch": 0.9592351613331563, + "grad_norm": 1.1016644329026075, + "learning_rate": 1.1502516346959458e-05, + "loss": 0.2847440838813782, + "step": 3612 + }, + { + "epoch": 0.9595007303146993, + "grad_norm": 1.1131533712076647, + 
"learning_rate": 1.149817528175665e-05, + "loss": 0.2812016010284424, + "step": 3613 + }, + { + "epoch": 0.9597662992962422, + "grad_norm": 1.0387818826049915, + "learning_rate": 1.1493833927723319e-05, + "loss": 0.26856982707977295, + "step": 3614 + }, + { + "epoch": 0.9600318682777852, + "grad_norm": 1.0595715138301371, + "learning_rate": 1.1489492285696424e-05, + "loss": 0.2651693820953369, + "step": 3615 + }, + { + "epoch": 0.9602974372593281, + "grad_norm": 1.1384265947297394, + "learning_rate": 1.1485150356512986e-05, + "loss": 0.29811644554138184, + "step": 3616 + }, + { + "epoch": 0.9605630062408711, + "grad_norm": 1.0449713925688802, + "learning_rate": 1.1480808141010071e-05, + "loss": 0.2622855007648468, + "step": 3617 + }, + { + "epoch": 0.960828575222414, + "grad_norm": 1.1964334046740135, + "learning_rate": 1.1476465640024814e-05, + "loss": 0.3067246377468109, + "step": 3618 + }, + { + "epoch": 0.961094144203957, + "grad_norm": 1.0999678942020576, + "learning_rate": 1.1472122854394394e-05, + "loss": 0.25928011536598206, + "step": 3619 + }, + { + "epoch": 0.9613597131855, + "grad_norm": 1.0356853160291564, + "learning_rate": 1.146777978495605e-05, + "loss": 0.2574170231819153, + "step": 3620 + }, + { + "epoch": 0.9616252821670429, + "grad_norm": 1.1366453776894136, + "learning_rate": 1.1463436432547073e-05, + "loss": 0.2845388650894165, + "step": 3621 + }, + { + "epoch": 0.9618908511485859, + "grad_norm": 1.1067131961561003, + "learning_rate": 1.145909279800481e-05, + "loss": 0.28735876083374023, + "step": 3622 + }, + { + "epoch": 0.9621564201301288, + "grad_norm": 1.100639151702203, + "learning_rate": 1.1454748882166666e-05, + "loss": 0.25739723443984985, + "step": 3623 + }, + { + "epoch": 0.9624219891116718, + "grad_norm": 1.0743852778260963, + "learning_rate": 1.1450404685870098e-05, + "loss": 0.25144338607788086, + "step": 3624 + }, + { + "epoch": 0.9626875580932147, + "grad_norm": 1.0451944769292063, + "learning_rate": 1.144606020995261e-05, + 
"loss": 0.23981891572475433, + "step": 3625 + }, + { + "epoch": 0.9629531270747577, + "grad_norm": 1.1215387475511582, + "learning_rate": 1.1441715455251764e-05, + "loss": 0.30925339460372925, + "step": 3626 + }, + { + "epoch": 0.9632186960563006, + "grad_norm": 1.1193965021491372, + "learning_rate": 1.1437370422605184e-05, + "loss": 0.2559184432029724, + "step": 3627 + }, + { + "epoch": 0.9634842650378436, + "grad_norm": 1.221260182162867, + "learning_rate": 1.1433025112850542e-05, + "loss": 0.3001229166984558, + "step": 3628 + }, + { + "epoch": 0.9637498340193865, + "grad_norm": 0.9957913669659347, + "learning_rate": 1.1428679526825557e-05, + "loss": 0.24304218590259552, + "step": 3629 + }, + { + "epoch": 0.9640154030009295, + "grad_norm": 1.0405086595778643, + "learning_rate": 1.1424333665368011e-05, + "loss": 0.25677186250686646, + "step": 3630 + }, + { + "epoch": 0.9642809719824724, + "grad_norm": 1.0362119568252992, + "learning_rate": 1.141998752931573e-05, + "loss": 0.2589085102081299, + "step": 3631 + }, + { + "epoch": 0.9645465409640154, + "grad_norm": 1.1004952842028541, + "learning_rate": 1.1415641119506601e-05, + "loss": 0.2588059604167938, + "step": 3632 + }, + { + "epoch": 0.9648121099455583, + "grad_norm": 1.1379378571012249, + "learning_rate": 1.1411294436778562e-05, + "loss": 0.26097869873046875, + "step": 3633 + }, + { + "epoch": 0.9650776789271013, + "grad_norm": 1.2218308438631786, + "learning_rate": 1.1406947481969598e-05, + "loss": 0.26022520661354065, + "step": 3634 + }, + { + "epoch": 0.9653432479086442, + "grad_norm": 1.0737420773814035, + "learning_rate": 1.140260025591775e-05, + "loss": 0.26242876052856445, + "step": 3635 + }, + { + "epoch": 0.9656088168901872, + "grad_norm": 1.1396910340144906, + "learning_rate": 1.1398252759461119e-05, + "loss": 0.30035555362701416, + "step": 3636 + }, + { + "epoch": 0.9658743858717301, + "grad_norm": 1.1365210980452296, + "learning_rate": 1.1393904993437848e-05, + "loss": 0.26388341188430786, + "step": 
3637 + }, + { + "epoch": 0.9661399548532731, + "grad_norm": 1.06242333907382, + "learning_rate": 1.1389556958686132e-05, + "loss": 0.28116434812545776, + "step": 3638 + }, + { + "epoch": 0.966405523834816, + "grad_norm": 1.0513966621960738, + "learning_rate": 1.1385208656044222e-05, + "loss": 0.25372493267059326, + "step": 3639 + }, + { + "epoch": 0.9666710928163591, + "grad_norm": 1.1171784181414381, + "learning_rate": 1.1380860086350422e-05, + "loss": 0.2648317813873291, + "step": 3640 + }, + { + "epoch": 0.966936661797902, + "grad_norm": 1.0508956007113521, + "learning_rate": 1.1376511250443082e-05, + "loss": 0.26981276273727417, + "step": 3641 + }, + { + "epoch": 0.967202230779445, + "grad_norm": 1.1513465918880585, + "learning_rate": 1.1372162149160608e-05, + "loss": 0.2934207618236542, + "step": 3642 + }, + { + "epoch": 0.967467799760988, + "grad_norm": 0.9705407845284122, + "learning_rate": 1.1367812783341454e-05, + "loss": 0.24250900745391846, + "step": 3643 + }, + { + "epoch": 0.9677333687425309, + "grad_norm": 1.0409007473472116, + "learning_rate": 1.1363463153824125e-05, + "loss": 0.2565772235393524, + "step": 3644 + }, + { + "epoch": 0.9679989377240739, + "grad_norm": 1.2386980142351325, + "learning_rate": 1.1359113261447183e-05, + "loss": 0.28407829999923706, + "step": 3645 + }, + { + "epoch": 0.9682645067056168, + "grad_norm": 1.1134220293120092, + "learning_rate": 1.1354763107049234e-05, + "loss": 0.2974489629268646, + "step": 3646 + }, + { + "epoch": 0.9685300756871598, + "grad_norm": 1.1611486704366027, + "learning_rate": 1.1350412691468935e-05, + "loss": 0.27539899945259094, + "step": 3647 + }, + { + "epoch": 0.9687956446687027, + "grad_norm": 1.1777496863563888, + "learning_rate": 1.1346062015544997e-05, + "loss": 0.28256523609161377, + "step": 3648 + }, + { + "epoch": 0.9690612136502457, + "grad_norm": 1.0910813538672366, + "learning_rate": 1.1341711080116176e-05, + "loss": 0.27582883834838867, + "step": 3649 + }, + { + "epoch": 
0.9693267826317886, + "grad_norm": 1.2299419127493794, + "learning_rate": 1.1337359886021285e-05, + "loss": 0.3199389576911926, + "step": 3650 + }, + { + "epoch": 0.9695923516133316, + "grad_norm": 1.078226808322517, + "learning_rate": 1.1333008434099178e-05, + "loss": 0.2922326922416687, + "step": 3651 + }, + { + "epoch": 0.9698579205948745, + "grad_norm": 1.1833154338367669, + "learning_rate": 1.1328656725188767e-05, + "loss": 0.285635381937027, + "step": 3652 + }, + { + "epoch": 0.9701234895764175, + "grad_norm": 1.1606724829825772, + "learning_rate": 1.1324304760129009e-05, + "loss": 0.3347492814064026, + "step": 3653 + }, + { + "epoch": 0.9703890585579604, + "grad_norm": 1.1079831575977723, + "learning_rate": 1.1319952539758912e-05, + "loss": 0.27379873394966125, + "step": 3654 + }, + { + "epoch": 0.9706546275395034, + "grad_norm": 1.2487680540467303, + "learning_rate": 1.1315600064917534e-05, + "loss": 0.27911311388015747, + "step": 3655 + }, + { + "epoch": 0.9709201965210463, + "grad_norm": 1.187492816658345, + "learning_rate": 1.1311247336443982e-05, + "loss": 0.25750118494033813, + "step": 3656 + }, + { + "epoch": 0.9711857655025893, + "grad_norm": 1.1010343448161526, + "learning_rate": 1.1306894355177405e-05, + "loss": 0.28723078966140747, + "step": 3657 + }, + { + "epoch": 0.9714513344841322, + "grad_norm": 1.0378840795289885, + "learning_rate": 1.1302541121957008e-05, + "loss": 0.25269389152526855, + "step": 3658 + }, + { + "epoch": 0.9717169034656752, + "grad_norm": 1.1923604766845932, + "learning_rate": 1.1298187637622046e-05, + "loss": 0.3041607439517975, + "step": 3659 + }, + { + "epoch": 0.9719824724472181, + "grad_norm": 1.0812687625707742, + "learning_rate": 1.1293833903011819e-05, + "loss": 0.2826605439186096, + "step": 3660 + }, + { + "epoch": 0.9722480414287611, + "grad_norm": 1.1010565715724137, + "learning_rate": 1.1289479918965675e-05, + "loss": 0.2830520570278168, + "step": 3661 + }, + { + "epoch": 0.972513610410304, + "grad_norm": 
1.0160541896764337, + "learning_rate": 1.1285125686323011e-05, + "loss": 0.24295952916145325, + "step": 3662 + }, + { + "epoch": 0.972779179391847, + "grad_norm": 1.108181435484162, + "learning_rate": 1.1280771205923269e-05, + "loss": 0.28775808215141296, + "step": 3663 + }, + { + "epoch": 0.97304474837339, + "grad_norm": 0.9715417125511246, + "learning_rate": 1.127641647860595e-05, + "loss": 0.24650296568870544, + "step": 3664 + }, + { + "epoch": 0.9733103173549329, + "grad_norm": 0.9305293200248026, + "learning_rate": 1.1272061505210584e-05, + "loss": 0.22344040870666504, + "step": 3665 + }, + { + "epoch": 0.9735758863364758, + "grad_norm": 1.0859092127038839, + "learning_rate": 1.1267706286576759e-05, + "loss": 0.26920852065086365, + "step": 3666 + }, + { + "epoch": 0.9738414553180188, + "grad_norm": 1.1792674236289236, + "learning_rate": 1.1263350823544115e-05, + "loss": 0.27615875005722046, + "step": 3667 + }, + { + "epoch": 0.9741070242995619, + "grad_norm": 1.0470064037587914, + "learning_rate": 1.1258995116952334e-05, + "loss": 0.2768712043762207, + "step": 3668 + }, + { + "epoch": 0.9743725932811048, + "grad_norm": 1.0568329464095596, + "learning_rate": 1.1254639167641141e-05, + "loss": 0.27764153480529785, + "step": 3669 + }, + { + "epoch": 0.9746381622626478, + "grad_norm": 1.139437307258024, + "learning_rate": 1.1250282976450316e-05, + "loss": 0.27423611283302307, + "step": 3670 + }, + { + "epoch": 0.9749037312441907, + "grad_norm": 1.1238013222894891, + "learning_rate": 1.1245926544219676e-05, + "loss": 0.2626228332519531, + "step": 3671 + }, + { + "epoch": 0.9751693002257337, + "grad_norm": 1.2807555997920204, + "learning_rate": 1.1241569871789096e-05, + "loss": 0.25524014234542847, + "step": 3672 + }, + { + "epoch": 0.9754348692072766, + "grad_norm": 1.1042234540757712, + "learning_rate": 1.1237212959998485e-05, + "loss": 0.30857735872268677, + "step": 3673 + }, + { + "epoch": 0.9757004381888196, + "grad_norm": 1.0235359310129009, + "learning_rate": 
1.1232855809687807e-05, + "loss": 0.25099021196365356, + "step": 3674 + }, + { + "epoch": 0.9759660071703625, + "grad_norm": 1.0116202981123898, + "learning_rate": 1.1228498421697068e-05, + "loss": 0.22664576768875122, + "step": 3675 + }, + { + "epoch": 0.9762315761519055, + "grad_norm": 1.151038777130998, + "learning_rate": 1.1224140796866322e-05, + "loss": 0.24727366864681244, + "step": 3676 + }, + { + "epoch": 0.9764971451334484, + "grad_norm": 1.160849411640656, + "learning_rate": 1.121978293603567e-05, + "loss": 0.2561935782432556, + "step": 3677 + }, + { + "epoch": 0.9767627141149914, + "grad_norm": 1.10648815955184, + "learning_rate": 1.1215424840045254e-05, + "loss": 0.2594214677810669, + "step": 3678 + }, + { + "epoch": 0.9770282830965343, + "grad_norm": 1.130419852826836, + "learning_rate": 1.1211066509735265e-05, + "loss": 0.2383778691291809, + "step": 3679 + }, + { + "epoch": 0.9772938520780773, + "grad_norm": 1.2393377504128167, + "learning_rate": 1.1206707945945934e-05, + "loss": 0.2864387035369873, + "step": 3680 + }, + { + "epoch": 0.9775594210596202, + "grad_norm": 1.2012269867709167, + "learning_rate": 1.1202349149517541e-05, + "loss": 0.30415672063827515, + "step": 3681 + }, + { + "epoch": 0.9778249900411632, + "grad_norm": 1.1590063847406842, + "learning_rate": 1.1197990121290415e-05, + "loss": 0.3030807375907898, + "step": 3682 + }, + { + "epoch": 0.9780905590227061, + "grad_norm": 1.1251124481371277, + "learning_rate": 1.1193630862104922e-05, + "loss": 0.2518938481807709, + "step": 3683 + }, + { + "epoch": 0.9783561280042491, + "grad_norm": 1.2096921428918863, + "learning_rate": 1.1189271372801474e-05, + "loss": 0.25353187322616577, + "step": 3684 + }, + { + "epoch": 0.978621696985792, + "grad_norm": 1.401372369430627, + "learning_rate": 1.1184911654220534e-05, + "loss": 0.30639684200286865, + "step": 3685 + }, + { + "epoch": 0.978887265967335, + "grad_norm": 1.1636733460077495, + "learning_rate": 1.1180551707202602e-05, + "loss": 
0.295099139213562, + "step": 3686 + }, + { + "epoch": 0.979152834948878, + "grad_norm": 1.0596592048702305, + "learning_rate": 1.1176191532588224e-05, + "loss": 0.2428167164325714, + "step": 3687 + }, + { + "epoch": 0.9794184039304209, + "grad_norm": 1.0401088292404943, + "learning_rate": 1.1171831131217989e-05, + "loss": 0.2716362774372101, + "step": 3688 + }, + { + "epoch": 0.9796839729119639, + "grad_norm": 1.1130709970940986, + "learning_rate": 1.1167470503932534e-05, + "loss": 0.28350287675857544, + "step": 3689 + }, + { + "epoch": 0.9799495418935068, + "grad_norm": 1.0214004744947676, + "learning_rate": 1.1163109651572535e-05, + "loss": 0.2776945233345032, + "step": 3690 + }, + { + "epoch": 0.9802151108750498, + "grad_norm": 1.041237294346951, + "learning_rate": 1.115874857497871e-05, + "loss": 0.2712942063808441, + "step": 3691 + }, + { + "epoch": 0.9804806798565927, + "grad_norm": 1.058232702389033, + "learning_rate": 1.1154387274991829e-05, + "loss": 0.2530008852481842, + "step": 3692 + }, + { + "epoch": 0.9807462488381357, + "grad_norm": 1.0327043619893976, + "learning_rate": 1.1150025752452693e-05, + "loss": 0.24889500439167023, + "step": 3693 + }, + { + "epoch": 0.9810118178196786, + "grad_norm": 1.1013842404358833, + "learning_rate": 1.1145664008202158e-05, + "loss": 0.3051255941390991, + "step": 3694 + }, + { + "epoch": 0.9812773868012216, + "grad_norm": 1.0503003262830894, + "learning_rate": 1.1141302043081112e-05, + "loss": 0.24781765043735504, + "step": 3695 + }, + { + "epoch": 0.9815429557827646, + "grad_norm": 1.2510153019418302, + "learning_rate": 1.1136939857930497e-05, + "loss": 0.3021858036518097, + "step": 3696 + }, + { + "epoch": 0.9818085247643076, + "grad_norm": 1.1052947984569603, + "learning_rate": 1.1132577453591284e-05, + "loss": 0.3026372194290161, + "step": 3697 + }, + { + "epoch": 0.9820740937458505, + "grad_norm": 1.2367828155450835, + "learning_rate": 1.1128214830904494e-05, + "loss": 0.31511861085891724, + "step": 3698 + }, + { 
+ "epoch": 0.9823396627273935, + "grad_norm": 1.076549494496895, + "learning_rate": 1.112385199071119e-05, + "loss": 0.27885258197784424, + "step": 3699 + }, + { + "epoch": 0.9826052317089364, + "grad_norm": 1.0546536629749794, + "learning_rate": 1.1119488933852477e-05, + "loss": 0.2724893391132355, + "step": 3700 + }, + { + "epoch": 0.9828708006904794, + "grad_norm": 1.0683428715266594, + "learning_rate": 1.1115125661169503e-05, + "loss": 0.2836218774318695, + "step": 3701 + }, + { + "epoch": 0.9831363696720223, + "grad_norm": 1.1039385208642913, + "learning_rate": 1.111076217350345e-05, + "loss": 0.24220457673072815, + "step": 3702 + }, + { + "epoch": 0.9834019386535653, + "grad_norm": 1.1586770288767172, + "learning_rate": 1.1106398471695554e-05, + "loss": 0.28599557280540466, + "step": 3703 + }, + { + "epoch": 0.9836675076351082, + "grad_norm": 1.0806945340822165, + "learning_rate": 1.110203455658708e-05, + "loss": 0.30559849739074707, + "step": 3704 + }, + { + "epoch": 0.9839330766166512, + "grad_norm": 1.0573640293446354, + "learning_rate": 1.109767042901934e-05, + "loss": 0.2763117551803589, + "step": 3705 + }, + { + "epoch": 0.9841986455981941, + "grad_norm": 0.9563131800944344, + "learning_rate": 1.109330608983369e-05, + "loss": 0.2028101086616516, + "step": 3706 + }, + { + "epoch": 0.9844642145797371, + "grad_norm": 0.9787835815750591, + "learning_rate": 1.1088941539871515e-05, + "loss": 0.25386112928390503, + "step": 3707 + }, + { + "epoch": 0.98472978356128, + "grad_norm": 1.075996733851366, + "learning_rate": 1.1084576779974257e-05, + "loss": 0.2588289976119995, + "step": 3708 + }, + { + "epoch": 0.984995352542823, + "grad_norm": 1.3003014971272602, + "learning_rate": 1.1080211810983385e-05, + "loss": 0.3201071321964264, + "step": 3709 + }, + { + "epoch": 0.985260921524366, + "grad_norm": 1.2030478206249715, + "learning_rate": 1.107584663374042e-05, + "loss": 0.28439003229141235, + "step": 3710 + }, + { + "epoch": 0.9855264905059089, + "grad_norm": 
1.060347062251152, + "learning_rate": 1.1071481249086908e-05, + "loss": 0.2734091579914093, + "step": 3711 + }, + { + "epoch": 0.9857920594874519, + "grad_norm": 1.2115603819692051, + "learning_rate": 1.1067115657864451e-05, + "loss": 0.2917581796646118, + "step": 3712 + }, + { + "epoch": 0.9860576284689948, + "grad_norm": 1.2063997459644484, + "learning_rate": 1.1062749860914681e-05, + "loss": 0.3569914996623993, + "step": 3713 + }, + { + "epoch": 0.9863231974505378, + "grad_norm": 1.127711451799425, + "learning_rate": 1.1058383859079271e-05, + "loss": 0.2574514150619507, + "step": 3714 + }, + { + "epoch": 0.9865887664320807, + "grad_norm": 1.119813552337215, + "learning_rate": 1.1054017653199936e-05, + "loss": 0.3035826086997986, + "step": 3715 + }, + { + "epoch": 0.9868543354136237, + "grad_norm": 1.5863085854725767, + "learning_rate": 1.1049651244118424e-05, + "loss": 0.28067824244499207, + "step": 3716 + }, + { + "epoch": 0.9871199043951666, + "grad_norm": 1.0916600834300794, + "learning_rate": 1.1045284632676535e-05, + "loss": 0.2511579394340515, + "step": 3717 + }, + { + "epoch": 0.9873854733767096, + "grad_norm": 1.2657546371764674, + "learning_rate": 1.1040917819716097e-05, + "loss": 0.3059889078140259, + "step": 3718 + }, + { + "epoch": 0.9876510423582525, + "grad_norm": 1.1224253435238671, + "learning_rate": 1.103655080607898e-05, + "loss": 0.2642200291156769, + "step": 3719 + }, + { + "epoch": 0.9879166113397955, + "grad_norm": 1.0969568004465404, + "learning_rate": 1.1032183592607094e-05, + "loss": 0.2743483781814575, + "step": 3720 + }, + { + "epoch": 0.9881821803213384, + "grad_norm": 1.1317768374698567, + "learning_rate": 1.1027816180142383e-05, + "loss": 0.2597433030605316, + "step": 3721 + }, + { + "epoch": 0.9884477493028814, + "grad_norm": 1.0759312888673545, + "learning_rate": 1.1023448569526834e-05, + "loss": 0.24439337849617004, + "step": 3722 + }, + { + "epoch": 0.9887133182844243, + "grad_norm": 1.0386429343076329, + "learning_rate": 
1.1019080761602473e-05, + "loss": 0.2520195245742798, + "step": 3723 + }, + { + "epoch": 0.9889788872659674, + "grad_norm": 1.0921837996926786, + "learning_rate": 1.1014712757211359e-05, + "loss": 0.2904737889766693, + "step": 3724 + }, + { + "epoch": 0.9892444562475103, + "grad_norm": 1.12008182824954, + "learning_rate": 1.1010344557195588e-05, + "loss": 0.28096869587898254, + "step": 3725 + }, + { + "epoch": 0.9895100252290533, + "grad_norm": 1.8392230806075218, + "learning_rate": 1.1005976162397309e-05, + "loss": 0.317839652299881, + "step": 3726 + }, + { + "epoch": 0.9897755942105962, + "grad_norm": 1.19381185696067, + "learning_rate": 1.100160757365869e-05, + "loss": 0.29213201999664307, + "step": 3727 + }, + { + "epoch": 0.9900411631921392, + "grad_norm": 1.215113877896921, + "learning_rate": 1.0997238791821943e-05, + "loss": 0.27034991979599, + "step": 3728 + }, + { + "epoch": 0.9903067321736821, + "grad_norm": 1.2893524723691567, + "learning_rate": 1.0992869817729317e-05, + "loss": 0.30504971742630005, + "step": 3729 + }, + { + "epoch": 0.9905723011552251, + "grad_norm": 1.109889585740049, + "learning_rate": 1.09885006522231e-05, + "loss": 0.30673110485076904, + "step": 3730 + }, + { + "epoch": 0.990837870136768, + "grad_norm": 1.0963153712692437, + "learning_rate": 1.0984131296145616e-05, + "loss": 0.27990686893463135, + "step": 3731 + }, + { + "epoch": 0.991103439118311, + "grad_norm": 1.0228240366531471, + "learning_rate": 1.0979761750339225e-05, + "loss": 0.24379019439220428, + "step": 3732 + }, + { + "epoch": 0.991369008099854, + "grad_norm": 1.1055702239918885, + "learning_rate": 1.0975392015646323e-05, + "loss": 0.30554595589637756, + "step": 3733 + }, + { + "epoch": 0.9916345770813969, + "grad_norm": 1.062606047652276, + "learning_rate": 1.0971022092909342e-05, + "loss": 0.245269775390625, + "step": 3734 + }, + { + "epoch": 0.9919001460629399, + "grad_norm": 1.0977829197687445, + "learning_rate": 1.0966651982970757e-05, + "loss": 0.2732948064804077, 
+ "step": 3735 + }, + { + "epoch": 0.9921657150444828, + "grad_norm": 0.992060831416128, + "learning_rate": 1.0962281686673071e-05, + "loss": 0.25989004969596863, + "step": 3736 + }, + { + "epoch": 0.9924312840260258, + "grad_norm": 1.1415489224758493, + "learning_rate": 1.0957911204858824e-05, + "loss": 0.32891198992729187, + "step": 3737 + }, + { + "epoch": 0.9926968530075687, + "grad_norm": 1.094277657297916, + "learning_rate": 1.0953540538370591e-05, + "loss": 0.29184675216674805, + "step": 3738 + }, + { + "epoch": 0.9929624219891117, + "grad_norm": 1.1381026162174743, + "learning_rate": 1.094916968805099e-05, + "loss": 0.2784018814563751, + "step": 3739 + }, + { + "epoch": 0.9932279909706546, + "grad_norm": 1.1670677505581852, + "learning_rate": 1.094479865474267e-05, + "loss": 0.26586195826530457, + "step": 3740 + }, + { + "epoch": 0.9934935599521976, + "grad_norm": 0.9575913416137994, + "learning_rate": 1.094042743928831e-05, + "loss": 0.24593298137187958, + "step": 3741 + }, + { + "epoch": 0.9937591289337405, + "grad_norm": 1.065966707682552, + "learning_rate": 1.0936056042530632e-05, + "loss": 0.2462792694568634, + "step": 3742 + }, + { + "epoch": 0.9940246979152835, + "grad_norm": 1.2074020558104472, + "learning_rate": 1.0931684465312388e-05, + "loss": 0.2688900828361511, + "step": 3743 + }, + { + "epoch": 0.9942902668968264, + "grad_norm": 1.099682442025033, + "learning_rate": 1.0927312708476367e-05, + "loss": 0.2842782735824585, + "step": 3744 + }, + { + "epoch": 0.9945558358783694, + "grad_norm": 1.0548829148077135, + "learning_rate": 1.0922940772865393e-05, + "loss": 0.249299556016922, + "step": 3745 + }, + { + "epoch": 0.9948214048599123, + "grad_norm": 1.175705262338143, + "learning_rate": 1.0918568659322325e-05, + "loss": 0.2765413522720337, + "step": 3746 + }, + { + "epoch": 0.9950869738414553, + "grad_norm": 1.1414819691892306, + "learning_rate": 1.0914196368690049e-05, + "loss": 0.29750365018844604, + "step": 3747 + }, + { + "epoch": 
0.9953525428229982, + "grad_norm": 1.153321336461836, + "learning_rate": 1.0909823901811496e-05, + "loss": 0.25272879004478455, + "step": 3748 + }, + { + "epoch": 0.9956181118045412, + "grad_norm": 1.1906489486154657, + "learning_rate": 1.0905451259529626e-05, + "loss": 0.3056861460208893, + "step": 3749 + }, + { + "epoch": 0.9958836807860841, + "grad_norm": 1.1596775625362263, + "learning_rate": 1.090107844268743e-05, + "loss": 0.26723814010620117, + "step": 3750 + }, + { + "epoch": 0.9961492497676271, + "grad_norm": 1.167023454532776, + "learning_rate": 1.0896705452127943e-05, + "loss": 0.29998716711997986, + "step": 3751 + }, + { + "epoch": 0.9964148187491702, + "grad_norm": 1.1519689723038142, + "learning_rate": 1.0892332288694216e-05, + "loss": 0.2690891623497009, + "step": 3752 + }, + { + "epoch": 0.9966803877307131, + "grad_norm": 1.1385088428140973, + "learning_rate": 1.0887958953229349e-05, + "loss": 0.25555333495140076, + "step": 3753 + }, + { + "epoch": 0.996945956712256, + "grad_norm": 1.1617836993376212, + "learning_rate": 1.088358544657647e-05, + "loss": 0.27788421511650085, + "step": 3754 + }, + { + "epoch": 0.997211525693799, + "grad_norm": 1.0981105518173184, + "learning_rate": 1.0879211769578734e-05, + "loss": 0.2566586136817932, + "step": 3755 + }, + { + "epoch": 0.997477094675342, + "grad_norm": 1.1742409056404244, + "learning_rate": 1.0874837923079339e-05, + "loss": 0.3028980493545532, + "step": 3756 + }, + { + "epoch": 0.9977426636568849, + "grad_norm": 1.151070664269376, + "learning_rate": 1.0870463907921512e-05, + "loss": 0.30244824290275574, + "step": 3757 + }, + { + "epoch": 0.9980082326384279, + "grad_norm": 1.0175517300218122, + "learning_rate": 1.086608972494851e-05, + "loss": 0.2610962390899658, + "step": 3758 + }, + { + "epoch": 0.9982738016199708, + "grad_norm": 1.1587347636182326, + "learning_rate": 1.0861715375003623e-05, + "loss": 0.2733536660671234, + "step": 3759 + }, + { + "epoch": 0.9985393706015138, + "grad_norm": 
1.094010099730521, + "learning_rate": 1.0857340858930175e-05, + "loss": 0.2915020287036896, + "step": 3760 + }, + { + "epoch": 0.9988049395830567, + "grad_norm": 1.1164899423303463, + "learning_rate": 1.085296617757152e-05, + "loss": 0.2940186560153961, + "step": 3761 + }, + { + "epoch": 0.9990705085645997, + "grad_norm": 1.1441195343158572, + "learning_rate": 1.0848591331771045e-05, + "loss": 0.3002738952636719, + "step": 3762 + }, + { + "epoch": 0.9993360775461426, + "grad_norm": 1.0530840422742196, + "learning_rate": 1.0844216322372172e-05, + "loss": 0.284588485956192, + "step": 3763 + }, + { + "epoch": 0.9996016465276856, + "grad_norm": 1.0971261053209735, + "learning_rate": 1.0839841150218347e-05, + "loss": 0.29395923018455505, + "step": 3764 + }, + { + "epoch": 0.9998672155092285, + "grad_norm": 1.1355876604442514, + "learning_rate": 1.083546581615305e-05, + "loss": 0.2574613094329834, + "step": 3765 + }, + { + "epoch": 1.0, + "grad_norm": 1.535375625820537, + "learning_rate": 1.0831090321019801e-05, + "loss": 0.177712082862854, + "step": 3766 + }, + { + "epoch": 1.000265568981543, + "grad_norm": 1.1101315935040728, + "learning_rate": 1.0826714665662139e-05, + "loss": 0.29758381843566895, + "step": 3767 + }, + { + "epoch": 1.000531137963086, + "grad_norm": 1.055973006911073, + "learning_rate": 1.0822338850923644e-05, + "loss": 0.23377545177936554, + "step": 3768 + }, + { + "epoch": 1.0007967069446289, + "grad_norm": 1.1573191222761028, + "learning_rate": 1.0817962877647911e-05, + "loss": 0.2505020797252655, + "step": 3769 + }, + { + "epoch": 1.0010622759261718, + "grad_norm": 1.0395021899779042, + "learning_rate": 1.0813586746678584e-05, + "loss": 0.26122647523880005, + "step": 3770 + }, + { + "epoch": 1.0013278449077148, + "grad_norm": 1.1508778318464672, + "learning_rate": 1.0809210458859327e-05, + "loss": 0.27962177991867065, + "step": 3771 + }, + { + "epoch": 1.0015934138892577, + "grad_norm": 1.0479777844917506, + "learning_rate": 1.080483401503384e-05, 
+ "loss": 0.21921640634536743, + "step": 3772 + }, + { + "epoch": 1.0018589828708007, + "grad_norm": 1.1277812491041006, + "learning_rate": 1.0800457416045845e-05, + "loss": 0.24623796343803406, + "step": 3773 + }, + { + "epoch": 1.0021245518523436, + "grad_norm": 1.259401152466985, + "learning_rate": 1.0796080662739098e-05, + "loss": 0.3130728006362915, + "step": 3774 + }, + { + "epoch": 1.0023901208338866, + "grad_norm": 1.1209083810179328, + "learning_rate": 1.0791703755957392e-05, + "loss": 0.2548064589500427, + "step": 3775 + }, + { + "epoch": 1.0026556898154295, + "grad_norm": 1.1167206534835417, + "learning_rate": 1.078732669654454e-05, + "loss": 0.20517288148403168, + "step": 3776 + }, + { + "epoch": 1.0029212587969725, + "grad_norm": 1.1055374385175383, + "learning_rate": 1.0782949485344385e-05, + "loss": 0.2634897530078888, + "step": 3777 + }, + { + "epoch": 1.0031868277785154, + "grad_norm": 1.3696848286677328, + "learning_rate": 1.0778572123200804e-05, + "loss": 0.2743223309516907, + "step": 3778 + }, + { + "epoch": 1.0034523967600584, + "grad_norm": 0.9930991365195264, + "learning_rate": 1.0774194610957695e-05, + "loss": 0.24595436453819275, + "step": 3779 + }, + { + "epoch": 1.0037179657416013, + "grad_norm": 1.0885778480679946, + "learning_rate": 1.0769816949459002e-05, + "loss": 0.2508128881454468, + "step": 3780 + }, + { + "epoch": 1.0039835347231443, + "grad_norm": 1.1243431648812525, + "learning_rate": 1.0765439139548677e-05, + "loss": 0.2326367199420929, + "step": 3781 + }, + { + "epoch": 1.0042491037046872, + "grad_norm": 1.1514050771182385, + "learning_rate": 1.0761061182070716e-05, + "loss": 0.2888404130935669, + "step": 3782 + }, + { + "epoch": 1.0045146726862302, + "grad_norm": 1.1399638718055765, + "learning_rate": 1.0756683077869133e-05, + "loss": 0.2804296612739563, + "step": 3783 + }, + { + "epoch": 1.0047802416677731, + "grad_norm": 1.1286027319524963, + "learning_rate": 1.0752304827787979e-05, + "loss": 0.2644953429698944, + "step": 
3784 + }, + { + "epoch": 1.005045810649316, + "grad_norm": 1.2396532451569051, + "learning_rate": 1.0747926432671323e-05, + "loss": 0.297788143157959, + "step": 3785 + }, + { + "epoch": 1.005311379630859, + "grad_norm": 1.065071455363874, + "learning_rate": 1.0743547893363276e-05, + "loss": 0.2644156515598297, + "step": 3786 + }, + { + "epoch": 1.005576948612402, + "grad_norm": 1.1640867578019738, + "learning_rate": 1.073916921070796e-05, + "loss": 0.23818905651569366, + "step": 3787 + }, + { + "epoch": 1.005842517593945, + "grad_norm": 1.11872081222192, + "learning_rate": 1.0734790385549538e-05, + "loss": 0.2544933259487152, + "step": 3788 + }, + { + "epoch": 1.006108086575488, + "grad_norm": 1.0836442452511366, + "learning_rate": 1.0730411418732198e-05, + "loss": 0.2569275498390198, + "step": 3789 + }, + { + "epoch": 1.0063736555570308, + "grad_norm": 1.0348585374954582, + "learning_rate": 1.0726032311100153e-05, + "loss": 0.2248159945011139, + "step": 3790 + }, + { + "epoch": 1.0066392245385738, + "grad_norm": 1.1242207493876892, + "learning_rate": 1.072165306349764e-05, + "loss": 0.25541940331459045, + "step": 3791 + }, + { + "epoch": 1.0069047935201167, + "grad_norm": 9.328291099250833, + "learning_rate": 1.0717273676768924e-05, + "loss": 0.24429568648338318, + "step": 3792 + }, + { + "epoch": 1.0071703625016597, + "grad_norm": 1.0574884647737486, + "learning_rate": 1.0712894151758306e-05, + "loss": 0.2586621344089508, + "step": 3793 + }, + { + "epoch": 1.0074359314832027, + "grad_norm": 1.165205157800888, + "learning_rate": 1.0708514489310103e-05, + "loss": 0.28685104846954346, + "step": 3794 + }, + { + "epoch": 1.0077015004647458, + "grad_norm": 1.1536672746294196, + "learning_rate": 1.0704134690268661e-05, + "loss": 0.2847924530506134, + "step": 3795 + }, + { + "epoch": 1.0079670694462888, + "grad_norm": 1.1168453704329862, + "learning_rate": 1.0699754755478358e-05, + "loss": 0.24646440148353577, + "step": 3796 + }, + { + "epoch": 1.0082326384278317, + 
"grad_norm": 1.217438590106057, + "learning_rate": 1.0695374685783586e-05, + "loss": 0.22286385297775269, + "step": 3797 + }, + { + "epoch": 1.0084982074093747, + "grad_norm": 1.1352166249232278, + "learning_rate": 1.069099448202878e-05, + "loss": 0.2524179518222809, + "step": 3798 + }, + { + "epoch": 1.0087637763909176, + "grad_norm": 1.109981913009372, + "learning_rate": 1.0686614145058387e-05, + "loss": 0.2625758647918701, + "step": 3799 + }, + { + "epoch": 1.0090293453724606, + "grad_norm": 1.0622342238121125, + "learning_rate": 1.0682233675716884e-05, + "loss": 0.25318068265914917, + "step": 3800 + }, + { + "epoch": 1.0092949143540035, + "grad_norm": 1.073699024276181, + "learning_rate": 1.0677853074848774e-05, + "loss": 0.24224570393562317, + "step": 3801 + }, + { + "epoch": 1.0095604833355465, + "grad_norm": 1.1995813349182267, + "learning_rate": 1.0673472343298588e-05, + "loss": 0.28595417737960815, + "step": 3802 + }, + { + "epoch": 1.0098260523170894, + "grad_norm": 1.1558738404506108, + "learning_rate": 1.0669091481910874e-05, + "loss": 0.26894015073776245, + "step": 3803 + }, + { + "epoch": 1.0100916212986324, + "grad_norm": 1.0901744125075639, + "learning_rate": 1.0664710491530214e-05, + "loss": 0.2605208158493042, + "step": 3804 + }, + { + "epoch": 1.0103571902801753, + "grad_norm": 1.082458382717597, + "learning_rate": 1.0660329373001212e-05, + "loss": 0.2595113515853882, + "step": 3805 + }, + { + "epoch": 1.0106227592617183, + "grad_norm": 1.2467081294979763, + "learning_rate": 1.0655948127168494e-05, + "loss": 0.27478674054145813, + "step": 3806 + }, + { + "epoch": 1.0108883282432612, + "grad_norm": 1.0742167098010935, + "learning_rate": 1.0651566754876715e-05, + "loss": 0.2587064504623413, + "step": 3807 + }, + { + "epoch": 1.0111538972248042, + "grad_norm": 1.0593019665426413, + "learning_rate": 1.064718525697055e-05, + "loss": 0.2420537769794464, + "step": 3808 + }, + { + "epoch": 1.0114194662063472, + "grad_norm": 1.1660072059036033, + 
"learning_rate": 1.0642803634294699e-05, + "loss": 0.29424652457237244, + "step": 3809 + }, + { + "epoch": 1.01168503518789, + "grad_norm": 1.0902934718743655, + "learning_rate": 1.0638421887693887e-05, + "loss": 0.25162142515182495, + "step": 3810 + }, + { + "epoch": 1.011950604169433, + "grad_norm": 1.1456242703963635, + "learning_rate": 1.0634040018012865e-05, + "loss": 0.25661247968673706, + "step": 3811 + }, + { + "epoch": 1.012216173150976, + "grad_norm": 1.0060634238068926, + "learning_rate": 1.0629658026096408e-05, + "loss": 0.2042091339826584, + "step": 3812 + }, + { + "epoch": 1.012481742132519, + "grad_norm": 1.0129340658577524, + "learning_rate": 1.0625275912789307e-05, + "loss": 0.22496266663074493, + "step": 3813 + }, + { + "epoch": 1.012747311114062, + "grad_norm": 1.1382961966722176, + "learning_rate": 1.0620893678936385e-05, + "loss": 0.23609521985054016, + "step": 3814 + }, + { + "epoch": 1.0130128800956049, + "grad_norm": 1.2645443214744188, + "learning_rate": 1.0616511325382486e-05, + "loss": 0.2561722993850708, + "step": 3815 + }, + { + "epoch": 1.0132784490771478, + "grad_norm": 1.1379816472778304, + "learning_rate": 1.0612128852972474e-05, + "loss": 0.2617529630661011, + "step": 3816 + }, + { + "epoch": 1.0135440180586908, + "grad_norm": 1.1862833237483508, + "learning_rate": 1.060774626255124e-05, + "loss": 0.2633543014526367, + "step": 3817 + }, + { + "epoch": 1.0138095870402337, + "grad_norm": 1.0263666085354948, + "learning_rate": 1.0603363554963693e-05, + "loss": 0.19401729106903076, + "step": 3818 + }, + { + "epoch": 1.0140751560217767, + "grad_norm": 1.0891094169836097, + "learning_rate": 1.0598980731054765e-05, + "loss": 0.2583369016647339, + "step": 3819 + }, + { + "epoch": 1.0143407250033196, + "grad_norm": 1.1826598806695992, + "learning_rate": 1.0594597791669419e-05, + "loss": 0.26138922572135925, + "step": 3820 + }, + { + "epoch": 1.0146062939848626, + "grad_norm": 1.1580137447688548, + "learning_rate": 1.0590214737652632e-05, + 
"loss": 0.2506800591945648, + "step": 3821 + }, + { + "epoch": 1.0148718629664055, + "grad_norm": 1.032579662550809, + "learning_rate": 1.0585831569849405e-05, + "loss": 0.21569974720478058, + "step": 3822 + }, + { + "epoch": 1.0151374319479485, + "grad_norm": 1.37079648056154, + "learning_rate": 1.0581448289104759e-05, + "loss": 0.2765602767467499, + "step": 3823 + }, + { + "epoch": 1.0154030009294914, + "grad_norm": 1.2046968903946047, + "learning_rate": 1.0577064896263743e-05, + "loss": 0.25180384516716003, + "step": 3824 + }, + { + "epoch": 1.0156685699110344, + "grad_norm": 1.0796182560924539, + "learning_rate": 1.0572681392171417e-05, + "loss": 0.24164071679115295, + "step": 3825 + }, + { + "epoch": 1.0159341388925773, + "grad_norm": 1.1523354919316235, + "learning_rate": 1.0568297777672875e-05, + "loss": 0.24206972122192383, + "step": 3826 + }, + { + "epoch": 1.0161997078741203, + "grad_norm": 1.115771237946875, + "learning_rate": 1.0563914053613227e-05, + "loss": 0.24563468992710114, + "step": 3827 + }, + { + "epoch": 1.0164652768556632, + "grad_norm": 1.121826691352643, + "learning_rate": 1.0559530220837593e-05, + "loss": 0.23226243257522583, + "step": 3828 + }, + { + "epoch": 1.0167308458372062, + "grad_norm": 1.4499652400392462, + "learning_rate": 1.0555146280191137e-05, + "loss": 0.2245083749294281, + "step": 3829 + }, + { + "epoch": 1.0169964148187491, + "grad_norm": 1.1230707875328865, + "learning_rate": 1.0550762232519023e-05, + "loss": 0.24455049633979797, + "step": 3830 + }, + { + "epoch": 1.017261983800292, + "grad_norm": 1.1434011419253403, + "learning_rate": 1.0546378078666448e-05, + "loss": 0.2540651857852936, + "step": 3831 + }, + { + "epoch": 1.017527552781835, + "grad_norm": 1.222189193306495, + "learning_rate": 1.0541993819478622e-05, + "loss": 0.23392565548419952, + "step": 3832 + }, + { + "epoch": 1.017793121763378, + "grad_norm": 1.239236731837986, + "learning_rate": 1.053760945580078e-05, + "loss": 0.21601927280426025, + "step": 3833 + 
}, + { + "epoch": 1.018058690744921, + "grad_norm": 1.1697918037357793, + "learning_rate": 1.0533224988478176e-05, + "loss": 0.24622616171836853, + "step": 3834 + }, + { + "epoch": 1.018324259726464, + "grad_norm": 1.186224891573799, + "learning_rate": 1.0528840418356086e-05, + "loss": 0.2774650752544403, + "step": 3835 + }, + { + "epoch": 1.0185898287080069, + "grad_norm": 1.1218094293898884, + "learning_rate": 1.0524455746279795e-05, + "loss": 0.22323890030384064, + "step": 3836 + }, + { + "epoch": 1.0188553976895498, + "grad_norm": 1.0569207532138136, + "learning_rate": 1.0520070973094622e-05, + "loss": 0.21901552379131317, + "step": 3837 + }, + { + "epoch": 1.0191209666710928, + "grad_norm": 1.1936231752235407, + "learning_rate": 1.0515686099645901e-05, + "loss": 0.3037784695625305, + "step": 3838 + }, + { + "epoch": 1.0193865356526357, + "grad_norm": 1.0847362828180318, + "learning_rate": 1.0511301126778984e-05, + "loss": 0.22658365964889526, + "step": 3839 + }, + { + "epoch": 1.0196521046341787, + "grad_norm": 1.09040618490447, + "learning_rate": 1.0506916055339237e-05, + "loss": 0.23144160211086273, + "step": 3840 + }, + { + "epoch": 1.0199176736157216, + "grad_norm": 1.28339134317777, + "learning_rate": 1.0502530886172055e-05, + "loss": 0.25658899545669556, + "step": 3841 + }, + { + "epoch": 1.0201832425972646, + "grad_norm": 0.9689646092731519, + "learning_rate": 1.0498145620122845e-05, + "loss": 0.19658756256103516, + "step": 3842 + }, + { + "epoch": 1.0204488115788075, + "grad_norm": 1.0949311372526576, + "learning_rate": 1.049376025803703e-05, + "loss": 0.19045208394527435, + "step": 3843 + }, + { + "epoch": 1.0207143805603505, + "grad_norm": 1.1626763108379607, + "learning_rate": 1.0489374800760066e-05, + "loss": 0.2577810287475586, + "step": 3844 + }, + { + "epoch": 1.0209799495418934, + "grad_norm": 1.1521055149329589, + "learning_rate": 1.048498924913741e-05, + "loss": 0.2807403802871704, + "step": 3845 + }, + { + "epoch": 1.0212455185234364, + 
"grad_norm": 1.2275557893789377, + "learning_rate": 1.0480603604014545e-05, + "loss": 0.2710269093513489, + "step": 3846 + }, + { + "epoch": 1.0215110875049793, + "grad_norm": 1.173604136076929, + "learning_rate": 1.0476217866236974e-05, + "loss": 0.2560620903968811, + "step": 3847 + }, + { + "epoch": 1.0217766564865223, + "grad_norm": 1.1571778426612858, + "learning_rate": 1.0471832036650217e-05, + "loss": 0.2599894404411316, + "step": 3848 + }, + { + "epoch": 1.0220422254680652, + "grad_norm": 1.1339420848197217, + "learning_rate": 1.046744611609981e-05, + "loss": 0.2411944717168808, + "step": 3849 + }, + { + "epoch": 1.0223077944496084, + "grad_norm": 1.1528658942490468, + "learning_rate": 1.0463060105431303e-05, + "loss": 0.25216251611709595, + "step": 3850 + }, + { + "epoch": 1.0225733634311513, + "grad_norm": 1.1884423925105638, + "learning_rate": 1.0458674005490263e-05, + "loss": 0.255629301071167, + "step": 3851 + }, + { + "epoch": 1.0228389324126943, + "grad_norm": 1.0777718220336832, + "learning_rate": 1.0454287817122291e-05, + "loss": 0.24032849073410034, + "step": 3852 + }, + { + "epoch": 1.0231045013942373, + "grad_norm": 1.1154013609024198, + "learning_rate": 1.0449901541172983e-05, + "loss": 0.23188306391239166, + "step": 3853 + }, + { + "epoch": 1.0233700703757802, + "grad_norm": 1.149374478972437, + "learning_rate": 1.0445515178487965e-05, + "loss": 0.2718146741390228, + "step": 3854 + }, + { + "epoch": 1.0236356393573232, + "grad_norm": 1.460691184866812, + "learning_rate": 1.0441128729912876e-05, + "loss": 0.30279839038848877, + "step": 3855 + }, + { + "epoch": 1.023901208338866, + "grad_norm": 1.0711762201816422, + "learning_rate": 1.0436742196293368e-05, + "loss": 0.2185024917125702, + "step": 3856 + }, + { + "epoch": 1.024166777320409, + "grad_norm": 1.2737960148140446, + "learning_rate": 1.0432355578475118e-05, + "loss": 0.2956481873989105, + "step": 3857 + }, + { + "epoch": 1.024432346301952, + "grad_norm": 1.1913794327080105, + 
"learning_rate": 1.0427968877303809e-05, + "loss": 0.28460678458213806, + "step": 3858 + }, + { + "epoch": 1.024697915283495, + "grad_norm": 1.1716718579119476, + "learning_rate": 1.0423582093625146e-05, + "loss": 0.24597057700157166, + "step": 3859 + }, + { + "epoch": 1.024963484265038, + "grad_norm": 0.987642591779768, + "learning_rate": 1.0419195228284856e-05, + "loss": 0.23986583948135376, + "step": 3860 + }, + { + "epoch": 1.0252290532465809, + "grad_norm": 1.0867576400643644, + "learning_rate": 1.0414808282128668e-05, + "loss": 0.2489446997642517, + "step": 3861 + }, + { + "epoch": 1.0254946222281238, + "grad_norm": 1.1200031637603385, + "learning_rate": 1.0410421256002334e-05, + "loss": 0.26777884364128113, + "step": 3862 + }, + { + "epoch": 1.0257601912096668, + "grad_norm": 1.1645962699086565, + "learning_rate": 1.0406034150751625e-05, + "loss": 0.23506489396095276, + "step": 3863 + }, + { + "epoch": 1.0260257601912097, + "grad_norm": 1.1861093965134106, + "learning_rate": 1.040164696722232e-05, + "loss": 0.2526484429836273, + "step": 3864 + }, + { + "epoch": 1.0262913291727527, + "grad_norm": 1.1320109702434422, + "learning_rate": 1.0397259706260216e-05, + "loss": 0.2179267853498459, + "step": 3865 + }, + { + "epoch": 1.0265568981542956, + "grad_norm": 1.0267487594121727, + "learning_rate": 1.0392872368711126e-05, + "loss": 0.2431088387966156, + "step": 3866 + }, + { + "epoch": 1.0268224671358386, + "grad_norm": 1.1394336459602463, + "learning_rate": 1.0388484955420877e-05, + "loss": 0.26101407408714294, + "step": 3867 + }, + { + "epoch": 1.0270880361173815, + "grad_norm": 1.0741553283028158, + "learning_rate": 1.0384097467235308e-05, + "loss": 0.23780573904514313, + "step": 3868 + }, + { + "epoch": 1.0273536050989245, + "grad_norm": 1.467981467949694, + "learning_rate": 1.0379709905000278e-05, + "loss": 0.2469894289970398, + "step": 3869 + }, + { + "epoch": 1.0276191740804674, + "grad_norm": 1.074989572738127, + "learning_rate": 1.0375322269561658e-05, + 
"loss": 0.21271926164627075, + "step": 3870 + }, + { + "epoch": 1.0278847430620104, + "grad_norm": 1.1192343716648714, + "learning_rate": 1.0370934561765331e-05, + "loss": 0.22995726764202118, + "step": 3871 + }, + { + "epoch": 1.0281503120435533, + "grad_norm": 1.2051770162428763, + "learning_rate": 1.0366546782457196e-05, + "loss": 0.27448171377182007, + "step": 3872 + }, + { + "epoch": 1.0284158810250963, + "grad_norm": 1.232887313588547, + "learning_rate": 1.0362158932483165e-05, + "loss": 0.25459539890289307, + "step": 3873 + }, + { + "epoch": 1.0286814500066392, + "grad_norm": 1.1436601222318827, + "learning_rate": 1.0357771012689162e-05, + "loss": 0.23213380575180054, + "step": 3874 + }, + { + "epoch": 1.0289470189881822, + "grad_norm": 1.107979602389345, + "learning_rate": 1.0353383023921127e-05, + "loss": 0.2219776064157486, + "step": 3875 + }, + { + "epoch": 1.0292125879697251, + "grad_norm": 1.2445278934711803, + "learning_rate": 1.0348994967025012e-05, + "loss": 0.27059125900268555, + "step": 3876 + }, + { + "epoch": 1.029478156951268, + "grad_norm": 1.2314072238589235, + "learning_rate": 1.034460684284678e-05, + "loss": 0.26921501755714417, + "step": 3877 + }, + { + "epoch": 1.029743725932811, + "grad_norm": 1.153389282583655, + "learning_rate": 1.0340218652232419e-05, + "loss": 0.24727991223335266, + "step": 3878 + }, + { + "epoch": 1.030009294914354, + "grad_norm": 1.2105369925319034, + "learning_rate": 1.0335830396027912e-05, + "loss": 0.26276054978370667, + "step": 3879 + }, + { + "epoch": 1.030274863895897, + "grad_norm": 1.1222835146983237, + "learning_rate": 1.0331442075079268e-05, + "loss": 0.25906458497047424, + "step": 3880 + }, + { + "epoch": 1.03054043287744, + "grad_norm": 1.1936099182612667, + "learning_rate": 1.0327053690232498e-05, + "loss": 0.2708794176578522, + "step": 3881 + }, + { + "epoch": 1.0308060018589829, + "grad_norm": 1.1283814494585969, + "learning_rate": 1.0322665242333634e-05, + "loss": 0.24968653917312622, + "step": 3882 
+ }, + { + "epoch": 1.0310715708405258, + "grad_norm": 1.1912763351930955, + "learning_rate": 1.0318276732228716e-05, + "loss": 0.2669135332107544, + "step": 3883 + }, + { + "epoch": 1.0313371398220688, + "grad_norm": 1.0733368423352447, + "learning_rate": 1.0313888160763799e-05, + "loss": 0.24173730611801147, + "step": 3884 + }, + { + "epoch": 1.0316027088036117, + "grad_norm": 1.4084549111395024, + "learning_rate": 1.0309499528784948e-05, + "loss": 0.27513059973716736, + "step": 3885 + }, + { + "epoch": 1.0318682777851547, + "grad_norm": 1.163470416419209, + "learning_rate": 1.0305110837138235e-05, + "loss": 0.2512688934803009, + "step": 3886 + }, + { + "epoch": 1.0321338467666976, + "grad_norm": 1.100016135139411, + "learning_rate": 1.0300722086669753e-05, + "loss": 0.2584962844848633, + "step": 3887 + }, + { + "epoch": 1.0323994157482406, + "grad_norm": 1.1125458904355436, + "learning_rate": 1.0296333278225599e-05, + "loss": 0.23692303895950317, + "step": 3888 + }, + { + "epoch": 1.0326649847297835, + "grad_norm": 1.1981051682884363, + "learning_rate": 1.0291944412651884e-05, + "loss": 0.2570871114730835, + "step": 3889 + }, + { + "epoch": 1.0329305537113265, + "grad_norm": 1.1839354606788588, + "learning_rate": 1.028755549079473e-05, + "loss": 0.2896367609500885, + "step": 3890 + }, + { + "epoch": 1.0331961226928694, + "grad_norm": 0.958593784491898, + "learning_rate": 1.0283166513500267e-05, + "loss": 0.19990365207195282, + "step": 3891 + }, + { + "epoch": 1.0334616916744124, + "grad_norm": 1.1157517117826752, + "learning_rate": 1.0278777481614639e-05, + "loss": 0.25235646963119507, + "step": 3892 + }, + { + "epoch": 1.0337272606559553, + "grad_norm": 1.1808927381569394, + "learning_rate": 1.0274388395984003e-05, + "loss": 0.23675012588500977, + "step": 3893 + }, + { + "epoch": 1.0339928296374983, + "grad_norm": 1.1370597202642294, + "learning_rate": 1.026999925745452e-05, + "loss": 0.250516414642334, + "step": 3894 + }, + { + "epoch": 1.0342583986190412, + 
"grad_norm": 1.0692414219621886, + "learning_rate": 1.0265610066872365e-05, + "loss": 0.24573490023612976, + "step": 3895 + }, + { + "epoch": 1.0345239676005842, + "grad_norm": 1.085358990363196, + "learning_rate": 1.026122082508372e-05, + "loss": 0.2473086714744568, + "step": 3896 + }, + { + "epoch": 1.0347895365821271, + "grad_norm": 1.162338198859519, + "learning_rate": 1.0256831532934783e-05, + "loss": 0.26546406745910645, + "step": 3897 + }, + { + "epoch": 1.03505510556367, + "grad_norm": 1.1034436628854154, + "learning_rate": 1.0252442191271754e-05, + "loss": 0.2565246522426605, + "step": 3898 + }, + { + "epoch": 1.035320674545213, + "grad_norm": 1.0272875416109402, + "learning_rate": 1.0248052800940846e-05, + "loss": 0.24923476576805115, + "step": 3899 + }, + { + "epoch": 1.035586243526756, + "grad_norm": 1.1519345059696067, + "learning_rate": 1.0243663362788286e-05, + "loss": 0.3079240322113037, + "step": 3900 + }, + { + "epoch": 1.035851812508299, + "grad_norm": 1.0586971174066726, + "learning_rate": 1.0239273877660302e-05, + "loss": 0.2482951581478119, + "step": 3901 + }, + { + "epoch": 1.036117381489842, + "grad_norm": 1.1495296797401515, + "learning_rate": 1.0234884346403138e-05, + "loss": 0.2626204192638397, + "step": 3902 + }, + { + "epoch": 1.0363829504713848, + "grad_norm": 1.0578834148114886, + "learning_rate": 1.023049476986304e-05, + "loss": 0.23181654512882233, + "step": 3903 + }, + { + "epoch": 1.0366485194529278, + "grad_norm": 1.2527800012652353, + "learning_rate": 1.0226105148886272e-05, + "loss": 0.29164040088653564, + "step": 3904 + }, + { + "epoch": 1.0369140884344707, + "grad_norm": 1.034136654365203, + "learning_rate": 1.0221715484319094e-05, + "loss": 0.22025801241397858, + "step": 3905 + }, + { + "epoch": 1.0371796574160137, + "grad_norm": 1.1162047929812215, + "learning_rate": 1.021732577700779e-05, + "loss": 0.2819385826587677, + "step": 3906 + }, + { + "epoch": 1.0374452263975567, + "grad_norm": 1.0524498644463125, + 
"learning_rate": 1.0212936027798637e-05, + "loss": 0.24709002673625946, + "step": 3907 + }, + { + "epoch": 1.0377107953790998, + "grad_norm": 0.9984579723832369, + "learning_rate": 1.0208546237537928e-05, + "loss": 0.22570034861564636, + "step": 3908 + }, + { + "epoch": 1.0379763643606428, + "grad_norm": 1.1543900299803864, + "learning_rate": 1.0204156407071964e-05, + "loss": 0.25642865896224976, + "step": 3909 + }, + { + "epoch": 1.0382419333421857, + "grad_norm": 1.1657404882715603, + "learning_rate": 1.0199766537247053e-05, + "loss": 0.25970256328582764, + "step": 3910 + }, + { + "epoch": 1.0385075023237287, + "grad_norm": 1.1347864223586095, + "learning_rate": 1.019537662890951e-05, + "loss": 0.2560003101825714, + "step": 3911 + }, + { + "epoch": 1.0387730713052716, + "grad_norm": 1.3160565196765366, + "learning_rate": 1.0190986682905656e-05, + "loss": 0.28138649463653564, + "step": 3912 + }, + { + "epoch": 1.0390386402868146, + "grad_norm": 1.4353879235637104, + "learning_rate": 1.0186596700081825e-05, + "loss": 0.23531222343444824, + "step": 3913 + }, + { + "epoch": 1.0393042092683575, + "grad_norm": 1.1850676655471586, + "learning_rate": 1.018220668128435e-05, + "loss": 0.24912862479686737, + "step": 3914 + }, + { + "epoch": 1.0395697782499005, + "grad_norm": 1.0811585337632708, + "learning_rate": 1.0177816627359575e-05, + "loss": 0.24188724160194397, + "step": 3915 + }, + { + "epoch": 1.0398353472314434, + "grad_norm": 1.2093489820950423, + "learning_rate": 1.0173426539153853e-05, + "loss": 0.2709474563598633, + "step": 3916 + }, + { + "epoch": 1.0401009162129864, + "grad_norm": 1.1793292324294091, + "learning_rate": 1.0169036417513538e-05, + "loss": 0.2400204837322235, + "step": 3917 + }, + { + "epoch": 1.0403664851945293, + "grad_norm": 1.0489256907825586, + "learning_rate": 1.0164646263284993e-05, + "loss": 0.2687132954597473, + "step": 3918 + }, + { + "epoch": 1.0406320541760723, + "grad_norm": 1.1628887826217675, + "learning_rate": 
1.0160256077314592e-05, + "loss": 0.25139346718788147, + "step": 3919 + }, + { + "epoch": 1.0408976231576152, + "grad_norm": 1.1762633281473511, + "learning_rate": 1.0155865860448712e-05, + "loss": 0.25873464345932007, + "step": 3920 + }, + { + "epoch": 1.0411631921391582, + "grad_norm": 1.1207165962030725, + "learning_rate": 1.0151475613533732e-05, + "loss": 0.2510434687137604, + "step": 3921 + }, + { + "epoch": 1.0414287611207012, + "grad_norm": 1.2260247662339232, + "learning_rate": 1.0147085337416036e-05, + "loss": 0.24567106366157532, + "step": 3922 + }, + { + "epoch": 1.041694330102244, + "grad_norm": 1.1642096823951156, + "learning_rate": 1.0142695032942024e-05, + "loss": 0.25028282403945923, + "step": 3923 + }, + { + "epoch": 1.041959899083787, + "grad_norm": 1.140963361472911, + "learning_rate": 1.0138304700958096e-05, + "loss": 0.23542484641075134, + "step": 3924 + }, + { + "epoch": 1.04222546806533, + "grad_norm": 1.2475887570620718, + "learning_rate": 1.0133914342310649e-05, + "loss": 0.28974449634552, + "step": 3925 + }, + { + "epoch": 1.042491037046873, + "grad_norm": 1.0648736453755918, + "learning_rate": 1.0129523957846097e-05, + "loss": 0.23417247831821442, + "step": 3926 + }, + { + "epoch": 1.042756606028416, + "grad_norm": 1.1427047582178407, + "learning_rate": 1.0125133548410852e-05, + "loss": 0.23247018456459045, + "step": 3927 + }, + { + "epoch": 1.0430221750099589, + "grad_norm": 1.1496713132119072, + "learning_rate": 1.0120743114851337e-05, + "loss": 0.23860129714012146, + "step": 3928 + }, + { + "epoch": 1.0432877439915018, + "grad_norm": 1.1567405333157526, + "learning_rate": 1.0116352658013973e-05, + "loss": 0.2609105706214905, + "step": 3929 + }, + { + "epoch": 1.0435533129730448, + "grad_norm": 1.2453984448185509, + "learning_rate": 1.0111962178745187e-05, + "loss": 0.2559507489204407, + "step": 3930 + }, + { + "epoch": 1.0438188819545877, + "grad_norm": 1.2247288020965454, + "learning_rate": 1.0107571677891415e-05, + "loss": 
0.2708527147769928, + "step": 3931 + }, + { + "epoch": 1.0440844509361307, + "grad_norm": 1.2373037230453465, + "learning_rate": 1.0103181156299091e-05, + "loss": 0.25884875655174255, + "step": 3932 + }, + { + "epoch": 1.0443500199176736, + "grad_norm": 1.3022673165052032, + "learning_rate": 1.0098790614814658e-05, + "loss": 0.2631877660751343, + "step": 3933 + }, + { + "epoch": 1.0446155888992166, + "grad_norm": 1.0267097797291302, + "learning_rate": 1.0094400054284559e-05, + "loss": 0.27179086208343506, + "step": 3934 + }, + { + "epoch": 1.0448811578807595, + "grad_norm": 2.1081344450494144, + "learning_rate": 1.0090009475555245e-05, + "loss": 0.21690386533737183, + "step": 3935 + }, + { + "epoch": 1.0451467268623025, + "grad_norm": 1.0188398651288513, + "learning_rate": 1.0085618879473162e-05, + "loss": 0.20192815363407135, + "step": 3936 + }, + { + "epoch": 1.0454122958438454, + "grad_norm": 1.213624997308106, + "learning_rate": 1.0081228266884773e-05, + "loss": 0.2680777907371521, + "step": 3937 + }, + { + "epoch": 1.0456778648253884, + "grad_norm": 1.1871222610891168, + "learning_rate": 1.007683763863653e-05, + "loss": 0.2566579580307007, + "step": 3938 + }, + { + "epoch": 1.0459434338069313, + "grad_norm": 1.1229802475790265, + "learning_rate": 1.0072446995574895e-05, + "loss": 0.2508152723312378, + "step": 3939 + }, + { + "epoch": 1.0462090027884743, + "grad_norm": 1.0850640213400236, + "learning_rate": 1.0068056338546335e-05, + "loss": 0.2880190908908844, + "step": 3940 + }, + { + "epoch": 1.0464745717700172, + "grad_norm": 1.1129549761108044, + "learning_rate": 1.0063665668397316e-05, + "loss": 0.2646787464618683, + "step": 3941 + }, + { + "epoch": 1.0467401407515602, + "grad_norm": 1.1116528447502043, + "learning_rate": 1.0059274985974305e-05, + "loss": 0.2327616810798645, + "step": 3942 + }, + { + "epoch": 1.0470057097331031, + "grad_norm": 1.1644185595792014, + "learning_rate": 1.0054884292123778e-05, + "loss": 0.24756258726119995, + "step": 3943 + }, 
+ { + "epoch": 1.047271278714646, + "grad_norm": 1.1010853288322209, + "learning_rate": 1.0050493587692207e-05, + "loss": 0.23657771944999695, + "step": 3944 + }, + { + "epoch": 1.047536847696189, + "grad_norm": 1.1386107444709148, + "learning_rate": 1.0046102873526068e-05, + "loss": 0.2541351616382599, + "step": 3945 + }, + { + "epoch": 1.047802416677732, + "grad_norm": 1.0912263009271301, + "learning_rate": 1.0041712150471839e-05, + "loss": 0.2330317348241806, + "step": 3946 + }, + { + "epoch": 1.048067985659275, + "grad_norm": 1.0696190454357721, + "learning_rate": 1.0037321419375997e-05, + "loss": 0.23411181569099426, + "step": 3947 + }, + { + "epoch": 1.048333554640818, + "grad_norm": 1.1223872975815399, + "learning_rate": 1.0032930681085028e-05, + "loss": 0.2605017125606537, + "step": 3948 + }, + { + "epoch": 1.0485991236223609, + "grad_norm": 1.1766579775240698, + "learning_rate": 1.0028539936445407e-05, + "loss": 0.28651514649391174, + "step": 3949 + }, + { + "epoch": 1.0488646926039038, + "grad_norm": 1.1469362905517786, + "learning_rate": 1.0024149186303628e-05, + "loss": 0.22912876307964325, + "step": 3950 + }, + { + "epoch": 1.0491302615854468, + "grad_norm": 1.206814749340921, + "learning_rate": 1.001975843150617e-05, + "loss": 0.24032847583293915, + "step": 3951 + }, + { + "epoch": 1.0493958305669897, + "grad_norm": 1.0089656289438405, + "learning_rate": 1.0015367672899521e-05, + "loss": 0.17826229333877563, + "step": 3952 + }, + { + "epoch": 1.0496613995485327, + "grad_norm": 1.1440301784208975, + "learning_rate": 1.0010976911330163e-05, + "loss": 0.2619745433330536, + "step": 3953 + }, + { + "epoch": 1.0499269685300756, + "grad_norm": 1.1124743886634039, + "learning_rate": 1.0006586147644585e-05, + "loss": 0.24104374647140503, + "step": 3954 + }, + { + "epoch": 1.0501925375116186, + "grad_norm": 1.2465051058358483, + "learning_rate": 1.0002195382689277e-05, + "loss": 0.22913998365402222, + "step": 3955 + }, + { + "epoch": 1.0504581064931615, + 
"grad_norm": 1.2288244416278613, + "learning_rate": 9.997804617310724e-06, + "loss": 0.2625126838684082, + "step": 3956 + }, + { + "epoch": 1.0507236754747045, + "grad_norm": 1.1016811290492863, + "learning_rate": 9.993413852355416e-06, + "loss": 0.23098430037498474, + "step": 3957 + }, + { + "epoch": 1.0509892444562474, + "grad_norm": 1.2581954843436995, + "learning_rate": 9.98902308866984e-06, + "loss": 0.2866731882095337, + "step": 3958 + }, + { + "epoch": 1.0512548134377904, + "grad_norm": 1.2595027481112393, + "learning_rate": 9.984632327100482e-06, + "loss": 0.2520306706428528, + "step": 3959 + }, + { + "epoch": 1.0515203824193333, + "grad_norm": 1.2731218614589663, + "learning_rate": 9.980241568493834e-06, + "loss": 0.29688766598701477, + "step": 3960 + }, + { + "epoch": 1.0517859514008763, + "grad_norm": 1.2865298416208544, + "learning_rate": 9.975850813696375e-06, + "loss": 0.2876695990562439, + "step": 3961 + }, + { + "epoch": 1.0520515203824194, + "grad_norm": 1.1190033835182807, + "learning_rate": 9.971460063554595e-06, + "loss": 0.2402629554271698, + "step": 3962 + }, + { + "epoch": 1.0523170893639624, + "grad_norm": 1.288030170241207, + "learning_rate": 9.967069318914977e-06, + "loss": 0.32080164551734924, + "step": 3963 + }, + { + "epoch": 1.0525826583455054, + "grad_norm": 1.3484684025161604, + "learning_rate": 9.962678580624008e-06, + "loss": 0.2642936110496521, + "step": 3964 + }, + { + "epoch": 1.0528482273270483, + "grad_norm": 1.1668064537758471, + "learning_rate": 9.958287849528163e-06, + "loss": 0.255870521068573, + "step": 3965 + }, + { + "epoch": 1.0531137963085913, + "grad_norm": 1.1779058124731279, + "learning_rate": 9.953897126473933e-06, + "loss": 0.2695184350013733, + "step": 3966 + }, + { + "epoch": 1.0533793652901342, + "grad_norm": 1.1937956388734083, + "learning_rate": 9.949506412307795e-06, + "loss": 0.24576464295387268, + "step": 3967 + }, + { + "epoch": 1.0536449342716772, + "grad_norm": 1.210893055599799, + "learning_rate": 
9.945115707876224e-06, + "loss": 0.26517459750175476, + "step": 3968 + }, + { + "epoch": 1.05391050325322, + "grad_norm": 1.261309936483727, + "learning_rate": 9.940725014025696e-06, + "loss": 0.30468082427978516, + "step": 3969 + }, + { + "epoch": 1.054176072234763, + "grad_norm": 1.1007633858966879, + "learning_rate": 9.936334331602687e-06, + "loss": 0.25299298763275146, + "step": 3970 + }, + { + "epoch": 1.054441641216306, + "grad_norm": 1.1621642625136148, + "learning_rate": 9.931943661453668e-06, + "loss": 0.2659488320350647, + "step": 3971 + }, + { + "epoch": 1.054707210197849, + "grad_norm": 1.129768041847351, + "learning_rate": 9.92755300442511e-06, + "loss": 0.25957295298576355, + "step": 3972 + }, + { + "epoch": 1.054972779179392, + "grad_norm": 1.0969185518732962, + "learning_rate": 9.923162361363476e-06, + "loss": 0.2416645884513855, + "step": 3973 + }, + { + "epoch": 1.0552383481609349, + "grad_norm": 1.1032067417924427, + "learning_rate": 9.91877173311523e-06, + "loss": 0.2627662122249603, + "step": 3974 + }, + { + "epoch": 1.0555039171424778, + "grad_norm": 1.1485553701369502, + "learning_rate": 9.91438112052684e-06, + "loss": 0.2876631021499634, + "step": 3975 + }, + { + "epoch": 1.0557694861240208, + "grad_norm": 1.1306607772682384, + "learning_rate": 9.90999052444476e-06, + "loss": 0.28336596488952637, + "step": 3976 + }, + { + "epoch": 1.0560350551055637, + "grad_norm": 1.266085815857313, + "learning_rate": 9.905599945715443e-06, + "loss": 0.2970484495162964, + "step": 3977 + }, + { + "epoch": 1.0563006240871067, + "grad_norm": 1.188464425479595, + "learning_rate": 9.901209385185345e-06, + "loss": 0.27202755212783813, + "step": 3978 + }, + { + "epoch": 1.0565661930686496, + "grad_norm": 1.0823738866829473, + "learning_rate": 9.896818843700912e-06, + "loss": 0.2702459990978241, + "step": 3979 + }, + { + "epoch": 1.0568317620501926, + "grad_norm": 1.2166105195755876, + "learning_rate": 9.89242832210859e-06, + "loss": 0.26057881116867065, + "step": 
3980 + }, + { + "epoch": 1.0570973310317355, + "grad_norm": 1.1526398422075472, + "learning_rate": 9.888037821254816e-06, + "loss": 0.24006876349449158, + "step": 3981 + }, + { + "epoch": 1.0573629000132785, + "grad_norm": 1.0864441989704317, + "learning_rate": 9.883647341986032e-06, + "loss": 0.2437625676393509, + "step": 3982 + }, + { + "epoch": 1.0576284689948214, + "grad_norm": 1.0572722810626467, + "learning_rate": 9.879256885148666e-06, + "loss": 0.24256819486618042, + "step": 3983 + }, + { + "epoch": 1.0578940379763644, + "grad_norm": 1.2008491436753201, + "learning_rate": 9.874866451589151e-06, + "loss": 0.2714581787586212, + "step": 3984 + }, + { + "epoch": 1.0581596069579073, + "grad_norm": 1.1859043120388024, + "learning_rate": 9.870476042153907e-06, + "loss": 0.30309075117111206, + "step": 3985 + }, + { + "epoch": 1.0584251759394503, + "grad_norm": 1.3001941243887445, + "learning_rate": 9.866085657689355e-06, + "loss": 0.2938288450241089, + "step": 3986 + }, + { + "epoch": 1.0586907449209932, + "grad_norm": 1.1041962963159588, + "learning_rate": 9.86169529904191e-06, + "loss": 0.23748518526554108, + "step": 3987 + }, + { + "epoch": 1.0589563139025362, + "grad_norm": 1.2345572480055271, + "learning_rate": 9.857304967057977e-06, + "loss": 0.2883969247341156, + "step": 3988 + }, + { + "epoch": 1.0592218828840791, + "grad_norm": 1.0871048681541509, + "learning_rate": 9.852914662583966e-06, + "loss": 0.28301289677619934, + "step": 3989 + }, + { + "epoch": 1.059487451865622, + "grad_norm": 1.0733060702724175, + "learning_rate": 9.848524386466273e-06, + "loss": 0.22616548836231232, + "step": 3990 + }, + { + "epoch": 1.059753020847165, + "grad_norm": 1.06530549901144, + "learning_rate": 9.844134139551291e-06, + "loss": 0.2282804250717163, + "step": 3991 + }, + { + "epoch": 1.060018589828708, + "grad_norm": 1.154557745213229, + "learning_rate": 9.839743922685408e-06, + "loss": 0.2407834678888321, + "step": 3992 + }, + { + "epoch": 1.060284158810251, + 
"grad_norm": 1.0504099183304738, + "learning_rate": 9.835353736715007e-06, + "loss": 0.22690361738204956, + "step": 3993 + }, + { + "epoch": 1.060549727791794, + "grad_norm": 1.529267187296219, + "learning_rate": 9.830963582486465e-06, + "loss": 0.23291411995887756, + "step": 3994 + }, + { + "epoch": 1.0608152967733369, + "grad_norm": 1.0804914844168854, + "learning_rate": 9.82657346084615e-06, + "loss": 0.24524198472499847, + "step": 3995 + }, + { + "epoch": 1.0610808657548798, + "grad_norm": 1.130929241291739, + "learning_rate": 9.822183372640426e-06, + "loss": 0.22087743878364563, + "step": 3996 + }, + { + "epoch": 1.0613464347364228, + "grad_norm": 1.1374060021264791, + "learning_rate": 9.817793318715652e-06, + "loss": 0.2459079772233963, + "step": 3997 + }, + { + "epoch": 1.0616120037179657, + "grad_norm": 1.1393890830478974, + "learning_rate": 9.813403299918178e-06, + "loss": 0.24429920315742493, + "step": 3998 + }, + { + "epoch": 1.0618775726995087, + "grad_norm": 1.140499707599593, + "learning_rate": 9.809013317094345e-06, + "loss": 0.2332335114479065, + "step": 3999 + }, + { + "epoch": 1.0621431416810516, + "grad_norm": 1.2157908167694267, + "learning_rate": 9.804623371090493e-06, + "loss": 0.2861659526824951, + "step": 4000 + }, + { + "epoch": 1.0624087106625946, + "grad_norm": 1.1293440606459217, + "learning_rate": 9.800233462752949e-06, + "loss": 0.22731532156467438, + "step": 4001 + }, + { + "epoch": 1.0626742796441375, + "grad_norm": 1.127775309467411, + "learning_rate": 9.795843592928036e-06, + "loss": 0.245025634765625, + "step": 4002 + }, + { + "epoch": 1.0629398486256805, + "grad_norm": 1.2380242649872155, + "learning_rate": 9.791453762462075e-06, + "loss": 0.2826273441314697, + "step": 4003 + }, + { + "epoch": 1.0632054176072234, + "grad_norm": 1.1330484645300947, + "learning_rate": 9.787063972201368e-06, + "loss": 0.24737229943275452, + "step": 4004 + }, + { + "epoch": 1.0634709865887664, + "grad_norm": 1.3814870803010457, + "learning_rate": 
9.782674222992214e-06, + "loss": 0.23368477821350098, + "step": 4005 + }, + { + "epoch": 1.0637365555703093, + "grad_norm": 1.2631953536046527, + "learning_rate": 9.778284515680908e-06, + "loss": 0.2754492461681366, + "step": 4006 + }, + { + "epoch": 1.0640021245518523, + "grad_norm": 1.1906091191722363, + "learning_rate": 9.773894851113732e-06, + "loss": 0.2814168334007263, + "step": 4007 + }, + { + "epoch": 1.0642676935333952, + "grad_norm": 1.1594492512554253, + "learning_rate": 9.769505230136962e-06, + "loss": 0.25388047099113464, + "step": 4008 + }, + { + "epoch": 1.0645332625149382, + "grad_norm": 1.2618382745485697, + "learning_rate": 9.765115653596867e-06, + "loss": 0.25435230135917664, + "step": 4009 + }, + { + "epoch": 1.0647988314964811, + "grad_norm": 1.2251032153283614, + "learning_rate": 9.760726122339698e-06, + "loss": 0.265840083360672, + "step": 4010 + }, + { + "epoch": 1.065064400478024, + "grad_norm": 1.1297656349054435, + "learning_rate": 9.756336637211716e-06, + "loss": 0.2533451020717621, + "step": 4011 + }, + { + "epoch": 1.065329969459567, + "grad_norm": 1.0890158421111886, + "learning_rate": 9.751947199059155e-06, + "loss": 0.25214290618896484, + "step": 4012 + }, + { + "epoch": 1.06559553844111, + "grad_norm": 1.0603532415232781, + "learning_rate": 9.74755780872825e-06, + "loss": 0.25039419531822205, + "step": 4013 + }, + { + "epoch": 1.065861107422653, + "grad_norm": 1.0177623632775965, + "learning_rate": 9.74316846706522e-06, + "loss": 0.21251091361045837, + "step": 4014 + }, + { + "epoch": 1.066126676404196, + "grad_norm": 1.123294230398497, + "learning_rate": 9.738779174916281e-06, + "loss": 0.25898969173431396, + "step": 4015 + }, + { + "epoch": 1.0663922453857388, + "grad_norm": 1.1054663361669936, + "learning_rate": 9.734389933127639e-06, + "loss": 0.2655499577522278, + "step": 4016 + }, + { + "epoch": 1.0666578143672818, + "grad_norm": 1.1153507141873742, + "learning_rate": 9.730000742545485e-06, + "loss": 0.2221338450908661, + 
"step": 4017 + }, + { + "epoch": 1.0669233833488247, + "grad_norm": 1.1746716643835395, + "learning_rate": 9.725611604016002e-06, + "loss": 0.2567589581012726, + "step": 4018 + }, + { + "epoch": 1.0671889523303677, + "grad_norm": 1.1090772377521565, + "learning_rate": 9.721222518385361e-06, + "loss": 0.24440976977348328, + "step": 4019 + }, + { + "epoch": 1.0674545213119107, + "grad_norm": 1.061787642846094, + "learning_rate": 9.716833486499735e-06, + "loss": 0.2229192852973938, + "step": 4020 + }, + { + "epoch": 1.0677200902934538, + "grad_norm": 1.1014121727705226, + "learning_rate": 9.712444509205273e-06, + "loss": 0.26231470704078674, + "step": 4021 + }, + { + "epoch": 1.0679856592749968, + "grad_norm": 1.2531191320236732, + "learning_rate": 9.708055587348119e-06, + "loss": 0.25099092721939087, + "step": 4022 + }, + { + "epoch": 1.0682512282565397, + "grad_norm": 1.1402160070516023, + "learning_rate": 9.703666721774403e-06, + "loss": 0.22979633510112762, + "step": 4023 + }, + { + "epoch": 1.0685167972380827, + "grad_norm": 1.09571485621585, + "learning_rate": 9.699277913330252e-06, + "loss": 0.2361093908548355, + "step": 4024 + }, + { + "epoch": 1.0687823662196256, + "grad_norm": 1.0765448804717204, + "learning_rate": 9.694889162861768e-06, + "loss": 0.2390863001346588, + "step": 4025 + }, + { + "epoch": 1.0690479352011686, + "grad_norm": 1.2569917808844517, + "learning_rate": 9.690500471215057e-06, + "loss": 0.24917885661125183, + "step": 4026 + }, + { + "epoch": 1.0693135041827115, + "grad_norm": 1.1387127210628816, + "learning_rate": 9.686111839236206e-06, + "loss": 0.24215272068977356, + "step": 4027 + }, + { + "epoch": 1.0695790731642545, + "grad_norm": 1.2809085503832063, + "learning_rate": 9.681723267771284e-06, + "loss": 0.27874231338500977, + "step": 4028 + }, + { + "epoch": 1.0698446421457974, + "grad_norm": 1.1707122559783085, + "learning_rate": 9.677334757666368e-06, + "loss": 0.24076086282730103, + "step": 4029 + }, + { + "epoch": 
1.0701102111273404, + "grad_norm": 1.1092369229920938, + "learning_rate": 9.672946309767504e-06, + "loss": 0.2444242238998413, + "step": 4030 + }, + { + "epoch": 1.0703757801088833, + "grad_norm": 1.2086874522857378, + "learning_rate": 9.668557924920735e-06, + "loss": 0.2737279236316681, + "step": 4031 + }, + { + "epoch": 1.0706413490904263, + "grad_norm": 1.1006436240463247, + "learning_rate": 9.664169603972091e-06, + "loss": 0.24105575680732727, + "step": 4032 + }, + { + "epoch": 1.0709069180719692, + "grad_norm": 1.336482466569566, + "learning_rate": 9.659781347767584e-06, + "loss": 0.27791836857795715, + "step": 4033 + }, + { + "epoch": 1.0711724870535122, + "grad_norm": 1.1518461528529822, + "learning_rate": 9.655393157153221e-06, + "loss": 0.255472868680954, + "step": 4034 + }, + { + "epoch": 1.0714380560350552, + "grad_norm": 1.371220848551681, + "learning_rate": 9.651005032974994e-06, + "loss": 0.2523707151412964, + "step": 4035 + }, + { + "epoch": 1.071703625016598, + "grad_norm": 1.235756547113907, + "learning_rate": 9.64661697607888e-06, + "loss": 0.24584606289863586, + "step": 4036 + }, + { + "epoch": 1.071969193998141, + "grad_norm": 1.1497174260677319, + "learning_rate": 9.64222898731084e-06, + "loss": 0.25182732939720154, + "step": 4037 + }, + { + "epoch": 1.072234762979684, + "grad_norm": 1.0822892740683951, + "learning_rate": 9.637841067516837e-06, + "loss": 0.254008412361145, + "step": 4038 + }, + { + "epoch": 1.072500331961227, + "grad_norm": 1.080204167750926, + "learning_rate": 9.633453217542806e-06, + "loss": 0.2314324826002121, + "step": 4039 + }, + { + "epoch": 1.07276590094277, + "grad_norm": 1.1139945732367915, + "learning_rate": 9.62906543823467e-06, + "loss": 0.2256058305501938, + "step": 4040 + }, + { + "epoch": 1.0730314699243129, + "grad_norm": 1.283214941862177, + "learning_rate": 9.624677730438344e-06, + "loss": 0.2577894330024719, + "step": 4041 + }, + { + "epoch": 1.0732970389058558, + "grad_norm": 1.0911199623079508, + 
"learning_rate": 9.620290094999723e-06, + "loss": 0.23520560562610626, + "step": 4042 + }, + { + "epoch": 1.0735626078873988, + "grad_norm": 1.1791405346126818, + "learning_rate": 9.615902532764695e-06, + "loss": 0.2472849190235138, + "step": 4043 + }, + { + "epoch": 1.0738281768689417, + "grad_norm": 1.2195787110249676, + "learning_rate": 9.611515044579128e-06, + "loss": 0.25053414702415466, + "step": 4044 + }, + { + "epoch": 1.0740937458504847, + "grad_norm": 1.1090102650773974, + "learning_rate": 9.607127631288879e-06, + "loss": 0.24229007959365845, + "step": 4045 + }, + { + "epoch": 1.0743593148320276, + "grad_norm": 1.4628298980675831, + "learning_rate": 9.602740293739786e-06, + "loss": 0.2793073058128357, + "step": 4046 + }, + { + "epoch": 1.0746248838135706, + "grad_norm": 1.225079236387791, + "learning_rate": 9.598353032777682e-06, + "loss": 0.24547399580478668, + "step": 4047 + }, + { + "epoch": 1.0748904527951135, + "grad_norm": 1.1980997957436126, + "learning_rate": 9.593965849248378e-06, + "loss": 0.2776937186717987, + "step": 4048 + }, + { + "epoch": 1.0751560217766565, + "grad_norm": 1.0781858695117066, + "learning_rate": 9.589578743997668e-06, + "loss": 0.22677727043628693, + "step": 4049 + }, + { + "epoch": 1.0754215907581994, + "grad_norm": 1.4867723677136682, + "learning_rate": 9.585191717871336e-06, + "loss": 0.23254704475402832, + "step": 4050 + }, + { + "epoch": 1.0756871597397424, + "grad_norm": 1.3243435003953368, + "learning_rate": 9.580804771715148e-06, + "loss": 0.2899828255176544, + "step": 4051 + }, + { + "epoch": 1.0759527287212853, + "grad_norm": 1.1397018772236696, + "learning_rate": 9.576417906374856e-06, + "loss": 0.24632850289344788, + "step": 4052 + }, + { + "epoch": 1.0762182977028283, + "grad_norm": 1.2322214200527608, + "learning_rate": 9.572031122696196e-06, + "loss": 0.2661561369895935, + "step": 4053 + }, + { + "epoch": 1.0764838666843712, + "grad_norm": 1.1394013200357536, + "learning_rate": 9.567644421524889e-06, + "loss": 
0.22364279627799988, + "step": 4054 + }, + { + "epoch": 1.0767494356659142, + "grad_norm": 1.5026366502842776, + "learning_rate": 9.563257803706635e-06, + "loss": 0.26748427748680115, + "step": 4055 + }, + { + "epoch": 1.0770150046474571, + "grad_norm": 1.1794922225625246, + "learning_rate": 9.55887127008713e-06, + "loss": 0.22851283848285675, + "step": 4056 + }, + { + "epoch": 1.077280573629, + "grad_norm": 1.1340260741391435, + "learning_rate": 9.554484821512037e-06, + "loss": 0.2456260323524475, + "step": 4057 + }, + { + "epoch": 1.077546142610543, + "grad_norm": 1.2884657617459025, + "learning_rate": 9.55009845882702e-06, + "loss": 0.2556169629096985, + "step": 4058 + }, + { + "epoch": 1.077811711592086, + "grad_norm": 1.274618544457263, + "learning_rate": 9.545712182877714e-06, + "loss": 0.280727744102478, + "step": 4059 + }, + { + "epoch": 1.078077280573629, + "grad_norm": 1.1205087247319334, + "learning_rate": 9.54132599450974e-06, + "loss": 0.25315386056900024, + "step": 4060 + }, + { + "epoch": 1.078342849555172, + "grad_norm": 1.1990539773915618, + "learning_rate": 9.536939894568704e-06, + "loss": 0.21985477209091187, + "step": 4061 + }, + { + "epoch": 1.0786084185367149, + "grad_norm": 1.1575613416248978, + "learning_rate": 9.532553883900196e-06, + "loss": 0.24329043924808502, + "step": 4062 + }, + { + "epoch": 1.0788739875182578, + "grad_norm": 1.173950465827748, + "learning_rate": 9.528167963349786e-06, + "loss": 0.2362256497144699, + "step": 4063 + }, + { + "epoch": 1.0791395564998008, + "grad_norm": 1.1458704347110154, + "learning_rate": 9.523782133763027e-06, + "loss": 0.23685476183891296, + "step": 4064 + }, + { + "epoch": 1.0794051254813437, + "grad_norm": 1.2383774104342302, + "learning_rate": 9.519396395985456e-06, + "loss": 0.26232481002807617, + "step": 4065 + }, + { + "epoch": 1.0796706944628867, + "grad_norm": 1.2768574792534622, + "learning_rate": 9.515010750862594e-06, + "loss": 0.25196313858032227, + "step": 4066 + }, + { + "epoch": 
1.0799362634444296, + "grad_norm": 1.082792256362845, + "learning_rate": 9.510625199239939e-06, + "loss": 0.22520464658737183, + "step": 4067 + }, + { + "epoch": 1.0802018324259726, + "grad_norm": 1.190229461562689, + "learning_rate": 9.506239741962971e-06, + "loss": 0.27422505617141724, + "step": 4068 + }, + { + "epoch": 1.0804674014075155, + "grad_norm": 1.3120430811123187, + "learning_rate": 9.50185437987716e-06, + "loss": 0.2646682560443878, + "step": 4069 + }, + { + "epoch": 1.0807329703890585, + "grad_norm": 1.3425819541318131, + "learning_rate": 9.497469113827949e-06, + "loss": 0.2661365866661072, + "step": 4070 + }, + { + "epoch": 1.0809985393706014, + "grad_norm": 1.1101351469883673, + "learning_rate": 9.493083944660766e-06, + "loss": 0.23156839609146118, + "step": 4071 + }, + { + "epoch": 1.0812641083521444, + "grad_norm": 1.1805541153651362, + "learning_rate": 9.488698873221021e-06, + "loss": 0.25353243947029114, + "step": 4072 + }, + { + "epoch": 1.0815296773336873, + "grad_norm": 1.2862671823918606, + "learning_rate": 9.484313900354099e-06, + "loss": 0.27488404512405396, + "step": 4073 + }, + { + "epoch": 1.0817952463152305, + "grad_norm": 1.4041005997261422, + "learning_rate": 9.479929026905378e-06, + "loss": 0.2580753564834595, + "step": 4074 + }, + { + "epoch": 1.0820608152967734, + "grad_norm": 1.1405056260482733, + "learning_rate": 9.475544253720206e-06, + "loss": 0.2425471544265747, + "step": 4075 + }, + { + "epoch": 1.0823263842783164, + "grad_norm": 1.2040355319488043, + "learning_rate": 9.471159581643918e-06, + "loss": 0.25268295407295227, + "step": 4076 + }, + { + "epoch": 1.0825919532598594, + "grad_norm": 1.1573228524057126, + "learning_rate": 9.466775011521825e-06, + "loss": 0.2683602571487427, + "step": 4077 + }, + { + "epoch": 1.0828575222414023, + "grad_norm": 1.1300610618916742, + "learning_rate": 9.462390544199221e-06, + "loss": 0.24945034086704254, + "step": 4078 + }, + { + "epoch": 1.0831230912229453, + "grad_norm": 
1.1698494765527112, + "learning_rate": 9.458006180521379e-06, + "loss": 0.21784156560897827, + "step": 4079 + }, + { + "epoch": 1.0833886602044882, + "grad_norm": 1.136268907040887, + "learning_rate": 9.453621921333554e-06, + "loss": 0.22704020142555237, + "step": 4080 + }, + { + "epoch": 1.0836542291860312, + "grad_norm": 1.1373990713388034, + "learning_rate": 9.449237767480979e-06, + "loss": 0.2532106637954712, + "step": 4081 + }, + { + "epoch": 1.0839197981675741, + "grad_norm": 1.1568862012297532, + "learning_rate": 9.444853719808864e-06, + "loss": 0.27809134125709534, + "step": 4082 + }, + { + "epoch": 1.084185367149117, + "grad_norm": 1.2102387789201872, + "learning_rate": 9.440469779162407e-06, + "loss": 0.25704264640808105, + "step": 4083 + }, + { + "epoch": 1.08445093613066, + "grad_norm": 1.1827141084910668, + "learning_rate": 9.436085946386778e-06, + "loss": 0.2656276226043701, + "step": 4084 + }, + { + "epoch": 1.084716505112203, + "grad_norm": 1.256991317445651, + "learning_rate": 9.431702222327126e-06, + "loss": 0.277826726436615, + "step": 4085 + }, + { + "epoch": 1.084982074093746, + "grad_norm": 1.2975495041461134, + "learning_rate": 9.427318607828584e-06, + "loss": 0.24656976759433746, + "step": 4086 + }, + { + "epoch": 1.0852476430752889, + "grad_norm": 1.1974770836803283, + "learning_rate": 9.42293510373626e-06, + "loss": 0.2498110830783844, + "step": 4087 + }, + { + "epoch": 1.0855132120568318, + "grad_norm": 1.1492935678310237, + "learning_rate": 9.418551710895243e-06, + "loss": 0.24574093520641327, + "step": 4088 + }, + { + "epoch": 1.0857787810383748, + "grad_norm": 1.2274895872775384, + "learning_rate": 9.414168430150601e-06, + "loss": 0.25271761417388916, + "step": 4089 + }, + { + "epoch": 1.0860443500199177, + "grad_norm": 1.1759358027679858, + "learning_rate": 9.409785262347373e-06, + "loss": 0.29269370436668396, + "step": 4090 + }, + { + "epoch": 1.0863099190014607, + "grad_norm": 1.1247973273146177, + "learning_rate": 
9.405402208330581e-06, + "loss": 0.244449645280838, + "step": 4091 + }, + { + "epoch": 1.0865754879830036, + "grad_norm": 1.186787867713906, + "learning_rate": 9.401019268945237e-06, + "loss": 0.23785406351089478, + "step": 4092 + }, + { + "epoch": 1.0868410569645466, + "grad_norm": 1.1479686632621091, + "learning_rate": 9.39663644503631e-06, + "loss": 0.2493479996919632, + "step": 4093 + }, + { + "epoch": 1.0871066259460895, + "grad_norm": 1.1474347559215512, + "learning_rate": 9.392253737448764e-06, + "loss": 0.23758000135421753, + "step": 4094 + }, + { + "epoch": 1.0873721949276325, + "grad_norm": 1.0946885138749496, + "learning_rate": 9.387871147027528e-06, + "loss": 0.22560475766658783, + "step": 4095 + }, + { + "epoch": 1.0876377639091754, + "grad_norm": 1.1552533162715968, + "learning_rate": 9.383488674617515e-06, + "loss": 0.2558273673057556, + "step": 4096 + }, + { + "epoch": 1.0879033328907184, + "grad_norm": 1.2619180705972233, + "learning_rate": 9.379106321063618e-06, + "loss": 0.2822023034095764, + "step": 4097 + }, + { + "epoch": 1.0881689018722613, + "grad_norm": 1.2076346653444254, + "learning_rate": 9.374724087210698e-06, + "loss": 0.2596978545188904, + "step": 4098 + }, + { + "epoch": 1.0884344708538043, + "grad_norm": 1.6785014002913365, + "learning_rate": 9.370341973903597e-06, + "loss": 0.25353628396987915, + "step": 4099 + }, + { + "epoch": 1.0887000398353472, + "grad_norm": 1.2184499887942242, + "learning_rate": 9.365959981987135e-06, + "loss": 0.2547294497489929, + "step": 4100 + }, + { + "epoch": 1.0889656088168902, + "grad_norm": 1.40658558629773, + "learning_rate": 9.361578112306115e-06, + "loss": 0.2688470780849457, + "step": 4101 + }, + { + "epoch": 1.0892311777984331, + "grad_norm": 1.207208011814592, + "learning_rate": 9.357196365705303e-06, + "loss": 0.25772029161453247, + "step": 4102 + }, + { + "epoch": 1.089496746779976, + "grad_norm": 1.3552039168974384, + "learning_rate": 9.352814743029454e-06, + "loss": 0.2875550091266632, + 
"step": 4103 + }, + { + "epoch": 1.089762315761519, + "grad_norm": 1.4164869081453233, + "learning_rate": 9.34843324512329e-06, + "loss": 0.23085735738277435, + "step": 4104 + }, + { + "epoch": 1.090027884743062, + "grad_norm": 1.2013725541896922, + "learning_rate": 9.34405187283151e-06, + "loss": 0.2607901096343994, + "step": 4105 + }, + { + "epoch": 1.090293453724605, + "grad_norm": 1.1738523720935938, + "learning_rate": 9.339670626998791e-06, + "loss": 0.26165345311164856, + "step": 4106 + }, + { + "epoch": 1.090559022706148, + "grad_norm": 1.1931234826270498, + "learning_rate": 9.335289508469789e-06, + "loss": 0.27884238958358765, + "step": 4107 + }, + { + "epoch": 1.0908245916876909, + "grad_norm": 1.283025870689831, + "learning_rate": 9.33090851808913e-06, + "loss": 0.2689289152622223, + "step": 4108 + }, + { + "epoch": 1.0910901606692338, + "grad_norm": 1.2574326426613287, + "learning_rate": 9.326527656701414e-06, + "loss": 0.2633207440376282, + "step": 4109 + }, + { + "epoch": 1.0913557296507768, + "grad_norm": 1.1611202948336292, + "learning_rate": 9.322146925151226e-06, + "loss": 0.26001888513565063, + "step": 4110 + }, + { + "epoch": 1.0916212986323197, + "grad_norm": 1.1436383156785508, + "learning_rate": 9.31776632428312e-06, + "loss": 0.2739099860191345, + "step": 4111 + }, + { + "epoch": 1.0918868676138627, + "grad_norm": 1.1080458686771364, + "learning_rate": 9.313385854941616e-06, + "loss": 0.24885550141334534, + "step": 4112 + }, + { + "epoch": 1.0921524365954056, + "grad_norm": 1.1643870148920956, + "learning_rate": 9.309005517971222e-06, + "loss": 0.2609873414039612, + "step": 4113 + }, + { + "epoch": 1.0924180055769486, + "grad_norm": 1.427636157796487, + "learning_rate": 9.304625314216415e-06, + "loss": 0.28853538632392883, + "step": 4114 + }, + { + "epoch": 1.0926835745584915, + "grad_norm": 1.072833070391428, + "learning_rate": 9.300245244521647e-06, + "loss": 0.2629924714565277, + "step": 4115 + }, + { + "epoch": 1.0929491435400345, + 
"grad_norm": 1.1804644749067619, + "learning_rate": 9.295865309731342e-06, + "loss": 0.2687820494174957, + "step": 4116 + }, + { + "epoch": 1.0932147125215774, + "grad_norm": 1.0831905202820669, + "learning_rate": 9.2914855106899e-06, + "loss": 0.2293676733970642, + "step": 4117 + }, + { + "epoch": 1.0934802815031204, + "grad_norm": 1.1645005992728827, + "learning_rate": 9.287105848241694e-06, + "loss": 0.25261443853378296, + "step": 4118 + }, + { + "epoch": 1.0937458504846633, + "grad_norm": 1.1209341991417805, + "learning_rate": 9.282726323231077e-06, + "loss": 0.26238197088241577, + "step": 4119 + }, + { + "epoch": 1.0940114194662063, + "grad_norm": 1.1230838898563178, + "learning_rate": 9.278346936502364e-06, + "loss": 0.25718310475349426, + "step": 4120 + }, + { + "epoch": 1.0942769884477492, + "grad_norm": 1.1872711264618019, + "learning_rate": 9.273967688899849e-06, + "loss": 0.23810459673404694, + "step": 4121 + }, + { + "epoch": 1.0945425574292922, + "grad_norm": 1.0680734314830214, + "learning_rate": 9.269588581267804e-06, + "loss": 0.2197081446647644, + "step": 4122 + }, + { + "epoch": 1.0948081264108351, + "grad_norm": 1.1043223190124707, + "learning_rate": 9.265209614450463e-06, + "loss": 0.2429335117340088, + "step": 4123 + }, + { + "epoch": 1.095073695392378, + "grad_norm": 1.1380552272436657, + "learning_rate": 9.260830789292043e-06, + "loss": 0.23028087615966797, + "step": 4124 + }, + { + "epoch": 1.095339264373921, + "grad_norm": 1.2203393500716264, + "learning_rate": 9.25645210663673e-06, + "loss": 0.2783699035644531, + "step": 4125 + }, + { + "epoch": 1.095604833355464, + "grad_norm": 1.1686978964802806, + "learning_rate": 9.25207356732868e-06, + "loss": 0.25055867433547974, + "step": 4126 + }, + { + "epoch": 1.095870402337007, + "grad_norm": 1.2313132067115398, + "learning_rate": 9.247695172212026e-06, + "loss": 0.28629350662231445, + "step": 4127 + }, + { + "epoch": 1.09613597131855, + "grad_norm": 1.2403423880097748, + "learning_rate": 
9.24331692213087e-06, + "loss": 0.2626604735851288, + "step": 4128 + }, + { + "epoch": 1.0964015403000928, + "grad_norm": 1.2478078302425437, + "learning_rate": 9.238938817929288e-06, + "loss": 0.237881600856781, + "step": 4129 + }, + { + "epoch": 1.0966671092816358, + "grad_norm": 1.144955023428898, + "learning_rate": 9.234560860451325e-06, + "loss": 0.2602109909057617, + "step": 4130 + }, + { + "epoch": 1.0969326782631788, + "grad_norm": 1.1775071297104545, + "learning_rate": 9.230183050541001e-06, + "loss": 0.2721475064754486, + "step": 4131 + }, + { + "epoch": 1.0971982472447217, + "grad_norm": 1.7664052681173497, + "learning_rate": 9.225805389042307e-06, + "loss": 0.25844910740852356, + "step": 4132 + }, + { + "epoch": 1.0974638162262647, + "grad_norm": 1.1612334633259545, + "learning_rate": 9.221427876799201e-06, + "loss": 0.26671040058135986, + "step": 4133 + }, + { + "epoch": 1.0977293852078078, + "grad_norm": 1.3116748641368057, + "learning_rate": 9.21705051465562e-06, + "loss": 0.2610115706920624, + "step": 4134 + }, + { + "epoch": 1.0979949541893508, + "grad_norm": 1.1348320206960383, + "learning_rate": 9.212673303455464e-06, + "loss": 0.2518802881240845, + "step": 4135 + }, + { + "epoch": 1.0982605231708937, + "grad_norm": 1.2313324732863455, + "learning_rate": 9.20829624404261e-06, + "loss": 0.28600364923477173, + "step": 4136 + }, + { + "epoch": 1.0985260921524367, + "grad_norm": 1.0787729379648288, + "learning_rate": 9.203919337260903e-06, + "loss": 0.2649504840373993, + "step": 4137 + }, + { + "epoch": 1.0987916611339796, + "grad_norm": 1.0717018301402161, + "learning_rate": 9.199542583954159e-06, + "loss": 0.22613298892974854, + "step": 4138 + }, + { + "epoch": 1.0990572301155226, + "grad_norm": 1.1049408193201318, + "learning_rate": 9.195165984966163e-06, + "loss": 0.22546961903572083, + "step": 4139 + }, + { + "epoch": 1.0993227990970655, + "grad_norm": 1.1132579479037434, + "learning_rate": 9.190789541140675e-06, + "loss": 0.20618169009685516, + 
"step": 4140 + }, + { + "epoch": 1.0995883680786085, + "grad_norm": 1.1910818165933836, + "learning_rate": 9.18641325332142e-06, + "loss": 0.2434382289648056, + "step": 4141 + }, + { + "epoch": 1.0998539370601514, + "grad_norm": 1.0160349259469954, + "learning_rate": 9.182037122352092e-06, + "loss": 0.19114840030670166, + "step": 4142 + }, + { + "epoch": 1.1001195060416944, + "grad_norm": 1.371175220167047, + "learning_rate": 9.17766114907636e-06, + "loss": 0.2793614864349365, + "step": 4143 + }, + { + "epoch": 1.1003850750232373, + "grad_norm": 1.3230746818872392, + "learning_rate": 9.173285334337863e-06, + "loss": 0.2908466160297394, + "step": 4144 + }, + { + "epoch": 1.1006506440047803, + "grad_norm": 1.1707475106499343, + "learning_rate": 9.168909678980199e-06, + "loss": 0.260933518409729, + "step": 4145 + }, + { + "epoch": 1.1009162129863233, + "grad_norm": 1.170079737982666, + "learning_rate": 9.16453418384695e-06, + "loss": 0.2819761037826538, + "step": 4146 + }, + { + "epoch": 1.1011817819678662, + "grad_norm": 1.251357168283767, + "learning_rate": 9.160158849781657e-06, + "loss": 0.25290411710739136, + "step": 4147 + }, + { + "epoch": 1.1014473509494092, + "grad_norm": 1.0782378998536035, + "learning_rate": 9.155783677627831e-06, + "loss": 0.21255841851234436, + "step": 4148 + }, + { + "epoch": 1.101712919930952, + "grad_norm": 0.9808101112826028, + "learning_rate": 9.151408668228958e-06, + "loss": 0.20631751418113708, + "step": 4149 + }, + { + "epoch": 1.101978488912495, + "grad_norm": 1.0273447794760797, + "learning_rate": 9.147033822428484e-06, + "loss": 0.20976273715496063, + "step": 4150 + }, + { + "epoch": 1.102244057894038, + "grad_norm": 1.0193138467531315, + "learning_rate": 9.142659141069828e-06, + "loss": 0.21464477479457855, + "step": 4151 + }, + { + "epoch": 1.102509626875581, + "grad_norm": 1.182770191723374, + "learning_rate": 9.13828462499638e-06, + "loss": 0.2262338101863861, + "step": 4152 + }, + { + "epoch": 1.102775195857124, + 
"grad_norm": 1.2057409707570275, + "learning_rate": 9.133910275051493e-06, + "loss": 0.26331469416618347, + "step": 4153 + }, + { + "epoch": 1.1030407648386669, + "grad_norm": 1.1729382721759571, + "learning_rate": 9.129536092078488e-06, + "loss": 0.26280921697616577, + "step": 4154 + }, + { + "epoch": 1.1033063338202098, + "grad_norm": 1.1474203361843618, + "learning_rate": 9.12516207692066e-06, + "loss": 0.2527182698249817, + "step": 4155 + }, + { + "epoch": 1.1035719028017528, + "grad_norm": 1.114868090084267, + "learning_rate": 9.120788230421267e-06, + "loss": 0.21416455507278442, + "step": 4156 + }, + { + "epoch": 1.1038374717832957, + "grad_norm": 1.149698502937602, + "learning_rate": 9.116414553423535e-06, + "loss": 0.25882014632225037, + "step": 4157 + }, + { + "epoch": 1.1041030407648387, + "grad_norm": 1.1615644224212993, + "learning_rate": 9.112041046770653e-06, + "loss": 0.20510248839855194, + "step": 4158 + }, + { + "epoch": 1.1043686097463816, + "grad_norm": 1.372282887646487, + "learning_rate": 9.107667711305786e-06, + "loss": 0.2348058819770813, + "step": 4159 + }, + { + "epoch": 1.1046341787279246, + "grad_norm": 1.2389958643414019, + "learning_rate": 9.10329454787206e-06, + "loss": 0.24561384320259094, + "step": 4160 + }, + { + "epoch": 1.1048997477094675, + "grad_norm": 1.133562757165387, + "learning_rate": 9.098921557312573e-06, + "loss": 0.23025226593017578, + "step": 4161 + }, + { + "epoch": 1.1051653166910105, + "grad_norm": 1.2483870007074676, + "learning_rate": 9.094548740470375e-06, + "loss": 0.2724589705467224, + "step": 4162 + }, + { + "epoch": 1.1054308856725534, + "grad_norm": 1.2319217483915181, + "learning_rate": 9.090176098188504e-06, + "loss": 0.25196704268455505, + "step": 4163 + }, + { + "epoch": 1.1056964546540964, + "grad_norm": 1.0723466269314343, + "learning_rate": 9.085803631309953e-06, + "loss": 0.22673696279525757, + "step": 4164 + }, + { + "epoch": 1.1059620236356393, + "grad_norm": 1.3129015386402236, + "learning_rate": 
9.081431340677679e-06, + "loss": 0.23913519084453583, + "step": 4165 + }, + { + "epoch": 1.1062275926171823, + "grad_norm": 1.3859005835374885, + "learning_rate": 9.07705922713461e-06, + "loss": 0.2723861336708069, + "step": 4166 + }, + { + "epoch": 1.1064931615987252, + "grad_norm": 1.15651219284811, + "learning_rate": 9.072687291523636e-06, + "loss": 0.262167364358902, + "step": 4167 + }, + { + "epoch": 1.1067587305802682, + "grad_norm": 1.4186208937810438, + "learning_rate": 9.068315534687615e-06, + "loss": 0.2394658625125885, + "step": 4168 + }, + { + "epoch": 1.1070242995618111, + "grad_norm": 1.116555661084851, + "learning_rate": 9.063943957469373e-06, + "loss": 0.2547619938850403, + "step": 4169 + }, + { + "epoch": 1.107289868543354, + "grad_norm": 1.1242129377429575, + "learning_rate": 9.059572560711697e-06, + "loss": 0.24057570099830627, + "step": 4170 + }, + { + "epoch": 1.107555437524897, + "grad_norm": 1.057297781351654, + "learning_rate": 9.055201345257331e-06, + "loss": 0.21729445457458496, + "step": 4171 + }, + { + "epoch": 1.10782100650644, + "grad_norm": 1.2310508574302907, + "learning_rate": 9.05083031194901e-06, + "loss": 0.26590001583099365, + "step": 4172 + }, + { + "epoch": 1.108086575487983, + "grad_norm": 1.2932563576951384, + "learning_rate": 9.04645946162941e-06, + "loss": 0.26114848256111145, + "step": 4173 + }, + { + "epoch": 1.108352144469526, + "grad_norm": 1.1776684059902396, + "learning_rate": 9.04208879514118e-06, + "loss": 0.2255469262599945, + "step": 4174 + }, + { + "epoch": 1.1086177134510689, + "grad_norm": 1.1791871226781019, + "learning_rate": 9.037718313326932e-06, + "loss": 0.2597671151161194, + "step": 4175 + }, + { + "epoch": 1.1088832824326118, + "grad_norm": 1.1140795273935102, + "learning_rate": 9.033348017029247e-06, + "loss": 0.24820469319820404, + "step": 4176 + }, + { + "epoch": 1.1091488514141548, + "grad_norm": 1.2459789693741423, + "learning_rate": 9.028977907090661e-06, + "loss": 0.23886600136756897, + "step": 
4177 + }, + { + "epoch": 1.1094144203956977, + "grad_norm": 1.091274384086243, + "learning_rate": 9.024607984353682e-06, + "loss": 0.24204152822494507, + "step": 4178 + }, + { + "epoch": 1.1096799893772407, + "grad_norm": 1.0934112812518066, + "learning_rate": 9.02023824966078e-06, + "loss": 0.23246638476848602, + "step": 4179 + }, + { + "epoch": 1.1099455583587836, + "grad_norm": 1.124332043141092, + "learning_rate": 9.015868703854386e-06, + "loss": 0.25057342648506165, + "step": 4180 + }, + { + "epoch": 1.1102111273403266, + "grad_norm": 1.117105393632997, + "learning_rate": 9.011499347776902e-06, + "loss": 0.2316257357597351, + "step": 4181 + }, + { + "epoch": 1.1104766963218695, + "grad_norm": 1.4294765240232425, + "learning_rate": 9.007130182270685e-06, + "loss": 0.24824783205986023, + "step": 4182 + }, + { + "epoch": 1.1107422653034125, + "grad_norm": 1.1667528236187257, + "learning_rate": 9.002761208178059e-06, + "loss": 0.25174480676651, + "step": 4183 + }, + { + "epoch": 1.1110078342849554, + "grad_norm": 1.0615254217045484, + "learning_rate": 8.998392426341313e-06, + "loss": 0.22364717721939087, + "step": 4184 + }, + { + "epoch": 1.1112734032664984, + "grad_norm": 1.0478203412338092, + "learning_rate": 8.994023837602694e-06, + "loss": 0.2205432504415512, + "step": 4185 + }, + { + "epoch": 1.1115389722480415, + "grad_norm": 1.4181125559874541, + "learning_rate": 8.989655442804413e-06, + "loss": 0.23303675651550293, + "step": 4186 + }, + { + "epoch": 1.1118045412295845, + "grad_norm": 1.2558407878646785, + "learning_rate": 8.985287242788646e-06, + "loss": 0.3003222644329071, + "step": 4187 + }, + { + "epoch": 1.1120701102111274, + "grad_norm": 1.146183553652687, + "learning_rate": 8.980919238397532e-06, + "loss": 0.2734413146972656, + "step": 4188 + }, + { + "epoch": 1.1123356791926704, + "grad_norm": 1.200748942223162, + "learning_rate": 8.976551430473166e-06, + "loss": 0.24086692929267883, + "step": 4189 + }, + { + "epoch": 1.1126012481742134, + 
"grad_norm": 1.2277073829430902, + "learning_rate": 8.972183819857618e-06, + "loss": 0.2531188130378723, + "step": 4190 + }, + { + "epoch": 1.1128668171557563, + "grad_norm": 1.1067327267341682, + "learning_rate": 8.96781640739291e-06, + "loss": 0.25059640407562256, + "step": 4191 + }, + { + "epoch": 1.1131323861372993, + "grad_norm": 1.1987793097859372, + "learning_rate": 8.963449193921023e-06, + "loss": 0.22427335381507874, + "step": 4192 + }, + { + "epoch": 1.1133979551188422, + "grad_norm": 1.1842662472837817, + "learning_rate": 8.959082180283906e-06, + "loss": 0.28835898637771606, + "step": 4193 + }, + { + "epoch": 1.1136635241003852, + "grad_norm": 1.1161865281550452, + "learning_rate": 8.954715367323468e-06, + "loss": 0.23919034004211426, + "step": 4194 + }, + { + "epoch": 1.1139290930819281, + "grad_norm": 1.186821665962327, + "learning_rate": 8.950348755881578e-06, + "loss": 0.24583986401557922, + "step": 4195 + }, + { + "epoch": 1.114194662063471, + "grad_norm": 1.2519292440490923, + "learning_rate": 8.94598234680007e-06, + "loss": 0.23869696259498596, + "step": 4196 + }, + { + "epoch": 1.114460231045014, + "grad_norm": 1.1662462204488522, + "learning_rate": 8.941616140920734e-06, + "loss": 0.2672434449195862, + "step": 4197 + }, + { + "epoch": 1.114725800026557, + "grad_norm": 1.2253961517889995, + "learning_rate": 8.937250139085322e-06, + "loss": 0.2660336494445801, + "step": 4198 + }, + { + "epoch": 1.1149913690081, + "grad_norm": 1.1608224464613695, + "learning_rate": 8.932884342135552e-06, + "loss": 0.26461780071258545, + "step": 4199 + }, + { + "epoch": 1.1152569379896429, + "grad_norm": 1.1632580978978435, + "learning_rate": 8.928518750913094e-06, + "loss": 0.22947481274604797, + "step": 4200 + }, + { + "epoch": 1.1155225069711858, + "grad_norm": 1.116659758904741, + "learning_rate": 8.924153366259584e-06, + "loss": 0.22715970873832703, + "step": 4201 + }, + { + "epoch": 1.1157880759527288, + "grad_norm": 1.3785482068816968, + "learning_rate": 
8.919788189016618e-06, + "loss": 0.2994215190410614, + "step": 4202 + }, + { + "epoch": 1.1160536449342717, + "grad_norm": 1.158412598714371, + "learning_rate": 8.915423220025747e-06, + "loss": 0.2290656566619873, + "step": 4203 + }, + { + "epoch": 1.1163192139158147, + "grad_norm": 1.093685203516635, + "learning_rate": 8.911058460128489e-06, + "loss": 0.22284844517707825, + "step": 4204 + }, + { + "epoch": 1.1165847828973576, + "grad_norm": 1.0534371355750514, + "learning_rate": 8.906693910166316e-06, + "loss": 0.2095392495393753, + "step": 4205 + }, + { + "epoch": 1.1168503518789006, + "grad_norm": 1.197609739800315, + "learning_rate": 8.902329570980665e-06, + "loss": 0.25098133087158203, + "step": 4206 + }, + { + "epoch": 1.1171159208604435, + "grad_norm": 1.1630125842119448, + "learning_rate": 8.897965443412923e-06, + "loss": 0.24768148362636566, + "step": 4207 + }, + { + "epoch": 1.1173814898419865, + "grad_norm": 1.1213395777051767, + "learning_rate": 8.89360152830445e-06, + "loss": 0.22255480289459229, + "step": 4208 + }, + { + "epoch": 1.1176470588235294, + "grad_norm": 1.2306365389400118, + "learning_rate": 8.889237826496551e-06, + "loss": 0.23721200227737427, + "step": 4209 + }, + { + "epoch": 1.1179126278050724, + "grad_norm": 1.1422779685655824, + "learning_rate": 8.8848743388305e-06, + "loss": 0.25002530217170715, + "step": 4210 + }, + { + "epoch": 1.1181781967866153, + "grad_norm": 1.2862841308153614, + "learning_rate": 8.880511066147524e-06, + "loss": 0.27188029885292053, + "step": 4211 + }, + { + "epoch": 1.1184437657681583, + "grad_norm": 1.1517061730387759, + "learning_rate": 8.876148009288813e-06, + "loss": 0.23056066036224365, + "step": 4212 + }, + { + "epoch": 1.1187093347497012, + "grad_norm": 1.172676602980077, + "learning_rate": 8.87178516909551e-06, + "loss": 0.2336079478263855, + "step": 4213 + }, + { + "epoch": 1.1189749037312442, + "grad_norm": 1.1868473876345316, + "learning_rate": 8.86742254640872e-06, + "loss": 0.27449533343315125, + 
"step": 4214 + }, + { + "epoch": 1.1192404727127871, + "grad_norm": 1.1500112066365369, + "learning_rate": 8.863060142069508e-06, + "loss": 0.24714893102645874, + "step": 4215 + }, + { + "epoch": 1.11950604169433, + "grad_norm": 1.072070573678295, + "learning_rate": 8.858697956918886e-06, + "loss": 0.2155439257621765, + "step": 4216 + }, + { + "epoch": 1.119771610675873, + "grad_norm": 1.1798452175680678, + "learning_rate": 8.854335991797842e-06, + "loss": 0.23189155757427216, + "step": 4217 + }, + { + "epoch": 1.120037179657416, + "grad_norm": 1.0773206236657924, + "learning_rate": 8.849974247547307e-06, + "loss": 0.23413527011871338, + "step": 4218 + }, + { + "epoch": 1.120302748638959, + "grad_norm": 1.1991513784988423, + "learning_rate": 8.845612725008173e-06, + "loss": 0.2569039463996887, + "step": 4219 + }, + { + "epoch": 1.120568317620502, + "grad_norm": 1.1795807532964264, + "learning_rate": 8.84125142502129e-06, + "loss": 0.2699541449546814, + "step": 4220 + }, + { + "epoch": 1.1208338866020449, + "grad_norm": 1.1092727759218166, + "learning_rate": 8.836890348427468e-06, + "loss": 0.27172449231147766, + "step": 4221 + }, + { + "epoch": 1.1210994555835878, + "grad_norm": 1.2315684717645485, + "learning_rate": 8.83252949606747e-06, + "loss": 0.2839444875717163, + "step": 4222 + }, + { + "epoch": 1.1213650245651308, + "grad_norm": 1.1676850588618106, + "learning_rate": 8.828168868782013e-06, + "loss": 0.22782178223133087, + "step": 4223 + }, + { + "epoch": 1.1216305935466737, + "grad_norm": 1.132889704492098, + "learning_rate": 8.82380846741178e-06, + "loss": 0.2567726671695709, + "step": 4224 + }, + { + "epoch": 1.1218961625282167, + "grad_norm": 1.1872540675130212, + "learning_rate": 8.8194482927974e-06, + "loss": 0.25879523158073425, + "step": 4225 + }, + { + "epoch": 1.1221617315097596, + "grad_norm": 1.0193477801534692, + "learning_rate": 8.815088345779466e-06, + "loss": 0.22109058499336243, + "step": 4226 + }, + { + "epoch": 1.1224273004913026, + 
"grad_norm": 1.1414592493281657, + "learning_rate": 8.810728627198526e-06, + "loss": 0.23615925014019012, + "step": 4227 + }, + { + "epoch": 1.1226928694728455, + "grad_norm": 1.160290266155045, + "learning_rate": 8.806369137895081e-06, + "loss": 0.2751353085041046, + "step": 4228 + }, + { + "epoch": 1.1229584384543885, + "grad_norm": 1.2566953981709197, + "learning_rate": 8.802009878709587e-06, + "loss": 0.2361963391304016, + "step": 4229 + }, + { + "epoch": 1.1232240074359314, + "grad_norm": 1.186723455251228, + "learning_rate": 8.79765085048246e-06, + "loss": 0.22435930371284485, + "step": 4230 + }, + { + "epoch": 1.1234895764174744, + "grad_norm": 1.1759467333820823, + "learning_rate": 8.79329205405407e-06, + "loss": 0.2355855256319046, + "step": 4231 + }, + { + "epoch": 1.1237551453990173, + "grad_norm": 1.1450490838951077, + "learning_rate": 8.78893349026474e-06, + "loss": 0.24127572774887085, + "step": 4232 + }, + { + "epoch": 1.1240207143805603, + "grad_norm": 1.222656849347683, + "learning_rate": 8.784575159954748e-06, + "loss": 0.2677989602088928, + "step": 4233 + }, + { + "epoch": 1.1242862833621032, + "grad_norm": 1.109384474337522, + "learning_rate": 8.78021706396433e-06, + "loss": 0.2283135950565338, + "step": 4234 + }, + { + "epoch": 1.1245518523436462, + "grad_norm": 1.1669732456316693, + "learning_rate": 8.775859203133678e-06, + "loss": 0.2686103582382202, + "step": 4235 + }, + { + "epoch": 1.1248174213251891, + "grad_norm": 1.3869789172842044, + "learning_rate": 8.771501578302934e-06, + "loss": 0.2638726234436035, + "step": 4236 + }, + { + "epoch": 1.125082990306732, + "grad_norm": 1.0752600847920544, + "learning_rate": 8.767144190312196e-06, + "loss": 0.2517441511154175, + "step": 4237 + }, + { + "epoch": 1.125348559288275, + "grad_norm": 1.1903096570499558, + "learning_rate": 8.762787040001518e-06, + "loss": 0.2593642771244049, + "step": 4238 + }, + { + "epoch": 1.125614128269818, + "grad_norm": 1.123653942868709, + "learning_rate": 
8.758430128210908e-06, + "loss": 0.23758336901664734, + "step": 4239 + }, + { + "epoch": 1.125879697251361, + "grad_norm": 1.182033088729647, + "learning_rate": 8.754073455780327e-06, + "loss": 0.2557980716228485, + "step": 4240 + }, + { + "epoch": 1.126145266232904, + "grad_norm": 1.1182311632466304, + "learning_rate": 8.74971702354969e-06, + "loss": 0.2484067678451538, + "step": 4241 + }, + { + "epoch": 1.1264108352144468, + "grad_norm": 1.121886097833982, + "learning_rate": 8.745360832358864e-06, + "loss": 0.23103098571300507, + "step": 4242 + }, + { + "epoch": 1.1266764041959898, + "grad_norm": 1.1856800379472048, + "learning_rate": 8.741004883047667e-06, + "loss": 0.2630731463432312, + "step": 4243 + }, + { + "epoch": 1.1269419731775328, + "grad_norm": 1.1814851216743405, + "learning_rate": 8.736649176455885e-06, + "loss": 0.2413114309310913, + "step": 4244 + }, + { + "epoch": 1.1272075421590757, + "grad_norm": 1.1465608986560651, + "learning_rate": 8.732293713423243e-06, + "loss": 0.22463169693946838, + "step": 4245 + }, + { + "epoch": 1.1274731111406187, + "grad_norm": 1.1943136125759177, + "learning_rate": 8.727938494789421e-06, + "loss": 0.23641429841518402, + "step": 4246 + }, + { + "epoch": 1.1277386801221616, + "grad_norm": 1.399290186521162, + "learning_rate": 8.723583521394054e-06, + "loss": 0.2547767162322998, + "step": 4247 + }, + { + "epoch": 1.1280042491037048, + "grad_norm": 1.1274578262359225, + "learning_rate": 8.719228794076733e-06, + "loss": 0.25753074884414673, + "step": 4248 + }, + { + "epoch": 1.1282698180852477, + "grad_norm": 1.2581544322188265, + "learning_rate": 8.714874313676992e-06, + "loss": 0.30602240562438965, + "step": 4249 + }, + { + "epoch": 1.1285353870667907, + "grad_norm": 1.3693509289176364, + "learning_rate": 8.710520081034328e-06, + "loss": 0.28336623311042786, + "step": 4250 + }, + { + "epoch": 1.1288009560483336, + "grad_norm": 1.179198933472593, + "learning_rate": 8.706166096988185e-06, + "loss": 0.24065867066383362, + 
"step": 4251 + }, + { + "epoch": 1.1290665250298766, + "grad_norm": 1.1350442144429624, + "learning_rate": 8.701812362377954e-06, + "loss": 0.25674968957901, + "step": 4252 + }, + { + "epoch": 1.1293320940114195, + "grad_norm": 1.0526431620404462, + "learning_rate": 8.697458878042992e-06, + "loss": 0.21502923965454102, + "step": 4253 + }, + { + "epoch": 1.1295976629929625, + "grad_norm": 1.199807552125115, + "learning_rate": 8.693105644822598e-06, + "loss": 0.26848286390304565, + "step": 4254 + }, + { + "epoch": 1.1298632319745054, + "grad_norm": 1.1632395937948599, + "learning_rate": 8.688752663556022e-06, + "loss": 0.24283824861049652, + "step": 4255 + }, + { + "epoch": 1.1301288009560484, + "grad_norm": 1.231861138079484, + "learning_rate": 8.684399935082468e-06, + "loss": 0.2511506974697113, + "step": 4256 + }, + { + "epoch": 1.1303943699375913, + "grad_norm": 1.1293067099587706, + "learning_rate": 8.68004746024109e-06, + "loss": 0.23932483792304993, + "step": 4257 + }, + { + "epoch": 1.1306599389191343, + "grad_norm": 1.229437521917496, + "learning_rate": 8.675695239870993e-06, + "loss": 0.30030694603919983, + "step": 4258 + }, + { + "epoch": 1.1309255079006773, + "grad_norm": 1.1154596754627621, + "learning_rate": 8.671343274811238e-06, + "loss": 0.24699059128761292, + "step": 4259 + }, + { + "epoch": 1.1311910768822202, + "grad_norm": 1.1288414782501015, + "learning_rate": 8.666991565900827e-06, + "loss": 0.26828041672706604, + "step": 4260 + }, + { + "epoch": 1.1314566458637632, + "grad_norm": 1.0765132569205758, + "learning_rate": 8.662640113978717e-06, + "loss": 0.2372082769870758, + "step": 4261 + }, + { + "epoch": 1.131722214845306, + "grad_norm": 1.2100447285144145, + "learning_rate": 8.658288919883824e-06, + "loss": 0.26367881894111633, + "step": 4262 + }, + { + "epoch": 1.131987783826849, + "grad_norm": 1.1035052537421275, + "learning_rate": 8.653937984455007e-06, + "loss": 0.2287222146987915, + "step": 4263 + }, + { + "epoch": 1.132253352808392, + 
"grad_norm": 1.1417963040520365, + "learning_rate": 8.649587308531067e-06, + "loss": 0.244521826505661, + "step": 4264 + }, + { + "epoch": 1.132518921789935, + "grad_norm": 1.2243689126496846, + "learning_rate": 8.64523689295077e-06, + "loss": 0.26912257075309753, + "step": 4265 + }, + { + "epoch": 1.132784490771478, + "grad_norm": 1.2384832947619873, + "learning_rate": 8.64088673855282e-06, + "loss": 0.23002780973911285, + "step": 4266 + }, + { + "epoch": 1.1330500597530209, + "grad_norm": 1.253742603342847, + "learning_rate": 8.636536846175878e-06, + "loss": 0.2561958432197571, + "step": 4267 + }, + { + "epoch": 1.1333156287345638, + "grad_norm": 1.2156026453092519, + "learning_rate": 8.63218721665855e-06, + "loss": 0.25553008913993835, + "step": 4268 + }, + { + "epoch": 1.1335811977161068, + "grad_norm": 1.1992385112791626, + "learning_rate": 8.627837850839398e-06, + "loss": 0.1992083340883255, + "step": 4269 + }, + { + "epoch": 1.1338467666976497, + "grad_norm": 1.3643398602160783, + "learning_rate": 8.62348874955692e-06, + "loss": 0.23075388371944427, + "step": 4270 + }, + { + "epoch": 1.1341123356791927, + "grad_norm": 1.1072751580070286, + "learning_rate": 8.619139913649582e-06, + "loss": 0.23691913485527039, + "step": 4271 + }, + { + "epoch": 1.1343779046607356, + "grad_norm": 1.2656689209279672, + "learning_rate": 8.61479134395578e-06, + "loss": 0.2536017894744873, + "step": 4272 + }, + { + "epoch": 1.1346434736422786, + "grad_norm": 1.2870409796681632, + "learning_rate": 8.61044304131387e-06, + "loss": 0.3014161288738251, + "step": 4273 + }, + { + "epoch": 1.1349090426238215, + "grad_norm": 1.1669055614665604, + "learning_rate": 8.606095006562156e-06, + "loss": 0.26333582401275635, + "step": 4274 + }, + { + "epoch": 1.1351746116053645, + "grad_norm": 1.2370251285176135, + "learning_rate": 8.601747240538883e-06, + "loss": 0.23796264827251434, + "step": 4275 + }, + { + "epoch": 1.1354401805869074, + "grad_norm": 1.1989417705813543, + "learning_rate": 
8.597399744082251e-06, + "loss": 0.23737141489982605, + "step": 4276 + }, + { + "epoch": 1.1357057495684504, + "grad_norm": 1.1281376384049915, + "learning_rate": 8.593052518030407e-06, + "loss": 0.21073032915592194, + "step": 4277 + }, + { + "epoch": 1.1359713185499933, + "grad_norm": 1.2935455290015059, + "learning_rate": 8.588705563221444e-06, + "loss": 0.2597163915634155, + "step": 4278 + }, + { + "epoch": 1.1362368875315363, + "grad_norm": 1.137636804234172, + "learning_rate": 8.584358880493402e-06, + "loss": 0.24541154503822327, + "step": 4279 + }, + { + "epoch": 1.1365024565130792, + "grad_norm": 1.1331800338594176, + "learning_rate": 8.580012470684273e-06, + "loss": 0.19294027984142303, + "step": 4280 + }, + { + "epoch": 1.1367680254946222, + "grad_norm": 1.2387583554091215, + "learning_rate": 8.575666334631994e-06, + "loss": 0.26909738779067993, + "step": 4281 + }, + { + "epoch": 1.1370335944761651, + "grad_norm": 1.2850664046416893, + "learning_rate": 8.571320473174444e-06, + "loss": 0.2550502121448517, + "step": 4282 + }, + { + "epoch": 1.137299163457708, + "grad_norm": 1.138070930000495, + "learning_rate": 8.566974887149461e-06, + "loss": 0.2256634682416916, + "step": 4283 + }, + { + "epoch": 1.137564732439251, + "grad_norm": 1.3289753418379673, + "learning_rate": 8.562629577394817e-06, + "loss": 0.26154983043670654, + "step": 4284 + }, + { + "epoch": 1.137830301420794, + "grad_norm": 1.2426566834274124, + "learning_rate": 8.558284544748239e-06, + "loss": 0.24685145914554596, + "step": 4285 + }, + { + "epoch": 1.138095870402337, + "grad_norm": 1.177162412641928, + "learning_rate": 8.553939790047396e-06, + "loss": 0.2584421932697296, + "step": 4286 + }, + { + "epoch": 1.13836143938388, + "grad_norm": 1.2486541463378953, + "learning_rate": 8.549595314129907e-06, + "loss": 0.24582788348197937, + "step": 4287 + }, + { + "epoch": 1.1386270083654229, + "grad_norm": 1.1978925998644077, + "learning_rate": 8.545251117833334e-06, + "loss": 0.26023977994918823, + 
"step": 4288 + }, + { + "epoch": 1.1388925773469658, + "grad_norm": 1.2566090334130535, + "learning_rate": 8.54090720199519e-06, + "loss": 0.25575515627861023, + "step": 4289 + }, + { + "epoch": 1.1391581463285088, + "grad_norm": 1.2234599227483165, + "learning_rate": 8.53656356745293e-06, + "loss": 0.2784460783004761, + "step": 4290 + }, + { + "epoch": 1.1394237153100517, + "grad_norm": 1.11922615590049, + "learning_rate": 8.532220215043953e-06, + "loss": 0.24723297357559204, + "step": 4291 + }, + { + "epoch": 1.1396892842915947, + "grad_norm": 1.1960822646368614, + "learning_rate": 8.52787714560561e-06, + "loss": 0.24694418907165527, + "step": 4292 + }, + { + "epoch": 1.1399548532731376, + "grad_norm": 1.2073723964066632, + "learning_rate": 8.52353435997519e-06, + "loss": 0.19976040720939636, + "step": 4293 + }, + { + "epoch": 1.1402204222546806, + "grad_norm": 1.0875644999756633, + "learning_rate": 8.519191858989932e-06, + "loss": 0.21742458641529083, + "step": 4294 + }, + { + "epoch": 1.1404859912362235, + "grad_norm": 1.2040315384402727, + "learning_rate": 8.514849643487018e-06, + "loss": 0.26382917165756226, + "step": 4295 + }, + { + "epoch": 1.1407515602177665, + "grad_norm": 1.3073789721234685, + "learning_rate": 8.510507714303577e-06, + "loss": 0.30778488516807556, + "step": 4296 + }, + { + "epoch": 1.1410171291993096, + "grad_norm": 1.0727267660957265, + "learning_rate": 8.506166072276681e-06, + "loss": 0.20894449949264526, + "step": 4297 + }, + { + "epoch": 1.1412826981808526, + "grad_norm": 1.2119089915252295, + "learning_rate": 8.50182471824335e-06, + "loss": 0.2389567494392395, + "step": 4298 + }, + { + "epoch": 1.1415482671623955, + "grad_norm": 1.0286533711803312, + "learning_rate": 8.497483653040545e-06, + "loss": 0.20531126856803894, + "step": 4299 + }, + { + "epoch": 1.1418138361439385, + "grad_norm": 1.2153067733576255, + "learning_rate": 8.49314287750517e-06, + "loss": 0.2577363848686218, + "step": 4300 + }, + { + "epoch": 1.1420794051254815, + 
"grad_norm": 1.211343687077752, + "learning_rate": 8.488802392474076e-06, + "loss": 0.24225997924804688, + "step": 4301 + }, + { + "epoch": 1.1423449741070244, + "grad_norm": 1.2698570110354703, + "learning_rate": 8.484462198784058e-06, + "loss": 0.26494917273521423, + "step": 4302 + }, + { + "epoch": 1.1426105430885674, + "grad_norm": 1.2988704892129896, + "learning_rate": 8.480122297271855e-06, + "loss": 0.24903994798660278, + "step": 4303 + }, + { + "epoch": 1.1428761120701103, + "grad_norm": 1.1681075442122268, + "learning_rate": 8.475782688774147e-06, + "loss": 0.25291907787323, + "step": 4304 + }, + { + "epoch": 1.1431416810516533, + "grad_norm": 1.1301459507046017, + "learning_rate": 8.47144337412756e-06, + "loss": 0.22958475351333618, + "step": 4305 + }, + { + "epoch": 1.1434072500331962, + "grad_norm": 1.175766015682232, + "learning_rate": 8.46710435416866e-06, + "loss": 0.2305452972650528, + "step": 4306 + }, + { + "epoch": 1.1436728190147392, + "grad_norm": 1.2105790475425935, + "learning_rate": 8.462765629733965e-06, + "loss": 0.25028055906295776, + "step": 4307 + }, + { + "epoch": 1.1439383879962821, + "grad_norm": 1.2809924485725674, + "learning_rate": 8.458427201659926e-06, + "loss": 0.24873222410678864, + "step": 4308 + }, + { + "epoch": 1.144203956977825, + "grad_norm": 1.2345010944986379, + "learning_rate": 8.454089070782943e-06, + "loss": 0.23396535217761993, + "step": 4309 + }, + { + "epoch": 1.144469525959368, + "grad_norm": 1.1955062282547588, + "learning_rate": 8.449751237939354e-06, + "loss": 0.27120494842529297, + "step": 4310 + }, + { + "epoch": 1.144735094940911, + "grad_norm": 1.182924840045628, + "learning_rate": 8.445413703965441e-06, + "loss": 0.2734759449958801, + "step": 4311 + }, + { + "epoch": 1.145000663922454, + "grad_norm": 1.1584309667252248, + "learning_rate": 8.441076469697434e-06, + "loss": 0.25353512167930603, + "step": 4312 + }, + { + "epoch": 1.1452662329039969, + "grad_norm": 1.1913513856414861, + "learning_rate": 
8.436739535971497e-06, + "loss": 0.23851020634174347, + "step": 4313 + }, + { + "epoch": 1.1455318018855398, + "grad_norm": 1.2006838398252668, + "learning_rate": 8.432402903623741e-06, + "loss": 0.26320093870162964, + "step": 4314 + }, + { + "epoch": 1.1457973708670828, + "grad_norm": 1.1065666799118796, + "learning_rate": 8.428066573490211e-06, + "loss": 0.23859955370426178, + "step": 4315 + }, + { + "epoch": 1.1460629398486257, + "grad_norm": 1.197716796975668, + "learning_rate": 8.423730546406911e-06, + "loss": 0.2636772096157074, + "step": 4316 + }, + { + "epoch": 1.1463285088301687, + "grad_norm": 1.2459962038175347, + "learning_rate": 8.419394823209773e-06, + "loss": 0.2656415104866028, + "step": 4317 + }, + { + "epoch": 1.1465940778117116, + "grad_norm": 1.2225993542972535, + "learning_rate": 8.41505940473467e-06, + "loss": 0.2872830033302307, + "step": 4318 + }, + { + "epoch": 1.1468596467932546, + "grad_norm": 1.4653362839323858, + "learning_rate": 8.410724291817422e-06, + "loss": 0.229783833026886, + "step": 4319 + }, + { + "epoch": 1.1471252157747975, + "grad_norm": 4.273944826146497, + "learning_rate": 8.406389485293786e-06, + "loss": 0.24418675899505615, + "step": 4320 + }, + { + "epoch": 1.1473907847563405, + "grad_norm": 1.2385236183806463, + "learning_rate": 8.402054985999464e-06, + "loss": 0.2535584270954132, + "step": 4321 + }, + { + "epoch": 1.1476563537378834, + "grad_norm": 1.2116145926695832, + "learning_rate": 8.397720794770093e-06, + "loss": 0.23207828402519226, + "step": 4322 + }, + { + "epoch": 1.1479219227194264, + "grad_norm": 1.8129143471218838, + "learning_rate": 8.393386912441257e-06, + "loss": 0.27990391850471497, + "step": 4323 + }, + { + "epoch": 1.1481874917009693, + "grad_norm": 1.059877272327032, + "learning_rate": 8.38905333984847e-06, + "loss": 0.2098318189382553, + "step": 4324 + }, + { + "epoch": 1.1484530606825123, + "grad_norm": 1.1462464609840002, + "learning_rate": 8.384720077827204e-06, + "loss": 0.25303804874420166, + 
"step": 4325 + }, + { + "epoch": 1.1487186296640552, + "grad_norm": 1.0794728099252306, + "learning_rate": 8.380387127212858e-06, + "loss": 0.23481838405132294, + "step": 4326 + }, + { + "epoch": 1.1489841986455982, + "grad_norm": 1.1782142095551065, + "learning_rate": 8.376054488840771e-06, + "loss": 0.24842356145381927, + "step": 4327 + }, + { + "epoch": 1.1492497676271411, + "grad_norm": 1.136832039914945, + "learning_rate": 8.37172216354623e-06, + "loss": 0.23927366733551025, + "step": 4328 + }, + { + "epoch": 1.149515336608684, + "grad_norm": 1.1577812724546028, + "learning_rate": 8.367390152164448e-06, + "loss": 0.23836453258991241, + "step": 4329 + }, + { + "epoch": 1.149780905590227, + "grad_norm": 1.2492179140984832, + "learning_rate": 8.36305845553059e-06, + "loss": 0.2562161982059479, + "step": 4330 + }, + { + "epoch": 1.15004647457177, + "grad_norm": 1.120151700121908, + "learning_rate": 8.358727074479755e-06, + "loss": 0.21255920827388763, + "step": 4331 + }, + { + "epoch": 1.150312043553313, + "grad_norm": 1.1011600870179878, + "learning_rate": 8.354396009846985e-06, + "loss": 0.24200043082237244, + "step": 4332 + }, + { + "epoch": 1.150577612534856, + "grad_norm": 1.1644551235897023, + "learning_rate": 8.35006526246725e-06, + "loss": 0.23582379519939423, + "step": 4333 + }, + { + "epoch": 1.1508431815163989, + "grad_norm": 1.093546349726341, + "learning_rate": 8.34573483317548e-06, + "loss": 0.21554499864578247, + "step": 4334 + }, + { + "epoch": 1.1511087504979418, + "grad_norm": 1.2460346716976907, + "learning_rate": 8.341404722806525e-06, + "loss": 0.2789759039878845, + "step": 4335 + }, + { + "epoch": 1.1513743194794848, + "grad_norm": 1.212813860768853, + "learning_rate": 8.337074932195175e-06, + "loss": 0.24677832424640656, + "step": 4336 + }, + { + "epoch": 1.1516398884610277, + "grad_norm": 1.2351497128261646, + "learning_rate": 8.332745462176166e-06, + "loss": 0.28122392296791077, + "step": 4337 + }, + { + "epoch": 1.1519054574425707, + 
"grad_norm": 1.2447069177647443, + "learning_rate": 8.328416313584169e-06, + "loss": 0.23219403624534607, + "step": 4338 + }, + { + "epoch": 1.1521710264241136, + "grad_norm": 1.1258797089625292, + "learning_rate": 8.324087487253792e-06, + "loss": 0.19928379356861115, + "step": 4339 + }, + { + "epoch": 1.1524365954056566, + "grad_norm": 1.2737910298174706, + "learning_rate": 8.31975898401958e-06, + "loss": 0.27730467915534973, + "step": 4340 + }, + { + "epoch": 1.1527021643871995, + "grad_norm": 1.3906235348842741, + "learning_rate": 8.315430804716022e-06, + "loss": 0.25462737679481506, + "step": 4341 + }, + { + "epoch": 1.1529677333687425, + "grad_norm": 1.1703737499238527, + "learning_rate": 8.311102950177533e-06, + "loss": 0.2363007366657257, + "step": 4342 + }, + { + "epoch": 1.1532333023502854, + "grad_norm": 1.2498285131266695, + "learning_rate": 8.306775421238482e-06, + "loss": 0.2648352384567261, + "step": 4343 + }, + { + "epoch": 1.1534988713318284, + "grad_norm": 1.394847110607811, + "learning_rate": 8.302448218733158e-06, + "loss": 0.25645309686660767, + "step": 4344 + }, + { + "epoch": 1.1537644403133713, + "grad_norm": 1.2178564426244172, + "learning_rate": 8.298121343495797e-06, + "loss": 0.22962522506713867, + "step": 4345 + }, + { + "epoch": 1.1540300092949143, + "grad_norm": 1.132403649349265, + "learning_rate": 8.293794796360569e-06, + "loss": 0.21269623935222626, + "step": 4346 + }, + { + "epoch": 1.1542955782764572, + "grad_norm": 1.1646919704485588, + "learning_rate": 8.289468578161581e-06, + "loss": 0.2518436014652252, + "step": 4347 + }, + { + "epoch": 1.1545611472580002, + "grad_norm": 1.193830808481187, + "learning_rate": 8.285142689732877e-06, + "loss": 0.2318439483642578, + "step": 4348 + }, + { + "epoch": 1.1548267162395431, + "grad_norm": 1.0953821300718658, + "learning_rate": 8.280817131908438e-06, + "loss": 0.2278512567281723, + "step": 4349 + }, + { + "epoch": 1.155092285221086, + "grad_norm": 1.3446091578493078, + "learning_rate": 
8.27649190552218e-06, + "loss": 0.2521114945411682, + "step": 4350 + }, + { + "epoch": 1.155357854202629, + "grad_norm": 1.1722019112748296, + "learning_rate": 8.272167011407955e-06, + "loss": 0.2565760016441345, + "step": 4351 + }, + { + "epoch": 1.155623423184172, + "grad_norm": 1.3209067321897832, + "learning_rate": 8.267842450399552e-06, + "loss": 0.2603546679019928, + "step": 4352 + }, + { + "epoch": 1.155888992165715, + "grad_norm": 1.1697050726438265, + "learning_rate": 8.263518223330698e-06, + "loss": 0.2175855189561844, + "step": 4353 + }, + { + "epoch": 1.156154561147258, + "grad_norm": 1.1937135661774867, + "learning_rate": 8.25919433103505e-06, + "loss": 0.24521774053573608, + "step": 4354 + }, + { + "epoch": 1.1564201301288008, + "grad_norm": 1.3267445452853517, + "learning_rate": 8.254870774346203e-06, + "loss": 0.29673823714256287, + "step": 4355 + }, + { + "epoch": 1.1566856991103438, + "grad_norm": 1.260162624950344, + "learning_rate": 8.25054755409769e-06, + "loss": 0.26994144916534424, + "step": 4356 + }, + { + "epoch": 1.1569512680918868, + "grad_norm": 1.1578908727655277, + "learning_rate": 8.246224671122974e-06, + "loss": 0.2545935809612274, + "step": 4357 + }, + { + "epoch": 1.1572168370734297, + "grad_norm": 1.1469888258961152, + "learning_rate": 8.241902126255458e-06, + "loss": 0.23589034378528595, + "step": 4358 + }, + { + "epoch": 1.1574824060549727, + "grad_norm": 1.229284708155894, + "learning_rate": 8.237579920328478e-06, + "loss": 0.2617190480232239, + "step": 4359 + }, + { + "epoch": 1.1577479750365158, + "grad_norm": 1.2741716320060574, + "learning_rate": 8.233258054175302e-06, + "loss": 0.3092418313026428, + "step": 4360 + }, + { + "epoch": 1.1580135440180588, + "grad_norm": 1.1377305602079475, + "learning_rate": 8.228936528629138e-06, + "loss": 0.22873908281326294, + "step": 4361 + }, + { + "epoch": 1.1582791129996017, + "grad_norm": 1.0592847205754, + "learning_rate": 8.224615344523123e-06, + "loss": 0.22549089789390564, + 
"step": 4362 + }, + { + "epoch": 1.1585446819811447, + "grad_norm": 1.0288617285826194, + "learning_rate": 8.22029450269033e-06, + "loss": 0.19141459465026855, + "step": 4363 + }, + { + "epoch": 1.1588102509626876, + "grad_norm": 1.1679333849265336, + "learning_rate": 8.21597400396377e-06, + "loss": 0.24277547001838684, + "step": 4364 + }, + { + "epoch": 1.1590758199442306, + "grad_norm": 1.1463053400858605, + "learning_rate": 8.21165384917638e-06, + "loss": 0.2429513931274414, + "step": 4365 + }, + { + "epoch": 1.1593413889257735, + "grad_norm": 1.0775583631999657, + "learning_rate": 8.207334039161035e-06, + "loss": 0.24710172414779663, + "step": 4366 + }, + { + "epoch": 1.1596069579073165, + "grad_norm": 1.1226530732908067, + "learning_rate": 8.203014574750546e-06, + "loss": 0.2553783357143402, + "step": 4367 + }, + { + "epoch": 1.1598725268888594, + "grad_norm": 1.1664625510577165, + "learning_rate": 8.198695456777653e-06, + "loss": 0.2558436095714569, + "step": 4368 + }, + { + "epoch": 1.1601380958704024, + "grad_norm": 1.093371491828669, + "learning_rate": 8.19437668607503e-06, + "loss": 0.20780377089977264, + "step": 4369 + }, + { + "epoch": 1.1604036648519453, + "grad_norm": 1.0184271240235683, + "learning_rate": 8.190058263475288e-06, + "loss": 0.22397254407405853, + "step": 4370 + }, + { + "epoch": 1.1606692338334883, + "grad_norm": 1.1123966470918765, + "learning_rate": 8.185740189810967e-06, + "loss": 0.2763773798942566, + "step": 4371 + }, + { + "epoch": 1.1609348028150313, + "grad_norm": 1.234569017856286, + "learning_rate": 8.181422465914541e-06, + "loss": 0.2801940441131592, + "step": 4372 + }, + { + "epoch": 1.1612003717965742, + "grad_norm": 1.3078225086374202, + "learning_rate": 8.177105092618413e-06, + "loss": 0.20949441194534302, + "step": 4373 + }, + { + "epoch": 1.1614659407781172, + "grad_norm": 1.020800458401727, + "learning_rate": 8.172788070754927e-06, + "loss": 0.24503354728221893, + "step": 4374 + }, + { + "epoch": 1.16173150975966, + 
"grad_norm": 1.212252624187319, + "learning_rate": 8.16847140115635e-06, + "loss": 0.256147563457489, + "step": 4375 + }, + { + "epoch": 1.161997078741203, + "grad_norm": 1.079933692504349, + "learning_rate": 8.164155084654886e-06, + "loss": 0.2178848683834076, + "step": 4376 + }, + { + "epoch": 1.162262647722746, + "grad_norm": 1.0121292441974634, + "learning_rate": 8.159839122082668e-06, + "loss": 0.22624582052230835, + "step": 4377 + }, + { + "epoch": 1.162528216704289, + "grad_norm": 1.0294597777179986, + "learning_rate": 8.155523514271764e-06, + "loss": 0.2184191346168518, + "step": 4378 + }, + { + "epoch": 1.162793785685832, + "grad_norm": 1.2825595051682412, + "learning_rate": 8.151208262054175e-06, + "loss": 0.2623840868473053, + "step": 4379 + }, + { + "epoch": 1.1630593546673749, + "grad_norm": 1.2529929341607686, + "learning_rate": 8.14689336626183e-06, + "loss": 0.27181199193000793, + "step": 4380 + }, + { + "epoch": 1.1633249236489178, + "grad_norm": 1.282994089786083, + "learning_rate": 8.142578827726587e-06, + "loss": 0.2791554629802704, + "step": 4381 + }, + { + "epoch": 1.1635904926304608, + "grad_norm": 1.221608581014812, + "learning_rate": 8.13826464728024e-06, + "loss": 0.2466641068458557, + "step": 4382 + }, + { + "epoch": 1.1638560616120037, + "grad_norm": 0.9724735599541757, + "learning_rate": 8.133950825754511e-06, + "loss": 0.1951724737882614, + "step": 4383 + }, + { + "epoch": 1.1641216305935467, + "grad_norm": 1.2462068833977051, + "learning_rate": 8.129637363981056e-06, + "loss": 0.2520062029361725, + "step": 4384 + }, + { + "epoch": 1.1643871995750896, + "grad_norm": 1.230128345167748, + "learning_rate": 8.12532426279146e-06, + "loss": 0.24101334810256958, + "step": 4385 + }, + { + "epoch": 1.1646527685566326, + "grad_norm": 1.244671245504639, + "learning_rate": 8.121011523017235e-06, + "loss": 0.2741190791130066, + "step": 4386 + }, + { + "epoch": 1.1649183375381755, + "grad_norm": 1.1570746383559662, + "learning_rate": 
8.116699145489822e-06, + "loss": 0.2575281858444214, + "step": 4387 + }, + { + "epoch": 1.1651839065197185, + "grad_norm": 1.157233381368316, + "learning_rate": 8.112387131040608e-06, + "loss": 0.2557298243045807, + "step": 4388 + }, + { + "epoch": 1.1654494755012614, + "grad_norm": 1.2560692108341776, + "learning_rate": 8.108075480500892e-06, + "loss": 0.27485036849975586, + "step": 4389 + }, + { + "epoch": 1.1657150444828044, + "grad_norm": 1.2517544472207511, + "learning_rate": 8.103764194701909e-06, + "loss": 0.26458340883255005, + "step": 4390 + }, + { + "epoch": 1.1659806134643473, + "grad_norm": 1.2310585386329624, + "learning_rate": 8.099453274474827e-06, + "loss": 0.2281840592622757, + "step": 4391 + }, + { + "epoch": 1.1662461824458903, + "grad_norm": 1.2367230880082285, + "learning_rate": 8.095142720650739e-06, + "loss": 0.24956555664539337, + "step": 4392 + }, + { + "epoch": 1.1665117514274332, + "grad_norm": 1.109202461245095, + "learning_rate": 8.090832534060671e-06, + "loss": 0.22619420289993286, + "step": 4393 + }, + { + "epoch": 1.1667773204089762, + "grad_norm": 1.2922206575995636, + "learning_rate": 8.086522715535571e-06, + "loss": 0.2780688405036926, + "step": 4394 + }, + { + "epoch": 1.1670428893905191, + "grad_norm": 1.2699378735794575, + "learning_rate": 8.082213265906323e-06, + "loss": 0.2600886821746826, + "step": 4395 + }, + { + "epoch": 1.167308458372062, + "grad_norm": 1.244234758234162, + "learning_rate": 8.077904186003736e-06, + "loss": 0.25049078464508057, + "step": 4396 + }, + { + "epoch": 1.167574027353605, + "grad_norm": 1.2327544821473595, + "learning_rate": 8.073595476658558e-06, + "loss": 0.27745798230171204, + "step": 4397 + }, + { + "epoch": 1.167839596335148, + "grad_norm": 1.1682547274263488, + "learning_rate": 8.069287138701452e-06, + "loss": 0.2191929668188095, + "step": 4398 + }, + { + "epoch": 1.168105165316691, + "grad_norm": 1.297306908163856, + "learning_rate": 8.064979172963014e-06, + "loss": 0.24307313561439514, + 
"step": 4399 + }, + { + "epoch": 1.168370734298234, + "grad_norm": 1.1837345133145987, + "learning_rate": 8.060671580273772e-06, + "loss": 0.23036238551139832, + "step": 4400 + }, + { + "epoch": 1.1686363032797769, + "grad_norm": 1.096627050675377, + "learning_rate": 8.056364361464176e-06, + "loss": 0.2394433617591858, + "step": 4401 + }, + { + "epoch": 1.1689018722613198, + "grad_norm": 1.183557399538609, + "learning_rate": 8.052057517364608e-06, + "loss": 0.24099211394786835, + "step": 4402 + }, + { + "epoch": 1.1691674412428628, + "grad_norm": 1.1293667282926971, + "learning_rate": 8.047751048805376e-06, + "loss": 0.22036173939704895, + "step": 4403 + }, + { + "epoch": 1.1694330102244057, + "grad_norm": 1.185484128157471, + "learning_rate": 8.043444956616717e-06, + "loss": 0.22400429844856262, + "step": 4404 + }, + { + "epoch": 1.1696985792059487, + "grad_norm": 1.0594769241160498, + "learning_rate": 8.039139241628792e-06, + "loss": 0.21649131178855896, + "step": 4405 + }, + { + "epoch": 1.1699641481874916, + "grad_norm": 1.150957898906185, + "learning_rate": 8.034833904671698e-06, + "loss": 0.23412205278873444, + "step": 4406 + }, + { + "epoch": 1.1702297171690346, + "grad_norm": 1.2025485392569255, + "learning_rate": 8.030528946575453e-06, + "loss": 0.23822304606437683, + "step": 4407 + }, + { + "epoch": 1.1704952861505775, + "grad_norm": 1.2929661052617345, + "learning_rate": 8.026224368169998e-06, + "loss": 0.29250186681747437, + "step": 4408 + }, + { + "epoch": 1.1707608551321207, + "grad_norm": 1.4098437716027425, + "learning_rate": 8.021920170285205e-06, + "loss": 0.26794207096099854, + "step": 4409 + }, + { + "epoch": 1.1710264241136636, + "grad_norm": 1.2469013694849018, + "learning_rate": 8.017616353750874e-06, + "loss": 0.2573787271976471, + "step": 4410 + }, + { + "epoch": 1.1712919930952066, + "grad_norm": 1.1835378975512396, + "learning_rate": 8.01331291939673e-06, + "loss": 0.2744356691837311, + "step": 4411 + }, + { + "epoch": 1.1715575620767495, 
+ "grad_norm": 1.4542599881672131, + "learning_rate": 8.009009868052424e-06, + "loss": 0.2582886815071106, + "step": 4412 + }, + { + "epoch": 1.1718231310582925, + "grad_norm": 1.1766031171819216, + "learning_rate": 8.004707200547534e-06, + "loss": 0.2553568482398987, + "step": 4413 + }, + { + "epoch": 1.1720887000398355, + "grad_norm": 1.144579662849428, + "learning_rate": 8.00040491771156e-06, + "loss": 0.2670289874076843, + "step": 4414 + }, + { + "epoch": 1.1723542690213784, + "grad_norm": 1.1520006084984327, + "learning_rate": 7.99610302037394e-06, + "loss": 0.215460866689682, + "step": 4415 + }, + { + "epoch": 1.1726198380029214, + "grad_norm": 1.2764670908026035, + "learning_rate": 7.991801509364023e-06, + "loss": 0.26481571793556213, + "step": 4416 + }, + { + "epoch": 1.1728854069844643, + "grad_norm": 1.0239999030663398, + "learning_rate": 7.98750038551109e-06, + "loss": 0.2060776650905609, + "step": 4417 + }, + { + "epoch": 1.1731509759660073, + "grad_norm": 1.147707044406535, + "learning_rate": 7.983199649644349e-06, + "loss": 0.2401561588048935, + "step": 4418 + }, + { + "epoch": 1.1734165449475502, + "grad_norm": 1.3064882111410037, + "learning_rate": 7.978899302592927e-06, + "loss": 0.2545842230319977, + "step": 4419 + }, + { + "epoch": 1.1736821139290932, + "grad_norm": 1.199445262296627, + "learning_rate": 7.974599345185884e-06, + "loss": 0.29925093054771423, + "step": 4420 + }, + { + "epoch": 1.1739476829106361, + "grad_norm": 1.7583031900565322, + "learning_rate": 7.9702997782522e-06, + "loss": 0.23944757878780365, + "step": 4421 + }, + { + "epoch": 1.174213251892179, + "grad_norm": 1.057746400765015, + "learning_rate": 7.96600060262078e-06, + "loss": 0.23745761811733246, + "step": 4422 + }, + { + "epoch": 1.174478820873722, + "grad_norm": 1.1164780002442092, + "learning_rate": 7.961701819120453e-06, + "loss": 0.22170330584049225, + "step": 4423 + }, + { + "epoch": 1.174744389855265, + "grad_norm": 1.2607094160663312, + "learning_rate": 
7.95740342857998e-06, + "loss": 0.2645890712738037, + "step": 4424 + }, + { + "epoch": 1.175009958836808, + "grad_norm": 1.2171129338535713, + "learning_rate": 7.953105431828032e-06, + "loss": 0.25232207775115967, + "step": 4425 + }, + { + "epoch": 1.1752755278183509, + "grad_norm": 1.20503293579659, + "learning_rate": 7.948807829693219e-06, + "loss": 0.2656644880771637, + "step": 4426 + }, + { + "epoch": 1.1755410967998938, + "grad_norm": 1.069230366230624, + "learning_rate": 7.944510623004063e-06, + "loss": 0.25290653109550476, + "step": 4427 + }, + { + "epoch": 1.1758066657814368, + "grad_norm": 1.1825821036814732, + "learning_rate": 7.940213812589018e-06, + "loss": 0.27464741468429565, + "step": 4428 + }, + { + "epoch": 1.1760722347629797, + "grad_norm": 1.4910942744639428, + "learning_rate": 7.935917399276455e-06, + "loss": 0.2562064528465271, + "step": 4429 + }, + { + "epoch": 1.1763378037445227, + "grad_norm": 1.2720371671465533, + "learning_rate": 7.931621383894676e-06, + "loss": 0.267793208360672, + "step": 4430 + }, + { + "epoch": 1.1766033727260656, + "grad_norm": 1.1490167098873316, + "learning_rate": 7.9273257672719e-06, + "loss": 0.23651085793972015, + "step": 4431 + }, + { + "epoch": 1.1768689417076086, + "grad_norm": 1.0804412076412697, + "learning_rate": 7.923030550236267e-06, + "loss": 0.23691008985042572, + "step": 4432 + }, + { + "epoch": 1.1771345106891515, + "grad_norm": 1.1540873295746452, + "learning_rate": 7.918735733615852e-06, + "loss": 0.24495704472064972, + "step": 4433 + }, + { + "epoch": 1.1774000796706945, + "grad_norm": 1.4423069413713672, + "learning_rate": 7.91444131823864e-06, + "loss": 0.25423017144203186, + "step": 4434 + }, + { + "epoch": 1.1776656486522374, + "grad_norm": 1.1113893983435537, + "learning_rate": 7.910147304932548e-06, + "loss": 0.22870390117168427, + "step": 4435 + }, + { + "epoch": 1.1779312176337804, + "grad_norm": 1.0473620824498977, + "learning_rate": 7.905853694525405e-06, + "loss": 0.23037508130073547, + 
"step": 4436 + }, + { + "epoch": 1.1781967866153233, + "grad_norm": 1.2886040363623328, + "learning_rate": 7.901560487844973e-06, + "loss": 0.31184864044189453, + "step": 4437 + }, + { + "epoch": 1.1784623555968663, + "grad_norm": 1.302197101799982, + "learning_rate": 7.89726768571893e-06, + "loss": 0.24140426516532898, + "step": 4438 + }, + { + "epoch": 1.1787279245784092, + "grad_norm": 1.2134032336682008, + "learning_rate": 7.892975288974877e-06, + "loss": 0.25602301955223083, + "step": 4439 + }, + { + "epoch": 1.1789934935599522, + "grad_norm": 1.1868063067331378, + "learning_rate": 7.888683298440339e-06, + "loss": 0.2717514932155609, + "step": 4440 + }, + { + "epoch": 1.1792590625414952, + "grad_norm": 1.1670818939848298, + "learning_rate": 7.884391714942757e-06, + "loss": 0.252475380897522, + "step": 4441 + }, + { + "epoch": 1.179524631523038, + "grad_norm": 1.161546405047816, + "learning_rate": 7.880100539309506e-06, + "loss": 0.24777942895889282, + "step": 4442 + }, + { + "epoch": 1.179790200504581, + "grad_norm": 1.194146333188245, + "learning_rate": 7.875809772367867e-06, + "loss": 0.25111010670661926, + "step": 4443 + }, + { + "epoch": 1.180055769486124, + "grad_norm": 1.163412583383914, + "learning_rate": 7.87151941494505e-06, + "loss": 0.26183217763900757, + "step": 4444 + }, + { + "epoch": 1.180321338467667, + "grad_norm": 1.2974065116766642, + "learning_rate": 7.867229467868189e-06, + "loss": 0.27538490295410156, + "step": 4445 + }, + { + "epoch": 1.18058690744921, + "grad_norm": 1.078206017492716, + "learning_rate": 7.862939931964333e-06, + "loss": 0.2192106693983078, + "step": 4446 + }, + { + "epoch": 1.1808524764307529, + "grad_norm": 1.2415747879020278, + "learning_rate": 7.858650808060453e-06, + "loss": 0.26506057381629944, + "step": 4447 + }, + { + "epoch": 1.1811180454122958, + "grad_norm": 1.103375758703505, + "learning_rate": 7.854362096983443e-06, + "loss": 0.2345719337463379, + "step": 4448 + }, + { + "epoch": 1.1813836143938388, + 
"grad_norm": 1.1651284585435833, + "learning_rate": 7.850073799560114e-06, + "loss": 0.21404311060905457, + "step": 4449 + }, + { + "epoch": 1.1816491833753817, + "grad_norm": 1.1572235550991925, + "learning_rate": 7.8457859166172e-06, + "loss": 0.24332138895988464, + "step": 4450 + }, + { + "epoch": 1.1819147523569247, + "grad_norm": 1.1687901862394692, + "learning_rate": 7.841498448981354e-06, + "loss": 0.25025150179862976, + "step": 4451 + }, + { + "epoch": 1.1821803213384676, + "grad_norm": 1.167419454587793, + "learning_rate": 7.837211397479152e-06, + "loss": 0.21918940544128418, + "step": 4452 + }, + { + "epoch": 1.1824458903200106, + "grad_norm": 1.1517463754639392, + "learning_rate": 7.832924762937083e-06, + "loss": 0.24976079165935516, + "step": 4453 + }, + { + "epoch": 1.1827114593015535, + "grad_norm": 1.1165052000707918, + "learning_rate": 7.828638546181565e-06, + "loss": 0.21146243810653687, + "step": 4454 + }, + { + "epoch": 1.1829770282830965, + "grad_norm": 1.1110608449393633, + "learning_rate": 7.824352748038924e-06, + "loss": 0.22921445965766907, + "step": 4455 + }, + { + "epoch": 1.1832425972646394, + "grad_norm": 1.1833669908026252, + "learning_rate": 7.820067369335413e-06, + "loss": 0.24401478469371796, + "step": 4456 + }, + { + "epoch": 1.1835081662461824, + "grad_norm": 1.2543977272663969, + "learning_rate": 7.815782410897209e-06, + "loss": 0.2717207074165344, + "step": 4457 + }, + { + "epoch": 1.1837737352277253, + "grad_norm": 1.0934075655453726, + "learning_rate": 7.81149787355039e-06, + "loss": 0.20752058923244476, + "step": 4458 + }, + { + "epoch": 1.1840393042092683, + "grad_norm": 1.3448722481333402, + "learning_rate": 7.807213758120965e-06, + "loss": 0.31095850467681885, + "step": 4459 + }, + { + "epoch": 1.1843048731908112, + "grad_norm": 1.1769654791590503, + "learning_rate": 7.802930065434874e-06, + "loss": 0.23761102557182312, + "step": 4460 + }, + { + "epoch": 1.1845704421723542, + "grad_norm": 1.3225327364557968, + 
"learning_rate": 7.798646796317952e-06, + "loss": 0.2509460151195526, + "step": 4461 + }, + { + "epoch": 1.1848360111538971, + "grad_norm": 1.472525937697874, + "learning_rate": 7.794363951595966e-06, + "loss": 0.25903213024139404, + "step": 4462 + }, + { + "epoch": 1.18510158013544, + "grad_norm": 1.1904413554334654, + "learning_rate": 7.790081532094596e-06, + "loss": 0.23304736614227295, + "step": 4463 + }, + { + "epoch": 1.185367149116983, + "grad_norm": 1.311875765456408, + "learning_rate": 7.785799538639445e-06, + "loss": 0.28707265853881836, + "step": 4464 + }, + { + "epoch": 1.185632718098526, + "grad_norm": 1.0202920254712324, + "learning_rate": 7.781517972056028e-06, + "loss": 0.20282745361328125, + "step": 4465 + }, + { + "epoch": 1.185898287080069, + "grad_norm": 1.2606153791729335, + "learning_rate": 7.777236833169782e-06, + "loss": 0.24056631326675415, + "step": 4466 + }, + { + "epoch": 1.186163856061612, + "grad_norm": 1.4946194524955894, + "learning_rate": 7.772956122806058e-06, + "loss": 0.2677255868911743, + "step": 4467 + }, + { + "epoch": 1.1864294250431549, + "grad_norm": 1.2681064192856966, + "learning_rate": 7.768675841790124e-06, + "loss": 0.22032876312732697, + "step": 4468 + }, + { + "epoch": 1.1866949940246978, + "grad_norm": 1.3138325978828467, + "learning_rate": 7.764395990947177e-06, + "loss": 0.2980336546897888, + "step": 4469 + }, + { + "epoch": 1.1869605630062408, + "grad_norm": 1.2624280680532078, + "learning_rate": 7.760116571102314e-06, + "loss": 0.2562638521194458, + "step": 4470 + }, + { + "epoch": 1.1872261319877837, + "grad_norm": 1.2207997545500016, + "learning_rate": 7.755837583080561e-06, + "loss": 0.262576699256897, + "step": 4471 + }, + { + "epoch": 1.1874917009693267, + "grad_norm": 1.2672893771429377, + "learning_rate": 7.751559027706858e-06, + "loss": 0.2654029130935669, + "step": 4472 + }, + { + "epoch": 1.1877572699508698, + "grad_norm": 1.2996444615622489, + "learning_rate": 7.747280905806051e-06, + "loss": 
0.2946662902832031, + "step": 4473 + }, + { + "epoch": 1.1880228389324128, + "grad_norm": 1.193974235945654, + "learning_rate": 7.743003218202921e-06, + "loss": 0.25140905380249023, + "step": 4474 + }, + { + "epoch": 1.1882884079139557, + "grad_norm": 1.2240016583398612, + "learning_rate": 7.738725965722149e-06, + "loss": 0.2601654529571533, + "step": 4475 + }, + { + "epoch": 1.1885539768954987, + "grad_norm": 1.9675422662507516, + "learning_rate": 7.73444914918834e-06, + "loss": 0.2639954090118408, + "step": 4476 + }, + { + "epoch": 1.1888195458770416, + "grad_norm": 1.174151986382161, + "learning_rate": 7.730172769426014e-06, + "loss": 0.23391291499137878, + "step": 4477 + }, + { + "epoch": 1.1890851148585846, + "grad_norm": 2.254589386622623, + "learning_rate": 7.725896827259613e-06, + "loss": 0.2912144958972931, + "step": 4478 + }, + { + "epoch": 1.1893506838401275, + "grad_norm": 1.0905445077469016, + "learning_rate": 7.72162132351348e-06, + "loss": 0.23867549002170563, + "step": 4479 + }, + { + "epoch": 1.1896162528216705, + "grad_norm": 1.1124853975848743, + "learning_rate": 7.717346259011888e-06, + "loss": 0.22434742748737335, + "step": 4480 + }, + { + "epoch": 1.1898818218032134, + "grad_norm": 1.2440839352544732, + "learning_rate": 7.713071634579017e-06, + "loss": 0.2504398822784424, + "step": 4481 + }, + { + "epoch": 1.1901473907847564, + "grad_norm": 1.1759629506533034, + "learning_rate": 7.70879745103896e-06, + "loss": 0.24887195229530334, + "step": 4482 + }, + { + "epoch": 1.1904129597662994, + "grad_norm": 1.2603454999195398, + "learning_rate": 7.704523709215732e-06, + "loss": 0.2730141580104828, + "step": 4483 + }, + { + "epoch": 1.1906785287478423, + "grad_norm": 1.2285382464481551, + "learning_rate": 7.70025040993326e-06, + "loss": 0.22197315096855164, + "step": 4484 + }, + { + "epoch": 1.1909440977293853, + "grad_norm": 1.2004564929121084, + "learning_rate": 7.695977554015387e-06, + "loss": 0.2852731943130493, + "step": 4485 + }, + { + "epoch": 
1.1912096667109282, + "grad_norm": 1.2815387200597224, + "learning_rate": 7.691705142285863e-06, + "loss": 0.2577238976955414, + "step": 4486 + }, + { + "epoch": 1.1914752356924712, + "grad_norm": 1.066499567502605, + "learning_rate": 7.68743317556837e-06, + "loss": 0.23510503768920898, + "step": 4487 + }, + { + "epoch": 1.191740804674014, + "grad_norm": 1.557745891642732, + "learning_rate": 7.683161654686486e-06, + "loss": 0.2553985118865967, + "step": 4488 + }, + { + "epoch": 1.192006373655557, + "grad_norm": 1.1965147913981737, + "learning_rate": 7.67889058046371e-06, + "loss": 0.2778642475605011, + "step": 4489 + }, + { + "epoch": 1.1922719426371, + "grad_norm": 1.1622951487110165, + "learning_rate": 7.674619953723455e-06, + "loss": 0.24740618467330933, + "step": 4490 + }, + { + "epoch": 1.192537511618643, + "grad_norm": 1.1598996003550786, + "learning_rate": 7.670349775289047e-06, + "loss": 0.2453901171684265, + "step": 4491 + }, + { + "epoch": 1.192803080600186, + "grad_norm": 1.1444233008842855, + "learning_rate": 7.666080045983726e-06, + "loss": 0.2336064875125885, + "step": 4492 + }, + { + "epoch": 1.1930686495817289, + "grad_norm": 1.18047841753512, + "learning_rate": 7.661810766630648e-06, + "loss": 0.2375800907611847, + "step": 4493 + }, + { + "epoch": 1.1933342185632718, + "grad_norm": 1.1241813274405275, + "learning_rate": 7.657541938052876e-06, + "loss": 0.21272733807563782, + "step": 4494 + }, + { + "epoch": 1.1935997875448148, + "grad_norm": 1.1531042348696576, + "learning_rate": 7.65327356107339e-06, + "loss": 0.26597708463668823, + "step": 4495 + }, + { + "epoch": 1.1938653565263577, + "grad_norm": 1.1715955143508257, + "learning_rate": 7.649005636515088e-06, + "loss": 0.267806738615036, + "step": 4496 + }, + { + "epoch": 1.1941309255079007, + "grad_norm": 1.1812545197713797, + "learning_rate": 7.64473816520077e-06, + "loss": 0.2260194569826126, + "step": 4497 + }, + { + "epoch": 1.1943964944894436, + "grad_norm": 1.298416110387325, + 
"learning_rate": 7.640471147953157e-06, + "loss": 0.24523532390594482, + "step": 4498 + }, + { + "epoch": 1.1946620634709866, + "grad_norm": 1.1020194586485352, + "learning_rate": 7.636204585594879e-06, + "loss": 0.23230910301208496, + "step": 4499 + }, + { + "epoch": 1.1949276324525295, + "grad_norm": 1.1141631171804318, + "learning_rate": 7.631938478948478e-06, + "loss": 0.23322705924510956, + "step": 4500 + }, + { + "epoch": 1.1951932014340725, + "grad_norm": 1.3011711597097497, + "learning_rate": 7.6276728288364086e-06, + "loss": 0.25614386796951294, + "step": 4501 + }, + { + "epoch": 1.1954587704156154, + "grad_norm": 1.2188058731839337, + "learning_rate": 7.62340763608104e-06, + "loss": 0.22921821475028992, + "step": 4502 + }, + { + "epoch": 1.1957243393971584, + "grad_norm": 1.1538976889459698, + "learning_rate": 7.619142901504649e-06, + "loss": 0.25528913736343384, + "step": 4503 + }, + { + "epoch": 1.1959899083787013, + "grad_norm": 1.1730292690453887, + "learning_rate": 7.614878625929425e-06, + "loss": 0.2528502643108368, + "step": 4504 + }, + { + "epoch": 1.1962554773602443, + "grad_norm": 1.2636827238002009, + "learning_rate": 7.610614810177474e-06, + "loss": 0.2519027590751648, + "step": 4505 + }, + { + "epoch": 1.1965210463417872, + "grad_norm": 1.3563109831905724, + "learning_rate": 7.606351455070808e-06, + "loss": 0.2895655333995819, + "step": 4506 + }, + { + "epoch": 1.1967866153233302, + "grad_norm": 1.2317858842714817, + "learning_rate": 7.6020885614313515e-06, + "loss": 0.24588793516159058, + "step": 4507 + }, + { + "epoch": 1.1970521843048731, + "grad_norm": 1.3148149004868621, + "learning_rate": 7.597826130080938e-06, + "loss": 0.2996830940246582, + "step": 4508 + }, + { + "epoch": 1.197317753286416, + "grad_norm": 1.2289139982746875, + "learning_rate": 7.593564161841318e-06, + "loss": 0.2654343247413635, + "step": 4509 + }, + { + "epoch": 1.197583322267959, + "grad_norm": 1.2104660234722762, + "learning_rate": 7.589302657534144e-06, + "loss": 
0.24949109554290771, + "step": 4510 + }, + { + "epoch": 1.197848891249502, + "grad_norm": 1.1785955409512114, + "learning_rate": 7.5850416179809886e-06, + "loss": 0.23205731809139252, + "step": 4511 + }, + { + "epoch": 1.198114460231045, + "grad_norm": 3.351023225066079, + "learning_rate": 7.580781044003324e-06, + "loss": 0.232904314994812, + "step": 4512 + }, + { + "epoch": 1.198380029212588, + "grad_norm": 1.0569352775404934, + "learning_rate": 7.576520936422542e-06, + "loss": 0.25071364641189575, + "step": 4513 + }, + { + "epoch": 1.1986455981941309, + "grad_norm": 1.3613643273685416, + "learning_rate": 7.572261296059944e-06, + "loss": 0.2574467658996582, + "step": 4514 + }, + { + "epoch": 1.1989111671756738, + "grad_norm": 1.1866331959407248, + "learning_rate": 7.568002123736735e-06, + "loss": 0.23134055733680725, + "step": 4515 + }, + { + "epoch": 1.1991767361572168, + "grad_norm": 1.093870770411857, + "learning_rate": 7.5637434202740334e-06, + "loss": 0.22163332998752594, + "step": 4516 + }, + { + "epoch": 1.1994423051387597, + "grad_norm": 1.182308432196374, + "learning_rate": 7.559485186492868e-06, + "loss": 0.2665749788284302, + "step": 4517 + }, + { + "epoch": 1.1997078741203027, + "grad_norm": 1.0758759053634162, + "learning_rate": 7.555227423214174e-06, + "loss": 0.2237103432416916, + "step": 4518 + }, + { + "epoch": 1.1999734431018456, + "grad_norm": 1.2216323349035507, + "learning_rate": 7.550970131258801e-06, + "loss": 0.23287461698055267, + "step": 4519 + }, + { + "epoch": 1.2002390120833886, + "grad_norm": 1.1237156855078405, + "learning_rate": 7.5467133114475025e-06, + "loss": 0.2296323925256729, + "step": 4520 + }, + { + "epoch": 1.2005045810649315, + "grad_norm": 1.0900498705064874, + "learning_rate": 7.542456964600944e-06, + "loss": 0.21358339488506317, + "step": 4521 + }, + { + "epoch": 1.2007701500464747, + "grad_norm": 1.2516498821908515, + "learning_rate": 7.5382010915396954e-06, + "loss": 0.2355872094631195, + "step": 4522 + }, + { + 
"epoch": 1.2010357190280176, + "grad_norm": 1.2039029354448443, + "learning_rate": 7.5339456930842455e-06, + "loss": 0.25397661328315735, + "step": 4523 + }, + { + "epoch": 1.2013012880095606, + "grad_norm": 1.1762399479435963, + "learning_rate": 7.52969077005498e-06, + "loss": 0.26658257842063904, + "step": 4524 + }, + { + "epoch": 1.2015668569911035, + "grad_norm": 1.1889790145170218, + "learning_rate": 7.525436323272201e-06, + "loss": 0.27207136154174805, + "step": 4525 + }, + { + "epoch": 1.2018324259726465, + "grad_norm": 1.1867510172835751, + "learning_rate": 7.521182353556114e-06, + "loss": 0.25889313220977783, + "step": 4526 + }, + { + "epoch": 1.2020979949541895, + "grad_norm": 1.3095753328357655, + "learning_rate": 7.516928861726834e-06, + "loss": 0.272185742855072, + "step": 4527 + }, + { + "epoch": 1.2023635639357324, + "grad_norm": 1.156226984644319, + "learning_rate": 7.512675848604385e-06, + "loss": 0.25371503829956055, + "step": 4528 + }, + { + "epoch": 1.2026291329172754, + "grad_norm": 1.2028831911106082, + "learning_rate": 7.5084233150086964e-06, + "loss": 0.2554902732372284, + "step": 4529 + }, + { + "epoch": 1.2028947018988183, + "grad_norm": 1.1714528701705076, + "learning_rate": 7.50417126175961e-06, + "loss": 0.22007369995117188, + "step": 4530 + }, + { + "epoch": 1.2031602708803613, + "grad_norm": 1.2057968317835202, + "learning_rate": 7.499919689676861e-06, + "loss": 0.27492445707321167, + "step": 4531 + }, + { + "epoch": 1.2034258398619042, + "grad_norm": 1.1229280499713745, + "learning_rate": 7.4956685995801144e-06, + "loss": 0.2321021854877472, + "step": 4532 + }, + { + "epoch": 1.2036914088434472, + "grad_norm": 1.1735641467762012, + "learning_rate": 7.491417992288927e-06, + "loss": 0.25410759449005127, + "step": 4533 + }, + { + "epoch": 1.2039569778249901, + "grad_norm": 1.0638924164212193, + "learning_rate": 7.487167868622765e-06, + "loss": 0.2080576866865158, + "step": 4534 + }, + { + "epoch": 1.204222546806533, + "grad_norm": 
1.115815492341061, + "learning_rate": 7.482918229401001e-06, + "loss": 0.2333327978849411, + "step": 4535 + }, + { + "epoch": 1.204488115788076, + "grad_norm": 1.1999209092526242, + "learning_rate": 7.478669075442917e-06, + "loss": 0.23160479962825775, + "step": 4536 + }, + { + "epoch": 1.204753684769619, + "grad_norm": 1.2136747509439494, + "learning_rate": 7.474420407567699e-06, + "loss": 0.2627696394920349, + "step": 4537 + }, + { + "epoch": 1.205019253751162, + "grad_norm": 1.0694648198090266, + "learning_rate": 7.470172226594441e-06, + "loss": 0.18656940758228302, + "step": 4538 + }, + { + "epoch": 1.2052848227327049, + "grad_norm": 1.2245138263513848, + "learning_rate": 7.465924533342139e-06, + "loss": 0.2749083340167999, + "step": 4539 + }, + { + "epoch": 1.2055503917142478, + "grad_norm": 1.3944907322006155, + "learning_rate": 7.461677328629696e-06, + "loss": 0.27484387159347534, + "step": 4540 + }, + { + "epoch": 1.2058159606957908, + "grad_norm": 1.254197138569937, + "learning_rate": 7.457430613275934e-06, + "loss": 0.26357588171958923, + "step": 4541 + }, + { + "epoch": 1.2060815296773337, + "grad_norm": 1.2004336778554112, + "learning_rate": 7.453184388099559e-06, + "loss": 0.23495343327522278, + "step": 4542 + }, + { + "epoch": 1.2063470986588767, + "grad_norm": 1.2123259782755003, + "learning_rate": 7.4489386539192e-06, + "loss": 0.253970205783844, + "step": 4543 + }, + { + "epoch": 1.2066126676404196, + "grad_norm": 1.1523820852778563, + "learning_rate": 7.444693411553383e-06, + "loss": 0.24919062852859497, + "step": 4544 + }, + { + "epoch": 1.2068782366219626, + "grad_norm": 1.2181666045865969, + "learning_rate": 7.440448661820536e-06, + "loss": 0.24373450875282288, + "step": 4545 + }, + { + "epoch": 1.2071438056035055, + "grad_norm": 1.3762501451890354, + "learning_rate": 7.436204405539002e-06, + "loss": 0.24739482998847961, + "step": 4546 + }, + { + "epoch": 1.2074093745850485, + "grad_norm": 1.2982074074943253, + "learning_rate": 
7.4319606435270195e-06, + "loss": 0.27041494846343994, + "step": 4547 + }, + { + "epoch": 1.2076749435665914, + "grad_norm": 1.1359942984852744, + "learning_rate": 7.427717376602739e-06, + "loss": 0.23243938386440277, + "step": 4548 + }, + { + "epoch": 1.2079405125481344, + "grad_norm": 1.3118758722508392, + "learning_rate": 7.423474605584206e-06, + "loss": 0.2346343696117401, + "step": 4549 + }, + { + "epoch": 1.2082060815296773, + "grad_norm": 1.1819354183035133, + "learning_rate": 7.419232331289385e-06, + "loss": 0.2587367296218872, + "step": 4550 + }, + { + "epoch": 1.2084716505112203, + "grad_norm": 1.195922174249915, + "learning_rate": 7.414990554536134e-06, + "loss": 0.2552938461303711, + "step": 4551 + }, + { + "epoch": 1.2087372194927632, + "grad_norm": 1.2688216449772127, + "learning_rate": 7.410749276142221e-06, + "loss": 0.2693648040294647, + "step": 4552 + }, + { + "epoch": 1.2090027884743062, + "grad_norm": 1.1997939452425357, + "learning_rate": 7.406508496925307e-06, + "loss": 0.21543294191360474, + "step": 4553 + }, + { + "epoch": 1.2092683574558492, + "grad_norm": 1.2385892147047024, + "learning_rate": 7.402268217702966e-06, + "loss": 0.2913009524345398, + "step": 4554 + }, + { + "epoch": 1.209533926437392, + "grad_norm": 1.0671356100150298, + "learning_rate": 7.398028439292675e-06, + "loss": 0.23279520869255066, + "step": 4555 + }, + { + "epoch": 1.209799495418935, + "grad_norm": 1.0946575444558022, + "learning_rate": 7.393789162511815e-06, + "loss": 0.25086939334869385, + "step": 4556 + }, + { + "epoch": 1.210065064400478, + "grad_norm": 1.0964890001200192, + "learning_rate": 7.389550388177662e-06, + "loss": 0.21704714000225067, + "step": 4557 + }, + { + "epoch": 1.210330633382021, + "grad_norm": 1.126699331966135, + "learning_rate": 7.3853121171074115e-06, + "loss": 0.230219304561615, + "step": 4558 + }, + { + "epoch": 1.210596202363564, + "grad_norm": 1.1809668678269754, + "learning_rate": 7.381074350118149e-06, + "loss": 0.26073017716407776, + 
"step": 4559 + }, + { + "epoch": 1.2108617713451069, + "grad_norm": 1.2065072762311946, + "learning_rate": 7.376837088026863e-06, + "loss": 0.25186216831207275, + "step": 4560 + }, + { + "epoch": 1.2111273403266498, + "grad_norm": 1.3978877577958326, + "learning_rate": 7.372600331650449e-06, + "loss": 0.28719040751457214, + "step": 4561 + }, + { + "epoch": 1.2113929093081928, + "grad_norm": 1.16073083909203, + "learning_rate": 7.368364081805704e-06, + "loss": 0.23972755670547485, + "step": 4562 + }, + { + "epoch": 1.2116584782897357, + "grad_norm": 1.096919114864748, + "learning_rate": 7.364128339309326e-06, + "loss": 0.23053769767284393, + "step": 4563 + }, + { + "epoch": 1.2119240472712787, + "grad_norm": 1.2910615683085556, + "learning_rate": 7.359893104977917e-06, + "loss": 0.25124189257621765, + "step": 4564 + }, + { + "epoch": 1.2121896162528216, + "grad_norm": 1.1863697592423188, + "learning_rate": 7.355658379627981e-06, + "loss": 0.2243686318397522, + "step": 4565 + }, + { + "epoch": 1.2124551852343646, + "grad_norm": 1.244591161752608, + "learning_rate": 7.3514241640759175e-06, + "loss": 0.26047343015670776, + "step": 4566 + }, + { + "epoch": 1.2127207542159075, + "grad_norm": 1.1775978450301259, + "learning_rate": 7.3471904591380434e-06, + "loss": 0.23603469133377075, + "step": 4567 + }, + { + "epoch": 1.2129863231974505, + "grad_norm": 1.2261707581126196, + "learning_rate": 7.342957265630561e-06, + "loss": 0.31320711970329285, + "step": 4568 + }, + { + "epoch": 1.2132518921789934, + "grad_norm": 1.22464158648852, + "learning_rate": 7.338724584369581e-06, + "loss": 0.22159788012504578, + "step": 4569 + }, + { + "epoch": 1.2135174611605364, + "grad_norm": 1.1206153371836056, + "learning_rate": 7.334492416171114e-06, + "loss": 0.21992239356040955, + "step": 4570 + }, + { + "epoch": 1.2137830301420793, + "grad_norm": 1.3229661253734524, + "learning_rate": 7.330260761851071e-06, + "loss": 0.20708827674388885, + "step": 4571 + }, + { + "epoch": 
1.2140485991236223, + "grad_norm": 1.1899658624900848, + "learning_rate": 7.326029622225269e-06, + "loss": 0.2846507132053375, + "step": 4572 + }, + { + "epoch": 1.2143141681051652, + "grad_norm": 1.2218224134688922, + "learning_rate": 7.321798998109417e-06, + "loss": 0.24903801083564758, + "step": 4573 + }, + { + "epoch": 1.2145797370867082, + "grad_norm": 1.1817295734811926, + "learning_rate": 7.317568890319134e-06, + "loss": 0.23426681756973267, + "step": 4574 + }, + { + "epoch": 1.2148453060682511, + "grad_norm": 1.1685993771040228, + "learning_rate": 7.31333929966993e-06, + "loss": 0.2374490350484848, + "step": 4575 + }, + { + "epoch": 1.215110875049794, + "grad_norm": 1.13335327598736, + "learning_rate": 7.309110226977223e-06, + "loss": 0.24035832285881042, + "step": 4576 + }, + { + "epoch": 1.215376444031337, + "grad_norm": 1.2837405582571324, + "learning_rate": 7.30488167305633e-06, + "loss": 0.21872258186340332, + "step": 4577 + }, + { + "epoch": 1.21564201301288, + "grad_norm": 1.3425258296129825, + "learning_rate": 7.300653638722463e-06, + "loss": 0.2940255403518677, + "step": 4578 + }, + { + "epoch": 1.215907581994423, + "grad_norm": 1.1158795437619367, + "learning_rate": 7.29642612479074e-06, + "loss": 0.20970892906188965, + "step": 4579 + }, + { + "epoch": 1.216173150975966, + "grad_norm": 1.1571301789790744, + "learning_rate": 7.292199132076175e-06, + "loss": 0.21217449009418488, + "step": 4580 + }, + { + "epoch": 1.2164387199575089, + "grad_norm": 1.2448503896532135, + "learning_rate": 7.28797266139368e-06, + "loss": 0.2463359832763672, + "step": 4581 + }, + { + "epoch": 1.2167042889390518, + "grad_norm": 1.132320428820701, + "learning_rate": 7.283746713558071e-06, + "loss": 0.21921415627002716, + "step": 4582 + }, + { + "epoch": 1.2169698579205948, + "grad_norm": 1.2437376760058587, + "learning_rate": 7.279521289384059e-06, + "loss": 0.2412380576133728, + "step": 4583 + }, + { + "epoch": 1.2172354269021377, + "grad_norm": 1.180878934188553, + 
"learning_rate": 7.275296389686258e-06, + "loss": 0.2558564245700836, + "step": 4584 + }, + { + "epoch": 1.2175009958836809, + "grad_norm": 1.2566060880081307, + "learning_rate": 7.271072015279179e-06, + "loss": 0.2548869848251343, + "step": 4585 + }, + { + "epoch": 1.2177665648652238, + "grad_norm": 1.4407566508510072, + "learning_rate": 7.2668481669772304e-06, + "loss": 0.22183407843112946, + "step": 4586 + }, + { + "epoch": 1.2180321338467668, + "grad_norm": 1.20165829214997, + "learning_rate": 7.262624845594721e-06, + "loss": 0.24722473323345184, + "step": 4587 + }, + { + "epoch": 1.2182977028283097, + "grad_norm": 1.190564524584547, + "learning_rate": 7.258402051945858e-06, + "loss": 0.2678988575935364, + "step": 4588 + }, + { + "epoch": 1.2185632718098527, + "grad_norm": 1.187777405395345, + "learning_rate": 7.2541797868447435e-06, + "loss": 0.2116469144821167, + "step": 4589 + }, + { + "epoch": 1.2188288407913956, + "grad_norm": 1.2500071795758152, + "learning_rate": 7.249958051105383e-06, + "loss": 0.23897933959960938, + "step": 4590 + }, + { + "epoch": 1.2190944097729386, + "grad_norm": 1.2473885744661077, + "learning_rate": 7.245736845541676e-06, + "loss": 0.25434061884880066, + "step": 4591 + }, + { + "epoch": 1.2193599787544815, + "grad_norm": 1.2108382272450464, + "learning_rate": 7.2415161709674235e-06, + "loss": 0.2602628469467163, + "step": 4592 + }, + { + "epoch": 1.2196255477360245, + "grad_norm": 3.1633443202169764, + "learning_rate": 7.2372960281963165e-06, + "loss": 0.2519065737724304, + "step": 4593 + }, + { + "epoch": 1.2198911167175674, + "grad_norm": 1.550903602515833, + "learning_rate": 7.233076418041954e-06, + "loss": 0.24404102563858032, + "step": 4594 + }, + { + "epoch": 1.2201566856991104, + "grad_norm": 1.1561711817096534, + "learning_rate": 7.228857341317825e-06, + "loss": 0.23633979260921478, + "step": 4595 + }, + { + "epoch": 1.2204222546806534, + "grad_norm": 1.2128002082313463, + "learning_rate": 7.224638798837319e-06, + "loss": 
0.2513781189918518, + "step": 4596 + }, + { + "epoch": 1.2206878236621963, + "grad_norm": 1.2409533600026899, + "learning_rate": 7.220420791413721e-06, + "loss": 0.23270189762115479, + "step": 4597 + }, + { + "epoch": 1.2209533926437393, + "grad_norm": 1.2503409564498669, + "learning_rate": 7.21620331986021e-06, + "loss": 0.2770010530948639, + "step": 4598 + }, + { + "epoch": 1.2212189616252822, + "grad_norm": 1.1284522462719728, + "learning_rate": 7.2119863849898684e-06, + "loss": 0.2312745451927185, + "step": 4599 + }, + { + "epoch": 1.2214845306068252, + "grad_norm": 1.2725314186948387, + "learning_rate": 7.20776998761567e-06, + "loss": 0.231276735663414, + "step": 4600 + }, + { + "epoch": 1.221750099588368, + "grad_norm": 1.1715742737590393, + "learning_rate": 7.203554128550486e-06, + "loss": 0.24927708506584167, + "step": 4601 + }, + { + "epoch": 1.222015668569911, + "grad_norm": 1.1138441718661785, + "learning_rate": 7.199338808607084e-06, + "loss": 0.23033373057842255, + "step": 4602 + }, + { + "epoch": 1.222281237551454, + "grad_norm": 1.2545098885673684, + "learning_rate": 7.195124028598131e-06, + "loss": 0.24003425240516663, + "step": 4603 + }, + { + "epoch": 1.222546806532997, + "grad_norm": 1.1872708193619057, + "learning_rate": 7.190909789336185e-06, + "loss": 0.22648809850215912, + "step": 4604 + }, + { + "epoch": 1.22281237551454, + "grad_norm": 1.2511860493227276, + "learning_rate": 7.1866960916337006e-06, + "loss": 0.2605816125869751, + "step": 4605 + }, + { + "epoch": 1.2230779444960829, + "grad_norm": 1.1424629632361756, + "learning_rate": 7.1824829363030305e-06, + "loss": 0.21549202501773834, + "step": 4606 + }, + { + "epoch": 1.2233435134776258, + "grad_norm": 1.1532084986944064, + "learning_rate": 7.17827032415642e-06, + "loss": 0.23113220930099487, + "step": 4607 + }, + { + "epoch": 1.2236090824591688, + "grad_norm": 1.1649312720163907, + "learning_rate": 7.174058256006012e-06, + "loss": 0.22736643254756927, + "step": 4608 + }, + { + "epoch": 
1.2238746514407117, + "grad_norm": 1.172011833362534, + "learning_rate": 7.169846732663845e-06, + "loss": 0.2686663866043091, + "step": 4609 + }, + { + "epoch": 1.2241402204222547, + "grad_norm": 1.1555217624379808, + "learning_rate": 7.1656357549418485e-06, + "loss": 0.1980462670326233, + "step": 4610 + }, + { + "epoch": 1.2244057894037976, + "grad_norm": 1.2401629806715768, + "learning_rate": 7.161425323651846e-06, + "loss": 0.22997641563415527, + "step": 4611 + }, + { + "epoch": 1.2246713583853406, + "grad_norm": 1.3367939845671126, + "learning_rate": 7.157215439605567e-06, + "loss": 0.28781357407569885, + "step": 4612 + }, + { + "epoch": 1.2249369273668835, + "grad_norm": 1.2895382897388425, + "learning_rate": 7.153006103614624e-06, + "loss": 0.22558270394802094, + "step": 4613 + }, + { + "epoch": 1.2252024963484265, + "grad_norm": 1.1860196927831441, + "learning_rate": 7.148797316490527e-06, + "loss": 0.2435922622680664, + "step": 4614 + }, + { + "epoch": 1.2254680653299694, + "grad_norm": 1.2828543438888096, + "learning_rate": 7.14458907904468e-06, + "loss": 0.27840936183929443, + "step": 4615 + }, + { + "epoch": 1.2257336343115124, + "grad_norm": 1.2350405670943831, + "learning_rate": 7.1403813920883825e-06, + "loss": 0.2775651812553406, + "step": 4616 + }, + { + "epoch": 1.2259992032930553, + "grad_norm": 1.2738452228129284, + "learning_rate": 7.136174256432828e-06, + "loss": 0.2430988848209381, + "step": 4617 + }, + { + "epoch": 1.2262647722745983, + "grad_norm": 1.0618083363199646, + "learning_rate": 7.131967672889101e-06, + "loss": 0.2018759697675705, + "step": 4618 + }, + { + "epoch": 1.2265303412561412, + "grad_norm": 1.2320094058432127, + "learning_rate": 7.127761642268179e-06, + "loss": 0.25314825773239136, + "step": 4619 + }, + { + "epoch": 1.2267959102376842, + "grad_norm": 1.409693024729639, + "learning_rate": 7.123556165380935e-06, + "loss": 0.2542746365070343, + "step": 4620 + }, + { + "epoch": 1.2270614792192271, + "grad_norm": 
1.2571649384815597, + "learning_rate": 7.119351243038142e-06, + "loss": 0.2912300229072571, + "step": 4621 + }, + { + "epoch": 1.22732704820077, + "grad_norm": 1.3877507856901592, + "learning_rate": 7.115146876050454e-06, + "loss": 0.26893284916877747, + "step": 4622 + }, + { + "epoch": 1.227592617182313, + "grad_norm": 1.3833428208823224, + "learning_rate": 7.110943065228425e-06, + "loss": 0.2711215317249298, + "step": 4623 + }, + { + "epoch": 1.227858186163856, + "grad_norm": 1.346165350849743, + "learning_rate": 7.106739811382501e-06, + "loss": 0.25530266761779785, + "step": 4624 + }, + { + "epoch": 1.228123755145399, + "grad_norm": 1.268299981159743, + "learning_rate": 7.102537115323018e-06, + "loss": 0.2547178864479065, + "step": 4625 + }, + { + "epoch": 1.228389324126942, + "grad_norm": 1.5802606545447795, + "learning_rate": 7.0983349778602064e-06, + "loss": 0.27973634004592896, + "step": 4626 + }, + { + "epoch": 1.2286548931084849, + "grad_norm": 1.205257873334912, + "learning_rate": 7.0941333998041884e-06, + "loss": 0.24066339433193207, + "step": 4627 + }, + { + "epoch": 1.2289204620900278, + "grad_norm": 1.1798307734371165, + "learning_rate": 7.0899323819649816e-06, + "loss": 0.24305742979049683, + "step": 4628 + }, + { + "epoch": 1.2291860310715708, + "grad_norm": 1.163221794708842, + "learning_rate": 7.085731925152484e-06, + "loss": 0.22478783130645752, + "step": 4629 + }, + { + "epoch": 1.2294516000531137, + "grad_norm": 1.1812808698189172, + "learning_rate": 7.081532030176506e-06, + "loss": 0.24995659291744232, + "step": 4630 + }, + { + "epoch": 1.2297171690346567, + "grad_norm": 1.1575900439946216, + "learning_rate": 7.077332697846733e-06, + "loss": 0.2579454183578491, + "step": 4631 + }, + { + "epoch": 1.2299827380161996, + "grad_norm": 1.2378373931288529, + "learning_rate": 7.073133928972745e-06, + "loss": 0.2513299286365509, + "step": 4632 + }, + { + "epoch": 1.2302483069977426, + "grad_norm": 1.0751310135047412, + "learning_rate": 
7.068935724364016e-06, + "loss": 0.23344315588474274, + "step": 4633 + }, + { + "epoch": 1.2305138759792857, + "grad_norm": 1.1882346043976466, + "learning_rate": 7.064738084829912e-06, + "loss": 0.26750341057777405, + "step": 4634 + }, + { + "epoch": 1.2307794449608287, + "grad_norm": 1.1622882344241228, + "learning_rate": 7.0605410111796855e-06, + "loss": 0.22424373030662537, + "step": 4635 + }, + { + "epoch": 1.2310450139423716, + "grad_norm": 1.0711348851881108, + "learning_rate": 7.056344504222485e-06, + "loss": 0.24261844158172607, + "step": 4636 + }, + { + "epoch": 1.2313105829239146, + "grad_norm": 1.1382788327638453, + "learning_rate": 7.052148564767347e-06, + "loss": 0.22273704409599304, + "step": 4637 + }, + { + "epoch": 1.2315761519054576, + "grad_norm": 1.217398110209698, + "learning_rate": 7.047953193623195e-06, + "loss": 0.23726603388786316, + "step": 4638 + }, + { + "epoch": 1.2318417208870005, + "grad_norm": 1.1961933626954258, + "learning_rate": 7.043758391598856e-06, + "loss": 0.2612340748310089, + "step": 4639 + }, + { + "epoch": 1.2321072898685435, + "grad_norm": 1.3828917417203295, + "learning_rate": 7.039564159503034e-06, + "loss": 0.25722867250442505, + "step": 4640 + }, + { + "epoch": 1.2323728588500864, + "grad_norm": 1.2106898963951274, + "learning_rate": 7.035370498144325e-06, + "loss": 0.25940731167793274, + "step": 4641 + }, + { + "epoch": 1.2326384278316294, + "grad_norm": 1.1431229158704634, + "learning_rate": 7.03117740833122e-06, + "loss": 0.2328685224056244, + "step": 4642 + }, + { + "epoch": 1.2329039968131723, + "grad_norm": 1.360549509974518, + "learning_rate": 7.0269848908720965e-06, + "loss": 0.3019352853298187, + "step": 4643 + }, + { + "epoch": 1.2331695657947153, + "grad_norm": 1.370123584713732, + "learning_rate": 7.022792946575222e-06, + "loss": 0.2665002942085266, + "step": 4644 + }, + { + "epoch": 1.2334351347762582, + "grad_norm": 1.2172549009924116, + "learning_rate": 7.018601576248755e-06, + "loss": 
0.2425101399421692, + "step": 4645 + }, + { + "epoch": 1.2337007037578012, + "grad_norm": 1.2088470091841177, + "learning_rate": 7.014410780700743e-06, + "loss": 0.23319771885871887, + "step": 4646 + }, + { + "epoch": 1.2339662727393441, + "grad_norm": 1.1714631765087196, + "learning_rate": 7.010220560739116e-06, + "loss": 0.23033195734024048, + "step": 4647 + }, + { + "epoch": 1.234231841720887, + "grad_norm": 1.211199620492339, + "learning_rate": 7.006030917171707e-06, + "loss": 0.24682006239891052, + "step": 4648 + }, + { + "epoch": 1.23449741070243, + "grad_norm": 1.2881207045369418, + "learning_rate": 7.001841850806228e-06, + "loss": 0.25566285848617554, + "step": 4649 + }, + { + "epoch": 1.234762979683973, + "grad_norm": 1.32329780476303, + "learning_rate": 6.9976533624502784e-06, + "loss": 0.2791779339313507, + "step": 4650 + }, + { + "epoch": 1.235028548665516, + "grad_norm": 1.3093366388831746, + "learning_rate": 6.993465452911352e-06, + "loss": 0.25597846508026123, + "step": 4651 + }, + { + "epoch": 1.2352941176470589, + "grad_norm": 1.197170425293823, + "learning_rate": 6.9892781229968275e-06, + "loss": 0.24034728109836578, + "step": 4652 + }, + { + "epoch": 1.2355596866286018, + "grad_norm": 1.2583607623295634, + "learning_rate": 6.985091373513972e-06, + "loss": 0.2209509015083313, + "step": 4653 + }, + { + "epoch": 1.2358252556101448, + "grad_norm": 1.298261075070858, + "learning_rate": 6.980905205269942e-06, + "loss": 0.29106947779655457, + "step": 4654 + }, + { + "epoch": 1.2360908245916877, + "grad_norm": 1.226505577270481, + "learning_rate": 6.976719619071782e-06, + "loss": 0.24014753103256226, + "step": 4655 + }, + { + "epoch": 1.2363563935732307, + "grad_norm": 1.2297022971330018, + "learning_rate": 6.972534615726422e-06, + "loss": 0.27135470509529114, + "step": 4656 + }, + { + "epoch": 1.2366219625547736, + "grad_norm": 1.2219120714336154, + "learning_rate": 6.968350196040683e-06, + "loss": 0.23386257886886597, + "step": 4657 + }, + { + "epoch": 
1.2368875315363166, + "grad_norm": 1.1452987159774544, + "learning_rate": 6.964166360821271e-06, + "loss": 0.23119661211967468, + "step": 4658 + }, + { + "epoch": 1.2371531005178595, + "grad_norm": 1.1767967288021879, + "learning_rate": 6.959983110874782e-06, + "loss": 0.2399922013282776, + "step": 4659 + }, + { + "epoch": 1.2374186694994025, + "grad_norm": 1.0521231856668218, + "learning_rate": 6.9558004470076944e-06, + "loss": 0.18323534727096558, + "step": 4660 + }, + { + "epoch": 1.2376842384809454, + "grad_norm": 1.1985431375912965, + "learning_rate": 6.951618370026378e-06, + "loss": 0.25683268904685974, + "step": 4661 + }, + { + "epoch": 1.2379498074624884, + "grad_norm": 1.307367140627743, + "learning_rate": 6.947436880737089e-06, + "loss": 0.2861499786376953, + "step": 4662 + }, + { + "epoch": 1.2382153764440313, + "grad_norm": 1.3831407282476516, + "learning_rate": 6.943255979945965e-06, + "loss": 0.28021398186683655, + "step": 4663 + }, + { + "epoch": 1.2384809454255743, + "grad_norm": 1.2940713851528283, + "learning_rate": 6.939075668459039e-06, + "loss": 0.2739776074886322, + "step": 4664 + }, + { + "epoch": 1.2387465144071172, + "grad_norm": 1.3433235944815516, + "learning_rate": 6.934895947082221e-06, + "loss": 0.26015231013298035, + "step": 4665 + }, + { + "epoch": 1.2390120833886602, + "grad_norm": 1.3230400884249285, + "learning_rate": 6.930716816621317e-06, + "loss": 0.2572113871574402, + "step": 4666 + }, + { + "epoch": 1.2392776523702032, + "grad_norm": 1.266134559335497, + "learning_rate": 6.926538277882012e-06, + "loss": 0.24094708263874054, + "step": 4667 + }, + { + "epoch": 1.239543221351746, + "grad_norm": 1.1175335748548278, + "learning_rate": 6.92236033166988e-06, + "loss": 0.22803835570812225, + "step": 4668 + }, + { + "epoch": 1.239808790333289, + "grad_norm": 1.1198379137737728, + "learning_rate": 6.9181829787903774e-06, + "loss": 0.23672322928905487, + "step": 4669 + }, + { + "epoch": 1.240074359314832, + "grad_norm": 
1.3356297624894082, + "learning_rate": 6.91400622004885e-06, + "loss": 0.2568579912185669, + "step": 4670 + }, + { + "epoch": 1.240339928296375, + "grad_norm": 1.1768710116388783, + "learning_rate": 6.909830056250527e-06, + "loss": 0.25267845392227173, + "step": 4671 + }, + { + "epoch": 1.240605497277918, + "grad_norm": 1.2702969549109802, + "learning_rate": 6.905654488200524e-06, + "loss": 0.30336999893188477, + "step": 4672 + }, + { + "epoch": 1.2408710662594609, + "grad_norm": 1.17710991443045, + "learning_rate": 6.901479516703842e-06, + "loss": 0.2741299867630005, + "step": 4673 + }, + { + "epoch": 1.2411366352410038, + "grad_norm": 1.276658372251755, + "learning_rate": 6.897305142565363e-06, + "loss": 0.2896823585033417, + "step": 4674 + }, + { + "epoch": 1.2414022042225468, + "grad_norm": 1.2718591233587666, + "learning_rate": 6.8931313665898625e-06, + "loss": 0.23102329671382904, + "step": 4675 + }, + { + "epoch": 1.2416677732040897, + "grad_norm": 1.3209479857777737, + "learning_rate": 6.8889581895819915e-06, + "loss": 0.2600775361061096, + "step": 4676 + }, + { + "epoch": 1.2419333421856327, + "grad_norm": 1.1932453661715805, + "learning_rate": 6.884785612346291e-06, + "loss": 0.23589132726192474, + "step": 4677 + }, + { + "epoch": 1.2421989111671756, + "grad_norm": 1.155454248544126, + "learning_rate": 6.880613635687184e-06, + "loss": 0.24419361352920532, + "step": 4678 + }, + { + "epoch": 1.2424644801487186, + "grad_norm": 1.1323309321599895, + "learning_rate": 6.876442260408977e-06, + "loss": 0.23267227411270142, + "step": 4679 + }, + { + "epoch": 1.2427300491302615, + "grad_norm": 1.2244929254620942, + "learning_rate": 6.8722714873158635e-06, + "loss": 0.2507064938545227, + "step": 4680 + }, + { + "epoch": 1.2429956181118045, + "grad_norm": 1.2079227486812785, + "learning_rate": 6.868101317211922e-06, + "loss": 0.2529929280281067, + "step": 4681 + }, + { + "epoch": 1.2432611870933474, + "grad_norm": 1.1627205371245832, + "learning_rate": 
6.863931750901107e-06, + "loss": 0.23255379498004913, + "step": 4682 + }, + { + "epoch": 1.2435267560748904, + "grad_norm": 1.1997195000446994, + "learning_rate": 6.859762789187259e-06, + "loss": 0.22757332026958466, + "step": 4683 + }, + { + "epoch": 1.2437923250564333, + "grad_norm": 1.2115398233652928, + "learning_rate": 6.8555944328741145e-06, + "loss": 0.2578364312648773, + "step": 4684 + }, + { + "epoch": 1.2440578940379763, + "grad_norm": 1.1854445431935166, + "learning_rate": 6.851426682765278e-06, + "loss": 0.27568408846855164, + "step": 4685 + }, + { + "epoch": 1.2443234630195192, + "grad_norm": 1.19754548578965, + "learning_rate": 6.847259539664244e-06, + "loss": 0.25595831871032715, + "step": 4686 + }, + { + "epoch": 1.2445890320010622, + "grad_norm": 1.1807617266458326, + "learning_rate": 6.843093004374386e-06, + "loss": 0.2195426970720291, + "step": 4687 + }, + { + "epoch": 1.2448546009826051, + "grad_norm": 1.1623631531241645, + "learning_rate": 6.838927077698967e-06, + "loss": 0.23247741162776947, + "step": 4688 + }, + { + "epoch": 1.245120169964148, + "grad_norm": 1.2953467781322094, + "learning_rate": 6.834761760441127e-06, + "loss": 0.26149916648864746, + "step": 4689 + }, + { + "epoch": 1.245385738945691, + "grad_norm": 1.1310243964126157, + "learning_rate": 6.830597053403885e-06, + "loss": 0.2521447241306305, + "step": 4690 + }, + { + "epoch": 1.245651307927234, + "grad_norm": 1.1803812700297758, + "learning_rate": 6.826432957390155e-06, + "loss": 0.23401981592178345, + "step": 4691 + }, + { + "epoch": 1.245916876908777, + "grad_norm": 1.3114713754211442, + "learning_rate": 6.822269473202714e-06, + "loss": 0.25341230630874634, + "step": 4692 + }, + { + "epoch": 1.24618244589032, + "grad_norm": 1.2025537581570156, + "learning_rate": 6.818106601644248e-06, + "loss": 0.2513907551765442, + "step": 4693 + }, + { + "epoch": 1.2464480148718629, + "grad_norm": 1.2263403478965602, + "learning_rate": 6.8139443435173005e-06, + "loss": 0.2682073414325714, 
+ "step": 4694 + }, + { + "epoch": 1.2467135838534058, + "grad_norm": 1.1801313342439474, + "learning_rate": 6.809782699624308e-06, + "loss": 0.22726872563362122, + "step": 4695 + }, + { + "epoch": 1.2469791528349488, + "grad_norm": 1.3004812874511507, + "learning_rate": 6.805621670767588e-06, + "loss": 0.24184030294418335, + "step": 4696 + }, + { + "epoch": 1.247244721816492, + "grad_norm": 1.0395051535883466, + "learning_rate": 6.801461257749334e-06, + "loss": 0.203639417886734, + "step": 4697 + }, + { + "epoch": 1.2475102907980349, + "grad_norm": 1.1786557175840897, + "learning_rate": 6.797301461371626e-06, + "loss": 0.2170606106519699, + "step": 4698 + }, + { + "epoch": 1.2477758597795778, + "grad_norm": 1.1231113548110434, + "learning_rate": 6.7931422824364245e-06, + "loss": 0.2225056290626526, + "step": 4699 + }, + { + "epoch": 1.2480414287611208, + "grad_norm": 1.1702414518259399, + "learning_rate": 6.788983721745569e-06, + "loss": 0.2388974130153656, + "step": 4700 + }, + { + "epoch": 1.2483069977426637, + "grad_norm": 1.14649445863332, + "learning_rate": 6.784825780100776e-06, + "loss": 0.2291644811630249, + "step": 4701 + }, + { + "epoch": 1.2485725667242067, + "grad_norm": 1.3474164807852358, + "learning_rate": 6.7806684583036595e-06, + "loss": 0.23793739080429077, + "step": 4702 + }, + { + "epoch": 1.2488381357057496, + "grad_norm": 1.2839354787463726, + "learning_rate": 6.776511757155695e-06, + "loss": 0.2756902277469635, + "step": 4703 + }, + { + "epoch": 1.2491037046872926, + "grad_norm": 1.3039866822855, + "learning_rate": 6.772355677458249e-06, + "loss": 0.25046268105506897, + "step": 4704 + }, + { + "epoch": 1.2493692736688355, + "grad_norm": 1.3053078100109528, + "learning_rate": 6.7682002200125575e-06, + "loss": 0.238486647605896, + "step": 4705 + }, + { + "epoch": 1.2496348426503785, + "grad_norm": 1.1855651210182463, + "learning_rate": 6.764045385619751e-06, + "loss": 0.2366628348827362, + "step": 4706 + }, + { + "epoch": 1.2499004116319214, + 
"grad_norm": 1.21176387977239, + "learning_rate": 6.759891175080827e-06, + "loss": 0.24825221300125122, + "step": 4707 + }, + { + "epoch": 1.2501659806134644, + "grad_norm": 1.2922207381934139, + "learning_rate": 6.755737589196673e-06, + "loss": 0.2304186224937439, + "step": 4708 + }, + { + "epoch": 1.2504315495950074, + "grad_norm": 1.200468035859197, + "learning_rate": 6.7515846287680476e-06, + "loss": 0.2824471592903137, + "step": 4709 + }, + { + "epoch": 1.2506971185765503, + "grad_norm": 1.1994302764371214, + "learning_rate": 6.747432294595591e-06, + "loss": 0.23130697011947632, + "step": 4710 + }, + { + "epoch": 1.2509626875580933, + "grad_norm": 1.3183641444794993, + "learning_rate": 6.7432805874798334e-06, + "loss": 0.28371602296829224, + "step": 4711 + }, + { + "epoch": 1.2512282565396362, + "grad_norm": 1.1529924861272876, + "learning_rate": 6.739129508221167e-06, + "loss": 0.23452092707157135, + "step": 4712 + }, + { + "epoch": 1.2514938255211792, + "grad_norm": 1.245806995398341, + "learning_rate": 6.734979057619873e-06, + "loss": 0.22486859560012817, + "step": 4713 + }, + { + "epoch": 1.2517593945027221, + "grad_norm": 1.3481589110906722, + "learning_rate": 6.730829236476111e-06, + "loss": 0.2818532884120941, + "step": 4714 + }, + { + "epoch": 1.252024963484265, + "grad_norm": 1.172531442878329, + "learning_rate": 6.7266800455899125e-06, + "loss": 0.2060810923576355, + "step": 4715 + }, + { + "epoch": 1.252290532465808, + "grad_norm": 1.2183128764116598, + "learning_rate": 6.722531485761199e-06, + "loss": 0.2183244377374649, + "step": 4716 + }, + { + "epoch": 1.252556101447351, + "grad_norm": 1.2596677279915016, + "learning_rate": 6.71838355778976e-06, + "loss": 0.24757327139377594, + "step": 4717 + }, + { + "epoch": 1.252821670428894, + "grad_norm": 1.3267776765958388, + "learning_rate": 6.714236262475268e-06, + "loss": 0.3058333396911621, + "step": 4718 + }, + { + "epoch": 1.2530872394104369, + "grad_norm": 1.1893155452841293, + "learning_rate": 
6.71008960061727e-06, + "loss": 0.24095620214939117, + "step": 4719 + }, + { + "epoch": 1.2533528083919798, + "grad_norm": 1.3050165159615794, + "learning_rate": 6.705943573015199e-06, + "loss": 0.25614839792251587, + "step": 4720 + }, + { + "epoch": 1.2536183773735228, + "grad_norm": 1.2537185610498753, + "learning_rate": 6.701798180468356e-06, + "loss": 0.22295254468917847, + "step": 4721 + }, + { + "epoch": 1.2538839463550657, + "grad_norm": 1.1724661677534984, + "learning_rate": 6.697653423775926e-06, + "loss": 0.24783796072006226, + "step": 4722 + }, + { + "epoch": 1.2541495153366087, + "grad_norm": 1.5676339911360846, + "learning_rate": 6.693509303736969e-06, + "loss": 0.19702200591564178, + "step": 4723 + }, + { + "epoch": 1.2544150843181516, + "grad_norm": 1.2713976115459882, + "learning_rate": 6.689365821150421e-06, + "loss": 0.2539074122905731, + "step": 4724 + }, + { + "epoch": 1.2546806532996946, + "grad_norm": 1.2015875463338734, + "learning_rate": 6.6852229768150976e-06, + "loss": 0.2480372041463852, + "step": 4725 + }, + { + "epoch": 1.2549462222812375, + "grad_norm": 1.1742876462412417, + "learning_rate": 6.68108077152969e-06, + "loss": 0.2231048047542572, + "step": 4726 + }, + { + "epoch": 1.2552117912627805, + "grad_norm": 1.1571308721577904, + "learning_rate": 6.676939206092766e-06, + "loss": 0.260783851146698, + "step": 4727 + }, + { + "epoch": 1.2554773602443234, + "grad_norm": 1.2569537102203152, + "learning_rate": 6.67279828130277e-06, + "loss": 0.24069254100322723, + "step": 4728 + }, + { + "epoch": 1.2557429292258664, + "grad_norm": 1.1732343490674524, + "learning_rate": 6.668657997958027e-06, + "loss": 0.2578867971897125, + "step": 4729 + }, + { + "epoch": 1.2560084982074093, + "grad_norm": 1.102080552368197, + "learning_rate": 6.664518356856732e-06, + "loss": 0.20724457502365112, + "step": 4730 + }, + { + "epoch": 1.2562740671889523, + "grad_norm": 1.1527224778451435, + "learning_rate": 6.6603793587969586e-06, + "loss": 
0.23107580840587616, + "step": 4731 + }, + { + "epoch": 1.2565396361704952, + "grad_norm": 1.123633807819834, + "learning_rate": 6.656241004576659e-06, + "loss": 0.2481832504272461, + "step": 4732 + }, + { + "epoch": 1.2568052051520382, + "grad_norm": 1.1353422900728998, + "learning_rate": 6.652103294993657e-06, + "loss": 0.2219698578119278, + "step": 4733 + }, + { + "epoch": 1.2570707741335811, + "grad_norm": 1.1538807443087884, + "learning_rate": 6.647966230845655e-06, + "loss": 0.2245863974094391, + "step": 4734 + }, + { + "epoch": 1.257336343115124, + "grad_norm": 1.1991392114731283, + "learning_rate": 6.643829812930231e-06, + "loss": 0.2086387574672699, + "step": 4735 + }, + { + "epoch": 1.257601912096667, + "grad_norm": 1.1702949625685939, + "learning_rate": 6.6396940420448355e-06, + "loss": 0.23484499752521515, + "step": 4736 + }, + { + "epoch": 1.25786748107821, + "grad_norm": 1.1449620939429583, + "learning_rate": 6.635558918986797e-06, + "loss": 0.22011062502861023, + "step": 4737 + }, + { + "epoch": 1.258133050059753, + "grad_norm": 1.240312422577115, + "learning_rate": 6.631424444553319e-06, + "loss": 0.2426830381155014, + "step": 4738 + }, + { + "epoch": 1.258398619041296, + "grad_norm": 1.2472398676845469, + "learning_rate": 6.627290619541481e-06, + "loss": 0.2702174484729767, + "step": 4739 + }, + { + "epoch": 1.2586641880228389, + "grad_norm": 1.4005529994015682, + "learning_rate": 6.623157444748234e-06, + "loss": 0.26594820618629456, + "step": 4740 + }, + { + "epoch": 1.2589297570043818, + "grad_norm": 1.2550785934224764, + "learning_rate": 6.619024920970405e-06, + "loss": 0.2546013593673706, + "step": 4741 + }, + { + "epoch": 1.2591953259859248, + "grad_norm": 1.425429985784882, + "learning_rate": 6.614893049004696e-06, + "loss": 0.27207985520362854, + "step": 4742 + }, + { + "epoch": 1.259460894967468, + "grad_norm": 1.4445692953489113, + "learning_rate": 6.610761829647685e-06, + "loss": 0.2640937566757202, + "step": 4743 + }, + { + "epoch": 
1.2597264639490109, + "grad_norm": 1.4095791296432063, + "learning_rate": 6.60663126369582e-06, + "loss": 0.2890278697013855, + "step": 4744 + }, + { + "epoch": 1.2599920329305538, + "grad_norm": 1.1225606468440805, + "learning_rate": 6.602501351945425e-06, + "loss": 0.24610492587089539, + "step": 4745 + }, + { + "epoch": 1.2602576019120968, + "grad_norm": 1.5273064552741338, + "learning_rate": 6.598372095192699e-06, + "loss": 0.24946746230125427, + "step": 4746 + }, + { + "epoch": 1.2605231708936397, + "grad_norm": 1.0546449518544165, + "learning_rate": 6.594243494233717e-06, + "loss": 0.2369944453239441, + "step": 4747 + }, + { + "epoch": 1.2607887398751827, + "grad_norm": 1.180556169492091, + "learning_rate": 6.590115549864421e-06, + "loss": 0.20980143547058105, + "step": 4748 + }, + { + "epoch": 1.2610543088567256, + "grad_norm": 1.1524244978042124, + "learning_rate": 6.5859882628806315e-06, + "loss": 0.22930344939231873, + "step": 4749 + }, + { + "epoch": 1.2613198778382686, + "grad_norm": 1.1353386909454481, + "learning_rate": 6.5818616340780405e-06, + "loss": 0.22352416813373566, + "step": 4750 + }, + { + "epoch": 1.2615854468198116, + "grad_norm": 1.0615225488277533, + "learning_rate": 6.577735664252214e-06, + "loss": 0.2049327939748764, + "step": 4751 + }, + { + "epoch": 1.2618510158013545, + "grad_norm": 1.3420243952278277, + "learning_rate": 6.573610354198587e-06, + "loss": 0.21858355402946472, + "step": 4752 + }, + { + "epoch": 1.2621165847828975, + "grad_norm": 1.1248247337478985, + "learning_rate": 6.5694857047124786e-06, + "loss": 0.225118950009346, + "step": 4753 + }, + { + "epoch": 1.2623821537644404, + "grad_norm": 1.1623337764465298, + "learning_rate": 6.565361716589063e-06, + "loss": 0.25780409574508667, + "step": 4754 + }, + { + "epoch": 1.2626477227459834, + "grad_norm": 1.1580907073042885, + "learning_rate": 6.5612383906233964e-06, + "loss": 0.23507939279079437, + "step": 4755 + }, + { + "epoch": 1.2629132917275263, + "grad_norm": 
1.1733914893757196, + "learning_rate": 6.557115727610417e-06, + "loss": 0.27884477376937866, + "step": 4756 + }, + { + "epoch": 1.2631788607090693, + "grad_norm": 1.145599873702901, + "learning_rate": 6.552993728344921e-06, + "loss": 0.2564120888710022, + "step": 4757 + }, + { + "epoch": 1.2634444296906122, + "grad_norm": 1.3139857622357067, + "learning_rate": 6.548872393621578e-06, + "loss": 0.259651243686676, + "step": 4758 + }, + { + "epoch": 1.2637099986721552, + "grad_norm": 1.2930462493551071, + "learning_rate": 6.544751724234937e-06, + "loss": 0.23473814129829407, + "step": 4759 + }, + { + "epoch": 1.2639755676536981, + "grad_norm": 1.4411652435541018, + "learning_rate": 6.540631720979411e-06, + "loss": 0.2447129189968109, + "step": 4760 + }, + { + "epoch": 1.264241136635241, + "grad_norm": 1.1968236723875711, + "learning_rate": 6.536512384649294e-06, + "loss": 0.22695237398147583, + "step": 4761 + }, + { + "epoch": 1.264506705616784, + "grad_norm": 1.117214929215876, + "learning_rate": 6.532393716038738e-06, + "loss": 0.24303656816482544, + "step": 4762 + }, + { + "epoch": 1.264772274598327, + "grad_norm": 1.2106972269991043, + "learning_rate": 6.528275715941776e-06, + "loss": 0.23911908268928528, + "step": 4763 + }, + { + "epoch": 1.26503784357987, + "grad_norm": 1.0480584899589354, + "learning_rate": 6.524158385152309e-06, + "loss": 0.19766747951507568, + "step": 4764 + }, + { + "epoch": 1.2653034125614129, + "grad_norm": 1.390914844473808, + "learning_rate": 6.520041724464114e-06, + "loss": 0.24074134230613708, + "step": 4765 + }, + { + "epoch": 1.2655689815429558, + "grad_norm": 1.3379815630375766, + "learning_rate": 6.515925734670834e-06, + "loss": 0.27557867765426636, + "step": 4766 + }, + { + "epoch": 1.2658345505244988, + "grad_norm": 1.3286252957995823, + "learning_rate": 6.511810416565979e-06, + "loss": 0.24387787282466888, + "step": 4767 + }, + { + "epoch": 1.2661001195060417, + "grad_norm": 1.4234035593814256, + "learning_rate": 
6.507695770942939e-06, + "loss": 0.27863091230392456, + "step": 4768 + }, + { + "epoch": 1.2663656884875847, + "grad_norm": 1.1364646133588507, + "learning_rate": 6.503581798594965e-06, + "loss": 0.23589591681957245, + "step": 4769 + }, + { + "epoch": 1.2666312574691276, + "grad_norm": 1.1932509985997282, + "learning_rate": 6.499468500315185e-06, + "loss": 0.22869807481765747, + "step": 4770 + }, + { + "epoch": 1.2668968264506706, + "grad_norm": 1.2498634762148577, + "learning_rate": 6.495355876896592e-06, + "loss": 0.2351568192243576, + "step": 4771 + }, + { + "epoch": 1.2671623954322135, + "grad_norm": 1.1271253337210285, + "learning_rate": 6.491243929132052e-06, + "loss": 0.2291228175163269, + "step": 4772 + }, + { + "epoch": 1.2674279644137565, + "grad_norm": 1.2013953219342957, + "learning_rate": 6.487132657814297e-06, + "loss": 0.23203743994235992, + "step": 4773 + }, + { + "epoch": 1.2676935333952994, + "grad_norm": 1.0887907712326863, + "learning_rate": 6.483022063735938e-06, + "loss": 0.22035656869411469, + "step": 4774 + }, + { + "epoch": 1.2679591023768424, + "grad_norm": 1.1270651148723736, + "learning_rate": 6.478912147689448e-06, + "loss": 0.21576716005802155, + "step": 4775 + }, + { + "epoch": 1.2682246713583853, + "grad_norm": 1.3174966546949713, + "learning_rate": 6.474802910467171e-06, + "loss": 0.27764660120010376, + "step": 4776 + }, + { + "epoch": 1.2684902403399283, + "grad_norm": 1.2418434137314485, + "learning_rate": 6.4706943528613135e-06, + "loss": 0.23715822398662567, + "step": 4777 + }, + { + "epoch": 1.2687558093214713, + "grad_norm": 1.1794293567561218, + "learning_rate": 6.4665864756639606e-06, + "loss": 0.27764302492141724, + "step": 4778 + }, + { + "epoch": 1.2690213783030142, + "grad_norm": 1.2157630211554828, + "learning_rate": 6.4624792796670624e-06, + "loss": 0.21634885668754578, + "step": 4779 + }, + { + "epoch": 1.2692869472845572, + "grad_norm": 1.2217447541656432, + "learning_rate": 6.458372765662438e-06, + "loss": 
0.27262234687805176, + "step": 4780 + }, + { + "epoch": 1.2695525162661, + "grad_norm": 1.1716437260315133, + "learning_rate": 6.454266934441775e-06, + "loss": 0.2219458371400833, + "step": 4781 + }, + { + "epoch": 1.269818085247643, + "grad_norm": 1.2515340549821425, + "learning_rate": 6.450161786796625e-06, + "loss": 0.22181497514247894, + "step": 4782 + }, + { + "epoch": 1.270083654229186, + "grad_norm": 1.1858127036353512, + "learning_rate": 6.446057323518422e-06, + "loss": 0.22642338275909424, + "step": 4783 + }, + { + "epoch": 1.270349223210729, + "grad_norm": 1.2243357553110101, + "learning_rate": 6.441953545398451e-06, + "loss": 0.239711195230484, + "step": 4784 + }, + { + "epoch": 1.270614792192272, + "grad_norm": 1.29507599792429, + "learning_rate": 6.437850453227872e-06, + "loss": 0.2422255128622055, + "step": 4785 + }, + { + "epoch": 1.2708803611738149, + "grad_norm": 1.3013507424737665, + "learning_rate": 6.433748047797715e-06, + "loss": 0.23184439539909363, + "step": 4786 + }, + { + "epoch": 1.2711459301553578, + "grad_norm": 1.3032581886502261, + "learning_rate": 6.429646329898873e-06, + "loss": 0.2737428843975067, + "step": 4787 + }, + { + "epoch": 1.2714114991369008, + "grad_norm": 1.2565288812855064, + "learning_rate": 6.4255453003221115e-06, + "loss": 0.23565897345542908, + "step": 4788 + }, + { + "epoch": 1.2716770681184437, + "grad_norm": 1.3665497750328797, + "learning_rate": 6.421444959858059e-06, + "loss": 0.24349254369735718, + "step": 4789 + }, + { + "epoch": 1.2719426370999867, + "grad_norm": 1.2050219186384792, + "learning_rate": 6.4173453092972115e-06, + "loss": 0.2637769281864166, + "step": 4790 + }, + { + "epoch": 1.2722082060815296, + "grad_norm": 1.0381858832581394, + "learning_rate": 6.413246349429934e-06, + "loss": 0.21420228481292725, + "step": 4791 + }, + { + "epoch": 1.2724737750630726, + "grad_norm": 1.1333618917642097, + "learning_rate": 6.409148081046461e-06, + "loss": 0.25270405411720276, + "step": 4792 + }, + { + "epoch": 
1.2727393440446155, + "grad_norm": 1.270676964933882, + "learning_rate": 6.405050504936887e-06, + "loss": 0.2710546851158142, + "step": 4793 + }, + { + "epoch": 1.2730049130261585, + "grad_norm": 1.1608891040490155, + "learning_rate": 6.400953621891178e-06, + "loss": 0.2388489842414856, + "step": 4794 + }, + { + "epoch": 1.2732704820077014, + "grad_norm": 1.1600463634666516, + "learning_rate": 6.396857432699164e-06, + "loss": 0.24581485986709595, + "step": 4795 + }, + { + "epoch": 1.2735360509892444, + "grad_norm": 1.18464881130754, + "learning_rate": 6.3927619381505404e-06, + "loss": 0.24219104647636414, + "step": 4796 + }, + { + "epoch": 1.2738016199707873, + "grad_norm": 1.0878857914267965, + "learning_rate": 6.388667139034873e-06, + "loss": 0.22722014784812927, + "step": 4797 + }, + { + "epoch": 1.2740671889523303, + "grad_norm": 1.275017638940232, + "learning_rate": 6.384573036141589e-06, + "loss": 0.25177234411239624, + "step": 4798 + }, + { + "epoch": 1.2743327579338732, + "grad_norm": 1.2824350948041237, + "learning_rate": 6.380479630259983e-06, + "loss": 0.2291412651538849, + "step": 4799 + }, + { + "epoch": 1.2745983269154162, + "grad_norm": 1.3215047708165757, + "learning_rate": 6.376386922179216e-06, + "loss": 0.2528606951236725, + "step": 4800 + }, + { + "epoch": 1.2748638958969591, + "grad_norm": 1.11001311385955, + "learning_rate": 6.372294912688315e-06, + "loss": 0.21383032202720642, + "step": 4801 + }, + { + "epoch": 1.275129464878502, + "grad_norm": 1.2162134010863295, + "learning_rate": 6.368203602576168e-06, + "loss": 0.2538087069988251, + "step": 4802 + }, + { + "epoch": 1.275395033860045, + "grad_norm": 1.2127822206191197, + "learning_rate": 6.364112992631537e-06, + "loss": 0.24437417089939117, + "step": 4803 + }, + { + "epoch": 1.275660602841588, + "grad_norm": 1.1678428848154245, + "learning_rate": 6.360023083643036e-06, + "loss": 0.2347753942012787, + "step": 4804 + }, + { + "epoch": 1.275926171823131, + "grad_norm": 1.226812886332051, + 
"learning_rate": 6.3559338763991576e-06, + "loss": 0.271645188331604, + "step": 4805 + }, + { + "epoch": 1.276191740804674, + "grad_norm": 1.2088165730060163, + "learning_rate": 6.35184537168825e-06, + "loss": 0.2465275228023529, + "step": 4806 + }, + { + "epoch": 1.2764573097862169, + "grad_norm": 1.216147524532817, + "learning_rate": 6.347757570298527e-06, + "loss": 0.26494044065475464, + "step": 4807 + }, + { + "epoch": 1.2767228787677598, + "grad_norm": 3.360286997098956, + "learning_rate": 6.343670473018071e-06, + "loss": 0.28292080760002136, + "step": 4808 + }, + { + "epoch": 1.2769884477493028, + "grad_norm": 1.2160142828428218, + "learning_rate": 6.339584080634824e-06, + "loss": 0.2525850534439087, + "step": 4809 + }, + { + "epoch": 1.2772540167308457, + "grad_norm": 1.224576908350391, + "learning_rate": 6.335498393936597e-06, + "loss": 0.22056345641613007, + "step": 4810 + }, + { + "epoch": 1.2775195857123887, + "grad_norm": 1.1603347806824698, + "learning_rate": 6.331413413711061e-06, + "loss": 0.23081058263778687, + "step": 4811 + }, + { + "epoch": 1.2777851546939316, + "grad_norm": 1.2309265633693007, + "learning_rate": 6.327329140745751e-06, + "loss": 0.2722470760345459, + "step": 4812 + }, + { + "epoch": 1.2780507236754748, + "grad_norm": 1.2598117885787161, + "learning_rate": 6.32324557582807e-06, + "loss": 0.24454641342163086, + "step": 4813 + }, + { + "epoch": 1.2783162926570177, + "grad_norm": 1.2713820573097572, + "learning_rate": 6.319162719745277e-06, + "loss": 0.21884413063526154, + "step": 4814 + }, + { + "epoch": 1.2785818616385607, + "grad_norm": 1.276590514388197, + "learning_rate": 6.3150805732845e-06, + "loss": 0.2737545669078827, + "step": 4815 + }, + { + "epoch": 1.2788474306201036, + "grad_norm": 1.1747258996206047, + "learning_rate": 6.31099913723273e-06, + "loss": 0.2478230595588684, + "step": 4816 + }, + { + "epoch": 1.2791129996016466, + "grad_norm": 1.2461752717378811, + "learning_rate": 6.306918412376817e-06, + "loss": 
0.2508094310760498, + "step": 4817 + }, + { + "epoch": 1.2793785685831895, + "grad_norm": 1.267840547546021, + "learning_rate": 6.302838399503477e-06, + "loss": 0.24666383862495422, + "step": 4818 + }, + { + "epoch": 1.2796441375647325, + "grad_norm": 1.176059099377582, + "learning_rate": 6.298759099399292e-06, + "loss": 0.27833491563796997, + "step": 4819 + }, + { + "epoch": 1.2799097065462754, + "grad_norm": 1.1948595147219725, + "learning_rate": 6.294680512850699e-06, + "loss": 0.23092475533485413, + "step": 4820 + }, + { + "epoch": 1.2801752755278184, + "grad_norm": 1.1935160504644853, + "learning_rate": 6.290602640644005e-06, + "loss": 0.2714667022228241, + "step": 4821 + }, + { + "epoch": 1.2804408445093614, + "grad_norm": 1.1769422055863235, + "learning_rate": 6.286525483565373e-06, + "loss": 0.23292411863803864, + "step": 4822 + }, + { + "epoch": 1.2807064134909043, + "grad_norm": 1.1322856806053188, + "learning_rate": 6.282449042400831e-06, + "loss": 0.23809143900871277, + "step": 4823 + }, + { + "epoch": 1.2809719824724473, + "grad_norm": 1.0235534573008647, + "learning_rate": 6.278373317936269e-06, + "loss": 0.22593267261981964, + "step": 4824 + }, + { + "epoch": 1.2812375514539902, + "grad_norm": 1.2491300300411192, + "learning_rate": 6.274298310957439e-06, + "loss": 0.26024624705314636, + "step": 4825 + }, + { + "epoch": 1.2815031204355332, + "grad_norm": 1.138185007529017, + "learning_rate": 6.270224022249957e-06, + "loss": 0.22418126463890076, + "step": 4826 + }, + { + "epoch": 1.2817686894170761, + "grad_norm": 1.2374650134400174, + "learning_rate": 6.266150452599288e-06, + "loss": 0.26452577114105225, + "step": 4827 + }, + { + "epoch": 1.282034258398619, + "grad_norm": 1.2453587043668277, + "learning_rate": 6.262077602790779e-06, + "loss": 0.24412381649017334, + "step": 4828 + }, + { + "epoch": 1.282299827380162, + "grad_norm": 1.1670875672055734, + "learning_rate": 6.258005473609623e-06, + "loss": 0.22476118803024292, + "step": 4829 + }, + { + 
"epoch": 1.282565396361705, + "grad_norm": 1.1744502576491334, + "learning_rate": 6.25393406584088e-06, + "loss": 0.2208547294139862, + "step": 4830 + }, + { + "epoch": 1.282830965343248, + "grad_norm": 1.340282271944368, + "learning_rate": 6.249863380269467e-06, + "loss": 0.2903650999069214, + "step": 4831 + }, + { + "epoch": 1.2830965343247909, + "grad_norm": 1.2018727401561922, + "learning_rate": 6.245793417680168e-06, + "loss": 0.24413639307022095, + "step": 4832 + }, + { + "epoch": 1.2833621033063338, + "grad_norm": 1.162422850806728, + "learning_rate": 6.241724178857621e-06, + "loss": 0.2193944752216339, + "step": 4833 + }, + { + "epoch": 1.2836276722878768, + "grad_norm": 1.2159517583191957, + "learning_rate": 6.237655664586326e-06, + "loss": 0.22847513854503632, + "step": 4834 + }, + { + "epoch": 1.2838932412694197, + "grad_norm": 1.4211501406512423, + "learning_rate": 6.233587875650648e-06, + "loss": 0.269639253616333, + "step": 4835 + }, + { + "epoch": 1.2841588102509627, + "grad_norm": 1.3153478129856002, + "learning_rate": 6.229520812834801e-06, + "loss": 0.26329392194747925, + "step": 4836 + }, + { + "epoch": 1.2844243792325056, + "grad_norm": 1.0811891602166492, + "learning_rate": 6.225454476922877e-06, + "loss": 0.18800514936447144, + "step": 4837 + }, + { + "epoch": 1.2846899482140486, + "grad_norm": 1.2987987933289529, + "learning_rate": 6.2213888686988125e-06, + "loss": 0.2617965340614319, + "step": 4838 + }, + { + "epoch": 1.2849555171955915, + "grad_norm": 1.2029687476094635, + "learning_rate": 6.217323988946411e-06, + "loss": 0.22468717396259308, + "step": 4839 + }, + { + "epoch": 1.2852210861771345, + "grad_norm": 1.2126923104659393, + "learning_rate": 6.213259838449333e-06, + "loss": 0.22465646266937256, + "step": 4840 + }, + { + "epoch": 1.2854866551586774, + "grad_norm": 1.243457795287806, + "learning_rate": 6.209196417991096e-06, + "loss": 0.2655075490474701, + "step": 4841 + }, + { + "epoch": 1.2857522241402204, + "grad_norm": 
1.2818071805394324, + "learning_rate": 6.205133728355081e-06, + "loss": 0.25313282012939453, + "step": 4842 + }, + { + "epoch": 1.2860177931217633, + "grad_norm": 1.2136879668034726, + "learning_rate": 6.201071770324527e-06, + "loss": 0.23176322877407074, + "step": 4843 + }, + { + "epoch": 1.2862833621033063, + "grad_norm": 1.3628911983979357, + "learning_rate": 6.197010544682531e-06, + "loss": 0.27396953105926514, + "step": 4844 + }, + { + "epoch": 1.2865489310848492, + "grad_norm": 1.2333432651370633, + "learning_rate": 6.192950052212046e-06, + "loss": 0.24966171383857727, + "step": 4845 + }, + { + "epoch": 1.2868145000663922, + "grad_norm": 1.184789059228899, + "learning_rate": 6.188890293695895e-06, + "loss": 0.23290866613388062, + "step": 4846 + }, + { + "epoch": 1.2870800690479351, + "grad_norm": 1.2080105834836115, + "learning_rate": 6.184831269916749e-06, + "loss": 0.2368975132703781, + "step": 4847 + }, + { + "epoch": 1.287345638029478, + "grad_norm": 1.35199057217418, + "learning_rate": 6.180772981657139e-06, + "loss": 0.25305312871932983, + "step": 4848 + }, + { + "epoch": 1.287611207011021, + "grad_norm": 1.1825950927599171, + "learning_rate": 6.176715429699452e-06, + "loss": 0.22752982378005981, + "step": 4849 + }, + { + "epoch": 1.287876775992564, + "grad_norm": 1.152582857494987, + "learning_rate": 6.1726586148259395e-06, + "loss": 0.22426503896713257, + "step": 4850 + }, + { + "epoch": 1.288142344974107, + "grad_norm": 1.2203273234703247, + "learning_rate": 6.168602537818706e-06, + "loss": 0.21261993050575256, + "step": 4851 + }, + { + "epoch": 1.28840791395565, + "grad_norm": 1.1907151660933317, + "learning_rate": 6.1645471994597185e-06, + "loss": 0.237461656332016, + "step": 4852 + }, + { + "epoch": 1.2886734829371929, + "grad_norm": 1.113120156932308, + "learning_rate": 6.160492600530794e-06, + "loss": 0.1926390826702118, + "step": 4853 + }, + { + "epoch": 1.2889390519187358, + "grad_norm": 1.6824005161064397, + "learning_rate": 
6.156438741813608e-06, + "loss": 0.22673740983009338, + "step": 4854 + }, + { + "epoch": 1.289204620900279, + "grad_norm": 1.1453361708789405, + "learning_rate": 6.15238562408971e-06, + "loss": 0.22148582339286804, + "step": 4855 + }, + { + "epoch": 1.289470189881822, + "grad_norm": 1.3581323367394031, + "learning_rate": 6.148333248140483e-06, + "loss": 0.28319716453552246, + "step": 4856 + }, + { + "epoch": 1.289735758863365, + "grad_norm": 1.4367360633574449, + "learning_rate": 6.14428161474718e-06, + "loss": 0.23505647480487823, + "step": 4857 + }, + { + "epoch": 1.2900013278449078, + "grad_norm": 1.2052965186154045, + "learning_rate": 6.140230724690908e-06, + "loss": 0.24323523044586182, + "step": 4858 + }, + { + "epoch": 1.2902668968264508, + "grad_norm": 1.2357784405363281, + "learning_rate": 6.136180578752629e-06, + "loss": 0.22818386554718018, + "step": 4859 + }, + { + "epoch": 1.2905324658079937, + "grad_norm": 1.2670464740614045, + "learning_rate": 6.132131177713165e-06, + "loss": 0.24285198748111725, + "step": 4860 + }, + { + "epoch": 1.2907980347895367, + "grad_norm": 1.1369753370104339, + "learning_rate": 6.128082522353194e-06, + "loss": 0.24115213751792908, + "step": 4861 + }, + { + "epoch": 1.2910636037710796, + "grad_norm": 1.2213111344560537, + "learning_rate": 6.124034613453247e-06, + "loss": 0.21564510464668274, + "step": 4862 + }, + { + "epoch": 1.2913291727526226, + "grad_norm": 1.299973209896211, + "learning_rate": 6.119987451793711e-06, + "loss": 0.2329743504524231, + "step": 4863 + }, + { + "epoch": 1.2915947417341656, + "grad_norm": 1.2218786239106318, + "learning_rate": 6.115941038154835e-06, + "loss": 0.2161208689212799, + "step": 4864 + }, + { + "epoch": 1.2918603107157085, + "grad_norm": 1.2078035628631776, + "learning_rate": 6.111895373316721e-06, + "loss": 0.22765520215034485, + "step": 4865 + }, + { + "epoch": 1.2921258796972515, + "grad_norm": 1.2199257873933993, + "learning_rate": 6.107850458059322e-06, + "loss": 
0.25506818294525146, + "step": 4866 + }, + { + "epoch": 1.2923914486787944, + "grad_norm": 1.2014544077782259, + "learning_rate": 6.1038062931624505e-06, + "loss": 0.22543852031230927, + "step": 4867 + }, + { + "epoch": 1.2926570176603374, + "grad_norm": 1.282222410309602, + "learning_rate": 6.099762879405776e-06, + "loss": 0.24295030534267426, + "step": 4868 + }, + { + "epoch": 1.2929225866418803, + "grad_norm": 1.2221545432256802, + "learning_rate": 6.095720217568819e-06, + "loss": 0.2385009229183197, + "step": 4869 + }, + { + "epoch": 1.2931881556234233, + "grad_norm": 1.119514297375773, + "learning_rate": 6.091678308430956e-06, + "loss": 0.21410472691059113, + "step": 4870 + }, + { + "epoch": 1.2934537246049662, + "grad_norm": 1.299309717988783, + "learning_rate": 6.087637152771422e-06, + "loss": 0.25934773683547974, + "step": 4871 + }, + { + "epoch": 1.2937192935865092, + "grad_norm": 1.1783576597419445, + "learning_rate": 6.0835967513693e-06, + "loss": 0.24584373831748962, + "step": 4872 + }, + { + "epoch": 1.2939848625680521, + "grad_norm": 1.3413866916188153, + "learning_rate": 6.079557105003537e-06, + "loss": 0.2403055876493454, + "step": 4873 + }, + { + "epoch": 1.294250431549595, + "grad_norm": 1.2348806886655737, + "learning_rate": 6.075518214452927e-06, + "loss": 0.23861736059188843, + "step": 4874 + }, + { + "epoch": 1.294516000531138, + "grad_norm": 1.2099712971645404, + "learning_rate": 6.071480080496119e-06, + "loss": 0.21356427669525146, + "step": 4875 + }, + { + "epoch": 1.294781569512681, + "grad_norm": 1.314183683224707, + "learning_rate": 6.067442703911621e-06, + "loss": 0.2835869789123535, + "step": 4876 + }, + { + "epoch": 1.295047138494224, + "grad_norm": 1.1868362719294436, + "learning_rate": 6.063406085477788e-06, + "loss": 0.24233242869377136, + "step": 4877 + }, + { + "epoch": 1.2953127074757669, + "grad_norm": 1.2596980829406919, + "learning_rate": 6.059370225972834e-06, + "loss": 0.24986369907855988, + "step": 4878 + }, + { + "epoch": 
1.2955782764573098, + "grad_norm": 1.2583930460503605, + "learning_rate": 6.055335126174826e-06, + "loss": 0.2445756494998932, + "step": 4879 + }, + { + "epoch": 1.2958438454388528, + "grad_norm": 1.0635663336037695, + "learning_rate": 6.0513007868616825e-06, + "loss": 0.21331898868083954, + "step": 4880 + }, + { + "epoch": 1.2961094144203957, + "grad_norm": 1.1578193819974294, + "learning_rate": 6.047267208811174e-06, + "loss": 0.2782329320907593, + "step": 4881 + }, + { + "epoch": 1.2963749834019387, + "grad_norm": 2.326385436360766, + "learning_rate": 6.043234392800932e-06, + "loss": 0.20866765081882477, + "step": 4882 + }, + { + "epoch": 1.2966405523834816, + "grad_norm": 1.3211750202424803, + "learning_rate": 6.039202339608432e-06, + "loss": 0.2517815828323364, + "step": 4883 + }, + { + "epoch": 1.2969061213650246, + "grad_norm": 1.283845753322191, + "learning_rate": 6.03517105001101e-06, + "loss": 0.2617926597595215, + "step": 4884 + }, + { + "epoch": 1.2971716903465675, + "grad_norm": 1.3255504140080887, + "learning_rate": 6.0311405247858465e-06, + "loss": 0.24753305315971375, + "step": 4885 + }, + { + "epoch": 1.2974372593281105, + "grad_norm": 1.1805849927447047, + "learning_rate": 6.027110764709982e-06, + "loss": 0.19791719317436218, + "step": 4886 + }, + { + "epoch": 1.2977028283096534, + "grad_norm": 1.236398594932959, + "learning_rate": 6.023081770560307e-06, + "loss": 0.243608757853508, + "step": 4887 + }, + { + "epoch": 1.2979683972911964, + "grad_norm": 1.3652744342035896, + "learning_rate": 6.019053543113564e-06, + "loss": 0.20469853281974792, + "step": 4888 + }, + { + "epoch": 1.2982339662727393, + "grad_norm": 1.4682720215540639, + "learning_rate": 6.015026083146345e-06, + "loss": 0.25613903999328613, + "step": 4889 + }, + { + "epoch": 1.2984995352542823, + "grad_norm": 1.236223607561111, + "learning_rate": 6.010999391435097e-06, + "loss": 0.23349006474018097, + "step": 4890 + }, + { + "epoch": 1.2987651042358253, + "grad_norm": 
1.1137410591057113, + "learning_rate": 6.006973468756124e-06, + "loss": 0.23646268248558044, + "step": 4891 + }, + { + "epoch": 1.2990306732173682, + "grad_norm": 1.2845979720118916, + "learning_rate": 6.002948315885572e-06, + "loss": 0.2371794581413269, + "step": 4892 + }, + { + "epoch": 1.2992962421989112, + "grad_norm": 1.1150236044260142, + "learning_rate": 5.998923933599443e-06, + "loss": 0.23791949450969696, + "step": 4893 + }, + { + "epoch": 1.299561811180454, + "grad_norm": 1.2865838186648229, + "learning_rate": 5.994900322673593e-06, + "loss": 0.26923009753227234, + "step": 4894 + }, + { + "epoch": 1.299827380161997, + "grad_norm": 1.2724647699376699, + "learning_rate": 5.990877483883723e-06, + "loss": 0.20164884626865387, + "step": 4895 + }, + { + "epoch": 1.30009294914354, + "grad_norm": 1.1263986142938482, + "learning_rate": 5.986855418005393e-06, + "loss": 0.22345462441444397, + "step": 4896 + }, + { + "epoch": 1.300358518125083, + "grad_norm": 1.2936789930425872, + "learning_rate": 5.982834125814007e-06, + "loss": 0.26678675413131714, + "step": 4897 + }, + { + "epoch": 1.300624087106626, + "grad_norm": 1.3112472329084983, + "learning_rate": 5.978813608084825e-06, + "loss": 0.24674496054649353, + "step": 4898 + }, + { + "epoch": 1.3008896560881689, + "grad_norm": 1.3746634467420622, + "learning_rate": 5.974793865592947e-06, + "loss": 0.2804900109767914, + "step": 4899 + }, + { + "epoch": 1.3011552250697118, + "grad_norm": 1.3113866221822363, + "learning_rate": 5.970774899113345e-06, + "loss": 0.2413155734539032, + "step": 4900 + }, + { + "epoch": 1.3014207940512548, + "grad_norm": 1.139036608300987, + "learning_rate": 5.96675670942082e-06, + "loss": 0.21217301487922668, + "step": 4901 + }, + { + "epoch": 1.3016863630327977, + "grad_norm": 1.2012277530250777, + "learning_rate": 5.962739297290035e-06, + "loss": 0.23362940549850464, + "step": 4902 + }, + { + "epoch": 1.3019519320143407, + "grad_norm": 1.251148135143295, + "learning_rate": 
5.958722663495499e-06, + "loss": 0.2669242322444916, + "step": 4903 + }, + { + "epoch": 1.3022175009958836, + "grad_norm": 1.2365395348631665, + "learning_rate": 5.95470680881157e-06, + "loss": 0.2234608232975006, + "step": 4904 + }, + { + "epoch": 1.3024830699774266, + "grad_norm": 1.2441781101215288, + "learning_rate": 5.95069173401246e-06, + "loss": 0.25150394439697266, + "step": 4905 + }, + { + "epoch": 1.3027486389589695, + "grad_norm": 1.127228294882686, + "learning_rate": 5.9466774398722264e-06, + "loss": 0.2408430427312851, + "step": 4906 + }, + { + "epoch": 1.3030142079405125, + "grad_norm": 1.1200862415380408, + "learning_rate": 5.942663927164776e-06, + "loss": 0.2197013795375824, + "step": 4907 + }, + { + "epoch": 1.3032797769220554, + "grad_norm": 1.1474317141184802, + "learning_rate": 5.938651196663865e-06, + "loss": 0.2224964201450348, + "step": 4908 + }, + { + "epoch": 1.3035453459035984, + "grad_norm": 1.313380369558454, + "learning_rate": 5.934639249143108e-06, + "loss": 0.26466232538223267, + "step": 4909 + }, + { + "epoch": 1.3038109148851413, + "grad_norm": 1.2910852400248352, + "learning_rate": 5.930628085375958e-06, + "loss": 0.257996141910553, + "step": 4910 + }, + { + "epoch": 1.3040764838666843, + "grad_norm": 1.2056479933898356, + "learning_rate": 5.92661770613572e-06, + "loss": 0.21995162963867188, + "step": 4911 + }, + { + "epoch": 1.3043420528482272, + "grad_norm": 1.3003100511120855, + "learning_rate": 5.922608112195546e-06, + "loss": 0.26007258892059326, + "step": 4912 + }, + { + "epoch": 1.3046076218297702, + "grad_norm": 1.2951583817832037, + "learning_rate": 5.918599304328442e-06, + "loss": 0.25168827176094055, + "step": 4913 + }, + { + "epoch": 1.3048731908113131, + "grad_norm": 1.1932184000685677, + "learning_rate": 5.9145912833072535e-06, + "loss": 0.24686852097511292, + "step": 4914 + }, + { + "epoch": 1.305138759792856, + "grad_norm": 1.1951264683753895, + "learning_rate": 5.910584049904684e-06, + "loss": 0.247032031416893, + 
"step": 4915 + }, + { + "epoch": 1.305404328774399, + "grad_norm": 1.1517786776797445, + "learning_rate": 5.906577604893278e-06, + "loss": 0.21644674241542816, + "step": 4916 + }, + { + "epoch": 1.305669897755942, + "grad_norm": 1.3685662184124912, + "learning_rate": 5.9025719490454304e-06, + "loss": 0.28093478083610535, + "step": 4917 + }, + { + "epoch": 1.305935466737485, + "grad_norm": 1.2246452754262638, + "learning_rate": 5.898567083133389e-06, + "loss": 0.23731757700443268, + "step": 4918 + }, + { + "epoch": 1.306201035719028, + "grad_norm": 1.1125400405938466, + "learning_rate": 5.894563007929243e-06, + "loss": 0.20725491642951965, + "step": 4919 + }, + { + "epoch": 1.3064666047005709, + "grad_norm": 1.3186749566879576, + "learning_rate": 5.89055972420493e-06, + "loss": 0.2509433329105377, + "step": 4920 + }, + { + "epoch": 1.3067321736821138, + "grad_norm": 1.2793911736037649, + "learning_rate": 5.886557232732235e-06, + "loss": 0.2611580491065979, + "step": 4921 + }, + { + "epoch": 1.3069977426636568, + "grad_norm": 1.1754660821918204, + "learning_rate": 5.882555534282792e-06, + "loss": 0.20567595958709717, + "step": 4922 + }, + { + "epoch": 1.3072633116451997, + "grad_norm": 1.2179299933591687, + "learning_rate": 5.878554629628081e-06, + "loss": 0.22851137816905975, + "step": 4923 + }, + { + "epoch": 1.3075288806267427, + "grad_norm": 1.2283350051517878, + "learning_rate": 5.874554519539431e-06, + "loss": 0.24295902252197266, + "step": 4924 + }, + { + "epoch": 1.3077944496082856, + "grad_norm": 1.4565590371796837, + "learning_rate": 5.870555204788013e-06, + "loss": 0.29564642906188965, + "step": 4925 + }, + { + "epoch": 1.3080600185898288, + "grad_norm": 1.1906652754397118, + "learning_rate": 5.8665566861448465e-06, + "loss": 0.2399739921092987, + "step": 4926 + }, + { + "epoch": 1.3083255875713717, + "grad_norm": 1.2056826487968673, + "learning_rate": 5.862558964380806e-06, + "loss": 0.23882555961608887, + "step": 4927 + }, + { + "epoch": 
1.3085911565529147, + "grad_norm": 1.2167231777259742, + "learning_rate": 5.858562040266599e-06, + "loss": 0.2510842978954315, + "step": 4928 + }, + { + "epoch": 1.3088567255344576, + "grad_norm": 1.3760419048772665, + "learning_rate": 5.854565914572787e-06, + "loss": 0.257358193397522, + "step": 4929 + }, + { + "epoch": 1.3091222945160006, + "grad_norm": 1.1144476904886809, + "learning_rate": 5.850570588069775e-06, + "loss": 0.23228219151496887, + "step": 4930 + }, + { + "epoch": 1.3093878634975435, + "grad_norm": 1.2711888334314898, + "learning_rate": 5.846576061527818e-06, + "loss": 0.2234456092119217, + "step": 4931 + }, + { + "epoch": 1.3096534324790865, + "grad_norm": 1.1978737759145446, + "learning_rate": 5.842582335717009e-06, + "loss": 0.2273438423871994, + "step": 4932 + }, + { + "epoch": 1.3099190014606295, + "grad_norm": 1.2382395020505186, + "learning_rate": 5.838589411407294e-06, + "loss": 0.2423306405544281, + "step": 4933 + }, + { + "epoch": 1.3101845704421724, + "grad_norm": 1.2388376015521172, + "learning_rate": 5.834597289368463e-06, + "loss": 0.266438364982605, + "step": 4934 + }, + { + "epoch": 1.3104501394237154, + "grad_norm": 1.2553012161793193, + "learning_rate": 5.830605970370142e-06, + "loss": 0.2469342052936554, + "step": 4935 + }, + { + "epoch": 1.3107157084052583, + "grad_norm": 1.2077087937137967, + "learning_rate": 5.8266154551818225e-06, + "loss": 0.2834509611129761, + "step": 4936 + }, + { + "epoch": 1.3109812773868013, + "grad_norm": 1.3037377411135151, + "learning_rate": 5.822625744572821e-06, + "loss": 0.2615162134170532, + "step": 4937 + }, + { + "epoch": 1.3112468463683442, + "grad_norm": 1.1529903033018742, + "learning_rate": 5.818636839312309e-06, + "loss": 0.2247931957244873, + "step": 4938 + }, + { + "epoch": 1.3115124153498872, + "grad_norm": 1.162136486746663, + "learning_rate": 5.814648740169299e-06, + "loss": 0.23759335279464722, + "step": 4939 + }, + { + "epoch": 1.3117779843314301, + "grad_norm": 1.2647326324758852, 
+ "learning_rate": 5.8106614479126515e-06, + "loss": 0.23381784558296204, + "step": 4940 + }, + { + "epoch": 1.312043553312973, + "grad_norm": 1.2132087226777075, + "learning_rate": 5.8066749633110675e-06, + "loss": 0.2671264410018921, + "step": 4941 + }, + { + "epoch": 1.312309122294516, + "grad_norm": 1.09997395594631, + "learning_rate": 5.8026892871330944e-06, + "loss": 0.226065531373024, + "step": 4942 + }, + { + "epoch": 1.312574691276059, + "grad_norm": 1.3057172624305828, + "learning_rate": 5.798704420147124e-06, + "loss": 0.2654735743999481, + "step": 4943 + }, + { + "epoch": 1.312840260257602, + "grad_norm": 1.2538641402604982, + "learning_rate": 5.794720363121389e-06, + "loss": 0.23757833242416382, + "step": 4944 + }, + { + "epoch": 1.3131058292391449, + "grad_norm": 1.2131030914710175, + "learning_rate": 5.790737116823975e-06, + "loss": 0.2561591565608978, + "step": 4945 + }, + { + "epoch": 1.3133713982206878, + "grad_norm": 1.1698592689009908, + "learning_rate": 5.7867546820227995e-06, + "loss": 0.22105304896831512, + "step": 4946 + }, + { + "epoch": 1.3136369672022308, + "grad_norm": 1.190016500907537, + "learning_rate": 5.7827730594856325e-06, + "loss": 0.2485857605934143, + "step": 4947 + }, + { + "epoch": 1.3139025361837737, + "grad_norm": 1.2087719424455774, + "learning_rate": 5.7787922499800804e-06, + "loss": 0.21256676316261292, + "step": 4948 + }, + { + "epoch": 1.3141681051653167, + "grad_norm": 1.2561271472593831, + "learning_rate": 5.774812254273604e-06, + "loss": 0.2700715661048889, + "step": 4949 + }, + { + "epoch": 1.3144336741468596, + "grad_norm": 1.072264118800501, + "learning_rate": 5.770833073133488e-06, + "loss": 0.22239381074905396, + "step": 4950 + }, + { + "epoch": 1.3146992431284026, + "grad_norm": 1.2811464089131772, + "learning_rate": 5.766854707326878e-06, + "loss": 0.22973249852657318, + "step": 4951 + }, + { + "epoch": 1.3149648121099455, + "grad_norm": 1.3904264621036453, + "learning_rate": 5.762877157620751e-06, + "loss": 
0.27923673391342163, + "step": 4952 + }, + { + "epoch": 1.3152303810914885, + "grad_norm": 1.1321859486950596, + "learning_rate": 5.758900424781939e-06, + "loss": 0.23142218589782715, + "step": 4953 + }, + { + "epoch": 1.3154959500730314, + "grad_norm": 1.2732500147617782, + "learning_rate": 5.754924509577107e-06, + "loss": 0.23697996139526367, + "step": 4954 + }, + { + "epoch": 1.3157615190545744, + "grad_norm": 1.2838523265227373, + "learning_rate": 5.750949412772764e-06, + "loss": 0.27600961923599243, + "step": 4955 + }, + { + "epoch": 1.3160270880361173, + "grad_norm": 1.1644607269636458, + "learning_rate": 5.74697513513526e-06, + "loss": 0.2300705760717392, + "step": 4956 + }, + { + "epoch": 1.3162926570176603, + "grad_norm": 1.2927833273456342, + "learning_rate": 5.743001677430791e-06, + "loss": 0.2771111726760864, + "step": 4957 + }, + { + "epoch": 1.3165582259992032, + "grad_norm": 1.2582954956741819, + "learning_rate": 5.739029040425391e-06, + "loss": 0.2195657342672348, + "step": 4958 + }, + { + "epoch": 1.3168237949807462, + "grad_norm": 1.3450534906440017, + "learning_rate": 5.735057224884939e-06, + "loss": 0.2877159118652344, + "step": 4959 + }, + { + "epoch": 1.3170893639622892, + "grad_norm": 1.2211564124942835, + "learning_rate": 5.731086231575154e-06, + "loss": 0.264115571975708, + "step": 4960 + }, + { + "epoch": 1.317354932943832, + "grad_norm": 1.1286607753384608, + "learning_rate": 5.727116061261593e-06, + "loss": 0.22574637830257416, + "step": 4961 + }, + { + "epoch": 1.317620501925375, + "grad_norm": 1.3177978069758023, + "learning_rate": 5.723146714709664e-06, + "loss": 0.26063698530197144, + "step": 4962 + }, + { + "epoch": 1.317886070906918, + "grad_norm": 1.2211473527893268, + "learning_rate": 5.719178192684611e-06, + "loss": 0.26272428035736084, + "step": 4963 + }, + { + "epoch": 1.318151639888461, + "grad_norm": 1.257373941755789, + "learning_rate": 5.715210495951513e-06, + "loss": 0.27188578248023987, + "step": 4964 + }, + { + "epoch": 
1.318417208870004, + "grad_norm": 1.2786927551317604, + "learning_rate": 5.711243625275296e-06, + "loss": 0.26374363899230957, + "step": 4965 + }, + { + "epoch": 1.3186827778515469, + "grad_norm": 1.2469422291735242, + "learning_rate": 5.7072775814207275e-06, + "loss": 0.24819093942642212, + "step": 4966 + }, + { + "epoch": 1.3189483468330898, + "grad_norm": 1.3834225319345155, + "learning_rate": 5.703312365152412e-06, + "loss": 0.24387019872665405, + "step": 4967 + }, + { + "epoch": 1.319213915814633, + "grad_norm": 1.2919715806670669, + "learning_rate": 5.699347977234799e-06, + "loss": 0.2198091745376587, + "step": 4968 + }, + { + "epoch": 1.319479484796176, + "grad_norm": 1.3500197578827224, + "learning_rate": 5.695384418432174e-06, + "loss": 0.24349649250507355, + "step": 4969 + }, + { + "epoch": 1.319745053777719, + "grad_norm": 1.238323956307032, + "learning_rate": 5.691421689508661e-06, + "loss": 0.2330506294965744, + "step": 4970 + }, + { + "epoch": 1.3200106227592618, + "grad_norm": 1.2015417123740977, + "learning_rate": 5.687459791228234e-06, + "loss": 0.22821848094463348, + "step": 4971 + }, + { + "epoch": 1.3202761917408048, + "grad_norm": 1.1813366864368284, + "learning_rate": 5.683498724354699e-06, + "loss": 0.2342798113822937, + "step": 4972 + }, + { + "epoch": 1.3205417607223477, + "grad_norm": 1.0659168750954966, + "learning_rate": 5.679538489651702e-06, + "loss": 0.19689922034740448, + "step": 4973 + }, + { + "epoch": 1.3208073297038907, + "grad_norm": 1.1808385090527131, + "learning_rate": 5.675579087882727e-06, + "loss": 0.23910056054592133, + "step": 4974 + }, + { + "epoch": 1.3210728986854336, + "grad_norm": 1.381638431012013, + "learning_rate": 5.671620519811105e-06, + "loss": 0.25725993514060974, + "step": 4975 + }, + { + "epoch": 1.3213384676669766, + "grad_norm": 1.3528699347449313, + "learning_rate": 5.667662786199997e-06, + "loss": 0.3030434250831604, + "step": 4976 + }, + { + "epoch": 1.3216040366485196, + "grad_norm": 
1.1182092617897728, + "learning_rate": 5.6637058878124075e-06, + "loss": 0.223737433552742, + "step": 4977 + }, + { + "epoch": 1.3218696056300625, + "grad_norm": 1.07766141822832, + "learning_rate": 5.659749825411183e-06, + "loss": 0.21480265259742737, + "step": 4978 + }, + { + "epoch": 1.3221351746116055, + "grad_norm": 1.2398269968997129, + "learning_rate": 5.655794599759001e-06, + "loss": 0.23288744688034058, + "step": 4979 + }, + { + "epoch": 1.3224007435931484, + "grad_norm": 1.3344080514533678, + "learning_rate": 5.651840211618387e-06, + "loss": 0.23701068758964539, + "step": 4980 + }, + { + "epoch": 1.3226663125746914, + "grad_norm": 1.2102834630940547, + "learning_rate": 5.647886661751698e-06, + "loss": 0.22164157032966614, + "step": 4981 + }, + { + "epoch": 1.3229318815562343, + "grad_norm": 1.2096538262244674, + "learning_rate": 5.643933950921132e-06, + "loss": 0.23426607251167297, + "step": 4982 + }, + { + "epoch": 1.3231974505377773, + "grad_norm": 1.1880047089826309, + "learning_rate": 5.6399820798887266e-06, + "loss": 0.2567834258079529, + "step": 4983 + }, + { + "epoch": 1.3234630195193202, + "grad_norm": 1.3013809826248692, + "learning_rate": 5.6360310494163525e-06, + "loss": 0.2713038921356201, + "step": 4984 + }, + { + "epoch": 1.3237285885008632, + "grad_norm": 1.2908080991459006, + "learning_rate": 5.632080860265725e-06, + "loss": 0.2548249661922455, + "step": 4985 + }, + { + "epoch": 1.3239941574824061, + "grad_norm": 1.3471244082770852, + "learning_rate": 5.628131513198392e-06, + "loss": 0.2442832589149475, + "step": 4986 + }, + { + "epoch": 1.324259726463949, + "grad_norm": 1.3063670062134878, + "learning_rate": 5.6241830089757435e-06, + "loss": 0.24654853343963623, + "step": 4987 + }, + { + "epoch": 1.324525295445492, + "grad_norm": 1.2792033582455469, + "learning_rate": 5.620235348358997e-06, + "loss": 0.2802797853946686, + "step": 4988 + }, + { + "epoch": 1.324790864427035, + "grad_norm": 1.0588655062771883, + "learning_rate": 
5.616288532109225e-06, + "loss": 0.18801404535770416, + "step": 4989 + }, + { + "epoch": 1.325056433408578, + "grad_norm": 1.2235746865490262, + "learning_rate": 5.6123425609873235e-06, + "loss": 0.2685382068157196, + "step": 4990 + }, + { + "epoch": 1.3253220023901209, + "grad_norm": 1.1873888072876837, + "learning_rate": 5.608397435754029e-06, + "loss": 0.23479774594306946, + "step": 4991 + }, + { + "epoch": 1.3255875713716638, + "grad_norm": 1.2164455244711625, + "learning_rate": 5.604453157169914e-06, + "loss": 0.24198031425476074, + "step": 4992 + }, + { + "epoch": 1.3258531403532068, + "grad_norm": 1.3448749532595476, + "learning_rate": 5.60050972599539e-06, + "loss": 0.25523462891578674, + "step": 4993 + }, + { + "epoch": 1.3261187093347497, + "grad_norm": 1.1695382845281797, + "learning_rate": 5.596567142990703e-06, + "loss": 0.23196743428707123, + "step": 4994 + }, + { + "epoch": 1.3263842783162927, + "grad_norm": 1.3145586744837223, + "learning_rate": 5.592625408915939e-06, + "loss": 0.29365748167037964, + "step": 4995 + }, + { + "epoch": 1.3266498472978356, + "grad_norm": 1.1946134760289593, + "learning_rate": 5.588684524531014e-06, + "loss": 0.24509185552597046, + "step": 4996 + }, + { + "epoch": 1.3269154162793786, + "grad_norm": 1.3358300509723116, + "learning_rate": 5.584744490595687e-06, + "loss": 0.27032390236854553, + "step": 4997 + }, + { + "epoch": 1.3271809852609215, + "grad_norm": 1.1645416268641489, + "learning_rate": 5.580805307869549e-06, + "loss": 0.24401508271694183, + "step": 4998 + }, + { + "epoch": 1.3274465542424645, + "grad_norm": 1.1506901325018217, + "learning_rate": 5.576866977112028e-06, + "loss": 0.2216658741235733, + "step": 4999 + }, + { + "epoch": 1.3277121232240074, + "grad_norm": 1.1830944265124126, + "learning_rate": 5.5729294990823875e-06, + "loss": 0.24545373022556305, + "step": 5000 + }, + { + "epoch": 1.3279776922055504, + "grad_norm": 1.377548009409137, + "learning_rate": 5.568992874539728e-06, + "loss": 
0.260816752910614, + "step": 5001 + }, + { + "epoch": 1.3282432611870933, + "grad_norm": 1.1392730403811622, + "learning_rate": 5.565057104242984e-06, + "loss": 0.1850551962852478, + "step": 5002 + }, + { + "epoch": 1.3285088301686363, + "grad_norm": 2.1232949408605624, + "learning_rate": 5.561122188950923e-06, + "loss": 0.26854407787323, + "step": 5003 + }, + { + "epoch": 1.3287743991501793, + "grad_norm": 1.1591208934359583, + "learning_rate": 5.557188129422153e-06, + "loss": 0.24294906854629517, + "step": 5004 + }, + { + "epoch": 1.3290399681317222, + "grad_norm": 1.1880501452095942, + "learning_rate": 5.553254926415114e-06, + "loss": 0.2533603310585022, + "step": 5005 + }, + { + "epoch": 1.3293055371132652, + "grad_norm": 1.1756183262516449, + "learning_rate": 5.549322580688077e-06, + "loss": 0.2082313448190689, + "step": 5006 + }, + { + "epoch": 1.329571106094808, + "grad_norm": 1.1602290025540025, + "learning_rate": 5.545391092999158e-06, + "loss": 0.24265842139720917, + "step": 5007 + }, + { + "epoch": 1.329836675076351, + "grad_norm": 1.2321490774961563, + "learning_rate": 5.541460464106301e-06, + "loss": 0.2483578324317932, + "step": 5008 + }, + { + "epoch": 1.330102244057894, + "grad_norm": 1.2798509363454456, + "learning_rate": 5.537530694767281e-06, + "loss": 0.2769540548324585, + "step": 5009 + }, + { + "epoch": 1.330367813039437, + "grad_norm": 1.1781048091325885, + "learning_rate": 5.533601785739714e-06, + "loss": 0.2132025957107544, + "step": 5010 + }, + { + "epoch": 1.33063338202098, + "grad_norm": 1.2726887496075767, + "learning_rate": 5.529673737781047e-06, + "loss": 0.25223806500434875, + "step": 5011 + }, + { + "epoch": 1.3308989510025229, + "grad_norm": 1.13329365262538, + "learning_rate": 5.52574655164856e-06, + "loss": 0.22631296515464783, + "step": 5012 + }, + { + "epoch": 1.3311645199840658, + "grad_norm": 1.1821255064699665, + "learning_rate": 5.5218202280993725e-06, + "loss": 0.23756693303585052, + "step": 5013 + }, + { + "epoch": 
1.3314300889656088, + "grad_norm": 1.2775335630974591, + "learning_rate": 5.517894767890427e-06, + "loss": 0.24746376276016235, + "step": 5014 + }, + { + "epoch": 1.3316956579471517, + "grad_norm": 1.105165815318004, + "learning_rate": 5.513970171778504e-06, + "loss": 0.21463070809841156, + "step": 5015 + }, + { + "epoch": 1.3319612269286947, + "grad_norm": 1.2090979668871258, + "learning_rate": 5.510046440520228e-06, + "loss": 0.21256107091903687, + "step": 5016 + }, + { + "epoch": 1.3322267959102376, + "grad_norm": 1.1963664670778913, + "learning_rate": 5.506123574872044e-06, + "loss": 0.25800254940986633, + "step": 5017 + }, + { + "epoch": 1.3324923648917806, + "grad_norm": 1.2726257558813519, + "learning_rate": 5.502201575590236e-06, + "loss": 0.2421891689300537, + "step": 5018 + }, + { + "epoch": 1.3327579338733235, + "grad_norm": 1.3181283061442692, + "learning_rate": 5.498280443430917e-06, + "loss": 0.24375903606414795, + "step": 5019 + }, + { + "epoch": 1.3330235028548665, + "grad_norm": 1.2419078132332353, + "learning_rate": 5.494360179150033e-06, + "loss": 0.22173303365707397, + "step": 5020 + }, + { + "epoch": 1.3332890718364094, + "grad_norm": 1.1754676882141941, + "learning_rate": 5.49044078350337e-06, + "loss": 0.24005022644996643, + "step": 5021 + }, + { + "epoch": 1.3335546408179524, + "grad_norm": 1.194558748352182, + "learning_rate": 5.486522257246538e-06, + "loss": 0.2600201964378357, + "step": 5022 + }, + { + "epoch": 1.3338202097994953, + "grad_norm": 1.2112657273591712, + "learning_rate": 5.482604601134984e-06, + "loss": 0.22889836132526398, + "step": 5023 + }, + { + "epoch": 1.3340857787810383, + "grad_norm": 1.151722502872684, + "learning_rate": 5.478687815923981e-06, + "loss": 0.25045812129974365, + "step": 5024 + }, + { + "epoch": 1.3343513477625812, + "grad_norm": 1.2499612320902753, + "learning_rate": 5.474771902368646e-06, + "loss": 0.24649837613105774, + "step": 5025 + }, + { + "epoch": 1.3346169167441242, + "grad_norm": 
1.1975824340507155, + "learning_rate": 5.470856861223919e-06, + "loss": 0.23994389176368713, + "step": 5026 + }, + { + "epoch": 1.3348824857256671, + "grad_norm": 1.2488470912807048, + "learning_rate": 5.466942693244572e-06, + "loss": 0.24381600320339203, + "step": 5027 + }, + { + "epoch": 1.33514805470721, + "grad_norm": 1.1770895947351019, + "learning_rate": 5.463029399185217e-06, + "loss": 0.22110486030578613, + "step": 5028 + }, + { + "epoch": 1.335413623688753, + "grad_norm": 1.2878634690011452, + "learning_rate": 5.459116979800281e-06, + "loss": 0.25733259320259094, + "step": 5029 + }, + { + "epoch": 1.335679192670296, + "grad_norm": 1.2598918710105835, + "learning_rate": 5.4552054358440355e-06, + "loss": 0.22853803634643555, + "step": 5030 + }, + { + "epoch": 1.335944761651839, + "grad_norm": 1.3118793520277159, + "learning_rate": 5.451294768070581e-06, + "loss": 0.27503639459609985, + "step": 5031 + }, + { + "epoch": 1.336210330633382, + "grad_norm": 1.2721314541046291, + "learning_rate": 5.447384977233849e-06, + "loss": 0.27931997179985046, + "step": 5032 + }, + { + "epoch": 1.3364758996149249, + "grad_norm": 1.2287817779118972, + "learning_rate": 5.443476064087596e-06, + "loss": 0.2477954626083374, + "step": 5033 + }, + { + "epoch": 1.3367414685964678, + "grad_norm": 1.2204002745504476, + "learning_rate": 5.439568029385422e-06, + "loss": 0.2195623219013214, + "step": 5034 + }, + { + "epoch": 1.3370070375780108, + "grad_norm": 1.230653492520276, + "learning_rate": 5.435660873880747e-06, + "loss": 0.22160238027572632, + "step": 5035 + }, + { + "epoch": 1.3372726065595537, + "grad_norm": 1.6764380815480615, + "learning_rate": 5.4317545983268235e-06, + "loss": 0.24107405543327332, + "step": 5036 + }, + { + "epoch": 1.3375381755410967, + "grad_norm": 1.2985203082435115, + "learning_rate": 5.427849203476738e-06, + "loss": 0.2480086386203766, + "step": 5037 + }, + { + "epoch": 1.3378037445226398, + "grad_norm": 1.2654518356324462, + "learning_rate": 
5.4239446900834005e-06, + "loss": 0.22476691007614136, + "step": 5038 + }, + { + "epoch": 1.3380693135041828, + "grad_norm": 1.217906592075979, + "learning_rate": 5.420041058899559e-06, + "loss": 0.23685473203659058, + "step": 5039 + }, + { + "epoch": 1.3383348824857257, + "grad_norm": 1.215790635675812, + "learning_rate": 5.416138310677784e-06, + "loss": 0.27753746509552, + "step": 5040 + }, + { + "epoch": 1.3386004514672687, + "grad_norm": 1.2682075315501737, + "learning_rate": 5.412236446170482e-06, + "loss": 0.22446027398109436, + "step": 5041 + }, + { + "epoch": 1.3388660204488116, + "grad_norm": 1.2214424011593596, + "learning_rate": 5.4083354661298816e-06, + "loss": 0.2535285949707031, + "step": 5042 + }, + { + "epoch": 1.3391315894303546, + "grad_norm": 1.2982364680013232, + "learning_rate": 5.4044353713080565e-06, + "loss": 0.2412964254617691, + "step": 5043 + }, + { + "epoch": 1.3393971584118975, + "grad_norm": 1.3092797704576777, + "learning_rate": 5.4005361624568895e-06, + "loss": 0.23863038420677185, + "step": 5044 + }, + { + "epoch": 1.3396627273934405, + "grad_norm": 1.159506578977356, + "learning_rate": 5.396637840328105e-06, + "loss": 0.22741727530956268, + "step": 5045 + }, + { + "epoch": 1.3399282963749835, + "grad_norm": 1.285452356277395, + "learning_rate": 5.392740405673251e-06, + "loss": 0.2497379630804062, + "step": 5046 + }, + { + "epoch": 1.3401938653565264, + "grad_norm": 1.2401289485061215, + "learning_rate": 5.388843859243712e-06, + "loss": 0.19558298587799072, + "step": 5047 + }, + { + "epoch": 1.3404594343380694, + "grad_norm": 1.2074615239750155, + "learning_rate": 5.3849482017906914e-06, + "loss": 0.2266748994588852, + "step": 5048 + }, + { + "epoch": 1.3407250033196123, + "grad_norm": 1.2657162316868396, + "learning_rate": 5.381053434065229e-06, + "loss": 0.2410028576850891, + "step": 5049 + }, + { + "epoch": 1.3409905723011553, + "grad_norm": 1.301692886719208, + "learning_rate": 5.37715955681819e-06, + "loss": 
0.23965512216091156, + "step": 5050 + }, + { + "epoch": 1.3412561412826982, + "grad_norm": 1.1756365557449155, + "learning_rate": 5.373266570800262e-06, + "loss": 0.22440138459205627, + "step": 5051 + }, + { + "epoch": 1.3415217102642412, + "grad_norm": 1.2562473271519534, + "learning_rate": 5.369374476761975e-06, + "loss": 0.2509710192680359, + "step": 5052 + }, + { + "epoch": 1.3417872792457841, + "grad_norm": 1.3381440207626536, + "learning_rate": 5.365483275453677e-06, + "loss": 0.26555800437927246, + "step": 5053 + }, + { + "epoch": 1.342052848227327, + "grad_norm": 1.2240809600669689, + "learning_rate": 5.361592967625544e-06, + "loss": 0.23089733719825745, + "step": 5054 + }, + { + "epoch": 1.34231841720887, + "grad_norm": 1.1178692263054482, + "learning_rate": 5.357703554027582e-06, + "loss": 0.2040700763463974, + "step": 5055 + }, + { + "epoch": 1.342583986190413, + "grad_norm": 1.309704975193781, + "learning_rate": 5.353815035409624e-06, + "loss": 0.23539039492607117, + "step": 5056 + }, + { + "epoch": 1.342849555171956, + "grad_norm": 1.7065922202358847, + "learning_rate": 5.3499274125213294e-06, + "loss": 0.2190464437007904, + "step": 5057 + }, + { + "epoch": 1.3431151241534989, + "grad_norm": 1.1478595499251703, + "learning_rate": 5.346040686112189e-06, + "loss": 0.21557429432868958, + "step": 5058 + }, + { + "epoch": 1.3433806931350418, + "grad_norm": 1.1934269644730748, + "learning_rate": 5.342154856931515e-06, + "loss": 0.24398267269134521, + "step": 5059 + }, + { + "epoch": 1.3436462621165848, + "grad_norm": 1.1089059625649784, + "learning_rate": 5.338269925728451e-06, + "loss": 0.21652038395404816, + "step": 5060 + }, + { + "epoch": 1.3439118310981277, + "grad_norm": 1.1937531358219302, + "learning_rate": 5.334385893251966e-06, + "loss": 0.2031325101852417, + "step": 5061 + }, + { + "epoch": 1.3441774000796707, + "grad_norm": 1.1621991357090053, + "learning_rate": 5.330502760250853e-06, + "loss": 0.2484835982322693, + "step": 5062 + }, + { + 
"epoch": 1.3444429690612136, + "grad_norm": 1.2657742595884374, + "learning_rate": 5.326620527473737e-06, + "loss": 0.23698699474334717, + "step": 5063 + }, + { + "epoch": 1.3447085380427566, + "grad_norm": 1.2000433743668328, + "learning_rate": 5.322739195669065e-06, + "loss": 0.23928484320640564, + "step": 5064 + }, + { + "epoch": 1.3449741070242995, + "grad_norm": 1.1828146199314795, + "learning_rate": 5.318858765585115e-06, + "loss": 0.22679512202739716, + "step": 5065 + }, + { + "epoch": 1.3452396760058425, + "grad_norm": 1.2334385564497414, + "learning_rate": 5.314979237969984e-06, + "loss": 0.2115025818347931, + "step": 5066 + }, + { + "epoch": 1.3455052449873854, + "grad_norm": 1.261129899382787, + "learning_rate": 5.311100613571603e-06, + "loss": 0.2441834807395935, + "step": 5067 + }, + { + "epoch": 1.3457708139689284, + "grad_norm": 1.2722125718860966, + "learning_rate": 5.307222893137722e-06, + "loss": 0.2549205720424652, + "step": 5068 + }, + { + "epoch": 1.3460363829504713, + "grad_norm": 1.179054242584843, + "learning_rate": 5.3033460774159185e-06, + "loss": 0.24652990698814392, + "step": 5069 + }, + { + "epoch": 1.3463019519320143, + "grad_norm": 1.2062419936470874, + "learning_rate": 5.299470167153602e-06, + "loss": 0.2403775006532669, + "step": 5070 + }, + { + "epoch": 1.3465675209135572, + "grad_norm": 1.1208895570259512, + "learning_rate": 5.295595163097999e-06, + "loss": 0.2215663194656372, + "step": 5071 + }, + { + "epoch": 1.3468330898951002, + "grad_norm": 1.2914937229567889, + "learning_rate": 5.291721065996167e-06, + "loss": 0.2567424774169922, + "step": 5072 + }, + { + "epoch": 1.3470986588766432, + "grad_norm": 1.0608079556396839, + "learning_rate": 5.287847876594984e-06, + "loss": 0.21162359416484833, + "step": 5073 + }, + { + "epoch": 1.347364227858186, + "grad_norm": 1.221049341797181, + "learning_rate": 5.283975595641155e-06, + "loss": 0.21851085126399994, + "step": 5074 + }, + { + "epoch": 1.347629796839729, + "grad_norm": 
1.2935501467753354, + "learning_rate": 5.280104223881212e-06, + "loss": 0.2491171509027481, + "step": 5075 + }, + { + "epoch": 1.347895365821272, + "grad_norm": 1.2921255335421646, + "learning_rate": 5.276233762061507e-06, + "loss": 0.22467780113220215, + "step": 5076 + }, + { + "epoch": 1.348160934802815, + "grad_norm": 1.159790816626821, + "learning_rate": 5.272364210928223e-06, + "loss": 0.24531611800193787, + "step": 5077 + }, + { + "epoch": 1.348426503784358, + "grad_norm": 1.2178282841242851, + "learning_rate": 5.268495571227361e-06, + "loss": 0.2582520544528961, + "step": 5078 + }, + { + "epoch": 1.3486920727659009, + "grad_norm": 1.2175282778251775, + "learning_rate": 5.264627843704749e-06, + "loss": 0.21180811524391174, + "step": 5079 + }, + { + "epoch": 1.348957641747444, + "grad_norm": 1.2942378328530906, + "learning_rate": 5.2607610291060406e-06, + "loss": 0.27026671171188354, + "step": 5080 + }, + { + "epoch": 1.349223210728987, + "grad_norm": 1.1721525183169563, + "learning_rate": 5.256895128176712e-06, + "loss": 0.22954419255256653, + "step": 5081 + }, + { + "epoch": 1.34948877971053, + "grad_norm": 1.3561853541918854, + "learning_rate": 5.253030141662063e-06, + "loss": 0.24064484238624573, + "step": 5082 + }, + { + "epoch": 1.349754348692073, + "grad_norm": 1.1245550279116328, + "learning_rate": 5.249166070307218e-06, + "loss": 0.1981196105480194, + "step": 5083 + }, + { + "epoch": 1.3500199176736158, + "grad_norm": 1.0881909699390468, + "learning_rate": 5.2453029148571226e-06, + "loss": 0.19882233440876007, + "step": 5084 + }, + { + "epoch": 1.3502854866551588, + "grad_norm": 1.2123536275051694, + "learning_rate": 5.24144067605655e-06, + "loss": 0.2409907579421997, + "step": 5085 + }, + { + "epoch": 1.3505510556367017, + "grad_norm": 1.2197874501412473, + "learning_rate": 5.237579354650092e-06, + "loss": 0.2205093652009964, + "step": 5086 + }, + { + "epoch": 1.3508166246182447, + "grad_norm": 1.4716074796051495, + "learning_rate": 
5.233718951382163e-06, + "loss": 0.2283058911561966, + "step": 5087 + }, + { + "epoch": 1.3510821935997877, + "grad_norm": 1.2561007307780203, + "learning_rate": 5.229859466997012e-06, + "loss": 0.25584474205970764, + "step": 5088 + }, + { + "epoch": 1.3513477625813306, + "grad_norm": 1.1491167817661179, + "learning_rate": 5.226000902238696e-06, + "loss": 0.22516845166683197, + "step": 5089 + }, + { + "epoch": 1.3516133315628736, + "grad_norm": 1.2604818786719383, + "learning_rate": 5.222143257851102e-06, + "loss": 0.23440764844417572, + "step": 5090 + }, + { + "epoch": 1.3518789005444165, + "grad_norm": 1.2156754572685655, + "learning_rate": 5.218286534577938e-06, + "loss": 0.25858962535858154, + "step": 5091 + }, + { + "epoch": 1.3521444695259595, + "grad_norm": 1.1425154357949754, + "learning_rate": 5.214430733162736e-06, + "loss": 0.20676326751708984, + "step": 5092 + }, + { + "epoch": 1.3524100385075024, + "grad_norm": 1.1266241214136956, + "learning_rate": 5.210575854348853e-06, + "loss": 0.21892425417900085, + "step": 5093 + }, + { + "epoch": 1.3526756074890454, + "grad_norm": 1.2379350388596377, + "learning_rate": 5.206721898879454e-06, + "loss": 0.2538335919380188, + "step": 5094 + }, + { + "epoch": 1.3529411764705883, + "grad_norm": 1.2059035716196298, + "learning_rate": 5.202868867497542e-06, + "loss": 0.24750448763370514, + "step": 5095 + }, + { + "epoch": 1.3532067454521313, + "grad_norm": 1.2602608504342458, + "learning_rate": 5.199016760945931e-06, + "loss": 0.2569364011287689, + "step": 5096 + }, + { + "epoch": 1.3534723144336742, + "grad_norm": 0.9860855220263709, + "learning_rate": 5.19516557996727e-06, + "loss": 0.16788914799690247, + "step": 5097 + }, + { + "epoch": 1.3537378834152172, + "grad_norm": 1.0020852845957948, + "learning_rate": 5.191315325304018e-06, + "loss": 0.19006651639938354, + "step": 5098 + }, + { + "epoch": 1.3540034523967601, + "grad_norm": 1.187896658740898, + "learning_rate": 5.1874659976984575e-06, + "loss": 
0.23474551737308502, + "step": 5099 + }, + { + "epoch": 1.354269021378303, + "grad_norm": 1.2829971661643687, + "learning_rate": 5.183617597892694e-06, + "loss": 0.26601099967956543, + "step": 5100 + }, + { + "epoch": 1.354534590359846, + "grad_norm": 1.1758855450162613, + "learning_rate": 5.179770126628654e-06, + "loss": 0.24207550287246704, + "step": 5101 + }, + { + "epoch": 1.354800159341389, + "grad_norm": 1.2535446057143411, + "learning_rate": 5.175923584648083e-06, + "loss": 0.2538307309150696, + "step": 5102 + }, + { + "epoch": 1.355065728322932, + "grad_norm": 1.1865818667829109, + "learning_rate": 5.172077972692553e-06, + "loss": 0.23073242604732513, + "step": 5103 + }, + { + "epoch": 1.3553312973044749, + "grad_norm": 1.348848385270533, + "learning_rate": 5.168233291503448e-06, + "loss": 0.2634595036506653, + "step": 5104 + }, + { + "epoch": 1.3555968662860178, + "grad_norm": 1.225057907199874, + "learning_rate": 5.1643895418219744e-06, + "loss": 0.23282350599765778, + "step": 5105 + }, + { + "epoch": 1.3558624352675608, + "grad_norm": 1.333152685269679, + "learning_rate": 5.160546724389172e-06, + "loss": 0.2543700933456421, + "step": 5106 + }, + { + "epoch": 1.3561280042491037, + "grad_norm": 1.1449256417555271, + "learning_rate": 5.1567048399458855e-06, + "loss": 0.2005772739648819, + "step": 5107 + }, + { + "epoch": 1.3563935732306467, + "grad_norm": 1.2429630346358373, + "learning_rate": 5.152863889232787e-06, + "loss": 0.2367073893547058, + "step": 5108 + }, + { + "epoch": 1.3566591422121896, + "grad_norm": 1.2839253544945022, + "learning_rate": 5.14902387299036e-06, + "loss": 0.25600770115852356, + "step": 5109 + }, + { + "epoch": 1.3569247111937326, + "grad_norm": 1.198566513294344, + "learning_rate": 5.145184791958918e-06, + "loss": 0.21678754687309265, + "step": 5110 + }, + { + "epoch": 1.3571902801752755, + "grad_norm": 1.3894724787206996, + "learning_rate": 5.141346646878591e-06, + "loss": 0.265438973903656, + "step": 5111 + }, + { + "epoch": 
1.3574558491568185, + "grad_norm": 1.1239736089383028, + "learning_rate": 5.13750943848933e-06, + "loss": 0.24246999621391296, + "step": 5112 + }, + { + "epoch": 1.3577214181383614, + "grad_norm": 1.299396280421792, + "learning_rate": 5.133673167530899e-06, + "loss": 0.25401771068573, + "step": 5113 + }, + { + "epoch": 1.3579869871199044, + "grad_norm": 1.2329813534125698, + "learning_rate": 5.129837834742885e-06, + "loss": 0.2698017656803131, + "step": 5114 + }, + { + "epoch": 1.3582525561014474, + "grad_norm": 1.2787210937788358, + "learning_rate": 5.126003440864703e-06, + "loss": 0.27006995677948, + "step": 5115 + }, + { + "epoch": 1.3585181250829903, + "grad_norm": 1.2695682196385796, + "learning_rate": 5.122169986635575e-06, + "loss": 0.2370866984128952, + "step": 5116 + }, + { + "epoch": 1.3587836940645333, + "grad_norm": 1.3031561376922138, + "learning_rate": 5.1183374727945425e-06, + "loss": 0.24017807841300964, + "step": 5117 + }, + { + "epoch": 1.3590492630460762, + "grad_norm": 1.1487956614446662, + "learning_rate": 5.114505900080473e-06, + "loss": 0.21664533019065857, + "step": 5118 + }, + { + "epoch": 1.3593148320276192, + "grad_norm": 4.246209132455192, + "learning_rate": 5.110675269232046e-06, + "loss": 0.24561598896980286, + "step": 5119 + }, + { + "epoch": 1.359580401009162, + "grad_norm": 1.3902415348604562, + "learning_rate": 5.106845580987763e-06, + "loss": 0.26678937673568726, + "step": 5120 + }, + { + "epoch": 1.359845969990705, + "grad_norm": 1.354168350096278, + "learning_rate": 5.103016836085943e-06, + "loss": 0.21919070184230804, + "step": 5121 + }, + { + "epoch": 1.360111538972248, + "grad_norm": 1.3057665036353723, + "learning_rate": 5.099189035264722e-06, + "loss": 0.24887943267822266, + "step": 5122 + }, + { + "epoch": 1.360377107953791, + "grad_norm": 1.2017875007060346, + "learning_rate": 5.0953621792620556e-06, + "loss": 0.23597784340381622, + "step": 5123 + }, + { + "epoch": 1.360642676935334, + "grad_norm": 1.2098630506546966, + 
"learning_rate": 5.091536268815717e-06, + "loss": 0.21265193819999695, + "step": 5124 + }, + { + "epoch": 1.3609082459168769, + "grad_norm": 1.3606980074054404, + "learning_rate": 5.0877113046632945e-06, + "loss": 0.29837465286254883, + "step": 5125 + }, + { + "epoch": 1.3611738148984198, + "grad_norm": 1.1915793844006848, + "learning_rate": 5.0838872875421975e-06, + "loss": 0.2324269413948059, + "step": 5126 + }, + { + "epoch": 1.3614393838799628, + "grad_norm": 1.0970197687294143, + "learning_rate": 5.080064218189652e-06, + "loss": 0.19149541854858398, + "step": 5127 + }, + { + "epoch": 1.3617049528615057, + "grad_norm": 1.1710303609542994, + "learning_rate": 5.0762420973427e-06, + "loss": 0.247644305229187, + "step": 5128 + }, + { + "epoch": 1.3619705218430487, + "grad_norm": 1.1403838601028529, + "learning_rate": 5.0724209257382006e-06, + "loss": 0.2272202968597412, + "step": 5129 + }, + { + "epoch": 1.3622360908245916, + "grad_norm": 1.2012952880900256, + "learning_rate": 5.068600704112832e-06, + "loss": 0.25735989212989807, + "step": 5130 + }, + { + "epoch": 1.3625016598061346, + "grad_norm": 1.1771555574179005, + "learning_rate": 5.064781433203086e-06, + "loss": 0.19970473647117615, + "step": 5131 + }, + { + "epoch": 1.3627672287876775, + "grad_norm": 1.2156620394191346, + "learning_rate": 5.060963113745272e-06, + "loss": 0.24289372563362122, + "step": 5132 + }, + { + "epoch": 1.3630327977692205, + "grad_norm": 1.2352988713677027, + "learning_rate": 5.0571457464755226e-06, + "loss": 0.2757350504398346, + "step": 5133 + }, + { + "epoch": 1.3632983667507634, + "grad_norm": 1.2115447809386193, + "learning_rate": 5.053329332129777e-06, + "loss": 0.24552851915359497, + "step": 5134 + }, + { + "epoch": 1.3635639357323064, + "grad_norm": 1.1546263092618338, + "learning_rate": 5.049513871443797e-06, + "loss": 0.22152797877788544, + "step": 5135 + }, + { + "epoch": 1.3638295047138493, + "grad_norm": 1.2567398712194906, + "learning_rate": 5.045699365153155e-06, + 
"loss": 0.27098602056503296, + "step": 5136 + }, + { + "epoch": 1.3640950736953923, + "grad_norm": 1.201852433475055, + "learning_rate": 5.041885813993246e-06, + "loss": 0.21275216341018677, + "step": 5137 + }, + { + "epoch": 1.3643606426769352, + "grad_norm": 1.3326670101473788, + "learning_rate": 5.038073218699275e-06, + "loss": 0.2510162591934204, + "step": 5138 + }, + { + "epoch": 1.3646262116584782, + "grad_norm": 1.2702563681918038, + "learning_rate": 5.034261580006269e-06, + "loss": 0.23203429579734802, + "step": 5139 + }, + { + "epoch": 1.3648917806400211, + "grad_norm": 1.137285489869793, + "learning_rate": 5.030450898649064e-06, + "loss": 0.22178995609283447, + "step": 5140 + }, + { + "epoch": 1.365157349621564, + "grad_norm": 1.2415754400243457, + "learning_rate": 5.026641175362316e-06, + "loss": 0.2567412257194519, + "step": 5141 + }, + { + "epoch": 1.365422918603107, + "grad_norm": 1.232487080143156, + "learning_rate": 5.022832410880494e-06, + "loss": 0.21939827501773834, + "step": 5142 + }, + { + "epoch": 1.36568848758465, + "grad_norm": 1.4733425270104286, + "learning_rate": 5.019024605937882e-06, + "loss": 0.2325637936592102, + "step": 5143 + }, + { + "epoch": 1.365954056566193, + "grad_norm": 1.266575596941496, + "learning_rate": 5.015217761268582e-06, + "loss": 0.2416393756866455, + "step": 5144 + }, + { + "epoch": 1.366219625547736, + "grad_norm": 1.289260413423763, + "learning_rate": 5.011411877606507e-06, + "loss": 0.2439568042755127, + "step": 5145 + }, + { + "epoch": 1.3664851945292789, + "grad_norm": 1.1439689034996021, + "learning_rate": 5.007606955685387e-06, + "loss": 0.2495957612991333, + "step": 5146 + }, + { + "epoch": 1.3667507635108218, + "grad_norm": 1.1937127912858143, + "learning_rate": 5.003802996238766e-06, + "loss": 0.23415328562259674, + "step": 5147 + }, + { + "epoch": 1.3670163324923648, + "grad_norm": 1.26410321081345, + "learning_rate": 5.000000000000003e-06, + "loss": 0.2637922465801239, + "step": 5148 + }, + { + "epoch": 
1.3672819014739077, + "grad_norm": 1.243307173830296, + "learning_rate": 4.9961979677022696e-06, + "loss": 0.2319526970386505, + "step": 5149 + }, + { + "epoch": 1.3675474704554509, + "grad_norm": 1.2115383829826751, + "learning_rate": 4.992396900078551e-06, + "loss": 0.2338445484638214, + "step": 5150 + }, + { + "epoch": 1.3678130394369938, + "grad_norm": 1.1683439299091893, + "learning_rate": 4.988596797861654e-06, + "loss": 0.19041961431503296, + "step": 5151 + }, + { + "epoch": 1.3680786084185368, + "grad_norm": 1.233073404450011, + "learning_rate": 4.984797661784191e-06, + "loss": 0.2698138952255249, + "step": 5152 + }, + { + "epoch": 1.3683441774000797, + "grad_norm": 1.2592426315358647, + "learning_rate": 4.980999492578588e-06, + "loss": 0.2208167165517807, + "step": 5153 + }, + { + "epoch": 1.3686097463816227, + "grad_norm": 1.1935159953807641, + "learning_rate": 4.9772022909770915e-06, + "loss": 0.2515152096748352, + "step": 5154 + }, + { + "epoch": 1.3688753153631656, + "grad_norm": 1.3110804278343313, + "learning_rate": 4.973406057711755e-06, + "loss": 0.2393365204334259, + "step": 5155 + }, + { + "epoch": 1.3691408843447086, + "grad_norm": 1.302037077529998, + "learning_rate": 4.969610793514446e-06, + "loss": 0.24546492099761963, + "step": 5156 + }, + { + "epoch": 1.3694064533262515, + "grad_norm": 1.5300417364025873, + "learning_rate": 4.965816499116849e-06, + "loss": 0.252412348985672, + "step": 5157 + }, + { + "epoch": 1.3696720223077945, + "grad_norm": 1.1552882128683561, + "learning_rate": 4.962023175250461e-06, + "loss": 0.22654281556606293, + "step": 5158 + }, + { + "epoch": 1.3699375912893375, + "grad_norm": 1.2873880265204376, + "learning_rate": 4.958230822646581e-06, + "loss": 0.2542813718318939, + "step": 5159 + }, + { + "epoch": 1.3702031602708804, + "grad_norm": 1.2851879635778218, + "learning_rate": 4.9544394420363395e-06, + "loss": 0.25376224517822266, + "step": 5160 + }, + { + "epoch": 1.3704687292524234, + "grad_norm": 
1.252574665809313, + "learning_rate": 4.950649034150666e-06, + "loss": 0.21911674737930298, + "step": 5161 + }, + { + "epoch": 1.3707342982339663, + "grad_norm": 1.3527776455922371, + "learning_rate": 4.946859599720308e-06, + "loss": 0.2805126905441284, + "step": 5162 + }, + { + "epoch": 1.3709998672155093, + "grad_norm": 1.1716388954292443, + "learning_rate": 4.943071139475824e-06, + "loss": 0.2189590483903885, + "step": 5163 + }, + { + "epoch": 1.3712654361970522, + "grad_norm": 1.2218109142926636, + "learning_rate": 4.939283654147582e-06, + "loss": 0.21837599575519562, + "step": 5164 + }, + { + "epoch": 1.3715310051785952, + "grad_norm": 1.2779646624690562, + "learning_rate": 4.935497144465766e-06, + "loss": 0.25090983510017395, + "step": 5165 + }, + { + "epoch": 1.3717965741601381, + "grad_norm": 1.1988734011828608, + "learning_rate": 4.93171161116037e-06, + "loss": 0.22028754651546478, + "step": 5166 + }, + { + "epoch": 1.372062143141681, + "grad_norm": 1.1554753760684375, + "learning_rate": 4.927927054961201e-06, + "loss": 0.20097196102142334, + "step": 5167 + }, + { + "epoch": 1.372327712123224, + "grad_norm": 1.209557738779129, + "learning_rate": 4.924143476597872e-06, + "loss": 0.230082705616951, + "step": 5168 + }, + { + "epoch": 1.372593281104767, + "grad_norm": 1.1549715219295726, + "learning_rate": 4.920360876799821e-06, + "loss": 0.23701804876327515, + "step": 5169 + }, + { + "epoch": 1.37285885008631, + "grad_norm": 1.2740998730652584, + "learning_rate": 4.9165792562962834e-06, + "loss": 0.22357231378555298, + "step": 5170 + }, + { + "epoch": 1.3731244190678529, + "grad_norm": 1.2042473616661704, + "learning_rate": 4.912798615816312e-06, + "loss": 0.2533026337623596, + "step": 5171 + }, + { + "epoch": 1.3733899880493958, + "grad_norm": 1.3342025781776312, + "learning_rate": 4.90901895608877e-06, + "loss": 0.24878138303756714, + "step": 5172 + }, + { + "epoch": 1.3736555570309388, + "grad_norm": 1.5415419516618216, + "learning_rate": 
4.905240277842335e-06, + "loss": 0.22641420364379883, + "step": 5173 + }, + { + "epoch": 1.3739211260124817, + "grad_norm": 1.2916997982097302, + "learning_rate": 4.901462581805483e-06, + "loss": 0.24495793879032135, + "step": 5174 + }, + { + "epoch": 1.3741866949940247, + "grad_norm": 1.3531795848957913, + "learning_rate": 4.897685868706512e-06, + "loss": 0.2688868045806885, + "step": 5175 + }, + { + "epoch": 1.3744522639755676, + "grad_norm": 1.2828126418821555, + "learning_rate": 4.893910139273531e-06, + "loss": 0.25796642899513245, + "step": 5176 + }, + { + "epoch": 1.3747178329571106, + "grad_norm": 1.4091718050104127, + "learning_rate": 4.890135394234451e-06, + "loss": 0.27557405829429626, + "step": 5177 + }, + { + "epoch": 1.3749834019386535, + "grad_norm": 1.620605499986823, + "learning_rate": 4.886361634317004e-06, + "loss": 0.23553809523582458, + "step": 5178 + }, + { + "epoch": 1.3752489709201965, + "grad_norm": 1.2608742989736732, + "learning_rate": 4.882588860248725e-06, + "loss": 0.2454400360584259, + "step": 5179 + }, + { + "epoch": 1.3755145399017394, + "grad_norm": 1.1743865548501493, + "learning_rate": 4.878817072756959e-06, + "loss": 0.19460657238960266, + "step": 5180 + }, + { + "epoch": 1.3757801088832824, + "grad_norm": 1.2528300475452, + "learning_rate": 4.875046272568863e-06, + "loss": 0.24833449721336365, + "step": 5181 + }, + { + "epoch": 1.3760456778648253, + "grad_norm": 1.3263672125712147, + "learning_rate": 4.871276460411403e-06, + "loss": 0.2774161994457245, + "step": 5182 + }, + { + "epoch": 1.3763112468463683, + "grad_norm": 2.6268834337513667, + "learning_rate": 4.867507637011353e-06, + "loss": 0.2277964949607849, + "step": 5183 + }, + { + "epoch": 1.3765768158279112, + "grad_norm": 1.8924198767245841, + "learning_rate": 4.863739803095299e-06, + "loss": 0.2176733911037445, + "step": 5184 + }, + { + "epoch": 1.3768423848094542, + "grad_norm": 1.3153810073025014, + "learning_rate": 4.859972959389634e-06, + "loss": 
0.23529113829135895, + "step": 5185 + }, + { + "epoch": 1.3771079537909972, + "grad_norm": 1.3909544444662505, + "learning_rate": 4.856207106620557e-06, + "loss": 0.2646695077419281, + "step": 5186 + }, + { + "epoch": 1.37737352277254, + "grad_norm": 1.2095108180861869, + "learning_rate": 4.852442245514093e-06, + "loss": 0.23179873824119568, + "step": 5187 + }, + { + "epoch": 1.377639091754083, + "grad_norm": 1.1084014698771758, + "learning_rate": 4.84867837679605e-06, + "loss": 0.2127494066953659, + "step": 5188 + }, + { + "epoch": 1.377904660735626, + "grad_norm": 1.2275201950569183, + "learning_rate": 4.844915501192062e-06, + "loss": 0.2204679548740387, + "step": 5189 + }, + { + "epoch": 1.378170229717169, + "grad_norm": 1.2078653060668294, + "learning_rate": 4.841153619427567e-06, + "loss": 0.20271794497966766, + "step": 5190 + }, + { + "epoch": 1.378435798698712, + "grad_norm": 1.4269963155687142, + "learning_rate": 4.837392732227811e-06, + "loss": 0.2785792052745819, + "step": 5191 + }, + { + "epoch": 1.3787013676802549, + "grad_norm": 1.2501319487764966, + "learning_rate": 4.8336328403178486e-06, + "loss": 0.24904468655586243, + "step": 5192 + }, + { + "epoch": 1.378966936661798, + "grad_norm": 1.1230965332904321, + "learning_rate": 4.829873944422544e-06, + "loss": 0.20045346021652222, + "step": 5193 + }, + { + "epoch": 1.379232505643341, + "grad_norm": 1.1339816903135191, + "learning_rate": 4.826116045266565e-06, + "loss": 0.21814313530921936, + "step": 5194 + }, + { + "epoch": 1.379498074624884, + "grad_norm": 1.236126479276255, + "learning_rate": 4.82235914357439e-06, + "loss": 0.2408592253923416, + "step": 5195 + }, + { + "epoch": 1.379763643606427, + "grad_norm": 1.1229995433845732, + "learning_rate": 4.818603240070311e-06, + "loss": 0.21453416347503662, + "step": 5196 + }, + { + "epoch": 1.3800292125879698, + "grad_norm": 1.2915687788203387, + "learning_rate": 4.814848335478418e-06, + "loss": 0.2578599154949188, + "step": 5197 + }, + { + "epoch": 
1.3802947815695128, + "grad_norm": 1.0696662022967476, + "learning_rate": 4.811094430522613e-06, + "loss": 0.1980094015598297, + "step": 5198 + }, + { + "epoch": 1.3805603505510557, + "grad_norm": 1.202740960535961, + "learning_rate": 4.807341525926604e-06, + "loss": 0.24620960652828217, + "step": 5199 + }, + { + "epoch": 1.3808259195325987, + "grad_norm": 1.2486655803425535, + "learning_rate": 4.803589622413908e-06, + "loss": 0.23525282740592957, + "step": 5200 + }, + { + "epoch": 1.3810914885141417, + "grad_norm": 1.1657735912575689, + "learning_rate": 4.799838720707847e-06, + "loss": 0.2277744859457016, + "step": 5201 + }, + { + "epoch": 1.3813570574956846, + "grad_norm": 1.2927728942283212, + "learning_rate": 4.796088821531549e-06, + "loss": 0.2727074921131134, + "step": 5202 + }, + { + "epoch": 1.3816226264772276, + "grad_norm": 1.2370931993726209, + "learning_rate": 4.7923399256079525e-06, + "loss": 0.21686753630638123, + "step": 5203 + }, + { + "epoch": 1.3818881954587705, + "grad_norm": 1.2572583885252075, + "learning_rate": 4.788592033659799e-06, + "loss": 0.2841380834579468, + "step": 5204 + }, + { + "epoch": 1.3821537644403135, + "grad_norm": 1.1157272204593003, + "learning_rate": 4.78484514640964e-06, + "loss": 0.24577853083610535, + "step": 5205 + }, + { + "epoch": 1.3824193334218564, + "grad_norm": 1.2077705032221964, + "learning_rate": 4.7810992645798285e-06, + "loss": 0.22289782762527466, + "step": 5206 + }, + { + "epoch": 1.3826849024033994, + "grad_norm": 1.1476107334002954, + "learning_rate": 4.7773543888925274e-06, + "loss": 0.2223999947309494, + "step": 5207 + }, + { + "epoch": 1.3829504713849423, + "grad_norm": 1.2183085137487102, + "learning_rate": 4.773610520069706e-06, + "loss": 0.23938870429992676, + "step": 5208 + }, + { + "epoch": 1.3832160403664853, + "grad_norm": 1.219370193725879, + "learning_rate": 4.769867658833136e-06, + "loss": 0.260856568813324, + "step": 5209 + }, + { + "epoch": 1.3834816093480282, + "grad_norm": 
1.2333269697463725, + "learning_rate": 4.766125805904398e-06, + "loss": 0.23602089285850525, + "step": 5210 + }, + { + "epoch": 1.3837471783295712, + "grad_norm": 1.156747833138865, + "learning_rate": 4.762384962004877e-06, + "loss": 0.22543978691101074, + "step": 5211 + }, + { + "epoch": 1.3840127473111141, + "grad_norm": 1.3639051201807257, + "learning_rate": 4.758645127855763e-06, + "loss": 0.2432224452495575, + "step": 5212 + }, + { + "epoch": 1.384278316292657, + "grad_norm": 1.3947016936895973, + "learning_rate": 4.754906304178049e-06, + "loss": 0.22764597833156586, + "step": 5213 + }, + { + "epoch": 1.3845438852742, + "grad_norm": 1.2064067504011344, + "learning_rate": 4.751168491692541e-06, + "loss": 0.22503387928009033, + "step": 5214 + }, + { + "epoch": 1.384809454255743, + "grad_norm": 1.1066861130484609, + "learning_rate": 4.747431691119846e-06, + "loss": 0.21889932453632355, + "step": 5215 + }, + { + "epoch": 1.385075023237286, + "grad_norm": 1.3903278318809302, + "learning_rate": 4.743695903180372e-06, + "loss": 0.2695825695991516, + "step": 5216 + }, + { + "epoch": 1.3853405922188289, + "grad_norm": 1.2921759622470506, + "learning_rate": 4.739961128594336e-06, + "loss": 0.265118271112442, + "step": 5217 + }, + { + "epoch": 1.3856061612003718, + "grad_norm": 1.1349207398090602, + "learning_rate": 4.736227368081757e-06, + "loss": 0.2050788253545761, + "step": 5218 + }, + { + "epoch": 1.3858717301819148, + "grad_norm": 1.23951121142384, + "learning_rate": 4.7324946223624625e-06, + "loss": 0.274588406085968, + "step": 5219 + }, + { + "epoch": 1.3861372991634577, + "grad_norm": 1.209560473571303, + "learning_rate": 4.728762892156079e-06, + "loss": 0.2242514044046402, + "step": 5220 + }, + { + "epoch": 1.3864028681450007, + "grad_norm": 1.1337174836883812, + "learning_rate": 4.725032178182042e-06, + "loss": 0.19989261031150818, + "step": 5221 + }, + { + "epoch": 1.3866684371265436, + "grad_norm": 1.1989339880554155, + "learning_rate": 
4.721302481159588e-06, + "loss": 0.24409207701683044, + "step": 5222 + }, + { + "epoch": 1.3869340061080866, + "grad_norm": 1.2425140627800753, + "learning_rate": 4.71757380180776e-06, + "loss": 0.25146353244781494, + "step": 5223 + }, + { + "epoch": 1.3871995750896295, + "grad_norm": 1.245669068902739, + "learning_rate": 4.713846140845401e-06, + "loss": 0.23076622188091278, + "step": 5224 + }, + { + "epoch": 1.3874651440711725, + "grad_norm": 1.1122357580396618, + "learning_rate": 4.7101194989911635e-06, + "loss": 0.2159188687801361, + "step": 5225 + }, + { + "epoch": 1.3877307130527154, + "grad_norm": 1.433039209205417, + "learning_rate": 4.706393876963497e-06, + "loss": 0.24891307950019836, + "step": 5226 + }, + { + "epoch": 1.3879962820342584, + "grad_norm": 1.2167285098476437, + "learning_rate": 4.702669275480659e-06, + "loss": 0.26254773139953613, + "step": 5227 + }, + { + "epoch": 1.3882618510158014, + "grad_norm": 1.0872799599118763, + "learning_rate": 4.698945695260709e-06, + "loss": 0.19589121639728546, + "step": 5228 + }, + { + "epoch": 1.3885274199973443, + "grad_norm": 1.273899860234835, + "learning_rate": 4.695223137021509e-06, + "loss": 0.23796147108078003, + "step": 5229 + }, + { + "epoch": 1.3887929889788873, + "grad_norm": 1.1566738109261303, + "learning_rate": 4.6915016014807235e-06, + "loss": 0.21211156249046326, + "step": 5230 + }, + { + "epoch": 1.3890585579604302, + "grad_norm": 1.1477189909918881, + "learning_rate": 4.687781089355817e-06, + "loss": 0.22418555617332458, + "step": 5231 + }, + { + "epoch": 1.3893241269419732, + "grad_norm": 1.1999712861158167, + "learning_rate": 4.68406160136407e-06, + "loss": 0.24140511453151703, + "step": 5232 + }, + { + "epoch": 1.389589695923516, + "grad_norm": 1.3515422291949701, + "learning_rate": 4.68034313822255e-06, + "loss": 0.2863473892211914, + "step": 5233 + }, + { + "epoch": 1.389855264905059, + "grad_norm": 1.1002404477789451, + "learning_rate": 4.676625700648133e-06, + "loss": 
0.21283546090126038, + "step": 5234 + }, + { + "epoch": 1.390120833886602, + "grad_norm": 1.311958297113244, + "learning_rate": 4.672909289357498e-06, + "loss": 0.2701990008354187, + "step": 5235 + }, + { + "epoch": 1.390386402868145, + "grad_norm": 1.1672674472381515, + "learning_rate": 4.669193905067124e-06, + "loss": 0.23807264864444733, + "step": 5236 + }, + { + "epoch": 1.390651971849688, + "grad_norm": 1.3282268361230456, + "learning_rate": 4.665479548493298e-06, + "loss": 0.22204206883907318, + "step": 5237 + }, + { + "epoch": 1.3909175408312309, + "grad_norm": 1.2590492281878678, + "learning_rate": 4.661766220352098e-06, + "loss": 0.22389569878578186, + "step": 5238 + }, + { + "epoch": 1.3911831098127738, + "grad_norm": 1.2844920522393721, + "learning_rate": 4.65805392135941e-06, + "loss": 0.23752997815608978, + "step": 5239 + }, + { + "epoch": 1.3914486787943168, + "grad_norm": 1.8677910056359206, + "learning_rate": 4.654342652230921e-06, + "loss": 0.24055880308151245, + "step": 5240 + }, + { + "epoch": 1.3917142477758597, + "grad_norm": 1.2030621240735913, + "learning_rate": 4.6506324136821255e-06, + "loss": 0.22136151790618896, + "step": 5241 + }, + { + "epoch": 1.3919798167574027, + "grad_norm": 1.299031121789001, + "learning_rate": 4.646923206428311e-06, + "loss": 0.2616429924964905, + "step": 5242 + }, + { + "epoch": 1.3922453857389456, + "grad_norm": 1.218734267375269, + "learning_rate": 4.643215031184569e-06, + "loss": 0.24827662110328674, + "step": 5243 + }, + { + "epoch": 1.3925109547204886, + "grad_norm": 1.3223478407487963, + "learning_rate": 4.639507888665792e-06, + "loss": 0.21999669075012207, + "step": 5244 + }, + { + "epoch": 1.3927765237020315, + "grad_norm": 1.3241857590600639, + "learning_rate": 4.6358017795866715e-06, + "loss": 0.24511300027370453, + "step": 5245 + }, + { + "epoch": 1.3930420926835745, + "grad_norm": 1.2459535025826622, + "learning_rate": 4.632096704661704e-06, + "loss": 0.2410753220319748, + "step": 5246 + }, + { + 
"epoch": 1.3933076616651174, + "grad_norm": 1.157173292152249, + "learning_rate": 4.628392664605184e-06, + "loss": 0.2160021960735321, + "step": 5247 + }, + { + "epoch": 1.3935732306466604, + "grad_norm": 1.2204303717623475, + "learning_rate": 4.624689660131204e-06, + "loss": 0.22672782838344574, + "step": 5248 + }, + { + "epoch": 1.3938387996282033, + "grad_norm": 1.3056904555347544, + "learning_rate": 4.620987691953659e-06, + "loss": 0.25474926829338074, + "step": 5249 + }, + { + "epoch": 1.3941043686097463, + "grad_norm": 1.3078938706976893, + "learning_rate": 4.617286760786252e-06, + "loss": 0.2449323832988739, + "step": 5250 + }, + { + "epoch": 1.3943699375912892, + "grad_norm": 1.4350253205296164, + "learning_rate": 4.613586867342473e-06, + "loss": 0.23727643489837646, + "step": 5251 + }, + { + "epoch": 1.3946355065728322, + "grad_norm": 1.492440797106639, + "learning_rate": 4.609888012335624e-06, + "loss": 0.23727962374687195, + "step": 5252 + }, + { + "epoch": 1.3949010755543751, + "grad_norm": 1.1595482332609377, + "learning_rate": 4.60619019647879e-06, + "loss": 0.21957805752754211, + "step": 5253 + }, + { + "epoch": 1.395166644535918, + "grad_norm": 1.1972608851584254, + "learning_rate": 4.6024934204848745e-06, + "loss": 0.24184471368789673, + "step": 5254 + }, + { + "epoch": 1.395432213517461, + "grad_norm": 1.2654091836286674, + "learning_rate": 4.598797685066568e-06, + "loss": 0.239216148853302, + "step": 5255 + }, + { + "epoch": 1.395697782499004, + "grad_norm": 1.1503034311319646, + "learning_rate": 4.595102990936367e-06, + "loss": 0.17741018533706665, + "step": 5256 + }, + { + "epoch": 1.395963351480547, + "grad_norm": 1.2669115039567294, + "learning_rate": 4.591409338806566e-06, + "loss": 0.26139867305755615, + "step": 5257 + }, + { + "epoch": 1.39622892046209, + "grad_norm": 1.1295627244433792, + "learning_rate": 4.587716729389251e-06, + "loss": 0.23689255118370056, + "step": 5258 + }, + { + "epoch": 1.3964944894436329, + "grad_norm": 
1.3449494333614898, + "learning_rate": 4.584025163396323e-06, + "loss": 0.22679267823696136, + "step": 5259 + }, + { + "epoch": 1.3967600584251758, + "grad_norm": 1.4665032620533849, + "learning_rate": 4.580334641539467e-06, + "loss": 0.2743435204029083, + "step": 5260 + }, + { + "epoch": 1.3970256274067188, + "grad_norm": 1.166091966014122, + "learning_rate": 4.5766451645301735e-06, + "loss": 0.22738990187644958, + "step": 5261 + }, + { + "epoch": 1.3972911963882617, + "grad_norm": 1.2398512539901747, + "learning_rate": 4.57295673307973e-06, + "loss": 0.24826082587242126, + "step": 5262 + }, + { + "epoch": 1.3975567653698049, + "grad_norm": 1.2172880570038314, + "learning_rate": 4.569269347899222e-06, + "loss": 0.23121042549610138, + "step": 5263 + }, + { + "epoch": 1.3978223343513478, + "grad_norm": 2.1881918032824443, + "learning_rate": 4.5655830096995345e-06, + "loss": 0.21382957696914673, + "step": 5264 + }, + { + "epoch": 1.3980879033328908, + "grad_norm": 1.6700623666107715, + "learning_rate": 4.561897719191349e-06, + "loss": 0.24439184367656708, + "step": 5265 + }, + { + "epoch": 1.3983534723144337, + "grad_norm": 1.1734120938371422, + "learning_rate": 4.558213477085148e-06, + "loss": 0.2106003314256668, + "step": 5266 + }, + { + "epoch": 1.3986190412959767, + "grad_norm": 1.568387486793487, + "learning_rate": 4.554530284091209e-06, + "loss": 0.3073291480541229, + "step": 5267 + }, + { + "epoch": 1.3988846102775196, + "grad_norm": 1.226744359266016, + "learning_rate": 4.550848140919606e-06, + "loss": 0.2448226660490036, + "step": 5268 + }, + { + "epoch": 1.3991501792590626, + "grad_norm": 1.4434974870419186, + "learning_rate": 4.5471670482802165e-06, + "loss": 0.25378671288490295, + "step": 5269 + }, + { + "epoch": 1.3994157482406056, + "grad_norm": 1.243366792714921, + "learning_rate": 4.5434870068827086e-06, + "loss": 0.2735089659690857, + "step": 5270 + }, + { + "epoch": 1.3996813172221485, + "grad_norm": 1.3983115308066707, + "learning_rate": 
4.539808017436552e-06, + "loss": 0.2530548870563507, + "step": 5271 + }, + { + "epoch": 1.3999468862036915, + "grad_norm": 1.2566722493021396, + "learning_rate": 4.536130080651015e-06, + "loss": 0.23692254722118378, + "step": 5272 + }, + { + "epoch": 1.4002124551852344, + "grad_norm": 1.257120121799197, + "learning_rate": 4.532453197235155e-06, + "loss": 0.24554882943630219, + "step": 5273 + }, + { + "epoch": 1.4004780241667774, + "grad_norm": 1.2106096425654094, + "learning_rate": 4.528777367897837e-06, + "loss": 0.20152084529399872, + "step": 5274 + }, + { + "epoch": 1.4007435931483203, + "grad_norm": 1.207683737630722, + "learning_rate": 4.525102593347714e-06, + "loss": 0.20908965170383453, + "step": 5275 + }, + { + "epoch": 1.4010091621298633, + "grad_norm": 1.2398706056963738, + "learning_rate": 4.521428874293238e-06, + "loss": 0.23158209025859833, + "step": 5276 + }, + { + "epoch": 1.4012747311114062, + "grad_norm": 1.2494835342931663, + "learning_rate": 4.517756211442664e-06, + "loss": 0.2483675330877304, + "step": 5277 + }, + { + "epoch": 1.4015403000929492, + "grad_norm": 1.1662936164598174, + "learning_rate": 4.514084605504035e-06, + "loss": 0.23435397446155548, + "step": 5278 + }, + { + "epoch": 1.4018058690744921, + "grad_norm": 1.242534131664269, + "learning_rate": 4.510414057185195e-06, + "loss": 0.2605316936969757, + "step": 5279 + }, + { + "epoch": 1.402071438056035, + "grad_norm": 1.148911142729499, + "learning_rate": 4.506744567193782e-06, + "loss": 0.2279929518699646, + "step": 5280 + }, + { + "epoch": 1.402337007037578, + "grad_norm": 1.1849060379752767, + "learning_rate": 4.503076136237228e-06, + "loss": 0.23011639714241028, + "step": 5281 + }, + { + "epoch": 1.402602576019121, + "grad_norm": 1.1735153050753564, + "learning_rate": 4.499408765022765e-06, + "loss": 0.213611900806427, + "step": 5282 + }, + { + "epoch": 1.402868145000664, + "grad_norm": 1.3225078215525052, + "learning_rate": 4.495742454257418e-06, + "loss": 0.25555503368377686, + 
"step": 5283 + }, + { + "epoch": 1.4031337139822069, + "grad_norm": 1.331030123703595, + "learning_rate": 4.4920772046480095e-06, + "loss": 0.2694614827632904, + "step": 5284 + }, + { + "epoch": 1.4033992829637498, + "grad_norm": 1.3958578164403037, + "learning_rate": 4.4884130169011565e-06, + "loss": 0.2160607874393463, + "step": 5285 + }, + { + "epoch": 1.4036648519452928, + "grad_norm": 1.4996515147203022, + "learning_rate": 4.48474989172327e-06, + "loss": 0.2556128203868866, + "step": 5286 + }, + { + "epoch": 1.4039304209268357, + "grad_norm": 1.2506403611380352, + "learning_rate": 4.481087829820558e-06, + "loss": 0.2251313328742981, + "step": 5287 + }, + { + "epoch": 1.4041959899083787, + "grad_norm": 1.380992563161254, + "learning_rate": 4.477426831899024e-06, + "loss": 0.26856666803359985, + "step": 5288 + }, + { + "epoch": 1.4044615588899216, + "grad_norm": 1.2429158128712894, + "learning_rate": 4.473766898664464e-06, + "loss": 0.25573840737342834, + "step": 5289 + }, + { + "epoch": 1.4047271278714646, + "grad_norm": 1.2559748496125192, + "learning_rate": 4.4701080308224685e-06, + "loss": 0.26519301533699036, + "step": 5290 + }, + { + "epoch": 1.4049926968530075, + "grad_norm": 1.5959863642176566, + "learning_rate": 4.466450229078427e-06, + "loss": 0.2329619824886322, + "step": 5291 + }, + { + "epoch": 1.4052582658345505, + "grad_norm": 1.208485124140325, + "learning_rate": 4.4627934941375185e-06, + "loss": 0.2243901491165161, + "step": 5292 + }, + { + "epoch": 1.4055238348160934, + "grad_norm": 1.2042065274178317, + "learning_rate": 4.45913782670472e-06, + "loss": 0.22516998648643494, + "step": 5293 + }, + { + "epoch": 1.4057894037976364, + "grad_norm": 1.2427926273641645, + "learning_rate": 4.455483227484796e-06, + "loss": 0.25573113560676575, + "step": 5294 + }, + { + "epoch": 1.4060549727791793, + "grad_norm": 1.3935629686917204, + "learning_rate": 4.451829697182317e-06, + "loss": 0.2568536698818207, + "step": 5295 + }, + { + "epoch": 
1.4063205417607223, + "grad_norm": 1.293797792298673, + "learning_rate": 4.448177236501638e-06, + "loss": 0.24510663747787476, + "step": 5296 + }, + { + "epoch": 1.4065861107422652, + "grad_norm": 1.3445763390180965, + "learning_rate": 4.444525846146911e-06, + "loss": 0.24890470504760742, + "step": 5297 + }, + { + "epoch": 1.4068516797238082, + "grad_norm": 1.3096169257052843, + "learning_rate": 4.440875526822081e-06, + "loss": 0.21442994475364685, + "step": 5298 + }, + { + "epoch": 1.4071172487053512, + "grad_norm": 1.2628911672392604, + "learning_rate": 4.437226279230884e-06, + "loss": 0.24281370639801025, + "step": 5299 + }, + { + "epoch": 1.407382817686894, + "grad_norm": 1.2336479145010515, + "learning_rate": 4.433578104076853e-06, + "loss": 0.19542500376701355, + "step": 5300 + }, + { + "epoch": 1.407648386668437, + "grad_norm": 1.256359230599367, + "learning_rate": 4.429931002063315e-06, + "loss": 0.22688990831375122, + "step": 5301 + }, + { + "epoch": 1.40791395564998, + "grad_norm": 1.3692436485711592, + "learning_rate": 4.42628497389339e-06, + "loss": 0.2520858347415924, + "step": 5302 + }, + { + "epoch": 1.408179524631523, + "grad_norm": 1.1723697651028326, + "learning_rate": 4.42264002026998e-06, + "loss": 0.237991064786911, + "step": 5303 + }, + { + "epoch": 1.408445093613066, + "grad_norm": 1.1277997255078087, + "learning_rate": 4.418996141895797e-06, + "loss": 0.20164436101913452, + "step": 5304 + }, + { + "epoch": 1.408710662594609, + "grad_norm": 1.2657361694815492, + "learning_rate": 4.415353339473338e-06, + "loss": 0.24009189009666443, + "step": 5305 + }, + { + "epoch": 1.408976231576152, + "grad_norm": 1.138145945953283, + "learning_rate": 4.411711613704889e-06, + "loss": 0.23170322179794312, + "step": 5306 + }, + { + "epoch": 1.409241800557695, + "grad_norm": 1.2244077415708243, + "learning_rate": 4.408070965292534e-06, + "loss": 0.2280617356300354, + "step": 5307 + }, + { + "epoch": 1.409507369539238, + "grad_norm": 1.2724409466040383, + 
"learning_rate": 4.404431394938145e-06, + "loss": 0.21982887387275696, + "step": 5308 + }, + { + "epoch": 1.409772938520781, + "grad_norm": 1.265647410959733, + "learning_rate": 4.40079290334339e-06, + "loss": 0.25295430421829224, + "step": 5309 + }, + { + "epoch": 1.4100385075023238, + "grad_norm": 1.1099961782761754, + "learning_rate": 4.397155491209727e-06, + "loss": 0.20109041035175323, + "step": 5310 + }, + { + "epoch": 1.4103040764838668, + "grad_norm": 1.3436616824827443, + "learning_rate": 4.393519159238405e-06, + "loss": 0.2487715482711792, + "step": 5311 + }, + { + "epoch": 1.4105696454654097, + "grad_norm": 1.1475311486694626, + "learning_rate": 4.389883908130465e-06, + "loss": 0.2031790167093277, + "step": 5312 + }, + { + "epoch": 1.4108352144469527, + "grad_norm": 1.277969729475343, + "learning_rate": 4.386249738586744e-06, + "loss": 0.23029211163520813, + "step": 5313 + }, + { + "epoch": 1.4111007834284957, + "grad_norm": 1.2100830863469687, + "learning_rate": 4.382616651307866e-06, + "loss": 0.23080995678901672, + "step": 5314 + }, + { + "epoch": 1.4113663524100386, + "grad_norm": 1.2376227742095711, + "learning_rate": 4.378984646994248e-06, + "loss": 0.2450534999370575, + "step": 5315 + }, + { + "epoch": 1.4116319213915816, + "grad_norm": 1.266655148641824, + "learning_rate": 4.375353726346094e-06, + "loss": 0.24349799752235413, + "step": 5316 + }, + { + "epoch": 1.4118974903731245, + "grad_norm": 1.2696628766548714, + "learning_rate": 4.371723890063411e-06, + "loss": 0.2431599199771881, + "step": 5317 + }, + { + "epoch": 1.4121630593546675, + "grad_norm": 1.3688178233929764, + "learning_rate": 4.368095138845978e-06, + "loss": 0.2051251232624054, + "step": 5318 + }, + { + "epoch": 1.4124286283362104, + "grad_norm": 1.1726447102511934, + "learning_rate": 4.36446747339338e-06, + "loss": 0.21346575021743774, + "step": 5319 + }, + { + "epoch": 1.4126941973177534, + "grad_norm": 1.2726406383058895, + "learning_rate": 4.360840894404989e-06, + "loss": 
0.22193217277526855, + "step": 5320 + }, + { + "epoch": 1.4129597662992963, + "grad_norm": 1.2762131056761095, + "learning_rate": 4.357215402579961e-06, + "loss": 0.2112501859664917, + "step": 5321 + }, + { + "epoch": 1.4132253352808393, + "grad_norm": 1.1864412536946314, + "learning_rate": 4.3535909986172565e-06, + "loss": 0.2648766040802002, + "step": 5322 + }, + { + "epoch": 1.4134909042623822, + "grad_norm": 1.1533413783243194, + "learning_rate": 4.349967683215614e-06, + "loss": 0.22139690816402435, + "step": 5323 + }, + { + "epoch": 1.4137564732439252, + "grad_norm": 1.0259028802936685, + "learning_rate": 4.346345457073568e-06, + "loss": 0.21558481454849243, + "step": 5324 + }, + { + "epoch": 1.4140220422254681, + "grad_norm": 1.2763949378052617, + "learning_rate": 4.342724320889438e-06, + "loss": 0.2013886272907257, + "step": 5325 + }, + { + "epoch": 1.414287611207011, + "grad_norm": 1.2216640015824227, + "learning_rate": 4.3391042753613375e-06, + "loss": 0.2428729385137558, + "step": 5326 + }, + { + "epoch": 1.414553180188554, + "grad_norm": 1.2385329501903242, + "learning_rate": 4.3354853211871696e-06, + "loss": 0.20930354297161102, + "step": 5327 + }, + { + "epoch": 1.414818749170097, + "grad_norm": 1.1373474530618315, + "learning_rate": 4.331867459064623e-06, + "loss": 0.18988853693008423, + "step": 5328 + }, + { + "epoch": 1.41508431815164, + "grad_norm": 1.2833653393491664, + "learning_rate": 4.328250689691182e-06, + "loss": 0.24618801474571228, + "step": 5329 + }, + { + "epoch": 1.4153498871331829, + "grad_norm": 1.2635824567099267, + "learning_rate": 4.324635013764113e-06, + "loss": 0.23857265710830688, + "step": 5330 + }, + { + "epoch": 1.4156154561147258, + "grad_norm": 1.3200622076177175, + "learning_rate": 4.321020431980483e-06, + "loss": 0.21869014203548431, + "step": 5331 + }, + { + "epoch": 1.4158810250962688, + "grad_norm": 1.2317649692424293, + "learning_rate": 4.317406945037138e-06, + "loss": 0.2508969008922577, + "step": 5332 + }, + { + 
"epoch": 1.4161465940778117, + "grad_norm": 1.2114692744130235, + "learning_rate": 4.313794553630711e-06, + "loss": 0.2406233549118042, + "step": 5333 + }, + { + "epoch": 1.4164121630593547, + "grad_norm": 1.3314396378070763, + "learning_rate": 4.310183258457632e-06, + "loss": 0.2376224398612976, + "step": 5334 + }, + { + "epoch": 1.4166777320408976, + "grad_norm": 1.4802475566731417, + "learning_rate": 4.306573060214115e-06, + "loss": 0.2818688750267029, + "step": 5335 + }, + { + "epoch": 1.4169433010224406, + "grad_norm": 1.2248721858463099, + "learning_rate": 4.302963959596165e-06, + "loss": 0.2279777228832245, + "step": 5336 + }, + { + "epoch": 1.4172088700039835, + "grad_norm": 1.3681495314955672, + "learning_rate": 4.299355957299573e-06, + "loss": 0.2652052640914917, + "step": 5337 + }, + { + "epoch": 1.4174744389855265, + "grad_norm": 1.2814638931564002, + "learning_rate": 4.2957490540199185e-06, + "loss": 0.24415750801563263, + "step": 5338 + }, + { + "epoch": 1.4177400079670694, + "grad_norm": 1.2028147011593575, + "learning_rate": 4.292143250452569e-06, + "loss": 0.2318287044763565, + "step": 5339 + }, + { + "epoch": 1.4180055769486124, + "grad_norm": 1.1621443407054215, + "learning_rate": 4.288538547292685e-06, + "loss": 0.19914361834526062, + "step": 5340 + }, + { + "epoch": 1.4182711459301554, + "grad_norm": 1.2533818722517012, + "learning_rate": 4.2849349452352095e-06, + "loss": 0.22550678253173828, + "step": 5341 + }, + { + "epoch": 1.4185367149116983, + "grad_norm": 1.3481328868952585, + "learning_rate": 4.281332444974874e-06, + "loss": 0.25001436471939087, + "step": 5342 + }, + { + "epoch": 1.4188022838932413, + "grad_norm": 1.2557895781680242, + "learning_rate": 4.277731047206197e-06, + "loss": 0.24873407185077667, + "step": 5343 + }, + { + "epoch": 1.4190678528747842, + "grad_norm": 1.2532145662207181, + "learning_rate": 4.274130752623487e-06, + "loss": 0.25732600688934326, + "step": 5344 + }, + { + "epoch": 1.4193334218563272, + "grad_norm": 
1.1956499236331526, + "learning_rate": 4.270531561920836e-06, + "loss": 0.1894054263830185, + "step": 5345 + }, + { + "epoch": 1.4195989908378701, + "grad_norm": 1.2861805940078326, + "learning_rate": 4.2669334757921284e-06, + "loss": 0.2632025480270386, + "step": 5346 + }, + { + "epoch": 1.419864559819413, + "grad_norm": 1.1223708980675566, + "learning_rate": 4.2633364949310315e-06, + "loss": 0.22106415033340454, + "step": 5347 + }, + { + "epoch": 1.420130128800956, + "grad_norm": 1.2191554963858982, + "learning_rate": 4.259740620031e-06, + "loss": 0.2246699184179306, + "step": 5348 + }, + { + "epoch": 1.420395697782499, + "grad_norm": 1.2377251567235985, + "learning_rate": 4.256145851785277e-06, + "loss": 0.2335890382528305, + "step": 5349 + }, + { + "epoch": 1.420661266764042, + "grad_norm": 1.3200881727026734, + "learning_rate": 4.252552190886892e-06, + "loss": 0.25485220551490784, + "step": 5350 + }, + { + "epoch": 1.4209268357455849, + "grad_norm": 1.406483107573335, + "learning_rate": 4.248959638028659e-06, + "loss": 0.26234719157218933, + "step": 5351 + }, + { + "epoch": 1.4211924047271278, + "grad_norm": 1.1946878328095272, + "learning_rate": 4.245368193903181e-06, + "loss": 0.22083795070648193, + "step": 5352 + }, + { + "epoch": 1.4214579737086708, + "grad_norm": 1.288602079194267, + "learning_rate": 4.241777859202846e-06, + "loss": 0.1886332929134369, + "step": 5353 + }, + { + "epoch": 1.4217235426902137, + "grad_norm": 1.506700165302322, + "learning_rate": 4.238188634619826e-06, + "loss": 0.26154160499572754, + "step": 5354 + }, + { + "epoch": 1.4219891116717567, + "grad_norm": 1.1472960297751262, + "learning_rate": 4.234600520846085e-06, + "loss": 0.24761158227920532, + "step": 5355 + }, + { + "epoch": 1.4222546806532996, + "grad_norm": 1.154393443673505, + "learning_rate": 4.2310135185733625e-06, + "loss": 0.20936736464500427, + "step": 5356 + }, + { + "epoch": 1.4225202496348426, + "grad_norm": 1.15600424022186, + "learning_rate": 
4.227427628493198e-06, + "loss": 0.2173127979040146, + "step": 5357 + }, + { + "epoch": 1.4227858186163855, + "grad_norm": 1.217414245555098, + "learning_rate": 4.223842851296907e-06, + "loss": 0.2598559260368347, + "step": 5358 + }, + { + "epoch": 1.4230513875979285, + "grad_norm": 1.224021391863692, + "learning_rate": 4.22025918767559e-06, + "loss": 0.23701196908950806, + "step": 5359 + }, + { + "epoch": 1.4233169565794714, + "grad_norm": 1.2134140712383175, + "learning_rate": 4.216676638320135e-06, + "loss": 0.26052403450012207, + "step": 5360 + }, + { + "epoch": 1.4235825255610144, + "grad_norm": 1.2465682642545985, + "learning_rate": 4.213095203921217e-06, + "loss": 0.2464584857225418, + "step": 5361 + }, + { + "epoch": 1.4238480945425573, + "grad_norm": 1.2646547527576821, + "learning_rate": 4.209514885169294e-06, + "loss": 0.25889426469802856, + "step": 5362 + }, + { + "epoch": 1.4241136635241003, + "grad_norm": 1.2990812156107416, + "learning_rate": 4.2059356827546076e-06, + "loss": 0.26529380679130554, + "step": 5363 + }, + { + "epoch": 1.4243792325056432, + "grad_norm": 1.1509506747022789, + "learning_rate": 4.202357597367187e-06, + "loss": 0.2284630388021469, + "step": 5364 + }, + { + "epoch": 1.4246448014871862, + "grad_norm": 1.1509689814009059, + "learning_rate": 4.198780629696845e-06, + "loss": 0.2361873984336853, + "step": 5365 + }, + { + "epoch": 1.4249103704687291, + "grad_norm": 1.2489364054166838, + "learning_rate": 4.195204780433179e-06, + "loss": 0.2473624348640442, + "step": 5366 + }, + { + "epoch": 1.425175939450272, + "grad_norm": 1.2584581044476912, + "learning_rate": 4.19163005026557e-06, + "loss": 0.24852773547172546, + "step": 5367 + }, + { + "epoch": 1.425441508431815, + "grad_norm": 1.413523972125062, + "learning_rate": 4.188056439883183e-06, + "loss": 0.28409647941589355, + "step": 5368 + }, + { + "epoch": 1.425707077413358, + "grad_norm": 1.2672381227374172, + "learning_rate": 4.18448394997497e-06, + "loss": 0.2500985562801361, + 
"step": 5369 + }, + { + "epoch": 1.425972646394901, + "grad_norm": 1.2421534737421158, + "learning_rate": 4.1809125812296635e-06, + "loss": 0.23475977778434753, + "step": 5370 + }, + { + "epoch": 1.426238215376444, + "grad_norm": 1.3107626948919207, + "learning_rate": 4.177342334335782e-06, + "loss": 0.22925345599651337, + "step": 5371 + }, + { + "epoch": 1.4265037843579869, + "grad_norm": 1.1701714137905739, + "learning_rate": 4.173773209981627e-06, + "loss": 0.24463894963264465, + "step": 5372 + }, + { + "epoch": 1.4267693533395298, + "grad_norm": 1.2600839330793319, + "learning_rate": 4.170205208855281e-06, + "loss": 0.2451590746641159, + "step": 5373 + }, + { + "epoch": 1.4270349223210728, + "grad_norm": 1.192456234510782, + "learning_rate": 4.166638331644613e-06, + "loss": 0.21078437566757202, + "step": 5374 + }, + { + "epoch": 1.427300491302616, + "grad_norm": 1.1548728286132999, + "learning_rate": 4.163072579037279e-06, + "loss": 0.21466529369354248, + "step": 5375 + }, + { + "epoch": 1.4275660602841589, + "grad_norm": 1.3327200015078104, + "learning_rate": 4.159507951720713e-06, + "loss": 0.20103147625923157, + "step": 5376 + }, + { + "epoch": 1.4278316292657018, + "grad_norm": 1.2634022835060015, + "learning_rate": 4.15594445038213e-06, + "loss": 0.2618871331214905, + "step": 5377 + }, + { + "epoch": 1.4280971982472448, + "grad_norm": 1.314150540124243, + "learning_rate": 4.152382075708534e-06, + "loss": 0.2496388852596283, + "step": 5378 + }, + { + "epoch": 1.4283627672287877, + "grad_norm": 1.2776066314767451, + "learning_rate": 4.148820828386707e-06, + "loss": 0.2663899064064026, + "step": 5379 + }, + { + "epoch": 1.4286283362103307, + "grad_norm": 1.223751737565641, + "learning_rate": 4.145260709103216e-06, + "loss": 0.23617541790008545, + "step": 5380 + }, + { + "epoch": 1.4288939051918736, + "grad_norm": 1.2184450229688006, + "learning_rate": 4.141701718544411e-06, + "loss": 0.200006365776062, + "step": 5381 + }, + { + "epoch": 1.4291594741734166, + 
"grad_norm": 1.2899877428495155, + "learning_rate": 4.138143857396425e-06, + "loss": 0.22707203030586243, + "step": 5382 + }, + { + "epoch": 1.4294250431549596, + "grad_norm": 1.210998695531734, + "learning_rate": 4.134587126345162e-06, + "loss": 0.23903624713420868, + "step": 5383 + }, + { + "epoch": 1.4296906121365025, + "grad_norm": 1.56990305006701, + "learning_rate": 4.131031526076329e-06, + "loss": 0.2308908998966217, + "step": 5384 + }, + { + "epoch": 1.4299561811180455, + "grad_norm": 1.2125776866133393, + "learning_rate": 4.127477057275398e-06, + "loss": 0.18762601912021637, + "step": 5385 + }, + { + "epoch": 1.4302217500995884, + "grad_norm": 1.3670823879917342, + "learning_rate": 4.123923720627633e-06, + "loss": 0.281406044960022, + "step": 5386 + }, + { + "epoch": 1.4304873190811314, + "grad_norm": 1.24677960623226, + "learning_rate": 4.120371516818071e-06, + "loss": 0.24858589470386505, + "step": 5387 + }, + { + "epoch": 1.4307528880626743, + "grad_norm": 1.2017896897650255, + "learning_rate": 4.116820446531538e-06, + "loss": 0.22179371118545532, + "step": 5388 + }, + { + "epoch": 1.4310184570442173, + "grad_norm": 1.1523445225939053, + "learning_rate": 4.113270510452636e-06, + "loss": 0.22086869180202484, + "step": 5389 + }, + { + "epoch": 1.4312840260257602, + "grad_norm": 1.295626323300653, + "learning_rate": 4.109721709265753e-06, + "loss": 0.231503427028656, + "step": 5390 + }, + { + "epoch": 1.4315495950073032, + "grad_norm": 1.31237620612278, + "learning_rate": 4.106174043655054e-06, + "loss": 0.255252867937088, + "step": 5391 + }, + { + "epoch": 1.4318151639888461, + "grad_norm": 1.2773394357808008, + "learning_rate": 4.1026275143044854e-06, + "loss": 0.23336587846279144, + "step": 5392 + }, + { + "epoch": 1.432080732970389, + "grad_norm": 1.3267952754600625, + "learning_rate": 4.099082121897783e-06, + "loss": 0.2468583881855011, + "step": 5393 + }, + { + "epoch": 1.432346301951932, + "grad_norm": 1.2137255679394872, + "learning_rate": 
4.095537867118452e-06, + "loss": 0.21211153268814087, + "step": 5394 + }, + { + "epoch": 1.432611870933475, + "grad_norm": 1.2552061461264346, + "learning_rate": 4.091994750649783e-06, + "loss": 0.23173204064369202, + "step": 5395 + }, + { + "epoch": 1.432877439915018, + "grad_norm": 1.2420339991667666, + "learning_rate": 4.088452773174853e-06, + "loss": 0.2606658935546875, + "step": 5396 + }, + { + "epoch": 1.4331430088965609, + "grad_norm": 1.2141954954044303, + "learning_rate": 4.084911935376502e-06, + "loss": 0.21198314428329468, + "step": 5397 + }, + { + "epoch": 1.4334085778781038, + "grad_norm": 1.273859413406427, + "learning_rate": 4.08137223793737e-06, + "loss": 0.216193288564682, + "step": 5398 + }, + { + "epoch": 1.4336741468596468, + "grad_norm": 1.3862686522767422, + "learning_rate": 4.077833681539866e-06, + "loss": 0.27767330408096313, + "step": 5399 + }, + { + "epoch": 1.4339397158411897, + "grad_norm": 1.193043888736233, + "learning_rate": 4.0742962668661826e-06, + "loss": 0.21584349870681763, + "step": 5400 + }, + { + "epoch": 1.4342052848227327, + "grad_norm": 1.2801175216615184, + "learning_rate": 4.070759994598288e-06, + "loss": 0.220070481300354, + "step": 5401 + }, + { + "epoch": 1.4344708538042756, + "grad_norm": 1.4276288870785, + "learning_rate": 4.067224865417941e-06, + "loss": 0.26035353541374207, + "step": 5402 + }, + { + "epoch": 1.4347364227858186, + "grad_norm": 1.1784144309393945, + "learning_rate": 4.063690880006671e-06, + "loss": 0.23704876005649567, + "step": 5403 + }, + { + "epoch": 1.4350019917673615, + "grad_norm": 1.2793709287846655, + "learning_rate": 4.060158039045785e-06, + "loss": 0.2345760464668274, + "step": 5404 + }, + { + "epoch": 1.4352675607489045, + "grad_norm": 1.2583985201804126, + "learning_rate": 4.056626343216377e-06, + "loss": 0.21307331323623657, + "step": 5405 + }, + { + "epoch": 1.4355331297304474, + "grad_norm": 1.2401804894465362, + "learning_rate": 4.053095793199313e-06, + "loss": 0.22029465436935425, + 
"step": 5406 + }, + { + "epoch": 1.4357986987119904, + "grad_norm": 1.3865770800537958, + "learning_rate": 4.049566389675244e-06, + "loss": 0.23419252038002014, + "step": 5407 + }, + { + "epoch": 1.4360642676935333, + "grad_norm": 1.2114754283066453, + "learning_rate": 4.046038133324595e-06, + "loss": 0.21648669242858887, + "step": 5408 + }, + { + "epoch": 1.4363298366750763, + "grad_norm": 1.3682353450989566, + "learning_rate": 4.042511024827573e-06, + "loss": 0.2343464195728302, + "step": 5409 + }, + { + "epoch": 1.4365954056566193, + "grad_norm": 1.28417678054491, + "learning_rate": 4.0389850648641615e-06, + "loss": 0.20108605921268463, + "step": 5410 + }, + { + "epoch": 1.4368609746381622, + "grad_norm": 1.2806759093192033, + "learning_rate": 4.0354602541141315e-06, + "loss": 0.21885806322097778, + "step": 5411 + }, + { + "epoch": 1.4371265436197052, + "grad_norm": 1.276580988371958, + "learning_rate": 4.031936593257017e-06, + "loss": 0.2382376492023468, + "step": 5412 + }, + { + "epoch": 1.437392112601248, + "grad_norm": 1.1333519329501958, + "learning_rate": 4.028414082972141e-06, + "loss": 0.21434128284454346, + "step": 5413 + }, + { + "epoch": 1.437657681582791, + "grad_norm": 1.2161992893188567, + "learning_rate": 4.024892723938601e-06, + "loss": 0.2345191240310669, + "step": 5414 + }, + { + "epoch": 1.437923250564334, + "grad_norm": 1.309666461481554, + "learning_rate": 4.021372516835273e-06, + "loss": 0.2478899210691452, + "step": 5415 + }, + { + "epoch": 1.438188819545877, + "grad_norm": 1.2593045594203824, + "learning_rate": 4.017853462340813e-06, + "loss": 0.21356827020645142, + "step": 5416 + }, + { + "epoch": 1.4384543885274201, + "grad_norm": 1.3891493537034765, + "learning_rate": 4.014335561133652e-06, + "loss": 0.26329827308654785, + "step": 5417 + }, + { + "epoch": 1.438719957508963, + "grad_norm": 1.3689872343615141, + "learning_rate": 4.010818813892e-06, + "loss": 0.25880998373031616, + "step": 5418 + }, + { + "epoch": 1.438985526490506, + 
"grad_norm": 1.2738388972586026, + "learning_rate": 4.007303221293844e-06, + "loss": 0.22749441862106323, + "step": 5419 + }, + { + "epoch": 1.439251095472049, + "grad_norm": 1.2267331489472144, + "learning_rate": 4.00378878401695e-06, + "loss": 0.2242615520954132, + "step": 5420 + }, + { + "epoch": 1.439516664453592, + "grad_norm": 1.168704950265394, + "learning_rate": 4.000275502738862e-06, + "loss": 0.19751839339733124, + "step": 5421 + }, + { + "epoch": 1.439782233435135, + "grad_norm": 1.4000090999513362, + "learning_rate": 3.996763378136895e-06, + "loss": 0.27319905161857605, + "step": 5422 + }, + { + "epoch": 1.4400478024166778, + "grad_norm": 1.1483039760635705, + "learning_rate": 3.993252410888149e-06, + "loss": 0.21676769852638245, + "step": 5423 + }, + { + "epoch": 1.4403133713982208, + "grad_norm": 1.222649759682682, + "learning_rate": 3.989742601669494e-06, + "loss": 0.22788718342781067, + "step": 5424 + }, + { + "epoch": 1.4405789403797638, + "grad_norm": 1.1800102666876688, + "learning_rate": 3.986233951157581e-06, + "loss": 0.23224875330924988, + "step": 5425 + }, + { + "epoch": 1.4408445093613067, + "grad_norm": 1.3242271211713557, + "learning_rate": 3.982726460028836e-06, + "loss": 0.23625247180461884, + "step": 5426 + }, + { + "epoch": 1.4411100783428497, + "grad_norm": 1.237043381628487, + "learning_rate": 3.979220128959463e-06, + "loss": 0.2092093527317047, + "step": 5427 + }, + { + "epoch": 1.4413756473243926, + "grad_norm": 1.164989095324882, + "learning_rate": 3.975714958625442e-06, + "loss": 0.22196070849895477, + "step": 5428 + }, + { + "epoch": 1.4416412163059356, + "grad_norm": 1.248575755705502, + "learning_rate": 3.972210949702525e-06, + "loss": 0.21276375651359558, + "step": 5429 + }, + { + "epoch": 1.4419067852874785, + "grad_norm": 1.2714203744447936, + "learning_rate": 3.968708102866247e-06, + "loss": 0.22150103747844696, + "step": 5430 + }, + { + "epoch": 1.4421723542690215, + "grad_norm": 1.2519929176778726, + "learning_rate": 
3.965206418791914e-06, + "loss": 0.24529573321342468, + "step": 5431 + }, + { + "epoch": 1.4424379232505644, + "grad_norm": 1.3331662749929607, + "learning_rate": 3.961705898154609e-06, + "loss": 0.24349135160446167, + "step": 5432 + }, + { + "epoch": 1.4427034922321074, + "grad_norm": 1.3094668545917496, + "learning_rate": 3.9582065416291926e-06, + "loss": 0.23481428623199463, + "step": 5433 + }, + { + "epoch": 1.4429690612136503, + "grad_norm": 1.2664431166747565, + "learning_rate": 3.954708349890299e-06, + "loss": 0.2366936057806015, + "step": 5434 + }, + { + "epoch": 1.4432346301951933, + "grad_norm": 1.2699903819491114, + "learning_rate": 3.951211323612336e-06, + "loss": 0.24792322516441345, + "step": 5435 + }, + { + "epoch": 1.4435001991767362, + "grad_norm": 1.1943208090894295, + "learning_rate": 3.947715463469493e-06, + "loss": 0.22601652145385742, + "step": 5436 + }, + { + "epoch": 1.4437657681582792, + "grad_norm": 1.1333130191791405, + "learning_rate": 3.9442207701357235e-06, + "loss": 0.19603165984153748, + "step": 5437 + }, + { + "epoch": 1.4440313371398221, + "grad_norm": 1.26512939224431, + "learning_rate": 3.940727244284772e-06, + "loss": 0.22619353234767914, + "step": 5438 + }, + { + "epoch": 1.444296906121365, + "grad_norm": 1.3207139711857465, + "learning_rate": 3.937234886590146e-06, + "loss": 0.24836638569831848, + "step": 5439 + }, + { + "epoch": 1.444562475102908, + "grad_norm": 1.2114237797025103, + "learning_rate": 3.933743697725129e-06, + "loss": 0.21585768461227417, + "step": 5440 + }, + { + "epoch": 1.444828044084451, + "grad_norm": 1.2037953387653635, + "learning_rate": 3.930253678362784e-06, + "loss": 0.20876167714595795, + "step": 5441 + }, + { + "epoch": 1.445093613065994, + "grad_norm": 1.2825218153573943, + "learning_rate": 3.926764829175943e-06, + "loss": 0.24337999522686005, + "step": 5442 + }, + { + "epoch": 1.4453591820475369, + "grad_norm": 1.2238662957767994, + "learning_rate": 3.9232771508372155e-06, + "loss": 
0.2511219084262848, + "step": 5443 + }, + { + "epoch": 1.4456247510290798, + "grad_norm": 1.2796769482653771, + "learning_rate": 3.919790644018986e-06, + "loss": 0.26257213950157166, + "step": 5444 + }, + { + "epoch": 1.4458903200106228, + "grad_norm": 1.3570371082898334, + "learning_rate": 3.91630530939341e-06, + "loss": 0.2720959782600403, + "step": 5445 + }, + { + "epoch": 1.4461558889921657, + "grad_norm": 1.2897968589877258, + "learning_rate": 3.912821147632421e-06, + "loss": 0.23849177360534668, + "step": 5446 + }, + { + "epoch": 1.4464214579737087, + "grad_norm": 1.2539273982781811, + "learning_rate": 3.909338159407722e-06, + "loss": 0.2366214245557785, + "step": 5447 + }, + { + "epoch": 1.4466870269552516, + "grad_norm": 1.21348130376658, + "learning_rate": 3.905856345390793e-06, + "loss": 0.21905584633350372, + "step": 5448 + }, + { + "epoch": 1.4469525959367946, + "grad_norm": 1.3001423574977207, + "learning_rate": 3.902375706252887e-06, + "loss": 0.23964065313339233, + "step": 5449 + }, + { + "epoch": 1.4472181649183375, + "grad_norm": 1.2161208716702177, + "learning_rate": 3.89889624266503e-06, + "loss": 0.22246500849723816, + "step": 5450 + }, + { + "epoch": 1.4474837338998805, + "grad_norm": 1.2845367508241097, + "learning_rate": 3.895417955298022e-06, + "loss": 0.22980710864067078, + "step": 5451 + }, + { + "epoch": 1.4477493028814234, + "grad_norm": 1.4690832477509688, + "learning_rate": 3.8919408448224346e-06, + "loss": 0.21276253461837769, + "step": 5452 + }, + { + "epoch": 1.4480148718629664, + "grad_norm": 1.3515036942552143, + "learning_rate": 3.888464911908616e-06, + "loss": 0.23925542831420898, + "step": 5453 + }, + { + "epoch": 1.4482804408445094, + "grad_norm": 1.1871457723177183, + "learning_rate": 3.884990157226683e-06, + "loss": 0.21528369188308716, + "step": 5454 + }, + { + "epoch": 1.4485460098260523, + "grad_norm": 1.2673056278722348, + "learning_rate": 3.8815165814465235e-06, + "loss": 0.24563542008399963, + "step": 5455 + }, + { + 
"epoch": 1.4488115788075953, + "grad_norm": 1.2561210989748839, + "learning_rate": 3.87804418523781e-06, + "loss": 0.2721150517463684, + "step": 5456 + }, + { + "epoch": 1.4490771477891382, + "grad_norm": 1.3721328159682122, + "learning_rate": 3.874572969269976e-06, + "loss": 0.23716527223587036, + "step": 5457 + }, + { + "epoch": 1.4493427167706812, + "grad_norm": 1.5185790933002854, + "learning_rate": 3.871102934212231e-06, + "loss": 0.2182254046201706, + "step": 5458 + }, + { + "epoch": 1.4496082857522241, + "grad_norm": 1.233204842662738, + "learning_rate": 3.867634080733557e-06, + "loss": 0.2179020643234253, + "step": 5459 + }, + { + "epoch": 1.449873854733767, + "grad_norm": 1.2633976965193632, + "learning_rate": 3.864166409502706e-06, + "loss": 0.22901684045791626, + "step": 5460 + }, + { + "epoch": 1.45013942371531, + "grad_norm": 1.209132482684757, + "learning_rate": 3.860699921188211e-06, + "loss": 0.2287352979183197, + "step": 5461 + }, + { + "epoch": 1.450404992696853, + "grad_norm": 1.214494370780124, + "learning_rate": 3.85723461645836e-06, + "loss": 0.2448873668909073, + "step": 5462 + }, + { + "epoch": 1.450670561678396, + "grad_norm": 1.323933009108344, + "learning_rate": 3.85377049598123e-06, + "loss": 0.2693510055541992, + "step": 5463 + }, + { + "epoch": 1.4509361306599389, + "grad_norm": 1.1826355120377283, + "learning_rate": 3.8503075604246554e-06, + "loss": 0.25414884090423584, + "step": 5464 + }, + { + "epoch": 1.4512016996414818, + "grad_norm": 1.3400776704302024, + "learning_rate": 3.846845810456258e-06, + "loss": 0.27798837423324585, + "step": 5465 + }, + { + "epoch": 1.4514672686230248, + "grad_norm": 1.3109571985733361, + "learning_rate": 3.8433852467434175e-06, + "loss": 0.23348593711853027, + "step": 5466 + }, + { + "epoch": 1.4517328376045677, + "grad_norm": 1.148921292979252, + "learning_rate": 3.839925869953292e-06, + "loss": 0.20993635058403015, + "step": 5467 + }, + { + "epoch": 1.4519984065861107, + "grad_norm": 
1.1967150813107374, + "learning_rate": 3.836467680752808e-06, + "loss": 0.225263774394989, + "step": 5468 + }, + { + "epoch": 1.4522639755676536, + "grad_norm": 4.549069881323283, + "learning_rate": 3.833010679808662e-06, + "loss": 0.2481595277786255, + "step": 5469 + }, + { + "epoch": 1.4525295445491966, + "grad_norm": 1.098861894900169, + "learning_rate": 3.829554867787324e-06, + "loss": 0.20755310356616974, + "step": 5470 + }, + { + "epoch": 1.4527951135307395, + "grad_norm": 1.3031978879220207, + "learning_rate": 3.826100245355034e-06, + "loss": 0.22124455869197845, + "step": 5471 + }, + { + "epoch": 1.4530606825122825, + "grad_norm": 1.1779333046553406, + "learning_rate": 3.822646813177803e-06, + "loss": 0.23461398482322693, + "step": 5472 + }, + { + "epoch": 1.4533262514938254, + "grad_norm": 1.123494857736561, + "learning_rate": 3.819194571921407e-06, + "loss": 0.22890526056289673, + "step": 5473 + }, + { + "epoch": 1.4535918204753684, + "grad_norm": 1.1163449125196687, + "learning_rate": 3.815743522251406e-06, + "loss": 0.23236533999443054, + "step": 5474 + }, + { + "epoch": 1.4538573894569113, + "grad_norm": 1.204733497516731, + "learning_rate": 3.8122936648331164e-06, + "loss": 0.2192365825176239, + "step": 5475 + }, + { + "epoch": 1.4541229584384543, + "grad_norm": 1.3061324350348682, + "learning_rate": 3.8088450003316346e-06, + "loss": 0.23970162868499756, + "step": 5476 + }, + { + "epoch": 1.4543885274199972, + "grad_norm": 1.256131451943752, + "learning_rate": 3.8053975294118163e-06, + "loss": 0.24270984530448914, + "step": 5477 + }, + { + "epoch": 1.4546540964015402, + "grad_norm": 1.1616491435133687, + "learning_rate": 3.801951252738295e-06, + "loss": 0.22228944301605225, + "step": 5478 + }, + { + "epoch": 1.4549196653830831, + "grad_norm": 1.2998939083384287, + "learning_rate": 3.7985061709754735e-06, + "loss": 0.25029584765434265, + "step": 5479 + }, + { + "epoch": 1.455185234364626, + "grad_norm": 1.1546196330858232, + "learning_rate": 
3.795062284787522e-06, + "loss": 0.23831725120544434, + "step": 5480 + }, + { + "epoch": 1.455450803346169, + "grad_norm": 1.2698177511587796, + "learning_rate": 3.7916195948383817e-06, + "loss": 0.2571605145931244, + "step": 5481 + }, + { + "epoch": 1.455716372327712, + "grad_norm": 1.4321109332673951, + "learning_rate": 3.7881781017917586e-06, + "loss": 0.2660857141017914, + "step": 5482 + }, + { + "epoch": 1.455981941309255, + "grad_norm": 1.3406733437493707, + "learning_rate": 3.7847378063111394e-06, + "loss": 0.2468302845954895, + "step": 5483 + }, + { + "epoch": 1.456247510290798, + "grad_norm": 1.363296358111954, + "learning_rate": 3.7812987090597696e-06, + "loss": 0.2559482753276825, + "step": 5484 + }, + { + "epoch": 1.4565130792723409, + "grad_norm": 1.2144737578388247, + "learning_rate": 3.7778608107006654e-06, + "loss": 0.24484393000602722, + "step": 5485 + }, + { + "epoch": 1.4567786482538838, + "grad_norm": 1.1782087302857855, + "learning_rate": 3.774424111896614e-06, + "loss": 0.2376541644334793, + "step": 5486 + }, + { + "epoch": 1.4570442172354268, + "grad_norm": 1.1748479481028287, + "learning_rate": 3.770988613310169e-06, + "loss": 0.22265875339508057, + "step": 5487 + }, + { + "epoch": 1.45730978621697, + "grad_norm": 1.2316185421612622, + "learning_rate": 3.7675543156036555e-06, + "loss": 0.2511552572250366, + "step": 5488 + }, + { + "epoch": 1.457575355198513, + "grad_norm": 1.2601957381413438, + "learning_rate": 3.764121219439165e-06, + "loss": 0.2412843108177185, + "step": 5489 + }, + { + "epoch": 1.4578409241800558, + "grad_norm": 1.2622123015546969, + "learning_rate": 3.760689325478559e-06, + "loss": 0.26342809200286865, + "step": 5490 + }, + { + "epoch": 1.4581064931615988, + "grad_norm": 1.2994089172948287, + "learning_rate": 3.7572586343834638e-06, + "loss": 0.23315641283988953, + "step": 5491 + }, + { + "epoch": 1.4583720621431417, + "grad_norm": 1.0927170518216454, + "learning_rate": 3.753829146815279e-06, + "loss": 
0.24148929119110107, + "step": 5492 + }, + { + "epoch": 1.4586376311246847, + "grad_norm": 1.363697618202234, + "learning_rate": 3.750400863435166e-06, + "loss": 0.22838115692138672, + "step": 5493 + }, + { + "epoch": 1.4589032001062276, + "grad_norm": 1.2083898158968958, + "learning_rate": 3.746973784904061e-06, + "loss": 0.21669608354568481, + "step": 5494 + }, + { + "epoch": 1.4591687690877706, + "grad_norm": 1.4819576271076944, + "learning_rate": 3.743547911882662e-06, + "loss": 0.25619322061538696, + "step": 5495 + }, + { + "epoch": 1.4594343380693136, + "grad_norm": 1.2058542987095502, + "learning_rate": 3.7401232450314384e-06, + "loss": 0.23629480600357056, + "step": 5496 + }, + { + "epoch": 1.4596999070508565, + "grad_norm": 1.189438722154431, + "learning_rate": 3.7366997850106245e-06, + "loss": 0.21799582242965698, + "step": 5497 + }, + { + "epoch": 1.4599654760323995, + "grad_norm": 1.372571579127378, + "learning_rate": 3.733277532480223e-06, + "loss": 0.2582590579986572, + "step": 5498 + }, + { + "epoch": 1.4602310450139424, + "grad_norm": 1.1675281771435806, + "learning_rate": 3.729856488100003e-06, + "loss": 0.23641736805438995, + "step": 5499 + }, + { + "epoch": 1.4604966139954854, + "grad_norm": 1.3024331747300109, + "learning_rate": 3.7264366525295e-06, + "loss": 0.24150417745113373, + "step": 5500 + }, + { + "epoch": 1.4607621829770283, + "grad_norm": 1.2012687985267718, + "learning_rate": 3.7230180264280245e-06, + "loss": 0.2474009394645691, + "step": 5501 + }, + { + "epoch": 1.4610277519585713, + "grad_norm": 1.3411668359609863, + "learning_rate": 3.7196006104546435e-06, + "loss": 0.269604355096817, + "step": 5502 + }, + { + "epoch": 1.4612933209401142, + "grad_norm": 1.3014753471077654, + "learning_rate": 3.716184405268194e-06, + "loss": 0.24324679374694824, + "step": 5503 + }, + { + "epoch": 1.4615588899216572, + "grad_norm": 1.1306865007600708, + "learning_rate": 3.7127694115272805e-06, + "loss": 0.2249709963798523, + "step": 5504 + }, + { + 
"epoch": 1.4618244589032001, + "grad_norm": 1.2915165646779034, + "learning_rate": 3.7093556298902734e-06, + "loss": 0.2560918629169464, + "step": 5505 + }, + { + "epoch": 1.462090027884743, + "grad_norm": 1.154084739271703, + "learning_rate": 3.705943061015309e-06, + "loss": 0.22693020105361938, + "step": 5506 + }, + { + "epoch": 1.462355596866286, + "grad_norm": 1.2640727525169442, + "learning_rate": 3.702531705560292e-06, + "loss": 0.2617371678352356, + "step": 5507 + }, + { + "epoch": 1.462621165847829, + "grad_norm": 1.2561844307954502, + "learning_rate": 3.6991215641828903e-06, + "loss": 0.2314397394657135, + "step": 5508 + }, + { + "epoch": 1.462886734829372, + "grad_norm": 1.1063207547372251, + "learning_rate": 3.6957126375405383e-06, + "loss": 0.23186162114143372, + "step": 5509 + }, + { + "epoch": 1.4631523038109149, + "grad_norm": 1.2602306615156422, + "learning_rate": 3.6923049262904375e-06, + "loss": 0.21775083243846893, + "step": 5510 + }, + { + "epoch": 1.4634178727924578, + "grad_norm": 1.2619669881473867, + "learning_rate": 3.688898431089556e-06, + "loss": 0.24707889556884766, + "step": 5511 + }, + { + "epoch": 1.4636834417740008, + "grad_norm": 1.0923805026421214, + "learning_rate": 3.6854931525946237e-06, + "loss": 0.1941150575876236, + "step": 5512 + }, + { + "epoch": 1.4639490107555437, + "grad_norm": 1.0123090946182933, + "learning_rate": 3.6820890914621376e-06, + "loss": 0.17808857560157776, + "step": 5513 + }, + { + "epoch": 1.4642145797370867, + "grad_norm": 1.2139965705715394, + "learning_rate": 3.678686248348363e-06, + "loss": 0.2150077074766159, + "step": 5514 + }, + { + "epoch": 1.4644801487186296, + "grad_norm": 1.4267562521267494, + "learning_rate": 3.6752846239093276e-06, + "loss": 0.2605292797088623, + "step": 5515 + }, + { + "epoch": 1.4647457177001726, + "grad_norm": 1.202920213288267, + "learning_rate": 3.671884218800822e-06, + "loss": 0.22481867671012878, + "step": 5516 + }, + { + "epoch": 1.4650112866817155, + "grad_norm": 
5.588780783186036, + "learning_rate": 3.668485033678406e-06, + "loss": 0.24453294277191162, + "step": 5517 + }, + { + "epoch": 1.4652768556632585, + "grad_norm": 1.379432138271627, + "learning_rate": 3.6650870691973996e-06, + "loss": 0.2672286033630371, + "step": 5518 + }, + { + "epoch": 1.4655424246448014, + "grad_norm": 1.2625747265975353, + "learning_rate": 3.661690326012897e-06, + "loss": 0.2514987587928772, + "step": 5519 + }, + { + "epoch": 1.4658079936263444, + "grad_norm": 1.3337549906693908, + "learning_rate": 3.6582948047797438e-06, + "loss": 0.25671514868736267, + "step": 5520 + }, + { + "epoch": 1.4660735626078873, + "grad_norm": 1.3535247420304835, + "learning_rate": 3.654900506152561e-06, + "loss": 0.25485602021217346, + "step": 5521 + }, + { + "epoch": 1.4663391315894303, + "grad_norm": 1.1813027271086827, + "learning_rate": 3.6515074307857257e-06, + "loss": 0.23556292057037354, + "step": 5522 + }, + { + "epoch": 1.4666047005709733, + "grad_norm": 1.15604598759747, + "learning_rate": 3.6481155793333855e-06, + "loss": 0.23347696661949158, + "step": 5523 + }, + { + "epoch": 1.4668702695525162, + "grad_norm": 1.218328581124676, + "learning_rate": 3.6447249524494466e-06, + "loss": 0.2405884712934494, + "step": 5524 + }, + { + "epoch": 1.4671358385340592, + "grad_norm": 1.2423110513745568, + "learning_rate": 3.6413355507875845e-06, + "loss": 0.23668336868286133, + "step": 5525 + }, + { + "epoch": 1.467401407515602, + "grad_norm": 1.207526661238473, + "learning_rate": 3.6379473750012375e-06, + "loss": 0.25534945726394653, + "step": 5526 + }, + { + "epoch": 1.467666976497145, + "grad_norm": 1.267472887202726, + "learning_rate": 3.634560425743596e-06, + "loss": 0.22227410972118378, + "step": 5527 + }, + { + "epoch": 1.467932545478688, + "grad_norm": 1.4853214348875312, + "learning_rate": 3.631174703667636e-06, + "loss": 0.23395927250385284, + "step": 5528 + }, + { + "epoch": 1.468198114460231, + "grad_norm": 1.2396534638298151, + "learning_rate": 
3.6277902094260785e-06, + "loss": 0.23419208824634552, + "step": 5529 + }, + { + "epoch": 1.4684636834417741, + "grad_norm": 1.3441597355302621, + "learning_rate": 3.6244069436714158e-06, + "loss": 0.22185654938220978, + "step": 5530 + }, + { + "epoch": 1.468729252423317, + "grad_norm": 1.2489989202798994, + "learning_rate": 3.621024907055901e-06, + "loss": 0.2705134153366089, + "step": 5531 + }, + { + "epoch": 1.46899482140486, + "grad_norm": 1.23195362246657, + "learning_rate": 3.617644100231551e-06, + "loss": 0.23426109552383423, + "step": 5532 + }, + { + "epoch": 1.469260390386403, + "grad_norm": 1.2477206941188708, + "learning_rate": 3.6142645238501462e-06, + "loss": 0.25527146458625793, + "step": 5533 + }, + { + "epoch": 1.469525959367946, + "grad_norm": 1.1030456616341389, + "learning_rate": 3.610886178563228e-06, + "loss": 0.1882668435573578, + "step": 5534 + }, + { + "epoch": 1.469791528349489, + "grad_norm": 1.2622509171219458, + "learning_rate": 3.607509065022101e-06, + "loss": 0.24060532450675964, + "step": 5535 + }, + { + "epoch": 1.4700570973310318, + "grad_norm": 1.2245038712856335, + "learning_rate": 3.6041331838778325e-06, + "loss": 0.23555803298950195, + "step": 5536 + }, + { + "epoch": 1.4703226663125748, + "grad_norm": 1.2192798079575136, + "learning_rate": 3.6007585357812557e-06, + "loss": 0.23126551508903503, + "step": 5537 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 1.139497037450913, + "learning_rate": 3.597385121382961e-06, + "loss": 0.24203836917877197, + "step": 5538 + }, + { + "epoch": 1.4708538042756607, + "grad_norm": 1.2467383616518404, + "learning_rate": 3.5940129413333046e-06, + "loss": 0.239767923951149, + "step": 5539 + }, + { + "epoch": 1.4711193732572037, + "grad_norm": 1.158137574546163, + "learning_rate": 3.5906419962824002e-06, + "loss": 0.24732957780361176, + "step": 5540 + }, + { + "epoch": 1.4713849422387466, + "grad_norm": 1.2722296085836442, + "learning_rate": 3.587272286880131e-06, + "loss": 
0.2296421229839325, + "step": 5541 + }, + { + "epoch": 1.4716505112202896, + "grad_norm": 1.2453973567418024, + "learning_rate": 3.583903813776132e-06, + "loss": 0.2339775711297989, + "step": 5542 + }, + { + "epoch": 1.4719160802018325, + "grad_norm": 1.194940832073201, + "learning_rate": 3.5805365776198052e-06, + "loss": 0.230351984500885, + "step": 5543 + }, + { + "epoch": 1.4721816491833755, + "grad_norm": 1.2792126719917591, + "learning_rate": 3.5771705790603163e-06, + "loss": 0.2501414716243744, + "step": 5544 + }, + { + "epoch": 1.4724472181649184, + "grad_norm": 1.2327284472179139, + "learning_rate": 3.5738058187465864e-06, + "loss": 0.23387153446674347, + "step": 5545 + }, + { + "epoch": 1.4727127871464614, + "grad_norm": 1.2921618045206031, + "learning_rate": 3.570442297327307e-06, + "loss": 0.23874594271183014, + "step": 5546 + }, + { + "epoch": 1.4729783561280043, + "grad_norm": 1.2841826918754735, + "learning_rate": 3.5670800154509245e-06, + "loss": 0.21867451071739197, + "step": 5547 + }, + { + "epoch": 1.4732439251095473, + "grad_norm": 1.2937830650411482, + "learning_rate": 3.563718973765644e-06, + "loss": 0.24124100804328918, + "step": 5548 + }, + { + "epoch": 1.4735094940910902, + "grad_norm": 1.2156419794246578, + "learning_rate": 3.5603591729194377e-06, + "loss": 0.22185327112674713, + "step": 5549 + }, + { + "epoch": 1.4737750630726332, + "grad_norm": 1.1571779294098303, + "learning_rate": 3.5570006135600345e-06, + "loss": 0.21193793416023254, + "step": 5550 + }, + { + "epoch": 1.4740406320541761, + "grad_norm": 1.3939617841899903, + "learning_rate": 3.553643296334924e-06, + "loss": 0.2615143656730652, + "step": 5551 + }, + { + "epoch": 1.474306201035719, + "grad_norm": 1.1936451275051074, + "learning_rate": 3.5502872218913597e-06, + "loss": 0.24937541782855988, + "step": 5552 + }, + { + "epoch": 1.474571770017262, + "grad_norm": 1.0736225386439564, + "learning_rate": 3.5469323908763507e-06, + "loss": 0.22849224507808685, + "step": 5553 + }, + { 
+ "epoch": 1.474837338998805, + "grad_norm": 1.6488166459783042, + "learning_rate": 3.5435788039366657e-06, + "loss": 0.2209717333316803, + "step": 5554 + }, + { + "epoch": 1.475102907980348, + "grad_norm": 1.2992665215674652, + "learning_rate": 3.5402264617188453e-06, + "loss": 0.2529235780239105, + "step": 5555 + }, + { + "epoch": 1.4753684769618909, + "grad_norm": 1.2133685762997675, + "learning_rate": 3.536875364869181e-06, + "loss": 0.2045450657606125, + "step": 5556 + }, + { + "epoch": 1.4756340459434338, + "grad_norm": 1.0591536248970717, + "learning_rate": 3.5335255140337167e-06, + "loss": 0.1973644196987152, + "step": 5557 + }, + { + "epoch": 1.4758996149249768, + "grad_norm": 1.3059187006673687, + "learning_rate": 3.5301769098582685e-06, + "loss": 0.27417299151420593, + "step": 5558 + }, + { + "epoch": 1.4761651839065197, + "grad_norm": 1.2500382678843112, + "learning_rate": 3.5268295529884077e-06, + "loss": 0.24541756510734558, + "step": 5559 + }, + { + "epoch": 1.4764307528880627, + "grad_norm": 1.4461383875060436, + "learning_rate": 3.5234834440694655e-06, + "loss": 0.25785958766937256, + "step": 5560 + }, + { + "epoch": 1.4766963218696056, + "grad_norm": 1.1676448271023605, + "learning_rate": 3.5201385837465307e-06, + "loss": 0.21099212765693665, + "step": 5561 + }, + { + "epoch": 1.4769618908511486, + "grad_norm": 1.1787333048605453, + "learning_rate": 3.5167949726644545e-06, + "loss": 0.26023173332214355, + "step": 5562 + }, + { + "epoch": 1.4772274598326915, + "grad_norm": 1.6670162101301063, + "learning_rate": 3.5134526114678426e-06, + "loss": 0.22882963716983795, + "step": 5563 + }, + { + "epoch": 1.4774930288142345, + "grad_norm": 1.312450944331431, + "learning_rate": 3.5101115008010677e-06, + "loss": 0.21987251937389374, + "step": 5564 + }, + { + "epoch": 1.4777585977957775, + "grad_norm": 1.163985983495263, + "learning_rate": 3.506771641308255e-06, + "loss": 0.2169610857963562, + "step": 5565 + }, + { + "epoch": 1.4780241667773204, + 
"grad_norm": 4.440133890295746, + "learning_rate": 3.50343303363329e-06, + "loss": 0.22723034024238586, + "step": 5566 + }, + { + "epoch": 1.4782897357588634, + "grad_norm": 1.2392064660120468, + "learning_rate": 3.5000956784198157e-06, + "loss": 0.23738276958465576, + "step": 5567 + }, + { + "epoch": 1.4785553047404063, + "grad_norm": 1.1818266174210303, + "learning_rate": 3.496759576311235e-06, + "loss": 0.19922251999378204, + "step": 5568 + }, + { + "epoch": 1.4788208737219493, + "grad_norm": 1.294067668946831, + "learning_rate": 3.4934247279507092e-06, + "loss": 0.22529268264770508, + "step": 5569 + }, + { + "epoch": 1.4790864427034922, + "grad_norm": 1.3551359298814187, + "learning_rate": 3.4900911339811583e-06, + "loss": 0.26758015155792236, + "step": 5570 + }, + { + "epoch": 1.4793520116850352, + "grad_norm": 1.2627897957153122, + "learning_rate": 3.48675879504526e-06, + "loss": 0.24752648174762726, + "step": 5571 + }, + { + "epoch": 1.4796175806665781, + "grad_norm": 1.3085621441307098, + "learning_rate": 3.483427711785449e-06, + "loss": 0.25337618589401245, + "step": 5572 + }, + { + "epoch": 1.479883149648121, + "grad_norm": 1.3543288061594618, + "learning_rate": 3.480097884843919e-06, + "loss": 0.24504786729812622, + "step": 5573 + }, + { + "epoch": 1.480148718629664, + "grad_norm": 1.1750849317955903, + "learning_rate": 3.4767693148626223e-06, + "loss": 0.21255145967006683, + "step": 5574 + }, + { + "epoch": 1.480414287611207, + "grad_norm": 1.2853041773936769, + "learning_rate": 3.473442002483267e-06, + "loss": 0.2501891553401947, + "step": 5575 + }, + { + "epoch": 1.48067985659275, + "grad_norm": 1.195974425335747, + "learning_rate": 3.4701159483473202e-06, + "loss": 0.25276634097099304, + "step": 5576 + }, + { + "epoch": 1.4809454255742929, + "grad_norm": 1.427206116406706, + "learning_rate": 3.4667911530960052e-06, + "loss": 0.2760567367076874, + "step": 5577 + }, + { + "epoch": 1.4812109945558358, + "grad_norm": 1.2442739080424003, + 
"learning_rate": 3.463467617370305e-06, + "loss": 0.22686481475830078, + "step": 5578 + }, + { + "epoch": 1.4814765635373788, + "grad_norm": 1.2374194002920247, + "learning_rate": 3.4601453418109554e-06, + "loss": 0.23262599110603333, + "step": 5579 + }, + { + "epoch": 1.4817421325189217, + "grad_norm": 1.2263890428702933, + "learning_rate": 3.4568243270584545e-06, + "loss": 0.22231365740299225, + "step": 5580 + }, + { + "epoch": 1.4820077015004647, + "grad_norm": 1.2193067799394695, + "learning_rate": 3.4535045737530504e-06, + "loss": 0.22237855195999146, + "step": 5581 + }, + { + "epoch": 1.4822732704820076, + "grad_norm": 1.208437884817879, + "learning_rate": 3.4501860825347587e-06, + "loss": 0.2260412871837616, + "step": 5582 + }, + { + "epoch": 1.4825388394635506, + "grad_norm": 1.3488909026023506, + "learning_rate": 3.4468688540433425e-06, + "loss": 0.2133496105670929, + "step": 5583 + }, + { + "epoch": 1.4828044084450935, + "grad_norm": 1.231358912436915, + "learning_rate": 3.4435528889183245e-06, + "loss": 0.24750375747680664, + "step": 5584 + }, + { + "epoch": 1.4830699774266365, + "grad_norm": 1.2053641188090713, + "learning_rate": 3.440238187798983e-06, + "loss": 0.23673412203788757, + "step": 5585 + }, + { + "epoch": 1.4833355464081794, + "grad_norm": 1.312048381493266, + "learning_rate": 3.436924751324354e-06, + "loss": 0.2505243420600891, + "step": 5586 + }, + { + "epoch": 1.4836011153897224, + "grad_norm": 1.2769153596955758, + "learning_rate": 3.433612580133229e-06, + "loss": 0.276151180267334, + "step": 5587 + }, + { + "epoch": 1.4838666843712653, + "grad_norm": 1.0245497892529305, + "learning_rate": 3.430301674864154e-06, + "loss": 0.1756816953420639, + "step": 5588 + }, + { + "epoch": 1.4841322533528083, + "grad_norm": 1.2667973514811224, + "learning_rate": 3.4269920361554342e-06, + "loss": 0.25901898741722107, + "step": 5589 + }, + { + "epoch": 1.4843978223343512, + "grad_norm": 1.2034260428652863, + "learning_rate": 3.4236836646451286e-06, + 
"loss": 0.21196085214614868, + "step": 5590 + }, + { + "epoch": 1.4846633913158942, + "grad_norm": 1.2887221468811698, + "learning_rate": 3.4203765609710525e-06, + "loss": 0.24153128266334534, + "step": 5591 + }, + { + "epoch": 1.4849289602974372, + "grad_norm": 1.2285562462634616, + "learning_rate": 3.4170707257707757e-06, + "loss": 0.25715887546539307, + "step": 5592 + }, + { + "epoch": 1.48519452927898, + "grad_norm": 1.430212837200284, + "learning_rate": 3.413766159681624e-06, + "loss": 0.2920379042625427, + "step": 5593 + }, + { + "epoch": 1.485460098260523, + "grad_norm": 1.2173970332611068, + "learning_rate": 3.41046286334068e-06, + "loss": 0.22127456963062286, + "step": 5594 + }, + { + "epoch": 1.485725667242066, + "grad_norm": 1.2534339617557788, + "learning_rate": 3.4071608373847786e-06, + "loss": 0.23103584349155426, + "step": 5595 + }, + { + "epoch": 1.485991236223609, + "grad_norm": 1.2999427041349472, + "learning_rate": 3.403860082450513e-06, + "loss": 0.29068222641944885, + "step": 5596 + }, + { + "epoch": 1.486256805205152, + "grad_norm": 1.2532608064541852, + "learning_rate": 3.4005605991742296e-06, + "loss": 0.23703888058662415, + "step": 5597 + }, + { + "epoch": 1.4865223741866949, + "grad_norm": 1.4039489349034764, + "learning_rate": 3.3972623881920296e-06, + "loss": 0.23348261415958405, + "step": 5598 + }, + { + "epoch": 1.4867879431682378, + "grad_norm": 1.1603139615742908, + "learning_rate": 3.3939654501397645e-06, + "loss": 0.24733223021030426, + "step": 5599 + }, + { + "epoch": 1.487053512149781, + "grad_norm": 1.1220204153088178, + "learning_rate": 3.3906697856530548e-06, + "loss": 0.22576835751533508, + "step": 5600 + }, + { + "epoch": 1.487319081131324, + "grad_norm": 1.1809335952834177, + "learning_rate": 3.3873753953672593e-06, + "loss": 0.20863527059555054, + "step": 5601 + }, + { + "epoch": 1.487584650112867, + "grad_norm": 1.1823379745083873, + "learning_rate": 3.384082279917499e-06, + "loss": 0.2299712598323822, + "step": 5602 + }, 
+ { + "epoch": 1.4878502190944098, + "grad_norm": 1.1858521746021262, + "learning_rate": 3.380790439938648e-06, + "loss": 0.23058944940567017, + "step": 5603 + }, + { + "epoch": 1.4881157880759528, + "grad_norm": 1.1304663814123712, + "learning_rate": 3.3774998760653344e-06, + "loss": 0.20307201147079468, + "step": 5604 + }, + { + "epoch": 1.4883813570574957, + "grad_norm": 1.112411027996001, + "learning_rate": 3.3742105889319388e-06, + "loss": 0.2296266108751297, + "step": 5605 + }, + { + "epoch": 1.4886469260390387, + "grad_norm": 1.3206442060716181, + "learning_rate": 3.370922579172601e-06, + "loss": 0.22702309489250183, + "step": 5606 + }, + { + "epoch": 1.4889124950205816, + "grad_norm": 1.4590848907033545, + "learning_rate": 3.3676358474212035e-06, + "loss": 0.30432331562042236, + "step": 5607 + }, + { + "epoch": 1.4891780640021246, + "grad_norm": 1.201356120373459, + "learning_rate": 3.3643503943113907e-06, + "loss": 0.2488052248954773, + "step": 5608 + }, + { + "epoch": 1.4894436329836676, + "grad_norm": 1.2096846483257637, + "learning_rate": 3.361066220476564e-06, + "loss": 0.2221754938364029, + "step": 5609 + }, + { + "epoch": 1.4897092019652105, + "grad_norm": 1.289556223007011, + "learning_rate": 3.3577833265498728e-06, + "loss": 0.2547761797904968, + "step": 5610 + }, + { + "epoch": 1.4899747709467535, + "grad_norm": 1.3306628367975963, + "learning_rate": 3.3545017131642164e-06, + "loss": 0.21811938285827637, + "step": 5611 + }, + { + "epoch": 1.4902403399282964, + "grad_norm": 1.4022029015386877, + "learning_rate": 3.3512213809522554e-06, + "loss": 0.30436158180236816, + "step": 5612 + }, + { + "epoch": 1.4905059089098394, + "grad_norm": 1.2224150283856856, + "learning_rate": 3.3479423305463953e-06, + "loss": 0.2053622156381607, + "step": 5613 + }, + { + "epoch": 1.4907714778913823, + "grad_norm": 1.3026832238379669, + "learning_rate": 3.344664562578801e-06, + "loss": 0.2017601728439331, + "step": 5614 + }, + { + "epoch": 1.4910370468729253, + 
"grad_norm": 1.2856046275416113, + "learning_rate": 3.341388077681387e-06, + "loss": 0.23668046295642853, + "step": 5615 + }, + { + "epoch": 1.4913026158544682, + "grad_norm": 1.1460002150937032, + "learning_rate": 3.338112876485821e-06, + "loss": 0.20016951858997345, + "step": 5616 + }, + { + "epoch": 1.4915681848360112, + "grad_norm": 1.3606548245166536, + "learning_rate": 3.3348389596235177e-06, + "loss": 0.25477850437164307, + "step": 5617 + }, + { + "epoch": 1.4918337538175541, + "grad_norm": 1.2758175160721472, + "learning_rate": 3.3315663277256594e-06, + "loss": 0.24063366651535034, + "step": 5618 + }, + { + "epoch": 1.492099322799097, + "grad_norm": 1.2737128535751616, + "learning_rate": 3.328294981423165e-06, + "loss": 0.23443251848220825, + "step": 5619 + }, + { + "epoch": 1.49236489178064, + "grad_norm": 1.1580169148577781, + "learning_rate": 3.325024921346717e-06, + "loss": 0.21191264688968658, + "step": 5620 + }, + { + "epoch": 1.492630460762183, + "grad_norm": 1.213323558189925, + "learning_rate": 3.3217561481267367e-06, + "loss": 0.22062326967716217, + "step": 5621 + }, + { + "epoch": 1.492896029743726, + "grad_norm": 1.1757529457487401, + "learning_rate": 3.318488662393409e-06, + "loss": 0.2235480695962906, + "step": 5622 + }, + { + "epoch": 1.4931615987252689, + "grad_norm": 1.2611472240425432, + "learning_rate": 3.315222464776665e-06, + "loss": 0.26665517687797546, + "step": 5623 + }, + { + "epoch": 1.4934271677068118, + "grad_norm": 1.270220596773442, + "learning_rate": 3.3119575559061902e-06, + "loss": 0.24300602078437805, + "step": 5624 + }, + { + "epoch": 1.4936927366883548, + "grad_norm": 1.2622444254847978, + "learning_rate": 3.308693936411421e-06, + "loss": 0.25441884994506836, + "step": 5625 + }, + { + "epoch": 1.4939583056698977, + "grad_norm": 1.2781695234171213, + "learning_rate": 3.3054316069215407e-06, + "loss": 0.23236152529716492, + "step": 5626 + }, + { + "epoch": 1.4942238746514407, + "grad_norm": 1.2299113342509724, + 
"learning_rate": 3.3021705680654946e-06, + "loss": 0.24535568058490753, + "step": 5627 + }, + { + "epoch": 1.4944894436329836, + "grad_norm": 1.3635919919461823, + "learning_rate": 3.29891082047197e-06, + "loss": 0.2542986273765564, + "step": 5628 + }, + { + "epoch": 1.4947550126145266, + "grad_norm": 1.3442816383357798, + "learning_rate": 3.295652364769407e-06, + "loss": 0.26490268111228943, + "step": 5629 + }, + { + "epoch": 1.4950205815960695, + "grad_norm": 1.2455944135633985, + "learning_rate": 3.292395201585997e-06, + "loss": 0.25576913356781006, + "step": 5630 + }, + { + "epoch": 1.4952861505776125, + "grad_norm": 1.321982811797117, + "learning_rate": 3.2891393315496846e-06, + "loss": 0.2930823266506195, + "step": 5631 + }, + { + "epoch": 1.4955517195591554, + "grad_norm": 1.3029577245101889, + "learning_rate": 3.285884755288161e-06, + "loss": 0.2426074892282486, + "step": 5632 + }, + { + "epoch": 1.4958172885406984, + "grad_norm": 1.1912484566122454, + "learning_rate": 3.2826314734288713e-06, + "loss": 0.24090878665447235, + "step": 5633 + }, + { + "epoch": 1.4960828575222413, + "grad_norm": 1.291391881665867, + "learning_rate": 3.2793794865990092e-06, + "loss": 0.26155173778533936, + "step": 5634 + }, + { + "epoch": 1.4963484265037843, + "grad_norm": 1.2581171617638447, + "learning_rate": 3.2761287954255195e-06, + "loss": 0.2594009041786194, + "step": 5635 + }, + { + "epoch": 1.4966139954853273, + "grad_norm": 1.248912763921314, + "learning_rate": 3.2728794005350972e-06, + "loss": 0.24434763193130493, + "step": 5636 + }, + { + "epoch": 1.4968795644668702, + "grad_norm": 1.3459414061970596, + "learning_rate": 3.269631302554188e-06, + "loss": 0.2622208297252655, + "step": 5637 + }, + { + "epoch": 1.4971451334484132, + "grad_norm": 1.2222057610309294, + "learning_rate": 3.266384502108987e-06, + "loss": 0.18913154304027557, + "step": 5638 + }, + { + "epoch": 1.497410702429956, + "grad_norm": 1.260519406868159, + "learning_rate": 3.263138999825437e-06, + 
"loss": 0.2610907554626465, + "step": 5639 + }, + { + "epoch": 1.497676271411499, + "grad_norm": 1.2585537664404678, + "learning_rate": 3.2598947963292337e-06, + "loss": 0.25841569900512695, + "step": 5640 + }, + { + "epoch": 1.497941840393042, + "grad_norm": 1.1680179490188496, + "learning_rate": 3.256651892245822e-06, + "loss": 0.2066381573677063, + "step": 5641 + }, + { + "epoch": 1.4982074093745852, + "grad_norm": 1.1877407935219242, + "learning_rate": 3.253410288200396e-06, + "loss": 0.23956719040870667, + "step": 5642 + }, + { + "epoch": 1.4984729783561281, + "grad_norm": 1.1996406642135662, + "learning_rate": 3.250169984817897e-06, + "loss": 0.23999394476413727, + "step": 5643 + }, + { + "epoch": 1.498738547337671, + "grad_norm": 1.4056134439986134, + "learning_rate": 3.2469309827230156e-06, + "loss": 0.24273940920829773, + "step": 5644 + }, + { + "epoch": 1.499004116319214, + "grad_norm": 1.193555704549332, + "learning_rate": 3.2436932825401977e-06, + "loss": 0.2212621569633484, + "step": 5645 + }, + { + "epoch": 1.499269685300757, + "grad_norm": 1.293874995027958, + "learning_rate": 3.2404568848936325e-06, + "loss": 0.2487148940563202, + "step": 5646 + }, + { + "epoch": 1.4995352542823, + "grad_norm": 1.2610121684030642, + "learning_rate": 3.237221790407259e-06, + "loss": 0.29314422607421875, + "step": 5647 + }, + { + "epoch": 1.499800823263843, + "grad_norm": 1.1765702458871505, + "learning_rate": 3.233987999704763e-06, + "loss": 0.22727417945861816, + "step": 5648 + }, + { + "epoch": 1.5000663922453858, + "grad_norm": 1.1578089091098656, + "learning_rate": 3.230755513409585e-06, + "loss": 0.18877442181110382, + "step": 5649 + }, + { + "epoch": 1.5003319612269288, + "grad_norm": 1.2855274132536632, + "learning_rate": 3.2275243321449068e-06, + "loss": 0.2504552900791168, + "step": 5650 + }, + { + "epoch": 1.5005975302084718, + "grad_norm": 1.1905373910388852, + "learning_rate": 3.224294456533663e-06, + "loss": 0.23579174280166626, + "step": 5651 + }, + { + 
"epoch": 1.5008630991900147, + "grad_norm": 1.3692203179408873, + "learning_rate": 3.221065887198537e-06, + "loss": 0.29236793518066406, + "step": 5652 + }, + { + "epoch": 1.5011286681715577, + "grad_norm": 1.3245217175369617, + "learning_rate": 3.2178386247619577e-06, + "loss": 0.2735568881034851, + "step": 5653 + }, + { + "epoch": 1.5013942371531006, + "grad_norm": 1.240462888838021, + "learning_rate": 3.214612669846103e-06, + "loss": 0.2391616702079773, + "step": 5654 + }, + { + "epoch": 1.5016598061346436, + "grad_norm": 1.3766117264936455, + "learning_rate": 3.2113880230729e-06, + "loss": 0.24532485008239746, + "step": 5655 + }, + { + "epoch": 1.5019253751161865, + "grad_norm": 1.3310069624279295, + "learning_rate": 3.2081646850640215e-06, + "loss": 0.2605767250061035, + "step": 5656 + }, + { + "epoch": 1.5021909440977295, + "grad_norm": 1.2109489933208193, + "learning_rate": 3.2049426564408893e-06, + "loss": 0.2651350200176239, + "step": 5657 + }, + { + "epoch": 1.5024565130792724, + "grad_norm": 1.3305800775425032, + "learning_rate": 3.2017219378246734e-06, + "loss": 0.2719389498233795, + "step": 5658 + }, + { + "epoch": 1.5027220820608154, + "grad_norm": 1.2359239723239188, + "learning_rate": 3.198502529836288e-06, + "loss": 0.23077815771102905, + "step": 5659 + }, + { + "epoch": 1.5029876510423583, + "grad_norm": 1.0838054114896152, + "learning_rate": 3.1952844330964007e-06, + "loss": 0.21954959630966187, + "step": 5660 + }, + { + "epoch": 1.5032532200239013, + "grad_norm": 1.3480229773492907, + "learning_rate": 3.1920676482254186e-06, + "loss": 0.28229185938835144, + "step": 5661 + }, + { + "epoch": 1.5035187890054442, + "grad_norm": 1.2587796771658648, + "learning_rate": 3.1888521758435e-06, + "loss": 0.24612295627593994, + "step": 5662 + }, + { + "epoch": 1.5037843579869872, + "grad_norm": 1.2649379995915024, + "learning_rate": 3.185638016570555e-06, + "loss": 0.24191413819789886, + "step": 5663 + }, + { + "epoch": 1.5040499269685301, + "grad_norm": 
1.225446339219085, + "learning_rate": 3.1824251710262323e-06, + "loss": 0.2427935004234314, + "step": 5664 + }, + { + "epoch": 1.504315495950073, + "grad_norm": 1.2595635392757376, + "learning_rate": 3.17921363982993e-06, + "loss": 0.2600318193435669, + "step": 5665 + }, + { + "epoch": 1.504581064931616, + "grad_norm": 1.2817020254494476, + "learning_rate": 3.1760034236007954e-06, + "loss": 0.25215205550193787, + "step": 5666 + }, + { + "epoch": 1.504846633913159, + "grad_norm": 1.2568573714231897, + "learning_rate": 3.1727945229577183e-06, + "loss": 0.24460548162460327, + "step": 5667 + }, + { + "epoch": 1.505112202894702, + "grad_norm": 1.2881955251422392, + "learning_rate": 3.169586938519338e-06, + "loss": 0.2812577486038208, + "step": 5668 + }, + { + "epoch": 1.5053777718762449, + "grad_norm": 1.1272225605105841, + "learning_rate": 3.166380670904039e-06, + "loss": 0.23297616839408875, + "step": 5669 + }, + { + "epoch": 1.5056433408577878, + "grad_norm": 1.1954331932042688, + "learning_rate": 3.163175720729954e-06, + "loss": 0.21659572422504425, + "step": 5670 + }, + { + "epoch": 1.5059089098393308, + "grad_norm": 1.2142230208725098, + "learning_rate": 3.1599720886149508e-06, + "loss": 0.22246181964874268, + "step": 5671 + }, + { + "epoch": 1.5061744788208737, + "grad_norm": 1.132636194795227, + "learning_rate": 3.1567697751766624e-06, + "loss": 0.20020918548107147, + "step": 5672 + }, + { + "epoch": 1.5064400478024167, + "grad_norm": 1.363041735701654, + "learning_rate": 3.1535687810324523e-06, + "loss": 0.25693628191947937, + "step": 5673 + }, + { + "epoch": 1.5067056167839596, + "grad_norm": 1.5250673507385644, + "learning_rate": 3.150369106799436e-06, + "loss": 0.21841923892498016, + "step": 5674 + }, + { + "epoch": 1.5069711857655026, + "grad_norm": 1.1710254495806258, + "learning_rate": 3.1471707530944707e-06, + "loss": 0.18131780624389648, + "step": 5675 + }, + { + "epoch": 1.5072367547470455, + "grad_norm": 1.180596749481675, + "learning_rate": 
3.143973720534164e-06, + "loss": 0.22510449588298798, + "step": 5676 + }, + { + "epoch": 1.5075023237285885, + "grad_norm": 1.3952546557365002, + "learning_rate": 3.1407780097348627e-06, + "loss": 0.23721462488174438, + "step": 5677 + }, + { + "epoch": 1.5077678927101315, + "grad_norm": 1.2200574848273704, + "learning_rate": 3.1375836213126653e-06, + "loss": 0.24281899631023407, + "step": 5678 + }, + { + "epoch": 1.5080334616916744, + "grad_norm": 1.3211068465604292, + "learning_rate": 3.134390555883412e-06, + "loss": 0.23910081386566162, + "step": 5679 + }, + { + "epoch": 1.5082990306732174, + "grad_norm": 1.357027881520108, + "learning_rate": 3.1311988140626825e-06, + "loss": 0.2635132670402527, + "step": 5680 + }, + { + "epoch": 1.5085645996547603, + "grad_norm": 1.239638674575543, + "learning_rate": 3.1280083964658147e-06, + "loss": 0.24802634119987488, + "step": 5681 + }, + { + "epoch": 1.5088301686363033, + "grad_norm": 1.3861680174510138, + "learning_rate": 3.1248193037078823e-06, + "loss": 0.24081437289714813, + "step": 5682 + }, + { + "epoch": 1.5090957376178462, + "grad_norm": 1.2124748227090532, + "learning_rate": 3.121631536403701e-06, + "loss": 0.19550001621246338, + "step": 5683 + }, + { + "epoch": 1.5093613065993892, + "grad_norm": 1.309177755877421, + "learning_rate": 3.118445095167837e-06, + "loss": 0.2397807538509369, + "step": 5684 + }, + { + "epoch": 1.5096268755809321, + "grad_norm": 1.2243819490197418, + "learning_rate": 3.115259980614602e-06, + "loss": 0.2185651659965515, + "step": 5685 + }, + { + "epoch": 1.509892444562475, + "grad_norm": 1.2555724014592389, + "learning_rate": 3.1120761933580414e-06, + "loss": 0.22214055061340332, + "step": 5686 + }, + { + "epoch": 1.510158013544018, + "grad_norm": 1.4127254863789025, + "learning_rate": 3.108893734011955e-06, + "loss": 0.23971091210842133, + "step": 5687 + }, + { + "epoch": 1.510423582525561, + "grad_norm": 1.3331222718828735, + "learning_rate": 3.1057126031898843e-06, + "loss": 
0.26458197832107544, + "step": 5688 + }, + { + "epoch": 1.510689151507104, + "grad_norm": 1.3487790050882777, + "learning_rate": 3.1025328015051093e-06, + "loss": 0.23730339109897614, + "step": 5689 + }, + { + "epoch": 1.5109547204886469, + "grad_norm": 1.2964784198979393, + "learning_rate": 3.0993543295706653e-06, + "loss": 0.21981677412986755, + "step": 5690 + }, + { + "epoch": 1.5112202894701898, + "grad_norm": 1.1812817656913812, + "learning_rate": 3.0961771879993206e-06, + "loss": 0.21984878182411194, + "step": 5691 + }, + { + "epoch": 1.5114858584517328, + "grad_norm": 1.2732802047873515, + "learning_rate": 3.093001377403592e-06, + "loss": 0.23086440563201904, + "step": 5692 + }, + { + "epoch": 1.5117514274332757, + "grad_norm": 2.3681680891314953, + "learning_rate": 3.0898268983957368e-06, + "loss": 0.2355024814605713, + "step": 5693 + }, + { + "epoch": 1.5120169964148187, + "grad_norm": 1.3061363772251866, + "learning_rate": 3.0866537515877584e-06, + "loss": 0.21210229396820068, + "step": 5694 + }, + { + "epoch": 1.5122825653963616, + "grad_norm": 1.3436771657394675, + "learning_rate": 3.0834819375914003e-06, + "loss": 0.2387622594833374, + "step": 5695 + }, + { + "epoch": 1.5125481343779046, + "grad_norm": 1.3482258979232278, + "learning_rate": 3.0803114570181527e-06, + "loss": 0.23822402954101562, + "step": 5696 + }, + { + "epoch": 1.5128137033594475, + "grad_norm": 1.3248058910768958, + "learning_rate": 3.0771423104792454e-06, + "loss": 0.26844173669815063, + "step": 5697 + }, + { + "epoch": 1.5130792723409905, + "grad_norm": 1.2131778927640824, + "learning_rate": 3.07397449858565e-06, + "loss": 0.23288767039775848, + "step": 5698 + }, + { + "epoch": 1.5133448413225334, + "grad_norm": 1.2716046597052009, + "learning_rate": 3.0708080219480896e-06, + "loss": 0.23273086547851562, + "step": 5699 + }, + { + "epoch": 1.5136104103040764, + "grad_norm": 1.4240236624695346, + "learning_rate": 3.067642881177023e-06, + "loss": 0.2505509555339813, + "step": 5700 + 
}, + { + "epoch": 1.5138759792856193, + "grad_norm": 1.1441752919653974, + "learning_rate": 3.0644790768826473e-06, + "loss": 0.22801508009433746, + "step": 5701 + }, + { + "epoch": 1.5141415482671623, + "grad_norm": 1.1462347465841034, + "learning_rate": 3.061316609674908e-06, + "loss": 0.2110593169927597, + "step": 5702 + }, + { + "epoch": 1.5144071172487052, + "grad_norm": 1.2145033288630525, + "learning_rate": 3.0581554801634927e-06, + "loss": 0.22201795876026154, + "step": 5703 + }, + { + "epoch": 1.5146726862302482, + "grad_norm": 1.2993896506173446, + "learning_rate": 3.054995688957829e-06, + "loss": 0.23104460537433624, + "step": 5704 + }, + { + "epoch": 1.5149382552117912, + "grad_norm": 1.5590161841107484, + "learning_rate": 3.0518372366670877e-06, + "loss": 0.23373261094093323, + "step": 5705 + }, + { + "epoch": 1.515203824193334, + "grad_norm": 1.368121139637646, + "learning_rate": 3.0486801239001806e-06, + "loss": 0.2404957264661789, + "step": 5706 + }, + { + "epoch": 1.515469393174877, + "grad_norm": 1.2346548477581518, + "learning_rate": 3.0455243512657606e-06, + "loss": 0.23209382593631744, + "step": 5707 + }, + { + "epoch": 1.51573496215642, + "grad_norm": 1.156984368318911, + "learning_rate": 3.042369919372228e-06, + "loss": 0.218237042427063, + "step": 5708 + }, + { + "epoch": 1.516000531137963, + "grad_norm": 12.380411974697722, + "learning_rate": 3.039216828827717e-06, + "loss": 0.25025027990341187, + "step": 5709 + }, + { + "epoch": 1.516266100119506, + "grad_norm": 1.3454644235463973, + "learning_rate": 3.036065080240106e-06, + "loss": 0.24729448556900024, + "step": 5710 + }, + { + "epoch": 1.5165316691010489, + "grad_norm": 1.246980236713752, + "learning_rate": 3.032914674217017e-06, + "loss": 0.23614796996116638, + "step": 5711 + }, + { + "epoch": 1.5167972380825918, + "grad_norm": 1.1947534591327391, + "learning_rate": 3.029765611365808e-06, + "loss": 0.2313452661037445, + "step": 5712 + }, + { + "epoch": 1.5170628070641348, + "grad_norm": 
1.2169352172923076, + "learning_rate": 3.0266178922935842e-06, + "loss": 0.22152003645896912, + "step": 5713 + }, + { + "epoch": 1.5173283760456777, + "grad_norm": 1.3132034423317465, + "learning_rate": 3.0234715176071874e-06, + "loss": 0.25942179560661316, + "step": 5714 + }, + { + "epoch": 1.5175939450272207, + "grad_norm": 1.213532583392701, + "learning_rate": 3.0203264879132e-06, + "loss": 0.25030237436294556, + "step": 5715 + }, + { + "epoch": 1.5178595140087636, + "grad_norm": 1.212709044397772, + "learning_rate": 3.0171828038179497e-06, + "loss": 0.2025807797908783, + "step": 5716 + }, + { + "epoch": 1.5181250829903066, + "grad_norm": 1.3035190960753136, + "learning_rate": 3.014040465927499e-06, + "loss": 0.20455190539360046, + "step": 5717 + }, + { + "epoch": 1.5183906519718495, + "grad_norm": 1.2171025232725439, + "learning_rate": 3.010899474847655e-06, + "loss": 0.24197113513946533, + "step": 5718 + }, + { + "epoch": 1.5186562209533925, + "grad_norm": 1.243656057613246, + "learning_rate": 3.007759831183964e-06, + "loss": 0.22290384769439697, + "step": 5719 + }, + { + "epoch": 1.5189217899349357, + "grad_norm": 1.133911078511842, + "learning_rate": 3.0046215355417117e-06, + "loss": 0.23087520897388458, + "step": 5720 + }, + { + "epoch": 1.5191873589164786, + "grad_norm": 1.3329430419316783, + "learning_rate": 3.0014845885259236e-06, + "loss": 0.24425405263900757, + "step": 5721 + }, + { + "epoch": 1.5194529278980216, + "grad_norm": 1.310265396817766, + "learning_rate": 2.9983489907413675e-06, + "loss": 0.24888862669467926, + "step": 5722 + }, + { + "epoch": 1.5197184968795645, + "grad_norm": 1.3023172954247402, + "learning_rate": 2.9952147427925493e-06, + "loss": 0.23556756973266602, + "step": 5723 + }, + { + "epoch": 1.5199840658611075, + "grad_norm": 1.3924872169111115, + "learning_rate": 2.992081845283715e-06, + "loss": 0.2532619833946228, + "step": 5724 + }, + { + "epoch": 1.5202496348426504, + "grad_norm": 1.3351422936737996, + "learning_rate": 
2.988950298818848e-06, + "loss": 0.2574974000453949, + "step": 5725 + }, + { + "epoch": 1.5205152038241934, + "grad_norm": 1.1244851887087242, + "learning_rate": 2.9858201040016775e-06, + "loss": 0.21997734904289246, + "step": 5726 + }, + { + "epoch": 1.5207807728057363, + "grad_norm": 1.3952335702566243, + "learning_rate": 2.982691261435666e-06, + "loss": 0.2174127697944641, + "step": 5727 + }, + { + "epoch": 1.5210463417872793, + "grad_norm": 1.4277294646697747, + "learning_rate": 2.979563771724019e-06, + "loss": 0.22455093264579773, + "step": 5728 + }, + { + "epoch": 1.5213119107688222, + "grad_norm": 1.2606427849530746, + "learning_rate": 2.976437635469678e-06, + "loss": 0.270727276802063, + "step": 5729 + }, + { + "epoch": 1.5215774797503652, + "grad_norm": 1.1901052998095392, + "learning_rate": 2.9733128532753254e-06, + "loss": 0.2233714610338211, + "step": 5730 + }, + { + "epoch": 1.5218430487319081, + "grad_norm": 1.364720864117707, + "learning_rate": 2.970189425743383e-06, + "loss": 0.23599566519260406, + "step": 5731 + }, + { + "epoch": 1.522108617713451, + "grad_norm": 1.2707197493270106, + "learning_rate": 2.967067353476011e-06, + "loss": 0.23598654568195343, + "step": 5732 + }, + { + "epoch": 1.522374186694994, + "grad_norm": 1.1793549120144597, + "learning_rate": 2.963946637075107e-06, + "loss": 0.205197274684906, + "step": 5733 + }, + { + "epoch": 1.522639755676537, + "grad_norm": 1.1887492971446227, + "learning_rate": 2.9608272771423073e-06, + "loss": 0.23581506311893463, + "step": 5734 + }, + { + "epoch": 1.52290532465808, + "grad_norm": 1.2937911951812968, + "learning_rate": 2.9577092742789915e-06, + "loss": 0.2088197022676468, + "step": 5735 + }, + { + "epoch": 1.5231708936396229, + "grad_norm": 1.2943182118738674, + "learning_rate": 2.95459262908627e-06, + "loss": 0.22607067227363586, + "step": 5736 + }, + { + "epoch": 1.5234364626211658, + "grad_norm": 1.1748118237242067, + "learning_rate": 2.951477342164998e-06, + "loss": 0.22242344915866852, 
+ "step": 5737 + }, + { + "epoch": 1.5237020316027088, + "grad_norm": 1.3280405020263697, + "learning_rate": 2.9483634141157636e-06, + "loss": 0.25626271963119507, + "step": 5738 + }, + { + "epoch": 1.5239676005842517, + "grad_norm": 1.2212084732536523, + "learning_rate": 2.9452508455388975e-06, + "loss": 0.2241421341896057, + "step": 5739 + }, + { + "epoch": 1.5242331695657947, + "grad_norm": 1.5088982481303157, + "learning_rate": 2.9421396370344648e-06, + "loss": 0.2191103994846344, + "step": 5740 + }, + { + "epoch": 1.5244987385473376, + "grad_norm": 1.2411878451658047, + "learning_rate": 2.9390297892022703e-06, + "loss": 0.26252660155296326, + "step": 5741 + }, + { + "epoch": 1.5247643075288806, + "grad_norm": 1.3964551352557335, + "learning_rate": 2.9359213026418567e-06, + "loss": 0.21522507071495056, + "step": 5742 + }, + { + "epoch": 1.5250298765104235, + "grad_norm": 1.0905013771622027, + "learning_rate": 2.932814177952499e-06, + "loss": 0.20159044861793518, + "step": 5743 + }, + { + "epoch": 1.5252954454919665, + "grad_norm": 1.138416177249403, + "learning_rate": 2.929708415733221e-06, + "loss": 0.22679558396339417, + "step": 5744 + }, + { + "epoch": 1.5255610144735094, + "grad_norm": 1.199157018703913, + "learning_rate": 2.926604016582776e-06, + "loss": 0.2315664291381836, + "step": 5745 + }, + { + "epoch": 1.5258265834550524, + "grad_norm": 1.2568252329386058, + "learning_rate": 2.923500981099652e-06, + "loss": 0.229634091258049, + "step": 5746 + }, + { + "epoch": 1.5260921524365954, + "grad_norm": 1.2179751735416722, + "learning_rate": 2.9203993098820793e-06, + "loss": 0.20657674968242645, + "step": 5747 + }, + { + "epoch": 1.5263577214181385, + "grad_norm": 1.2447733239425043, + "learning_rate": 2.9172990035280237e-06, + "loss": 0.2306358814239502, + "step": 5748 + }, + { + "epoch": 1.5266232903996815, + "grad_norm": 1.2950411042959078, + "learning_rate": 2.9142000626351875e-06, + "loss": 0.2608031928539276, + "step": 5749 + }, + { + "epoch": 
1.5268888593812244, + "grad_norm": 1.337100599856471, + "learning_rate": 2.911102487801013e-06, + "loss": 0.24675670266151428, + "step": 5750 + }, + { + "epoch": 1.5271544283627674, + "grad_norm": 1.3568337572597398, + "learning_rate": 2.908006279622667e-06, + "loss": 0.22544966638088226, + "step": 5751 + }, + { + "epoch": 1.5274199973443103, + "grad_norm": 1.3214418017258782, + "learning_rate": 2.904911438697071e-06, + "loss": 0.2328556478023529, + "step": 5752 + }, + { + "epoch": 1.5276855663258533, + "grad_norm": 1.25396823790717, + "learning_rate": 2.901817965620871e-06, + "loss": 0.2316005825996399, + "step": 5753 + }, + { + "epoch": 1.5279511353073962, + "grad_norm": 1.2976508240318196, + "learning_rate": 2.8987258609904522e-06, + "loss": 0.2332756370306015, + "step": 5754 + }, + { + "epoch": 1.5282167042889392, + "grad_norm": 1.3432276903845415, + "learning_rate": 2.8956351254019355e-06, + "loss": 0.24855142831802368, + "step": 5755 + }, + { + "epoch": 1.5284822732704821, + "grad_norm": 1.2138875439685706, + "learning_rate": 2.8925457594511775e-06, + "loss": 0.18745368719100952, + "step": 5756 + }, + { + "epoch": 1.528747842252025, + "grad_norm": 1.877743895818308, + "learning_rate": 2.889457763733774e-06, + "loss": 0.22402942180633545, + "step": 5757 + }, + { + "epoch": 1.529013411233568, + "grad_norm": 1.292567134146249, + "learning_rate": 2.886371138845051e-06, + "loss": 0.2156108319759369, + "step": 5758 + }, + { + "epoch": 1.529278980215111, + "grad_norm": 1.2848231417758293, + "learning_rate": 2.883285885380076e-06, + "loss": 0.22866520285606384, + "step": 5759 + }, + { + "epoch": 1.529544549196654, + "grad_norm": 1.2907471990668473, + "learning_rate": 2.880202003933645e-06, + "loss": 0.2486938238143921, + "step": 5760 + }, + { + "epoch": 1.529810118178197, + "grad_norm": 1.34098643692872, + "learning_rate": 2.877119495100301e-06, + "loss": 0.2565295696258545, + "step": 5761 + }, + { + "epoch": 1.5300756871597399, + "grad_norm": 1.1480290388256142, + 
"learning_rate": 2.8740383594743116e-06, + "loss": 0.21510455012321472, + "step": 5762 + }, + { + "epoch": 1.5303412561412828, + "grad_norm": 1.266250058472157, + "learning_rate": 2.8709585976496825e-06, + "loss": 0.2122025489807129, + "step": 5763 + }, + { + "epoch": 1.5306068251228258, + "grad_norm": 1.3017513152107745, + "learning_rate": 2.8678802102201575e-06, + "loss": 0.24274399876594543, + "step": 5764 + }, + { + "epoch": 1.5308723941043687, + "grad_norm": 1.4573413266326471, + "learning_rate": 2.864803197779216e-06, + "loss": 0.22325341403484344, + "step": 5765 + }, + { + "epoch": 1.5311379630859117, + "grad_norm": 1.3303976558080437, + "learning_rate": 2.8617275609200625e-06, + "loss": 0.25205284357070923, + "step": 5766 + }, + { + "epoch": 1.5314035320674546, + "grad_norm": 1.2638986714524767, + "learning_rate": 2.8586533002356465e-06, + "loss": 0.2047557830810547, + "step": 5767 + }, + { + "epoch": 1.5316691010489976, + "grad_norm": 1.2195584514594966, + "learning_rate": 2.8555804163186508e-06, + "loss": 0.2166992425918579, + "step": 5768 + }, + { + "epoch": 1.5319346700305405, + "grad_norm": 1.2333416807696795, + "learning_rate": 2.8525089097614867e-06, + "loss": 0.26253193616867065, + "step": 5769 + }, + { + "epoch": 1.5322002390120835, + "grad_norm": 1.2030637435961495, + "learning_rate": 2.8494387811563108e-06, + "loss": 0.23307687044143677, + "step": 5770 + }, + { + "epoch": 1.5324658079936264, + "grad_norm": 1.2191481171426857, + "learning_rate": 2.8463700310950047e-06, + "loss": 0.22128549218177795, + "step": 5771 + }, + { + "epoch": 1.5327313769751694, + "grad_norm": 1.272136705974986, + "learning_rate": 2.8433026601691883e-06, + "loss": 0.21966281533241272, + "step": 5772 + }, + { + "epoch": 1.5329969459567123, + "grad_norm": 1.341088625881783, + "learning_rate": 2.840236668970213e-06, + "loss": 0.22869305312633514, + "step": 5773 + }, + { + "epoch": 1.5332625149382553, + "grad_norm": 1.2257027323986465, + "learning_rate": 2.837172058089167e-06, 
+ "loss": 0.21431279182434082, + "step": 5774 + }, + { + "epoch": 1.5335280839197982, + "grad_norm": 1.3512853622822856, + "learning_rate": 2.8341088281168693e-06, + "loss": 0.24610282480716705, + "step": 5775 + }, + { + "epoch": 1.5337936529013412, + "grad_norm": 1.3400303957635655, + "learning_rate": 2.8310469796438767e-06, + "loss": 0.24414925277233124, + "step": 5776 + }, + { + "epoch": 1.5340592218828841, + "grad_norm": 1.3597459613858938, + "learning_rate": 2.8279865132604766e-06, + "loss": 0.2330513596534729, + "step": 5777 + }, + { + "epoch": 1.534324790864427, + "grad_norm": 1.2551411616890042, + "learning_rate": 2.8249274295566863e-06, + "loss": 0.23048308491706848, + "step": 5778 + }, + { + "epoch": 1.53459035984597, + "grad_norm": 1.2566974883874766, + "learning_rate": 2.821869729122273e-06, + "loss": 0.2411375492811203, + "step": 5779 + }, + { + "epoch": 1.534855928827513, + "grad_norm": 1.384873838300398, + "learning_rate": 2.818813412546715e-06, + "loss": 0.22985543310642242, + "step": 5780 + }, + { + "epoch": 1.535121497809056, + "grad_norm": 1.320574666083159, + "learning_rate": 2.815758480419235e-06, + "loss": 0.20867247879505157, + "step": 5781 + }, + { + "epoch": 1.5353870667905989, + "grad_norm": 2.0414068761810182, + "learning_rate": 2.8127049333287913e-06, + "loss": 0.26378586888313293, + "step": 5782 + }, + { + "epoch": 1.5356526357721418, + "grad_norm": 1.552041032509997, + "learning_rate": 2.8096527718640687e-06, + "loss": 0.2690306305885315, + "step": 5783 + }, + { + "epoch": 1.5359182047536848, + "grad_norm": 1.1602606034579108, + "learning_rate": 2.8066019966134907e-06, + "loss": 0.22226165235042572, + "step": 5784 + }, + { + "epoch": 1.5361837737352277, + "grad_norm": 1.2201060637055436, + "learning_rate": 2.803552608165209e-06, + "loss": 0.23370322585105896, + "step": 5785 + }, + { + "epoch": 1.5364493427167707, + "grad_norm": 1.3067141176486328, + "learning_rate": 2.8005046071071107e-06, + "loss": 0.26137909293174744, + "step": 5786 
+ }, + { + "epoch": 1.5367149116983136, + "grad_norm": 1.3588127622676833, + "learning_rate": 2.7974579940268096e-06, + "loss": 0.22630617022514343, + "step": 5787 + }, + { + "epoch": 1.5369804806798566, + "grad_norm": 1.2356618590652273, + "learning_rate": 2.7944127695116663e-06, + "loss": 0.22641140222549438, + "step": 5788 + }, + { + "epoch": 1.5372460496613995, + "grad_norm": 1.266648551925957, + "learning_rate": 2.791368934148757e-06, + "loss": 0.19647541642189026, + "step": 5789 + }, + { + "epoch": 1.5375116186429425, + "grad_norm": 1.212906210017999, + "learning_rate": 2.788326488524901e-06, + "loss": 0.22399532794952393, + "step": 5790 + }, + { + "epoch": 1.5377771876244855, + "grad_norm": 1.2862970389756843, + "learning_rate": 2.7852854332266434e-06, + "loss": 0.22549685835838318, + "step": 5791 + }, + { + "epoch": 1.5380427566060284, + "grad_norm": 1.168406987557996, + "learning_rate": 2.7822457688402637e-06, + "loss": 0.2129821628332138, + "step": 5792 + }, + { + "epoch": 1.5383083255875714, + "grad_norm": 1.2301298306170827, + "learning_rate": 2.7792074959517755e-06, + "loss": 0.25330638885498047, + "step": 5793 + }, + { + "epoch": 1.5385738945691143, + "grad_norm": 1.3148661968254225, + "learning_rate": 2.7761706151469204e-06, + "loss": 0.2413945198059082, + "step": 5794 + }, + { + "epoch": 1.5388394635506573, + "grad_norm": 1.2551515744231165, + "learning_rate": 2.773135127011174e-06, + "loss": 0.21930523216724396, + "step": 5795 + }, + { + "epoch": 1.5391050325322002, + "grad_norm": 1.2506577052831476, + "learning_rate": 2.7701010321297416e-06, + "loss": 0.25499141216278076, + "step": 5796 + }, + { + "epoch": 1.5393706015137432, + "grad_norm": 1.1567311669751301, + "learning_rate": 2.7670683310875613e-06, + "loss": 0.19475680589675903, + "step": 5797 + }, + { + "epoch": 1.5396361704952861, + "grad_norm": 1.3159422945276043, + "learning_rate": 2.7640370244693026e-06, + "loss": 0.22155825793743134, + "step": 5798 + }, + { + "epoch": 1.539901739476829, 
+ "grad_norm": 1.1818601031709017, + "learning_rate": 2.761007112859365e-06, + "loss": 0.2146138846874237, + "step": 5799 + }, + { + "epoch": 1.540167308458372, + "grad_norm": 1.146035478957987, + "learning_rate": 2.7579785968418804e-06, + "loss": 0.22698411345481873, + "step": 5800 + }, + { + "epoch": 1.540432877439915, + "grad_norm": 1.2904710642906891, + "learning_rate": 2.75495147700071e-06, + "loss": 0.23889532685279846, + "step": 5801 + }, + { + "epoch": 1.540698446421458, + "grad_norm": 1.2353012354195356, + "learning_rate": 2.7519257539194488e-06, + "loss": 0.2514609694480896, + "step": 5802 + }, + { + "epoch": 1.5409640154030009, + "grad_norm": 1.2405153867334813, + "learning_rate": 2.7489014281814185e-06, + "loss": 0.22332100570201874, + "step": 5803 + }, + { + "epoch": 1.5412295843845438, + "grad_norm": 1.1768236369414826, + "learning_rate": 2.745878500369673e-06, + "loss": 0.21316683292388916, + "step": 5804 + }, + { + "epoch": 1.5414951533660868, + "grad_norm": 1.2446325297163028, + "learning_rate": 2.742856971066996e-06, + "loss": 0.2228018194437027, + "step": 5805 + }, + { + "epoch": 1.5417607223476297, + "grad_norm": 1.3243067869686356, + "learning_rate": 2.7398368408559084e-06, + "loss": 0.22217239439487457, + "step": 5806 + }, + { + "epoch": 1.5420262913291727, + "grad_norm": 1.331116794742511, + "learning_rate": 2.736818110318652e-06, + "loss": 0.21147233247756958, + "step": 5807 + }, + { + "epoch": 1.5422918603107156, + "grad_norm": 1.2851526092309566, + "learning_rate": 2.7338007800372024e-06, + "loss": 0.23844698071479797, + "step": 5808 + }, + { + "epoch": 1.5425574292922586, + "grad_norm": 1.3238454632326748, + "learning_rate": 2.7307848505932653e-06, + "loss": 0.2361423820257187, + "step": 5809 + }, + { + "epoch": 1.5428229982738015, + "grad_norm": 1.1977956377916248, + "learning_rate": 2.727770322568277e-06, + "loss": 0.21585656702518463, + "step": 5810 + }, + { + "epoch": 1.5430885672553445, + "grad_norm": 1.172295737533699, + 
"learning_rate": 2.724757196543403e-06, + "loss": 0.233969584107399, + "step": 5811 + }, + { + "epoch": 1.5433541362368874, + "grad_norm": 1.3309852612756656, + "learning_rate": 2.7217454730995363e-06, + "loss": 0.25040164589881897, + "step": 5812 + }, + { + "epoch": 1.5436197052184304, + "grad_norm": 1.5198455877328005, + "learning_rate": 2.7187351528173046e-06, + "loss": 0.25848713517189026, + "step": 5813 + }, + { + "epoch": 1.5438852741999733, + "grad_norm": 1.409976572144199, + "learning_rate": 2.715726236277061e-06, + "loss": 0.22255051136016846, + "step": 5814 + }, + { + "epoch": 1.5441508431815163, + "grad_norm": 1.1799889920310853, + "learning_rate": 2.7127187240588883e-06, + "loss": 0.1882694661617279, + "step": 5815 + }, + { + "epoch": 1.5444164121630592, + "grad_norm": 1.178741445510241, + "learning_rate": 2.7097126167426002e-06, + "loss": 0.20070400834083557, + "step": 5816 + }, + { + "epoch": 1.5446819811446022, + "grad_norm": 1.2959554460073714, + "learning_rate": 2.706707914907739e-06, + "loss": 0.25316092371940613, + "step": 5817 + }, + { + "epoch": 1.5449475501261452, + "grad_norm": 1.334925654094324, + "learning_rate": 2.703704619133576e-06, + "loss": 0.24665585160255432, + "step": 5818 + }, + { + "epoch": 1.545213119107688, + "grad_norm": 1.290703779819622, + "learning_rate": 2.7007027299991095e-06, + "loss": 0.24172846972942352, + "step": 5819 + }, + { + "epoch": 1.545478688089231, + "grad_norm": 1.2781945872260183, + "learning_rate": 2.6977022480830708e-06, + "loss": 0.2405129075050354, + "step": 5820 + }, + { + "epoch": 1.545744257070774, + "grad_norm": 1.075296946307477, + "learning_rate": 2.694703173963914e-06, + "loss": 0.19716276228427887, + "step": 5821 + }, + { + "epoch": 1.546009826052317, + "grad_norm": 1.1434881656258093, + "learning_rate": 2.6917055082198284e-06, + "loss": 0.20343703031539917, + "step": 5822 + }, + { + "epoch": 1.54627539503386, + "grad_norm": 1.5985849963050902, + "learning_rate": 2.688709251428725e-06, + "loss": 
0.24382619559764862, + "step": 5823 + }, + { + "epoch": 1.5465409640154029, + "grad_norm": 1.7314575476063523, + "learning_rate": 2.6857144041682514e-06, + "loss": 0.2962399423122406, + "step": 5824 + }, + { + "epoch": 1.5468065329969458, + "grad_norm": 1.2699118659079873, + "learning_rate": 2.6827209670157774e-06, + "loss": 0.24034687876701355, + "step": 5825 + }, + { + "epoch": 1.5470721019784888, + "grad_norm": 1.3757632125147359, + "learning_rate": 2.6797289405484016e-06, + "loss": 0.2575085163116455, + "step": 5826 + }, + { + "epoch": 1.5473376709600317, + "grad_norm": 1.556424910652697, + "learning_rate": 2.6767383253429515e-06, + "loss": 0.2586629092693329, + "step": 5827 + }, + { + "epoch": 1.5476032399415747, + "grad_norm": 1.096117045688234, + "learning_rate": 2.6737491219759815e-06, + "loss": 0.18447624146938324, + "step": 5828 + }, + { + "epoch": 1.5478688089231176, + "grad_norm": 1.3930188378643134, + "learning_rate": 2.670761331023779e-06, + "loss": 0.244853213429451, + "step": 5829 + }, + { + "epoch": 1.5481343779046606, + "grad_norm": 1.3163693020327074, + "learning_rate": 2.66777495306235e-06, + "loss": 0.24641919136047363, + "step": 5830 + }, + { + "epoch": 1.5483999468862035, + "grad_norm": 1.4086337954424433, + "learning_rate": 2.6647899886674323e-06, + "loss": 0.2364550232887268, + "step": 5831 + }, + { + "epoch": 1.5486655158677467, + "grad_norm": 1.1695450852938096, + "learning_rate": 2.6618064384144925e-06, + "loss": 0.17760278284549713, + "step": 5832 + }, + { + "epoch": 1.5489310848492897, + "grad_norm": 1.1988872335295608, + "learning_rate": 2.6588243028787274e-06, + "loss": 0.18571510910987854, + "step": 5833 + }, + { + "epoch": 1.5491966538308326, + "grad_norm": 1.2537289047953852, + "learning_rate": 2.655843582635057e-06, + "loss": 0.23693162202835083, + "step": 5834 + }, + { + "epoch": 1.5494622228123756, + "grad_norm": 1.3552352092705502, + "learning_rate": 2.652864278258126e-06, + "loss": 0.26481011509895325, + "step": 5835 + }, + { 
+ "epoch": 1.5497277917939185, + "grad_norm": 1.4182429828127188, + "learning_rate": 2.6498863903223115e-06, + "loss": 0.23405003547668457, + "step": 5836 + }, + { + "epoch": 1.5499933607754615, + "grad_norm": 2.5576796684815686, + "learning_rate": 2.6469099194017144e-06, + "loss": 0.20662814378738403, + "step": 5837 + }, + { + "epoch": 1.5502589297570044, + "grad_norm": 1.3124069479853646, + "learning_rate": 2.6439348660701634e-06, + "loss": 0.2722313404083252, + "step": 5838 + }, + { + "epoch": 1.5505244987385474, + "grad_norm": 1.3906100112719377, + "learning_rate": 2.6409612309012134e-06, + "loss": 0.2288864552974701, + "step": 5839 + }, + { + "epoch": 1.5507900677200903, + "grad_norm": 1.322570753297788, + "learning_rate": 2.6379890144681464e-06, + "loss": 0.2286190539598465, + "step": 5840 + }, + { + "epoch": 1.5510556367016333, + "grad_norm": 1.2231420705695173, + "learning_rate": 2.6350182173439666e-06, + "loss": 0.22478938102722168, + "step": 5841 + }, + { + "epoch": 1.5513212056831762, + "grad_norm": 1.415848841276022, + "learning_rate": 2.6320488401014166e-06, + "loss": 0.2520615756511688, + "step": 5842 + }, + { + "epoch": 1.5515867746647192, + "grad_norm": 1.3741284890856262, + "learning_rate": 2.629080883312952e-06, + "loss": 0.2121289074420929, + "step": 5843 + }, + { + "epoch": 1.5518523436462621, + "grad_norm": 1.3092311759839703, + "learning_rate": 2.6261143475507656e-06, + "loss": 0.2252352237701416, + "step": 5844 + }, + { + "epoch": 1.552117912627805, + "grad_norm": 1.191285245143269, + "learning_rate": 2.6231492333867626e-06, + "loss": 0.21188892424106598, + "step": 5845 + }, + { + "epoch": 1.552383481609348, + "grad_norm": 1.1276138403597054, + "learning_rate": 2.6201855413925857e-06, + "loss": 0.21534699201583862, + "step": 5846 + }, + { + "epoch": 1.552649050590891, + "grad_norm": 1.2849885490704696, + "learning_rate": 2.6172232721395998e-06, + "loss": 0.21781614422798157, + "step": 5847 + }, + { + "epoch": 1.552914619572434, + "grad_norm": 
1.3317886914724781, + "learning_rate": 2.6142624261988947e-06, + "loss": 0.2476508915424347, + "step": 5848 + }, + { + "epoch": 1.5531801885539769, + "grad_norm": 1.3439658215829489, + "learning_rate": 2.611303004141287e-06, + "loss": 0.2692151665687561, + "step": 5849 + }, + { + "epoch": 1.5534457575355198, + "grad_norm": 1.2839746536411722, + "learning_rate": 2.6083450065373163e-06, + "loss": 0.24868687987327576, + "step": 5850 + }, + { + "epoch": 1.5537113265170628, + "grad_norm": 1.2704813852574235, + "learning_rate": 2.6053884339572543e-06, + "loss": 0.24215853214263916, + "step": 5851 + }, + { + "epoch": 1.5539768954986057, + "grad_norm": 1.2100819665594098, + "learning_rate": 2.602433286971091e-06, + "loss": 0.2157444804906845, + "step": 5852 + }, + { + "epoch": 1.5542424644801487, + "grad_norm": 1.369237575424674, + "learning_rate": 2.599479566148544e-06, + "loss": 0.22152379155158997, + "step": 5853 + }, + { + "epoch": 1.5545080334616916, + "grad_norm": 1.1930490692336162, + "learning_rate": 2.596527272059055e-06, + "loss": 0.2278299182653427, + "step": 5854 + }, + { + "epoch": 1.5547736024432346, + "grad_norm": 1.406485645097326, + "learning_rate": 2.593576405271793e-06, + "loss": 0.23183950781822205, + "step": 5855 + }, + { + "epoch": 1.5550391714247775, + "grad_norm": 1.209726796816396, + "learning_rate": 2.5906269663556484e-06, + "loss": 0.22167566418647766, + "step": 5856 + }, + { + "epoch": 1.5553047404063205, + "grad_norm": 1.1790986825354977, + "learning_rate": 2.5876789558792403e-06, + "loss": 0.24111366271972656, + "step": 5857 + }, + { + "epoch": 1.5555703093878634, + "grad_norm": 1.1706391072024214, + "learning_rate": 2.5847323744109087e-06, + "loss": 0.2090388983488083, + "step": 5858 + }, + { + "epoch": 1.5558358783694064, + "grad_norm": 1.2588154614837785, + "learning_rate": 2.58178722251872e-06, + "loss": 0.2087189108133316, + "step": 5859 + }, + { + "epoch": 1.5561014473509496, + "grad_norm": 1.300626487965864, + "learning_rate": 
2.578843500770465e-06, + "loss": 0.2277342677116394, + "step": 5860 + }, + { + "epoch": 1.5563670163324925, + "grad_norm": 1.3517116904487896, + "learning_rate": 2.57590120973366e-06, + "loss": 0.2204241305589676, + "step": 5861 + }, + { + "epoch": 1.5566325853140355, + "grad_norm": 1.213807933631201, + "learning_rate": 2.5729603499755416e-06, + "loss": 0.2138606607913971, + "step": 5862 + }, + { + "epoch": 1.5568981542955784, + "grad_norm": 1.4669648743657906, + "learning_rate": 2.5700209220630733e-06, + "loss": 0.21257862448692322, + "step": 5863 + }, + { + "epoch": 1.5571637232771214, + "grad_norm": 1.2314998246120414, + "learning_rate": 2.5670829265629437e-06, + "loss": 0.20991909503936768, + "step": 5864 + }, + { + "epoch": 1.5574292922586643, + "grad_norm": 1.294980658460416, + "learning_rate": 2.5641463640415633e-06, + "loss": 0.23745422065258026, + "step": 5865 + }, + { + "epoch": 1.5576948612402073, + "grad_norm": 1.2425796180120088, + "learning_rate": 2.561211235065065e-06, + "loss": 0.21482989192008972, + "step": 5866 + }, + { + "epoch": 1.5579604302217502, + "grad_norm": 1.008120888370748, + "learning_rate": 2.558277540199309e-06, + "loss": 0.17866572737693787, + "step": 5867 + }, + { + "epoch": 1.5582259992032932, + "grad_norm": 1.2966262005019353, + "learning_rate": 2.555345280009872e-06, + "loss": 0.223822683095932, + "step": 5868 + }, + { + "epoch": 1.5584915681848361, + "grad_norm": 1.339606961190666, + "learning_rate": 2.552414455062068e-06, + "loss": 0.2293519228696823, + "step": 5869 + }, + { + "epoch": 1.558757137166379, + "grad_norm": 1.3023504432012787, + "learning_rate": 2.5494850659209203e-06, + "loss": 0.2556726038455963, + "step": 5870 + }, + { + "epoch": 1.559022706147922, + "grad_norm": 1.255574464472328, + "learning_rate": 2.546557113151181e-06, + "loss": 0.26891303062438965, + "step": 5871 + }, + { + "epoch": 1.559288275129465, + "grad_norm": 1.1754509839553133, + "learning_rate": 2.5436305973173257e-06, + "loss": 0.19510813057422638, 
+ "step": 5872 + }, + { + "epoch": 1.559553844111008, + "grad_norm": 1.2819966401856495, + "learning_rate": 2.5407055189835518e-06, + "loss": 0.22906547784805298, + "step": 5873 + }, + { + "epoch": 1.559819413092551, + "grad_norm": 1.3121165067922245, + "learning_rate": 2.5377818787137788e-06, + "loss": 0.25452786684036255, + "step": 5874 + }, + { + "epoch": 1.5600849820740939, + "grad_norm": 1.2743199898597464, + "learning_rate": 2.5348596770716503e-06, + "loss": 0.205597922205925, + "step": 5875 + }, + { + "epoch": 1.5603505510556368, + "grad_norm": 1.3020148941868286, + "learning_rate": 2.5319389146205344e-06, + "loss": 0.24009352922439575, + "step": 5876 + }, + { + "epoch": 1.5606161200371798, + "grad_norm": 1.433983972963341, + "learning_rate": 2.5290195919235173e-06, + "loss": 0.23381268978118896, + "step": 5877 + }, + { + "epoch": 1.5608816890187227, + "grad_norm": 1.1554092234943296, + "learning_rate": 2.52610170954341e-06, + "loss": 0.2267276644706726, + "step": 5878 + }, + { + "epoch": 1.5611472580002657, + "grad_norm": 1.2742422977156036, + "learning_rate": 2.5231852680427482e-06, + "loss": 0.24330289661884308, + "step": 5879 + }, + { + "epoch": 1.5614128269818086, + "grad_norm": 1.2802855767249914, + "learning_rate": 2.5202702679837852e-06, + "loss": 0.24877145886421204, + "step": 5880 + }, + { + "epoch": 1.5616783959633516, + "grad_norm": 1.1377670913842177, + "learning_rate": 2.5173567099285e-06, + "loss": 0.20410388708114624, + "step": 5881 + }, + { + "epoch": 1.5619439649448945, + "grad_norm": 1.2268765869469427, + "learning_rate": 2.514444594438591e-06, + "loss": 0.21524877846240997, + "step": 5882 + }, + { + "epoch": 1.5622095339264375, + "grad_norm": 1.1986269244208958, + "learning_rate": 2.5115339220754796e-06, + "loss": 0.18785043060779572, + "step": 5883 + }, + { + "epoch": 1.5624751029079804, + "grad_norm": 1.3539528047627718, + "learning_rate": 2.5086246934003113e-06, + "loss": 0.21200208365917206, + "step": 5884 + }, + { + "epoch": 
1.5627406718895234, + "grad_norm": 1.6373531833898813, + "learning_rate": 2.5057169089739485e-06, + "loss": 0.20752021670341492, + "step": 5885 + }, + { + "epoch": 1.5630062408710663, + "grad_norm": 1.1717071963534185, + "learning_rate": 2.502810569356976e-06, + "loss": 0.21395736932754517, + "step": 5886 + }, + { + "epoch": 1.5632718098526093, + "grad_norm": 1.2664848714228343, + "learning_rate": 2.499905675109707e-06, + "loss": 0.26949262619018555, + "step": 5887 + }, + { + "epoch": 1.5635373788341522, + "grad_norm": 1.5283985889023297, + "learning_rate": 2.497002226792169e-06, + "loss": 0.2309839278459549, + "step": 5888 + }, + { + "epoch": 1.5638029478156952, + "grad_norm": 1.2596143819163301, + "learning_rate": 2.4941002249641123e-06, + "loss": 0.24415400624275208, + "step": 5889 + }, + { + "epoch": 1.5640685167972381, + "grad_norm": 1.3074402223027564, + "learning_rate": 2.4911996701850083e-06, + "loss": 0.23493322730064392, + "step": 5890 + }, + { + "epoch": 1.564334085778781, + "grad_norm": 1.260748243658743, + "learning_rate": 2.488300563014049e-06, + "loss": 0.23824438452720642, + "step": 5891 + }, + { + "epoch": 1.564599654760324, + "grad_norm": 1.2534870916273309, + "learning_rate": 2.4854029040101503e-06, + "loss": 0.2523414194583893, + "step": 5892 + }, + { + "epoch": 1.564865223741867, + "grad_norm": 1.2879106186872462, + "learning_rate": 2.482506693731944e-06, + "loss": 0.21360887587070465, + "step": 5893 + }, + { + "epoch": 1.56513079272341, + "grad_norm": 1.1951820042572139, + "learning_rate": 2.47961193273779e-06, + "loss": 0.21182934939861298, + "step": 5894 + }, + { + "epoch": 1.5653963617049529, + "grad_norm": 1.4293886797193323, + "learning_rate": 2.4767186215857542e-06, + "loss": 0.23104771971702576, + "step": 5895 + }, + { + "epoch": 1.5656619306864958, + "grad_norm": 1.2606491547398977, + "learning_rate": 2.473826760833643e-06, + "loss": 0.22297397255897522, + "step": 5896 + }, + { + "epoch": 1.5659274996680388, + "grad_norm": 
1.176802218612286, + "learning_rate": 2.4709363510389684e-06, + "loss": 0.21597865223884583, + "step": 5897 + }, + { + "epoch": 1.5661930686495817, + "grad_norm": 1.4303555951561693, + "learning_rate": 2.468047392758969e-06, + "loss": 0.27620527148246765, + "step": 5898 + }, + { + "epoch": 1.5664586376311247, + "grad_norm": 1.373809252877093, + "learning_rate": 2.465159886550601e-06, + "loss": 0.25262463092803955, + "step": 5899 + }, + { + "epoch": 1.5667242066126676, + "grad_norm": 1.376719462816966, + "learning_rate": 2.462273832970542e-06, + "loss": 0.2729034125804901, + "step": 5900 + }, + { + "epoch": 1.5669897755942106, + "grad_norm": 1.3637563490895455, + "learning_rate": 2.459389232575188e-06, + "loss": 0.2313854992389679, + "step": 5901 + }, + { + "epoch": 1.5672553445757536, + "grad_norm": 1.3202318144066494, + "learning_rate": 2.456506085920658e-06, + "loss": 0.22513791918754578, + "step": 5902 + }, + { + "epoch": 1.5675209135572965, + "grad_norm": 1.3152362934287614, + "learning_rate": 2.4536243935627856e-06, + "loss": 0.2658824026584625, + "step": 5903 + }, + { + "epoch": 1.5677864825388395, + "grad_norm": 1.1721087348112986, + "learning_rate": 2.4507441560571275e-06, + "loss": 0.21781010925769806, + "step": 5904 + }, + { + "epoch": 1.5680520515203824, + "grad_norm": 1.3393030222309363, + "learning_rate": 2.4478653739589632e-06, + "loss": 0.21047937870025635, + "step": 5905 + }, + { + "epoch": 1.5683176205019254, + "grad_norm": 1.2196979825563006, + "learning_rate": 2.4449880478232858e-06, + "loss": 0.21674057841300964, + "step": 5906 + }, + { + "epoch": 1.5685831894834683, + "grad_norm": 1.200112520021674, + "learning_rate": 2.44211217820481e-06, + "loss": 0.22062627971172333, + "step": 5907 + }, + { + "epoch": 1.5688487584650113, + "grad_norm": 1.3158234051142574, + "learning_rate": 2.439237765657968e-06, + "loss": 0.22440886497497559, + "step": 5908 + }, + { + "epoch": 1.5691143274465542, + "grad_norm": 1.129873307165861, + "learning_rate": 
2.4363648107369175e-06, + "loss": 0.21888123452663422, + "step": 5909 + }, + { + "epoch": 1.5693798964280972, + "grad_norm": 1.2586007199788052, + "learning_rate": 2.433493313995524e-06, + "loss": 0.23104462027549744, + "step": 5910 + }, + { + "epoch": 1.5696454654096401, + "grad_norm": 1.427902558182486, + "learning_rate": 2.4306232759873803e-06, + "loss": 0.23032237589359283, + "step": 5911 + }, + { + "epoch": 1.569911034391183, + "grad_norm": 1.3780752776280365, + "learning_rate": 2.4277546972657974e-06, + "loss": 0.2588527202606201, + "step": 5912 + }, + { + "epoch": 1.570176603372726, + "grad_norm": 1.4647042397629928, + "learning_rate": 2.424887578383799e-06, + "loss": 0.2845698893070221, + "step": 5913 + }, + { + "epoch": 1.570442172354269, + "grad_norm": 1.338246310760916, + "learning_rate": 2.4220219198941384e-06, + "loss": 0.23010894656181335, + "step": 5914 + }, + { + "epoch": 1.570707741335812, + "grad_norm": 1.3783426416349442, + "learning_rate": 2.419157722349278e-06, + "loss": 0.2623594403266907, + "step": 5915 + }, + { + "epoch": 1.5709733103173549, + "grad_norm": 1.2349976574308903, + "learning_rate": 2.416294986301401e-06, + "loss": 0.2107153981924057, + "step": 5916 + }, + { + "epoch": 1.5712388792988978, + "grad_norm": 1.3633626366853218, + "learning_rate": 2.413433712302409e-06, + "loss": 0.2115003615617752, + "step": 5917 + }, + { + "epoch": 1.5715044482804408, + "grad_norm": 1.3738602333573011, + "learning_rate": 2.410573900903921e-06, + "loss": 0.22406762838363647, + "step": 5918 + }, + { + "epoch": 1.5717700172619837, + "grad_norm": 1.3017270649216575, + "learning_rate": 2.407715552657277e-06, + "loss": 0.24878525733947754, + "step": 5919 + }, + { + "epoch": 1.5720355862435267, + "grad_norm": 1.5003273963811, + "learning_rate": 2.404858668113532e-06, + "loss": 0.24546805024147034, + "step": 5920 + }, + { + "epoch": 1.5723011552250696, + "grad_norm": 1.5650848412040055, + "learning_rate": 2.402003247823459e-06, + "loss": 0.23430263996124268, 
+ "step": 5921 + }, + { + "epoch": 1.5725667242066126, + "grad_norm": 1.3939131226044492, + "learning_rate": 2.399149292337547e-06, + "loss": 0.26935267448425293, + "step": 5922 + }, + { + "epoch": 1.5728322931881555, + "grad_norm": 1.1554138984093538, + "learning_rate": 2.3962968022060097e-06, + "loss": 0.21104472875595093, + "step": 5923 + }, + { + "epoch": 1.5730978621696985, + "grad_norm": 1.147816084956367, + "learning_rate": 2.3934457779787755e-06, + "loss": 0.17162750661373138, + "step": 5924 + }, + { + "epoch": 1.5733634311512414, + "grad_norm": 1.2036391990293953, + "learning_rate": 2.390596220205481e-06, + "loss": 0.22233474254608154, + "step": 5925 + }, + { + "epoch": 1.5736290001327844, + "grad_norm": 1.456348691360017, + "learning_rate": 2.387748129435491e-06, + "loss": 0.2326992005109787, + "step": 5926 + }, + { + "epoch": 1.5738945691143273, + "grad_norm": 1.2656294085970974, + "learning_rate": 2.3849015062178835e-06, + "loss": 0.245779350399971, + "step": 5927 + }, + { + "epoch": 1.5741601380958703, + "grad_norm": 1.2198185109849795, + "learning_rate": 2.382056351101454e-06, + "loss": 0.24269379675388336, + "step": 5928 + }, + { + "epoch": 1.5744257070774133, + "grad_norm": 1.2241918308854736, + "learning_rate": 2.3792126646347138e-06, + "loss": 0.23644019663333893, + "step": 5929 + }, + { + "epoch": 1.5746912760589562, + "grad_norm": 1.2680435600362268, + "learning_rate": 2.376370447365893e-06, + "loss": 0.254330575466156, + "step": 5930 + }, + { + "epoch": 1.5749568450404992, + "grad_norm": 1.4146409212378834, + "learning_rate": 2.373529699842936e-06, + "loss": 0.2728506922721863, + "step": 5931 + }, + { + "epoch": 1.575222414022042, + "grad_norm": 1.3627178065769006, + "learning_rate": 2.3706904226135087e-06, + "loss": 0.23671439290046692, + "step": 5932 + }, + { + "epoch": 1.575487983003585, + "grad_norm": 1.409873356618632, + "learning_rate": 2.367852616224989e-06, + "loss": 0.24205748736858368, + "step": 5933 + }, + { + "epoch": 
1.575753551985128, + "grad_norm": 1.2728197754861583, + "learning_rate": 2.3650162812244725e-06, + "loss": 0.1915436089038849, + "step": 5934 + }, + { + "epoch": 1.576019120966671, + "grad_norm": 1.2091326643578577, + "learning_rate": 2.3621814181587697e-06, + "loss": 0.23453299701213837, + "step": 5935 + }, + { + "epoch": 1.576284689948214, + "grad_norm": 1.3060415308267561, + "learning_rate": 2.3593480275744106e-06, + "loss": 0.24066327512264252, + "step": 5936 + }, + { + "epoch": 1.5765502589297569, + "grad_norm": 1.246429396187596, + "learning_rate": 2.356516110017639e-06, + "loss": 0.22510530054569244, + "step": 5937 + }, + { + "epoch": 1.5768158279112998, + "grad_norm": 1.2889494549478113, + "learning_rate": 2.3536856660344144e-06, + "loss": 0.22967353463172913, + "step": 5938 + }, + { + "epoch": 1.5770813968928428, + "grad_norm": 1.2404139099674472, + "learning_rate": 2.3508566961704127e-06, + "loss": 0.2299107313156128, + "step": 5939 + }, + { + "epoch": 1.5773469658743857, + "grad_norm": 1.2560783974284127, + "learning_rate": 2.3480292009710282e-06, + "loss": 0.23418918251991272, + "step": 5940 + }, + { + "epoch": 1.5776125348559287, + "grad_norm": 1.2857056044544095, + "learning_rate": 2.3452031809813657e-06, + "loss": 0.26528510451316833, + "step": 5941 + }, + { + "epoch": 1.5778781038374716, + "grad_norm": 1.1247059842406957, + "learning_rate": 2.342378636746251e-06, + "loss": 0.21878717839717865, + "step": 5942 + }, + { + "epoch": 1.5781436728190146, + "grad_norm": 1.1637472196421235, + "learning_rate": 2.339555568810221e-06, + "loss": 0.19697530567646027, + "step": 5943 + }, + { + "epoch": 1.5784092418005577, + "grad_norm": 1.3422665805434115, + "learning_rate": 2.3367339777175313e-06, + "loss": 0.24812257289886475, + "step": 5944 + }, + { + "epoch": 1.5786748107821007, + "grad_norm": 1.3285793357341238, + "learning_rate": 2.3339138640121504e-06, + "loss": 0.27651745080947876, + "step": 5945 + }, + { + "epoch": 1.5789403797636437, + "grad_norm": 
1.308131821171991, + "learning_rate": 2.3310952282377643e-06, + "loss": 0.2651634216308594, + "step": 5946 + }, + { + "epoch": 1.5792059487451866, + "grad_norm": 1.3163549633798883, + "learning_rate": 2.328278070937772e-06, + "loss": 0.23799028992652893, + "step": 5947 + }, + { + "epoch": 1.5794715177267296, + "grad_norm": 1.4229706240812914, + "learning_rate": 2.3254623926552867e-06, + "loss": 0.2528802752494812, + "step": 5948 + }, + { + "epoch": 1.5797370867082725, + "grad_norm": 1.2071666314804592, + "learning_rate": 2.322648193933137e-06, + "loss": 0.23819346725940704, + "step": 5949 + }, + { + "epoch": 1.5800026556898155, + "grad_norm": 1.2694222057013376, + "learning_rate": 2.319835475313873e-06, + "loss": 0.2510845959186554, + "step": 5950 + }, + { + "epoch": 1.5802682246713584, + "grad_norm": 1.0731141255180743, + "learning_rate": 2.31702423733975e-06, + "loss": 0.20156612992286682, + "step": 5951 + }, + { + "epoch": 1.5805337936529014, + "grad_norm": 1.320010192923148, + "learning_rate": 2.3142144805527413e-06, + "loss": 0.23375174403190613, + "step": 5952 + }, + { + "epoch": 1.5807993626344443, + "grad_norm": 1.187058092026163, + "learning_rate": 2.311406205494535e-06, + "loss": 0.2378280758857727, + "step": 5953 + }, + { + "epoch": 1.5810649316159873, + "grad_norm": 1.4550533599389408, + "learning_rate": 2.308599412706535e-06, + "loss": 0.2087683081626892, + "step": 5954 + }, + { + "epoch": 1.5813305005975302, + "grad_norm": 1.2856302099767283, + "learning_rate": 2.3057941027298557e-06, + "loss": 0.2228693962097168, + "step": 5955 + }, + { + "epoch": 1.5815960695790732, + "grad_norm": 1.4738789364963756, + "learning_rate": 2.302990276105329e-06, + "loss": 0.22694727778434753, + "step": 5956 + }, + { + "epoch": 1.5818616385606161, + "grad_norm": 1.2486840544551192, + "learning_rate": 2.300187933373499e-06, + "loss": 0.22996942698955536, + "step": 5957 + }, + { + "epoch": 1.582127207542159, + "grad_norm": 1.331719034245123, + "learning_rate": 
2.2973870750746253e-06, + "loss": 0.2440253496170044, + "step": 5958 + }, + { + "epoch": 1.582392776523702, + "grad_norm": 1.3266637203740035, + "learning_rate": 2.2945877017486782e-06, + "loss": 0.2507309019565582, + "step": 5959 + }, + { + "epoch": 1.582658345505245, + "grad_norm": 2.8683041985739677, + "learning_rate": 2.2917898139353467e-06, + "loss": 0.24790918827056885, + "step": 5960 + }, + { + "epoch": 1.582923914486788, + "grad_norm": 1.4168604850261965, + "learning_rate": 2.2889934121740287e-06, + "loss": 0.22106975317001343, + "step": 5961 + }, + { + "epoch": 1.5831894834683309, + "grad_norm": 1.5726662217531726, + "learning_rate": 2.2861984970038385e-06, + "loss": 0.2410939633846283, + "step": 5962 + }, + { + "epoch": 1.5834550524498738, + "grad_norm": 1.1559016560001114, + "learning_rate": 2.283405068963601e-06, + "loss": 0.22821484506130219, + "step": 5963 + }, + { + "epoch": 1.5837206214314168, + "grad_norm": 1.2324685594628142, + "learning_rate": 2.2806131285918588e-06, + "loss": 0.21425281465053558, + "step": 5964 + }, + { + "epoch": 1.5839861904129597, + "grad_norm": 1.2434376170807215, + "learning_rate": 2.277822676426863e-06, + "loss": 0.22428902983665466, + "step": 5965 + }, + { + "epoch": 1.5842517593945027, + "grad_norm": 1.4592375031786005, + "learning_rate": 2.27503371300658e-06, + "loss": 0.2986769676208496, + "step": 5966 + }, + { + "epoch": 1.5845173283760456, + "grad_norm": 1.4384957681975041, + "learning_rate": 2.272246238868687e-06, + "loss": 0.24697065353393555, + "step": 5967 + }, + { + "epoch": 1.5847828973575886, + "grad_norm": 1.3175254870878064, + "learning_rate": 2.269460254550583e-06, + "loss": 0.23725461959838867, + "step": 5968 + }, + { + "epoch": 1.5850484663391315, + "grad_norm": 1.5010497616053564, + "learning_rate": 2.2666757605893664e-06, + "loss": 0.2661248445510864, + "step": 5969 + }, + { + "epoch": 1.5853140353206745, + "grad_norm": 1.2390278830143426, + "learning_rate": 2.263892757521858e-06, + "loss": 
0.23328733444213867, + "step": 5970 + }, + { + "epoch": 1.5855796043022174, + "grad_norm": 1.2547818797647754, + "learning_rate": 2.2611112458845873e-06, + "loss": 0.22886580228805542, + "step": 5971 + }, + { + "epoch": 1.5858451732837606, + "grad_norm": 1.1882681583888588, + "learning_rate": 2.2583312262137966e-06, + "loss": 0.25051698088645935, + "step": 5972 + }, + { + "epoch": 1.5861107422653036, + "grad_norm": 1.2988472953319592, + "learning_rate": 2.2555526990454413e-06, + "loss": 0.2400815784931183, + "step": 5973 + }, + { + "epoch": 1.5863763112468465, + "grad_norm": 1.1598677166947555, + "learning_rate": 2.2527756649151912e-06, + "loss": 0.2212347537279129, + "step": 5974 + }, + { + "epoch": 1.5866418802283895, + "grad_norm": 1.355013417523964, + "learning_rate": 2.2500001243584204e-06, + "loss": 0.3002026379108429, + "step": 5975 + }, + { + "epoch": 1.5869074492099324, + "grad_norm": 1.1899701199057289, + "learning_rate": 2.2472260779102185e-06, + "loss": 0.19813531637191772, + "step": 5976 + }, + { + "epoch": 1.5871730181914754, + "grad_norm": 1.2404972223723234, + "learning_rate": 2.2444535261053968e-06, + "loss": 0.2233983874320984, + "step": 5977 + }, + { + "epoch": 1.5874385871730183, + "grad_norm": 1.417840431772693, + "learning_rate": 2.2416824694784676e-06, + "loss": 0.26059988141059875, + "step": 5978 + }, + { + "epoch": 1.5877041561545613, + "grad_norm": 1.2961846276739968, + "learning_rate": 2.2389129085636573e-06, + "loss": 0.23058606684207916, + "step": 5979 + }, + { + "epoch": 1.5879697251361042, + "grad_norm": 1.3397298592095879, + "learning_rate": 2.236144843894904e-06, + "loss": 0.2414383739233017, + "step": 5980 + }, + { + "epoch": 1.5882352941176472, + "grad_norm": 1.2013757541083616, + "learning_rate": 2.23337827600586e-06, + "loss": 0.21688291430473328, + "step": 5981 + }, + { + "epoch": 1.5885008630991901, + "grad_norm": 1.2977536190104755, + "learning_rate": 2.2306132054298847e-06, + "loss": 0.24297408759593964, + "step": 5982 + }, 
+ { + "epoch": 1.588766432080733, + "grad_norm": 1.449081017944755, + "learning_rate": 2.227849632700052e-06, + "loss": 0.2655821442604065, + "step": 5983 + }, + { + "epoch": 1.589032001062276, + "grad_norm": 1.2305338711146763, + "learning_rate": 2.225087558349146e-06, + "loss": 0.20545080304145813, + "step": 5984 + }, + { + "epoch": 1.589297570043819, + "grad_norm": 1.470607418959754, + "learning_rate": 2.2223269829096593e-06, + "loss": 0.24151475727558136, + "step": 5985 + }, + { + "epoch": 1.589563139025362, + "grad_norm": 1.2194062039730535, + "learning_rate": 2.2195679069138043e-06, + "loss": 0.2294519543647766, + "step": 5986 + }, + { + "epoch": 1.589828708006905, + "grad_norm": 1.3319096935394759, + "learning_rate": 2.2168103308934953e-06, + "loss": 0.2041824758052826, + "step": 5987 + }, + { + "epoch": 1.5900942769884479, + "grad_norm": 1.181577384258167, + "learning_rate": 2.21405425538036e-06, + "loss": 0.1856188029050827, + "step": 5988 + }, + { + "epoch": 1.5903598459699908, + "grad_norm": 1.2644853901124522, + "learning_rate": 2.2112996809057395e-06, + "loss": 0.24337685108184814, + "step": 5989 + }, + { + "epoch": 1.5906254149515338, + "grad_norm": 1.1714048449744126, + "learning_rate": 2.20854660800068e-06, + "loss": 0.2201787382364273, + "step": 5990 + }, + { + "epoch": 1.5908909839330767, + "grad_norm": 1.322531300676563, + "learning_rate": 2.2057950371959427e-06, + "loss": 0.23505619168281555, + "step": 5991 + }, + { + "epoch": 1.5911565529146197, + "grad_norm": 1.4085526679551708, + "learning_rate": 2.203044969021997e-06, + "loss": 0.19528049230575562, + "step": 5992 + }, + { + "epoch": 1.5914221218961626, + "grad_norm": 1.2299879902160842, + "learning_rate": 2.2002964040090256e-06, + "loss": 0.22281290590763092, + "step": 5993 + }, + { + "epoch": 1.5916876908777056, + "grad_norm": 1.310771483519368, + "learning_rate": 2.1975493426869155e-06, + "loss": 0.19606761634349823, + "step": 5994 + }, + { + "epoch": 1.5919532598592485, + "grad_norm": 
1.2570005315725017, + "learning_rate": 2.1948037855852733e-06, + "loss": 0.22559323906898499, + "step": 5995 + }, + { + "epoch": 1.5922188288407915, + "grad_norm": 1.2326545276620708, + "learning_rate": 2.192059733233408e-06, + "loss": 0.20417393743991852, + "step": 5996 + }, + { + "epoch": 1.5924843978223344, + "grad_norm": 1.351064737074131, + "learning_rate": 2.18931718616034e-06, + "loss": 0.2579960525035858, + "step": 5997 + }, + { + "epoch": 1.5927499668038774, + "grad_norm": 1.2980140620122547, + "learning_rate": 2.1865761448948e-06, + "loss": 0.23339781165122986, + "step": 5998 + }, + { + "epoch": 1.5930155357854203, + "grad_norm": 1.2588476812522966, + "learning_rate": 2.1838366099652274e-06, + "loss": 0.2368197739124298, + "step": 5999 + }, + { + "epoch": 1.5932811047669633, + "grad_norm": 1.2980274155826699, + "learning_rate": 2.1810985818997743e-06, + "loss": 0.2225847840309143, + "step": 6000 + }, + { + "epoch": 1.5935466737485062, + "grad_norm": 1.3094945647641514, + "learning_rate": 2.1783620612263e-06, + "loss": 0.2426701784133911, + "step": 6001 + }, + { + "epoch": 1.5938122427300492, + "grad_norm": 1.284834767608695, + "learning_rate": 2.175627048472372e-06, + "loss": 0.23647268116474152, + "step": 6002 + }, + { + "epoch": 1.5940778117115921, + "grad_norm": 1.2525920428706867, + "learning_rate": 2.1728935441652687e-06, + "loss": 0.22843337059020996, + "step": 6003 + }, + { + "epoch": 1.594343380693135, + "grad_norm": 1.1786632019087344, + "learning_rate": 2.1701615488319785e-06, + "loss": 0.21524465084075928, + "step": 6004 + }, + { + "epoch": 1.594608949674678, + "grad_norm": 1.225831889373155, + "learning_rate": 2.167431062999197e-06, + "loss": 0.2160830795764923, + "step": 6005 + }, + { + "epoch": 1.594874518656221, + "grad_norm": 1.238709201727011, + "learning_rate": 2.1647020871933288e-06, + "loss": 0.2321595996618271, + "step": 6006 + }, + { + "epoch": 1.595140087637764, + "grad_norm": 1.164283210992047, + "learning_rate": 
2.1619746219404916e-06, + "loss": 0.21255026757717133, + "step": 6007 + }, + { + "epoch": 1.5954056566193069, + "grad_norm": 1.3822319128280973, + "learning_rate": 2.1592486677665047e-06, + "loss": 0.22851255536079407, + "step": 6008 + }, + { + "epoch": 1.5956712256008498, + "grad_norm": 1.3982384304626327, + "learning_rate": 2.1565242251969022e-06, + "loss": 0.23844364285469055, + "step": 6009 + }, + { + "epoch": 1.5959367945823928, + "grad_norm": 1.3184134341650149, + "learning_rate": 2.153801294756924e-06, + "loss": 0.2592385411262512, + "step": 6010 + }, + { + "epoch": 1.5962023635639357, + "grad_norm": 1.221300094567036, + "learning_rate": 2.151079876971519e-06, + "loss": 0.22163718938827515, + "step": 6011 + }, + { + "epoch": 1.5964679325454787, + "grad_norm": 1.1840952132259899, + "learning_rate": 2.1483599723653415e-06, + "loss": 0.1960998773574829, + "step": 6012 + }, + { + "epoch": 1.5967335015270216, + "grad_norm": 1.1732770789502442, + "learning_rate": 2.145641581462762e-06, + "loss": 0.20811150968074799, + "step": 6013 + }, + { + "epoch": 1.5969990705085646, + "grad_norm": 1.2065470685478314, + "learning_rate": 2.1429247047878534e-06, + "loss": 0.23184621334075928, + "step": 6014 + }, + { + "epoch": 1.5972646394901076, + "grad_norm": 1.3338850940720004, + "learning_rate": 2.1402093428643942e-06, + "loss": 0.22043758630752563, + "step": 6015 + }, + { + "epoch": 1.5975302084716505, + "grad_norm": 1.1736165993383876, + "learning_rate": 2.137495496215878e-06, + "loss": 0.18621152639389038, + "step": 6016 + }, + { + "epoch": 1.5977957774531935, + "grad_norm": 1.332636421894691, + "learning_rate": 2.1347831653654995e-06, + "loss": 0.2422473132610321, + "step": 6017 + }, + { + "epoch": 1.5980613464347364, + "grad_norm": 1.5933227500597664, + "learning_rate": 2.132072350836164e-06, + "loss": 0.2147202491760254, + "step": 6018 + }, + { + "epoch": 1.5983269154162794, + "grad_norm": 1.5455916288717333, + "learning_rate": 2.1293630531504873e-06, + "loss": 
0.23091933131217957, + "step": 6019 + }, + { + "epoch": 1.5985924843978223, + "grad_norm": 1.290869089573798, + "learning_rate": 2.1266552728307876e-06, + "loss": 0.220037579536438, + "step": 6020 + }, + { + "epoch": 1.5988580533793653, + "grad_norm": 1.3343924424387823, + "learning_rate": 2.1239490103990946e-06, + "loss": 0.25520551204681396, + "step": 6021 + }, + { + "epoch": 1.5991236223609082, + "grad_norm": 1.412222062207012, + "learning_rate": 2.1212442663771427e-06, + "loss": 0.23216915130615234, + "step": 6022 + }, + { + "epoch": 1.5993891913424512, + "grad_norm": 1.381515312381825, + "learning_rate": 2.118541041286374e-06, + "loss": 0.22098806500434875, + "step": 6023 + }, + { + "epoch": 1.5996547603239941, + "grad_norm": 1.4609594644715316, + "learning_rate": 2.11583933564794e-06, + "loss": 0.261300265789032, + "step": 6024 + }, + { + "epoch": 1.599920329305537, + "grad_norm": 1.2095539498781858, + "learning_rate": 2.113139149982698e-06, + "loss": 0.20427154004573822, + "step": 6025 + }, + { + "epoch": 1.60018589828708, + "grad_norm": 1.2158101663646808, + "learning_rate": 2.110440484811209e-06, + "loss": 0.20700547099113464, + "step": 6026 + }, + { + "epoch": 1.600451467268623, + "grad_norm": 1.4331467444820847, + "learning_rate": 2.1077433406537475e-06, + "loss": 0.2789752185344696, + "step": 6027 + }, + { + "epoch": 1.600717036250166, + "grad_norm": 1.2991321976135584, + "learning_rate": 2.1050477180302885e-06, + "loss": 0.2205841988325119, + "step": 6028 + }, + { + "epoch": 1.6009826052317089, + "grad_norm": 1.3197920849647402, + "learning_rate": 2.1023536174605184e-06, + "loss": 0.24921822547912598, + "step": 6029 + }, + { + "epoch": 1.6012481742132518, + "grad_norm": 2.014197229906981, + "learning_rate": 2.0996610394638228e-06, + "loss": 0.2516329288482666, + "step": 6030 + }, + { + "epoch": 1.6015137431947948, + "grad_norm": 1.2656936665142342, + "learning_rate": 2.096969984559306e-06, + "loss": 0.21832503378391266, + "step": 6031 + }, + { + 
"epoch": 1.6017793121763377, + "grad_norm": 1.530808592055088, + "learning_rate": 2.094280453265769e-06, + "loss": 0.2499273419380188, + "step": 6032 + }, + { + "epoch": 1.6020448811578807, + "grad_norm": 1.167125195859278, + "learning_rate": 2.09159244610172e-06, + "loss": 0.21701282262802124, + "step": 6033 + }, + { + "epoch": 1.6023104501394236, + "grad_norm": 1.2536801575307182, + "learning_rate": 2.0889059635853783e-06, + "loss": 0.24446213245391846, + "step": 6034 + }, + { + "epoch": 1.6025760191209666, + "grad_norm": 1.412317581200794, + "learning_rate": 2.0862210062346622e-06, + "loss": 0.27299973368644714, + "step": 6035 + }, + { + "epoch": 1.6028415881025095, + "grad_norm": 1.320945278338079, + "learning_rate": 2.0835375745672027e-06, + "loss": 0.2384832501411438, + "step": 6036 + }, + { + "epoch": 1.6031071570840525, + "grad_norm": 1.340788170535406, + "learning_rate": 2.0808556691003335e-06, + "loss": 0.2563338875770569, + "step": 6037 + }, + { + "epoch": 1.6033727260655954, + "grad_norm": 1.5240284764155023, + "learning_rate": 2.0781752903510954e-06, + "loss": 0.29148975014686584, + "step": 6038 + }, + { + "epoch": 1.6036382950471384, + "grad_norm": 1.1673304070468655, + "learning_rate": 2.0754964388362264e-06, + "loss": 0.24276503920555115, + "step": 6039 + }, + { + "epoch": 1.6039038640286813, + "grad_norm": 1.2629655044665746, + "learning_rate": 2.0728191150721866e-06, + "loss": 0.1863931119441986, + "step": 6040 + }, + { + "epoch": 1.6041694330102243, + "grad_norm": 1.1731073698012655, + "learning_rate": 2.0701433195751286e-06, + "loss": 0.21270868182182312, + "step": 6041 + }, + { + "epoch": 1.6044350019917673, + "grad_norm": 1.2780583308550695, + "learning_rate": 2.0674690528609155e-06, + "loss": 0.21542516350746155, + "step": 6042 + }, + { + "epoch": 1.6047005709733102, + "grad_norm": 1.256432235067539, + "learning_rate": 2.0647963154451124e-06, + "loss": 0.23099860548973083, + "step": 6043 + }, + { + "epoch": 1.6049661399548532, + "grad_norm": 
1.1769565332020941, + "learning_rate": 2.062125107842993e-06, + "loss": 0.22757291793823242, + "step": 6044 + }, + { + "epoch": 1.605231708936396, + "grad_norm": 1.317404807729369, + "learning_rate": 2.0594554305695346e-06, + "loss": 0.2370409518480301, + "step": 6045 + }, + { + "epoch": 1.605497277917939, + "grad_norm": 1.1803781252235817, + "learning_rate": 2.0567872841394186e-06, + "loss": 0.21620309352874756, + "step": 6046 + }, + { + "epoch": 1.605762846899482, + "grad_norm": 1.2191738819977833, + "learning_rate": 2.0541206690670324e-06, + "loss": 0.22821158170700073, + "step": 6047 + }, + { + "epoch": 1.606028415881025, + "grad_norm": 1.385940331470305, + "learning_rate": 2.0514555858664663e-06, + "loss": 0.24930253624916077, + "step": 6048 + }, + { + "epoch": 1.606293984862568, + "grad_norm": 1.3966922562239508, + "learning_rate": 2.048792035051521e-06, + "loss": 0.2491561770439148, + "step": 6049 + }, + { + "epoch": 1.6065595538441109, + "grad_norm": 1.3037697337655914, + "learning_rate": 2.046130017135697e-06, + "loss": 0.20652002096176147, + "step": 6050 + }, + { + "epoch": 1.6068251228256538, + "grad_norm": 1.1970911046995705, + "learning_rate": 2.0434695326321975e-06, + "loss": 0.25670793652534485, + "step": 6051 + }, + { + "epoch": 1.6070906918071968, + "grad_norm": 1.2469219040368793, + "learning_rate": 2.0408105820539328e-06, + "loss": 0.2328418493270874, + "step": 6052 + }, + { + "epoch": 1.6073562607887397, + "grad_norm": 1.2657559287734064, + "learning_rate": 2.0381531659135213e-06, + "loss": 0.20811162889003754, + "step": 6053 + }, + { + "epoch": 1.6076218297702827, + "grad_norm": 1.2637409014709644, + "learning_rate": 2.0354972847232756e-06, + "loss": 0.24068522453308105, + "step": 6054 + }, + { + "epoch": 1.6078873987518256, + "grad_norm": 1.3537388998191249, + "learning_rate": 2.032842938995221e-06, + "loss": 0.2519197463989258, + "step": 6055 + }, + { + "epoch": 1.6081529677333686, + "grad_norm": 1.349413355425799, + "learning_rate": 
2.030190129241083e-06, + "loss": 0.2293267697095871, + "step": 6056 + }, + { + "epoch": 1.6084185367149118, + "grad_norm": 1.8474927483406436, + "learning_rate": 2.027538855972291e-06, + "loss": 0.22398510575294495, + "step": 6057 + }, + { + "epoch": 1.6086841056964547, + "grad_norm": 1.4186878733418118, + "learning_rate": 2.0248891196999833e-06, + "loss": 0.23074102401733398, + "step": 6058 + }, + { + "epoch": 1.6089496746779977, + "grad_norm": 1.352152679115686, + "learning_rate": 2.0222409209349957e-06, + "loss": 0.2618173658847809, + "step": 6059 + }, + { + "epoch": 1.6092152436595406, + "grad_norm": 1.2898742263880296, + "learning_rate": 2.0195942601878703e-06, + "loss": 0.25361114740371704, + "step": 6060 + }, + { + "epoch": 1.6094808126410836, + "grad_norm": 1.2270527625039152, + "learning_rate": 2.016949137968851e-06, + "loss": 0.2276519238948822, + "step": 6061 + }, + { + "epoch": 1.6097463816226265, + "grad_norm": 1.3155356069823825, + "learning_rate": 2.0143055547878863e-06, + "loss": 0.20834363996982574, + "step": 6062 + }, + { + "epoch": 1.6100119506041695, + "grad_norm": 1.348708703656222, + "learning_rate": 2.011663511154628e-06, + "loss": 0.2579394578933716, + "step": 6063 + }, + { + "epoch": 1.6102775195857124, + "grad_norm": 1.2574503425710122, + "learning_rate": 2.009023007578431e-06, + "loss": 0.22118912637233734, + "step": 6064 + }, + { + "epoch": 1.6105430885672554, + "grad_norm": 1.1631210187007555, + "learning_rate": 2.0063840445683537e-06, + "loss": 0.1881515383720398, + "step": 6065 + }, + { + "epoch": 1.6108086575487983, + "grad_norm": 1.2884662240297928, + "learning_rate": 2.003746622633155e-06, + "loss": 0.2270805984735489, + "step": 6066 + }, + { + "epoch": 1.6110742265303413, + "grad_norm": 1.4261065534360056, + "learning_rate": 2.0011107422813013e-06, + "loss": 0.26356351375579834, + "step": 6067 + }, + { + "epoch": 1.6113397955118842, + "grad_norm": 1.2506363457624738, + "learning_rate": 1.9984764040209615e-06, + "loss": 
0.22937676310539246, + "step": 6068 + }, + { + "epoch": 1.6116053644934272, + "grad_norm": 1.329188800311282, + "learning_rate": 1.99584360836e-06, + "loss": 0.25062739849090576, + "step": 6069 + }, + { + "epoch": 1.6118709334749701, + "grad_norm": 1.1593663351806502, + "learning_rate": 1.993212355805989e-06, + "loss": 0.2031324952840805, + "step": 6070 + }, + { + "epoch": 1.612136502456513, + "grad_norm": 1.3722085699931008, + "learning_rate": 1.990582646866206e-06, + "loss": 0.25769656896591187, + "step": 6071 + }, + { + "epoch": 1.612402071438056, + "grad_norm": 1.3184109520906713, + "learning_rate": 1.987954482047626e-06, + "loss": 0.23856252431869507, + "step": 6072 + }, + { + "epoch": 1.612667640419599, + "grad_norm": 1.3452730145342116, + "learning_rate": 1.9853278618569284e-06, + "loss": 0.2336723804473877, + "step": 6073 + }, + { + "epoch": 1.612933209401142, + "grad_norm": 1.3427497614935235, + "learning_rate": 1.9827027868004942e-06, + "loss": 0.22327622771263123, + "step": 6074 + }, + { + "epoch": 1.6131987783826849, + "grad_norm": 1.302817235652594, + "learning_rate": 1.980079257384405e-06, + "loss": 0.26695019006729126, + "step": 6075 + }, + { + "epoch": 1.6134643473642278, + "grad_norm": 1.174792834468628, + "learning_rate": 1.9774572741144514e-06, + "loss": 0.2467387616634369, + "step": 6076 + }, + { + "epoch": 1.6137299163457708, + "grad_norm": 1.3974546997540778, + "learning_rate": 1.9748368374961193e-06, + "loss": 0.25473737716674805, + "step": 6077 + }, + { + "epoch": 1.6139954853273137, + "grad_norm": 1.295354894556923, + "learning_rate": 1.972217948034596e-06, + "loss": 0.25508594512939453, + "step": 6078 + }, + { + "epoch": 1.6142610543088567, + "grad_norm": 1.2627621502033493, + "learning_rate": 1.969600606234774e-06, + "loss": 0.23020131886005402, + "step": 6079 + }, + { + "epoch": 1.6145266232903996, + "grad_norm": 1.2036992831321345, + "learning_rate": 1.9669848126012447e-06, + "loss": 0.249805748462677, + "step": 6080 + }, + { + "epoch": 
1.6147921922719426, + "grad_norm": 1.2304217597704168, + "learning_rate": 1.964370567638303e-06, + "loss": 0.2377707064151764, + "step": 6081 + }, + { + "epoch": 1.6150577612534855, + "grad_norm": 1.3812388616949685, + "learning_rate": 1.9617578718499452e-06, + "loss": 0.28656789660453796, + "step": 6082 + }, + { + "epoch": 1.6153233302350285, + "grad_norm": 1.3083477730508752, + "learning_rate": 1.9591467257398668e-06, + "loss": 0.22079989314079285, + "step": 6083 + }, + { + "epoch": 1.6155888992165715, + "grad_norm": 1.048982897357468, + "learning_rate": 1.9565371298114666e-06, + "loss": 0.1993042230606079, + "step": 6084 + }, + { + "epoch": 1.6158544681981146, + "grad_norm": 1.1837758778278344, + "learning_rate": 1.9539290845678438e-06, + "loss": 0.20818357169628143, + "step": 6085 + }, + { + "epoch": 1.6161200371796576, + "grad_norm": 1.2192677831294998, + "learning_rate": 1.9513225905117996e-06, + "loss": 0.20531761646270752, + "step": 6086 + }, + { + "epoch": 1.6163856061612005, + "grad_norm": 1.2499003349392819, + "learning_rate": 1.948717648145834e-06, + "loss": 0.23414376378059387, + "step": 6087 + }, + { + "epoch": 1.6166511751427435, + "grad_norm": 1.2073482694002922, + "learning_rate": 1.9461142579721493e-06, + "loss": 0.2025471031665802, + "step": 6088 + }, + { + "epoch": 1.6169167441242864, + "grad_norm": 1.4729414889087271, + "learning_rate": 1.943512420492649e-06, + "loss": 0.19130446016788483, + "step": 6089 + }, + { + "epoch": 1.6171823131058294, + "grad_norm": 1.1947055473554775, + "learning_rate": 1.940912136208938e-06, + "loss": 0.21637848019599915, + "step": 6090 + }, + { + "epoch": 1.6174478820873723, + "grad_norm": 1.301401884532825, + "learning_rate": 1.9383134056223176e-06, + "loss": 0.26844075322151184, + "step": 6091 + }, + { + "epoch": 1.6177134510689153, + "grad_norm": 1.1755891449306313, + "learning_rate": 1.935716229233794e-06, + "loss": 0.19573305547237396, + "step": 6092 + }, + { + "epoch": 1.6179790200504582, + "grad_norm": 
1.2705214543802177, + "learning_rate": 1.93312060754407e-06, + "loss": 0.22705954313278198, + "step": 6093 + }, + { + "epoch": 1.6182445890320012, + "grad_norm": 1.279170245457384, + "learning_rate": 1.9305265410535545e-06, + "loss": 0.2505400478839874, + "step": 6094 + }, + { + "epoch": 1.6185101580135441, + "grad_norm": 1.2108711177458409, + "learning_rate": 1.927934030262353e-06, + "loss": 0.2328193187713623, + "step": 6095 + }, + { + "epoch": 1.618775726995087, + "grad_norm": 1.2588974628750198, + "learning_rate": 1.9253430756702674e-06, + "loss": 0.23876577615737915, + "step": 6096 + }, + { + "epoch": 1.61904129597663, + "grad_norm": 1.3685755624123837, + "learning_rate": 1.9227536777768063e-06, + "loss": 0.2390732318162918, + "step": 6097 + }, + { + "epoch": 1.619306864958173, + "grad_norm": 1.3858306009370809, + "learning_rate": 1.9201658370811736e-06, + "loss": 0.25231993198394775, + "step": 6098 + }, + { + "epoch": 1.619572433939716, + "grad_norm": 1.2520374949609627, + "learning_rate": 1.917579554082274e-06, + "loss": 0.21527352929115295, + "step": 6099 + }, + { + "epoch": 1.619838002921259, + "grad_norm": 1.2236250632687489, + "learning_rate": 1.9149948292787133e-06, + "loss": 0.21394580602645874, + "step": 6100 + }, + { + "epoch": 1.6201035719028019, + "grad_norm": 1.3465338603905943, + "learning_rate": 1.912411663168796e-06, + "loss": 0.26093196868896484, + "step": 6101 + }, + { + "epoch": 1.6203691408843448, + "grad_norm": 1.3518497357465815, + "learning_rate": 1.9098300562505266e-06, + "loss": 0.2631412744522095, + "step": 6102 + }, + { + "epoch": 1.6206347098658878, + "grad_norm": 1.3007944720423297, + "learning_rate": 1.9072500090216073e-06, + "loss": 0.270250141620636, + "step": 6103 + }, + { + "epoch": 1.6209002788474307, + "grad_norm": 1.3385737712068424, + "learning_rate": 1.9046715219794397e-06, + "loss": 0.22944031655788422, + "step": 6104 + }, + { + "epoch": 1.6211658478289737, + "grad_norm": 1.2125488505372424, + "learning_rate": 
1.902094595621129e-06, + "loss": 0.24429070949554443, + "step": 6105 + }, + { + "epoch": 1.6214314168105166, + "grad_norm": 1.2581532570405378, + "learning_rate": 1.8995192304434729e-06, + "loss": 0.25656238198280334, + "step": 6106 + }, + { + "epoch": 1.6216969857920596, + "grad_norm": 1.3466122688772229, + "learning_rate": 1.8969454269429743e-06, + "loss": 0.2575233280658722, + "step": 6107 + }, + { + "epoch": 1.6219625547736025, + "grad_norm": 1.245984919504028, + "learning_rate": 1.8943731856158299e-06, + "loss": 0.24881063401699066, + "step": 6108 + }, + { + "epoch": 1.6222281237551455, + "grad_norm": 1.2845731125917577, + "learning_rate": 1.8918025069579382e-06, + "loss": 0.23353847861289978, + "step": 6109 + }, + { + "epoch": 1.6224936927366884, + "grad_norm": 1.2505489106727152, + "learning_rate": 1.8892333914648953e-06, + "loss": 0.21085457503795624, + "step": 6110 + }, + { + "epoch": 1.6227592617182314, + "grad_norm": 1.4134001131082032, + "learning_rate": 1.8866658396319947e-06, + "loss": 0.28600943088531494, + "step": 6111 + }, + { + "epoch": 1.6230248306997743, + "grad_norm": 1.1689838110439057, + "learning_rate": 1.8840998519542352e-06, + "loss": 0.22580507397651672, + "step": 6112 + }, + { + "epoch": 1.6232903996813173, + "grad_norm": 1.212526750953587, + "learning_rate": 1.8815354289263066e-06, + "loss": 0.19310800731182098, + "step": 6113 + }, + { + "epoch": 1.6235559686628602, + "grad_norm": 1.3020905454433194, + "learning_rate": 1.8789725710425988e-06, + "loss": 0.21633204817771912, + "step": 6114 + }, + { + "epoch": 1.6238215376444032, + "grad_norm": 1.4315370828946672, + "learning_rate": 1.8764112787972e-06, + "loss": 0.21346023678779602, + "step": 6115 + }, + { + "epoch": 1.6240871066259461, + "grad_norm": 1.21392020481053, + "learning_rate": 1.8738515526838986e-06, + "loss": 0.21206694841384888, + "step": 6116 + }, + { + "epoch": 1.624352675607489, + "grad_norm": 1.3197096686410696, + "learning_rate": 1.8712933931961773e-06, + "loss": 
0.2135339230298996, + "step": 6117 + }, + { + "epoch": 1.624618244589032, + "grad_norm": 1.2484635869956482, + "learning_rate": 1.8687368008272243e-06, + "loss": 0.2168758660554886, + "step": 6118 + }, + { + "epoch": 1.624883813570575, + "grad_norm": 1.1804251189525716, + "learning_rate": 1.866181776069914e-06, + "loss": 0.20825617015361786, + "step": 6119 + }, + { + "epoch": 1.625149382552118, + "grad_norm": 1.291082575518304, + "learning_rate": 1.863628319416826e-06, + "loss": 0.25367867946624756, + "step": 6120 + }, + { + "epoch": 1.625414951533661, + "grad_norm": 1.3053498393136334, + "learning_rate": 1.8610764313602404e-06, + "loss": 0.21604284644126892, + "step": 6121 + }, + { + "epoch": 1.6256805205152038, + "grad_norm": 1.2871138327885168, + "learning_rate": 1.8585261123921283e-06, + "loss": 0.2324865758419037, + "step": 6122 + }, + { + "epoch": 1.6259460894967468, + "grad_norm": 1.2467444217539543, + "learning_rate": 1.8559773630041632e-06, + "loss": 0.2077629417181015, + "step": 6123 + }, + { + "epoch": 1.6262116584782897, + "grad_norm": 1.1704936500874914, + "learning_rate": 1.8534301836877122e-06, + "loss": 0.19919469952583313, + "step": 6124 + }, + { + "epoch": 1.6264772274598327, + "grad_norm": 1.1998850682672693, + "learning_rate": 1.8508845749338412e-06, + "loss": 0.21069160103797913, + "step": 6125 + }, + { + "epoch": 1.6267427964413756, + "grad_norm": 1.218804714337499, + "learning_rate": 1.8483405372333152e-06, + "loss": 0.2286640703678131, + "step": 6126 + }, + { + "epoch": 1.6270083654229186, + "grad_norm": 1.33630910648056, + "learning_rate": 1.8457980710765932e-06, + "loss": 0.2430541068315506, + "step": 6127 + }, + { + "epoch": 1.6272739344044616, + "grad_norm": 1.3713498598627625, + "learning_rate": 1.8432571769538344e-06, + "loss": 0.21875709295272827, + "step": 6128 + }, + { + "epoch": 1.6275395033860045, + "grad_norm": 1.4416966555618131, + "learning_rate": 1.8407178553548876e-06, + "loss": 0.22591018676757812, + "step": 6129 + }, + { + 
"epoch": 1.6278050723675475, + "grad_norm": 1.362917465597037, + "learning_rate": 1.8381801067693129e-06, + "loss": 0.25429075956344604, + "step": 6130 + }, + { + "epoch": 1.6280706413490904, + "grad_norm": 1.31452454626215, + "learning_rate": 1.8356439316863528e-06, + "loss": 0.2437858283519745, + "step": 6131 + }, + { + "epoch": 1.6283362103306334, + "grad_norm": 1.2489983792436092, + "learning_rate": 1.8331093305949532e-06, + "loss": 0.24196262657642365, + "step": 6132 + }, + { + "epoch": 1.6286017793121763, + "grad_norm": 1.3756170241894088, + "learning_rate": 1.8305763039837576e-06, + "loss": 0.25779271125793457, + "step": 6133 + }, + { + "epoch": 1.6288673482937193, + "grad_norm": 1.223955710903011, + "learning_rate": 1.8280448523410987e-06, + "loss": 0.23418015241622925, + "step": 6134 + }, + { + "epoch": 1.6291329172752622, + "grad_norm": 1.3748973147827792, + "learning_rate": 1.8255149761550128e-06, + "loss": 0.2670775353908539, + "step": 6135 + }, + { + "epoch": 1.6293984862568052, + "grad_norm": 1.423176544673552, + "learning_rate": 1.822986675913231e-06, + "loss": 0.29342639446258545, + "step": 6136 + }, + { + "epoch": 1.6296640552383481, + "grad_norm": 1.244422511511833, + "learning_rate": 1.8204599521031785e-06, + "loss": 0.22768062353134155, + "step": 6137 + }, + { + "epoch": 1.629929624219891, + "grad_norm": 1.6355607569945512, + "learning_rate": 1.817934805211976e-06, + "loss": 0.23938167095184326, + "step": 6138 + }, + { + "epoch": 1.630195193201434, + "grad_norm": 1.311916117620117, + "learning_rate": 1.8154112357264474e-06, + "loss": 0.1982264518737793, + "step": 6139 + }, + { + "epoch": 1.630460762182977, + "grad_norm": 1.3026965235969699, + "learning_rate": 1.8128892441331047e-06, + "loss": 0.23591312766075134, + "step": 6140 + }, + { + "epoch": 1.63072633116452, + "grad_norm": 1.259123916156089, + "learning_rate": 1.8103688309181567e-06, + "loss": 0.20317673683166504, + "step": 6141 + }, + { + "epoch": 1.6309919001460629, + "grad_norm": 
1.2846300858550195, + "learning_rate": 1.8078499965675112e-06, + "loss": 0.233676478266716, + "step": 6142 + }, + { + "epoch": 1.6312574691276058, + "grad_norm": 1.3296785293607047, + "learning_rate": 1.8053327415667688e-06, + "loss": 0.22850775718688965, + "step": 6143 + }, + { + "epoch": 1.6315230381091488, + "grad_norm": 1.2850656633806874, + "learning_rate": 1.8028170664012268e-06, + "loss": 0.2603572607040405, + "step": 6144 + }, + { + "epoch": 1.6317886070906917, + "grad_norm": 1.3208849168125785, + "learning_rate": 1.8003029715558773e-06, + "loss": 0.27881523966789246, + "step": 6145 + }, + { + "epoch": 1.6320541760722347, + "grad_norm": 1.225668329292659, + "learning_rate": 1.797790457515406e-06, + "loss": 0.21744176745414734, + "step": 6146 + }, + { + "epoch": 1.6323197450537776, + "grad_norm": 1.2220588910103882, + "learning_rate": 1.7952795247642008e-06, + "loss": 0.20449542999267578, + "step": 6147 + }, + { + "epoch": 1.6325853140353206, + "grad_norm": 1.3015735321136237, + "learning_rate": 1.7927701737863402e-06, + "loss": 0.25641053915023804, + "step": 6148 + }, + { + "epoch": 1.6328508830168635, + "grad_norm": 1.294201240106412, + "learning_rate": 1.7902624050655914e-06, + "loss": 0.23583751916885376, + "step": 6149 + }, + { + "epoch": 1.6331164519984065, + "grad_norm": 1.4310897316272893, + "learning_rate": 1.787756219085427e-06, + "loss": 0.2709866762161255, + "step": 6150 + }, + { + "epoch": 1.6333820209799494, + "grad_norm": 1.2536554341378991, + "learning_rate": 1.785251616329009e-06, + "loss": 0.233103945851326, + "step": 6151 + }, + { + "epoch": 1.6336475899614924, + "grad_norm": 1.2660813048243769, + "learning_rate": 1.7827485972791957e-06, + "loss": 0.2665184438228607, + "step": 6152 + }, + { + "epoch": 1.6339131589430353, + "grad_norm": 1.2551185732946457, + "learning_rate": 1.7802471624185392e-06, + "loss": 0.20934605598449707, + "step": 6153 + }, + { + "epoch": 1.6341787279245783, + "grad_norm": 1.2179362426676639, + "learning_rate": 
1.7777473122292866e-06, + "loss": 0.2102464735507965, + "step": 6154 + }, + { + "epoch": 1.6344442969061213, + "grad_norm": 1.2289784110367914, + "learning_rate": 1.7752490471933769e-06, + "loss": 0.22889986634254456, + "step": 6155 + }, + { + "epoch": 1.6347098658876642, + "grad_norm": 1.3627659705359922, + "learning_rate": 1.772752367792452e-06, + "loss": 0.2261584997177124, + "step": 6156 + }, + { + "epoch": 1.6349754348692072, + "grad_norm": 1.2186249427048736, + "learning_rate": 1.7702572745078395e-06, + "loss": 0.21456710994243622, + "step": 6157 + }, + { + "epoch": 1.63524100385075, + "grad_norm": 1.1535452073956258, + "learning_rate": 1.7677637678205627e-06, + "loss": 0.22762097418308258, + "step": 6158 + }, + { + "epoch": 1.635506572832293, + "grad_norm": 1.306484526102534, + "learning_rate": 1.7652718482113417e-06, + "loss": 0.24772633612155914, + "step": 6159 + }, + { + "epoch": 1.635772141813836, + "grad_norm": 1.3290630048425123, + "learning_rate": 1.7627815161605887e-06, + "loss": 0.22980757057666779, + "step": 6160 + }, + { + "epoch": 1.636037710795379, + "grad_norm": 1.1593602123779645, + "learning_rate": 1.760292772148411e-06, + "loss": 0.19560125470161438, + "step": 6161 + }, + { + "epoch": 1.636303279776922, + "grad_norm": 1.388673809129743, + "learning_rate": 1.7578056166546086e-06, + "loss": 0.23733064532279968, + "step": 6162 + }, + { + "epoch": 1.6365688487584649, + "grad_norm": 1.2026681813349183, + "learning_rate": 1.7553200501586743e-06, + "loss": 0.21064560115337372, + "step": 6163 + }, + { + "epoch": 1.6368344177400078, + "grad_norm": 1.3444341606502546, + "learning_rate": 1.7528360731397986e-06, + "loss": 0.26709994673728943, + "step": 6164 + }, + { + "epoch": 1.6370999867215508, + "grad_norm": 1.2755110888757868, + "learning_rate": 1.750353686076861e-06, + "loss": 0.26555943489074707, + "step": 6165 + }, + { + "epoch": 1.6373655557030937, + "grad_norm": 1.3299250322981557, + "learning_rate": 1.7478728894484375e-06, + "loss": 
0.24480760097503662, + "step": 6166 + }, + { + "epoch": 1.6376311246846367, + "grad_norm": 1.2560095314061934, + "learning_rate": 1.7453936837327967e-06, + "loss": 0.2170884907245636, + "step": 6167 + }, + { + "epoch": 1.6378966936661796, + "grad_norm": 1.340756013397369, + "learning_rate": 1.7429160694078983e-06, + "loss": 0.24728982150554657, + "step": 6168 + }, + { + "epoch": 1.6381622626477228, + "grad_norm": 1.1911402182063675, + "learning_rate": 1.7404400469513994e-06, + "loss": 0.20886945724487305, + "step": 6169 + }, + { + "epoch": 1.6384278316292658, + "grad_norm": 1.2150445755778985, + "learning_rate": 1.7379656168406467e-06, + "loss": 0.1892474740743637, + "step": 6170 + }, + { + "epoch": 1.6386934006108087, + "grad_norm": 1.3004801024505461, + "learning_rate": 1.7354927795526821e-06, + "loss": 0.24953782558441162, + "step": 6171 + }, + { + "epoch": 1.6389589695923517, + "grad_norm": 1.2292705802712374, + "learning_rate": 1.7330215355642377e-06, + "loss": 0.2311600148677826, + "step": 6172 + }, + { + "epoch": 1.6392245385738946, + "grad_norm": 1.2596864005467026, + "learning_rate": 1.73055188535174e-06, + "loss": 0.24018675088882446, + "step": 6173 + }, + { + "epoch": 1.6394901075554376, + "grad_norm": 1.3394449685829455, + "learning_rate": 1.7280838293913116e-06, + "loss": 0.22607022523880005, + "step": 6174 + }, + { + "epoch": 1.6397556765369805, + "grad_norm": 1.2860534255043978, + "learning_rate": 1.7256173681587619e-06, + "loss": 0.23725482821464539, + "step": 6175 + }, + { + "epoch": 1.6400212455185235, + "grad_norm": 1.2500709715234832, + "learning_rate": 1.723152502129597e-06, + "loss": 0.241235613822937, + "step": 6176 + }, + { + "epoch": 1.6402868145000664, + "grad_norm": 1.2070755501863832, + "learning_rate": 1.7206892317790136e-06, + "loss": 0.2150690108537674, + "step": 6177 + }, + { + "epoch": 1.6405523834816094, + "grad_norm": 1.2557873581014805, + "learning_rate": 1.7182275575819007e-06, + "loss": 0.22133421897888184, + "step": 6178 + }, 
+ { + "epoch": 1.6408179524631523, + "grad_norm": 1.1297884729403, + "learning_rate": 1.7157674800128399e-06, + "loss": 0.1937463879585266, + "step": 6179 + }, + { + "epoch": 1.6410835214446953, + "grad_norm": 1.0851305240668396, + "learning_rate": 1.7133089995461062e-06, + "loss": 0.18938027322292328, + "step": 6180 + }, + { + "epoch": 1.6413490904262382, + "grad_norm": 1.2621430482402598, + "learning_rate": 1.7108521166556646e-06, + "loss": 0.23577997088432312, + "step": 6181 + }, + { + "epoch": 1.6416146594077812, + "grad_norm": 1.2915526813468403, + "learning_rate": 1.7083968318151734e-06, + "loss": 0.2712448537349701, + "step": 6182 + }, + { + "epoch": 1.6418802283893241, + "grad_norm": 1.276409938985324, + "learning_rate": 1.7059431454979825e-06, + "loss": 0.24242255091667175, + "step": 6183 + }, + { + "epoch": 1.642145797370867, + "grad_norm": 1.3152058895449834, + "learning_rate": 1.7034910581771347e-06, + "loss": 0.22521010041236877, + "step": 6184 + }, + { + "epoch": 1.64241136635241, + "grad_norm": 1.3840145244958133, + "learning_rate": 1.7010405703253618e-06, + "loss": 0.22026273608207703, + "step": 6185 + }, + { + "epoch": 1.642676935333953, + "grad_norm": 1.458737402535225, + "learning_rate": 1.6985916824150894e-06, + "loss": 0.22726528346538544, + "step": 6186 + }, + { + "epoch": 1.642942504315496, + "grad_norm": 1.3396783040947258, + "learning_rate": 1.6961443949184353e-06, + "loss": 0.25172409415245056, + "step": 6187 + }, + { + "epoch": 1.6432080732970389, + "grad_norm": 1.1393591185728944, + "learning_rate": 1.6936987083072065e-06, + "loss": 0.21173113584518433, + "step": 6188 + }, + { + "epoch": 1.6434736422785818, + "grad_norm": 1.3589729407555038, + "learning_rate": 1.6912546230529036e-06, + "loss": 0.22596749663352966, + "step": 6189 + }, + { + "epoch": 1.6437392112601248, + "grad_norm": 1.3604263454917045, + "learning_rate": 1.6888121396267166e-06, + "loss": 0.2749077081680298, + "step": 6190 + }, + { + "epoch": 1.6440047802416677, + 
"grad_norm": 2.5555069132462283, + "learning_rate": 1.6863712584995252e-06, + "loss": 0.22150780260562897, + "step": 6191 + }, + { + "epoch": 1.6442703492232107, + "grad_norm": 1.2838243253096144, + "learning_rate": 1.6839319801419073e-06, + "loss": 0.23437368869781494, + "step": 6192 + }, + { + "epoch": 1.6445359182047536, + "grad_norm": 1.3069256977628543, + "learning_rate": 1.681494305024125e-06, + "loss": 0.22949008643627167, + "step": 6193 + }, + { + "epoch": 1.6448014871862966, + "grad_norm": 1.2956112975441718, + "learning_rate": 1.6790582336161332e-06, + "loss": 0.24147525429725647, + "step": 6194 + }, + { + "epoch": 1.6450670561678395, + "grad_norm": 1.180082798545332, + "learning_rate": 1.6766237663875773e-06, + "loss": 0.2001456618309021, + "step": 6195 + }, + { + "epoch": 1.6453326251493825, + "grad_norm": 1.2710753216206616, + "learning_rate": 1.674190903807794e-06, + "loss": 0.17668186128139496, + "step": 6196 + }, + { + "epoch": 1.6455981941309257, + "grad_norm": 1.369840319031622, + "learning_rate": 1.6717596463458107e-06, + "loss": 0.24585255980491638, + "step": 6197 + }, + { + "epoch": 1.6458637631124686, + "grad_norm": 1.2328642285488454, + "learning_rate": 1.6693299944703479e-06, + "loss": 0.2234572172164917, + "step": 6198 + }, + { + "epoch": 1.6461293320940116, + "grad_norm": 1.2369910191993496, + "learning_rate": 1.6669019486498083e-06, + "loss": 0.2007240653038025, + "step": 6199 + }, + { + "epoch": 1.6463949010755545, + "grad_norm": 1.317383450933259, + "learning_rate": 1.6644755093522913e-06, + "loss": 0.21926215291023254, + "step": 6200 + }, + { + "epoch": 1.6466604700570975, + "grad_norm": 1.3404302006039666, + "learning_rate": 1.662050677045589e-06, + "loss": 0.24797898530960083, + "step": 6201 + }, + { + "epoch": 1.6469260390386404, + "grad_norm": 1.285343354391859, + "learning_rate": 1.65962745219718e-06, + "loss": 0.22087037563323975, + "step": 6202 + }, + { + "epoch": 1.6471916080201834, + "grad_norm": 1.2765781805195457, + 
"learning_rate": 1.6572058352742327e-06, + "loss": 0.23073960840702057, + "step": 6203 + }, + { + "epoch": 1.6474571770017263, + "grad_norm": 1.3644493807061109, + "learning_rate": 1.6547858267436056e-06, + "loss": 0.2430298924446106, + "step": 6204 + }, + { + "epoch": 1.6477227459832693, + "grad_norm": 1.286198443262182, + "learning_rate": 1.6523674270718493e-06, + "loss": 0.23337247967720032, + "step": 6205 + }, + { + "epoch": 1.6479883149648122, + "grad_norm": 1.2144238817830517, + "learning_rate": 1.6499506367252016e-06, + "loss": 0.22141093015670776, + "step": 6206 + }, + { + "epoch": 1.6482538839463552, + "grad_norm": 1.280282959866893, + "learning_rate": 1.647535456169591e-06, + "loss": 0.23247988522052765, + "step": 6207 + }, + { + "epoch": 1.6485194529278981, + "grad_norm": 1.3728921390628253, + "learning_rate": 1.6451218858706374e-06, + "loss": 0.2659391760826111, + "step": 6208 + }, + { + "epoch": 1.648785021909441, + "grad_norm": 1.2534645715863684, + "learning_rate": 1.642709926293644e-06, + "loss": 0.2154998630285263, + "step": 6209 + }, + { + "epoch": 1.649050590890984, + "grad_norm": 1.322825591754104, + "learning_rate": 1.6402995779036146e-06, + "loss": 0.20363599061965942, + "step": 6210 + }, + { + "epoch": 1.649316159872527, + "grad_norm": 1.3775669953664806, + "learning_rate": 1.6378908411652328e-06, + "loss": 0.23388779163360596, + "step": 6211 + }, + { + "epoch": 1.64958172885407, + "grad_norm": 1.205059730534318, + "learning_rate": 1.6354837165428772e-06, + "loss": 0.20465341210365295, + "step": 6212 + }, + { + "epoch": 1.649847297835613, + "grad_norm": 1.2409004364034002, + "learning_rate": 1.6330782045006088e-06, + "loss": 0.2233584225177765, + "step": 6213 + }, + { + "epoch": 1.6501128668171559, + "grad_norm": 1.313264623251788, + "learning_rate": 1.6306743055021834e-06, + "loss": 0.2880077064037323, + "step": 6214 + }, + { + "epoch": 1.6503784357986988, + "grad_norm": 1.2769524753658168, + "learning_rate": 1.6282720200110458e-06, + 
"loss": 0.23332230746746063, + "step": 6215 + }, + { + "epoch": 1.6506440047802418, + "grad_norm": 1.2682336609825682, + "learning_rate": 1.6258713484903266e-06, + "loss": 0.22191204130649567, + "step": 6216 + }, + { + "epoch": 1.6509095737617847, + "grad_norm": 1.2899982671052521, + "learning_rate": 1.6234722914028478e-06, + "loss": 0.2403659224510193, + "step": 6217 + }, + { + "epoch": 1.6511751427433277, + "grad_norm": 1.2823746538865957, + "learning_rate": 1.6210748492111161e-06, + "loss": 0.2230256348848343, + "step": 6218 + }, + { + "epoch": 1.6514407117248706, + "grad_norm": 1.233703409456991, + "learning_rate": 1.6186790223773375e-06, + "loss": 0.2086302787065506, + "step": 6219 + }, + { + "epoch": 1.6517062807064136, + "grad_norm": 1.2696219439991872, + "learning_rate": 1.6162848113633934e-06, + "loss": 0.22336703538894653, + "step": 6220 + }, + { + "epoch": 1.6519718496879565, + "grad_norm": 1.2026474951561137, + "learning_rate": 1.6138922166308613e-06, + "loss": 0.2354746013879776, + "step": 6221 + }, + { + "epoch": 1.6522374186694995, + "grad_norm": 1.212799588563382, + "learning_rate": 1.6115012386410045e-06, + "loss": 0.23983564972877502, + "step": 6222 + }, + { + "epoch": 1.6525029876510424, + "grad_norm": 1.3394195242071623, + "learning_rate": 1.6091118778547765e-06, + "loss": 0.25468897819519043, + "step": 6223 + }, + { + "epoch": 1.6527685566325854, + "grad_norm": 1.2085737685975797, + "learning_rate": 1.6067241347328166e-06, + "loss": 0.2225346863269806, + "step": 6224 + }, + { + "epoch": 1.6530341256141283, + "grad_norm": 1.4474708027397767, + "learning_rate": 1.6043380097354543e-06, + "loss": 0.28801992535591125, + "step": 6225 + }, + { + "epoch": 1.6532996945956713, + "grad_norm": 1.1308003259460488, + "learning_rate": 1.6019535033227063e-06, + "loss": 0.1869816929101944, + "step": 6226 + }, + { + "epoch": 1.6535652635772142, + "grad_norm": 1.3022141110443597, + "learning_rate": 1.5995706159542768e-06, + "loss": 0.2569049894809723, + "step": 
6227 + }, + { + "epoch": 1.6538308325587572, + "grad_norm": 1.2689496619282572, + "learning_rate": 1.5971893480895583e-06, + "loss": 0.19138488173484802, + "step": 6228 + }, + { + "epoch": 1.6540964015403001, + "grad_norm": 1.2583553251304942, + "learning_rate": 1.5948097001876318e-06, + "loss": 0.23107777535915375, + "step": 6229 + }, + { + "epoch": 1.654361970521843, + "grad_norm": 1.4140324563807463, + "learning_rate": 1.5924316727072652e-06, + "loss": 0.21682313084602356, + "step": 6230 + }, + { + "epoch": 1.654627539503386, + "grad_norm": 1.6445896965406597, + "learning_rate": 1.5900552661069135e-06, + "loss": 0.27629974484443665, + "step": 6231 + }, + { + "epoch": 1.654893108484929, + "grad_norm": 1.2060133562172235, + "learning_rate": 1.587680480844721e-06, + "loss": 0.21919876337051392, + "step": 6232 + }, + { + "epoch": 1.655158677466472, + "grad_norm": 1.4827934801999716, + "learning_rate": 1.5853073173785183e-06, + "loss": 0.2556184232234955, + "step": 6233 + }, + { + "epoch": 1.655424246448015, + "grad_norm": 1.1362954303327644, + "learning_rate": 1.5829357761658214e-06, + "loss": 0.1904449462890625, + "step": 6234 + }, + { + "epoch": 1.6556898154295578, + "grad_norm": 1.2410374365127181, + "learning_rate": 1.5805658576638372e-06, + "loss": 0.1991434246301651, + "step": 6235 + }, + { + "epoch": 1.6559553844111008, + "grad_norm": 1.4428347821081515, + "learning_rate": 1.5781975623294554e-06, + "loss": 0.2609177231788635, + "step": 6236 + }, + { + "epoch": 1.6562209533926437, + "grad_norm": 1.276051044481299, + "learning_rate": 1.575830890619261e-06, + "loss": 0.2481592893600464, + "step": 6237 + }, + { + "epoch": 1.6564865223741867, + "grad_norm": 1.2930470444266673, + "learning_rate": 1.5734658429895156e-06, + "loss": 0.23855090141296387, + "step": 6238 + }, + { + "epoch": 1.6567520913557297, + "grad_norm": 1.326739898505445, + "learning_rate": 1.5711024198961745e-06, + "loss": 0.2480623573064804, + "step": 6239 + }, + { + "epoch": 1.6570176603372726, + 
"grad_norm": 1.4145385747738486, + "learning_rate": 1.5687406217948775e-06, + "loss": 0.2504739463329315, + "step": 6240 + }, + { + "epoch": 1.6572832293188156, + "grad_norm": 1.1843269954841462, + "learning_rate": 1.5663804491409506e-06, + "loss": 0.2068580538034439, + "step": 6241 + }, + { + "epoch": 1.6575487983003585, + "grad_norm": 1.45151426190796, + "learning_rate": 1.5640219023894077e-06, + "loss": 0.2448163628578186, + "step": 6242 + }, + { + "epoch": 1.6578143672819015, + "grad_norm": 1.3391765527579818, + "learning_rate": 1.5616649819949492e-06, + "loss": 0.2514716386795044, + "step": 6243 + }, + { + "epoch": 1.6580799362634444, + "grad_norm": 1.1884099966156902, + "learning_rate": 1.559309688411962e-06, + "loss": 0.2067629098892212, + "step": 6244 + }, + { + "epoch": 1.6583455052449874, + "grad_norm": 1.2042735442206352, + "learning_rate": 1.5569560220945168e-06, + "loss": 0.22909750044345856, + "step": 6245 + }, + { + "epoch": 1.6586110742265303, + "grad_norm": 1.4646403481954997, + "learning_rate": 1.5546039834963745e-06, + "loss": 0.203629732131958, + "step": 6246 + }, + { + "epoch": 1.6588766432080733, + "grad_norm": 1.2050936311763847, + "learning_rate": 1.552253573070981e-06, + "loss": 0.21919086575508118, + "step": 6247 + }, + { + "epoch": 1.6591422121896162, + "grad_norm": 1.4379501702554756, + "learning_rate": 1.549904791271466e-06, + "loss": 0.2535661458969116, + "step": 6248 + }, + { + "epoch": 1.6594077811711592, + "grad_norm": 1.2609582047884877, + "learning_rate": 1.5475576385506475e-06, + "loss": 0.224460631608963, + "step": 6249 + }, + { + "epoch": 1.6596733501527021, + "grad_norm": 1.2625738742925756, + "learning_rate": 1.5452121153610288e-06, + "loss": 0.21925818920135498, + "step": 6250 + }, + { + "epoch": 1.659938919134245, + "grad_norm": 1.2787763694898493, + "learning_rate": 1.5428682221547997e-06, + "loss": 0.2100696563720703, + "step": 6251 + }, + { + "epoch": 1.660204488115788, + "grad_norm": 1.3484219674096825, + 
"learning_rate": 1.540525959383834e-06, + "loss": 0.25982293486595154, + "step": 6252 + }, + { + "epoch": 1.660470057097331, + "grad_norm": 1.2527966644905648, + "learning_rate": 1.538185327499694e-06, + "loss": 0.23615162074565887, + "step": 6253 + }, + { + "epoch": 1.660735626078874, + "grad_norm": 1.2738910414784854, + "learning_rate": 1.5358463269536218e-06, + "loss": 0.2454022467136383, + "step": 6254 + }, + { + "epoch": 1.6610011950604169, + "grad_norm": 1.3825181535789863, + "learning_rate": 1.5335089581965556e-06, + "loss": 0.2330605536699295, + "step": 6255 + }, + { + "epoch": 1.6612667640419598, + "grad_norm": 1.2169082012465264, + "learning_rate": 1.5311732216791087e-06, + "loss": 0.23193006217479706, + "step": 6256 + }, + { + "epoch": 1.6615323330235028, + "grad_norm": 1.2690481284418431, + "learning_rate": 1.5288391178515838e-06, + "loss": 0.23254770040512085, + "step": 6257 + }, + { + "epoch": 1.6617979020050457, + "grad_norm": 1.2246821396199268, + "learning_rate": 1.5265066471639701e-06, + "loss": 0.23240572214126587, + "step": 6258 + }, + { + "epoch": 1.6620634709865887, + "grad_norm": 1.3414134094293932, + "learning_rate": 1.5241758100659386e-06, + "loss": 0.2765730619430542, + "step": 6259 + }, + { + "epoch": 1.6623290399681316, + "grad_norm": 1.2956291225041994, + "learning_rate": 1.5218466070068472e-06, + "loss": 0.26366496086120605, + "step": 6260 + }, + { + "epoch": 1.6625946089496746, + "grad_norm": 1.240730160583952, + "learning_rate": 1.5195190384357405e-06, + "loss": 0.22322653234004974, + "step": 6261 + }, + { + "epoch": 1.6628601779312175, + "grad_norm": 1.2433877123660553, + "learning_rate": 1.5171931048013466e-06, + "loss": 0.24144116044044495, + "step": 6262 + }, + { + "epoch": 1.6631257469127605, + "grad_norm": 1.3783130308299147, + "learning_rate": 1.5148688065520734e-06, + "loss": 0.24559618532657623, + "step": 6263 + }, + { + "epoch": 1.6633913158943034, + "grad_norm": 1.3258590224160887, + "learning_rate": 
1.5125461441360223e-06, + "loss": 0.24337056279182434, + "step": 6264 + }, + { + "epoch": 1.6636568848758464, + "grad_norm": 1.3292875380649603, + "learning_rate": 1.5102251180009752e-06, + "loss": 0.2733612358570099, + "step": 6265 + }, + { + "epoch": 1.6639224538573893, + "grad_norm": 1.2329811544038785, + "learning_rate": 1.5079057285943976e-06, + "loss": 0.2116459757089615, + "step": 6266 + }, + { + "epoch": 1.6641880228389323, + "grad_norm": 1.2335642813115397, + "learning_rate": 1.5055879763634407e-06, + "loss": 0.21221664547920227, + "step": 6267 + }, + { + "epoch": 1.6644535918204753, + "grad_norm": 1.2500150658336624, + "learning_rate": 1.503271861754939e-06, + "loss": 0.21166589856147766, + "step": 6268 + }, + { + "epoch": 1.6647191608020182, + "grad_norm": 1.5113123418333367, + "learning_rate": 1.5009573852154136e-06, + "loss": 0.2652161121368408, + "step": 6269 + }, + { + "epoch": 1.6649847297835612, + "grad_norm": 1.262834880378694, + "learning_rate": 1.4986445471910672e-06, + "loss": 0.22142267227172852, + "step": 6270 + }, + { + "epoch": 1.665250298765104, + "grad_norm": 1.4442965183949772, + "learning_rate": 1.4963333481277874e-06, + "loss": 0.2307332456111908, + "step": 6271 + }, + { + "epoch": 1.665515867746647, + "grad_norm": 1.411326986781179, + "learning_rate": 1.494023788471144e-06, + "loss": 0.2669411897659302, + "step": 6272 + }, + { + "epoch": 1.66578143672819, + "grad_norm": 1.2823998109594834, + "learning_rate": 1.4917158686663992e-06, + "loss": 0.2468804121017456, + "step": 6273 + }, + { + "epoch": 1.666047005709733, + "grad_norm": 1.2639666166307362, + "learning_rate": 1.4894095891584882e-06, + "loss": 0.24152463674545288, + "step": 6274 + }, + { + "epoch": 1.666312574691276, + "grad_norm": 1.098201760932299, + "learning_rate": 1.4871049503920353e-06, + "loss": 0.1966545283794403, + "step": 6275 + }, + { + "epoch": 1.6665781436728189, + "grad_norm": 1.2773845282560163, + "learning_rate": 1.4848019528113477e-06, + "loss": 
0.24772626161575317, + "step": 6276 + }, + { + "epoch": 1.6668437126543618, + "grad_norm": 1.3731672204722256, + "learning_rate": 1.4825005968604189e-06, + "loss": 0.22138851881027222, + "step": 6277 + }, + { + "epoch": 1.6671092816359048, + "grad_norm": 1.2245583238686863, + "learning_rate": 1.4802008829829172e-06, + "loss": 0.24345465004444122, + "step": 6278 + }, + { + "epoch": 1.6673748506174477, + "grad_norm": 1.3209828849983516, + "learning_rate": 1.477902811622205e-06, + "loss": 0.22862716019153595, + "step": 6279 + }, + { + "epoch": 1.6676404195989907, + "grad_norm": 1.2914770883474422, + "learning_rate": 1.4756063832213207e-06, + "loss": 0.2763083577156067, + "step": 6280 + }, + { + "epoch": 1.6679059885805336, + "grad_norm": 1.3142139937070516, + "learning_rate": 1.4733115982229885e-06, + "loss": 0.24631357192993164, + "step": 6281 + }, + { + "epoch": 1.6681715575620768, + "grad_norm": 1.322429969576976, + "learning_rate": 1.4710184570696184e-06, + "loss": 0.22650030255317688, + "step": 6282 + }, + { + "epoch": 1.6684371265436198, + "grad_norm": 1.3243342318873437, + "learning_rate": 1.4687269602033006e-06, + "loss": 0.2455909103155136, + "step": 6283 + }, + { + "epoch": 1.6687026955251627, + "grad_norm": 1.3711517369784783, + "learning_rate": 1.4664371080658079e-06, + "loss": 0.25625506043434143, + "step": 6284 + }, + { + "epoch": 1.6689682645067057, + "grad_norm": 1.1450036681372322, + "learning_rate": 1.4641489010985954e-06, + "loss": 0.22178369760513306, + "step": 6285 + }, + { + "epoch": 1.6692338334882486, + "grad_norm": 1.2644620602089436, + "learning_rate": 1.4618623397428055e-06, + "loss": 0.23936234414577484, + "step": 6286 + }, + { + "epoch": 1.6694994024697916, + "grad_norm": 1.2667144776178243, + "learning_rate": 1.459577424439258e-06, + "loss": 0.21629829704761505, + "step": 6287 + }, + { + "epoch": 1.6697649714513345, + "grad_norm": 1.3486786043134158, + "learning_rate": 1.457294155628457e-06, + "loss": 0.238427072763443, + "step": 6288 + 
}, + { + "epoch": 1.6700305404328775, + "grad_norm": 1.412674472973442, + "learning_rate": 1.4550125337505926e-06, + "loss": 0.23168250918388367, + "step": 6289 + }, + { + "epoch": 1.6702961094144204, + "grad_norm": 1.3185872633193214, + "learning_rate": 1.45273255924553e-06, + "loss": 0.25518402457237244, + "step": 6290 + }, + { + "epoch": 1.6705616783959634, + "grad_norm": 1.2092220747685465, + "learning_rate": 1.450454232552826e-06, + "loss": 0.2488553822040558, + "step": 6291 + }, + { + "epoch": 1.6708272473775063, + "grad_norm": 1.4309048190710245, + "learning_rate": 1.448177554111716e-06, + "loss": 0.2684085965156555, + "step": 6292 + }, + { + "epoch": 1.6710928163590493, + "grad_norm": 1.3645105519242562, + "learning_rate": 1.4459025243611124e-06, + "loss": 0.24627447128295898, + "step": 6293 + }, + { + "epoch": 1.6713583853405922, + "grad_norm": 1.2960987120962004, + "learning_rate": 1.4436291437396156e-06, + "loss": 0.24725376069545746, + "step": 6294 + }, + { + "epoch": 1.6716239543221352, + "grad_norm": 1.2752333210419433, + "learning_rate": 1.4413574126855067e-06, + "loss": 0.23488914966583252, + "step": 6295 + }, + { + "epoch": 1.6718895233036781, + "grad_norm": 1.2385365684534737, + "learning_rate": 1.4390873316367492e-06, + "loss": 0.2031177133321762, + "step": 6296 + }, + { + "epoch": 1.672155092285221, + "grad_norm": 1.265889760948498, + "learning_rate": 1.4368189010309874e-06, + "loss": 0.25378018617630005, + "step": 6297 + }, + { + "epoch": 1.672420661266764, + "grad_norm": 1.2443137764428682, + "learning_rate": 1.434552121305548e-06, + "loss": 0.21305282413959503, + "step": 6298 + }, + { + "epoch": 1.672686230248307, + "grad_norm": 1.1925787762252436, + "learning_rate": 1.432286992897437e-06, + "loss": 0.20908987522125244, + "step": 6299 + }, + { + "epoch": 1.67295179922985, + "grad_norm": 1.2228377563088515, + "learning_rate": 1.4300235162433496e-06, + "loss": 0.21945340931415558, + "step": 6300 + }, + { + "epoch": 1.6732173682113929, + 
"grad_norm": 1.3659267409445854, + "learning_rate": 1.4277616917796544e-06, + "loss": 0.22096669673919678, + "step": 6301 + }, + { + "epoch": 1.6734829371929358, + "grad_norm": 1.2773291306452106, + "learning_rate": 1.425501519942406e-06, + "loss": 0.2233850657939911, + "step": 6302 + }, + { + "epoch": 1.6737485061744788, + "grad_norm": 1.2672720076411363, + "learning_rate": 1.423243001167337e-06, + "loss": 0.21432995796203613, + "step": 6303 + }, + { + "epoch": 1.6740140751560217, + "grad_norm": 1.3864014459258447, + "learning_rate": 1.4209861358898636e-06, + "loss": 0.2649557590484619, + "step": 6304 + }, + { + "epoch": 1.6742796441375647, + "grad_norm": 1.2642836811067808, + "learning_rate": 1.418730924545083e-06, + "loss": 0.24918347597122192, + "step": 6305 + }, + { + "epoch": 1.6745452131191076, + "grad_norm": 1.3089175693989048, + "learning_rate": 1.4164773675677745e-06, + "loss": 0.24121029675006866, + "step": 6306 + }, + { + "epoch": 1.6748107821006506, + "grad_norm": 1.2569762960026158, + "learning_rate": 1.4142254653923949e-06, + "loss": 0.24401789903640747, + "step": 6307 + }, + { + "epoch": 1.6750763510821935, + "grad_norm": 1.3272546708188746, + "learning_rate": 1.4119752184530867e-06, + "loss": 0.2374853938817978, + "step": 6308 + }, + { + "epoch": 1.6753419200637365, + "grad_norm": 1.2973848864698938, + "learning_rate": 1.4097266271836695e-06, + "loss": 0.2351088970899582, + "step": 6309 + }, + { + "epoch": 1.6756074890452797, + "grad_norm": 1.301417674196528, + "learning_rate": 1.407479692017647e-06, + "loss": 0.19560754299163818, + "step": 6310 + }, + { + "epoch": 1.6758730580268226, + "grad_norm": 1.390250023674765, + "learning_rate": 1.405234413388199e-06, + "loss": 0.24124252796173096, + "step": 6311 + }, + { + "epoch": 1.6761386270083656, + "grad_norm": 1.3742469305206364, + "learning_rate": 1.4029907917281903e-06, + "loss": 0.2208215445280075, + "step": 6312 + }, + { + "epoch": 1.6764041959899085, + "grad_norm": 1.2125662977366807, + 
"learning_rate": 1.4007488274701653e-06, + "loss": 0.23888292908668518, + "step": 6313 + }, + { + "epoch": 1.6766697649714515, + "grad_norm": 1.2936432356109655, + "learning_rate": 1.3985085210463479e-06, + "loss": 0.24079063534736633, + "step": 6314 + }, + { + "epoch": 1.6769353339529944, + "grad_norm": 1.2011852751375642, + "learning_rate": 1.3962698728886414e-06, + "loss": 0.18975606560707092, + "step": 6315 + }, + { + "epoch": 1.6772009029345374, + "grad_norm": 1.322599968285396, + "learning_rate": 1.3940328834286333e-06, + "loss": 0.201214998960495, + "step": 6316 + }, + { + "epoch": 1.6774664719160803, + "grad_norm": 1.2090909210103018, + "learning_rate": 1.3917975530975836e-06, + "loss": 0.20079322159290314, + "step": 6317 + }, + { + "epoch": 1.6777320408976233, + "grad_norm": 1.2732868066143843, + "learning_rate": 1.3895638823264447e-06, + "loss": 0.23593586683273315, + "step": 6318 + }, + { + "epoch": 1.6779976098791662, + "grad_norm": 1.3931846809533017, + "learning_rate": 1.3873318715458383e-06, + "loss": 0.26574259996414185, + "step": 6319 + }, + { + "epoch": 1.6782631788607092, + "grad_norm": 1.252943610173436, + "learning_rate": 1.3851015211860696e-06, + "loss": 0.20573323965072632, + "step": 6320 + }, + { + "epoch": 1.6785287478422521, + "grad_norm": 1.4484920974875073, + "learning_rate": 1.3828728316771244e-06, + "loss": 0.25610506534576416, + "step": 6321 + }, + { + "epoch": 1.678794316823795, + "grad_norm": 1.330338299337135, + "learning_rate": 1.380645803448668e-06, + "loss": 0.2138693630695343, + "step": 6322 + }, + { + "epoch": 1.679059885805338, + "grad_norm": 1.1479105398064924, + "learning_rate": 1.3784204369300447e-06, + "loss": 0.21522866189479828, + "step": 6323 + }, + { + "epoch": 1.679325454786881, + "grad_norm": 1.441538971613898, + "learning_rate": 1.376196732550279e-06, + "loss": 0.25622743368148804, + "step": 6324 + }, + { + "epoch": 1.679591023768424, + "grad_norm": 1.354050705773023, + "learning_rate": 1.3739746907380757e-06, + 
"loss": 0.18025386333465576, + "step": 6325 + }, + { + "epoch": 1.679856592749967, + "grad_norm": 1.1665775097977176, + "learning_rate": 1.3717543119218168e-06, + "loss": 0.18785078823566437, + "step": 6326 + }, + { + "epoch": 1.6801221617315099, + "grad_norm": 1.3771154706722653, + "learning_rate": 1.3695355965295653e-06, + "loss": 0.24682481586933136, + "step": 6327 + }, + { + "epoch": 1.6803877307130528, + "grad_norm": 1.2994385931646761, + "learning_rate": 1.3673185449890647e-06, + "loss": 0.2193487137556076, + "step": 6328 + }, + { + "epoch": 1.6806532996945958, + "grad_norm": 1.2960131024456552, + "learning_rate": 1.3651031577277351e-06, + "loss": 0.24963265657424927, + "step": 6329 + }, + { + "epoch": 1.6809188686761387, + "grad_norm": 1.2714587333981215, + "learning_rate": 1.3628894351726785e-06, + "loss": 0.21473057568073273, + "step": 6330 + }, + { + "epoch": 1.6811844376576817, + "grad_norm": 1.4508064568072063, + "learning_rate": 1.3606773777506731e-06, + "loss": 0.2539534866809845, + "step": 6331 + }, + { + "epoch": 1.6814500066392246, + "grad_norm": 1.5049767699399101, + "learning_rate": 1.3584669858881771e-06, + "loss": 0.2671799659729004, + "step": 6332 + }, + { + "epoch": 1.6817155756207676, + "grad_norm": 1.211295376852026, + "learning_rate": 1.3562582600113295e-06, + "loss": 0.24291013181209564, + "step": 6333 + }, + { + "epoch": 1.6819811446023105, + "grad_norm": 1.3672105989135315, + "learning_rate": 1.354051200545946e-06, + "loss": 0.24249233305454254, + "step": 6334 + }, + { + "epoch": 1.6822467135838535, + "grad_norm": 1.2855842039831968, + "learning_rate": 1.351845807917519e-06, + "loss": 0.21647261083126068, + "step": 6335 + }, + { + "epoch": 1.6825122825653964, + "grad_norm": 1.2764605035604815, + "learning_rate": 1.349642082551227e-06, + "loss": 0.2348332703113556, + "step": 6336 + }, + { + "epoch": 1.6827778515469394, + "grad_norm": 1.3049495455341118, + "learning_rate": 1.34744002487192e-06, + "loss": 0.22503259778022766, + "step": 
6337 + }, + { + "epoch": 1.6830434205284823, + "grad_norm": 1.3236190891705721, + "learning_rate": 1.3452396353041286e-06, + "loss": 0.2397763580083847, + "step": 6338 + }, + { + "epoch": 1.6833089895100253, + "grad_norm": 1.156426557066381, + "learning_rate": 1.3430409142720624e-06, + "loss": 0.23345956206321716, + "step": 6339 + }, + { + "epoch": 1.6835745584915682, + "grad_norm": 1.1932341696009043, + "learning_rate": 1.3408438621996088e-06, + "loss": 0.19660598039627075, + "step": 6340 + }, + { + "epoch": 1.6838401274731112, + "grad_norm": 1.262928020262074, + "learning_rate": 1.3386484795103327e-06, + "loss": 0.19148695468902588, + "step": 6341 + }, + { + "epoch": 1.6841056964546541, + "grad_norm": 1.2112774084067142, + "learning_rate": 1.3364547666274819e-06, + "loss": 0.2078169733285904, + "step": 6342 + }, + { + "epoch": 1.684371265436197, + "grad_norm": 1.3703852622718744, + "learning_rate": 1.3342627239739715e-06, + "loss": 0.23122575879096985, + "step": 6343 + }, + { + "epoch": 1.68463683441774, + "grad_norm": 1.350523705417422, + "learning_rate": 1.3320723519724032e-06, + "loss": 0.2744083106517792, + "step": 6344 + }, + { + "epoch": 1.684902403399283, + "grad_norm": 1.3462449472678248, + "learning_rate": 1.3298836510450597e-06, + "loss": 0.26361098885536194, + "step": 6345 + }, + { + "epoch": 1.685167972380826, + "grad_norm": 1.2550654654863131, + "learning_rate": 1.3276966216138932e-06, + "loss": 0.21833205223083496, + "step": 6346 + }, + { + "epoch": 1.685433541362369, + "grad_norm": 1.306325021058624, + "learning_rate": 1.3255112641005374e-06, + "loss": 0.22075100243091583, + "step": 6347 + }, + { + "epoch": 1.6856991103439118, + "grad_norm": 1.4286786068270776, + "learning_rate": 1.3233275789263034e-06, + "loss": 0.24352343380451202, + "step": 6348 + }, + { + "epoch": 1.6859646793254548, + "grad_norm": 1.5476580340833483, + "learning_rate": 1.3211455665121808e-06, + "loss": 0.2331303060054779, + "step": 6349 + }, + { + "epoch": 1.6862302483069977, 
+ "grad_norm": 1.398559395598541, + "learning_rate": 1.3189652272788356e-06, + "loss": 0.2511689066886902, + "step": 6350 + }, + { + "epoch": 1.6864958172885407, + "grad_norm": 1.1704691076383393, + "learning_rate": 1.3167865616466113e-06, + "loss": 0.18535873293876648, + "step": 6351 + }, + { + "epoch": 1.6867613862700837, + "grad_norm": 1.3097469055952822, + "learning_rate": 1.3146095700355289e-06, + "loss": 0.23924914002418518, + "step": 6352 + }, + { + "epoch": 1.6870269552516266, + "grad_norm": 1.1591649275755667, + "learning_rate": 1.3124342528652845e-06, + "loss": 0.19710025191307068, + "step": 6353 + }, + { + "epoch": 1.6872925242331696, + "grad_norm": 1.393629731020981, + "learning_rate": 1.3102606105552585e-06, + "loss": 0.21439281105995178, + "step": 6354 + }, + { + "epoch": 1.6875580932147125, + "grad_norm": 1.3051512833867451, + "learning_rate": 1.3080886435245e-06, + "loss": 0.2647722363471985, + "step": 6355 + }, + { + "epoch": 1.6878236621962555, + "grad_norm": 2.6038516980586355, + "learning_rate": 1.3059183521917396e-06, + "loss": 0.2202019840478897, + "step": 6356 + }, + { + "epoch": 1.6880892311777984, + "grad_norm": 1.3022104210295473, + "learning_rate": 1.3037497369753871e-06, + "loss": 0.25833001732826233, + "step": 6357 + }, + { + "epoch": 1.6883548001593414, + "grad_norm": 1.1906464618269579, + "learning_rate": 1.3015827982935192e-06, + "loss": 0.19984321296215057, + "step": 6358 + }, + { + "epoch": 1.6886203691408843, + "grad_norm": 1.3347301103088016, + "learning_rate": 1.2994175365638996e-06, + "loss": 0.2190552055835724, + "step": 6359 + }, + { + "epoch": 1.6888859381224273, + "grad_norm": 1.265894337049371, + "learning_rate": 1.2972539522039652e-06, + "loss": 0.26262593269348145, + "step": 6360 + }, + { + "epoch": 1.6891515071039702, + "grad_norm": 1.285416913994909, + "learning_rate": 1.2950920456308292e-06, + "loss": 0.2665651738643646, + "step": 6361 + }, + { + "epoch": 1.6894170760855132, + "grad_norm": 1.213162722605336, + 
"learning_rate": 1.2929318172612803e-06, + "loss": 0.22369208931922913, + "step": 6362 + }, + { + "epoch": 1.6896826450670561, + "grad_norm": 1.2234073567984471, + "learning_rate": 1.2907732675117878e-06, + "loss": 0.21063543856143951, + "step": 6363 + }, + { + "epoch": 1.689948214048599, + "grad_norm": 1.3608426715056905, + "learning_rate": 1.2886163967984944e-06, + "loss": 0.2303045690059662, + "step": 6364 + }, + { + "epoch": 1.690213783030142, + "grad_norm": 1.1473656525455074, + "learning_rate": 1.2864612055372182e-06, + "loss": 0.20185884833335876, + "step": 6365 + }, + { + "epoch": 1.690479352011685, + "grad_norm": 1.2673026097919315, + "learning_rate": 1.284307694143455e-06, + "loss": 0.22900527715682983, + "step": 6366 + }, + { + "epoch": 1.690744920993228, + "grad_norm": 1.2373147270640896, + "learning_rate": 1.282155863032377e-06, + "loss": 0.21405862271785736, + "step": 6367 + }, + { + "epoch": 1.6910104899747709, + "grad_norm": 1.3139606008654157, + "learning_rate": 1.2800057126188304e-06, + "loss": 0.26143258810043335, + "step": 6368 + }, + { + "epoch": 1.6912760589563138, + "grad_norm": 1.319330305112879, + "learning_rate": 1.2778572433173397e-06, + "loss": 0.24437926709651947, + "step": 6369 + }, + { + "epoch": 1.6915416279378568, + "grad_norm": 1.1954155676954614, + "learning_rate": 1.275710455542104e-06, + "loss": 0.24862337112426758, + "step": 6370 + }, + { + "epoch": 1.6918071969193997, + "grad_norm": 1.2264107157331223, + "learning_rate": 1.2735653497069978e-06, + "loss": 0.2146604359149933, + "step": 6371 + }, + { + "epoch": 1.6920727659009427, + "grad_norm": 1.3217815480091177, + "learning_rate": 1.2714219262255777e-06, + "loss": 0.2525256872177124, + "step": 6372 + }, + { + "epoch": 1.6923383348824856, + "grad_norm": 1.289957068010404, + "learning_rate": 1.2692801855110638e-06, + "loss": 0.23462912440299988, + "step": 6373 + }, + { + "epoch": 1.6926039038640286, + "grad_norm": 1.3468375801476438, + "learning_rate": 1.2671401279763595e-06, + 
"loss": 0.21551170945167542, + "step": 6374 + }, + { + "epoch": 1.6928694728455715, + "grad_norm": 1.4457180200872415, + "learning_rate": 1.2650017540340454e-06, + "loss": 0.24094407260417938, + "step": 6375 + }, + { + "epoch": 1.6931350418271145, + "grad_norm": 1.2168123169553724, + "learning_rate": 1.2628650640963736e-06, + "loss": 0.23101133108139038, + "step": 6376 + }, + { + "epoch": 1.6934006108086574, + "grad_norm": 1.4830646801660192, + "learning_rate": 1.2607300585752724e-06, + "loss": 0.2513899803161621, + "step": 6377 + }, + { + "epoch": 1.6936661797902004, + "grad_norm": 1.417144859782869, + "learning_rate": 1.258596737882345e-06, + "loss": 0.2490600198507309, + "step": 6378 + }, + { + "epoch": 1.6939317487717434, + "grad_norm": 1.3403225341914131, + "learning_rate": 1.256465102428872e-06, + "loss": 0.25767675042152405, + "step": 6379 + }, + { + "epoch": 1.6941973177532863, + "grad_norm": 1.2775246675329248, + "learning_rate": 1.254335152625804e-06, + "loss": 0.2231348305940628, + "step": 6380 + }, + { + "epoch": 1.6944628867348293, + "grad_norm": 1.4410136520558763, + "learning_rate": 1.2522068888837758e-06, + "loss": 0.25873979926109314, + "step": 6381 + }, + { + "epoch": 1.6947284557163722, + "grad_norm": 1.4111151195923193, + "learning_rate": 1.2500803116130887e-06, + "loss": 0.2848423421382904, + "step": 6382 + }, + { + "epoch": 1.6949940246979152, + "grad_norm": 1.1110125207312456, + "learning_rate": 1.247955421223721e-06, + "loss": 0.21343804895877838, + "step": 6383 + }, + { + "epoch": 1.695259593679458, + "grad_norm": 1.3025436504976033, + "learning_rate": 1.245832218125328e-06, + "loss": 0.23080062866210938, + "step": 6384 + }, + { + "epoch": 1.695525162661001, + "grad_norm": 1.3020267493975237, + "learning_rate": 1.2437107027272376e-06, + "loss": 0.2397225797176361, + "step": 6385 + }, + { + "epoch": 1.695790731642544, + "grad_norm": 1.3120966348534624, + "learning_rate": 1.2415908754384532e-06, + "loss": 0.22798654437065125, + "step": 6386 + 
}, + { + "epoch": 1.696056300624087, + "grad_norm": 1.3399304326822938, + "learning_rate": 1.2394727366676518e-06, + "loss": 0.2534061074256897, + "step": 6387 + }, + { + "epoch": 1.69632186960563, + "grad_norm": 1.2269756633197797, + "learning_rate": 1.2373562868231858e-06, + "loss": 0.2127036452293396, + "step": 6388 + }, + { + "epoch": 1.6965874385871729, + "grad_norm": 1.341525895521795, + "learning_rate": 1.2352415263130813e-06, + "loss": 0.22341205179691315, + "step": 6389 + }, + { + "epoch": 1.6968530075687158, + "grad_norm": 1.316572711467383, + "learning_rate": 1.2331284555450406e-06, + "loss": 0.2435426563024521, + "step": 6390 + }, + { + "epoch": 1.6971185765502588, + "grad_norm": 1.3203864338710647, + "learning_rate": 1.2310170749264383e-06, + "loss": 0.24652531743049622, + "step": 6391 + }, + { + "epoch": 1.6973841455318017, + "grad_norm": 1.251250109623578, + "learning_rate": 1.228907384864323e-06, + "loss": 0.24172671139240265, + "step": 6392 + }, + { + "epoch": 1.6976497145133447, + "grad_norm": 1.293405881850453, + "learning_rate": 1.2267993857654182e-06, + "loss": 0.21534420549869537, + "step": 6393 + }, + { + "epoch": 1.6979152834948879, + "grad_norm": 2.1259133697182575, + "learning_rate": 1.2246930780361221e-06, + "loss": 0.2617778182029724, + "step": 6394 + }, + { + "epoch": 1.6981808524764308, + "grad_norm": 1.1793022391098469, + "learning_rate": 1.2225884620825046e-06, + "loss": 0.20388583838939667, + "step": 6395 + }, + { + "epoch": 1.6984464214579738, + "grad_norm": 1.289033320527503, + "learning_rate": 1.220485538310312e-06, + "loss": 0.23714327812194824, + "step": 6396 + }, + { + "epoch": 1.6987119904395167, + "grad_norm": 1.3592785135687544, + "learning_rate": 1.2183843071249634e-06, + "loss": 0.2495463341474533, + "step": 6397 + }, + { + "epoch": 1.6989775594210597, + "grad_norm": 1.2730498991215184, + "learning_rate": 1.2162847689315483e-06, + "loss": 0.2419012188911438, + "step": 6398 + }, + { + "epoch": 1.6992431284026026, + 
"grad_norm": 1.2226640861076554, + "learning_rate": 1.214186924134838e-06, + "loss": 0.23392438888549805, + "step": 6399 + }, + { + "epoch": 1.6995086973841456, + "grad_norm": 1.3210458214149883, + "learning_rate": 1.2120907731392695e-06, + "loss": 0.22855526208877563, + "step": 6400 + }, + { + "epoch": 1.6997742663656885, + "grad_norm": 1.2152782326664608, + "learning_rate": 1.2099963163489558e-06, + "loss": 0.22393949329853058, + "step": 6401 + }, + { + "epoch": 1.7000398353472315, + "grad_norm": 1.3855673404796554, + "learning_rate": 1.2079035541676832e-06, + "loss": 0.2539960741996765, + "step": 6402 + }, + { + "epoch": 1.7003054043287744, + "grad_norm": 1.3330270743987416, + "learning_rate": 1.2058124869989129e-06, + "loss": 0.23716852068901062, + "step": 6403 + }, + { + "epoch": 1.7005709733103174, + "grad_norm": 1.347782549245642, + "learning_rate": 1.2037231152457773e-06, + "loss": 0.24658545851707458, + "step": 6404 + }, + { + "epoch": 1.7008365422918603, + "grad_norm": 1.2494300647338343, + "learning_rate": 1.201635439311083e-06, + "loss": 0.2316630333662033, + "step": 6405 + }, + { + "epoch": 1.7011021112734033, + "grad_norm": 1.0834142572483991, + "learning_rate": 1.1995494595973089e-06, + "loss": 0.20434345304965973, + "step": 6406 + }, + { + "epoch": 1.7013676802549462, + "grad_norm": 1.3445140884275912, + "learning_rate": 1.197465176506607e-06, + "loss": 0.2585931420326233, + "step": 6407 + }, + { + "epoch": 1.7016332492364892, + "grad_norm": 1.2567668360829787, + "learning_rate": 1.1953825904408033e-06, + "loss": 0.23007069528102875, + "step": 6408 + }, + { + "epoch": 1.7018988182180321, + "grad_norm": 1.2770978609777501, + "learning_rate": 1.1933017018013948e-06, + "loss": 0.21822810173034668, + "step": 6409 + }, + { + "epoch": 1.702164387199575, + "grad_norm": 1.2875752799081717, + "learning_rate": 1.1912225109895526e-06, + "loss": 0.241228848695755, + "step": 6410 + }, + { + "epoch": 1.702429956181118, + "grad_norm": 1.3509759956774154, + 
"learning_rate": 1.1891450184061203e-06, + "loss": 0.28803908824920654, + "step": 6411 + }, + { + "epoch": 1.702695525162661, + "grad_norm": 1.3018941028318989, + "learning_rate": 1.1870692244516147e-06, + "loss": 0.2387516349554062, + "step": 6412 + }, + { + "epoch": 1.702961094144204, + "grad_norm": 1.2538051398244094, + "learning_rate": 1.1849951295262242e-06, + "loss": 0.19774140417575836, + "step": 6413 + }, + { + "epoch": 1.7032266631257469, + "grad_norm": 1.269953409174644, + "learning_rate": 1.1829227340298088e-06, + "loss": 0.22842247784137726, + "step": 6414 + }, + { + "epoch": 1.7034922321072898, + "grad_norm": 1.1987695898844528, + "learning_rate": 1.1808520383619015e-06, + "loss": 0.21994739770889282, + "step": 6415 + }, + { + "epoch": 1.7037578010888328, + "grad_norm": 1.2719096074486522, + "learning_rate": 1.1787830429217084e-06, + "loss": 0.22328051924705505, + "step": 6416 + }, + { + "epoch": 1.7040233700703757, + "grad_norm": 1.3583279531737376, + "learning_rate": 1.1767157481081092e-06, + "loss": 0.26704326272010803, + "step": 6417 + }, + { + "epoch": 1.7042889390519187, + "grad_norm": 1.2796404749500392, + "learning_rate": 1.174650154319653e-06, + "loss": 0.2148481160402298, + "step": 6418 + }, + { + "epoch": 1.7045545080334616, + "grad_norm": 1.1912742761204351, + "learning_rate": 1.1725862619545625e-06, + "loss": 0.21731218695640564, + "step": 6419 + }, + { + "epoch": 1.7048200770150046, + "grad_norm": 1.3502505047017879, + "learning_rate": 1.1705240714107301e-06, + "loss": 0.20832043886184692, + "step": 6420 + }, + { + "epoch": 1.7050856459965475, + "grad_norm": 1.2922565511595965, + "learning_rate": 1.1684635830857249e-06, + "loss": 0.21739046275615692, + "step": 6421 + }, + { + "epoch": 1.7053512149780907, + "grad_norm": 1.3041232291639149, + "learning_rate": 1.1664047973767811e-06, + "loss": 0.23972246050834656, + "step": 6422 + }, + { + "epoch": 1.7056167839596337, + "grad_norm": 1.2420174603299015, + "learning_rate": 
1.1643477146808092e-06, + "loss": 0.2471289187669754, + "step": 6423 + }, + { + "epoch": 1.7058823529411766, + "grad_norm": 1.2148999014811244, + "learning_rate": 1.1622923353943916e-06, + "loss": 0.2014283537864685, + "step": 6424 + }, + { + "epoch": 1.7061479219227196, + "grad_norm": 1.1799937956162947, + "learning_rate": 1.1602386599137782e-06, + "loss": 0.21680915355682373, + "step": 6425 + }, + { + "epoch": 1.7064134909042625, + "grad_norm": 1.2221660563202492, + "learning_rate": 1.158186688634898e-06, + "loss": 0.2101205736398697, + "step": 6426 + }, + { + "epoch": 1.7066790598858055, + "grad_norm": 1.2879683442276364, + "learning_rate": 1.1561364219533444e-06, + "loss": 0.22114071249961853, + "step": 6427 + }, + { + "epoch": 1.7069446288673484, + "grad_norm": 1.2910925736026095, + "learning_rate": 1.1540878602643858e-06, + "loss": 0.20608706772327423, + "step": 6428 + }, + { + "epoch": 1.7072101978488914, + "grad_norm": 1.2486066037383718, + "learning_rate": 1.1520410039629593e-06, + "loss": 0.2247905433177948, + "step": 6429 + }, + { + "epoch": 1.7074757668304343, + "grad_norm": 1.1718742986299986, + "learning_rate": 1.1499958534436751e-06, + "loss": 0.22623226046562195, + "step": 6430 + }, + { + "epoch": 1.7077413358119773, + "grad_norm": 1.2776253558863635, + "learning_rate": 1.1479524091008142e-06, + "loss": 0.2063906192779541, + "step": 6431 + }, + { + "epoch": 1.7080069047935202, + "grad_norm": 1.4035125322254989, + "learning_rate": 1.1459106713283286e-06, + "loss": 0.2787795960903168, + "step": 6432 + }, + { + "epoch": 1.7082724737750632, + "grad_norm": 1.2096674582385407, + "learning_rate": 1.1438706405198419e-06, + "loss": 0.23090440034866333, + "step": 6433 + }, + { + "epoch": 1.7085380427566061, + "grad_norm": 1.288319877687408, + "learning_rate": 1.141832317068645e-06, + "loss": 0.23690670728683472, + "step": 6434 + }, + { + "epoch": 1.708803611738149, + "grad_norm": 1.2499926164056985, + "learning_rate": 1.1397957013677064e-06, + "loss": 
0.209202378988266, + "step": 6435 + }, + { + "epoch": 1.709069180719692, + "grad_norm": 1.2311768368116, + "learning_rate": 1.1377607938096635e-06, + "loss": 0.22541575133800507, + "step": 6436 + }, + { + "epoch": 1.709334749701235, + "grad_norm": 1.3505125458173146, + "learning_rate": 1.1357275947868162e-06, + "loss": 0.2460884153842926, + "step": 6437 + }, + { + "epoch": 1.709600318682778, + "grad_norm": 1.195327574575731, + "learning_rate": 1.1336961046911443e-06, + "loss": 0.21967202425003052, + "step": 6438 + }, + { + "epoch": 1.709865887664321, + "grad_norm": 1.346022527152768, + "learning_rate": 1.1316663239142954e-06, + "loss": 0.23619329929351807, + "step": 6439 + }, + { + "epoch": 1.7101314566458639, + "grad_norm": 1.3033234842407981, + "learning_rate": 1.129638252847587e-06, + "loss": 0.24563436210155487, + "step": 6440 + }, + { + "epoch": 1.7103970256274068, + "grad_norm": 1.3840933006905622, + "learning_rate": 1.1276118918820068e-06, + "loss": 0.25508859753608704, + "step": 6441 + }, + { + "epoch": 1.7106625946089498, + "grad_norm": 1.3406379279103604, + "learning_rate": 1.1255872414082136e-06, + "loss": 0.24761545658111572, + "step": 6442 + }, + { + "epoch": 1.7109281635904927, + "grad_norm": 4.632018568484065, + "learning_rate": 1.1235643018165344e-06, + "loss": 0.2355962097644806, + "step": 6443 + }, + { + "epoch": 1.7111937325720357, + "grad_norm": 1.3274457548497118, + "learning_rate": 1.1215430734969723e-06, + "loss": 0.2534273862838745, + "step": 6444 + }, + { + "epoch": 1.7114593015535786, + "grad_norm": 1.2846712625276346, + "learning_rate": 1.1195235568391938e-06, + "loss": 0.2756424844264984, + "step": 6445 + }, + { + "epoch": 1.7117248705351216, + "grad_norm": 1.2126020570228762, + "learning_rate": 1.1175057522325383e-06, + "loss": 0.2198309451341629, + "step": 6446 + }, + { + "epoch": 1.7119904395166645, + "grad_norm": 1.2343738377988847, + "learning_rate": 1.1154896600660136e-06, + "loss": 0.21767666935920715, + "step": 6447 + }, + { + 
"epoch": 1.7122560084982075, + "grad_norm": 1.4965895030859304, + "learning_rate": 1.1134752807283e-06, + "loss": 0.2679128348827362, + "step": 6448 + }, + { + "epoch": 1.7125215774797504, + "grad_norm": 1.292131622576057, + "learning_rate": 1.1114626146077457e-06, + "loss": 0.2268792986869812, + "step": 6449 + }, + { + "epoch": 1.7127871464612934, + "grad_norm": 1.224637524783582, + "learning_rate": 1.109451662092369e-06, + "loss": 0.21585378050804138, + "step": 6450 + }, + { + "epoch": 1.7130527154428363, + "grad_norm": 1.3157463227820392, + "learning_rate": 1.1074424235698567e-06, + "loss": 0.2258647382259369, + "step": 6451 + }, + { + "epoch": 1.7133182844243793, + "grad_norm": 1.3742268123946286, + "learning_rate": 1.1054348994275677e-06, + "loss": 0.2456682175397873, + "step": 6452 + }, + { + "epoch": 1.7135838534059222, + "grad_norm": 1.4853732102975625, + "learning_rate": 1.1034290900525279e-06, + "loss": 0.22897745668888092, + "step": 6453 + }, + { + "epoch": 1.7138494223874652, + "grad_norm": 1.133114987282755, + "learning_rate": 1.101424995831435e-06, + "loss": 0.1910650134086609, + "step": 6454 + }, + { + "epoch": 1.7141149913690081, + "grad_norm": 1.2728981818199352, + "learning_rate": 1.0994226171506529e-06, + "loss": 0.2519158720970154, + "step": 6455 + }, + { + "epoch": 1.714380560350551, + "grad_norm": 1.259309948081026, + "learning_rate": 1.0974219543962184e-06, + "loss": 0.24191951751708984, + "step": 6456 + }, + { + "epoch": 1.714646129332094, + "grad_norm": 1.3159238719963862, + "learning_rate": 1.0954230079538352e-06, + "loss": 0.2560814619064331, + "step": 6457 + }, + { + "epoch": 1.714911698313637, + "grad_norm": 1.2640782659289207, + "learning_rate": 1.0934257782088763e-06, + "loss": 0.22969035804271698, + "step": 6458 + }, + { + "epoch": 1.71517726729518, + "grad_norm": 1.3584917562872394, + "learning_rate": 1.0914302655463837e-06, + "loss": 0.26114046573638916, + "step": 6459 + }, + { + "epoch": 1.715442836276723, + "grad_norm": 
1.2235177756044688, + "learning_rate": 1.0894364703510685e-06, + "loss": 0.21457752585411072, + "step": 6460 + }, + { + "epoch": 1.7157084052582658, + "grad_norm": 1.164559577491723, + "learning_rate": 1.0874443930073098e-06, + "loss": 0.19998760521411896, + "step": 6461 + }, + { + "epoch": 1.7159739742398088, + "grad_norm": 1.2278101157674874, + "learning_rate": 1.0854540338991615e-06, + "loss": 0.2379671037197113, + "step": 6462 + }, + { + "epoch": 1.7162395432213517, + "grad_norm": 1.3827652808641404, + "learning_rate": 1.0834653934103367e-06, + "loss": 0.2236609309911728, + "step": 6463 + }, + { + "epoch": 1.7165051122028947, + "grad_norm": 1.2673726734268553, + "learning_rate": 1.0814784719242234e-06, + "loss": 0.22507379949092865, + "step": 6464 + }, + { + "epoch": 1.7167706811844377, + "grad_norm": 1.3174434539455087, + "learning_rate": 1.079493269823877e-06, + "loss": 0.22138816118240356, + "step": 6465 + }, + { + "epoch": 1.7170362501659806, + "grad_norm": 1.3880746036316538, + "learning_rate": 1.0775097874920204e-06, + "loss": 0.227338969707489, + "step": 6466 + }, + { + "epoch": 1.7173018191475236, + "grad_norm": 1.2588670866885754, + "learning_rate": 1.0755280253110466e-06, + "loss": 0.23694375157356262, + "step": 6467 + }, + { + "epoch": 1.7175673881290665, + "grad_norm": 1.365387614603678, + "learning_rate": 1.0735479836630136e-06, + "loss": 0.26219409704208374, + "step": 6468 + }, + { + "epoch": 1.7178329571106095, + "grad_norm": 1.20539748496599, + "learning_rate": 1.0715696629296524e-06, + "loss": 0.22215887904167175, + "step": 6469 + }, + { + "epoch": 1.7180985260921524, + "grad_norm": 1.3543481839639284, + "learning_rate": 1.0695930634923602e-06, + "loss": 0.25434768199920654, + "step": 6470 + }, + { + "epoch": 1.7183640950736954, + "grad_norm": 1.1809119822759757, + "learning_rate": 1.0676181857321998e-06, + "loss": 0.2092076987028122, + "step": 6471 + }, + { + "epoch": 1.7186296640552383, + "grad_norm": 1.330663320526799, + "learning_rate": 
1.0656450300299048e-06, + "loss": 0.2710237503051758, + "step": 6472 + }, + { + "epoch": 1.7188952330367813, + "grad_norm": 1.2715188060789504, + "learning_rate": 1.0636735967658785e-06, + "loss": 0.2533886432647705, + "step": 6473 + }, + { + "epoch": 1.7191608020183242, + "grad_norm": 1.2174102707049457, + "learning_rate": 1.0617038863201878e-06, + "loss": 0.2545754909515381, + "step": 6474 + }, + { + "epoch": 1.7194263709998672, + "grad_norm": 1.2560655592374788, + "learning_rate": 1.0597358990725703e-06, + "loss": 0.26010993123054504, + "step": 6475 + }, + { + "epoch": 1.7196919399814101, + "grad_norm": 1.2632076366916114, + "learning_rate": 1.0577696354024314e-06, + "loss": 0.22529907524585724, + "step": 6476 + }, + { + "epoch": 1.719957508962953, + "grad_norm": 1.157260113755536, + "learning_rate": 1.0558050956888433e-06, + "loss": 0.1897469311952591, + "step": 6477 + }, + { + "epoch": 1.720223077944496, + "grad_norm": 1.31651804495616, + "learning_rate": 1.0538422803105441e-06, + "loss": 0.24663670361042023, + "step": 6478 + }, + { + "epoch": 1.720488646926039, + "grad_norm": 1.343902959790046, + "learning_rate": 1.0518811896459423e-06, + "loss": 0.2462892383337021, + "step": 6479 + }, + { + "epoch": 1.720754215907582, + "grad_norm": 1.117431347891292, + "learning_rate": 1.0499218240731157e-06, + "loss": 0.18652144074440002, + "step": 6480 + }, + { + "epoch": 1.7210197848891249, + "grad_norm": 1.2234103731079693, + "learning_rate": 1.0479641839698052e-06, + "loss": 0.24614468216896057, + "step": 6481 + }, + { + "epoch": 1.7212853538706678, + "grad_norm": 1.2632894895468527, + "learning_rate": 1.046008269713421e-06, + "loss": 0.27925312519073486, + "step": 6482 + }, + { + "epoch": 1.7215509228522108, + "grad_norm": 1.3426272887839532, + "learning_rate": 1.0440540816810395e-06, + "loss": 0.2626710832118988, + "step": 6483 + }, + { + "epoch": 1.7218164918337537, + "grad_norm": 1.2982212521269376, + "learning_rate": 1.042101620249405e-06, + "loss": 
0.23039895296096802, + "step": 6484 + }, + { + "epoch": 1.7220820608152967, + "grad_norm": 1.2564768074123291, + "learning_rate": 1.0401508857949295e-06, + "loss": 0.19559775292873383, + "step": 6485 + }, + { + "epoch": 1.7223476297968396, + "grad_norm": 1.222035384596064, + "learning_rate": 1.0382018786936943e-06, + "loss": 0.24982990324497223, + "step": 6486 + }, + { + "epoch": 1.7226131987783826, + "grad_norm": 1.356827120814655, + "learning_rate": 1.0362545993214402e-06, + "loss": 0.26212313771247864, + "step": 6487 + }, + { + "epoch": 1.7228787677599255, + "grad_norm": 1.2583181328160484, + "learning_rate": 1.0343090480535788e-06, + "loss": 0.22827446460723877, + "step": 6488 + }, + { + "epoch": 1.7231443367414685, + "grad_norm": 1.3650470156220376, + "learning_rate": 1.032365225265196e-06, + "loss": 0.2710435390472412, + "step": 6489 + }, + { + "epoch": 1.7234099057230114, + "grad_norm": 1.560435811081079, + "learning_rate": 1.030423131331033e-06, + "loss": 0.25116702914237976, + "step": 6490 + }, + { + "epoch": 1.7236754747045544, + "grad_norm": 1.2598369270207033, + "learning_rate": 1.0284827666255048e-06, + "loss": 0.1980481743812561, + "step": 6491 + }, + { + "epoch": 1.7239410436860974, + "grad_norm": 1.3159445178277585, + "learning_rate": 1.0265441315226898e-06, + "loss": 0.2777971625328064, + "step": 6492 + }, + { + "epoch": 1.7242066126676403, + "grad_norm": 1.3290253215924488, + "learning_rate": 1.0246072263963336e-06, + "loss": 0.23041702806949615, + "step": 6493 + }, + { + "epoch": 1.7244721816491833, + "grad_norm": 1.2761862568921072, + "learning_rate": 1.0226720516198495e-06, + "loss": 0.21428728103637695, + "step": 6494 + }, + { + "epoch": 1.7247377506307262, + "grad_norm": 1.2965072992275601, + "learning_rate": 1.020738607566316e-06, + "loss": 0.22577518224716187, + "step": 6495 + }, + { + "epoch": 1.7250033196122692, + "grad_norm": 1.2489154030372867, + "learning_rate": 1.0188068946084783e-06, + "loss": 0.21080979704856873, + "step": 6496 + }, 
+ { + "epoch": 1.7252688885938121, + "grad_norm": 1.1941107816051266, + "learning_rate": 1.0168769131187472e-06, + "loss": 0.21232858300209045, + "step": 6497 + }, + { + "epoch": 1.725534457575355, + "grad_norm": 1.3035016990745079, + "learning_rate": 1.0149486634692019e-06, + "loss": 0.25525614619255066, + "step": 6498 + }, + { + "epoch": 1.725800026556898, + "grad_norm": 1.2742578592858531, + "learning_rate": 1.0130221460315858e-06, + "loss": 0.26291778683662415, + "step": 6499 + }, + { + "epoch": 1.726065595538441, + "grad_norm": 1.1747703502148148, + "learning_rate": 1.011097361177308e-06, + "loss": 0.21314382553100586, + "step": 6500 + }, + { + "epoch": 1.726331164519984, + "grad_norm": 1.3027182735878766, + "learning_rate": 1.0091743092774474e-06, + "loss": 0.2106419950723648, + "step": 6501 + }, + { + "epoch": 1.7265967335015269, + "grad_norm": 1.2753206037657139, + "learning_rate": 1.0072529907027407e-06, + "loss": 0.22456032037734985, + "step": 6502 + }, + { + "epoch": 1.7268623024830698, + "grad_norm": 2.1059170179774807, + "learning_rate": 1.0053334058235975e-06, + "loss": 0.2301097959280014, + "step": 6503 + }, + { + "epoch": 1.7271278714646128, + "grad_norm": 1.4062353485935484, + "learning_rate": 1.0034155550100922e-06, + "loss": 0.21207617223262787, + "step": 6504 + }, + { + "epoch": 1.7273934404461557, + "grad_norm": 1.3379977808716934, + "learning_rate": 1.0014994386319621e-06, + "loss": 0.24378664791584015, + "step": 6505 + }, + { + "epoch": 1.727659009427699, + "grad_norm": 1.402146752515372, + "learning_rate": 9.995850570586107e-07, + "loss": 0.24914023280143738, + "step": 6506 + }, + { + "epoch": 1.7279245784092419, + "grad_norm": 1.2949159811476645, + "learning_rate": 9.976724106591128e-07, + "loss": 0.23235921561717987, + "step": 6507 + }, + { + "epoch": 1.7281901473907848, + "grad_norm": 1.295455173430887, + "learning_rate": 9.957614998022015e-07, + "loss": 0.22441455721855164, + "step": 6508 + }, + { + "epoch": 1.7284557163723278, + 
"grad_norm": 1.4195770964317103, + "learning_rate": 9.93852324856278e-07, + "loss": 0.2559920847415924, + "step": 6509 + }, + { + "epoch": 1.7287212853538707, + "grad_norm": 1.2106097617539484, + "learning_rate": 9.919448861894088e-07, + "loss": 0.21378321945667267, + "step": 6510 + }, + { + "epoch": 1.7289868543354137, + "grad_norm": 1.223247289196822, + "learning_rate": 9.900391841693247e-07, + "loss": 0.23622627556324005, + "step": 6511 + }, + { + "epoch": 1.7292524233169566, + "grad_norm": 1.2354266119490807, + "learning_rate": 9.88135219163424e-07, + "loss": 0.217013418674469, + "step": 6512 + }, + { + "epoch": 1.7295179922984996, + "grad_norm": 1.342902376475473, + "learning_rate": 9.862329915387669e-07, + "loss": 0.2221517264842987, + "step": 6513 + }, + { + "epoch": 1.7297835612800425, + "grad_norm": 1.3136496001371853, + "learning_rate": 9.84332501662083e-07, + "loss": 0.24377144873142242, + "step": 6514 + }, + { + "epoch": 1.7300491302615855, + "grad_norm": 1.2574348774674273, + "learning_rate": 9.824337498997593e-07, + "loss": 0.23368799686431885, + "step": 6515 + }, + { + "epoch": 1.7303146992431284, + "grad_norm": 1.1949944292188206, + "learning_rate": 9.805367366178608e-07, + "loss": 0.23061680793762207, + "step": 6516 + }, + { + "epoch": 1.7305802682246714, + "grad_norm": 1.2715048223769598, + "learning_rate": 9.78641462182104e-07, + "loss": 0.24157950282096863, + "step": 6517 + }, + { + "epoch": 1.7308458372062143, + "grad_norm": 1.3248165077712177, + "learning_rate": 9.76747926957875e-07, + "loss": 0.2122395783662796, + "step": 6518 + }, + { + "epoch": 1.7311114061877573, + "grad_norm": 1.320024810941134, + "learning_rate": 9.748561313102266e-07, + "loss": 0.2351134717464447, + "step": 6519 + }, + { + "epoch": 1.7313769751693002, + "grad_norm": 1.2421546716744003, + "learning_rate": 9.729660756038738e-07, + "loss": 0.22462692856788635, + "step": 6520 + }, + { + "epoch": 1.7316425441508432, + "grad_norm": 1.191887437920794, + "learning_rate": 
9.710777602031985e-07, + "loss": 0.2140806019306183, + "step": 6521 + }, + { + "epoch": 1.7319081131323861, + "grad_norm": 1.1138928252794336, + "learning_rate": 9.691911854722447e-07, + "loss": 0.22256694734096527, + "step": 6522 + }, + { + "epoch": 1.732173682113929, + "grad_norm": 1.3703383963226383, + "learning_rate": 9.673063517747216e-07, + "loss": 0.26044604182243347, + "step": 6523 + }, + { + "epoch": 1.732439251095472, + "grad_norm": 1.2598416492801234, + "learning_rate": 9.65423259474001e-07, + "loss": 0.22553196549415588, + "step": 6524 + }, + { + "epoch": 1.732704820077015, + "grad_norm": 1.351471142700479, + "learning_rate": 9.635419089331255e-07, + "loss": 0.2240113914012909, + "step": 6525 + }, + { + "epoch": 1.732970389058558, + "grad_norm": 1.1814437793767476, + "learning_rate": 9.616623005147952e-07, + "loss": 0.2239987701177597, + "step": 6526 + }, + { + "epoch": 1.7332359580401009, + "grad_norm": 1.3385972692968178, + "learning_rate": 9.597844345813746e-07, + "loss": 0.2779507040977478, + "step": 6527 + }, + { + "epoch": 1.7335015270216438, + "grad_norm": 1.24243402144453, + "learning_rate": 9.57908311494896e-07, + "loss": 0.20211297273635864, + "step": 6528 + }, + { + "epoch": 1.7337670960031868, + "grad_norm": 1.3764658259437736, + "learning_rate": 9.560339316170542e-07, + "loss": 0.2552817165851593, + "step": 6529 + }, + { + "epoch": 1.7340326649847297, + "grad_norm": 1.2797541334315956, + "learning_rate": 9.54161295309206e-07, + "loss": 0.248790442943573, + "step": 6530 + }, + { + "epoch": 1.7342982339662727, + "grad_norm": 1.2952054804389268, + "learning_rate": 9.522904029323754e-07, + "loss": 0.22865381836891174, + "step": 6531 + }, + { + "epoch": 1.7345638029478156, + "grad_norm": 1.2248102039230788, + "learning_rate": 9.504212548472458e-07, + "loss": 0.212583988904953, + "step": 6532 + }, + { + "epoch": 1.7348293719293586, + "grad_norm": 1.3834113478738954, + "learning_rate": 9.48553851414169e-07, + "loss": 0.24632221460342407, + "step": 
6533 + }, + { + "epoch": 1.7350949409109018, + "grad_norm": 1.2843254083507383, + "learning_rate": 9.466881929931582e-07, + "loss": 0.2264299988746643, + "step": 6534 + }, + { + "epoch": 1.7353605098924447, + "grad_norm": 1.1969400150248917, + "learning_rate": 9.4482427994389e-07, + "loss": 0.21560585498809814, + "step": 6535 + }, + { + "epoch": 1.7356260788739877, + "grad_norm": 1.2133784097522973, + "learning_rate": 9.429621126257038e-07, + "loss": 0.24358224868774414, + "step": 6536 + }, + { + "epoch": 1.7358916478555306, + "grad_norm": 1.2714225965713206, + "learning_rate": 9.411016913976045e-07, + "loss": 0.23307816684246063, + "step": 6537 + }, + { + "epoch": 1.7361572168370736, + "grad_norm": 1.3040669928143356, + "learning_rate": 9.392430166182597e-07, + "loss": 0.28001490235328674, + "step": 6538 + }, + { + "epoch": 1.7364227858186165, + "grad_norm": 1.271471324412232, + "learning_rate": 9.373860886459996e-07, + "loss": 0.22544093430042267, + "step": 6539 + }, + { + "epoch": 1.7366883548001595, + "grad_norm": 1.196472605989987, + "learning_rate": 9.355309078388186e-07, + "loss": 0.2066478282213211, + "step": 6540 + }, + { + "epoch": 1.7369539237817024, + "grad_norm": 1.3162468805281542, + "learning_rate": 9.336774745543697e-07, + "loss": 0.21185964345932007, + "step": 6541 + }, + { + "epoch": 1.7372194927632454, + "grad_norm": 1.2806137892507987, + "learning_rate": 9.318257891499793e-07, + "loss": 0.2337890863418579, + "step": 6542 + }, + { + "epoch": 1.7374850617447883, + "grad_norm": 1.3468215205180822, + "learning_rate": 9.299758519826274e-07, + "loss": 0.2430594563484192, + "step": 6543 + }, + { + "epoch": 1.7377506307263313, + "grad_norm": 1.4072339591675835, + "learning_rate": 9.281276634089609e-07, + "loss": 0.24799269437789917, + "step": 6544 + }, + { + "epoch": 1.7380161997078742, + "grad_norm": 1.3533264573117185, + "learning_rate": 9.26281223785287e-07, + "loss": 0.24756166338920593, + "step": 6545 + }, + { + "epoch": 1.7382817686894172, + 
"grad_norm": 1.281195516970091, + "learning_rate": 9.244365334675787e-07, + "loss": 0.23465190827846527, + "step": 6546 + }, + { + "epoch": 1.7385473376709601, + "grad_norm": 1.22953964144765, + "learning_rate": 9.225935928114716e-07, + "loss": 0.2039640098810196, + "step": 6547 + }, + { + "epoch": 1.738812906652503, + "grad_norm": 1.3426382286400422, + "learning_rate": 9.207524021722602e-07, + "loss": 0.22304412722587585, + "step": 6548 + }, + { + "epoch": 1.739078475634046, + "grad_norm": 1.2253196898929546, + "learning_rate": 9.189129619049064e-07, + "loss": 0.19985908269882202, + "step": 6549 + }, + { + "epoch": 1.739344044615589, + "grad_norm": 1.3354963919439176, + "learning_rate": 9.17075272364032e-07, + "loss": 0.2335432469844818, + "step": 6550 + }, + { + "epoch": 1.739609613597132, + "grad_norm": 1.6822196536181961, + "learning_rate": 9.152393339039223e-07, + "loss": 0.2313593327999115, + "step": 6551 + }, + { + "epoch": 1.739875182578675, + "grad_norm": 1.310977344619443, + "learning_rate": 9.134051468785243e-07, + "loss": 0.2320600152015686, + "step": 6552 + }, + { + "epoch": 1.7401407515602179, + "grad_norm": 1.0942022372096942, + "learning_rate": 9.115727116414475e-07, + "loss": 0.1870848387479782, + "step": 6553 + }, + { + "epoch": 1.7404063205417608, + "grad_norm": 1.340037469005655, + "learning_rate": 9.097420285459635e-07, + "loss": 0.22922812402248383, + "step": 6554 + }, + { + "epoch": 1.7406718895233038, + "grad_norm": 1.3705243227438364, + "learning_rate": 9.079130979450068e-07, + "loss": 0.2505050301551819, + "step": 6555 + }, + { + "epoch": 1.7409374585048467, + "grad_norm": 1.3187608464438627, + "learning_rate": 9.060859201911732e-07, + "loss": 0.20445439219474792, + "step": 6556 + }, + { + "epoch": 1.7412030274863897, + "grad_norm": 1.1489822386745985, + "learning_rate": 9.042604956367218e-07, + "loss": 0.22338441014289856, + "step": 6557 + }, + { + "epoch": 1.7414685964679326, + "grad_norm": 1.2900464387857213, + "learning_rate": 
9.024368246335735e-07, + "loss": 0.24923941493034363, + "step": 6558 + }, + { + "epoch": 1.7417341654494756, + "grad_norm": 1.3383952744906746, + "learning_rate": 9.006149075333071e-07, + "loss": 0.22842931747436523, + "step": 6559 + }, + { + "epoch": 1.7419997344310185, + "grad_norm": 1.391145524863548, + "learning_rate": 8.987947446871703e-07, + "loss": 0.22451579570770264, + "step": 6560 + }, + { + "epoch": 1.7422653034125615, + "grad_norm": 1.3218089225892669, + "learning_rate": 8.969763364460682e-07, + "loss": 0.2521047592163086, + "step": 6561 + }, + { + "epoch": 1.7425308723941044, + "grad_norm": 1.1675892500249985, + "learning_rate": 8.951596831605691e-07, + "loss": 0.25001099705696106, + "step": 6562 + }, + { + "epoch": 1.7427964413756474, + "grad_norm": 1.175521207104519, + "learning_rate": 8.933447851809007e-07, + "loss": 0.19592508673667908, + "step": 6563 + }, + { + "epoch": 1.7430620103571903, + "grad_norm": 1.399887131584603, + "learning_rate": 8.915316428569554e-07, + "loss": 0.2785179018974304, + "step": 6564 + }, + { + "epoch": 1.7433275793387333, + "grad_norm": 1.1688351316361159, + "learning_rate": 8.897202565382845e-07, + "loss": 0.20700594782829285, + "step": 6565 + }, + { + "epoch": 1.7435931483202762, + "grad_norm": 1.2225569857896341, + "learning_rate": 8.879106265741044e-07, + "loss": 0.253167062997818, + "step": 6566 + }, + { + "epoch": 1.7438587173018192, + "grad_norm": 1.4278912909015264, + "learning_rate": 8.861027533132859e-07, + "loss": 0.27672937512397766, + "step": 6567 + }, + { + "epoch": 1.7441242862833621, + "grad_norm": 1.3136368448280313, + "learning_rate": 8.842966371043671e-07, + "loss": 0.23050950467586517, + "step": 6568 + }, + { + "epoch": 1.744389855264905, + "grad_norm": 1.2790658189865058, + "learning_rate": 8.824922782955481e-07, + "loss": 0.23529425263404846, + "step": 6569 + }, + { + "epoch": 1.744655424246448, + "grad_norm": 1.2887213562899031, + "learning_rate": 8.806896772346873e-07, + "loss": 
0.21803250908851624, + "step": 6570 + }, + { + "epoch": 1.744920993227991, + "grad_norm": 1.3669961004756481, + "learning_rate": 8.788888342693047e-07, + "loss": 0.24237293004989624, + "step": 6571 + }, + { + "epoch": 1.745186562209534, + "grad_norm": 1.1957319745445254, + "learning_rate": 8.770897497465803e-07, + "loss": 0.2008107602596283, + "step": 6572 + }, + { + "epoch": 1.745452131191077, + "grad_norm": 1.2693790937709173, + "learning_rate": 8.752924240133587e-07, + "loss": 0.23106279969215393, + "step": 6573 + }, + { + "epoch": 1.7457177001726198, + "grad_norm": 1.377716829660982, + "learning_rate": 8.734968574161406e-07, + "loss": 0.23726215958595276, + "step": 6574 + }, + { + "epoch": 1.7459832691541628, + "grad_norm": 1.211024095215965, + "learning_rate": 8.717030503010915e-07, + "loss": 0.26349812746047974, + "step": 6575 + }, + { + "epoch": 1.7462488381357057, + "grad_norm": 1.2871963140003055, + "learning_rate": 8.699110030140367e-07, + "loss": 0.23226451873779297, + "step": 6576 + }, + { + "epoch": 1.7465144071172487, + "grad_norm": 1.3173524718115384, + "learning_rate": 8.68120715900459e-07, + "loss": 0.22188402712345123, + "step": 6577 + }, + { + "epoch": 1.7467799760987917, + "grad_norm": 1.2367242455559135, + "learning_rate": 8.663321893055087e-07, + "loss": 0.21238234639167786, + "step": 6578 + }, + { + "epoch": 1.7470455450803346, + "grad_norm": 1.3423960800972676, + "learning_rate": 8.645454235739903e-07, + "loss": 0.2700675427913666, + "step": 6579 + }, + { + "epoch": 1.7473111140618776, + "grad_norm": 1.2737029023524005, + "learning_rate": 8.627604190503714e-07, + "loss": 0.24463894963264465, + "step": 6580 + }, + { + "epoch": 1.7475766830434205, + "grad_norm": 1.2537801110870739, + "learning_rate": 8.609771760787822e-07, + "loss": 0.23429079353809357, + "step": 6581 + }, + { + "epoch": 1.7478422520249635, + "grad_norm": 1.342775712878445, + "learning_rate": 8.591956950030067e-07, + "loss": 0.21767663955688477, + "step": 6582 + }, + { + 
"epoch": 1.7481078210065064, + "grad_norm": 1.3390334282971272, + "learning_rate": 8.574159761664957e-07, + "loss": 0.2499813735485077, + "step": 6583 + }, + { + "epoch": 1.7483733899880494, + "grad_norm": 1.471955255689367, + "learning_rate": 8.556380199123582e-07, + "loss": 0.28065958619117737, + "step": 6584 + }, + { + "epoch": 1.7486389589695923, + "grad_norm": 1.3012440070718, + "learning_rate": 8.538618265833621e-07, + "loss": 0.2166985273361206, + "step": 6585 + }, + { + "epoch": 1.7489045279511353, + "grad_norm": 1.2228700023368582, + "learning_rate": 8.520873965219356e-07, + "loss": 0.22835782170295715, + "step": 6586 + }, + { + "epoch": 1.7491700969326782, + "grad_norm": 1.2209097376008975, + "learning_rate": 8.503147300701709e-07, + "loss": 0.23575961589813232, + "step": 6587 + }, + { + "epoch": 1.7494356659142212, + "grad_norm": 1.1275514661567778, + "learning_rate": 8.485438275698154e-07, + "loss": 0.183369442820549, + "step": 6588 + }, + { + "epoch": 1.7497012348957641, + "grad_norm": 1.519810508178025, + "learning_rate": 8.467746893622786e-07, + "loss": 0.2731352746486664, + "step": 6589 + }, + { + "epoch": 1.749966803877307, + "grad_norm": 1.2913957246056922, + "learning_rate": 8.450073157886296e-07, + "loss": 0.20177578926086426, + "step": 6590 + }, + { + "epoch": 1.75023237285885, + "grad_norm": 1.2742798574628598, + "learning_rate": 8.432417071895982e-07, + "loss": 0.21672385931015015, + "step": 6591 + }, + { + "epoch": 1.750497941840393, + "grad_norm": 1.370933216008306, + "learning_rate": 8.414778639055699e-07, + "loss": 0.2503831386566162, + "step": 6592 + }, + { + "epoch": 1.750763510821936, + "grad_norm": 1.2884133202144494, + "learning_rate": 8.397157862765959e-07, + "loss": 0.2427521049976349, + "step": 6593 + }, + { + "epoch": 1.7510290798034789, + "grad_norm": 1.3424141731181953, + "learning_rate": 8.379554746423824e-07, + "loss": 0.23128533363342285, + "step": 6594 + }, + { + "epoch": 1.7512946487850218, + "grad_norm": 
1.2353999110478557, + "learning_rate": 8.361969293422967e-07, + "loss": 0.2470957189798355, + "step": 6595 + }, + { + "epoch": 1.7515602177665648, + "grad_norm": 1.3335789710762707, + "learning_rate": 8.344401507153665e-07, + "loss": 0.29447510838508606, + "step": 6596 + }, + { + "epoch": 1.7518257867481077, + "grad_norm": 1.197223419032368, + "learning_rate": 8.326851391002777e-07, + "loss": 0.21585828065872192, + "step": 6597 + }, + { + "epoch": 1.7520913557296507, + "grad_norm": 1.2653558688292899, + "learning_rate": 8.30931894835375e-07, + "loss": 0.24081121385097504, + "step": 6598 + }, + { + "epoch": 1.7523569247111936, + "grad_norm": 1.3408805119391818, + "learning_rate": 8.291804182586638e-07, + "loss": 0.23052063584327698, + "step": 6599 + }, + { + "epoch": 1.7526224936927366, + "grad_norm": 1.2126901970374089, + "learning_rate": 8.274307097078093e-07, + "loss": 0.19008183479309082, + "step": 6600 + }, + { + "epoch": 1.7528880626742795, + "grad_norm": 1.3285441470167585, + "learning_rate": 8.25682769520132e-07, + "loss": 0.2632960379123688, + "step": 6601 + }, + { + "epoch": 1.7531536316558225, + "grad_norm": 1.4350439941988302, + "learning_rate": 8.239365980326175e-07, + "loss": 0.25958624482154846, + "step": 6602 + }, + { + "epoch": 1.7534192006373654, + "grad_norm": 1.304275360361708, + "learning_rate": 8.221921955819035e-07, + "loss": 0.22370605170726776, + "step": 6603 + }, + { + "epoch": 1.7536847696189084, + "grad_norm": 1.2385957043075924, + "learning_rate": 8.204495625042919e-07, + "loss": 0.22018703818321228, + "step": 6604 + }, + { + "epoch": 1.7539503386004514, + "grad_norm": 1.3626754196729718, + "learning_rate": 8.187086991357418e-07, + "loss": 0.26802191138267517, + "step": 6605 + }, + { + "epoch": 1.7542159075819943, + "grad_norm": 1.5313825040978437, + "learning_rate": 8.169696058118725e-07, + "loss": 0.21560518443584442, + "step": 6606 + }, + { + "epoch": 1.7544814765635373, + "grad_norm": 1.270508998157205, + "learning_rate": 
8.152322828679593e-07, + "loss": 0.23222430050373077, + "step": 6607 + }, + { + "epoch": 1.7547470455450802, + "grad_norm": 1.1542994886817455, + "learning_rate": 8.134967306389374e-07, + "loss": 0.17638427019119263, + "step": 6608 + }, + { + "epoch": 1.7550126145266232, + "grad_norm": 1.3257823658984844, + "learning_rate": 8.117629494594015e-07, + "loss": 0.21539513766765594, + "step": 6609 + }, + { + "epoch": 1.7552781835081661, + "grad_norm": 1.3431199934216977, + "learning_rate": 8.100309396636031e-07, + "loss": 0.2265736162662506, + "step": 6610 + }, + { + "epoch": 1.755543752489709, + "grad_norm": 1.3478032961337874, + "learning_rate": 8.083007015854549e-07, + "loss": 0.2688787281513214, + "step": 6611 + }, + { + "epoch": 1.755809321471252, + "grad_norm": 1.3027271078273857, + "learning_rate": 8.065722355585249e-07, + "loss": 0.19756367802619934, + "step": 6612 + }, + { + "epoch": 1.756074890452795, + "grad_norm": 1.3749986253881121, + "learning_rate": 8.048455419160405e-07, + "loss": 0.19934290647506714, + "step": 6613 + }, + { + "epoch": 1.756340459434338, + "grad_norm": 1.5756000064179743, + "learning_rate": 8.031206209908904e-07, + "loss": 0.2523588538169861, + "step": 6614 + }, + { + "epoch": 1.7566060284158809, + "grad_norm": 1.2988900493114706, + "learning_rate": 8.01397473115616e-07, + "loss": 0.22825747728347778, + "step": 6615 + }, + { + "epoch": 1.7568715973974238, + "grad_norm": 1.3238944187902402, + "learning_rate": 7.996760986224228e-07, + "loss": 0.24525251984596252, + "step": 6616 + }, + { + "epoch": 1.7571371663789668, + "grad_norm": 1.366323962207031, + "learning_rate": 7.979564978431687e-07, + "loss": 0.21883559226989746, + "step": 6617 + }, + { + "epoch": 1.7574027353605097, + "grad_norm": 1.5827948860142422, + "learning_rate": 7.96238671109374e-07, + "loss": 0.2642098069190979, + "step": 6618 + }, + { + "epoch": 1.757668304342053, + "grad_norm": 1.3345016667633411, + "learning_rate": 7.945226187522159e-07, + "loss": 0.24094998836517334, + 
"step": 6619 + }, + { + "epoch": 1.7579338733235959, + "grad_norm": 1.2243450261876818, + "learning_rate": 7.928083411025278e-07, + "loss": 0.2225762903690338, + "step": 6620 + }, + { + "epoch": 1.7581994423051388, + "grad_norm": 1.2991544127435968, + "learning_rate": 7.910958384908041e-07, + "loss": 0.26722851395606995, + "step": 6621 + }, + { + "epoch": 1.7584650112866818, + "grad_norm": 1.3206157533666447, + "learning_rate": 7.893851112471907e-07, + "loss": 0.2176910787820816, + "step": 6622 + }, + { + "epoch": 1.7587305802682247, + "grad_norm": 1.3618122023344794, + "learning_rate": 7.876761597015003e-07, + "loss": 0.20261354744434357, + "step": 6623 + }, + { + "epoch": 1.7589961492497677, + "grad_norm": 1.1728416456458601, + "learning_rate": 7.859689841831975e-07, + "loss": 0.23314467072486877, + "step": 6624 + }, + { + "epoch": 1.7592617182313106, + "grad_norm": 1.3115277523344588, + "learning_rate": 7.842635850214054e-07, + "loss": 0.19854989647865295, + "step": 6625 + }, + { + "epoch": 1.7595272872128536, + "grad_norm": 1.2614486006783794, + "learning_rate": 7.825599625449043e-07, + "loss": 0.2422565519809723, + "step": 6626 + }, + { + "epoch": 1.7597928561943965, + "grad_norm": 1.342773057026848, + "learning_rate": 7.808581170821328e-07, + "loss": 0.27029529213905334, + "step": 6627 + }, + { + "epoch": 1.7600584251759395, + "grad_norm": 1.1918292148332001, + "learning_rate": 7.791580489611872e-07, + "loss": 0.23596832156181335, + "step": 6628 + }, + { + "epoch": 1.7603239941574824, + "grad_norm": 1.2062344481848934, + "learning_rate": 7.774597585098198e-07, + "loss": 0.218271404504776, + "step": 6629 + }, + { + "epoch": 1.7605895631390254, + "grad_norm": 1.3762692469809215, + "learning_rate": 7.75763246055441e-07, + "loss": 0.2551255226135254, + "step": 6630 + }, + { + "epoch": 1.7608551321205683, + "grad_norm": 1.3049962391533094, + "learning_rate": 7.740685119251179e-07, + "loss": 0.24410653114318848, + "step": 6631 + }, + { + "epoch": 
1.7611207011021113, + "grad_norm": 1.2577276419448338, + "learning_rate": 7.723755564455771e-07, + "loss": 0.23044872283935547, + "step": 6632 + }, + { + "epoch": 1.7613862700836542, + "grad_norm": 1.334208934461724, + "learning_rate": 7.706843799431985e-07, + "loss": 0.24569427967071533, + "step": 6633 + }, + { + "epoch": 1.7616518390651972, + "grad_norm": 1.1605227177029394, + "learning_rate": 7.689949827440224e-07, + "loss": 0.200277179479599, + "step": 6634 + }, + { + "epoch": 1.7619174080467401, + "grad_norm": 1.1742759165978003, + "learning_rate": 7.673073651737428e-07, + "loss": 0.19217821955680847, + "step": 6635 + }, + { + "epoch": 1.762182977028283, + "grad_norm": 1.281151649074766, + "learning_rate": 7.656215275577151e-07, + "loss": 0.227005273103714, + "step": 6636 + }, + { + "epoch": 1.762448546009826, + "grad_norm": 1.2211778988331632, + "learning_rate": 7.639374702209468e-07, + "loss": 0.21359863877296448, + "step": 6637 + }, + { + "epoch": 1.762714114991369, + "grad_norm": 1.267969218396632, + "learning_rate": 7.62255193488105e-07, + "loss": 0.24056711792945862, + "step": 6638 + }, + { + "epoch": 1.762979683972912, + "grad_norm": 1.28035138481303, + "learning_rate": 7.605746976835127e-07, + "loss": 0.20897413790225983, + "step": 6639 + }, + { + "epoch": 1.763245252954455, + "grad_norm": 1.2567764889990254, + "learning_rate": 7.588959831311493e-07, + "loss": 0.20395967364311218, + "step": 6640 + }, + { + "epoch": 1.7635108219359978, + "grad_norm": 1.4827108993688454, + "learning_rate": 7.572190501546517e-07, + "loss": 0.2334095984697342, + "step": 6641 + }, + { + "epoch": 1.7637763909175408, + "grad_norm": 1.3358734576215814, + "learning_rate": 7.555438990773134e-07, + "loss": 0.23892858624458313, + "step": 6642 + }, + { + "epoch": 1.7640419598990837, + "grad_norm": 1.3063666339869877, + "learning_rate": 7.538705302220839e-07, + "loss": 0.23515449464321136, + "step": 6643 + }, + { + "epoch": 1.7643075288806267, + "grad_norm": 1.1919354046726482, + 
"learning_rate": 7.521989439115674e-07, + "loss": 0.19728611409664154, + "step": 6644 + }, + { + "epoch": 1.7645730978621696, + "grad_norm": 1.2609989060636697, + "learning_rate": 7.505291404680281e-07, + "loss": 0.22277355194091797, + "step": 6645 + }, + { + "epoch": 1.7648386668437126, + "grad_norm": 1.2129119488866849, + "learning_rate": 7.488611202133822e-07, + "loss": 0.24117602407932281, + "step": 6646 + }, + { + "epoch": 1.7651042358252558, + "grad_norm": 1.3643314179100876, + "learning_rate": 7.471948834692045e-07, + "loss": 0.24675750732421875, + "step": 6647 + }, + { + "epoch": 1.7653698048067987, + "grad_norm": 1.3261352525807495, + "learning_rate": 7.455304305567279e-07, + "loss": 0.2413899004459381, + "step": 6648 + }, + { + "epoch": 1.7656353737883417, + "grad_norm": 1.3357210816225529, + "learning_rate": 7.438677617968348e-07, + "loss": 0.22125428915023804, + "step": 6649 + }, + { + "epoch": 1.7659009427698846, + "grad_norm": 1.2099689083776513, + "learning_rate": 7.422068775100732e-07, + "loss": 0.205051988363266, + "step": 6650 + }, + { + "epoch": 1.7661665117514276, + "grad_norm": 1.2734255069971199, + "learning_rate": 7.405477780166415e-07, + "loss": 0.23711715638637543, + "step": 6651 + }, + { + "epoch": 1.7664320807329705, + "grad_norm": 1.4063590395204508, + "learning_rate": 7.388904636363914e-07, + "loss": 0.2591046988964081, + "step": 6652 + }, + { + "epoch": 1.7666976497145135, + "grad_norm": 1.4323150626725398, + "learning_rate": 7.372349346888363e-07, + "loss": 0.24837243556976318, + "step": 6653 + }, + { + "epoch": 1.7669632186960564, + "grad_norm": 1.1492996795155954, + "learning_rate": 7.35581191493141e-07, + "loss": 0.20910412073135376, + "step": 6654 + }, + { + "epoch": 1.7672287876775994, + "grad_norm": 1.113119722429438, + "learning_rate": 7.339292343681282e-07, + "loss": 0.2056204229593277, + "step": 6655 + }, + { + "epoch": 1.7674943566591423, + "grad_norm": 1.2927092177897141, + "learning_rate": 7.322790636322764e-07, + "loss": 
0.2496742308139801, + "step": 6656 + }, + { + "epoch": 1.7677599256406853, + "grad_norm": 1.3571185149739835, + "learning_rate": 7.306306796037188e-07, + "loss": 0.24432921409606934, + "step": 6657 + }, + { + "epoch": 1.7680254946222282, + "grad_norm": 1.3006085174415165, + "learning_rate": 7.289840826002414e-07, + "loss": 0.2492775321006775, + "step": 6658 + }, + { + "epoch": 1.7682910636037712, + "grad_norm": 1.3256617876861967, + "learning_rate": 7.273392729392936e-07, + "loss": 0.22673827409744263, + "step": 6659 + }, + { + "epoch": 1.7685566325853141, + "grad_norm": 1.3730978211523115, + "learning_rate": 7.25696250937975e-07, + "loss": 0.2225622981786728, + "step": 6660 + }, + { + "epoch": 1.768822201566857, + "grad_norm": 1.2296766172450786, + "learning_rate": 7.240550169130378e-07, + "loss": 0.24896883964538574, + "step": 6661 + }, + { + "epoch": 1.7690877705484, + "grad_norm": 1.2103035123370711, + "learning_rate": 7.224155711808923e-07, + "loss": 0.2395302951335907, + "step": 6662 + }, + { + "epoch": 1.769353339529943, + "grad_norm": 1.2658162555194572, + "learning_rate": 7.207779140576066e-07, + "loss": 0.2255886197090149, + "step": 6663 + }, + { + "epoch": 1.769618908511486, + "grad_norm": 1.2518907529925698, + "learning_rate": 7.191420458589005e-07, + "loss": 0.24029678106307983, + "step": 6664 + }, + { + "epoch": 1.769884477493029, + "grad_norm": 1.1016484922093457, + "learning_rate": 7.175079669001506e-07, + "loss": 0.19399142265319824, + "step": 6665 + }, + { + "epoch": 1.7701500464745719, + "grad_norm": 1.2291425924678119, + "learning_rate": 7.158756774963882e-07, + "loss": 0.24569162726402283, + "step": 6666 + }, + { + "epoch": 1.7704156154561148, + "grad_norm": 1.2180012837263907, + "learning_rate": 7.142451779622971e-07, + "loss": 0.2484329342842102, + "step": 6667 + }, + { + "epoch": 1.7706811844376578, + "grad_norm": 1.2505833357389051, + "learning_rate": 7.126164686122216e-07, + "loss": 0.24423512816429138, + "step": 6668 + }, + { + "epoch": 
1.7709467534192007, + "grad_norm": 1.1277554918017485, + "learning_rate": 7.109895497601571e-07, + "loss": 0.20146678388118744, + "step": 6669 + }, + { + "epoch": 1.7712123224007437, + "grad_norm": 1.2945002187740315, + "learning_rate": 7.093644217197526e-07, + "loss": 0.23329001665115356, + "step": 6670 + }, + { + "epoch": 1.7714778913822866, + "grad_norm": 1.1689758736288713, + "learning_rate": 7.077410848043165e-07, + "loss": 0.2290019690990448, + "step": 6671 + }, + { + "epoch": 1.7717434603638296, + "grad_norm": 1.2744441159542537, + "learning_rate": 7.061195393268061e-07, + "loss": 0.2329377382993698, + "step": 6672 + }, + { + "epoch": 1.7720090293453725, + "grad_norm": 1.1430677052322078, + "learning_rate": 7.04499785599837e-07, + "loss": 0.21513575315475464, + "step": 6673 + }, + { + "epoch": 1.7722745983269155, + "grad_norm": 1.1659646021132744, + "learning_rate": 7.028818239356794e-07, + "loss": 0.19022463262081146, + "step": 6674 + }, + { + "epoch": 1.7725401673084584, + "grad_norm": 1.2837523861206293, + "learning_rate": 7.012656546462571e-07, + "loss": 0.2097887396812439, + "step": 6675 + }, + { + "epoch": 1.7728057362900014, + "grad_norm": 1.3991640357566577, + "learning_rate": 6.996512780431486e-07, + "loss": 0.2559792101383209, + "step": 6676 + }, + { + "epoch": 1.7730713052715443, + "grad_norm": 1.3219531410357084, + "learning_rate": 6.980386944375849e-07, + "loss": 0.24624274671077728, + "step": 6677 + }, + { + "epoch": 1.7733368742530873, + "grad_norm": 1.2405076465604956, + "learning_rate": 6.964279041404553e-07, + "loss": 0.22904372215270996, + "step": 6678 + }, + { + "epoch": 1.7736024432346302, + "grad_norm": 1.216707646052236, + "learning_rate": 6.948189074623002e-07, + "loss": 0.20808623731136322, + "step": 6679 + }, + { + "epoch": 1.7738680122161732, + "grad_norm": 1.229477200185015, + "learning_rate": 6.932117047133158e-07, + "loss": 0.1931435763835907, + "step": 6680 + }, + { + "epoch": 1.7741335811977161, + "grad_norm": 
1.2962984681963328, + "learning_rate": 6.91606296203351e-07, + "loss": 0.22938531637191772, + "step": 6681 + }, + { + "epoch": 1.774399150179259, + "grad_norm": 1.2921857742770726, + "learning_rate": 6.900026822419103e-07, + "loss": 0.240365132689476, + "step": 6682 + }, + { + "epoch": 1.774664719160802, + "grad_norm": 1.3560359754116593, + "learning_rate": 6.8840086313815e-07, + "loss": 0.26665499806404114, + "step": 6683 + }, + { + "epoch": 1.774930288142345, + "grad_norm": 1.1827095382370005, + "learning_rate": 6.86800839200884e-07, + "loss": 0.19775834679603577, + "step": 6684 + }, + { + "epoch": 1.775195857123888, + "grad_norm": 1.2698613362606737, + "learning_rate": 6.852026107385756e-07, + "loss": 0.20334021747112274, + "step": 6685 + }, + { + "epoch": 1.775461426105431, + "grad_norm": 1.1845529296493982, + "learning_rate": 6.836061780593484e-07, + "loss": 0.20670340955257416, + "step": 6686 + }, + { + "epoch": 1.7757269950869738, + "grad_norm": 1.2940248868651125, + "learning_rate": 6.820115414709727e-07, + "loss": 0.2033209353685379, + "step": 6687 + }, + { + "epoch": 1.7759925640685168, + "grad_norm": 1.101442360403221, + "learning_rate": 6.804187012808761e-07, + "loss": 0.23827815055847168, + "step": 6688 + }, + { + "epoch": 1.7762581330500598, + "grad_norm": 1.200357834005043, + "learning_rate": 6.788276577961394e-07, + "loss": 0.2054731547832489, + "step": 6689 + }, + { + "epoch": 1.7765237020316027, + "grad_norm": 1.3006753644657554, + "learning_rate": 6.772384113234987e-07, + "loss": 0.25553691387176514, + "step": 6690 + }, + { + "epoch": 1.7767892710131457, + "grad_norm": 1.2800516387465457, + "learning_rate": 6.756509621693385e-07, + "loss": 0.23650874197483063, + "step": 6691 + }, + { + "epoch": 1.7770548399946886, + "grad_norm": 1.2987358367196533, + "learning_rate": 6.740653106397033e-07, + "loss": 0.2353624701499939, + "step": 6692 + }, + { + "epoch": 1.7773204089762316, + "grad_norm": 1.3578478166739052, + "learning_rate": 
6.724814570402871e-07, + "loss": 0.26034629344940186, + "step": 6693 + }, + { + "epoch": 1.7775859779577745, + "grad_norm": 1.2070636800070726, + "learning_rate": 6.70899401676438e-07, + "loss": 0.2272130399942398, + "step": 6694 + }, + { + "epoch": 1.7778515469393175, + "grad_norm": 1.353295285146214, + "learning_rate": 6.693191448531589e-07, + "loss": 0.27940404415130615, + "step": 6695 + }, + { + "epoch": 1.7781171159208604, + "grad_norm": 1.2726244327901954, + "learning_rate": 6.677406868751013e-07, + "loss": 0.22997702658176422, + "step": 6696 + }, + { + "epoch": 1.7783826849024034, + "grad_norm": 1.2569026906720413, + "learning_rate": 6.661640280465775e-07, + "loss": 0.22918452322483063, + "step": 6697 + }, + { + "epoch": 1.7786482538839463, + "grad_norm": 1.2456580683228033, + "learning_rate": 6.645891686715456e-07, + "loss": 0.18456090986728668, + "step": 6698 + }, + { + "epoch": 1.7789138228654893, + "grad_norm": 1.3290472252808803, + "learning_rate": 6.630161090536214e-07, + "loss": 0.23256534337997437, + "step": 6699 + }, + { + "epoch": 1.7791793918470322, + "grad_norm": 1.2224316750050632, + "learning_rate": 6.614448494960713e-07, + "loss": 0.21171879768371582, + "step": 6700 + }, + { + "epoch": 1.7794449608285752, + "grad_norm": 1.201224789246079, + "learning_rate": 6.598753903018163e-07, + "loss": 0.21382400393486023, + "step": 6701 + }, + { + "epoch": 1.7797105298101181, + "grad_norm": 1.2240177347792593, + "learning_rate": 6.583077317734299e-07, + "loss": 0.22954748570919037, + "step": 6702 + }, + { + "epoch": 1.779976098791661, + "grad_norm": 1.519530195710278, + "learning_rate": 6.56741874213136e-07, + "loss": 0.25691086053848267, + "step": 6703 + }, + { + "epoch": 1.780241667773204, + "grad_norm": 1.4662002194098382, + "learning_rate": 6.551778179228174e-07, + "loss": 0.23413901031017303, + "step": 6704 + }, + { + "epoch": 1.780507236754747, + "grad_norm": 1.2775019242293946, + "learning_rate": 6.536155632040031e-07, + "loss": 0.2493733912706375, 
+ "step": 6705 + }, + { + "epoch": 1.78077280573629, + "grad_norm": 1.2512747936457356, + "learning_rate": 6.520551103578776e-07, + "loss": 0.26094138622283936, + "step": 6706 + }, + { + "epoch": 1.7810383747178329, + "grad_norm": 1.3016608765448805, + "learning_rate": 6.504964596852781e-07, + "loss": 0.23509518802165985, + "step": 6707 + }, + { + "epoch": 1.7813039436993758, + "grad_norm": 1.4726929969063267, + "learning_rate": 6.489396114866942e-07, + "loss": 0.2471122294664383, + "step": 6708 + }, + { + "epoch": 1.7815695126809188, + "grad_norm": 1.3034668854019054, + "learning_rate": 6.47384566062268e-07, + "loss": 0.2363303005695343, + "step": 6709 + }, + { + "epoch": 1.7818350816624617, + "grad_norm": 1.1801501968168786, + "learning_rate": 6.458313237117953e-07, + "loss": 0.18868233263492584, + "step": 6710 + }, + { + "epoch": 1.7821006506440047, + "grad_norm": 1.3437880175802723, + "learning_rate": 6.442798847347187e-07, + "loss": 0.23380546271800995, + "step": 6711 + }, + { + "epoch": 1.7823662196255476, + "grad_norm": 1.471740030592424, + "learning_rate": 6.42730249430139e-07, + "loss": 0.24112167954444885, + "step": 6712 + }, + { + "epoch": 1.7826317886070906, + "grad_norm": 1.2664184946697812, + "learning_rate": 6.411824180968096e-07, + "loss": 0.2397521436214447, + "step": 6713 + }, + { + "epoch": 1.7828973575886335, + "grad_norm": 1.309174308390434, + "learning_rate": 6.396363910331338e-07, + "loss": 0.23775406181812286, + "step": 6714 + }, + { + "epoch": 1.7831629265701765, + "grad_norm": 1.4327166340451307, + "learning_rate": 6.380921685371655e-07, + "loss": 0.23278602957725525, + "step": 6715 + }, + { + "epoch": 1.7834284955517195, + "grad_norm": 1.1135605228940266, + "learning_rate": 6.365497509066143e-07, + "loss": 0.20028996467590332, + "step": 6716 + }, + { + "epoch": 1.7836940645332624, + "grad_norm": 1.146963533940078, + "learning_rate": 6.35009138438839e-07, + "loss": 0.20862875878810883, + "step": 6717 + }, + { + "epoch": 1.7839596335148054, 
+ "grad_norm": 1.3257848293601993, + "learning_rate": 6.334703314308521e-07, + "loss": 0.23522542417049408, + "step": 6718 + }, + { + "epoch": 1.7842252024963483, + "grad_norm": 1.2172150430538355, + "learning_rate": 6.319333301793173e-07, + "loss": 0.24633824825286865, + "step": 6719 + }, + { + "epoch": 1.7844907714778913, + "grad_norm": 1.3131451310460658, + "learning_rate": 6.30398134980551e-07, + "loss": 0.22141410410404205, + "step": 6720 + }, + { + "epoch": 1.7847563404594342, + "grad_norm": 1.3593079444355614, + "learning_rate": 6.288647461305186e-07, + "loss": 0.23313754796981812, + "step": 6721 + }, + { + "epoch": 1.7850219094409772, + "grad_norm": 1.2751593889081192, + "learning_rate": 6.273331639248414e-07, + "loss": 0.22015389800071716, + "step": 6722 + }, + { + "epoch": 1.7852874784225201, + "grad_norm": 1.2716859790694561, + "learning_rate": 6.258033886587911e-07, + "loss": 0.21154522895812988, + "step": 6723 + }, + { + "epoch": 1.785553047404063, + "grad_norm": 1.3319130935282857, + "learning_rate": 6.242754206272883e-07, + "loss": 0.2320503294467926, + "step": 6724 + }, + { + "epoch": 1.785818616385606, + "grad_norm": 1.2016740259413836, + "learning_rate": 6.227492601249097e-07, + "loss": 0.21778921782970428, + "step": 6725 + }, + { + "epoch": 1.786084185367149, + "grad_norm": 1.2321504813505204, + "learning_rate": 6.212249074458776e-07, + "loss": 0.2368871569633484, + "step": 6726 + }, + { + "epoch": 1.786349754348692, + "grad_norm": 1.5195368545073897, + "learning_rate": 6.197023628840704e-07, + "loss": 0.27269479632377625, + "step": 6727 + }, + { + "epoch": 1.7866153233302349, + "grad_norm": 1.2744130185555103, + "learning_rate": 6.181816267330177e-07, + "loss": 0.2414151132106781, + "step": 6728 + }, + { + "epoch": 1.7868808923117778, + "grad_norm": 1.1197825562175172, + "learning_rate": 6.166626992858993e-07, + "loss": 0.2156972736120224, + "step": 6729 + }, + { + "epoch": 1.7871464612933208, + "grad_norm": 1.2748992996552195, + 
"learning_rate": 6.151455808355455e-07, + "loss": 0.2510441541671753, + "step": 6730 + }, + { + "epoch": 1.787412030274864, + "grad_norm": 1.2924509412618195, + "learning_rate": 6.136302716744402e-07, + "loss": 0.20290088653564453, + "step": 6731 + }, + { + "epoch": 1.787677599256407, + "grad_norm": 1.3705736121123597, + "learning_rate": 6.121167720947174e-07, + "loss": 0.25088101625442505, + "step": 6732 + }, + { + "epoch": 1.7879431682379499, + "grad_norm": 1.3723338572382136, + "learning_rate": 6.106050823881604e-07, + "loss": 0.2566376328468323, + "step": 6733 + }, + { + "epoch": 1.7882087372194928, + "grad_norm": 1.1043772478174716, + "learning_rate": 6.09095202846206e-07, + "loss": 0.1882714033126831, + "step": 6734 + }, + { + "epoch": 1.7884743062010358, + "grad_norm": 1.2323780172305254, + "learning_rate": 6.075871337599404e-07, + "loss": 0.18705856800079346, + "step": 6735 + }, + { + "epoch": 1.7887398751825787, + "grad_norm": 1.1976910574931858, + "learning_rate": 6.060808754201031e-07, + "loss": 0.24756133556365967, + "step": 6736 + }, + { + "epoch": 1.7890054441641217, + "grad_norm": 1.3197777974144425, + "learning_rate": 6.045764281170818e-07, + "loss": 0.2537599205970764, + "step": 6737 + }, + { + "epoch": 1.7892710131456646, + "grad_norm": 1.330362234255321, + "learning_rate": 6.030737921409169e-07, + "loss": 0.22049202024936676, + "step": 6738 + }, + { + "epoch": 1.7895365821272076, + "grad_norm": 1.1222347914068396, + "learning_rate": 6.015729677812965e-07, + "loss": 0.20820394158363342, + "step": 6739 + }, + { + "epoch": 1.7898021511087505, + "grad_norm": 1.3153590716408405, + "learning_rate": 6.00073955327567e-07, + "loss": 0.2339879721403122, + "step": 6740 + }, + { + "epoch": 1.7900677200902935, + "grad_norm": 1.2483259153993207, + "learning_rate": 5.98576755068715e-07, + "loss": 0.22082161903381348, + "step": 6741 + }, + { + "epoch": 1.7903332890718364, + "grad_norm": 1.28162605766883, + "learning_rate": 5.97081367293385e-07, + "loss": 
0.21883058547973633, + "step": 6742 + }, + { + "epoch": 1.7905988580533794, + "grad_norm": 1.1591166092235485, + "learning_rate": 5.955877922898712e-07, + "loss": 0.214680016040802, + "step": 6743 + }, + { + "epoch": 1.7908644270349223, + "grad_norm": 1.37628370977899, + "learning_rate": 5.940960303461152e-07, + "loss": 0.24533744156360626, + "step": 6744 + }, + { + "epoch": 1.7911299960164653, + "grad_norm": 1.3046535737377691, + "learning_rate": 5.926060817497137e-07, + "loss": 0.19857585430145264, + "step": 6745 + }, + { + "epoch": 1.7913955649980082, + "grad_norm": 1.4468975368000232, + "learning_rate": 5.911179467879081e-07, + "loss": 0.27493876218795776, + "step": 6746 + }, + { + "epoch": 1.7916611339795512, + "grad_norm": 1.1490145590407708, + "learning_rate": 5.896316257475954e-07, + "loss": 0.20560544729232788, + "step": 6747 + }, + { + "epoch": 1.7919267029610941, + "grad_norm": 1.2213631424870741, + "learning_rate": 5.881471189153199e-07, + "loss": 0.23559418320655823, + "step": 6748 + }, + { + "epoch": 1.792192271942637, + "grad_norm": 1.3144055462601232, + "learning_rate": 5.866644265772769e-07, + "loss": 0.23055103421211243, + "step": 6749 + }, + { + "epoch": 1.79245784092418, + "grad_norm": 1.4747052812755685, + "learning_rate": 5.851835490193136e-07, + "loss": 0.2780724763870239, + "step": 6750 + }, + { + "epoch": 1.792723409905723, + "grad_norm": 1.2354333862915858, + "learning_rate": 5.837044865269248e-07, + "loss": 0.20216618478298187, + "step": 6751 + }, + { + "epoch": 1.792988978887266, + "grad_norm": 1.308066661539038, + "learning_rate": 5.822272393852557e-07, + "loss": 0.2289930284023285, + "step": 6752 + }, + { + "epoch": 1.793254547868809, + "grad_norm": 1.2952454297764495, + "learning_rate": 5.80751807879103e-07, + "loss": 0.2028929740190506, + "step": 6753 + }, + { + "epoch": 1.7935201168503518, + "grad_norm": 1.2960791997009702, + "learning_rate": 5.792781922929114e-07, + "loss": 0.1964842826128006, + "step": 6754 + }, + { + "epoch": 
1.7937856858318948, + "grad_norm": 1.4512315838061285, + "learning_rate": 5.77806392910778e-07, + "loss": 0.2617039084434509, + "step": 6755 + }, + { + "epoch": 1.7940512548134377, + "grad_norm": 1.325466585449178, + "learning_rate": 5.76336410016447e-07, + "loss": 0.2582395374774933, + "step": 6756 + }, + { + "epoch": 1.7943168237949807, + "grad_norm": 1.2587701407069858, + "learning_rate": 5.74868243893314e-07, + "loss": 0.23379334807395935, + "step": 6757 + }, + { + "epoch": 1.7945823927765236, + "grad_norm": 1.2979435124807637, + "learning_rate": 5.734018948244247e-07, + "loss": 0.2376977801322937, + "step": 6758 + }, + { + "epoch": 1.7948479617580668, + "grad_norm": 1.414785341098569, + "learning_rate": 5.719373630924741e-07, + "loss": 0.21816037595272064, + "step": 6759 + }, + { + "epoch": 1.7951135307396098, + "grad_norm": 1.1404163081963787, + "learning_rate": 5.704746489798063e-07, + "loss": 0.22156387567520142, + "step": 6760 + }, + { + "epoch": 1.7953790997211527, + "grad_norm": 1.195358056085369, + "learning_rate": 5.690137527684147e-07, + "loss": 0.20818129181861877, + "step": 6761 + }, + { + "epoch": 1.7956446687026957, + "grad_norm": 1.1501993150491747, + "learning_rate": 5.67554674739944e-07, + "loss": 0.18672943115234375, + "step": 6762 + }, + { + "epoch": 1.7959102376842386, + "grad_norm": 1.2143392515173568, + "learning_rate": 5.66097415175686e-07, + "loss": 0.2023036777973175, + "step": 6763 + }, + { + "epoch": 1.7961758066657816, + "grad_norm": 1.3551091626165586, + "learning_rate": 5.646419743565845e-07, + "loss": 0.24798424541950226, + "step": 6764 + }, + { + "epoch": 1.7964413756473245, + "grad_norm": 1.2034553304236573, + "learning_rate": 5.631883525632297e-07, + "loss": 0.1885790377855301, + "step": 6765 + }, + { + "epoch": 1.7967069446288675, + "grad_norm": 1.3693229184747842, + "learning_rate": 5.617365500758631e-07, + "loss": 0.24120381474494934, + "step": 6766 + }, + { + "epoch": 1.7969725136104104, + "grad_norm": 1.2063823939207, + 
"learning_rate": 5.602865671743763e-07, + "loss": 0.24238690733909607, + "step": 6767 + }, + { + "epoch": 1.7972380825919534, + "grad_norm": 1.2611645650605894, + "learning_rate": 5.588384041383089e-07, + "loss": 0.22928190231323242, + "step": 6768 + }, + { + "epoch": 1.7975036515734963, + "grad_norm": 1.3148280979127052, + "learning_rate": 5.573920612468486e-07, + "loss": 0.2464730143547058, + "step": 6769 + }, + { + "epoch": 1.7977692205550393, + "grad_norm": 1.149985298163883, + "learning_rate": 5.559475387788348e-07, + "loss": 0.2167670875787735, + "step": 6770 + }, + { + "epoch": 1.7980347895365822, + "grad_norm": 1.3365719233561757, + "learning_rate": 5.545048370127526e-07, + "loss": 0.24080663919448853, + "step": 6771 + }, + { + "epoch": 1.7983003585181252, + "grad_norm": 1.3571891328346308, + "learning_rate": 5.530639562267382e-07, + "loss": 0.25481417775154114, + "step": 6772 + }, + { + "epoch": 1.7985659274996681, + "grad_norm": 1.3525822075957274, + "learning_rate": 5.51624896698576e-07, + "loss": 0.23328909277915955, + "step": 6773 + }, + { + "epoch": 1.798831496481211, + "grad_norm": 1.136424514008492, + "learning_rate": 5.50187658705702e-07, + "loss": 0.18779747188091278, + "step": 6774 + }, + { + "epoch": 1.799097065462754, + "grad_norm": 1.3089016035676113, + "learning_rate": 5.487522425251968e-07, + "loss": 0.24840545654296875, + "step": 6775 + }, + { + "epoch": 1.799362634444297, + "grad_norm": 1.4658187281761286, + "learning_rate": 5.473186484337911e-07, + "loss": 0.2559642791748047, + "step": 6776 + }, + { + "epoch": 1.79962820342584, + "grad_norm": 1.3714243263968933, + "learning_rate": 5.458868767078673e-07, + "loss": 0.2005981206893921, + "step": 6777 + }, + { + "epoch": 1.799893772407383, + "grad_norm": 1.4085177100377464, + "learning_rate": 5.444569276234523e-07, + "loss": 0.2480883002281189, + "step": 6778 + }, + { + "epoch": 1.8001593413889259, + "grad_norm": 1.2203856732153913, + "learning_rate": 5.430288014562235e-07, + "loss": 
0.23043295741081238, + "step": 6779 + }, + { + "epoch": 1.8004249103704688, + "grad_norm": 1.4245462518797845, + "learning_rate": 5.416024984815072e-07, + "loss": 0.22702521085739136, + "step": 6780 + }, + { + "epoch": 1.8006904793520118, + "grad_norm": 1.153610007644359, + "learning_rate": 5.401780189742789e-07, + "loss": 0.19955751299858093, + "step": 6781 + }, + { + "epoch": 1.8009560483335547, + "grad_norm": 1.2560139759300732, + "learning_rate": 5.387553632091591e-07, + "loss": 0.19743162393569946, + "step": 6782 + }, + { + "epoch": 1.8012216173150977, + "grad_norm": 1.3072968250539403, + "learning_rate": 5.373345314604206e-07, + "loss": 0.2262525111436844, + "step": 6783 + }, + { + "epoch": 1.8014871862966406, + "grad_norm": 1.2987858405959638, + "learning_rate": 5.359155240019809e-07, + "loss": 0.249632328748703, + "step": 6784 + }, + { + "epoch": 1.8017527552781836, + "grad_norm": 1.1804135507002813, + "learning_rate": 5.344983411074111e-07, + "loss": 0.19300231337547302, + "step": 6785 + }, + { + "epoch": 1.8020183242597265, + "grad_norm": 1.293291337799575, + "learning_rate": 5.330829830499263e-07, + "loss": 0.22256134450435638, + "step": 6786 + }, + { + "epoch": 1.8022838932412695, + "grad_norm": 1.283065855572867, + "learning_rate": 5.316694501023911e-07, + "loss": 0.2666356563568115, + "step": 6787 + }, + { + "epoch": 1.8025494622228124, + "grad_norm": 1.239663996945653, + "learning_rate": 5.302577425373156e-07, + "loss": 0.223050057888031, + "step": 6788 + }, + { + "epoch": 1.8028150312043554, + "grad_norm": 1.3011452698852823, + "learning_rate": 5.288478606268632e-07, + "loss": 0.2298094481229782, + "step": 6789 + }, + { + "epoch": 1.8030806001858983, + "grad_norm": 1.4761708863150307, + "learning_rate": 5.27439804642843e-07, + "loss": 0.23596417903900146, + "step": 6790 + }, + { + "epoch": 1.8033461691674413, + "grad_norm": 1.226229776793909, + "learning_rate": 5.26033574856708e-07, + "loss": 0.19501623511314392, + "step": 6791 + }, + { + "epoch": 
1.8036117381489842, + "grad_norm": 1.2825838070785722, + "learning_rate": 5.246291715395657e-07, + "loss": 0.23518472909927368, + "step": 6792 + }, + { + "epoch": 1.8038773071305272, + "grad_norm": 1.1820374841237484, + "learning_rate": 5.232265949621651e-07, + "loss": 0.2251899093389511, + "step": 6793 + }, + { + "epoch": 1.8041428761120701, + "grad_norm": 1.1527654541489951, + "learning_rate": 5.218258453949099e-07, + "loss": 0.1764119267463684, + "step": 6794 + }, + { + "epoch": 1.804408445093613, + "grad_norm": 1.2895741356204065, + "learning_rate": 5.204269231078484e-07, + "loss": 0.20768773555755615, + "step": 6795 + }, + { + "epoch": 1.804674014075156, + "grad_norm": 1.3841780370828203, + "learning_rate": 5.19029828370674e-07, + "loss": 0.2115546613931656, + "step": 6796 + }, + { + "epoch": 1.804939583056699, + "grad_norm": 1.315680847185169, + "learning_rate": 5.176345614527312e-07, + "loss": 0.2465972602367401, + "step": 6797 + }, + { + "epoch": 1.805205152038242, + "grad_norm": 1.379203464130328, + "learning_rate": 5.162411226230102e-07, + "loss": 0.2359803020954132, + "step": 6798 + }, + { + "epoch": 1.805470721019785, + "grad_norm": 1.4106819634653143, + "learning_rate": 5.148495121501506e-07, + "loss": 0.27518990635871887, + "step": 6799 + }, + { + "epoch": 1.8057362900013278, + "grad_norm": 1.3653410113402416, + "learning_rate": 5.134597303024391e-07, + "loss": 0.23914849758148193, + "step": 6800 + }, + { + "epoch": 1.8060018589828708, + "grad_norm": 1.256847668479307, + "learning_rate": 5.120717773478068e-07, + "loss": 0.21771098673343658, + "step": 6801 + }, + { + "epoch": 1.8062674279644138, + "grad_norm": 1.2716100664289411, + "learning_rate": 5.106856535538363e-07, + "loss": 0.235421285033226, + "step": 6802 + }, + { + "epoch": 1.8065329969459567, + "grad_norm": 1.4167241401735549, + "learning_rate": 5.093013591877561e-07, + "loss": 0.23973548412322998, + "step": 6803 + }, + { + "epoch": 1.8067985659274997, + "grad_norm": 1.484886222602596, + 
"learning_rate": 5.079188945164426e-07, + "loss": 0.24059349298477173, + "step": 6804 + }, + { + "epoch": 1.8070641349090426, + "grad_norm": 1.3840991454067133, + "learning_rate": 5.065382598064161e-07, + "loss": 0.25188207626342773, + "step": 6805 + }, + { + "epoch": 1.8073297038905856, + "grad_norm": 1.1866308474402574, + "learning_rate": 5.051594553238482e-07, + "loss": 0.20124536752700806, + "step": 6806 + }, + { + "epoch": 1.8075952728721285, + "grad_norm": 1.2234769875088154, + "learning_rate": 5.037824813345571e-07, + "loss": 0.2059330940246582, + "step": 6807 + }, + { + "epoch": 1.8078608418536715, + "grad_norm": 1.2468279665046458, + "learning_rate": 5.024073381040052e-07, + "loss": 0.2122621238231659, + "step": 6808 + }, + { + "epoch": 1.8081264108352144, + "grad_norm": 1.2203093249465347, + "learning_rate": 5.010340258973046e-07, + "loss": 0.20064303278923035, + "step": 6809 + }, + { + "epoch": 1.8083919798167574, + "grad_norm": 1.3685187895509534, + "learning_rate": 4.996625449792147e-07, + "loss": 0.24773281812667847, + "step": 6810 + }, + { + "epoch": 1.8086575487983003, + "grad_norm": 1.149837064877599, + "learning_rate": 4.982928956141375e-07, + "loss": 0.2111661732196808, + "step": 6811 + }, + { + "epoch": 1.8089231177798433, + "grad_norm": 1.2721912706796665, + "learning_rate": 4.969250780661306e-07, + "loss": 0.24823394417762756, + "step": 6812 + }, + { + "epoch": 1.8091886867613862, + "grad_norm": 1.410632443971984, + "learning_rate": 4.955590925988896e-07, + "loss": 0.24726605415344238, + "step": 6813 + }, + { + "epoch": 1.8094542557429292, + "grad_norm": 1.3112520269484638, + "learning_rate": 4.941949394757605e-07, + "loss": 0.2269962728023529, + "step": 6814 + }, + { + "epoch": 1.8097198247244721, + "grad_norm": 1.311172380903373, + "learning_rate": 4.928326189597377e-07, + "loss": 0.2336469292640686, + "step": 6815 + }, + { + "epoch": 1.809985393706015, + "grad_norm": 1.3372206959113173, + "learning_rate": 4.914721313134585e-07, + "loss": 
0.24872124195098877, + "step": 6816 + }, + { + "epoch": 1.810250962687558, + "grad_norm": 1.3116570930981006, + "learning_rate": 4.901134767992099e-07, + "loss": 0.2484157383441925, + "step": 6817 + }, + { + "epoch": 1.810516531669101, + "grad_norm": 1.5234901533359522, + "learning_rate": 4.887566556789247e-07, + "loss": 0.24683158099651337, + "step": 6818 + }, + { + "epoch": 1.810782100650644, + "grad_norm": 1.1959899225802055, + "learning_rate": 4.874016682141802e-07, + "loss": 0.18717995285987854, + "step": 6819 + }, + { + "epoch": 1.8110476696321869, + "grad_norm": 1.2862771000886628, + "learning_rate": 4.860485146662053e-07, + "loss": 0.2220807671546936, + "step": 6820 + }, + { + "epoch": 1.8113132386137298, + "grad_norm": 1.196369102162481, + "learning_rate": 4.84697195295869e-07, + "loss": 0.2178400307893753, + "step": 6821 + }, + { + "epoch": 1.8115788075952728, + "grad_norm": 1.2250082051849178, + "learning_rate": 4.833477103636908e-07, + "loss": 0.2056645154953003, + "step": 6822 + }, + { + "epoch": 1.8118443765768157, + "grad_norm": 1.1729075702986809, + "learning_rate": 4.820000601298358e-07, + "loss": 0.21441905200481415, + "step": 6823 + }, + { + "epoch": 1.8121099455583587, + "grad_norm": 1.4445497728186703, + "learning_rate": 4.806542448541151e-07, + "loss": 0.17688237130641937, + "step": 6824 + }, + { + "epoch": 1.8123755145399016, + "grad_norm": 1.3216659704658935, + "learning_rate": 4.793102647959847e-07, + "loss": 0.22405505180358887, + "step": 6825 + }, + { + "epoch": 1.8126410835214446, + "grad_norm": 1.4226735460298432, + "learning_rate": 4.779681202145503e-07, + "loss": 0.21617908775806427, + "step": 6826 + }, + { + "epoch": 1.8129066525029875, + "grad_norm": 1.3284639992790963, + "learning_rate": 4.766278113685596e-07, + "loss": 0.23570871353149414, + "step": 6827 + }, + { + "epoch": 1.8131722214845305, + "grad_norm": 1.222373726415007, + "learning_rate": 4.7528933851641036e-07, + "loss": 0.23806743323802948, + "step": 6828 + }, + { + 
"epoch": 1.8134377904660735, + "grad_norm": 1.3312930220149763, + "learning_rate": 4.739527019161405e-07, + "loss": 0.24859179556369781, + "step": 6829 + }, + { + "epoch": 1.8137033594476164, + "grad_norm": 1.2143252342774762, + "learning_rate": 4.726179018254418e-07, + "loss": 0.21314260363578796, + "step": 6830 + }, + { + "epoch": 1.8139689284291594, + "grad_norm": 1.272910058647325, + "learning_rate": 4.7128493850164715e-07, + "loss": 0.25290659070014954, + "step": 6831 + }, + { + "epoch": 1.8142344974107023, + "grad_norm": 1.1800117497978073, + "learning_rate": 4.699538122017355e-07, + "loss": 0.22606703639030457, + "step": 6832 + }, + { + "epoch": 1.8145000663922453, + "grad_norm": 1.3037958158309495, + "learning_rate": 4.6862452318233275e-07, + "loss": 0.23973071575164795, + "step": 6833 + }, + { + "epoch": 1.8147656353737882, + "grad_norm": 1.2341358358957555, + "learning_rate": 4.672970716997094e-07, + "loss": 0.2225341498851776, + "step": 6834 + }, + { + "epoch": 1.8150312043553312, + "grad_norm": 1.441833447404081, + "learning_rate": 4.6597145800978183e-07, + "loss": 0.19153356552124023, + "step": 6835 + }, + { + "epoch": 1.8152967733368741, + "grad_norm": 1.2010339801105188, + "learning_rate": 4.646476823681145e-07, + "loss": 0.19694843888282776, + "step": 6836 + }, + { + "epoch": 1.815562342318417, + "grad_norm": 1.2719437537675773, + "learning_rate": 4.6332574502991554e-07, + "loss": 0.2353869527578354, + "step": 6837 + }, + { + "epoch": 1.81582791129996, + "grad_norm": 1.3504470280928214, + "learning_rate": 4.6200564625003775e-07, + "loss": 0.20919787883758545, + "step": 6838 + }, + { + "epoch": 1.816093480281503, + "grad_norm": 1.1775336742921327, + "learning_rate": 4.6068738628298193e-07, + "loss": 0.18352919816970825, + "step": 6839 + }, + { + "epoch": 1.816359049263046, + "grad_norm": 1.3571378213568392, + "learning_rate": 4.5937096538289147e-07, + "loss": 0.24711212515830994, + "step": 6840 + }, + { + "epoch": 1.8166246182445889, + "grad_norm": 
1.2216287617055834, + "learning_rate": 4.580563838035579e-07, + "loss": 0.2350531816482544, + "step": 6841 + }, + { + "epoch": 1.8168901872261318, + "grad_norm": 1.3731447849726235, + "learning_rate": 4.5674364179841614e-07, + "loss": 0.26124465465545654, + "step": 6842 + }, + { + "epoch": 1.8171557562076748, + "grad_norm": 1.3819435677197398, + "learning_rate": 4.5543273962054934e-07, + "loss": 0.2110440880060196, + "step": 6843 + }, + { + "epoch": 1.817421325189218, + "grad_norm": 1.425540844923539, + "learning_rate": 4.5412367752268094e-07, + "loss": 0.2409415990114212, + "step": 6844 + }, + { + "epoch": 1.817686894170761, + "grad_norm": 1.2827549712815094, + "learning_rate": 4.528164557571857e-07, + "loss": 0.2280777543783188, + "step": 6845 + }, + { + "epoch": 1.8179524631523039, + "grad_norm": 1.111661347066374, + "learning_rate": 4.515110745760787e-07, + "loss": 0.201339989900589, + "step": 6846 + }, + { + "epoch": 1.8182180321338468, + "grad_norm": 1.2576623337538495, + "learning_rate": 4.5020753423102083e-07, + "loss": 0.22910752892494202, + "step": 6847 + }, + { + "epoch": 1.8184836011153898, + "grad_norm": 1.2835742527474332, + "learning_rate": 4.4890583497332327e-07, + "loss": 0.21736779808998108, + "step": 6848 + }, + { + "epoch": 1.8187491700969327, + "grad_norm": 1.282796826855034, + "learning_rate": 4.476059770539354e-07, + "loss": 0.20898449420928955, + "step": 6849 + }, + { + "epoch": 1.8190147390784757, + "grad_norm": 1.2514312774528749, + "learning_rate": 4.463079607234555e-07, + "loss": 0.22159051895141602, + "step": 6850 + }, + { + "epoch": 1.8192803080600186, + "grad_norm": 1.290667660986327, + "learning_rate": 4.450117862321246e-07, + "loss": 0.24081172049045563, + "step": 6851 + }, + { + "epoch": 1.8195458770415616, + "grad_norm": 1.2092663587603776, + "learning_rate": 4.4371745382983164e-07, + "loss": 0.17856758832931519, + "step": 6852 + }, + { + "epoch": 1.8198114460231045, + "grad_norm": 1.2002967167521004, + "learning_rate": 
4.424249637661071e-07, + "loss": 0.20796868205070496, + "step": 6853 + }, + { + "epoch": 1.8200770150046475, + "grad_norm": 1.5683273026632796, + "learning_rate": 4.4113431629013046e-07, + "loss": 0.24277149140834808, + "step": 6854 + }, + { + "epoch": 1.8203425839861904, + "grad_norm": 1.1767967505464594, + "learning_rate": 4.3984551165071944e-07, + "loss": 0.19315838813781738, + "step": 6855 + }, + { + "epoch": 1.8206081529677334, + "grad_norm": 1.2457379727303777, + "learning_rate": 4.3855855009634075e-07, + "loss": 0.20789340138435364, + "step": 6856 + }, + { + "epoch": 1.8208737219492763, + "grad_norm": 1.4246348317049922, + "learning_rate": 4.372734318751082e-07, + "loss": 0.2871186137199402, + "step": 6857 + }, + { + "epoch": 1.8211392909308193, + "grad_norm": 1.3878283876849893, + "learning_rate": 4.359901572347758e-07, + "loss": 0.2419736236333847, + "step": 6858 + }, + { + "epoch": 1.8214048599123622, + "grad_norm": 1.3237602075469659, + "learning_rate": 4.3470872642274455e-07, + "loss": 0.2190292328596115, + "step": 6859 + }, + { + "epoch": 1.8216704288939052, + "grad_norm": 1.3879953178475168, + "learning_rate": 4.3342913968605903e-07, + "loss": 0.2654367685317993, + "step": 6860 + }, + { + "epoch": 1.8219359978754481, + "grad_norm": 1.3362249609314758, + "learning_rate": 4.321513972714075e-07, + "loss": 0.2536984086036682, + "step": 6861 + }, + { + "epoch": 1.822201566856991, + "grad_norm": 1.3804156416489965, + "learning_rate": 4.308754994251252e-07, + "loss": 0.260431170463562, + "step": 6862 + }, + { + "epoch": 1.822467135838534, + "grad_norm": 1.1376782237723586, + "learning_rate": 4.2960144639318855e-07, + "loss": 0.19348303973674774, + "step": 6863 + }, + { + "epoch": 1.822732704820077, + "grad_norm": 1.3505211109720399, + "learning_rate": 4.283292384212201e-07, + "loss": 0.2284386157989502, + "step": 6864 + }, + { + "epoch": 1.82299827380162, + "grad_norm": 1.2449697035186624, + "learning_rate": 4.270588757544869e-07, + "loss": 
0.23439526557922363, + "step": 6865 + }, + { + "epoch": 1.823263842783163, + "grad_norm": 1.247098399621602, + "learning_rate": 4.2579035863790086e-07, + "loss": 0.2123441994190216, + "step": 6866 + }, + { + "epoch": 1.8235294117647058, + "grad_norm": 1.251423525262008, + "learning_rate": 4.245236873160163e-07, + "loss": 0.24568180739879608, + "step": 6867 + }, + { + "epoch": 1.8237949807462488, + "grad_norm": 1.4504253184377665, + "learning_rate": 4.232588620330325e-07, + "loss": 0.24078285694122314, + "step": 6868 + }, + { + "epoch": 1.8240605497277917, + "grad_norm": 1.157509101798501, + "learning_rate": 4.2199588303279414e-07, + "loss": 0.2003621608018875, + "step": 6869 + }, + { + "epoch": 1.8243261187093347, + "grad_norm": 1.3049050095763572, + "learning_rate": 4.2073475055878664e-07, + "loss": 0.21201889216899872, + "step": 6870 + }, + { + "epoch": 1.8245916876908777, + "grad_norm": 1.429124542908126, + "learning_rate": 4.1947546485414215e-07, + "loss": 0.23175427317619324, + "step": 6871 + }, + { + "epoch": 1.8248572566724208, + "grad_norm": 1.3101487536079581, + "learning_rate": 4.182180261616364e-07, + "loss": 0.2391383945941925, + "step": 6872 + }, + { + "epoch": 1.8251228256539638, + "grad_norm": 1.341869026992186, + "learning_rate": 4.169624347236878e-07, + "loss": 0.23120146989822388, + "step": 6873 + }, + { + "epoch": 1.8253883946355067, + "grad_norm": 1.1699948636498165, + "learning_rate": 4.157086907823604e-07, + "loss": 0.22541432082653046, + "step": 6874 + }, + { + "epoch": 1.8256539636170497, + "grad_norm": 1.3354293669412138, + "learning_rate": 4.1445679457936094e-07, + "loss": 0.25613510608673096, + "step": 6875 + }, + { + "epoch": 1.8259195325985926, + "grad_norm": 1.191861909098097, + "learning_rate": 4.1320674635604186e-07, + "loss": 0.21002547442913055, + "step": 6876 + }, + { + "epoch": 1.8261851015801356, + "grad_norm": 1.230870532242656, + "learning_rate": 4.119585463533959e-07, + "loss": 0.2593066692352295, + "step": 6877 + }, + { + 
"epoch": 1.8264506705616785, + "grad_norm": 1.4772106156087776, + "learning_rate": 4.1071219481206184e-07, + "loss": 0.23771531879901886, + "step": 6878 + }, + { + "epoch": 1.8267162395432215, + "grad_norm": 1.3106459571340912, + "learning_rate": 4.094676919723206e-07, + "loss": 0.2069541960954666, + "step": 6879 + }, + { + "epoch": 1.8269818085247644, + "grad_norm": 1.2065450512433227, + "learning_rate": 4.082250380740993e-07, + "loss": 0.21314311027526855, + "step": 6880 + }, + { + "epoch": 1.8272473775063074, + "grad_norm": 1.2723957233809677, + "learning_rate": 4.069842333569662e-07, + "loss": 0.198696106672287, + "step": 6881 + }, + { + "epoch": 1.8275129464878503, + "grad_norm": 1.2365636263350124, + "learning_rate": 4.057452780601334e-07, + "loss": 0.22771228849887848, + "step": 6882 + }, + { + "epoch": 1.8277785154693933, + "grad_norm": 1.3935711018120034, + "learning_rate": 4.045081724224564e-07, + "loss": 0.24176150560379028, + "step": 6883 + }, + { + "epoch": 1.8280440844509362, + "grad_norm": 1.1711714123320747, + "learning_rate": 4.0327291668243785e-07, + "loss": 0.18257084488868713, + "step": 6884 + }, + { + "epoch": 1.8283096534324792, + "grad_norm": 1.7740145369201021, + "learning_rate": 4.02039511078216e-07, + "loss": 0.2317531704902649, + "step": 6885 + }, + { + "epoch": 1.8285752224140222, + "grad_norm": 1.237685133468282, + "learning_rate": 4.008079558475797e-07, + "loss": 0.22523516416549683, + "step": 6886 + }, + { + "epoch": 1.828840791395565, + "grad_norm": 1.338469580607285, + "learning_rate": 3.995782512279578e-07, + "loss": 0.22351330518722534, + "step": 6887 + }, + { + "epoch": 1.829106360377108, + "grad_norm": 1.3272231861758204, + "learning_rate": 3.983503974564229e-07, + "loss": 0.22151902318000793, + "step": 6888 + }, + { + "epoch": 1.829371929358651, + "grad_norm": 1.2483501881623744, + "learning_rate": 3.971243947696901e-07, + "loss": 0.20800583064556122, + "step": 6889 + }, + { + "epoch": 1.829637498340194, + "grad_norm": 
1.189419989304772, + "learning_rate": 3.959002434041181e-07, + "loss": 0.21332690119743347, + "step": 6890 + }, + { + "epoch": 1.829903067321737, + "grad_norm": 1.3040750377284556, + "learning_rate": 3.946779435957093e-07, + "loss": 0.2561502456665039, + "step": 6891 + }, + { + "epoch": 1.8301686363032799, + "grad_norm": 1.2150229659643972, + "learning_rate": 3.934574955801074e-07, + "loss": 0.23636910319328308, + "step": 6892 + }, + { + "epoch": 1.8304342052848228, + "grad_norm": 1.303931878967275, + "learning_rate": 3.922388995926041e-07, + "loss": 0.26683998107910156, + "step": 6893 + }, + { + "epoch": 1.8306997742663658, + "grad_norm": 1.319570373744726, + "learning_rate": 3.910221558681271e-07, + "loss": 0.2779492735862732, + "step": 6894 + }, + { + "epoch": 1.8309653432479087, + "grad_norm": 1.473106593059021, + "learning_rate": 3.8980726464125095e-07, + "loss": 0.20174488425254822, + "step": 6895 + }, + { + "epoch": 1.8312309122294517, + "grad_norm": 1.3128034885814306, + "learning_rate": 3.885942261461928e-07, + "loss": 0.21486055850982666, + "step": 6896 + }, + { + "epoch": 1.8314964812109946, + "grad_norm": 1.2201269476427121, + "learning_rate": 3.8738304061681107e-07, + "loss": 0.25637733936309814, + "step": 6897 + }, + { + "epoch": 1.8317620501925376, + "grad_norm": 1.3661274524986262, + "learning_rate": 3.8617370828661014e-07, + "loss": 0.2518364489078522, + "step": 6898 + }, + { + "epoch": 1.8320276191740805, + "grad_norm": 1.2902396654446358, + "learning_rate": 3.849662293887324e-07, + "loss": 0.25752246379852295, + "step": 6899 + }, + { + "epoch": 1.8322931881556235, + "grad_norm": 1.1514833439027936, + "learning_rate": 3.8376060415596826e-07, + "loss": 0.20891718566417694, + "step": 6900 + }, + { + "epoch": 1.8325587571371664, + "grad_norm": 1.378720679176223, + "learning_rate": 3.825568328207452e-07, + "loss": 0.20491960644721985, + "step": 6901 + }, + { + "epoch": 1.8328243261187094, + "grad_norm": 1.2540067790590503, + "learning_rate": 
3.813549156151386e-07, + "loss": 0.22183339297771454, + "step": 6902 + }, + { + "epoch": 1.8330898951002523, + "grad_norm": 1.3321077338345055, + "learning_rate": 3.801548527708621e-07, + "loss": 0.2476987987756729, + "step": 6903 + }, + { + "epoch": 1.8333554640817953, + "grad_norm": 1.470629998110282, + "learning_rate": 3.7895664451927493e-07, + "loss": 0.26486238837242126, + "step": 6904 + }, + { + "epoch": 1.8336210330633382, + "grad_norm": 1.2524745099106778, + "learning_rate": 3.777602910913769e-07, + "loss": 0.25922873616218567, + "step": 6905 + }, + { + "epoch": 1.8338866020448812, + "grad_norm": 1.317563058388092, + "learning_rate": 3.7656579271781127e-07, + "loss": 0.22682476043701172, + "step": 6906 + }, + { + "epoch": 1.8341521710264241, + "grad_norm": 1.2391277284536568, + "learning_rate": 3.753731496288626e-07, + "loss": 0.20371592044830322, + "step": 6907 + }, + { + "epoch": 1.834417740007967, + "grad_norm": 1.2444383452097851, + "learning_rate": 3.7418236205445826e-07, + "loss": 0.23857446014881134, + "step": 6908 + }, + { + "epoch": 1.83468330898951, + "grad_norm": 2.6487436557467645, + "learning_rate": 3.729934302241689e-07, + "loss": 0.27119290828704834, + "step": 6909 + }, + { + "epoch": 1.834948877971053, + "grad_norm": 1.254159773595776, + "learning_rate": 3.7180635436720567e-07, + "loss": 0.2354927361011505, + "step": 6910 + }, + { + "epoch": 1.835214446952596, + "grad_norm": 1.301136184663389, + "learning_rate": 3.706211347124233e-07, + "loss": 0.26378512382507324, + "step": 6911 + }, + { + "epoch": 1.835480015934139, + "grad_norm": 1.3296098934003593, + "learning_rate": 3.6943777148831907e-07, + "loss": 0.20725026726722717, + "step": 6912 + }, + { + "epoch": 1.8357455849156818, + "grad_norm": 1.2212362377090786, + "learning_rate": 3.682562649230304e-07, + "loss": 0.2049856185913086, + "step": 6913 + }, + { + "epoch": 1.8360111538972248, + "grad_norm": 1.2555620791922353, + "learning_rate": 3.6707661524433833e-07, + "loss": 
0.19303423166275024, + "step": 6914 + }, + { + "epoch": 1.8362767228787678, + "grad_norm": 1.2395332139010746, + "learning_rate": 3.6589882267966445e-07, + "loss": 0.21510104835033417, + "step": 6915 + }, + { + "epoch": 1.8365422918603107, + "grad_norm": 1.1669418633603965, + "learning_rate": 3.6472288745607376e-07, + "loss": 0.1933138072490692, + "step": 6916 + }, + { + "epoch": 1.8368078608418537, + "grad_norm": 1.112367559966563, + "learning_rate": 3.6354880980027373e-07, + "loss": 0.2015206664800644, + "step": 6917 + }, + { + "epoch": 1.8370734298233966, + "grad_norm": 1.2823070307410491, + "learning_rate": 3.6237658993861114e-07, + "loss": 0.20550866425037384, + "step": 6918 + }, + { + "epoch": 1.8373389988049396, + "grad_norm": 1.3067689335737758, + "learning_rate": 3.612062280970763e-07, + "loss": 0.221620112657547, + "step": 6919 + }, + { + "epoch": 1.8376045677864825, + "grad_norm": 1.3556317520839982, + "learning_rate": 3.6003772450130315e-07, + "loss": 0.23098941147327423, + "step": 6920 + }, + { + "epoch": 1.8378701367680255, + "grad_norm": 1.147765516964157, + "learning_rate": 3.588710793765626e-07, + "loss": 0.2119837999343872, + "step": 6921 + }, + { + "epoch": 1.8381357057495684, + "grad_norm": 1.3802709807389941, + "learning_rate": 3.5770629294777146e-07, + "loss": 0.24879229068756104, + "step": 6922 + }, + { + "epoch": 1.8384012747311114, + "grad_norm": 1.3060365647669372, + "learning_rate": 3.565433654394879e-07, + "loss": 0.18895789980888367, + "step": 6923 + }, + { + "epoch": 1.8386668437126543, + "grad_norm": 1.2553378569117732, + "learning_rate": 3.55382297075908e-07, + "loss": 0.23148275911808014, + "step": 6924 + }, + { + "epoch": 1.8389324126941973, + "grad_norm": 1.212120061404488, + "learning_rate": 3.542230880808739e-07, + "loss": 0.20919913053512573, + "step": 6925 + }, + { + "epoch": 1.8391979816757402, + "grad_norm": 1.4703495422250146, + "learning_rate": 3.53065738677868e-07, + "loss": 0.22832845151424408, + "step": 6926 + }, + { + 
"epoch": 1.8394635506572832, + "grad_norm": 1.2792392305491092, + "learning_rate": 3.519102490900117e-07, + "loss": 0.25866004824638367, + "step": 6927 + }, + { + "epoch": 1.8397291196388261, + "grad_norm": 1.4425441758777668, + "learning_rate": 3.507566195400691e-07, + "loss": 0.23372048139572144, + "step": 6928 + }, + { + "epoch": 1.839994688620369, + "grad_norm": 1.3100572186568338, + "learning_rate": 3.496048502504501e-07, + "loss": 0.2516997158527374, + "step": 6929 + }, + { + "epoch": 1.840260257601912, + "grad_norm": 1.3352189279547024, + "learning_rate": 3.4845494144320036e-07, + "loss": 0.21170508861541748, + "step": 6930 + }, + { + "epoch": 1.840525826583455, + "grad_norm": 1.3970465930645521, + "learning_rate": 3.473068933400081e-07, + "loss": 0.2642953395843506, + "step": 6931 + }, + { + "epoch": 1.840791395564998, + "grad_norm": 1.2429277065520816, + "learning_rate": 3.461607061622041e-07, + "loss": 0.2294994294643402, + "step": 6932 + }, + { + "epoch": 1.8410569645465409, + "grad_norm": 1.3898674163561502, + "learning_rate": 3.450163801307582e-07, + "loss": 0.2554621696472168, + "step": 6933 + }, + { + "epoch": 1.8413225335280838, + "grad_norm": 1.5251200097904765, + "learning_rate": 3.4387391546628733e-07, + "loss": 0.2291295826435089, + "step": 6934 + }, + { + "epoch": 1.8415881025096268, + "grad_norm": 1.2253918775229307, + "learning_rate": 3.4273331238903974e-07, + "loss": 0.1996842920780182, + "step": 6935 + }, + { + "epoch": 1.8418536714911697, + "grad_norm": 1.3974356568527164, + "learning_rate": 3.415945711189128e-07, + "loss": 0.248038187623024, + "step": 6936 + }, + { + "epoch": 1.8421192404727127, + "grad_norm": 1.4224083213114915, + "learning_rate": 3.4045769187544096e-07, + "loss": 0.232235848903656, + "step": 6937 + }, + { + "epoch": 1.8423848094542556, + "grad_norm": 1.2811247103872994, + "learning_rate": 3.3932267487780333e-07, + "loss": 0.2526085376739502, + "step": 6938 + }, + { + "epoch": 1.8426503784357986, + "grad_norm": 
1.324059920588895, + "learning_rate": 3.381895203448182e-07, + "loss": 0.22401389479637146, + "step": 6939 + }, + { + "epoch": 1.8429159474173415, + "grad_norm": 1.2904044842651823, + "learning_rate": 3.3705822849494195e-07, + "loss": 0.2509264647960663, + "step": 6940 + }, + { + "epoch": 1.8431815163988845, + "grad_norm": 1.2502849304352568, + "learning_rate": 3.3592879954627564e-07, + "loss": 0.2451169192790985, + "step": 6941 + }, + { + "epoch": 1.8434470853804275, + "grad_norm": 1.2774613485778883, + "learning_rate": 3.3480123371655957e-07, + "loss": 0.2361738532781601, + "step": 6942 + }, + { + "epoch": 1.8437126543619704, + "grad_norm": 1.1823675774441849, + "learning_rate": 3.3367553122317544e-07, + "loss": 0.22336295247077942, + "step": 6943 + }, + { + "epoch": 1.8439782233435134, + "grad_norm": 1.4218109729535482, + "learning_rate": 3.325516922831451e-07, + "loss": 0.22287659347057343, + "step": 6944 + }, + { + "epoch": 1.8442437923250563, + "grad_norm": 1.2819242467045069, + "learning_rate": 3.3142971711312975e-07, + "loss": 0.21845945715904236, + "step": 6945 + }, + { + "epoch": 1.8445093613065993, + "grad_norm": 1.2822597279006254, + "learning_rate": 3.303096059294364e-07, + "loss": 0.2650350332260132, + "step": 6946 + }, + { + "epoch": 1.8447749302881422, + "grad_norm": 1.346661503925149, + "learning_rate": 3.291913589480078e-07, + "loss": 0.21282124519348145, + "step": 6947 + }, + { + "epoch": 1.8450404992696852, + "grad_norm": 1.1254422779054267, + "learning_rate": 3.280749763844293e-07, + "loss": 0.17899346351623535, + "step": 6948 + }, + { + "epoch": 1.8453060682512281, + "grad_norm": 1.3295675928838626, + "learning_rate": 3.269604584539254e-07, + "loss": 0.23462103307247162, + "step": 6949 + }, + { + "epoch": 1.845571637232771, + "grad_norm": 1.2573990354862534, + "learning_rate": 3.2584780537136206e-07, + "loss": 0.20188388228416443, + "step": 6950 + }, + { + "epoch": 1.845837206214314, + "grad_norm": 1.3823133322277716, + "learning_rate": 
3.247370173512443e-07, + "loss": 0.2760109305381775, + "step": 6951 + }, + { + "epoch": 1.846102775195857, + "grad_norm": 1.1542508493730164, + "learning_rate": 3.236280946077219e-07, + "loss": 0.20977352559566498, + "step": 6952 + }, + { + "epoch": 1.8463683441774, + "grad_norm": 1.299549634983184, + "learning_rate": 3.225210373545806e-07, + "loss": 0.26468873023986816, + "step": 6953 + }, + { + "epoch": 1.8466339131589429, + "grad_norm": 1.287524526318513, + "learning_rate": 3.214158458052463e-07, + "loss": 0.2362184375524521, + "step": 6954 + }, + { + "epoch": 1.8468994821404858, + "grad_norm": 1.29131597308928, + "learning_rate": 3.2031252017278966e-07, + "loss": 0.21406327188014984, + "step": 6955 + }, + { + "epoch": 1.847165051122029, + "grad_norm": 1.4794600314925854, + "learning_rate": 3.1921106066991835e-07, + "loss": 0.2698758840560913, + "step": 6956 + }, + { + "epoch": 1.847430620103572, + "grad_norm": 1.3029413719135112, + "learning_rate": 3.1811146750898025e-07, + "loss": 0.22954389452934265, + "step": 6957 + }, + { + "epoch": 1.847696189085115, + "grad_norm": 1.149631756175727, + "learning_rate": 3.170137409019636e-07, + "loss": 0.23005755245685577, + "step": 6958 + }, + { + "epoch": 1.8479617580666579, + "grad_norm": 1.270561680049171, + "learning_rate": 3.159178810604968e-07, + "loss": 0.22408893704414368, + "step": 6959 + }, + { + "epoch": 1.8482273270482008, + "grad_norm": 1.1761716687553918, + "learning_rate": 3.14823888195851e-07, + "loss": 0.1983698308467865, + "step": 6960 + }, + { + "epoch": 1.8484928960297438, + "grad_norm": 1.387251984339494, + "learning_rate": 3.137317625189329e-07, + "loss": 0.24643054604530334, + "step": 6961 + }, + { + "epoch": 1.8487584650112867, + "grad_norm": 1.3612119090250128, + "learning_rate": 3.1264150424029083e-07, + "loss": 0.274917870759964, + "step": 6962 + }, + { + "epoch": 1.8490240339928297, + "grad_norm": 1.2836957141365997, + "learning_rate": 3.115531135701155e-07, + "loss": 0.2129468023777008, + 
"step": 6963 + }, + { + "epoch": 1.8492896029743726, + "grad_norm": 1.3421884287788837, + "learning_rate": 3.1046659071823695e-07, + "loss": 0.24127928912639618, + "step": 6964 + }, + { + "epoch": 1.8495551719559156, + "grad_norm": 1.2737231627436634, + "learning_rate": 3.093819358941208e-07, + "loss": 0.2528054416179657, + "step": 6965 + }, + { + "epoch": 1.8498207409374585, + "grad_norm": 1.253824703575336, + "learning_rate": 3.0829914930687767e-07, + "loss": 0.23623798787593842, + "step": 6966 + }, + { + "epoch": 1.8500863099190015, + "grad_norm": 1.231408637511902, + "learning_rate": 3.0721823116525497e-07, + "loss": 0.20241659879684448, + "step": 6967 + }, + { + "epoch": 1.8503518789005444, + "grad_norm": 1.264350645442844, + "learning_rate": 3.0613918167764156e-07, + "loss": 0.24365916848182678, + "step": 6968 + }, + { + "epoch": 1.8506174478820874, + "grad_norm": 1.311846273217192, + "learning_rate": 3.0506200105206554e-07, + "loss": 0.2550637722015381, + "step": 6969 + }, + { + "epoch": 1.8508830168636303, + "grad_norm": 1.1438212130974086, + "learning_rate": 3.0398668949619515e-07, + "loss": 0.21531938016414642, + "step": 6970 + }, + { + "epoch": 1.8511485858451733, + "grad_norm": 1.3468646282560623, + "learning_rate": 3.029132472173368e-07, + "loss": 0.22749900817871094, + "step": 6971 + }, + { + "epoch": 1.8514141548267162, + "grad_norm": 1.186404759445675, + "learning_rate": 3.018416744224373e-07, + "loss": 0.1826775223016739, + "step": 6972 + }, + { + "epoch": 1.8516797238082592, + "grad_norm": 1.1782373460713542, + "learning_rate": 3.0077197131808344e-07, + "loss": 0.21982814371585846, + "step": 6973 + }, + { + "epoch": 1.8519452927898021, + "grad_norm": 1.2874557997839566, + "learning_rate": 2.997041381105026e-07, + "loss": 0.23515473306179047, + "step": 6974 + }, + { + "epoch": 1.852210861771345, + "grad_norm": 1.2184369208885015, + "learning_rate": 2.9863817500556e-07, + "loss": 0.19620616734027863, + "step": 6975 + }, + { + "epoch": 
1.852476430752888, + "grad_norm": 1.208715706835639, + "learning_rate": 2.975740822087603e-07, + "loss": 0.22158116102218628, + "step": 6976 + }, + { + "epoch": 1.852741999734431, + "grad_norm": 1.5176127203291871, + "learning_rate": 2.96511859925247e-07, + "loss": 0.23082244396209717, + "step": 6977 + }, + { + "epoch": 1.853007568715974, + "grad_norm": 1.286088700644728, + "learning_rate": 2.954515083598064e-07, + "loss": 0.22743141651153564, + "step": 6978 + }, + { + "epoch": 1.853273137697517, + "grad_norm": 1.3437900472909596, + "learning_rate": 2.943930277168594e-07, + "loss": 0.2329188883304596, + "step": 6979 + }, + { + "epoch": 1.8535387066790598, + "grad_norm": 1.1892741095151198, + "learning_rate": 2.9333641820047055e-07, + "loss": 0.20360302925109863, + "step": 6980 + }, + { + "epoch": 1.8538042756606028, + "grad_norm": 1.1771915113483071, + "learning_rate": 2.922816800143402e-07, + "loss": 0.1903664767742157, + "step": 6981 + }, + { + "epoch": 1.8540698446421457, + "grad_norm": 1.2252145672801615, + "learning_rate": 2.912288133618102e-07, + "loss": 0.2247854322195053, + "step": 6982 + }, + { + "epoch": 1.8543354136236887, + "grad_norm": 1.305215823982529, + "learning_rate": 2.9017781844586035e-07, + "loss": 0.22693192958831787, + "step": 6983 + }, + { + "epoch": 1.8546009826052319, + "grad_norm": 1.3213552294005186, + "learning_rate": 2.891286954691108e-07, + "loss": 0.23769894242286682, + "step": 6984 + }, + { + "epoch": 1.8548665515867748, + "grad_norm": 1.267542763443237, + "learning_rate": 2.880814446338198e-07, + "loss": 0.23251450061798096, + "step": 6985 + }, + { + "epoch": 1.8551321205683178, + "grad_norm": 1.3253334264213772, + "learning_rate": 2.870360661418847e-07, + "loss": 0.20828741788864136, + "step": 6986 + }, + { + "epoch": 1.8553976895498607, + "grad_norm": 1.2448815733296377, + "learning_rate": 2.859925601948421e-07, + "loss": 0.2324519008398056, + "step": 6987 + }, + { + "epoch": 1.8556632585314037, + "grad_norm": 1.2799176737952995, 
+ "learning_rate": 2.8495092699386774e-07, + "loss": 0.2166297733783722, + "step": 6988 + }, + { + "epoch": 1.8559288275129466, + "grad_norm": 1.416567928880924, + "learning_rate": 2.839111667397765e-07, + "loss": 0.2760158181190491, + "step": 6989 + }, + { + "epoch": 1.8561943964944896, + "grad_norm": 1.1117414218952344, + "learning_rate": 2.8287327963302025e-07, + "loss": 0.2263752520084381, + "step": 6990 + }, + { + "epoch": 1.8564599654760325, + "grad_norm": 1.328135206527719, + "learning_rate": 2.8183726587369455e-07, + "loss": 0.2490656077861786, + "step": 6991 + }, + { + "epoch": 1.8567255344575755, + "grad_norm": 1.4860885268210424, + "learning_rate": 2.808031256615285e-07, + "loss": 0.22495508193969727, + "step": 6992 + }, + { + "epoch": 1.8569911034391184, + "grad_norm": 1.297235121122649, + "learning_rate": 2.7977085919589253e-07, + "loss": 0.2671046853065491, + "step": 6993 + }, + { + "epoch": 1.8572566724206614, + "grad_norm": 1.2050300397617886, + "learning_rate": 2.7874046667579535e-07, + "loss": 0.19782954454421997, + "step": 6994 + }, + { + "epoch": 1.8575222414022043, + "grad_norm": 1.3009259795352104, + "learning_rate": 2.777119482998847e-07, + "loss": 0.24458879232406616, + "step": 6995 + }, + { + "epoch": 1.8577878103837473, + "grad_norm": 1.203325902936209, + "learning_rate": 2.7668530426644637e-07, + "loss": 0.23476794362068176, + "step": 6996 + }, + { + "epoch": 1.8580533793652902, + "grad_norm": 1.3828799415147273, + "learning_rate": 2.7566053477340535e-07, + "loss": 0.2318287342786789, + "step": 6997 + }, + { + "epoch": 1.8583189483468332, + "grad_norm": 1.1075382213650395, + "learning_rate": 2.746376400183259e-07, + "loss": 0.21341973543167114, + "step": 6998 + }, + { + "epoch": 1.8585845173283762, + "grad_norm": 1.3634634009375282, + "learning_rate": 2.7361662019840916e-07, + "loss": 0.25269803404808044, + "step": 6999 + }, + { + "epoch": 1.858850086309919, + "grad_norm": 1.2242004376785176, + "learning_rate": 2.7259747551049653e-07, + 
"loss": 0.24590039253234863, + "step": 7000 + }, + { + "epoch": 1.859115655291462, + "grad_norm": 1.2116643717780577, + "learning_rate": 2.715802061510664e-07, + "loss": 0.19907096028327942, + "step": 7001 + }, + { + "epoch": 1.859381224273005, + "grad_norm": 1.319285786592131, + "learning_rate": 2.705648123162363e-07, + "loss": 0.24304917454719543, + "step": 7002 + }, + { + "epoch": 1.859646793254548, + "grad_norm": 1.3884525546157216, + "learning_rate": 2.6955129420176193e-07, + "loss": 0.24846915900707245, + "step": 7003 + }, + { + "epoch": 1.859912362236091, + "grad_norm": 1.365283429552511, + "learning_rate": 2.685396520030381e-07, + "loss": 0.21709200739860535, + "step": 7004 + }, + { + "epoch": 1.8601779312176339, + "grad_norm": 1.3687506828870908, + "learning_rate": 2.675298859150977e-07, + "loss": 0.28031325340270996, + "step": 7005 + }, + { + "epoch": 1.8604435001991768, + "grad_norm": 1.1527129171653896, + "learning_rate": 2.6652199613261155e-07, + "loss": 0.20367707312107086, + "step": 7006 + }, + { + "epoch": 1.8607090691807198, + "grad_norm": 1.1875101722790007, + "learning_rate": 2.6551598284988877e-07, + "loss": 0.20737403631210327, + "step": 7007 + }, + { + "epoch": 1.8609746381622627, + "grad_norm": 1.3375926225189751, + "learning_rate": 2.6451184626087646e-07, + "loss": 0.2504046559333801, + "step": 7008 + }, + { + "epoch": 1.8612402071438057, + "grad_norm": 1.3403751507501938, + "learning_rate": 2.635095865591608e-07, + "loss": 0.26347339153289795, + "step": 7009 + }, + { + "epoch": 1.8615057761253486, + "grad_norm": 1.1832867553985462, + "learning_rate": 2.625092039379662e-07, + "loss": 0.2347220480442047, + "step": 7010 + }, + { + "epoch": 1.8617713451068916, + "grad_norm": 1.2487098903864389, + "learning_rate": 2.6151069859015386e-07, + "loss": 0.23565630614757538, + "step": 7011 + }, + { + "epoch": 1.8620369140884345, + "grad_norm": 1.2377624004623402, + "learning_rate": 2.605140707082243e-07, + "loss": 0.21462437510490417, + "step": 7012 + 
}, + { + "epoch": 1.8623024830699775, + "grad_norm": 1.2992774401284823, + "learning_rate": 2.595193204843149e-07, + "loss": 0.24224728345870972, + "step": 7013 + }, + { + "epoch": 1.8625680520515204, + "grad_norm": 1.3531530893390702, + "learning_rate": 2.5852644811020344e-07, + "loss": 0.24200880527496338, + "step": 7014 + }, + { + "epoch": 1.8628336210330634, + "grad_norm": 1.2331149203562455, + "learning_rate": 2.5753545377730227e-07, + "loss": 0.23315191268920898, + "step": 7015 + }, + { + "epoch": 1.8630991900146063, + "grad_norm": 1.4360061023192454, + "learning_rate": 2.56546337676663e-07, + "loss": 0.31112274527549744, + "step": 7016 + }, + { + "epoch": 1.8633647589961493, + "grad_norm": 1.1775380155652753, + "learning_rate": 2.555590999989754e-07, + "loss": 0.2291945070028305, + "step": 7017 + }, + { + "epoch": 1.8636303279776922, + "grad_norm": 1.3248749602779475, + "learning_rate": 2.5457374093457057e-07, + "loss": 0.2324746549129486, + "step": 7018 + }, + { + "epoch": 1.8638958969592352, + "grad_norm": 1.3333311590100283, + "learning_rate": 2.5359026067341086e-07, + "loss": 0.2585206627845764, + "step": 7019 + }, + { + "epoch": 1.8641614659407781, + "grad_norm": 1.254813387894953, + "learning_rate": 2.5260865940510027e-07, + "loss": 0.22986871004104614, + "step": 7020 + }, + { + "epoch": 1.864427034922321, + "grad_norm": 1.3302473304174876, + "learning_rate": 2.5162893731888074e-07, + "loss": 0.22615428268909454, + "step": 7021 + }, + { + "epoch": 1.864692603903864, + "grad_norm": 1.2311139475810073, + "learning_rate": 2.5065109460363113e-07, + "loss": 0.21324753761291504, + "step": 7022 + }, + { + "epoch": 1.864958172885407, + "grad_norm": 1.2499721276179248, + "learning_rate": 2.4967513144786736e-07, + "loss": 0.2247733324766159, + "step": 7023 + }, + { + "epoch": 1.86522374186695, + "grad_norm": 1.198842298043478, + "learning_rate": 2.4870104803974336e-07, + "loss": 0.22080597281455994, + "step": 7024 + }, + { + "epoch": 1.865489310848493, + 
"grad_norm": 1.3721040923851937, + "learning_rate": 2.4772884456705224e-07, + "loss": 0.23669888079166412, + "step": 7025 + }, + { + "epoch": 1.8657548798300359, + "grad_norm": 1.2946969495879501, + "learning_rate": 2.4675852121722075e-07, + "loss": 0.2320847064256668, + "step": 7026 + }, + { + "epoch": 1.8660204488115788, + "grad_norm": 1.374404266409337, + "learning_rate": 2.4579007817731925e-07, + "loss": 0.2595662474632263, + "step": 7027 + }, + { + "epoch": 1.8662860177931218, + "grad_norm": 1.2351512812852723, + "learning_rate": 2.4482351563405174e-07, + "loss": 0.22152045369148254, + "step": 7028 + }, + { + "epoch": 1.8665515867746647, + "grad_norm": 1.270416082371449, + "learning_rate": 2.4385883377375683e-07, + "loss": 0.2391948401927948, + "step": 7029 + }, + { + "epoch": 1.8668171557562077, + "grad_norm": 1.3234796115140017, + "learning_rate": 2.428960327824159e-07, + "loss": 0.23117749392986298, + "step": 7030 + }, + { + "epoch": 1.8670827247377506, + "grad_norm": 1.313106749776766, + "learning_rate": 2.41935112845646e-07, + "loss": 0.24019500613212585, + "step": 7031 + }, + { + "epoch": 1.8673482937192936, + "grad_norm": 1.253088890729472, + "learning_rate": 2.4097607414869995e-07, + "loss": 0.19560202956199646, + "step": 7032 + }, + { + "epoch": 1.8676138627008365, + "grad_norm": 1.3625686769003584, + "learning_rate": 2.4001891687647103e-07, + "loss": 0.23110055923461914, + "step": 7033 + }, + { + "epoch": 1.8678794316823795, + "grad_norm": 1.3388200482229684, + "learning_rate": 2.39063641213485e-07, + "loss": 0.2214709371328354, + "step": 7034 + }, + { + "epoch": 1.8681450006639224, + "grad_norm": 1.2700799842548796, + "learning_rate": 2.381102473439101e-07, + "loss": 0.22123369574546814, + "step": 7035 + }, + { + "epoch": 1.8684105696454654, + "grad_norm": 1.4629863869289934, + "learning_rate": 2.371587354515481e-07, + "loss": 0.23984813690185547, + "step": 7036 + }, + { + "epoch": 1.8686761386270083, + "grad_norm": 1.4496870886295976, + 
"learning_rate": 2.3620910571984124e-07, + "loss": 0.26089030504226685, + "step": 7037 + }, + { + "epoch": 1.8689417076085513, + "grad_norm": 1.2076380290124689, + "learning_rate": 2.3526135833186527e-07, + "loss": 0.2344229370355606, + "step": 7038 + }, + { + "epoch": 1.8692072765900942, + "grad_norm": 1.290620691312973, + "learning_rate": 2.34315493470334e-07, + "loss": 0.24499498307704926, + "step": 7039 + }, + { + "epoch": 1.8694728455716372, + "grad_norm": 1.2975050166282813, + "learning_rate": 2.333715113176005e-07, + "loss": 0.21971477568149567, + "step": 7040 + }, + { + "epoch": 1.8697384145531801, + "grad_norm": 1.2659856510175163, + "learning_rate": 2.3242941205565362e-07, + "loss": 0.2594453990459442, + "step": 7041 + }, + { + "epoch": 1.870003983534723, + "grad_norm": 1.3125676617059407, + "learning_rate": 2.3148919586611806e-07, + "loss": 0.24689960479736328, + "step": 7042 + }, + { + "epoch": 1.870269552516266, + "grad_norm": 1.2165345453138858, + "learning_rate": 2.3055086293025665e-07, + "loss": 0.19972509145736694, + "step": 7043 + }, + { + "epoch": 1.870535121497809, + "grad_norm": 1.2460782677559714, + "learning_rate": 2.2961441342896795e-07, + "loss": 0.2139236032962799, + "step": 7044 + }, + { + "epoch": 1.870800690479352, + "grad_norm": 1.196552292185578, + "learning_rate": 2.286798475427898e-07, + "loss": 0.2251984179019928, + "step": 7045 + }, + { + "epoch": 1.8710662594608949, + "grad_norm": 1.2395291577625112, + "learning_rate": 2.277471654518959e-07, + "loss": 0.24517378211021423, + "step": 7046 + }, + { + "epoch": 1.8713318284424378, + "grad_norm": 1.3048847468612028, + "learning_rate": 2.2681636733609457e-07, + "loss": 0.19115275144577026, + "step": 7047 + }, + { + "epoch": 1.8715973974239808, + "grad_norm": 1.2997607659373802, + "learning_rate": 2.2588745337483454e-07, + "loss": 0.26092633605003357, + "step": 7048 + }, + { + "epoch": 1.8718629664055237, + "grad_norm": 1.2646212726473884, + "learning_rate": 2.2496042374719807e-07, + 
"loss": 0.18862302601337433, + "step": 7049 + }, + { + "epoch": 1.8721285353870667, + "grad_norm": 1.1602330038245767, + "learning_rate": 2.2403527863190554e-07, + "loss": 0.20728996396064758, + "step": 7050 + }, + { + "epoch": 1.8723941043686096, + "grad_norm": 1.236025812615254, + "learning_rate": 2.231120182073143e-07, + "loss": 0.24244122207164764, + "step": 7051 + }, + { + "epoch": 1.8726596733501526, + "grad_norm": 1.205655043915546, + "learning_rate": 2.2219064265141866e-07, + "loss": 0.18956953287124634, + "step": 7052 + }, + { + "epoch": 1.8729252423316956, + "grad_norm": 1.1159089015267554, + "learning_rate": 2.2127115214184868e-07, + "loss": 0.19873176515102386, + "step": 7053 + }, + { + "epoch": 1.8731908113132385, + "grad_norm": 1.2896839736015335, + "learning_rate": 2.203535468558704e-07, + "loss": 0.23717360198497772, + "step": 7054 + }, + { + "epoch": 1.8734563802947815, + "grad_norm": 1.3203924338573048, + "learning_rate": 2.1943782697038896e-07, + "loss": 0.24051904678344727, + "step": 7055 + }, + { + "epoch": 1.8737219492763244, + "grad_norm": 1.3193670550613668, + "learning_rate": 2.1852399266194312e-07, + "loss": 0.23541691899299622, + "step": 7056 + }, + { + "epoch": 1.8739875182578674, + "grad_norm": 1.3395958296451687, + "learning_rate": 2.1761204410671088e-07, + "loss": 0.22566163539886475, + "step": 7057 + }, + { + "epoch": 1.8742530872394103, + "grad_norm": 1.297432294479727, + "learning_rate": 2.167019814805027e-07, + "loss": 0.25771743059158325, + "step": 7058 + }, + { + "epoch": 1.8745186562209533, + "grad_norm": 1.1482951648622821, + "learning_rate": 2.1579380495876934e-07, + "loss": 0.22624637186527252, + "step": 7059 + }, + { + "epoch": 1.8747842252024962, + "grad_norm": 1.3036126318267591, + "learning_rate": 2.148875147165963e-07, + "loss": 0.24671627581119537, + "step": 7060 + }, + { + "epoch": 1.8750497941840392, + "grad_norm": 1.1983704285109544, + "learning_rate": 2.1398311092870605e-07, + "loss": 0.21607278287410736, + "step": 
7061 + }, + { + "epoch": 1.8753153631655821, + "grad_norm": 1.1102939736369823, + "learning_rate": 2.1308059376945689e-07, + "loss": 0.1960655301809311, + "step": 7062 + }, + { + "epoch": 1.875580932147125, + "grad_norm": 1.2816228458436618, + "learning_rate": 2.1217996341284297e-07, + "loss": 0.22005721926689148, + "step": 7063 + }, + { + "epoch": 1.875846501128668, + "grad_norm": 1.2746284533707484, + "learning_rate": 2.1128122003249541e-07, + "loss": 0.21442776918411255, + "step": 7064 + }, + { + "epoch": 1.876112070110211, + "grad_norm": 1.1849768238897622, + "learning_rate": 2.1038436380168114e-07, + "loss": 0.23126785457134247, + "step": 7065 + }, + { + "epoch": 1.876377639091754, + "grad_norm": 1.4246070766583077, + "learning_rate": 2.094893948933041e-07, + "loss": 0.24286629259586334, + "step": 7066 + }, + { + "epoch": 1.8766432080732969, + "grad_norm": 1.3706445020134141, + "learning_rate": 2.0859631347990406e-07, + "loss": 0.25771957635879517, + "step": 7067 + }, + { + "epoch": 1.87690877705484, + "grad_norm": 1.1754559873110961, + "learning_rate": 2.0770511973365436e-07, + "loss": 0.19837790727615356, + "step": 7068 + }, + { + "epoch": 1.877174346036383, + "grad_norm": 1.2372359407501599, + "learning_rate": 2.0681581382636984e-07, + "loss": 0.21209359169006348, + "step": 7069 + }, + { + "epoch": 1.877439915017926, + "grad_norm": 1.9178204608286211, + "learning_rate": 2.0592839592949554e-07, + "loss": 0.26641422510147095, + "step": 7070 + }, + { + "epoch": 1.877705483999469, + "grad_norm": 1.3604176831947503, + "learning_rate": 2.050428662141146e-07, + "loss": 0.21609601378440857, + "step": 7071 + }, + { + "epoch": 1.8779710529810119, + "grad_norm": 1.2861845280896875, + "learning_rate": 2.0415922485095051e-07, + "loss": 0.23642000555992126, + "step": 7072 + }, + { + "epoch": 1.8782366219625548, + "grad_norm": 1.3854568667341272, + "learning_rate": 2.0327747201035587e-07, + "loss": 0.24564675986766815, + "step": 7073 + }, + { + "epoch": 
1.8785021909440978, + "grad_norm": 1.229212126818568, + "learning_rate": 2.0239760786232355e-07, + "loss": 0.20001479983329773, + "step": 7074 + }, + { + "epoch": 1.8787677599256407, + "grad_norm": 1.2817747323253132, + "learning_rate": 2.015196325764801e-07, + "loss": 0.2590208649635315, + "step": 7075 + }, + { + "epoch": 1.8790333289071837, + "grad_norm": 1.2462050168824985, + "learning_rate": 2.0064354632208904e-07, + "loss": 0.23298504948616028, + "step": 7076 + }, + { + "epoch": 1.8792988978887266, + "grad_norm": 1.2573573484068483, + "learning_rate": 1.997693492680497e-07, + "loss": 0.22409996390342712, + "step": 7077 + }, + { + "epoch": 1.8795644668702696, + "grad_norm": 1.410723892029772, + "learning_rate": 1.9889704158289724e-07, + "loss": 0.27316784858703613, + "step": 7078 + }, + { + "epoch": 1.8798300358518125, + "grad_norm": 1.2924796650338854, + "learning_rate": 1.980266234348016e-07, + "loss": 0.2271946519613266, + "step": 7079 + }, + { + "epoch": 1.8800956048333555, + "grad_norm": 1.2438429761767338, + "learning_rate": 1.9715809499156858e-07, + "loss": 0.20887964963912964, + "step": 7080 + }, + { + "epoch": 1.8803611738148984, + "grad_norm": 1.2112268618082698, + "learning_rate": 1.9629145642064197e-07, + "loss": 0.23468685150146484, + "step": 7081 + }, + { + "epoch": 1.8806267427964414, + "grad_norm": 1.308865144497765, + "learning_rate": 1.9542670788909813e-07, + "loss": 0.21624556183815002, + "step": 7082 + }, + { + "epoch": 1.8808923117779843, + "grad_norm": 1.1751415989571612, + "learning_rate": 1.9456384956365149e-07, + "loss": 0.22328166663646698, + "step": 7083 + }, + { + "epoch": 1.8811578807595273, + "grad_norm": 1.3508603820961609, + "learning_rate": 1.93702881610649e-07, + "loss": 0.2526431381702423, + "step": 7084 + }, + { + "epoch": 1.8814234497410702, + "grad_norm": 1.3562256445660688, + "learning_rate": 1.9284380419607784e-07, + "loss": 0.23668771982192993, + "step": 7085 + }, + { + "epoch": 1.8816890187226132, + "grad_norm": 
1.2668189225170288, + "learning_rate": 1.9198661748555557e-07, + "loss": 0.24710845947265625, + "step": 7086 + }, + { + "epoch": 1.8819545877041561, + "grad_norm": 1.4047256701053605, + "learning_rate": 1.911313216443389e-07, + "loss": 0.22696900367736816, + "step": 7087 + }, + { + "epoch": 1.882220156685699, + "grad_norm": 1.3717447863189725, + "learning_rate": 1.9027791683731922e-07, + "loss": 0.21652163565158844, + "step": 7088 + }, + { + "epoch": 1.882485725667242, + "grad_norm": 1.3189608691767827, + "learning_rate": 1.894264032290205e-07, + "loss": 0.2166716307401657, + "step": 7089 + }, + { + "epoch": 1.882751294648785, + "grad_norm": 1.3746931913110367, + "learning_rate": 1.8857678098360698e-07, + "loss": 0.26200050115585327, + "step": 7090 + }, + { + "epoch": 1.883016863630328, + "grad_norm": 1.2945644704190118, + "learning_rate": 1.8772905026487654e-07, + "loss": 0.2292764037847519, + "step": 7091 + }, + { + "epoch": 1.883282432611871, + "grad_norm": 1.3106590918741248, + "learning_rate": 1.8688321123625842e-07, + "loss": 0.23893016576766968, + "step": 7092 + }, + { + "epoch": 1.8835480015934138, + "grad_norm": 1.2241030970764724, + "learning_rate": 1.860392640608244e-07, + "loss": 0.2509230673313141, + "step": 7093 + }, + { + "epoch": 1.8838135705749568, + "grad_norm": 1.2218686374923997, + "learning_rate": 1.8519720890127434e-07, + "loss": 0.24156486988067627, + "step": 7094 + }, + { + "epoch": 1.8840791395564997, + "grad_norm": 1.2859122561460798, + "learning_rate": 1.843570459199462e-07, + "loss": 0.2120019942522049, + "step": 7095 + }, + { + "epoch": 1.884344708538043, + "grad_norm": 1.6579646138710773, + "learning_rate": 1.835187752788159e-07, + "loss": 0.23400259017944336, + "step": 7096 + }, + { + "epoch": 1.8846102775195859, + "grad_norm": 1.281132346942695, + "learning_rate": 1.8268239713949087e-07, + "loss": 0.20913103222846985, + "step": 7097 + }, + { + "epoch": 1.8848758465011288, + "grad_norm": 1.3381319381686223, + "learning_rate": 
1.8184791166321546e-07, + "loss": 0.24468877911567688, + "step": 7098 + }, + { + "epoch": 1.8851414154826718, + "grad_norm": 1.236616212709848, + "learning_rate": 1.8101531901086767e-07, + "loss": 0.2038918137550354, + "step": 7099 + }, + { + "epoch": 1.8854069844642147, + "grad_norm": 1.3201086548941574, + "learning_rate": 1.8018461934296239e-07, + "loss": 0.24191413819789886, + "step": 7100 + }, + { + "epoch": 1.8856725534457577, + "grad_norm": 1.277539269643606, + "learning_rate": 1.793558128196493e-07, + "loss": 0.24394474923610687, + "step": 7101 + }, + { + "epoch": 1.8859381224273006, + "grad_norm": 1.1561225023553612, + "learning_rate": 1.7852889960071063e-07, + "loss": 0.22630709409713745, + "step": 7102 + }, + { + "epoch": 1.8862036914088436, + "grad_norm": 1.5472360212555962, + "learning_rate": 1.7770387984556768e-07, + "loss": 0.23936980962753296, + "step": 7103 + }, + { + "epoch": 1.8864692603903865, + "grad_norm": 1.275471897769737, + "learning_rate": 1.768807537132733e-07, + "loss": 0.24808618426322937, + "step": 7104 + }, + { + "epoch": 1.8867348293719295, + "grad_norm": 1.273035999339445, + "learning_rate": 1.7605952136251603e-07, + "loss": 0.23934635519981384, + "step": 7105 + }, + { + "epoch": 1.8870003983534724, + "grad_norm": 1.189686791776393, + "learning_rate": 1.7524018295162148e-07, + "loss": 0.22107656300067902, + "step": 7106 + }, + { + "epoch": 1.8872659673350154, + "grad_norm": 1.3496800848037154, + "learning_rate": 1.7442273863854553e-07, + "loss": 0.23253028094768524, + "step": 7107 + }, + { + "epoch": 1.8875315363165583, + "grad_norm": 1.3028365552765204, + "learning_rate": 1.7360718858088542e-07, + "loss": 0.2501102387905121, + "step": 7108 + }, + { + "epoch": 1.8877971052981013, + "grad_norm": 1.4057988238229884, + "learning_rate": 1.7279353293586765e-07, + "loss": 0.25537967681884766, + "step": 7109 + }, + { + "epoch": 1.8880626742796442, + "grad_norm": 2.7876746143917033, + "learning_rate": 1.7198177186035447e-07, + "loss": 
0.25701045989990234, + "step": 7110 + }, + { + "epoch": 1.8883282432611872, + "grad_norm": 1.1447271563365653, + "learning_rate": 1.7117190551084628e-07, + "loss": 0.2109440565109253, + "step": 7111 + }, + { + "epoch": 1.8885938122427302, + "grad_norm": 1.2454061070152636, + "learning_rate": 1.7036393404347373e-07, + "loss": 0.22767721116542816, + "step": 7112 + }, + { + "epoch": 1.888859381224273, + "grad_norm": 1.1572937395529788, + "learning_rate": 1.6955785761400444e-07, + "loss": 0.1976814568042755, + "step": 7113 + }, + { + "epoch": 1.889124950205816, + "grad_norm": 1.1727224852039306, + "learning_rate": 1.687536763778419e-07, + "loss": 0.21109873056411743, + "step": 7114 + }, + { + "epoch": 1.889390519187359, + "grad_norm": 1.1916227822459606, + "learning_rate": 1.6795139049002095e-07, + "loss": 0.2165786623954773, + "step": 7115 + }, + { + "epoch": 1.889656088168902, + "grad_norm": 1.2917556149315792, + "learning_rate": 1.6715100010521347e-07, + "loss": 0.23962441086769104, + "step": 7116 + }, + { + "epoch": 1.889921657150445, + "grad_norm": 1.2423009900583697, + "learning_rate": 1.6635250537772596e-07, + "loss": 0.23351140320301056, + "step": 7117 + }, + { + "epoch": 1.8901872261319879, + "grad_norm": 1.3034348272306633, + "learning_rate": 1.6555590646149866e-07, + "loss": 0.19999945163726807, + "step": 7118 + }, + { + "epoch": 1.8904527951135308, + "grad_norm": 1.432201467842623, + "learning_rate": 1.647612035101054e-07, + "loss": 0.27142196893692017, + "step": 7119 + }, + { + "epoch": 1.8907183640950738, + "grad_norm": 1.2861780172834696, + "learning_rate": 1.6396839667675691e-07, + "loss": 0.21525685489177704, + "step": 7120 + }, + { + "epoch": 1.8909839330766167, + "grad_norm": 3.2062699859400396, + "learning_rate": 1.631774861142965e-07, + "loss": 0.24305005371570587, + "step": 7121 + }, + { + "epoch": 1.8912495020581597, + "grad_norm": 1.2019998279555377, + "learning_rate": 1.6238847197520113e-07, + "loss": 0.23202842473983765, + "step": 7122 + }, + 
{ + "epoch": 1.8915150710397026, + "grad_norm": 1.4409003412080332, + "learning_rate": 1.6160135441158576e-07, + "loss": 0.24373790621757507, + "step": 7123 + }, + { + "epoch": 1.8917806400212456, + "grad_norm": 1.2360359431057044, + "learning_rate": 1.6081613357519565e-07, + "loss": 0.22774222493171692, + "step": 7124 + }, + { + "epoch": 1.8920462090027885, + "grad_norm": 1.2064368847282083, + "learning_rate": 1.6003280961741196e-07, + "loss": 0.20660057663917542, + "step": 7125 + }, + { + "epoch": 1.8923117779843315, + "grad_norm": 1.3070998228758686, + "learning_rate": 1.5925138268925166e-07, + "loss": 0.23578912019729614, + "step": 7126 + }, + { + "epoch": 1.8925773469658744, + "grad_norm": 1.2737250152668298, + "learning_rate": 1.5847185294136313e-07, + "loss": 0.20852091908454895, + "step": 7127 + }, + { + "epoch": 1.8928429159474174, + "grad_norm": 1.1465883719364975, + "learning_rate": 1.5769422052403172e-07, + "loss": 0.17455898225307465, + "step": 7128 + }, + { + "epoch": 1.8931084849289603, + "grad_norm": 1.5036497092390075, + "learning_rate": 1.5691848558717638e-07, + "loss": 0.29552748799324036, + "step": 7129 + }, + { + "epoch": 1.8933740539105033, + "grad_norm": 1.3009458238394367, + "learning_rate": 1.5614464828034746e-07, + "loss": 0.22972649335861206, + "step": 7130 + }, + { + "epoch": 1.8936396228920462, + "grad_norm": 1.2296689152648304, + "learning_rate": 1.5537270875273348e-07, + "loss": 0.2134108692407608, + "step": 7131 + }, + { + "epoch": 1.8939051918735892, + "grad_norm": 1.4119584533896288, + "learning_rate": 1.546026671531542e-07, + "loss": 0.24145451188087463, + "step": 7132 + }, + { + "epoch": 1.8941707608551321, + "grad_norm": 1.355860353407812, + "learning_rate": 1.5383452363006534e-07, + "loss": 0.2323920726776123, + "step": 7133 + }, + { + "epoch": 1.894436329836675, + "grad_norm": 1.197617700552455, + "learning_rate": 1.5306827833155403e-07, + "loss": 0.20091015100479126, + "step": 7134 + }, + { + "epoch": 1.894701898818218, + 
"grad_norm": 1.370489911603159, + "learning_rate": 1.523039314053465e-07, + "loss": 0.2451317310333252, + "step": 7135 + }, + { + "epoch": 1.894967467799761, + "grad_norm": 1.2946538259097045, + "learning_rate": 1.5154148299879822e-07, + "loss": 0.22744594514369965, + "step": 7136 + }, + { + "epoch": 1.895233036781304, + "grad_norm": 1.2046527835430252, + "learning_rate": 1.5078093325889943e-07, + "loss": 0.2460673749446869, + "step": 7137 + }, + { + "epoch": 1.895498605762847, + "grad_norm": 1.4172423595206858, + "learning_rate": 1.5002228233227722e-07, + "loss": 0.2524537444114685, + "step": 7138 + }, + { + "epoch": 1.8957641747443899, + "grad_norm": 1.1840127480017744, + "learning_rate": 1.4926553036518798e-07, + "loss": 0.2056279480457306, + "step": 7139 + }, + { + "epoch": 1.8960297437259328, + "grad_norm": 1.2144930845419581, + "learning_rate": 1.485106775035261e-07, + "loss": 0.2656184732913971, + "step": 7140 + }, + { + "epoch": 1.8962953127074758, + "grad_norm": 1.1903286988332102, + "learning_rate": 1.477577238928185e-07, + "loss": 0.2190116047859192, + "step": 7141 + }, + { + "epoch": 1.8965608816890187, + "grad_norm": 1.206151177902952, + "learning_rate": 1.4700666967822574e-07, + "loss": 0.22984017431735992, + "step": 7142 + }, + { + "epoch": 1.8968264506705617, + "grad_norm": 1.1949819121682481, + "learning_rate": 1.462575150045409e-07, + "loss": 0.17947378754615784, + "step": 7143 + }, + { + "epoch": 1.8970920196521046, + "grad_norm": 1.2649423314993642, + "learning_rate": 1.4551026001619395e-07, + "loss": 0.24965715408325195, + "step": 7144 + }, + { + "epoch": 1.8973575886336476, + "grad_norm": 1.236302993447548, + "learning_rate": 1.4476490485724526e-07, + "loss": 0.2337307333946228, + "step": 7145 + }, + { + "epoch": 1.8976231576151905, + "grad_norm": 1.2205039464348546, + "learning_rate": 1.4402144967139098e-07, + "loss": 0.22668538987636566, + "step": 7146 + }, + { + "epoch": 1.8978887265967335, + "grad_norm": 1.350785859399433, + 
"learning_rate": 1.4327989460196091e-07, + "loss": 0.21934781968593597, + "step": 7147 + }, + { + "epoch": 1.8981542955782764, + "grad_norm": 1.2212959594670445, + "learning_rate": 1.4254023979191844e-07, + "loss": 0.1957930624485016, + "step": 7148 + }, + { + "epoch": 1.8984198645598194, + "grad_norm": 1.1724780894008597, + "learning_rate": 1.4180248538385956e-07, + "loss": 0.22351369261741638, + "step": 7149 + }, + { + "epoch": 1.8986854335413623, + "grad_norm": 1.3930947329130605, + "learning_rate": 1.4106663152001487e-07, + "loss": 0.2603265047073364, + "step": 7150 + }, + { + "epoch": 1.8989510025229053, + "grad_norm": 1.260479860356455, + "learning_rate": 1.4033267834224873e-07, + "loss": 0.2566663324832916, + "step": 7151 + }, + { + "epoch": 1.8992165715044482, + "grad_norm": 1.2799319314175146, + "learning_rate": 1.3960062599205682e-07, + "loss": 0.23130206763744354, + "step": 7152 + }, + { + "epoch": 1.8994821404859912, + "grad_norm": 1.1757231252562024, + "learning_rate": 1.3887047461057179e-07, + "loss": 0.17946425080299377, + "step": 7153 + }, + { + "epoch": 1.8997477094675341, + "grad_norm": 1.2434099546308155, + "learning_rate": 1.3814222433855884e-07, + "loss": 0.23946328461170197, + "step": 7154 + }, + { + "epoch": 1.900013278449077, + "grad_norm": 1.2249367291717066, + "learning_rate": 1.3741587531641566e-07, + "loss": 0.21002715826034546, + "step": 7155 + }, + { + "epoch": 1.90027884743062, + "grad_norm": 1.3062374823275615, + "learning_rate": 1.3669142768417242e-07, + "loss": 0.2121986746788025, + "step": 7156 + }, + { + "epoch": 1.900544416412163, + "grad_norm": 1.373871289837254, + "learning_rate": 1.3596888158149525e-07, + "loss": 0.26400670409202576, + "step": 7157 + }, + { + "epoch": 1.900809985393706, + "grad_norm": 1.1813353744292436, + "learning_rate": 1.3524823714768375e-07, + "loss": 0.18764406442642212, + "step": 7158 + }, + { + "epoch": 1.9010755543752489, + "grad_norm": 1.415975931925435, + "learning_rate": 1.3452949452166686e-07, + 
"loss": 0.2550342381000519, + "step": 7159 + }, + { + "epoch": 1.9013411233567918, + "grad_norm": 1.304366194966887, + "learning_rate": 1.3381265384201035e-07, + "loss": 0.23188576102256775, + "step": 7160 + }, + { + "epoch": 1.9016066923383348, + "grad_norm": 1.2473914592639561, + "learning_rate": 1.3309771524691372e-07, + "loss": 0.23124513030052185, + "step": 7161 + }, + { + "epoch": 1.9018722613198777, + "grad_norm": 1.2056745011797427, + "learning_rate": 1.323846788742078e-07, + "loss": 0.19941067695617676, + "step": 7162 + }, + { + "epoch": 1.9021378303014207, + "grad_norm": 1.4624998875104938, + "learning_rate": 1.316735448613593e-07, + "loss": 0.22510412335395813, + "step": 7163 + }, + { + "epoch": 1.9024033992829636, + "grad_norm": 1.2448961229015743, + "learning_rate": 1.309643133454641e-07, + "loss": 0.19102326035499573, + "step": 7164 + }, + { + "epoch": 1.9026689682645066, + "grad_norm": 1.2307397875458914, + "learning_rate": 1.3025698446325618e-07, + "loss": 0.20826731622219086, + "step": 7165 + }, + { + "epoch": 1.9029345372460496, + "grad_norm": 1.3483240422328144, + "learning_rate": 1.2955155835109757e-07, + "loss": 0.23238909244537354, + "step": 7166 + }, + { + "epoch": 1.9032001062275925, + "grad_norm": 1.4338552298496805, + "learning_rate": 1.2884803514498833e-07, + "loss": 0.2635011374950409, + "step": 7167 + }, + { + "epoch": 1.9034656752091355, + "grad_norm": 1.1745725675637841, + "learning_rate": 1.281464149805578e-07, + "loss": 0.2073322981595993, + "step": 7168 + }, + { + "epoch": 1.9037312441906784, + "grad_norm": 1.2344038568124596, + "learning_rate": 1.274466979930711e-07, + "loss": 0.22091326117515564, + "step": 7169 + }, + { + "epoch": 1.9039968131722214, + "grad_norm": 1.114689842836081, + "learning_rate": 1.2674888431742472e-07, + "loss": 0.18613001704216003, + "step": 7170 + }, + { + "epoch": 1.9042623821537643, + "grad_norm": 1.2788383965135535, + "learning_rate": 1.2605297408814887e-07, + "loss": 0.2165849655866623, + "step": 
7171 + }, + { + "epoch": 1.9045279511353073, + "grad_norm": 1.294203512401496, + "learning_rate": 1.2535896743940844e-07, + "loss": 0.21317794919013977, + "step": 7172 + }, + { + "epoch": 1.9047935201168502, + "grad_norm": 1.47127212987638, + "learning_rate": 1.2466686450499866e-07, + "loss": 0.25221073627471924, + "step": 7173 + }, + { + "epoch": 1.9050590890983932, + "grad_norm": 1.2647474973058104, + "learning_rate": 1.239766654183472e-07, + "loss": 0.21598559617996216, + "step": 7174 + }, + { + "epoch": 1.9053246580799361, + "grad_norm": 1.2635227030316536, + "learning_rate": 1.232883703125187e-07, + "loss": 0.2284495085477829, + "step": 7175 + }, + { + "epoch": 1.905590227061479, + "grad_norm": 1.1825527167306378, + "learning_rate": 1.2260197932020713e-07, + "loss": 0.21899332106113434, + "step": 7176 + }, + { + "epoch": 1.905855796043022, + "grad_norm": 1.3588902485974734, + "learning_rate": 1.2191749257374097e-07, + "loss": 0.2633277177810669, + "step": 7177 + }, + { + "epoch": 1.906121365024565, + "grad_norm": 1.2643904365611611, + "learning_rate": 1.2123491020508137e-07, + "loss": 0.2330140471458435, + "step": 7178 + }, + { + "epoch": 1.906386934006108, + "grad_norm": 1.2757939155257039, + "learning_rate": 1.2055423234582087e-07, + "loss": 0.21859750151634216, + "step": 7179 + }, + { + "epoch": 1.9066525029876509, + "grad_norm": 1.3985563606047093, + "learning_rate": 1.198754591271878e-07, + "loss": 0.252164363861084, + "step": 7180 + }, + { + "epoch": 1.906918071969194, + "grad_norm": 1.4365501399575176, + "learning_rate": 1.191985906800408e-07, + "loss": 0.24968160688877106, + "step": 7181 + }, + { + "epoch": 1.907183640950737, + "grad_norm": 1.199067091736319, + "learning_rate": 1.185236271348722e-07, + "loss": 0.2083423137664795, + "step": 7182 + }, + { + "epoch": 1.90744920993228, + "grad_norm": 1.258208503364781, + "learning_rate": 1.1785056862180789e-07, + "loss": 0.2468394935131073, + "step": 7183 + }, + { + "epoch": 1.907714778913823, + 
"grad_norm": 1.2908738922715033, + "learning_rate": 1.1717941527060405e-07, + "loss": 0.22417521476745605, + "step": 7184 + }, + { + "epoch": 1.9079803478953659, + "grad_norm": 1.2789853859840312, + "learning_rate": 1.1651016721065167e-07, + "loss": 0.2411842793226242, + "step": 7185 + }, + { + "epoch": 1.9082459168769088, + "grad_norm": 1.311967953603668, + "learning_rate": 1.1584282457097417e-07, + "loss": 0.24650761485099792, + "step": 7186 + }, + { + "epoch": 1.9085114858584518, + "grad_norm": 1.3305923315328496, + "learning_rate": 1.1517738748022755e-07, + "loss": 0.22433717548847198, + "step": 7187 + }, + { + "epoch": 1.9087770548399947, + "grad_norm": 1.2666444248015347, + "learning_rate": 1.145138560667003e-07, + "loss": 0.20867910981178284, + "step": 7188 + }, + { + "epoch": 1.9090426238215377, + "grad_norm": 1.2511449541105855, + "learning_rate": 1.138522304583134e-07, + "loss": 0.21889618039131165, + "step": 7189 + }, + { + "epoch": 1.9093081928030806, + "grad_norm": 1.113107479716362, + "learning_rate": 1.1319251078261928e-07, + "loss": 0.19350749254226685, + "step": 7190 + }, + { + "epoch": 1.9095737617846236, + "grad_norm": 1.183265546980091, + "learning_rate": 1.125346971668051e-07, + "loss": 0.19123657047748566, + "step": 7191 + }, + { + "epoch": 1.9098393307661665, + "grad_norm": 1.2653223306994201, + "learning_rate": 1.118787897376905e-07, + "loss": 0.21433782577514648, + "step": 7192 + }, + { + "epoch": 1.9101048997477095, + "grad_norm": 1.474925382041675, + "learning_rate": 1.1122478862172437e-07, + "loss": 0.2521187663078308, + "step": 7193 + }, + { + "epoch": 1.9103704687292524, + "grad_norm": 1.2835872924926361, + "learning_rate": 1.1057269394499248e-07, + "loss": 0.2141486555337906, + "step": 7194 + }, + { + "epoch": 1.9106360377107954, + "grad_norm": 1.271472683987379, + "learning_rate": 1.0992250583320985e-07, + "loss": 0.22960343956947327, + "step": 7195 + }, + { + "epoch": 1.9109016066923383, + "grad_norm": 1.3433609684783299, + 
"learning_rate": 1.092742244117262e-07, + "loss": 0.21809744834899902, + "step": 7196 + }, + { + "epoch": 1.9111671756738813, + "grad_norm": 1.248347973820862, + "learning_rate": 1.0862784980552044e-07, + "loss": 0.22418212890625, + "step": 7197 + }, + { + "epoch": 1.9114327446554242, + "grad_norm": 1.2504701200893746, + "learning_rate": 1.0798338213920845e-07, + "loss": 0.22050701081752777, + "step": 7198 + }, + { + "epoch": 1.9116983136369672, + "grad_norm": 1.206849931438756, + "learning_rate": 1.0734082153703418e-07, + "loss": 0.23200345039367676, + "step": 7199 + }, + { + "epoch": 1.9119638826185101, + "grad_norm": 1.1102825382626649, + "learning_rate": 1.0670016812287631e-07, + "loss": 0.18366631865501404, + "step": 7200 + }, + { + "epoch": 1.912229451600053, + "grad_norm": 1.2844567521026582, + "learning_rate": 1.0606142202024605e-07, + "loss": 0.24362193048000336, + "step": 7201 + }, + { + "epoch": 1.912495020581596, + "grad_norm": 1.2822631921528913, + "learning_rate": 1.0542458335228601e-07, + "loss": 0.2216200977563858, + "step": 7202 + }, + { + "epoch": 1.912760589563139, + "grad_norm": 1.0921875359661608, + "learning_rate": 1.0478965224176907e-07, + "loss": 0.20216065645217896, + "step": 7203 + }, + { + "epoch": 1.913026158544682, + "grad_norm": 1.254966671592246, + "learning_rate": 1.041566288111051e-07, + "loss": 0.22054359316825867, + "step": 7204 + }, + { + "epoch": 1.913291727526225, + "grad_norm": 1.3532366246655447, + "learning_rate": 1.0352551318233206e-07, + "loss": 0.21569015085697174, + "step": 7205 + }, + { + "epoch": 1.9135572965077678, + "grad_norm": 1.2826756039782425, + "learning_rate": 1.028963054771226e-07, + "loss": 0.22967267036437988, + "step": 7206 + }, + { + "epoch": 1.9138228654893108, + "grad_norm": 1.3494789006319945, + "learning_rate": 1.0226900581677968e-07, + "loss": 0.2422460913658142, + "step": 7207 + }, + { + "epoch": 1.9140884344708538, + "grad_norm": 1.3606228589652338, + "learning_rate": 1.0164361432223879e-07, + 
"loss": 0.25891292095184326, + "step": 7208 + }, + { + "epoch": 1.914354003452397, + "grad_norm": 1.3570561855059022, + "learning_rate": 1.0102013111406905e-07, + "loss": 0.26915764808654785, + "step": 7209 + }, + { + "epoch": 1.9146195724339399, + "grad_norm": 1.3889996377213247, + "learning_rate": 1.0039855631247097e-07, + "loss": 0.2268485426902771, + "step": 7210 + }, + { + "epoch": 1.9148851414154828, + "grad_norm": 1.254622691077732, + "learning_rate": 9.977889003727647e-08, + "loss": 0.22551512718200684, + "step": 7211 + }, + { + "epoch": 1.9151507103970258, + "grad_norm": 1.233084698895248, + "learning_rate": 9.91611324079489e-08, + "loss": 0.24224743247032166, + "step": 7212 + }, + { + "epoch": 1.9154162793785687, + "grad_norm": 1.2426176239380708, + "learning_rate": 9.854528354358517e-08, + "loss": 0.19550879299640656, + "step": 7213 + }, + { + "epoch": 1.9156818483601117, + "grad_norm": 1.3449782320604147, + "learning_rate": 9.793134356291478e-08, + "loss": 0.24986523389816284, + "step": 7214 + }, + { + "epoch": 1.9159474173416546, + "grad_norm": 1.3340583070384961, + "learning_rate": 9.731931258429638e-08, + "loss": 0.2565170228481293, + "step": 7215 + }, + { + "epoch": 1.9162129863231976, + "grad_norm": 1.185156912642083, + "learning_rate": 9.670919072572449e-08, + "loss": 0.2166958749294281, + "step": 7216 + }, + { + "epoch": 1.9164785553047405, + "grad_norm": 1.2903999319183896, + "learning_rate": 9.610097810482166e-08, + "loss": 0.2002115249633789, + "step": 7217 + }, + { + "epoch": 1.9167441242862835, + "grad_norm": 1.1589813054229285, + "learning_rate": 9.549467483884412e-08, + "loss": 0.209486186504364, + "step": 7218 + }, + { + "epoch": 1.9170096932678264, + "grad_norm": 1.2748483155423624, + "learning_rate": 9.489028104468056e-08, + "loss": 0.22061321139335632, + "step": 7219 + }, + { + "epoch": 1.9172752622493694, + "grad_norm": 1.3916500275624957, + "learning_rate": 9.428779683885114e-08, + "loss": 0.21880047023296356, + "step": 7220 + }, + { 
+ "epoch": 1.9175408312309123, + "grad_norm": 1.174801358834737, + "learning_rate": 9.368722233750849e-08, + "loss": 0.22674325108528137, + "step": 7221 + }, + { + "epoch": 1.9178064002124553, + "grad_norm": 1.2877078963500264, + "learning_rate": 9.308855765643332e-08, + "loss": 0.22100718319416046, + "step": 7222 + }, + { + "epoch": 1.9180719691939982, + "grad_norm": 1.3291196619762962, + "learning_rate": 9.249180291104553e-08, + "loss": 0.23105254769325256, + "step": 7223 + }, + { + "epoch": 1.9183375381755412, + "grad_norm": 1.2897395451200044, + "learning_rate": 9.189695821638755e-08, + "loss": 0.22483405470848083, + "step": 7224 + }, + { + "epoch": 1.9186031071570842, + "grad_norm": 1.0701399001286365, + "learning_rate": 9.130402368714208e-08, + "loss": 0.1939004510641098, + "step": 7225 + }, + { + "epoch": 1.918868676138627, + "grad_norm": 1.2349263677236755, + "learning_rate": 9.071299943761769e-08, + "loss": 0.21722440421581268, + "step": 7226 + }, + { + "epoch": 1.91913424512017, + "grad_norm": 1.2911544131515666, + "learning_rate": 9.012388558175877e-08, + "loss": 0.24213966727256775, + "step": 7227 + }, + { + "epoch": 1.919399814101713, + "grad_norm": 1.2266941536480729, + "learning_rate": 8.953668223313783e-08, + "loss": 0.2305546998977661, + "step": 7228 + }, + { + "epoch": 1.919665383083256, + "grad_norm": 1.3932840646040938, + "learning_rate": 8.895138950496207e-08, + "loss": 0.2678033709526062, + "step": 7229 + }, + { + "epoch": 1.919930952064799, + "grad_norm": 1.2449965535251106, + "learning_rate": 8.836800751006791e-08, + "loss": 0.2491014301776886, + "step": 7230 + }, + { + "epoch": 1.9201965210463419, + "grad_norm": 1.2551836576043742, + "learning_rate": 8.778653636092537e-08, + "loss": 0.21837326884269714, + "step": 7231 + }, + { + "epoch": 1.9204620900278848, + "grad_norm": 1.2745391136427304, + "learning_rate": 8.72069761696348e-08, + "loss": 0.24149999022483826, + "step": 7232 + }, + { + "epoch": 1.9207276590094278, + "grad_norm": 
1.3444140835580012, + "learning_rate": 8.662932704792793e-08, + "loss": 0.2124684453010559, + "step": 7233 + }, + { + "epoch": 1.9209932279909707, + "grad_norm": 1.3660213009765734, + "learning_rate": 8.60535891071712e-08, + "loss": 0.2452150285243988, + "step": 7234 + }, + { + "epoch": 1.9212587969725137, + "grad_norm": 1.2005299446152509, + "learning_rate": 8.547976245835698e-08, + "loss": 0.23598846793174744, + "step": 7235 + }, + { + "epoch": 1.9215243659540566, + "grad_norm": 1.3152974069295431, + "learning_rate": 8.490784721211454e-08, + "loss": 0.2105225920677185, + "step": 7236 + }, + { + "epoch": 1.9217899349355996, + "grad_norm": 1.4424977304862223, + "learning_rate": 8.433784347870122e-08, + "loss": 0.2585388720035553, + "step": 7237 + }, + { + "epoch": 1.9220555039171425, + "grad_norm": 1.2300698994172445, + "learning_rate": 8.376975136800691e-08, + "loss": 0.21703900396823883, + "step": 7238 + }, + { + "epoch": 1.9223210728986855, + "grad_norm": 1.2580366958382383, + "learning_rate": 8.3203570989554e-08, + "loss": 0.22771210968494415, + "step": 7239 + }, + { + "epoch": 1.9225866418802284, + "grad_norm": 1.1645003525207898, + "learning_rate": 8.263930245249408e-08, + "loss": 0.22535575926303864, + "step": 7240 + }, + { + "epoch": 1.9228522108617714, + "grad_norm": 1.1822452042500315, + "learning_rate": 8.207694586561344e-08, + "loss": 0.2052595466375351, + "step": 7241 + }, + { + "epoch": 1.9231177798433143, + "grad_norm": 1.2683012213528768, + "learning_rate": 8.151650133732536e-08, + "loss": 0.19611456990242004, + "step": 7242 + }, + { + "epoch": 1.9233833488248573, + "grad_norm": 1.2762939262923303, + "learning_rate": 8.095796897567787e-08, + "loss": 0.20256826281547546, + "step": 7243 + }, + { + "epoch": 1.9236489178064002, + "grad_norm": 1.5444723931343434, + "learning_rate": 8.040134888835038e-08, + "loss": 0.25462138652801514, + "step": 7244 + }, + { + "epoch": 1.9239144867879432, + "grad_norm": 1.2813246309729553, + "learning_rate": 
7.984664118265262e-08, + "loss": 0.27362316846847534, + "step": 7245 + }, + { + "epoch": 1.9241800557694861, + "grad_norm": 1.3526739723939418, + "learning_rate": 7.929384596552459e-08, + "loss": 0.23749098181724548, + "step": 7246 + }, + { + "epoch": 1.924445624751029, + "grad_norm": 1.3016147885306604, + "learning_rate": 7.874296334353882e-08, + "loss": 0.2472018599510193, + "step": 7247 + }, + { + "epoch": 1.924711193732572, + "grad_norm": 1.3451463766339227, + "learning_rate": 7.819399342290034e-08, + "loss": 0.23181989789009094, + "step": 7248 + }, + { + "epoch": 1.924976762714115, + "grad_norm": 1.2415200588572097, + "learning_rate": 7.764693630944231e-08, + "loss": 0.21363665163516998, + "step": 7249 + }, + { + "epoch": 1.925242331695658, + "grad_norm": 1.1849821155034532, + "learning_rate": 7.710179210863144e-08, + "loss": 0.21239221096038818, + "step": 7250 + }, + { + "epoch": 1.925507900677201, + "grad_norm": 1.4494720585200522, + "learning_rate": 7.655856092556591e-08, + "loss": 0.2643742263317108, + "step": 7251 + }, + { + "epoch": 1.9257734696587439, + "grad_norm": 1.251877664981762, + "learning_rate": 7.601724286497414e-08, + "loss": 0.2232428789138794, + "step": 7252 + }, + { + "epoch": 1.9260390386402868, + "grad_norm": 1.313277386530887, + "learning_rate": 7.547783803121489e-08, + "loss": 0.2052377462387085, + "step": 7253 + }, + { + "epoch": 1.9263046076218298, + "grad_norm": 1.2540878413614547, + "learning_rate": 7.494034652827942e-08, + "loss": 0.22194740176200867, + "step": 7254 + }, + { + "epoch": 1.9265701766033727, + "grad_norm": 1.2500554609811554, + "learning_rate": 7.440476845979038e-08, + "loss": 0.22004084289073944, + "step": 7255 + }, + { + "epoch": 1.9268357455849157, + "grad_norm": 1.5480704193409933, + "learning_rate": 7.387110392899965e-08, + "loss": 0.2218078374862671, + "step": 7256 + }, + { + "epoch": 1.9271013145664586, + "grad_norm": 1.3006193889830067, + "learning_rate": 7.33393530387927e-08, + "loss": 0.23272839188575745, + 
"step": 7257 + }, + { + "epoch": 1.9273668835480016, + "grad_norm": 1.3119971487868216, + "learning_rate": 7.280951589168417e-08, + "loss": 0.23666653037071228, + "step": 7258 + }, + { + "epoch": 1.9276324525295445, + "grad_norm": 1.235294099691234, + "learning_rate": 7.228159258982126e-08, + "loss": 0.21946533024311066, + "step": 7259 + }, + { + "epoch": 1.9278980215110875, + "grad_norm": 1.252328485116134, + "learning_rate": 7.175558323498033e-08, + "loss": 0.22158634662628174, + "step": 7260 + }, + { + "epoch": 1.9281635904926304, + "grad_norm": 1.1330771135999202, + "learning_rate": 7.123148792857026e-08, + "loss": 0.19978654384613037, + "step": 7261 + }, + { + "epoch": 1.9284291594741734, + "grad_norm": 1.2859436875650823, + "learning_rate": 7.070930677163023e-08, + "loss": 0.21197813749313354, + "step": 7262 + }, + { + "epoch": 1.9286947284557163, + "grad_norm": 1.2611518825786316, + "learning_rate": 7.018903986483083e-08, + "loss": 0.22650468349456787, + "step": 7263 + }, + { + "epoch": 1.9289602974372593, + "grad_norm": 1.2701948406662635, + "learning_rate": 6.967068730847293e-08, + "loss": 0.22257481515407562, + "step": 7264 + }, + { + "epoch": 1.9292258664188022, + "grad_norm": 1.3219742856760701, + "learning_rate": 6.915424920248992e-08, + "loss": 0.24899804592132568, + "step": 7265 + }, + { + "epoch": 1.9294914354003452, + "grad_norm": 1.2996576951077934, + "learning_rate": 6.863972564644328e-08, + "loss": 0.250610888004303, + "step": 7266 + }, + { + "epoch": 1.9297570043818881, + "grad_norm": 1.251137163804366, + "learning_rate": 6.81271167395292e-08, + "loss": 0.22786292433738708, + "step": 7267 + }, + { + "epoch": 1.930022573363431, + "grad_norm": 1.2890465128808872, + "learning_rate": 6.761642258056977e-08, + "loss": 0.22816789150238037, + "step": 7268 + }, + { + "epoch": 1.930288142344974, + "grad_norm": 1.3522601458627446, + "learning_rate": 6.7107643268024e-08, + "loss": 0.2589687407016754, + "step": 7269 + }, + { + "epoch": 1.930553711326517, + 
"grad_norm": 1.1963236616697677, + "learning_rate": 6.660077889997673e-08, + "loss": 0.2281583547592163, + "step": 7270 + }, + { + "epoch": 1.93081928030806, + "grad_norm": 1.3347065729182181, + "learning_rate": 6.60958295741454e-08, + "loss": 0.22833740711212158, + "step": 7271 + }, + { + "epoch": 1.931084849289603, + "grad_norm": 1.1611313283452582, + "learning_rate": 6.559279538787877e-08, + "loss": 0.20720313489437103, + "step": 7272 + }, + { + "epoch": 1.9313504182711458, + "grad_norm": 1.1884544288263172, + "learning_rate": 6.509167643815594e-08, + "loss": 0.17191773653030396, + "step": 7273 + }, + { + "epoch": 1.9316159872526888, + "grad_norm": 1.1354230474675757, + "learning_rate": 6.459247282158632e-08, + "loss": 0.23586943745613098, + "step": 7274 + }, + { + "epoch": 1.9318815562342317, + "grad_norm": 1.3318856895013969, + "learning_rate": 6.409518463441067e-08, + "loss": 0.21353168785572052, + "step": 7275 + }, + { + "epoch": 1.9321471252157747, + "grad_norm": 1.404937308132313, + "learning_rate": 6.359981197250009e-08, + "loss": 0.23148195445537567, + "step": 7276 + }, + { + "epoch": 1.9324126941973176, + "grad_norm": 1.3040478141172254, + "learning_rate": 6.310635493135709e-08, + "loss": 0.2113666534423828, + "step": 7277 + }, + { + "epoch": 1.9326782631788606, + "grad_norm": 1.3399999009479682, + "learning_rate": 6.261481360611332e-08, + "loss": 0.27689510583877563, + "step": 7278 + }, + { + "epoch": 1.9329438321604036, + "grad_norm": 1.2809237898551964, + "learning_rate": 6.2125188091533e-08, + "loss": 0.23746277391910553, + "step": 7279 + }, + { + "epoch": 1.9332094011419465, + "grad_norm": 1.4215326252349767, + "learning_rate": 6.163747848201062e-08, + "loss": 0.23123708367347717, + "step": 7280 + }, + { + "epoch": 1.9334749701234895, + "grad_norm": 1.3095914464878196, + "learning_rate": 6.115168487157097e-08, + "loss": 0.23640167713165283, + "step": 7281 + }, + { + "epoch": 1.9337405391050324, + "grad_norm": 1.3278235730632808, + "learning_rate": 
6.066780735386801e-08, + "loss": 0.2259385585784912, + "step": 7282 + }, + { + "epoch": 1.9340061080865754, + "grad_norm": 1.230137664492021, + "learning_rate": 6.018584602218824e-08, + "loss": 0.219761461019516, + "step": 7283 + }, + { + "epoch": 1.9342716770681183, + "grad_norm": 1.43054331413576, + "learning_rate": 5.970580096944733e-08, + "loss": 0.24411989748477936, + "step": 7284 + }, + { + "epoch": 1.9345372460496613, + "grad_norm": 1.196712051616964, + "learning_rate": 5.922767228819459e-08, + "loss": 0.232415571808815, + "step": 7285 + }, + { + "epoch": 1.9348028150312042, + "grad_norm": 1.341424963494065, + "learning_rate": 5.875146007060517e-08, + "loss": 0.25938165187835693, + "step": 7286 + }, + { + "epoch": 1.9350683840127472, + "grad_norm": 1.253589726996753, + "learning_rate": 5.827716440848785e-08, + "loss": 0.22138425707817078, + "step": 7287 + }, + { + "epoch": 1.9353339529942901, + "grad_norm": 1.12038038288381, + "learning_rate": 5.7804785393282825e-08, + "loss": 0.19724398851394653, + "step": 7288 + }, + { + "epoch": 1.935599521975833, + "grad_norm": 1.4840167690508577, + "learning_rate": 5.7334323116056136e-08, + "loss": 0.25307583808898926, + "step": 7289 + }, + { + "epoch": 1.935865090957376, + "grad_norm": 1.2525903433235852, + "learning_rate": 5.686577766751078e-08, + "loss": 0.2436421811580658, + "step": 7290 + }, + { + "epoch": 1.936130659938919, + "grad_norm": 1.2518328182394873, + "learning_rate": 5.6399149137973394e-08, + "loss": 0.2164984941482544, + "step": 7291 + }, + { + "epoch": 1.936396228920462, + "grad_norm": 1.2277499731042363, + "learning_rate": 5.5934437617407576e-08, + "loss": 0.22526800632476807, + "step": 7292 + }, + { + "epoch": 1.936661797902005, + "grad_norm": 2.195756796154145, + "learning_rate": 5.547164319540277e-08, + "loss": 0.27787747979164124, + "step": 7293 + }, + { + "epoch": 1.936927366883548, + "grad_norm": 1.2647979578451993, + "learning_rate": 5.5010765961179825e-08, + "loss": 0.2188001275062561, + 
"step": 7294 + }, + { + "epoch": 1.937192935865091, + "grad_norm": 1.2454775538056309, + "learning_rate": 5.4551806003591e-08, + "loss": 0.22620335221290588, + "step": 7295 + }, + { + "epoch": 1.937458504846634, + "grad_norm": 1.186081247005514, + "learning_rate": 5.409476341111775e-08, + "loss": 0.20357783138751984, + "step": 7296 + }, + { + "epoch": 1.937724073828177, + "grad_norm": 1.2316030990526627, + "learning_rate": 5.3639638271872906e-08, + "loss": 0.22717830538749695, + "step": 7297 + }, + { + "epoch": 1.9379896428097199, + "grad_norm": 1.1600371116406252, + "learning_rate": 5.318643067360074e-08, + "loss": 0.20139163732528687, + "step": 7298 + }, + { + "epoch": 1.9382552117912628, + "grad_norm": 1.3377291184643103, + "learning_rate": 5.273514070367247e-08, + "loss": 0.2620807886123657, + "step": 7299 + }, + { + "epoch": 1.9385207807728058, + "grad_norm": 1.2240680803779018, + "learning_rate": 5.2285768449091834e-08, + "loss": 0.2102596014738083, + "step": 7300 + }, + { + "epoch": 1.9387863497543487, + "grad_norm": 1.3057613284367482, + "learning_rate": 5.183831399649175e-08, + "loss": 0.2105238288640976, + "step": 7301 + }, + { + "epoch": 1.9390519187358917, + "grad_norm": 1.2241670740951547, + "learning_rate": 5.1392777432138773e-08, + "loss": 0.22178848087787628, + "step": 7302 + }, + { + "epoch": 1.9393174877174346, + "grad_norm": 1.3648564311332518, + "learning_rate": 5.094915884192419e-08, + "loss": 0.23375345766544342, + "step": 7303 + }, + { + "epoch": 1.9395830566989776, + "grad_norm": 1.3411332724549108, + "learning_rate": 5.050745831137405e-08, + "loss": 0.22709332406520844, + "step": 7304 + }, + { + "epoch": 1.9398486256805205, + "grad_norm": 1.270429998105922, + "learning_rate": 5.0067675925642437e-08, + "loss": 0.2312362790107727, + "step": 7305 + }, + { + "epoch": 1.9401141946620635, + "grad_norm": 1.159162680689607, + "learning_rate": 4.962981176951376e-08, + "loss": 0.2014419138431549, + "step": 7306 + }, + { + "epoch": 1.9403797636436064, 
+ "grad_norm": 1.4294147842238243, + "learning_rate": 4.9193865927404936e-08, + "loss": 0.23700466752052307, + "step": 7307 + }, + { + "epoch": 1.9406453326251494, + "grad_norm": 1.3814639969092575, + "learning_rate": 4.8759838483358745e-08, + "loss": 0.23362770676612854, + "step": 7308 + }, + { + "epoch": 1.9409109016066923, + "grad_norm": 1.4217349736822034, + "learning_rate": 4.832772952105269e-08, + "loss": 0.26057323813438416, + "step": 7309 + }, + { + "epoch": 1.9411764705882353, + "grad_norm": 1.1693504727058668, + "learning_rate": 4.789753912379014e-08, + "loss": 0.20954950153827667, + "step": 7310 + }, + { + "epoch": 1.9414420395697782, + "grad_norm": 1.1532528532836688, + "learning_rate": 4.746926737450919e-08, + "loss": 0.2100827842950821, + "step": 7311 + }, + { + "epoch": 1.9417076085513212, + "grad_norm": 1.2509560196931713, + "learning_rate": 4.7042914355773795e-08, + "loss": 0.216691792011261, + "step": 7312 + }, + { + "epoch": 1.9419731775328641, + "grad_norm": 1.2086430330598397, + "learning_rate": 4.6618480149780434e-08, + "loss": 0.22815749049186707, + "step": 7313 + }, + { + "epoch": 1.942238746514407, + "grad_norm": 1.3440658280324072, + "learning_rate": 4.6195964838353646e-08, + "loss": 0.23365731537342072, + "step": 7314 + }, + { + "epoch": 1.94250431549595, + "grad_norm": 1.5301363693806977, + "learning_rate": 4.577536850295161e-08, + "loss": 0.2112172693014145, + "step": 7315 + }, + { + "epoch": 1.942769884477493, + "grad_norm": 1.1945701714854287, + "learning_rate": 4.5356691224659466e-08, + "loss": 0.21821950376033783, + "step": 7316 + }, + { + "epoch": 1.943035453459036, + "grad_norm": 1.1491339078592526, + "learning_rate": 4.4939933084192646e-08, + "loss": 0.2374412566423416, + "step": 7317 + }, + { + "epoch": 1.943301022440579, + "grad_norm": 1.3549046355713708, + "learning_rate": 4.4525094161897987e-08, + "loss": 0.2483779489994049, + "step": 7318 + }, + { + "epoch": 1.9435665914221218, + "grad_norm": 1.327945477663327, + 
"learning_rate": 4.411217453775152e-08, + "loss": 0.23641882836818695, + "step": 7319 + }, + { + "epoch": 1.9438321604036648, + "grad_norm": 1.3586245026219714, + "learning_rate": 4.370117429135956e-08, + "loss": 0.24779492616653442, + "step": 7320 + }, + { + "epoch": 1.944097729385208, + "grad_norm": 1.1641395539357577, + "learning_rate": 4.329209350195651e-08, + "loss": 0.20288071036338806, + "step": 7321 + }, + { + "epoch": 1.944363298366751, + "grad_norm": 1.2676649817410126, + "learning_rate": 4.288493224840928e-08, + "loss": 0.24286144971847534, + "step": 7322 + }, + { + "epoch": 1.9446288673482939, + "grad_norm": 1.3164985028745375, + "learning_rate": 4.2479690609213976e-08, + "loss": 0.22825902700424194, + "step": 7323 + }, + { + "epoch": 1.9448944363298368, + "grad_norm": 1.255280762331411, + "learning_rate": 4.207636866249587e-08, + "loss": 0.22563335299491882, + "step": 7324 + }, + { + "epoch": 1.9451600053113798, + "grad_norm": 1.2990544857906836, + "learning_rate": 4.167496648601166e-08, + "loss": 0.22853273153305054, + "step": 7325 + }, + { + "epoch": 1.9454255742929227, + "grad_norm": 1.1281442356079434, + "learning_rate": 4.1275484157147216e-08, + "loss": 0.20790672302246094, + "step": 7326 + }, + { + "epoch": 1.9456911432744657, + "grad_norm": 1.1980029703513235, + "learning_rate": 4.087792175291649e-08, + "loss": 0.2165423035621643, + "step": 7327 + }, + { + "epoch": 1.9459567122560086, + "grad_norm": 1.3858946395294593, + "learning_rate": 4.048227934996485e-08, + "loss": 0.2605394721031189, + "step": 7328 + }, + { + "epoch": 1.9462222812375516, + "grad_norm": 1.280554987273632, + "learning_rate": 4.008855702456904e-08, + "loss": 0.22624900937080383, + "step": 7329 + }, + { + "epoch": 1.9464878502190945, + "grad_norm": 1.1967949808184344, + "learning_rate": 3.9696754852632804e-08, + "loss": 0.23086196184158325, + "step": 7330 + }, + { + "epoch": 1.9467534192006375, + "grad_norm": 1.4330145211347993, + "learning_rate": 3.9306872909691265e-08, + 
"loss": 0.24633410573005676, + "step": 7331 + }, + { + "epoch": 1.9470189881821804, + "grad_norm": 2.2568432653955894, + "learning_rate": 3.8918911270908745e-08, + "loss": 0.2535535395145416, + "step": 7332 + }, + { + "epoch": 1.9472845571637234, + "grad_norm": 1.3555855555438505, + "learning_rate": 3.853287001108097e-08, + "loss": 0.23904260993003845, + "step": 7333 + }, + { + "epoch": 1.9475501261452663, + "grad_norm": 1.3963340527453718, + "learning_rate": 3.814874920463063e-08, + "loss": 0.22525179386138916, + "step": 7334 + }, + { + "epoch": 1.9478156951268093, + "grad_norm": 1.415360473918547, + "learning_rate": 3.776654892561293e-08, + "loss": 0.21139883995056152, + "step": 7335 + }, + { + "epoch": 1.9480812641083523, + "grad_norm": 1.2272269269066283, + "learning_rate": 3.738626924771005e-08, + "loss": 0.21939310431480408, + "step": 7336 + }, + { + "epoch": 1.9483468330898952, + "grad_norm": 1.1845473795192814, + "learning_rate": 3.7007910244236664e-08, + "loss": 0.22852283716201782, + "step": 7337 + }, + { + "epoch": 1.9486124020714382, + "grad_norm": 1.2529721413425112, + "learning_rate": 3.663147198813666e-08, + "loss": 0.20769211649894714, + "step": 7338 + }, + { + "epoch": 1.948877971052981, + "grad_norm": 1.216093250313145, + "learning_rate": 3.625695455198086e-08, + "loss": 0.21721890568733215, + "step": 7339 + }, + { + "epoch": 1.949143540034524, + "grad_norm": 1.261493312403511, + "learning_rate": 3.588435800797263e-08, + "loss": 0.24236848950386047, + "step": 7340 + }, + { + "epoch": 1.949409109016067, + "grad_norm": 1.21142050375974, + "learning_rate": 3.5513682427944505e-08, + "loss": 0.2300192266702652, + "step": 7341 + }, + { + "epoch": 1.94967467799761, + "grad_norm": 1.1850825722481098, + "learning_rate": 3.5144927883358215e-08, + "loss": 0.21636728942394257, + "step": 7342 + }, + { + "epoch": 1.949940246979153, + "grad_norm": 1.3000939007920165, + "learning_rate": 3.477809444530578e-08, + "loss": 0.25367966294288635, + "step": 7343 + }, + { 
+ "epoch": 1.9502058159606959, + "grad_norm": 1.4245768388392126, + "learning_rate": 3.4413182184507285e-08, + "loss": 0.24514247477054596, + "step": 7344 + }, + { + "epoch": 1.9504713849422388, + "grad_norm": 1.1048557155163508, + "learning_rate": 3.405019117131425e-08, + "loss": 0.18460404872894287, + "step": 7345 + }, + { + "epoch": 1.9507369539237818, + "grad_norm": 1.275062396510646, + "learning_rate": 3.3689121475706244e-08, + "loss": 0.2096845805644989, + "step": 7346 + }, + { + "epoch": 1.9510025229053247, + "grad_norm": 1.2314050158221594, + "learning_rate": 3.332997316729536e-08, + "loss": 0.22435057163238525, + "step": 7347 + }, + { + "epoch": 1.9512680918868677, + "grad_norm": 1.208912476805739, + "learning_rate": 3.2972746315318436e-08, + "loss": 0.20798128843307495, + "step": 7348 + }, + { + "epoch": 1.9515336608684106, + "grad_norm": 1.2922181556866412, + "learning_rate": 3.2617440988645945e-08, + "loss": 0.23958316445350647, + "step": 7349 + }, + { + "epoch": 1.9517992298499536, + "grad_norm": 1.3799363972113297, + "learning_rate": 3.2264057255777525e-08, + "loss": 0.21934574842453003, + "step": 7350 + }, + { + "epoch": 1.9520647988314965, + "grad_norm": 1.2014453671941887, + "learning_rate": 3.1912595184839804e-08, + "loss": 0.24321375787258148, + "step": 7351 + }, + { + "epoch": 1.9523303678130395, + "grad_norm": 1.1661737247347086, + "learning_rate": 3.156305484359079e-08, + "loss": 0.20932736992835999, + "step": 7352 + }, + { + "epoch": 1.9525959367945824, + "grad_norm": 1.2983329607047998, + "learning_rate": 3.12154362994177e-08, + "loss": 0.19824840128421783, + "step": 7353 + }, + { + "epoch": 1.9528615057761254, + "grad_norm": 1.3128795915591134, + "learning_rate": 3.0869739619338034e-08, + "loss": 0.212745800614357, + "step": 7354 + }, + { + "epoch": 1.9531270747576683, + "grad_norm": 1.247129470001585, + "learning_rate": 3.0525964869997374e-08, + "loss": 0.23044779896736145, + "step": 7355 + }, + { + "epoch": 1.9533926437392113, + 
"grad_norm": 1.2323689907378315, + "learning_rate": 3.018411211767158e-08, + "loss": 0.2237459123134613, + "step": 7356 + }, + { + "epoch": 1.9536582127207542, + "grad_norm": 1.3228713238231502, + "learning_rate": 2.984418142826684e-08, + "loss": 0.2592429518699646, + "step": 7357 + }, + { + "epoch": 1.9539237817022972, + "grad_norm": 1.1444806738907807, + "learning_rate": 2.9506172867315163e-08, + "loss": 0.17559123039245605, + "step": 7358 + }, + { + "epoch": 1.9541893506838401, + "grad_norm": 1.287127142439038, + "learning_rate": 2.917008649998332e-08, + "loss": 0.24143017828464508, + "step": 7359 + }, + { + "epoch": 1.954454919665383, + "grad_norm": 1.310526275865734, + "learning_rate": 2.883592239106392e-08, + "loss": 0.23560799658298492, + "step": 7360 + }, + { + "epoch": 1.954720488646926, + "grad_norm": 1.357586181070064, + "learning_rate": 2.8503680604979878e-08, + "loss": 0.2456119805574417, + "step": 7361 + }, + { + "epoch": 1.954986057628469, + "grad_norm": 1.2143945666113656, + "learning_rate": 2.817336120578329e-08, + "loss": 0.21878069639205933, + "step": 7362 + }, + { + "epoch": 1.955251626610012, + "grad_norm": 1.2288786099560105, + "learning_rate": 2.7844964257155438e-08, + "loss": 0.20496608316898346, + "step": 7363 + }, + { + "epoch": 1.955517195591555, + "grad_norm": 1.2067776880816419, + "learning_rate": 2.7518489822407902e-08, + "loss": 0.23219498991966248, + "step": 7364 + }, + { + "epoch": 1.9557827645730979, + "grad_norm": 1.3499865013336032, + "learning_rate": 2.7193937964481442e-08, + "loss": 0.2284272015094757, + "step": 7365 + }, + { + "epoch": 1.9560483335546408, + "grad_norm": 1.3177047034961433, + "learning_rate": 2.68713087459449e-08, + "loss": 0.22303974628448486, + "step": 7366 + }, + { + "epoch": 1.9563139025361838, + "grad_norm": 1.337791009624748, + "learning_rate": 2.655060222899741e-08, + "loss": 0.22489243745803833, + "step": 7367 + }, + { + "epoch": 1.9565794715177267, + "grad_norm": 1.2719472133739602, + "learning_rate": 
2.6231818475468407e-08, + "loss": 0.27986854314804077, + "step": 7368 + }, + { + "epoch": 1.9568450404992697, + "grad_norm": 1.3884495118427658, + "learning_rate": 2.591495754681539e-08, + "loss": 0.29321208596229553, + "step": 7369 + }, + { + "epoch": 1.9571106094808126, + "grad_norm": 1.3942541242432065, + "learning_rate": 2.5600019504125053e-08, + "loss": 0.2560982406139374, + "step": 7370 + }, + { + "epoch": 1.9573761784623556, + "grad_norm": 1.4283472016053, + "learning_rate": 2.528700440811438e-08, + "loss": 0.264164537191391, + "step": 7371 + }, + { + "epoch": 1.9576417474438985, + "grad_norm": 1.1832183058517125, + "learning_rate": 2.4975912319127326e-08, + "loss": 0.2135474979877472, + "step": 7372 + }, + { + "epoch": 1.9579073164254415, + "grad_norm": 1.265205421311282, + "learning_rate": 2.466674329714036e-08, + "loss": 0.2100939154624939, + "step": 7373 + }, + { + "epoch": 1.9581728854069844, + "grad_norm": 1.395586955333931, + "learning_rate": 2.4359497401758026e-08, + "loss": 0.23327934741973877, + "step": 7374 + }, + { + "epoch": 1.9584384543885274, + "grad_norm": 1.0722904974981595, + "learning_rate": 2.405417469221183e-08, + "loss": 0.18830639123916626, + "step": 7375 + }, + { + "epoch": 1.9587040233700703, + "grad_norm": 1.284092871282835, + "learning_rate": 2.3750775227364686e-08, + "loss": 0.2558823227882385, + "step": 7376 + }, + { + "epoch": 1.9589695923516133, + "grad_norm": 1.2598399224501151, + "learning_rate": 2.3449299065710917e-08, + "loss": 0.24241580069065094, + "step": 7377 + }, + { + "epoch": 1.9592351613331562, + "grad_norm": 1.1684337819721369, + "learning_rate": 2.3149746265368478e-08, + "loss": 0.21678534150123596, + "step": 7378 + }, + { + "epoch": 1.9595007303146992, + "grad_norm": 1.2804084693654512, + "learning_rate": 2.2852116884088947e-08, + "loss": 0.20956794917583466, + "step": 7379 + }, + { + "epoch": 1.9597662992962421, + "grad_norm": 1.2682321373225172, + "learning_rate": 2.2556410979253095e-08, + "loss": 
0.2185555249452591, + "step": 7380 + }, + { + "epoch": 1.960031868277785, + "grad_norm": 1.3369178147645102, + "learning_rate": 2.226262860786643e-08, + "loss": 0.21802933514118195, + "step": 7381 + }, + { + "epoch": 1.960297437259328, + "grad_norm": 1.4565773631347612, + "learning_rate": 2.1970769826570317e-08, + "loss": 0.22842684388160706, + "step": 7382 + }, + { + "epoch": 1.960563006240871, + "grad_norm": 1.2737807469252465, + "learning_rate": 2.1680834691628627e-08, + "loss": 0.23380814492702484, + "step": 7383 + }, + { + "epoch": 1.960828575222414, + "grad_norm": 1.311531421948895, + "learning_rate": 2.1392823258938877e-08, + "loss": 0.23476335406303406, + "step": 7384 + }, + { + "epoch": 1.961094144203957, + "grad_norm": 1.2100451325455786, + "learning_rate": 2.110673558402554e-08, + "loss": 0.19657662510871887, + "step": 7385 + }, + { + "epoch": 1.9613597131854998, + "grad_norm": 1.191542044024077, + "learning_rate": 2.0822571722044494e-08, + "loss": 0.1724000722169876, + "step": 7386 + }, + { + "epoch": 1.9616252821670428, + "grad_norm": 1.3535695538712786, + "learning_rate": 2.0540331727777475e-08, + "loss": 0.22960031032562256, + "step": 7387 + }, + { + "epoch": 1.9618908511485857, + "grad_norm": 1.4028518726902017, + "learning_rate": 2.0260015655637623e-08, + "loss": 0.2601638436317444, + "step": 7388 + }, + { + "epoch": 1.9621564201301287, + "grad_norm": 1.3907771240802078, + "learning_rate": 1.998162355966726e-08, + "loss": 0.2562445402145386, + "step": 7389 + }, + { + "epoch": 1.9624219891116716, + "grad_norm": 1.1881922077977833, + "learning_rate": 1.9705155493535688e-08, + "loss": 0.20073221623897552, + "step": 7390 + }, + { + "epoch": 1.9626875580932146, + "grad_norm": 1.2076860773847395, + "learning_rate": 1.9430611510544707e-08, + "loss": 0.18454071879386902, + "step": 7391 + }, + { + "epoch": 1.9629531270747576, + "grad_norm": 1.1878203901407238, + "learning_rate": 1.915799166362087e-08, + "loss": 0.18515023589134216, + "step": 7392 + }, + { + 
"epoch": 1.9632186960563005, + "grad_norm": 1.3323308983960227, + "learning_rate": 1.8887296005323242e-08, + "loss": 0.25658512115478516, + "step": 7393 + }, + { + "epoch": 1.9634842650378435, + "grad_norm": 1.4122913637661163, + "learning_rate": 1.861852458783897e-08, + "loss": 0.2219933569431305, + "step": 7394 + }, + { + "epoch": 1.9637498340193864, + "grad_norm": 1.3005286775146463, + "learning_rate": 1.8351677462983276e-08, + "loss": 0.24949616193771362, + "step": 7395 + }, + { + "epoch": 1.9640154030009294, + "grad_norm": 1.4026906711741571, + "learning_rate": 1.808675468220167e-08, + "loss": 0.24348726868629456, + "step": 7396 + }, + { + "epoch": 1.9642809719824723, + "grad_norm": 1.3848607909391346, + "learning_rate": 1.782375629656885e-08, + "loss": 0.2329033762216568, + "step": 7397 + }, + { + "epoch": 1.9645465409640153, + "grad_norm": 1.2075544796662319, + "learning_rate": 1.7562682356786488e-08, + "loss": 0.22265426814556122, + "step": 7398 + }, + { + "epoch": 1.9648121099455582, + "grad_norm": 1.2895787739524316, + "learning_rate": 1.730353291318654e-08, + "loss": 0.24438990652561188, + "step": 7399 + }, + { + "epoch": 1.9650776789271012, + "grad_norm": 1.3518107746112518, + "learning_rate": 1.704630801573015e-08, + "loss": 0.2632136642932892, + "step": 7400 + }, + { + "epoch": 1.9653432479086441, + "grad_norm": 1.3377019916165274, + "learning_rate": 1.6791007714008766e-08, + "loss": 0.22230927646160126, + "step": 7401 + }, + { + "epoch": 1.965608816890187, + "grad_norm": 1.3577982430958546, + "learning_rate": 1.653763205723968e-08, + "loss": 0.26317098736763, + "step": 7402 + }, + { + "epoch": 1.96587438587173, + "grad_norm": 1.3261620865973216, + "learning_rate": 1.628618109427049e-08, + "loss": 0.23205846548080444, + "step": 7403 + }, + { + "epoch": 1.966139954853273, + "grad_norm": 1.1507090645553337, + "learning_rate": 1.6036654873579084e-08, + "loss": 0.202583909034729, + "step": 7404 + }, + { + "epoch": 1.966405523834816, + "grad_norm": 
1.3959078486467311, + "learning_rate": 1.5789053443270308e-08, + "loss": 0.2579672038555145, + "step": 7405 + }, + { + "epoch": 1.966671092816359, + "grad_norm": 1.4293268160842907, + "learning_rate": 1.5543376851080428e-08, + "loss": 0.27483606338500977, + "step": 7406 + }, + { + "epoch": 1.966936661797902, + "grad_norm": 1.6466914863601023, + "learning_rate": 1.5299625144370444e-08, + "loss": 0.22510311007499695, + "step": 7407 + }, + { + "epoch": 1.967202230779445, + "grad_norm": 1.3926470224592478, + "learning_rate": 1.505779837013499e-08, + "loss": 0.24941131472587585, + "step": 7408 + }, + { + "epoch": 1.967467799760988, + "grad_norm": 1.316826202799614, + "learning_rate": 1.481789657499344e-08, + "loss": 0.22301170229911804, + "step": 7409 + }, + { + "epoch": 1.967733368742531, + "grad_norm": 1.4513024231529628, + "learning_rate": 1.4579919805198795e-08, + "loss": 0.23045194149017334, + "step": 7410 + }, + { + "epoch": 1.9679989377240739, + "grad_norm": 1.2632313332378347, + "learning_rate": 1.4343868106627689e-08, + "loss": 0.25892990827560425, + "step": 7411 + }, + { + "epoch": 1.9682645067056168, + "grad_norm": 1.316940344896203, + "learning_rate": 1.4109741524788167e-08, + "loss": 0.23086567223072052, + "step": 7412 + }, + { + "epoch": 1.9685300756871598, + "grad_norm": 1.2838593122102535, + "learning_rate": 1.3877540104818566e-08, + "loss": 0.2514735460281372, + "step": 7413 + }, + { + "epoch": 1.9687956446687027, + "grad_norm": 1.2787980812943278, + "learning_rate": 1.3647263891484187e-08, + "loss": 0.21824213862419128, + "step": 7414 + }, + { + "epoch": 1.9690612136502457, + "grad_norm": 1.3351479110439386, + "learning_rate": 1.3418912929178407e-08, + "loss": 0.2262609452009201, + "step": 7415 + }, + { + "epoch": 1.9693267826317886, + "grad_norm": 1.2373165426791106, + "learning_rate": 1.3192487261926013e-08, + "loss": 0.23119492828845978, + "step": 7416 + }, + { + "epoch": 1.9695923516133316, + "grad_norm": 1.2213219567044962, + "learning_rate": 
1.2967986933378751e-08, + "loss": 0.20173534750938416, + "step": 7417 + }, + { + "epoch": 1.9698579205948745, + "grad_norm": 1.3102471335629409, + "learning_rate": 1.2745411986816447e-08, + "loss": 0.2212662547826767, + "step": 7418 + }, + { + "epoch": 1.9701234895764175, + "grad_norm": 1.2461352597734543, + "learning_rate": 1.2524762465151442e-08, + "loss": 0.21990706026554108, + "step": 7419 + }, + { + "epoch": 1.9703890585579604, + "grad_norm": 1.2130065240866306, + "learning_rate": 1.2306038410919707e-08, + "loss": 0.18648189306259155, + "step": 7420 + }, + { + "epoch": 1.9706546275395034, + "grad_norm": 1.334350070832243, + "learning_rate": 1.2089239866289737e-08, + "loss": 0.23273484408855438, + "step": 7421 + }, + { + "epoch": 1.9709201965210463, + "grad_norm": 1.3083344252475524, + "learning_rate": 1.1874366873059206e-08, + "loss": 0.21514324843883514, + "step": 7422 + }, + { + "epoch": 1.9711857655025893, + "grad_norm": 1.2628839077455776, + "learning_rate": 1.1661419472650538e-08, + "loss": 0.2544926106929779, + "step": 7423 + }, + { + "epoch": 1.9714513344841322, + "grad_norm": 1.1881271398224822, + "learning_rate": 1.1450397706119776e-08, + "loss": 0.235082745552063, + "step": 7424 + }, + { + "epoch": 1.9717169034656752, + "grad_norm": 1.3712056139426412, + "learning_rate": 1.1241301614147715e-08, + "loss": 0.24777358770370483, + "step": 7425 + }, + { + "epoch": 1.9719824724472181, + "grad_norm": 1.5271853101134352, + "learning_rate": 1.1034131237045443e-08, + "loss": 0.23714174330234528, + "step": 7426 + }, + { + "epoch": 1.972248041428761, + "grad_norm": 1.3430700979817631, + "learning_rate": 1.0828886614754342e-08, + "loss": 0.24665668606758118, + "step": 7427 + }, + { + "epoch": 1.972513610410304, + "grad_norm": 1.3931055934155485, + "learning_rate": 1.062556778684276e-08, + "loss": 0.23421131074428558, + "step": 7428 + }, + { + "epoch": 1.972779179391847, + "grad_norm": 1.274566697934482, + "learning_rate": 1.0424174792508234e-08, + "loss": 
0.23443526029586792, + "step": 7429 + }, + { + "epoch": 1.97304474837339, + "grad_norm": 1.3315316306417777, + "learning_rate": 1.0224707670576373e-08, + "loss": 0.24177192151546478, + "step": 7430 + }, + { + "epoch": 1.973310317354933, + "grad_norm": 1.4439736433803494, + "learning_rate": 1.002716645950197e-08, + "loss": 0.20957472920417786, + "step": 7431 + }, + { + "epoch": 1.9735758863364758, + "grad_norm": 1.2252184749081894, + "learning_rate": 9.831551197370116e-09, + "loss": 0.21594710648059845, + "step": 7432 + }, + { + "epoch": 1.9738414553180188, + "grad_norm": 1.4445839220306718, + "learning_rate": 9.637861921891756e-09, + "loss": 0.2372155487537384, + "step": 7433 + }, + { + "epoch": 1.974107024299562, + "grad_norm": 1.295551996082086, + "learning_rate": 9.446098670408132e-09, + "loss": 0.211237370967865, + "step": 7434 + }, + { + "epoch": 1.974372593281105, + "grad_norm": 1.3006326416512255, + "learning_rate": 9.256261479888562e-09, + "loss": 0.25123757123947144, + "step": 7435 + }, + { + "epoch": 1.9746381622626479, + "grad_norm": 1.2670719422156809, + "learning_rate": 9.068350386932655e-09, + "loss": 0.23048831522464752, + "step": 7436 + }, + { + "epoch": 1.9749037312441908, + "grad_norm": 1.2157385411321804, + "learning_rate": 8.882365427765883e-09, + "loss": 0.22923544049263, + "step": 7437 + }, + { + "epoch": 1.9751693002257338, + "grad_norm": 1.1040485462060259, + "learning_rate": 8.698306638245114e-09, + "loss": 0.199529767036438, + "step": 7438 + }, + { + "epoch": 1.9754348692072767, + "grad_norm": 1.314383264088006, + "learning_rate": 8.516174053854187e-09, + "loss": 0.22778059542179108, + "step": 7439 + }, + { + "epoch": 1.9757004381888197, + "grad_norm": 1.3428968973890816, + "learning_rate": 8.335967709706128e-09, + "loss": 0.22807848453521729, + "step": 7440 + }, + { + "epoch": 1.9759660071703626, + "grad_norm": 1.3347725648799278, + "learning_rate": 8.157687640543143e-09, + "loss": 0.24764932692050934, + "step": 7441 + }, + { + "epoch": 
1.9762315761519056, + "grad_norm": 1.376463462320243, + "learning_rate": 7.98133388073552e-09, + "loss": 0.22213312983512878, + "step": 7442 + }, + { + "epoch": 1.9764971451334485, + "grad_norm": 1.2799794398059858, + "learning_rate": 7.806906464281617e-09, + "loss": 0.22822709381580353, + "step": 7443 + }, + { + "epoch": 1.9767627141149915, + "grad_norm": 1.2148981447749936, + "learning_rate": 7.634405424808977e-09, + "loss": 0.2236599326133728, + "step": 7444 + }, + { + "epoch": 1.9770282830965344, + "grad_norm": 1.263255403192069, + "learning_rate": 7.463830795574334e-09, + "loss": 0.20294487476348877, + "step": 7445 + }, + { + "epoch": 1.9772938520780774, + "grad_norm": 1.3034015114742201, + "learning_rate": 7.295182609461382e-09, + "loss": 0.2187870740890503, + "step": 7446 + }, + { + "epoch": 1.9775594210596203, + "grad_norm": 1.362800468373944, + "learning_rate": 7.128460898984113e-09, + "loss": 0.2629002630710602, + "step": 7447 + }, + { + "epoch": 1.9778249900411633, + "grad_norm": 1.3155096560899557, + "learning_rate": 6.963665696285704e-09, + "loss": 0.24024136364459991, + "step": 7448 + }, + { + "epoch": 1.9780905590227063, + "grad_norm": 1.240780926418524, + "learning_rate": 6.800797033134077e-09, + "loss": 0.22334401309490204, + "step": 7449 + }, + { + "epoch": 1.9783561280042492, + "grad_norm": 1.2853076050759633, + "learning_rate": 6.639854940930779e-09, + "loss": 0.21535055339336395, + "step": 7450 + }, + { + "epoch": 1.9786216969857922, + "grad_norm": 1.3182931470109147, + "learning_rate": 6.480839450703214e-09, + "loss": 0.26096785068511963, + "step": 7451 + }, + { + "epoch": 1.978887265967335, + "grad_norm": 1.2393293544951642, + "learning_rate": 6.323750593106859e-09, + "loss": 0.22461384534835815, + "step": 7452 + }, + { + "epoch": 1.979152834948878, + "grad_norm": 1.2999818118404687, + "learning_rate": 6.168588398426378e-09, + "loss": 0.24372713267803192, + "step": 7453 + }, + { + "epoch": 1.979418403930421, + "grad_norm": 1.2743158428703243, 
+ "learning_rate": 6.015352896576732e-09, + "loss": 0.19544872641563416, + "step": 7454 + }, + { + "epoch": 1.979683972911964, + "grad_norm": 1.1957228310016947, + "learning_rate": 5.864044117097623e-09, + "loss": 0.22004768252372742, + "step": 7455 + }, + { + "epoch": 1.979949541893507, + "grad_norm": 1.3624679399119848, + "learning_rate": 5.714662089162381e-09, + "loss": 0.2509492337703705, + "step": 7456 + }, + { + "epoch": 1.9802151108750499, + "grad_norm": 1.1563599654889156, + "learning_rate": 5.567206841567974e-09, + "loss": 0.19315078854560852, + "step": 7457 + }, + { + "epoch": 1.9804806798565928, + "grad_norm": 1.1652222675857882, + "learning_rate": 5.421678402741659e-09, + "loss": 0.20722024142742157, + "step": 7458 + }, + { + "epoch": 1.9807462488381358, + "grad_norm": 1.2430974429352135, + "learning_rate": 5.278076800742105e-09, + "loss": 0.2041238397359848, + "step": 7459 + }, + { + "epoch": 1.9810118178196787, + "grad_norm": 1.226308526828602, + "learning_rate": 5.136402063251611e-09, + "loss": 0.21889238059520721, + "step": 7460 + }, + { + "epoch": 1.9812773868012217, + "grad_norm": 1.2925316754685727, + "learning_rate": 4.996654217584995e-09, + "loss": 0.23580557107925415, + "step": 7461 + }, + { + "epoch": 1.9815429557827646, + "grad_norm": 1.5912986799887796, + "learning_rate": 4.858833290684039e-09, + "loss": 0.24967315793037415, + "step": 7462 + }, + { + "epoch": 1.9818085247643076, + "grad_norm": 1.3642305983011473, + "learning_rate": 4.722939309116381e-09, + "loss": 0.21802274882793427, + "step": 7463 + }, + { + "epoch": 1.9820740937458505, + "grad_norm": 1.2778589071361273, + "learning_rate": 4.588972299084393e-09, + "loss": 0.2641376554965973, + "step": 7464 + }, + { + "epoch": 1.9823396627273935, + "grad_norm": 1.181293128126433, + "learning_rate": 4.456932286412974e-09, + "loss": 0.20166629552841187, + "step": 7465 + }, + { + "epoch": 1.9826052317089364, + "grad_norm": 1.3531318882305197, + "learning_rate": 4.3268192965573164e-09, + 
"loss": 0.22796592116355896, + "step": 7466 + }, + { + "epoch": 1.9828708006904794, + "grad_norm": 1.1849961491022751, + "learning_rate": 4.19863335460402e-09, + "loss": 0.19833455979824066, + "step": 7467 + }, + { + "epoch": 1.9831363696720223, + "grad_norm": 1.273561592311718, + "learning_rate": 4.07237448526554e-09, + "loss": 0.23009257018566132, + "step": 7468 + }, + { + "epoch": 1.9834019386535653, + "grad_norm": 1.2188380225442625, + "learning_rate": 3.9480427128812945e-09, + "loss": 0.22418440878391266, + "step": 7469 + }, + { + "epoch": 1.9836675076351082, + "grad_norm": 1.2878640211544259, + "learning_rate": 3.825638061421e-09, + "loss": 0.2015800178050995, + "step": 7470 + }, + { + "epoch": 1.9839330766166512, + "grad_norm": 1.2488639013131106, + "learning_rate": 3.705160554485776e-09, + "loss": 0.22166767716407776, + "step": 7471 + }, + { + "epoch": 1.9841986455981941, + "grad_norm": 1.476152466944419, + "learning_rate": 3.5866102152981586e-09, + "loss": 0.3154509961605072, + "step": 7472 + }, + { + "epoch": 1.984464214579737, + "grad_norm": 1.3338840715084874, + "learning_rate": 3.4699870667165292e-09, + "loss": 0.25891417264938354, + "step": 7473 + }, + { + "epoch": 1.98472978356128, + "grad_norm": 1.2984805204003045, + "learning_rate": 3.355291131222904e-09, + "loss": 0.24837851524353027, + "step": 7474 + }, + { + "epoch": 1.984995352542823, + "grad_norm": 1.2923319105031845, + "learning_rate": 3.2425224309307055e-09, + "loss": 0.24254213273525238, + "step": 7475 + }, + { + "epoch": 1.985260921524366, + "grad_norm": 1.3479980629574153, + "learning_rate": 3.1316809875781005e-09, + "loss": 0.24822884798049927, + "step": 7476 + }, + { + "epoch": 1.985526490505909, + "grad_norm": 1.2515754926310612, + "learning_rate": 3.022766822535772e-09, + "loss": 0.19553488492965698, + "step": 7477 + }, + { + "epoch": 1.9857920594874519, + "grad_norm": 1.289139949226706, + "learning_rate": 2.9157799568002576e-09, + "loss": 0.24758943915367126, + "step": 7478 + }, + { 
+ "epoch": 1.9860576284689948, + "grad_norm": 1.3254058481790592, + "learning_rate": 2.810720410998391e-09, + "loss": 0.22947746515274048, + "step": 7479 + }, + { + "epoch": 1.9863231974505378, + "grad_norm": 1.1718425441422213, + "learning_rate": 2.7075882053828605e-09, + "loss": 0.20573696494102478, + "step": 7480 + }, + { + "epoch": 1.9865887664320807, + "grad_norm": 1.3248019948595686, + "learning_rate": 2.606383359837761e-09, + "loss": 0.2547800838947296, + "step": 7481 + }, + { + "epoch": 1.9868543354136237, + "grad_norm": 1.3239089800396548, + "learning_rate": 2.507105893874151e-09, + "loss": 0.22227191925048828, + "step": 7482 + }, + { + "epoch": 1.9871199043951666, + "grad_norm": 1.379027057566697, + "learning_rate": 2.409755826630056e-09, + "loss": 0.24687603116035461, + "step": 7483 + }, + { + "epoch": 1.9873854733767096, + "grad_norm": 1.3626347731044859, + "learning_rate": 2.3143331768749053e-09, + "loss": 0.23577818274497986, + "step": 7484 + }, + { + "epoch": 1.9876510423582525, + "grad_norm": 1.2429616783261994, + "learning_rate": 2.2208379630039858e-09, + "loss": 0.23012465238571167, + "step": 7485 + }, + { + "epoch": 1.9879166113397955, + "grad_norm": 1.2667278392117014, + "learning_rate": 2.129270203043987e-09, + "loss": 0.21479251980781555, + "step": 7486 + }, + { + "epoch": 1.9881821803213384, + "grad_norm": 1.2419157692275362, + "learning_rate": 2.039629914645236e-09, + "loss": 0.24436548352241516, + "step": 7487 + }, + { + "epoch": 1.9884477493028814, + "grad_norm": 1.3198752588445606, + "learning_rate": 1.951917115091684e-09, + "loss": 0.22225134074687958, + "step": 7488 + }, + { + "epoch": 1.9887133182844243, + "grad_norm": 1.4243538533938824, + "learning_rate": 1.8661318212920275e-09, + "loss": 0.22320827841758728, + "step": 7489 + }, + { + "epoch": 1.9889788872659673, + "grad_norm": 1.3025984911365984, + "learning_rate": 1.7822740497852597e-09, + "loss": 0.2317924201488495, + "step": 7490 + }, + { + "epoch": 1.9892444562475102, + 
"grad_norm": 1.370204940685918, + "learning_rate": 1.700343816738448e-09, + "loss": 0.2275170385837555, + "step": 7491 + }, + { + "epoch": 1.9895100252290532, + "grad_norm": 1.652167024814656, + "learning_rate": 1.6203411379456247e-09, + "loss": 0.24541540443897247, + "step": 7492 + }, + { + "epoch": 1.9897755942105961, + "grad_norm": 1.311164124852614, + "learning_rate": 1.5422660288322288e-09, + "loss": 0.23041896522045135, + "step": 7493 + }, + { + "epoch": 1.990041163192139, + "grad_norm": 1.301476042648128, + "learning_rate": 1.4661185044484438e-09, + "loss": 0.22362437844276428, + "step": 7494 + }, + { + "epoch": 1.990306732173682, + "grad_norm": 1.1872303288026824, + "learning_rate": 1.3918985794747486e-09, + "loss": 0.22082944214344025, + "step": 7495 + }, + { + "epoch": 1.990572301155225, + "grad_norm": 1.2985516009859217, + "learning_rate": 1.3196062682208078e-09, + "loss": 0.2210516780614853, + "step": 7496 + }, + { + "epoch": 1.990837870136768, + "grad_norm": 1.2609254238659025, + "learning_rate": 1.249241584623251e-09, + "loss": 0.21891455352306366, + "step": 7497 + }, + { + "epoch": 1.991103439118311, + "grad_norm": 1.2687100133579783, + "learning_rate": 1.1808045422478932e-09, + "loss": 0.23363247513771057, + "step": 7498 + }, + { + "epoch": 1.9913690080998538, + "grad_norm": 1.188481032582791, + "learning_rate": 1.1142951542875146e-09, + "loss": 0.20676104724407196, + "step": 7499 + }, + { + "epoch": 1.9916345770813968, + "grad_norm": 1.2983095103442552, + "learning_rate": 1.0497134335663018e-09, + "loss": 0.23037788271903992, + "step": 7500 + } + ], + "logging_steps": 1, + "max_steps": 7532, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5680246279962624.0, + "train_batch_size": 
4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-7500/training_args.bin b/checkpoint-7500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9c0c92feb0b44b3362d1d98054f06b20cb57a4b7 --- /dev/null +++ b/checkpoint-7500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89857e5ce3d813c9a03825c43337cd93b1e4a595acca4834e9e4f1a47312d609 +size 6968 diff --git a/checkpoint-7500/zero_to_fp32.py b/checkpoint-7500/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..5995d6e6f04e43b989587aa9022a3aef0c66d694 --- /dev/null +++ b/checkpoint-7500/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
@dataclass
class zero_model_state:
    """Per-rank contents of one ``*_model_states.pt`` file, as collected by ``parse_model_states``."""
    # The annotations are informational only: a dataclass does not enforce them.
    buffers: dict
    # NOTE(review): parse_model_states iterates this as a sequence of dicts; annotation kept as in the original.
    param_shapes: dict
    shared_params: list
    # NOTE(review): read with ``state_dict.get(DS_VERSION, None)``, so it may be None - confirm the real type.
    ds_version: int
    # Both frozen_* fields are read with ``.get(..., None)`` and may therefore be None.
    frozen_param_shapes: dict
    frozen_param_fragments: dict


# set to a truthy value to enable the verbose diagnostic prints throughout this script
debug = 0

# load to cpu
device = torch.device('cpu')


def atoi(text):
    """Return ``int(text)`` when ``text`` consists only of digits, otherwise return ``text`` unchanged."""
    return int(text) if text.isdigit() else text


def natural_keys(text):
    '''
    alist.sort(key=natural_keys) sorts in human order
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)

    The string is split on digit runs; the digit runs become ints so that "2" sorts before "10".
    '''
    return [atoi(c) for c in re.split(r'(\d+)', text)]


def get_model_state_file(checkpoint_dir, zero_stage):
    """
    Return the path of the rank-0 model states file inside ``checkpoint_dir``.

    Args:
        - ``checkpoint_dir``: directory that holds the checkpoint files
        - ``zero_stage``: ZeRO stage of the checkpoint; stages <= 2 and stage 3 use different file names

    Raises:
        - ``FileNotFoundError``: the directory or the expected file does not exist
        - ``ValueError``: ``zero_stage`` is neither <= 2 nor 3
    """
    if not os.path.isdir(checkpoint_dir):
        raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist")

    # there should be only one file
    if zero_stage <= 2:
        file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt")
    elif zero_stage == 3:
        file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt")
    else:
        # without this branch `file` would be unbound below and surface as an UnboundLocalError
        raise ValueError(f"unknown zero stage {zero_stage}")

    if not os.path.exists(file):
        raise FileNotFoundError(f"can't find model states file at '{file}'")

    return file


def get_checkpoint_files(checkpoint_dir, glob_pattern):
    """
    Return the files in ``checkpoint_dir`` matching ``glob_pattern``, in natural (human) order.

    Raises ``FileNotFoundError`` when nothing matches.
    """
    # XXX: need to test that this simple glob rule works for multi-node setup too
    ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys)

    if len(ckpt_files) == 0:
        raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'")

    return ckpt_files


def get_optim_files(checkpoint_dir):
    """Return all ``*_optim_states.pt`` files in ``checkpoint_dir`` in natural order."""
    return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt")


def get_model_state_files(checkpoint_dir):
    """Return all ``*_model_states.pt`` files in ``checkpoint_dir`` in natural order."""
    return get_checkpoint_files(checkpoint_dir, "*_model_states.pt")
def parse_optim_states(files, ds_checkpoint_dir):
    """
    Load every optimizer-state shard and extract the fp32 groups from each.

    Args:
        - ``files``: paths of the ``*_optim_states.pt`` shards, one per rank
        - ``ds_checkpoint_dir``: checkpoint folder, used only in the error message

    Returns:
        ``(zero_stage, world_size, fp32_flat_groups)`` where ``fp32_flat_groups`` has one entry per shard.

    Raises:
        - ``ValueError``: the first shard is not a zero checkpoint, the number of shards does not match the
          recorded partition count, or the zero stage is unknown
    """
    total_files = len(files)
    shards = []
    for shard_path in tqdm(files, desc='Loading checkpoint shards'):
        # NOTE(review): weights_only=False unpickles arbitrary objects - only use on trusted checkpoints
        shard = torch.load(shard_path, map_location=device, mmap=True, weights_only=False)
        # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights
        # and also handle the case where it was already removed by another helper script
        shard["optimizer_state_dict"].pop("optimizer_state_dict", None)
        shards.append(shard)

    first_optim_state = shards[0][OPTIMIZER_STATE_DICT]
    if ZERO_STAGE not in first_optim_state:
        raise ValueError(f"{files[0]} is not a zero checkpoint")
    zero_stage = first_optim_state[ZERO_STAGE]
    world_size = first_optim_state[PARTITION_COUNT]

    # For ZeRO-2 each param group can have different partition_count as data parallelism for expert
    # parameters can be different from data parallelism for non-expert parameters. So we can just
    # use the max of the partition_count to get the dp world_size.
    if type(world_size) is list:
        world_size = max(world_size)

    if world_size != total_files:
        raise ValueError(
            f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. "
            "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes."
        )

    # the groups are named differently in each stage
    if zero_stage == 3:
        groups_key = FP32_FLAT_GROUPS
    elif zero_stage <= 2:
        groups_key = SINGLE_PARTITION_OF_FP32_GROUPS
    else:
        raise ValueError(f"unknown zero stage {zero_stage}")

    fp32_flat_groups = [shard[OPTIMIZER_STATE_DICT][groups_key] for shard in shards]
    return zero_stage, world_size, fp32_flat_groups
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states,
                                               exclude_frozen_parameters):
    """
    Assemble the fp32 state dict for a checkpoint handled by the zero stage <= 2 code path.

    The result is an ``OrderedDict`` filled in this order: rank-0 buffers, frozen parameters (unless
    ``exclude_frozen_parameters`` is true), trainable parameters, and finally shared-parameter aliases.
    """
    state_dict = OrderedDict()

    # buffers
    buffers = zero_model_states[0].buffers
    state_dict.update(buffers)
    if debug:
        print(f"added {len(buffers)} buffers")

    if not exclude_frozen_parameters:
        _zero2_merge_frozen_params(state_dict, zero_model_states)

    _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)

    # recover shared parameters: pair[0] becomes an alias of pair[1] when pair[1] was reconstructed
    for pair in zero_model_states[0].shared_params:
        if pair[1] in state_dict:
            state_dict[pair[0]] = state_dict[pair[1]]

    return state_dict


def zero3_partitioned_param_info(unpartitioned_numel, world_size):
    """
    Compute how a parameter of ``unpartitioned_numel`` elements is split across ``world_size`` ranks.

    Returns:
        ``(partitioned_numel, padding_numel)``: the per-rank element count (rounded up) and the number of
        padding elements needed to make the total a multiple of ``world_size``.
    """
    remainder = unpartitioned_numel % world_size
    padding_numel = (world_size - remainder) if remainder else 0
    # exact integer ceil-division; float division loses precision for very large element counts
    partitioned_numel = (unpartitioned_numel + padding_numel) // world_size
    return partitioned_numel, padding_numel
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pseudo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint.
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-7532/README.md b/checkpoint-7532/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4005c4d8e7a819833408da4794e4e74d2ced6553 --- /dev/null +++ b/checkpoint-7532/README.md @@ -0,0 +1,208 @@ +--- +base_model: Qwen/Qwen2.5-VL-7B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. 
More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-7532/adapter_config.json b/checkpoint-7532/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e68fb35d77856a51c03fe5e97700fc3194faedb5 --- /dev/null +++ b/checkpoint-7532/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + 
"layers.4.mlp.up_proj", + "k_proj", + "layers.16.mlp.up_proj", + "layers.15.mlp.down_proj", + "layers.2.mlp.down_proj", + "layers.14.mlp.up_proj", + "layers.26.mlp.down_proj", + "layers.1.mlp.up_proj", + "layers.14.mlp.down_proj", + "layers.20.mlp.up_proj", + "layers.14.mlp.gate_proj", + "layers.12.mlp.up_proj", + "layers.10.mlp.up_proj", + "layers.12.mlp.gate_proj", + "layers.22.mlp.down_proj", + "layers.9.mlp.up_proj", + "layers.9.mlp.gate_proj", + "layers.19.mlp.up_proj", + "layers.22.mlp.gate_proj", + "v_proj", + "layers.15.mlp.up_proj", + "layers.21.mlp.up_proj", + "layers.6.mlp.up_proj", + "layers.0.mlp.down_proj", + "layers.9.mlp.down_proj", + "layers.11.mlp.up_proj", + "layers.8.mlp.up_proj", + "layers.17.mlp.gate_proj", + "layers.1.mlp.down_proj", + "layers.21.mlp.down_proj", + "layers.27.mlp.up_proj", + "layers.16.mlp.gate_proj", + "q_proj", + "layers.10.mlp.down_proj", + "layers.7.mlp.gate_proj", + "layers.0.mlp.up_proj", + "layers.5.mlp.up_proj", + "layers.22.mlp.up_proj", + "layers.1.mlp.gate_proj", + "layers.25.mlp.up_proj", + "layers.7.mlp.down_proj", + "layers.25.mlp.down_proj", + "layers.13.mlp.down_proj", + "layers.2.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.13.mlp.gate_proj", + "layers.23.mlp.gate_proj", + "layers.3.mlp.gate_proj", + "layers.3.mlp.up_proj", + "layers.23.mlp.down_proj", + "o_proj", + "layers.6.mlp.gate_proj", + "layers.18.mlp.up_proj", + "layers.20.mlp.gate_proj", + "layers.26.mlp.gate_proj", + "layers.24.mlp.down_proj", + "layers.20.mlp.down_proj", + "layers.24.mlp.up_proj", + "layers.0.mlp.gate_proj", + "layers.15.mlp.gate_proj", + "layers.4.mlp.down_proj", + "layers.8.mlp.gate_proj", + "layers.12.mlp.down_proj", + "layers.8.mlp.down_proj", + "layers.25.mlp.gate_proj", + "layers.3.mlp.down_proj", + "layers.11.mlp.down_proj", + "layers.6.mlp.down_proj", + "layers.16.mlp.down_proj", + "layers.26.mlp.up_proj", + "layers.19.mlp.gate_proj", + "layers.10.mlp.gate_proj", + "layers.23.mlp.up_proj", + 
"layers.21.mlp.gate_proj", + "layers.13.mlp.up_proj", + "layers.18.mlp.gate_proj", + "layers.17.mlp.up_proj", + "layers.5.mlp.down_proj", + "layers.24.mlp.gate_proj", + "layers.4.mlp.gate_proj", + "layers.19.mlp.down_proj", + "layers.27.mlp.gate_proj", + "layers.18.mlp.down_proj", + "layers.7.mlp.up_proj", + "layers.27.mlp.down_proj", + "layers.2.mlp.gate_proj", + "layers.5.mlp.gate_proj", + "layers.11.mlp.gate_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-7532/adapter_model.safetensors b/checkpoint-7532/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9cb96688656e8f6e53f5d2b043739353eb69aa2d --- /dev/null +++ b/checkpoint-7532/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1994850f0e38a79ff5d308061112090688fcb346fcd1968cbb0e1c4b6b6f7e83 +size 323020440 diff --git a/checkpoint-7532/chat_template.jinja b/checkpoint-7532/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/checkpoint-7532/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in 
content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-7532/global_step7532/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-7532/global_step7532/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1fb646a7168b932b74d6ccf3abba5811449d6db --- /dev/null +++ b/checkpoint-7532/global_step7532/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3a114923566777bd2bb1f92c5a9a01cb734e1e92131ca719d0551f623f8bba3 +size 1937772272 diff --git a/checkpoint-7532/global_step7532/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-7532/global_step7532/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3456985aa21b743c766c406c837e2ad55cf72fa5 --- /dev/null +++ b/checkpoint-7532/global_step7532/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e17bb5daf17b1444b9e4c99609095a0c5aa2e25fa3f58acd56c41df65fd1f56 +size 460630 diff --git a/checkpoint-7532/latest b/checkpoint-7532/latest new file mode 100644 index 0000000000000000000000000000000000000000..6101d50d65bb16111e5f54fd7083f46d9c714dd7 --- /dev/null +++ b/checkpoint-7532/latest @@ -0,0 +1 @@ +global_step7532 \ No newline at end of file diff --git a/checkpoint-7532/processor_config.json b/checkpoint-7532/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/checkpoint-7532/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + 
"data_format": "channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": "Qwen2VLVideoProcessor" + } +} diff --git a/checkpoint-7532/rng_state.pth b/checkpoint-7532/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8fa79fce75608b20521385f93739e31af3ce37f5 --- /dev/null +++ b/checkpoint-7532/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:662c45da342a87d9d686ed8822b1be67a15e9b367e7308770ffc674602629c94 +size 14244 diff --git a/checkpoint-7532/scheduler.pt b/checkpoint-7532/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e409523b222cdc8619829e8b1782793a44024d2 --- /dev/null +++ b/checkpoint-7532/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3961f8838a4fd8e4153a0a27e6d277ed65f741910ed0e652e83728030c9ccb9b +size 1000 diff --git a/checkpoint-7532/tokenizer.json b/checkpoint-7532/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/checkpoint-7532/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/checkpoint-7532/tokenizer_config.json b/checkpoint-7532/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/checkpoint-7532/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-7532/trainer_state.json b/checkpoint-7532/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..db9b9d82d7db7e17d1333af17620ec7fb04bed3e --- /dev/null +++ b/checkpoint-7532/trainer_state.json @@ -0,0 +1,52758 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 7532, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + 
{ + "epoch": 0.0002655689815429558, + "grad_norm": 1.8881195832990014, + "learning_rate": 0.0, + "loss": 1.1502833366394043, + "step": 1 + }, + { + "epoch": 0.0005311379630859116, + "grad_norm": 1.77718785062999, + "learning_rate": 5.3050397877984086e-08, + "loss": 1.1698756217956543, + "step": 2 + }, + { + "epoch": 0.0007967069446288673, + "grad_norm": 1.6766718507101437, + "learning_rate": 1.0610079575596817e-07, + "loss": 1.1060130596160889, + "step": 3 + }, + { + "epoch": 0.0010622759261718232, + "grad_norm": 1.876053682165919, + "learning_rate": 1.5915119363395226e-07, + "loss": 1.1075276136398315, + "step": 4 + }, + { + "epoch": 0.001327844907714779, + "grad_norm": 1.88228417845019, + "learning_rate": 2.1220159151193635e-07, + "loss": 1.2153511047363281, + "step": 5 + }, + { + "epoch": 0.0015934138892577346, + "grad_norm": 1.9273368394845023, + "learning_rate": 2.6525198938992043e-07, + "loss": 1.1400426626205444, + "step": 6 + }, + { + "epoch": 0.0018589828708006906, + "grad_norm": 1.904814034912833, + "learning_rate": 3.183023872679045e-07, + "loss": 1.2070660591125488, + "step": 7 + }, + { + "epoch": 0.0021245518523436463, + "grad_norm": 1.7346381008587795, + "learning_rate": 3.713527851458886e-07, + "loss": 1.1614588499069214, + "step": 8 + }, + { + "epoch": 0.002390120833886602, + "grad_norm": 1.817032704311048, + "learning_rate": 4.244031830238727e-07, + "loss": 1.1739476919174194, + "step": 9 + }, + { + "epoch": 0.002655689815429558, + "grad_norm": 1.8291974144657501, + "learning_rate": 4.774535809018568e-07, + "loss": 1.1559171676635742, + "step": 10 + }, + { + "epoch": 0.0029212587969725135, + "grad_norm": 2.0039010539208744, + "learning_rate": 5.305039787798409e-07, + "loss": 1.2086225748062134, + "step": 11 + }, + { + "epoch": 0.0031868277785154693, + "grad_norm": 1.876026657216244, + "learning_rate": 5.83554376657825e-07, + "loss": 1.227709174156189, + "step": 12 + }, + { + "epoch": 0.003452396760058425, + "grad_norm": 2.0245192813139825, + 
"learning_rate": 6.36604774535809e-07, + "loss": 1.255577564239502, + "step": 13 + }, + { + "epoch": 0.003717965741601381, + "grad_norm": 1.8641260357218605, + "learning_rate": 6.896551724137931e-07, + "loss": 1.1953760385513306, + "step": 14 + }, + { + "epoch": 0.0039835347231443365, + "grad_norm": 1.9079733249323254, + "learning_rate": 7.427055702917772e-07, + "loss": 1.1325336694717407, + "step": 15 + }, + { + "epoch": 0.004249103704687293, + "grad_norm": 1.8230190567516942, + "learning_rate": 7.957559681697613e-07, + "loss": 1.232974648475647, + "step": 16 + }, + { + "epoch": 0.004514672686230248, + "grad_norm": 1.8532380418447003, + "learning_rate": 8.488063660477454e-07, + "loss": 1.1527395248413086, + "step": 17 + }, + { + "epoch": 0.004780241667773204, + "grad_norm": 1.986294801704247, + "learning_rate": 9.018567639257295e-07, + "loss": 1.151026964187622, + "step": 18 + }, + { + "epoch": 0.00504581064931616, + "grad_norm": 1.8048967405226255, + "learning_rate": 9.549071618037136e-07, + "loss": 1.155288815498352, + "step": 19 + }, + { + "epoch": 0.005311379630859116, + "grad_norm": 2.1631450267380767, + "learning_rate": 1.0079575596816979e-06, + "loss": 1.183434009552002, + "step": 20 + }, + { + "epoch": 0.005576948612402072, + "grad_norm": 1.88758019498484, + "learning_rate": 1.0610079575596817e-06, + "loss": 1.161030650138855, + "step": 21 + }, + { + "epoch": 0.005842517593945027, + "grad_norm": 1.9605989446426395, + "learning_rate": 1.1140583554376658e-06, + "loss": 1.123382806777954, + "step": 22 + }, + { + "epoch": 0.006108086575487983, + "grad_norm": 2.2042020560619306, + "learning_rate": 1.16710875331565e-06, + "loss": 1.238707423210144, + "step": 23 + }, + { + "epoch": 0.0063736555570309385, + "grad_norm": 2.289866056000848, + "learning_rate": 1.220159151193634e-06, + "loss": 1.2058464288711548, + "step": 24 + }, + { + "epoch": 0.006639224538573895, + "grad_norm": 2.724214643619529, + "learning_rate": 1.273209549071618e-06, + "loss": 
1.2351092100143433, + "step": 25 + }, + { + "epoch": 0.00690479352011685, + "grad_norm": 2.5088520951326028, + "learning_rate": 1.3262599469496024e-06, + "loss": 1.1739860773086548, + "step": 26 + }, + { + "epoch": 0.007170362501659806, + "grad_norm": 2.3243798435890155, + "learning_rate": 1.3793103448275862e-06, + "loss": 1.1407617330551147, + "step": 27 + }, + { + "epoch": 0.007435931483202762, + "grad_norm": 2.533007430657115, + "learning_rate": 1.4323607427055705e-06, + "loss": 1.1844531297683716, + "step": 28 + }, + { + "epoch": 0.007701500464745718, + "grad_norm": 2.4702075978733804, + "learning_rate": 1.4854111405835544e-06, + "loss": 1.1293678283691406, + "step": 29 + }, + { + "epoch": 0.007967069446288673, + "grad_norm": 3.0873404038783963, + "learning_rate": 1.5384615384615387e-06, + "loss": 1.1310899257659912, + "step": 30 + }, + { + "epoch": 0.00823263842783163, + "grad_norm": 2.7098364862500013, + "learning_rate": 1.5915119363395226e-06, + "loss": 1.1015795469284058, + "step": 31 + }, + { + "epoch": 0.008498207409374585, + "grad_norm": 2.8074949689582476, + "learning_rate": 1.6445623342175069e-06, + "loss": 1.0756056308746338, + "step": 32 + }, + { + "epoch": 0.00876377639091754, + "grad_norm": 3.1563034348975676, + "learning_rate": 1.6976127320954908e-06, + "loss": 1.1496126651763916, + "step": 33 + }, + { + "epoch": 0.009029345372460496, + "grad_norm": 2.842390896608423, + "learning_rate": 1.750663129973475e-06, + "loss": 1.203465461730957, + "step": 34 + }, + { + "epoch": 0.009294914354003453, + "grad_norm": 2.6747271223349753, + "learning_rate": 1.803713527851459e-06, + "loss": 1.0613923072814941, + "step": 35 + }, + { + "epoch": 0.009560483335546408, + "grad_norm": 2.146709655536541, + "learning_rate": 1.8567639257294432e-06, + "loss": 1.06027090549469, + "step": 36 + }, + { + "epoch": 0.009826052317089363, + "grad_norm": 1.9942495143394863, + "learning_rate": 1.909814323607427e-06, + "loss": 1.0508522987365723, + "step": 37 + }, + { + "epoch": 
0.01009162129863232, + "grad_norm": 2.1704927298148107, + "learning_rate": 1.9628647214854114e-06, + "loss": 1.0353929996490479, + "step": 38 + }, + { + "epoch": 0.010357190280175276, + "grad_norm": 1.8252380884349957, + "learning_rate": 2.0159151193633957e-06, + "loss": 0.9974027276039124, + "step": 39 + }, + { + "epoch": 0.010622759261718231, + "grad_norm": 1.7188806752497834, + "learning_rate": 2.0689655172413796e-06, + "loss": 1.0849467515945435, + "step": 40 + }, + { + "epoch": 0.010888328243261186, + "grad_norm": 1.3692667089198218, + "learning_rate": 2.1220159151193635e-06, + "loss": 1.005434274673462, + "step": 41 + }, + { + "epoch": 0.011153897224804143, + "grad_norm": 1.3465343019370317, + "learning_rate": 2.1750663129973478e-06, + "loss": 1.052631139755249, + "step": 42 + }, + { + "epoch": 0.011419466206347099, + "grad_norm": 1.352421126005469, + "learning_rate": 2.2281167108753316e-06, + "loss": 0.9470957517623901, + "step": 43 + }, + { + "epoch": 0.011685035187890054, + "grad_norm": 1.2219308328594767, + "learning_rate": 2.281167108753316e-06, + "loss": 0.9865130186080933, + "step": 44 + }, + { + "epoch": 0.01195060416943301, + "grad_norm": 1.19161259271228, + "learning_rate": 2.3342175066313e-06, + "loss": 0.9405577778816223, + "step": 45 + }, + { + "epoch": 0.012216173150975966, + "grad_norm": 1.1603073869733838, + "learning_rate": 2.387267904509284e-06, + "loss": 0.9418795108795166, + "step": 46 + }, + { + "epoch": 0.012481742132518922, + "grad_norm": 1.1897328813812988, + "learning_rate": 2.440318302387268e-06, + "loss": 0.9841142892837524, + "step": 47 + }, + { + "epoch": 0.012747311114061877, + "grad_norm": 1.159720101499262, + "learning_rate": 2.4933687002652523e-06, + "loss": 0.9412609338760376, + "step": 48 + }, + { + "epoch": 0.013012880095604834, + "grad_norm": 1.1421347262548374, + "learning_rate": 2.546419098143236e-06, + "loss": 0.9239889979362488, + "step": 49 + }, + { + "epoch": 0.01327844907714779, + "grad_norm": 1.144363453746544, + 
"learning_rate": 2.59946949602122e-06, + "loss": 0.9212941527366638, + "step": 50 + }, + { + "epoch": 0.013544018058690745, + "grad_norm": 0.9916816911141796, + "learning_rate": 2.6525198938992047e-06, + "loss": 0.8863773345947266, + "step": 51 + }, + { + "epoch": 0.0138095870402337, + "grad_norm": 0.9890613082667745, + "learning_rate": 2.7055702917771886e-06, + "loss": 0.8990404009819031, + "step": 52 + }, + { + "epoch": 0.014075156021776657, + "grad_norm": 1.1123466462737277, + "learning_rate": 2.7586206896551725e-06, + "loss": 0.9257171154022217, + "step": 53 + }, + { + "epoch": 0.014340725003319612, + "grad_norm": 0.8689931750055545, + "learning_rate": 2.8116710875331564e-06, + "loss": 0.8239601254463196, + "step": 54 + }, + { + "epoch": 0.014606293984862568, + "grad_norm": 0.9936229603029793, + "learning_rate": 2.864721485411141e-06, + "loss": 0.8656830787658691, + "step": 55 + }, + { + "epoch": 0.014871862966405525, + "grad_norm": 1.0202371081091262, + "learning_rate": 2.917771883289125e-06, + "loss": 0.9470342397689819, + "step": 56 + }, + { + "epoch": 0.01513743194794848, + "grad_norm": 0.9663900963956384, + "learning_rate": 2.970822281167109e-06, + "loss": 0.8699859976768494, + "step": 57 + }, + { + "epoch": 0.015403000929491435, + "grad_norm": 0.940263545207204, + "learning_rate": 3.0238726790450927e-06, + "loss": 0.8668704628944397, + "step": 58 + }, + { + "epoch": 0.01566856991103439, + "grad_norm": 0.9865381848251076, + "learning_rate": 3.0769230769230774e-06, + "loss": 0.841624915599823, + "step": 59 + }, + { + "epoch": 0.015934138892577346, + "grad_norm": 0.8909972421095332, + "learning_rate": 3.1299734748010613e-06, + "loss": 0.8412661552429199, + "step": 60 + }, + { + "epoch": 0.0161997078741203, + "grad_norm": 0.8771283277278942, + "learning_rate": 3.183023872679045e-06, + "loss": 0.818957507610321, + "step": 61 + }, + { + "epoch": 0.01646527685566326, + "grad_norm": 0.9190140482494583, + "learning_rate": 3.23607427055703e-06, + "loss": 
0.8030763268470764, + "step": 62 + }, + { + "epoch": 0.016730845837206215, + "grad_norm": 0.8839367067386452, + "learning_rate": 3.2891246684350138e-06, + "loss": 0.7869359850883484, + "step": 63 + }, + { + "epoch": 0.01699641481874917, + "grad_norm": 0.8058255896640879, + "learning_rate": 3.3421750663129977e-06, + "loss": 0.7912170886993408, + "step": 64 + }, + { + "epoch": 0.017261983800292126, + "grad_norm": 0.8538938403853334, + "learning_rate": 3.3952254641909815e-06, + "loss": 0.7736695408821106, + "step": 65 + }, + { + "epoch": 0.01752755278183508, + "grad_norm": 0.8652625375848492, + "learning_rate": 3.448275862068966e-06, + "loss": 0.768275260925293, + "step": 66 + }, + { + "epoch": 0.017793121763378036, + "grad_norm": 0.8691478661970735, + "learning_rate": 3.50132625994695e-06, + "loss": 0.7210639119148254, + "step": 67 + }, + { + "epoch": 0.01805869074492099, + "grad_norm": 0.8378031795839386, + "learning_rate": 3.554376657824934e-06, + "loss": 0.7488028407096863, + "step": 68 + }, + { + "epoch": 0.01832425972646395, + "grad_norm": 0.8943989597273122, + "learning_rate": 3.607427055702918e-06, + "loss": 0.7329621911048889, + "step": 69 + }, + { + "epoch": 0.018589828708006906, + "grad_norm": 0.92104620358882, + "learning_rate": 3.660477453580902e-06, + "loss": 0.7270619869232178, + "step": 70 + }, + { + "epoch": 0.01885539768954986, + "grad_norm": 0.9782498013554233, + "learning_rate": 3.7135278514588865e-06, + "loss": 0.7271254658699036, + "step": 71 + }, + { + "epoch": 0.019120966671092816, + "grad_norm": 0.9115603845811348, + "learning_rate": 3.7665782493368703e-06, + "loss": 0.787033200263977, + "step": 72 + }, + { + "epoch": 0.01938653565263577, + "grad_norm": 0.8604692726067453, + "learning_rate": 3.819628647214854e-06, + "loss": 0.7049479484558105, + "step": 73 + }, + { + "epoch": 0.019652104634178727, + "grad_norm": 0.8610577281688413, + "learning_rate": 3.8726790450928385e-06, + "loss": 0.7146892547607422, + "step": 74 + }, + { + "epoch": 
0.019917673615721682, + "grad_norm": 0.7602187567662452, + "learning_rate": 3.925729442970823e-06, + "loss": 0.7212516069412231, + "step": 75 + }, + { + "epoch": 0.02018324259726464, + "grad_norm": 0.6842508042039768, + "learning_rate": 3.978779840848806e-06, + "loss": 0.6612375378608704, + "step": 76 + }, + { + "epoch": 0.020448811578807596, + "grad_norm": 0.7781006919053841, + "learning_rate": 4.031830238726791e-06, + "loss": 0.7038244605064392, + "step": 77 + }, + { + "epoch": 0.02071438056035055, + "grad_norm": 0.7186592057129139, + "learning_rate": 4.084880636604775e-06, + "loss": 0.7081903219223022, + "step": 78 + }, + { + "epoch": 0.020979949541893507, + "grad_norm": 0.7655954113403886, + "learning_rate": 4.137931034482759e-06, + "loss": 0.7079841494560242, + "step": 79 + }, + { + "epoch": 0.021245518523436462, + "grad_norm": 0.7149787673446053, + "learning_rate": 4.190981432360743e-06, + "loss": 0.7090641260147095, + "step": 80 + }, + { + "epoch": 0.021511087504979418, + "grad_norm": 0.6657837070384769, + "learning_rate": 4.244031830238727e-06, + "loss": 0.6632575988769531, + "step": 81 + }, + { + "epoch": 0.021776656486522373, + "grad_norm": 0.6666401713606211, + "learning_rate": 4.297082228116711e-06, + "loss": 0.7231097221374512, + "step": 82 + }, + { + "epoch": 0.02204222546806533, + "grad_norm": 0.6804476609839887, + "learning_rate": 4.3501326259946955e-06, + "loss": 0.6696034669876099, + "step": 83 + }, + { + "epoch": 0.022307794449608287, + "grad_norm": 0.7073638927991296, + "learning_rate": 4.403183023872679e-06, + "loss": 0.7550696134567261, + "step": 84 + }, + { + "epoch": 0.022573363431151242, + "grad_norm": 0.7064770122504733, + "learning_rate": 4.456233421750663e-06, + "loss": 0.671328067779541, + "step": 85 + }, + { + "epoch": 0.022838932412694198, + "grad_norm": 0.6506139330803743, + "learning_rate": 4.5092838196286476e-06, + "loss": 0.6864410638809204, + "step": 86 + }, + { + "epoch": 0.023104501394237153, + "grad_norm": 0.6642837777732639, 
+ "learning_rate": 4.562334217506632e-06, + "loss": 0.6870769262313843, + "step": 87 + }, + { + "epoch": 0.023370070375780108, + "grad_norm": 0.6947506894199804, + "learning_rate": 4.615384615384616e-06, + "loss": 0.6539690494537354, + "step": 88 + }, + { + "epoch": 0.023635639357323063, + "grad_norm": 0.6446743321890098, + "learning_rate": 4.6684350132626e-06, + "loss": 0.6946991086006165, + "step": 89 + }, + { + "epoch": 0.02390120833886602, + "grad_norm": 0.6384512383480915, + "learning_rate": 4.721485411140584e-06, + "loss": 0.6177583932876587, + "step": 90 + }, + { + "epoch": 0.024166777320408978, + "grad_norm": 0.7150510018442997, + "learning_rate": 4.774535809018568e-06, + "loss": 0.6890037059783936, + "step": 91 + }, + { + "epoch": 0.024432346301951933, + "grad_norm": 0.6592991709316253, + "learning_rate": 4.8275862068965525e-06, + "loss": 0.6563063263893127, + "step": 92 + }, + { + "epoch": 0.024697915283494888, + "grad_norm": 0.6897740926797078, + "learning_rate": 4.880636604774536e-06, + "loss": 0.714318573474884, + "step": 93 + }, + { + "epoch": 0.024963484265037843, + "grad_norm": 0.6433596226177777, + "learning_rate": 4.93368700265252e-06, + "loss": 0.6720882654190063, + "step": 94 + }, + { + "epoch": 0.0252290532465808, + "grad_norm": 0.5910528348002435, + "learning_rate": 4.9867374005305045e-06, + "loss": 0.602899968624115, + "step": 95 + }, + { + "epoch": 0.025494622228123754, + "grad_norm": 0.6635651676723159, + "learning_rate": 5.039787798408489e-06, + "loss": 0.6628841161727905, + "step": 96 + }, + { + "epoch": 0.02576019120966671, + "grad_norm": 0.6070065577903714, + "learning_rate": 5.092838196286472e-06, + "loss": 0.6486932635307312, + "step": 97 + }, + { + "epoch": 0.026025760191209668, + "grad_norm": 0.6484848126679549, + "learning_rate": 5.145888594164457e-06, + "loss": 0.6719033122062683, + "step": 98 + }, + { + "epoch": 0.026291329172752623, + "grad_norm": 0.6856934201881044, + "learning_rate": 5.19893899204244e-06, + "loss": 
0.6818530559539795, + "step": 99 + }, + { + "epoch": 0.02655689815429558, + "grad_norm": 0.6204811558305167, + "learning_rate": 5.251989389920424e-06, + "loss": 0.6306912899017334, + "step": 100 + }, + { + "epoch": 0.026822467135838534, + "grad_norm": 0.7820574736690976, + "learning_rate": 5.3050397877984095e-06, + "loss": 0.5952945351600647, + "step": 101 + }, + { + "epoch": 0.02708803611738149, + "grad_norm": 0.6546243503849497, + "learning_rate": 5.358090185676394e-06, + "loss": 0.6566107273101807, + "step": 102 + }, + { + "epoch": 0.027353605098924445, + "grad_norm": 0.707921645301647, + "learning_rate": 5.411140583554377e-06, + "loss": 0.6981694102287292, + "step": 103 + }, + { + "epoch": 0.0276191740804674, + "grad_norm": 0.6375441067969543, + "learning_rate": 5.4641909814323615e-06, + "loss": 0.6231328248977661, + "step": 104 + }, + { + "epoch": 0.02788474306201036, + "grad_norm": 0.6964560869475424, + "learning_rate": 5.517241379310345e-06, + "loss": 0.6414977312088013, + "step": 105 + }, + { + "epoch": 0.028150312043553314, + "grad_norm": 0.6835502446580011, + "learning_rate": 5.570291777188329e-06, + "loss": 0.6335234642028809, + "step": 106 + }, + { + "epoch": 0.02841588102509627, + "grad_norm": 0.6248033284508979, + "learning_rate": 5.623342175066313e-06, + "loss": 0.6040852665901184, + "step": 107 + }, + { + "epoch": 0.028681450006639225, + "grad_norm": 0.6645474785171195, + "learning_rate": 5.676392572944297e-06, + "loss": 0.6011114716529846, + "step": 108 + }, + { + "epoch": 0.02894701898818218, + "grad_norm": 0.655106623405533, + "learning_rate": 5.729442970822282e-06, + "loss": 0.6042627096176147, + "step": 109 + }, + { + "epoch": 0.029212587969725135, + "grad_norm": 0.720208539355598, + "learning_rate": 5.782493368700266e-06, + "loss": 0.6183412671089172, + "step": 110 + }, + { + "epoch": 0.02947815695126809, + "grad_norm": 0.6666287454908232, + "learning_rate": 5.83554376657825e-06, + "loss": 0.6150818467140198, + "step": 111 + }, + { + "epoch": 
0.02974372593281105, + "grad_norm": 0.6840692324124527, + "learning_rate": 5.888594164456234e-06, + "loss": 0.6202039122581482, + "step": 112 + }, + { + "epoch": 0.030009294914354005, + "grad_norm": 0.6626407253242022, + "learning_rate": 5.941644562334218e-06, + "loss": 0.6334809064865112, + "step": 113 + }, + { + "epoch": 0.03027486389589696, + "grad_norm": 0.6319419097399773, + "learning_rate": 5.994694960212202e-06, + "loss": 0.5728089809417725, + "step": 114 + }, + { + "epoch": 0.030540432877439915, + "grad_norm": 0.6988175213443283, + "learning_rate": 6.0477453580901854e-06, + "loss": 0.6884603500366211, + "step": 115 + }, + { + "epoch": 0.03080600185898287, + "grad_norm": 0.6618120552387852, + "learning_rate": 6.1007957559681706e-06, + "loss": 0.5619829893112183, + "step": 116 + }, + { + "epoch": 0.031071570840525826, + "grad_norm": 0.6756012639437595, + "learning_rate": 6.153846153846155e-06, + "loss": 0.6224710941314697, + "step": 117 + }, + { + "epoch": 0.03133713982206878, + "grad_norm": 0.7208355833756769, + "learning_rate": 6.206896551724138e-06, + "loss": 0.6119496822357178, + "step": 118 + }, + { + "epoch": 0.03160270880361174, + "grad_norm": 0.6917782946677038, + "learning_rate": 6.259946949602123e-06, + "loss": 0.6190857887268066, + "step": 119 + }, + { + "epoch": 0.03186827778515469, + "grad_norm": 0.6704531181022263, + "learning_rate": 6.312997347480107e-06, + "loss": 0.6460769176483154, + "step": 120 + }, + { + "epoch": 0.03213384676669765, + "grad_norm": 0.7493511248909543, + "learning_rate": 6.36604774535809e-06, + "loss": 0.6148796677589417, + "step": 121 + }, + { + "epoch": 0.0323994157482406, + "grad_norm": 0.6359613412994526, + "learning_rate": 6.419098143236075e-06, + "loss": 0.558960497379303, + "step": 122 + }, + { + "epoch": 0.03266498472978356, + "grad_norm": 0.6785691051694177, + "learning_rate": 6.47214854111406e-06, + "loss": 0.5844984650611877, + "step": 123 + }, + { + "epoch": 0.03293055371132652, + "grad_norm": 
0.6692815537253501, + "learning_rate": 6.525198938992043e-06, + "loss": 0.5343623161315918, + "step": 124 + }, + { + "epoch": 0.03319612269286947, + "grad_norm": 0.6705726789318588, + "learning_rate": 6.5782493368700276e-06, + "loss": 0.5834348797798157, + "step": 125 + }, + { + "epoch": 0.03346169167441243, + "grad_norm": 0.7626576562771024, + "learning_rate": 6.631299734748011e-06, + "loss": 0.5997360944747925, + "step": 126 + }, + { + "epoch": 0.03372726065595538, + "grad_norm": 0.7117893752859364, + "learning_rate": 6.684350132625995e-06, + "loss": 0.5991666316986084, + "step": 127 + }, + { + "epoch": 0.03399282963749834, + "grad_norm": 0.7060406683837459, + "learning_rate": 6.737400530503979e-06, + "loss": 0.581120491027832, + "step": 128 + }, + { + "epoch": 0.03425839861904129, + "grad_norm": 0.6869761252397286, + "learning_rate": 6.790450928381963e-06, + "loss": 0.6219569444656372, + "step": 129 + }, + { + "epoch": 0.03452396760058425, + "grad_norm": 0.6916173566260286, + "learning_rate": 6.843501326259947e-06, + "loss": 0.5950608253479004, + "step": 130 + }, + { + "epoch": 0.03478953658212721, + "grad_norm": 0.6136480902733893, + "learning_rate": 6.896551724137932e-06, + "loss": 0.5762747526168823, + "step": 131 + }, + { + "epoch": 0.03505510556367016, + "grad_norm": 0.670368708945713, + "learning_rate": 6.949602122015916e-06, + "loss": 0.6003131866455078, + "step": 132 + }, + { + "epoch": 0.03532067454521312, + "grad_norm": 0.6439028776339482, + "learning_rate": 7.0026525198939e-06, + "loss": 0.5866605043411255, + "step": 133 + }, + { + "epoch": 0.03558624352675607, + "grad_norm": 0.8324202287699098, + "learning_rate": 7.055702917771884e-06, + "loss": 0.6668443083763123, + "step": 134 + }, + { + "epoch": 0.03585181250829903, + "grad_norm": 0.7064456856515898, + "learning_rate": 7.108753315649868e-06, + "loss": 0.5738306045532227, + "step": 135 + }, + { + "epoch": 0.03611738148984198, + "grad_norm": 0.6941604370641007, + "learning_rate": 
7.1618037135278515e-06, + "loss": 0.5774663686752319, + "step": 136 + }, + { + "epoch": 0.03638295047138494, + "grad_norm": 0.7648336305672251, + "learning_rate": 7.214854111405836e-06, + "loss": 0.5721150636672974, + "step": 137 + }, + { + "epoch": 0.0366485194529279, + "grad_norm": 0.7394576462203543, + "learning_rate": 7.267904509283821e-06, + "loss": 0.6350122690200806, + "step": 138 + }, + { + "epoch": 0.03691408843447085, + "grad_norm": 0.6540602529440619, + "learning_rate": 7.320954907161804e-06, + "loss": 0.5435039401054382, + "step": 139 + }, + { + "epoch": 0.03717965741601381, + "grad_norm": 0.6965351191908165, + "learning_rate": 7.374005305039789e-06, + "loss": 0.5869162678718567, + "step": 140 + }, + { + "epoch": 0.03744522639755676, + "grad_norm": 0.6664228073022063, + "learning_rate": 7.427055702917773e-06, + "loss": 0.5645807981491089, + "step": 141 + }, + { + "epoch": 0.03771079537909972, + "grad_norm": 0.6503771775205762, + "learning_rate": 7.480106100795756e-06, + "loss": 0.5502692461013794, + "step": 142 + }, + { + "epoch": 0.037976364360642674, + "grad_norm": 0.6223645459397411, + "learning_rate": 7.533156498673741e-06, + "loss": 0.5602732300758362, + "step": 143 + }, + { + "epoch": 0.03824193334218563, + "grad_norm": 0.8638951879324807, + "learning_rate": 7.586206896551724e-06, + "loss": 0.6011391282081604, + "step": 144 + }, + { + "epoch": 0.03850750232372859, + "grad_norm": 0.6930636234613441, + "learning_rate": 7.639257294429708e-06, + "loss": 0.5482327938079834, + "step": 145 + }, + { + "epoch": 0.03877307130527154, + "grad_norm": 0.6693652199128735, + "learning_rate": 7.692307692307694e-06, + "loss": 0.5926344394683838, + "step": 146 + }, + { + "epoch": 0.0390386402868145, + "grad_norm": 0.8434991800954339, + "learning_rate": 7.745358090185677e-06, + "loss": 0.6558316946029663, + "step": 147 + }, + { + "epoch": 0.039304209268357454, + "grad_norm": 0.6845819362079449, + "learning_rate": 7.79840848806366e-06, + "loss": 0.572425365447998, + 
"step": 148 + }, + { + "epoch": 0.03956977824990041, + "grad_norm": 0.696296152543372, + "learning_rate": 7.851458885941646e-06, + "loss": 0.5684784650802612, + "step": 149 + }, + { + "epoch": 0.039835347231443365, + "grad_norm": 0.6779490529346879, + "learning_rate": 7.904509283819629e-06, + "loss": 0.5843643546104431, + "step": 150 + }, + { + "epoch": 0.04010091621298632, + "grad_norm": 0.6894842979231472, + "learning_rate": 7.957559681697613e-06, + "loss": 0.5471494793891907, + "step": 151 + }, + { + "epoch": 0.04036648519452928, + "grad_norm": 0.7583250211136208, + "learning_rate": 8.010610079575598e-06, + "loss": 0.595018744468689, + "step": 152 + }, + { + "epoch": 0.040632054176072234, + "grad_norm": 0.6904128122756304, + "learning_rate": 8.063660477453583e-06, + "loss": 0.5431865453720093, + "step": 153 + }, + { + "epoch": 0.04089762315761519, + "grad_norm": 0.7943246581886504, + "learning_rate": 8.116710875331566e-06, + "loss": 0.5622385740280151, + "step": 154 + }, + { + "epoch": 0.041163192139158145, + "grad_norm": 0.7792002007338675, + "learning_rate": 8.16976127320955e-06, + "loss": 0.5795880556106567, + "step": 155 + }, + { + "epoch": 0.0414287611207011, + "grad_norm": 0.7432143976693507, + "learning_rate": 8.222811671087533e-06, + "loss": 0.5854965448379517, + "step": 156 + }, + { + "epoch": 0.041694330102244055, + "grad_norm": 0.8104825185442435, + "learning_rate": 8.275862068965518e-06, + "loss": 0.5374501943588257, + "step": 157 + }, + { + "epoch": 0.041959899083787014, + "grad_norm": 0.7598674115735401, + "learning_rate": 8.328912466843502e-06, + "loss": 0.5779006481170654, + "step": 158 + }, + { + "epoch": 0.04222546806532997, + "grad_norm": 0.7033741631796787, + "learning_rate": 8.381962864721485e-06, + "loss": 0.550236701965332, + "step": 159 + }, + { + "epoch": 0.042491037046872925, + "grad_norm": 0.7285453499901458, + "learning_rate": 8.43501326259947e-06, + "loss": 0.557443380355835, + "step": 160 + }, + { + "epoch": 0.04275660602841588, + 
"grad_norm": 0.7050753960524794, + "learning_rate": 8.488063660477454e-06, + "loss": 0.5875238180160522, + "step": 161 + }, + { + "epoch": 0.043022175009958835, + "grad_norm": 0.7215582793376403, + "learning_rate": 8.541114058355439e-06, + "loss": 0.510900616645813, + "step": 162 + }, + { + "epoch": 0.043287743991501794, + "grad_norm": 0.7559114001900116, + "learning_rate": 8.594164456233422e-06, + "loss": 0.5465859174728394, + "step": 163 + }, + { + "epoch": 0.043553312973044746, + "grad_norm": 0.7494489908601825, + "learning_rate": 8.647214854111406e-06, + "loss": 0.5508615970611572, + "step": 164 + }, + { + "epoch": 0.043818881954587705, + "grad_norm": 0.7714387963397975, + "learning_rate": 8.700265251989391e-06, + "loss": 0.5437714457511902, + "step": 165 + }, + { + "epoch": 0.04408445093613066, + "grad_norm": 0.7480600693956645, + "learning_rate": 8.753315649867374e-06, + "loss": 0.542698323726654, + "step": 166 + }, + { + "epoch": 0.044350019917673615, + "grad_norm": 0.7339141407878966, + "learning_rate": 8.806366047745358e-06, + "loss": 0.5169371962547302, + "step": 167 + }, + { + "epoch": 0.044615588899216574, + "grad_norm": 0.725595419270195, + "learning_rate": 8.859416445623343e-06, + "loss": 0.5436176061630249, + "step": 168 + }, + { + "epoch": 0.044881157880759526, + "grad_norm": 0.8205411933516983, + "learning_rate": 8.912466843501327e-06, + "loss": 0.568030834197998, + "step": 169 + }, + { + "epoch": 0.045146726862302484, + "grad_norm": 0.7544356200090666, + "learning_rate": 8.965517241379312e-06, + "loss": 0.5218889713287354, + "step": 170 + }, + { + "epoch": 0.045412295843845436, + "grad_norm": 0.7860957525035722, + "learning_rate": 9.018567639257295e-06, + "loss": 0.5275779962539673, + "step": 171 + }, + { + "epoch": 0.045677864825388395, + "grad_norm": 0.6938225497373272, + "learning_rate": 9.071618037135279e-06, + "loss": 0.5263184905052185, + "step": 172 + }, + { + "epoch": 0.045943433806931354, + "grad_norm": 0.7549069812662602, + 
"learning_rate": 9.124668435013264e-06, + "loss": 0.563044548034668, + "step": 173 + }, + { + "epoch": 0.046209002788474306, + "grad_norm": 0.9364041083837341, + "learning_rate": 9.177718832891247e-06, + "loss": 0.5896912217140198, + "step": 174 + }, + { + "epoch": 0.046474571770017264, + "grad_norm": 0.7219752548557496, + "learning_rate": 9.230769230769232e-06, + "loss": 0.5163949131965637, + "step": 175 + }, + { + "epoch": 0.046740140751560216, + "grad_norm": 0.8391633255974319, + "learning_rate": 9.283819628647216e-06, + "loss": 0.6203320026397705, + "step": 176 + }, + { + "epoch": 0.047005709733103175, + "grad_norm": 0.9119997852547688, + "learning_rate": 9.3368700265252e-06, + "loss": 0.5528024435043335, + "step": 177 + }, + { + "epoch": 0.04727127871464613, + "grad_norm": 0.8828541610102935, + "learning_rate": 9.389920424403184e-06, + "loss": 0.5657555460929871, + "step": 178 + }, + { + "epoch": 0.047536847696189086, + "grad_norm": 0.7671789386737649, + "learning_rate": 9.442970822281168e-06, + "loss": 0.5301925539970398, + "step": 179 + }, + { + "epoch": 0.04780241667773204, + "grad_norm": 0.8675940797859782, + "learning_rate": 9.496021220159151e-06, + "loss": 0.5388369560241699, + "step": 180 + }, + { + "epoch": 0.048067985659274996, + "grad_norm": 0.7966332028310692, + "learning_rate": 9.549071618037136e-06, + "loss": 0.5549717545509338, + "step": 181 + }, + { + "epoch": 0.048333554640817955, + "grad_norm": 0.8814678011939608, + "learning_rate": 9.60212201591512e-06, + "loss": 0.5959764719009399, + "step": 182 + }, + { + "epoch": 0.04859912362236091, + "grad_norm": 0.7841222204736121, + "learning_rate": 9.655172413793105e-06, + "loss": 0.5461844205856323, + "step": 183 + }, + { + "epoch": 0.048864692603903866, + "grad_norm": 0.7620084886447284, + "learning_rate": 9.708222811671088e-06, + "loss": 0.5428494811058044, + "step": 184 + }, + { + "epoch": 0.04913026158544682, + "grad_norm": 0.7918991595575344, + "learning_rate": 9.761273209549072e-06, + "loss": 
0.552198052406311, + "step": 185 + }, + { + "epoch": 0.049395830566989776, + "grad_norm": 0.6896394660507362, + "learning_rate": 9.814323607427057e-06, + "loss": 0.49992549419403076, + "step": 186 + }, + { + "epoch": 0.04966139954853273, + "grad_norm": 0.7875507527713166, + "learning_rate": 9.86737400530504e-06, + "loss": 0.557820200920105, + "step": 187 + }, + { + "epoch": 0.04992696853007569, + "grad_norm": 0.8883719893129148, + "learning_rate": 9.920424403183024e-06, + "loss": 0.5238749384880066, + "step": 188 + }, + { + "epoch": 0.050192537511618646, + "grad_norm": 0.988465476825029, + "learning_rate": 9.973474801061009e-06, + "loss": 0.5346978902816772, + "step": 189 + }, + { + "epoch": 0.0504581064931616, + "grad_norm": 0.8024883433630577, + "learning_rate": 1.0026525198938993e-05, + "loss": 0.5256577730178833, + "step": 190 + }, + { + "epoch": 0.050723675474704556, + "grad_norm": 0.8026852335394901, + "learning_rate": 1.0079575596816978e-05, + "loss": 0.5235393047332764, + "step": 191 + }, + { + "epoch": 0.05098924445624751, + "grad_norm": 0.6835673591276205, + "learning_rate": 1.013262599469496e-05, + "loss": 0.4984837472438812, + "step": 192 + }, + { + "epoch": 0.05125481343779047, + "grad_norm": 0.7829913352817355, + "learning_rate": 1.0185676392572945e-05, + "loss": 0.5209602117538452, + "step": 193 + }, + { + "epoch": 0.05152038241933342, + "grad_norm": 0.8334733472253096, + "learning_rate": 1.023872679045093e-05, + "loss": 0.5468267202377319, + "step": 194 + }, + { + "epoch": 0.05178595140087638, + "grad_norm": 0.8107908645155819, + "learning_rate": 1.0291777188328913e-05, + "loss": 0.5531667470932007, + "step": 195 + }, + { + "epoch": 0.052051520382419336, + "grad_norm": 0.8437904919697584, + "learning_rate": 1.0344827586206898e-05, + "loss": 0.5741526484489441, + "step": 196 + }, + { + "epoch": 0.05231708936396229, + "grad_norm": 0.6830882515315945, + "learning_rate": 1.039787798408488e-05, + "loss": 0.46132561564445496, + "step": 197 + }, + { + 
"epoch": 0.05258265834550525, + "grad_norm": 0.8402230890409916, + "learning_rate": 1.0450928381962865e-05, + "loss": 0.5074198842048645, + "step": 198 + }, + { + "epoch": 0.0528482273270482, + "grad_norm": 0.7476727742688456, + "learning_rate": 1.0503978779840849e-05, + "loss": 0.5193089842796326, + "step": 199 + }, + { + "epoch": 0.05311379630859116, + "grad_norm": 0.7814745235248249, + "learning_rate": 1.0557029177718834e-05, + "loss": 0.5209243297576904, + "step": 200 + }, + { + "epoch": 0.05337936529013411, + "grad_norm": 0.8844918483638834, + "learning_rate": 1.0610079575596819e-05, + "loss": 0.5607191920280457, + "step": 201 + }, + { + "epoch": 0.05364493427167707, + "grad_norm": 0.7926104097207243, + "learning_rate": 1.0663129973474802e-05, + "loss": 0.5482805371284485, + "step": 202 + }, + { + "epoch": 0.05391050325322003, + "grad_norm": 0.8109463956858287, + "learning_rate": 1.0716180371352788e-05, + "loss": 0.5579961538314819, + "step": 203 + }, + { + "epoch": 0.05417607223476298, + "grad_norm": 0.8246893162942163, + "learning_rate": 1.076923076923077e-05, + "loss": 0.5119072794914246, + "step": 204 + }, + { + "epoch": 0.05444164121630594, + "grad_norm": 0.8293246958439139, + "learning_rate": 1.0822281167108754e-05, + "loss": 0.5129292607307434, + "step": 205 + }, + { + "epoch": 0.05470721019784889, + "grad_norm": 0.6895550242199711, + "learning_rate": 1.0875331564986738e-05, + "loss": 0.500032901763916, + "step": 206 + }, + { + "epoch": 0.05497277917939185, + "grad_norm": 0.8385731092525408, + "learning_rate": 1.0928381962864723e-05, + "loss": 0.5264571309089661, + "step": 207 + }, + { + "epoch": 0.0552383481609348, + "grad_norm": 0.7915802802090326, + "learning_rate": 1.0981432360742708e-05, + "loss": 0.5569590330123901, + "step": 208 + }, + { + "epoch": 0.05550391714247776, + "grad_norm": 0.8546725938844908, + "learning_rate": 1.103448275862069e-05, + "loss": 0.5429908037185669, + "step": 209 + }, + { + "epoch": 0.05576948612402072, + "grad_norm": 
0.8175642333393268, + "learning_rate": 1.1087533156498675e-05, + "loss": 0.5073692202568054, + "step": 210 + }, + { + "epoch": 0.05603505510556367, + "grad_norm": 0.9551222157670755, + "learning_rate": 1.1140583554376659e-05, + "loss": 0.5613659620285034, + "step": 211 + }, + { + "epoch": 0.05630062408710663, + "grad_norm": 1.8348970874488084, + "learning_rate": 1.1193633952254644e-05, + "loss": 0.5197691917419434, + "step": 212 + }, + { + "epoch": 0.05656619306864958, + "grad_norm": 0.9173115658326468, + "learning_rate": 1.1246684350132625e-05, + "loss": 0.5410990715026855, + "step": 213 + }, + { + "epoch": 0.05683176205019254, + "grad_norm": 0.8562107533946397, + "learning_rate": 1.129973474801061e-05, + "loss": 0.5852477550506592, + "step": 214 + }, + { + "epoch": 0.05709733103173549, + "grad_norm": 0.8483195878163089, + "learning_rate": 1.1352785145888594e-05, + "loss": 0.5312488079071045, + "step": 215 + }, + { + "epoch": 0.05736290001327845, + "grad_norm": 0.8817111257753456, + "learning_rate": 1.140583554376658e-05, + "loss": 0.5075235366821289, + "step": 216 + }, + { + "epoch": 0.05762846899482141, + "grad_norm": 0.8014885700994473, + "learning_rate": 1.1458885941644564e-05, + "loss": 0.5213298797607422, + "step": 217 + }, + { + "epoch": 0.05789403797636436, + "grad_norm": 0.8852582070340804, + "learning_rate": 1.1511936339522548e-05, + "loss": 0.5564183592796326, + "step": 218 + }, + { + "epoch": 0.05815960695790732, + "grad_norm": 1.0148412469588788, + "learning_rate": 1.1564986737400531e-05, + "loss": 0.5328387022018433, + "step": 219 + }, + { + "epoch": 0.05842517593945027, + "grad_norm": 0.7824132338865165, + "learning_rate": 1.1618037135278515e-05, + "loss": 0.5010273456573486, + "step": 220 + }, + { + "epoch": 0.05869074492099323, + "grad_norm": 0.8493817546068081, + "learning_rate": 1.16710875331565e-05, + "loss": 0.5473708510398865, + "step": 221 + }, + { + "epoch": 0.05895631390253618, + "grad_norm": 1.1554913959885298, + "learning_rate": 
1.1724137931034483e-05, + "loss": 0.5359818339347839, + "step": 222 + }, + { + "epoch": 0.05922188288407914, + "grad_norm": 0.9663065987200732, + "learning_rate": 1.1777188328912468e-05, + "loss": 0.5274665951728821, + "step": 223 + }, + { + "epoch": 0.0594874518656221, + "grad_norm": 0.8158672021913522, + "learning_rate": 1.1830238726790454e-05, + "loss": 0.5463781952857971, + "step": 224 + }, + { + "epoch": 0.05975302084716505, + "grad_norm": 0.7817235200046289, + "learning_rate": 1.1883289124668435e-05, + "loss": 0.553212583065033, + "step": 225 + }, + { + "epoch": 0.06001858982870801, + "grad_norm": 0.8540074681170072, + "learning_rate": 1.193633952254642e-05, + "loss": 0.47144171595573425, + "step": 226 + }, + { + "epoch": 0.06028415881025096, + "grad_norm": 0.9191106803002166, + "learning_rate": 1.1989389920424404e-05, + "loss": 0.506844162940979, + "step": 227 + }, + { + "epoch": 0.06054972779179392, + "grad_norm": 0.794192267301098, + "learning_rate": 1.2042440318302389e-05, + "loss": 0.4965322017669678, + "step": 228 + }, + { + "epoch": 0.06081529677333687, + "grad_norm": 0.8421546110465796, + "learning_rate": 1.2095490716180371e-05, + "loss": 0.4815751612186432, + "step": 229 + }, + { + "epoch": 0.06108086575487983, + "grad_norm": 0.8107361719185122, + "learning_rate": 1.2148541114058356e-05, + "loss": 0.5245312452316284, + "step": 230 + }, + { + "epoch": 0.06134643473642279, + "grad_norm": 0.8749447967552209, + "learning_rate": 1.2201591511936341e-05, + "loss": 0.5215133428573608, + "step": 231 + }, + { + "epoch": 0.06161200371796574, + "grad_norm": 0.8315635530714504, + "learning_rate": 1.2254641909814325e-05, + "loss": 0.5039419531822205, + "step": 232 + }, + { + "epoch": 0.0618775726995087, + "grad_norm": 1.0583546039713638, + "learning_rate": 1.230769230769231e-05, + "loss": 0.5562925338745117, + "step": 233 + }, + { + "epoch": 0.06214314168105165, + "grad_norm": 1.069780059811152, + "learning_rate": 1.2360742705570291e-05, + "loss": 
0.5372984409332275, + "step": 234 + }, + { + "epoch": 0.06240871066259461, + "grad_norm": 0.8766841361731121, + "learning_rate": 1.2413793103448277e-05, + "loss": 0.44987717270851135, + "step": 235 + }, + { + "epoch": 0.06267427964413756, + "grad_norm": 0.9229136432445015, + "learning_rate": 1.246684350132626e-05, + "loss": 0.537068247795105, + "step": 236 + }, + { + "epoch": 0.06293984862568051, + "grad_norm": 0.9828329951785308, + "learning_rate": 1.2519893899204245e-05, + "loss": 0.504779577255249, + "step": 237 + }, + { + "epoch": 0.06320541760722348, + "grad_norm": 1.0061858451025696, + "learning_rate": 1.257294429708223e-05, + "loss": 0.5524113774299622, + "step": 238 + }, + { + "epoch": 0.06347098658876643, + "grad_norm": 0.9888885225244529, + "learning_rate": 1.2625994694960214e-05, + "loss": 0.5089439153671265, + "step": 239 + }, + { + "epoch": 0.06373655557030938, + "grad_norm": 0.8394940482178029, + "learning_rate": 1.2679045092838197e-05, + "loss": 0.4501679837703705, + "step": 240 + }, + { + "epoch": 0.06400212455185235, + "grad_norm": 0.8117693384854435, + "learning_rate": 1.273209549071618e-05, + "loss": 0.5360216498374939, + "step": 241 + }, + { + "epoch": 0.0642676935333953, + "grad_norm": 0.876954304053235, + "learning_rate": 1.2785145888594166e-05, + "loss": 0.5595712661743164, + "step": 242 + }, + { + "epoch": 0.06453326251493825, + "grad_norm": 1.080992038181853, + "learning_rate": 1.283819628647215e-05, + "loss": 0.5010904669761658, + "step": 243 + }, + { + "epoch": 0.0647988314964812, + "grad_norm": 1.0446842005075034, + "learning_rate": 1.2891246684350134e-05, + "loss": 0.5053697228431702, + "step": 244 + }, + { + "epoch": 0.06506440047802417, + "grad_norm": 0.803002193385922, + "learning_rate": 1.294429708222812e-05, + "loss": 0.5045514106750488, + "step": 245 + }, + { + "epoch": 0.06532996945956712, + "grad_norm": 0.7912163744531999, + "learning_rate": 1.2997347480106101e-05, + "loss": 0.5546073913574219, + "step": 246 + }, + { + "epoch": 
0.06559553844111007, + "grad_norm": 0.9572908035308383, + "learning_rate": 1.3050397877984087e-05, + "loss": 0.47276046872138977, + "step": 247 + }, + { + "epoch": 0.06586110742265304, + "grad_norm": 0.8233476091470914, + "learning_rate": 1.310344827586207e-05, + "loss": 0.4757889211177826, + "step": 248 + }, + { + "epoch": 0.06612667640419599, + "grad_norm": 0.8415305337388579, + "learning_rate": 1.3156498673740055e-05, + "loss": 0.5078848600387573, + "step": 249 + }, + { + "epoch": 0.06639224538573894, + "grad_norm": 0.8437984625649567, + "learning_rate": 1.3209549071618037e-05, + "loss": 0.4890335202217102, + "step": 250 + }, + { + "epoch": 0.0666578143672819, + "grad_norm": 0.8299999132068526, + "learning_rate": 1.3262599469496022e-05, + "loss": 0.5406580567359924, + "step": 251 + }, + { + "epoch": 0.06692338334882486, + "grad_norm": 0.9307594142144101, + "learning_rate": 1.3315649867374005e-05, + "loss": 0.5236875414848328, + "step": 252 + }, + { + "epoch": 0.06718895233036781, + "grad_norm": 1.0602580439454288, + "learning_rate": 1.336870026525199e-05, + "loss": 0.4991317391395569, + "step": 253 + }, + { + "epoch": 0.06745452131191076, + "grad_norm": 0.8277603880683132, + "learning_rate": 1.3421750663129976e-05, + "loss": 0.4234679639339447, + "step": 254 + }, + { + "epoch": 0.06772009029345373, + "grad_norm": 0.9984839302922622, + "learning_rate": 1.3474801061007958e-05, + "loss": 0.49749234318733215, + "step": 255 + }, + { + "epoch": 0.06798565927499668, + "grad_norm": 0.9543855303701088, + "learning_rate": 1.3527851458885943e-05, + "loss": 0.5049105286598206, + "step": 256 + }, + { + "epoch": 0.06825122825653963, + "grad_norm": 0.8443711840757044, + "learning_rate": 1.3580901856763926e-05, + "loss": 0.5355304479598999, + "step": 257 + }, + { + "epoch": 0.06851679723808259, + "grad_norm": 0.9255144140027944, + "learning_rate": 1.3633952254641911e-05, + "loss": 0.46302929520606995, + "step": 258 + }, + { + "epoch": 0.06878236621962555, + "grad_norm": 
0.953877794861965, + "learning_rate": 1.3687002652519895e-05, + "loss": 0.5054173469543457, + "step": 259 + }, + { + "epoch": 0.0690479352011685, + "grad_norm": 0.8214682466537866, + "learning_rate": 1.374005305039788e-05, + "loss": 0.5018566846847534, + "step": 260 + }, + { + "epoch": 0.06931350418271146, + "grad_norm": 0.878430758752321, + "learning_rate": 1.3793103448275863e-05, + "loss": 0.4938735365867615, + "step": 261 + }, + { + "epoch": 0.06957907316425442, + "grad_norm": 0.8343439459008911, + "learning_rate": 1.3846153846153847e-05, + "loss": 0.4605029225349426, + "step": 262 + }, + { + "epoch": 0.06984464214579737, + "grad_norm": 0.8260329604526515, + "learning_rate": 1.3899204244031832e-05, + "loss": 0.5056782960891724, + "step": 263 + }, + { + "epoch": 0.07011021112734032, + "grad_norm": 0.860551370737139, + "learning_rate": 1.3952254641909815e-05, + "loss": 0.5017784833908081, + "step": 264 + }, + { + "epoch": 0.07037578010888328, + "grad_norm": 0.8353804409772935, + "learning_rate": 1.40053050397878e-05, + "loss": 0.5132012367248535, + "step": 265 + }, + { + "epoch": 0.07064134909042624, + "grad_norm": 0.8151795113028358, + "learning_rate": 1.4058355437665782e-05, + "loss": 0.531212329864502, + "step": 266 + }, + { + "epoch": 0.0709069180719692, + "grad_norm": 0.8086605566204427, + "learning_rate": 1.4111405835543767e-05, + "loss": 0.4900968074798584, + "step": 267 + }, + { + "epoch": 0.07117248705351215, + "grad_norm": 0.8735731145360269, + "learning_rate": 1.4164456233421753e-05, + "loss": 0.45277124643325806, + "step": 268 + }, + { + "epoch": 0.07143805603505511, + "grad_norm": 0.8760293380808535, + "learning_rate": 1.4217506631299736e-05, + "loss": 0.48026078939437866, + "step": 269 + }, + { + "epoch": 0.07170362501659806, + "grad_norm": 0.9019281227597356, + "learning_rate": 1.4270557029177721e-05, + "loss": 0.5111234784126282, + "step": 270 + }, + { + "epoch": 0.07196919399814102, + "grad_norm": 0.9120608197487232, + "learning_rate": 
1.4323607427055703e-05, + "loss": 0.5448082685470581, + "step": 271 + }, + { + "epoch": 0.07223476297968397, + "grad_norm": 0.9400729117423203, + "learning_rate": 1.4376657824933688e-05, + "loss": 0.5242921113967896, + "step": 272 + }, + { + "epoch": 0.07250033196122693, + "grad_norm": 0.9404952891335322, + "learning_rate": 1.4429708222811672e-05, + "loss": 0.5194095373153687, + "step": 273 + }, + { + "epoch": 0.07276590094276988, + "grad_norm": 0.8893776382848525, + "learning_rate": 1.4482758620689657e-05, + "loss": 0.4620330333709717, + "step": 274 + }, + { + "epoch": 0.07303146992431284, + "grad_norm": 0.886983687866706, + "learning_rate": 1.4535809018567642e-05, + "loss": 0.4654063582420349, + "step": 275 + }, + { + "epoch": 0.0732970389058558, + "grad_norm": 0.7984003718276244, + "learning_rate": 1.4588859416445624e-05, + "loss": 0.4637746810913086, + "step": 276 + }, + { + "epoch": 0.07356260788739875, + "grad_norm": 0.8288882522584324, + "learning_rate": 1.4641909814323609e-05, + "loss": 0.47949421405792236, + "step": 277 + }, + { + "epoch": 0.0738281768689417, + "grad_norm": 1.0041804846004008, + "learning_rate": 1.4694960212201592e-05, + "loss": 0.49565935134887695, + "step": 278 + }, + { + "epoch": 0.07409374585048466, + "grad_norm": 0.9214786055945364, + "learning_rate": 1.4748010610079577e-05, + "loss": 0.5057941675186157, + "step": 279 + }, + { + "epoch": 0.07435931483202762, + "grad_norm": 0.9073397896109812, + "learning_rate": 1.480106100795756e-05, + "loss": 0.5495956540107727, + "step": 280 + }, + { + "epoch": 0.07462488381357057, + "grad_norm": 0.8743353741776648, + "learning_rate": 1.4854111405835546e-05, + "loss": 0.4502897560596466, + "step": 281 + }, + { + "epoch": 0.07489045279511353, + "grad_norm": 0.8694785116368758, + "learning_rate": 1.490716180371353e-05, + "loss": 0.4799070954322815, + "step": 282 + }, + { + "epoch": 0.07515602177665649, + "grad_norm": 0.886176954457428, + "learning_rate": 1.4960212201591513e-05, + "loss": 
0.45640307664871216, + "step": 283 + }, + { + "epoch": 0.07542159075819944, + "grad_norm": 0.8937725285994821, + "learning_rate": 1.5013262599469498e-05, + "loss": 0.47862207889556885, + "step": 284 + }, + { + "epoch": 0.0756871597397424, + "grad_norm": 0.8717898339198907, + "learning_rate": 1.5066312997347481e-05, + "loss": 0.48195987939834595, + "step": 285 + }, + { + "epoch": 0.07595272872128535, + "grad_norm": 0.9124586645482137, + "learning_rate": 1.5119363395225467e-05, + "loss": 0.518566370010376, + "step": 286 + }, + { + "epoch": 0.07621829770282831, + "grad_norm": 0.9766882853479317, + "learning_rate": 1.5172413793103448e-05, + "loss": 0.5034162402153015, + "step": 287 + }, + { + "epoch": 0.07648386668437127, + "grad_norm": 0.8995114639723897, + "learning_rate": 1.5225464190981433e-05, + "loss": 0.497822642326355, + "step": 288 + }, + { + "epoch": 0.07674943566591422, + "grad_norm": 0.8484786603983125, + "learning_rate": 1.5278514588859417e-05, + "loss": 0.510530412197113, + "step": 289 + }, + { + "epoch": 0.07701500464745718, + "grad_norm": 0.9406440408252492, + "learning_rate": 1.53315649867374e-05, + "loss": 0.5163881778717041, + "step": 290 + }, + { + "epoch": 0.07728057362900013, + "grad_norm": 0.9825958938719339, + "learning_rate": 1.5384615384615387e-05, + "loss": 0.5161621570587158, + "step": 291 + }, + { + "epoch": 0.07754614261054309, + "grad_norm": 0.8680267479326179, + "learning_rate": 1.543766578249337e-05, + "loss": 0.5260482430458069, + "step": 292 + }, + { + "epoch": 0.07781171159208604, + "grad_norm": 0.8791995274446183, + "learning_rate": 1.5490716180371354e-05, + "loss": 0.4946279227733612, + "step": 293 + }, + { + "epoch": 0.078077280573629, + "grad_norm": 0.9734620967906259, + "learning_rate": 1.5543766578249338e-05, + "loss": 0.5030514001846313, + "step": 294 + }, + { + "epoch": 0.07834284955517196, + "grad_norm": 0.899295097408943, + "learning_rate": 1.559681697612732e-05, + "loss": 0.48864102363586426, + "step": 295 + }, + { + 
"epoch": 0.07860841853671491, + "grad_norm": 0.8710376092284174, + "learning_rate": 1.5649867374005304e-05, + "loss": 0.48310425877571106, + "step": 296 + }, + { + "epoch": 0.07887398751825787, + "grad_norm": 1.0094258392730318, + "learning_rate": 1.570291777188329e-05, + "loss": 0.4451446533203125, + "step": 297 + }, + { + "epoch": 0.07913955649980083, + "grad_norm": 0.9863170561942101, + "learning_rate": 1.5755968169761275e-05, + "loss": 0.4884604811668396, + "step": 298 + }, + { + "epoch": 0.07940512548134378, + "grad_norm": 0.8355693003184833, + "learning_rate": 1.5809018567639258e-05, + "loss": 0.5047659873962402, + "step": 299 + }, + { + "epoch": 0.07967069446288673, + "grad_norm": 0.8879040718748079, + "learning_rate": 1.586206896551724e-05, + "loss": 0.49124205112457275, + "step": 300 + }, + { + "epoch": 0.0799362634444297, + "grad_norm": 0.9411885452551192, + "learning_rate": 1.5915119363395225e-05, + "loss": 0.5113086700439453, + "step": 301 + }, + { + "epoch": 0.08020183242597265, + "grad_norm": 0.9345380756850689, + "learning_rate": 1.5968169761273212e-05, + "loss": 0.5298338532447815, + "step": 302 + }, + { + "epoch": 0.0804674014075156, + "grad_norm": 0.9050429706274331, + "learning_rate": 1.6021220159151195e-05, + "loss": 0.4673181176185608, + "step": 303 + }, + { + "epoch": 0.08073297038905856, + "grad_norm": 0.8972864762330055, + "learning_rate": 1.607427055702918e-05, + "loss": 0.45361828804016113, + "step": 304 + }, + { + "epoch": 0.08099853937060152, + "grad_norm": 0.8848533583648175, + "learning_rate": 1.6127320954907166e-05, + "loss": 0.5144034624099731, + "step": 305 + }, + { + "epoch": 0.08126410835214447, + "grad_norm": 0.9263690972931414, + "learning_rate": 1.6180371352785146e-05, + "loss": 0.5027451515197754, + "step": 306 + }, + { + "epoch": 0.08152967733368742, + "grad_norm": 0.8575377500476566, + "learning_rate": 1.6233421750663133e-05, + "loss": 0.4987551271915436, + "step": 307 + }, + { + "epoch": 0.08179524631523039, + "grad_norm": 
1.0121964253373468, + "learning_rate": 1.6286472148541116e-05, + "loss": 0.5433062314987183, + "step": 308 + }, + { + "epoch": 0.08206081529677334, + "grad_norm": 0.8973695218716041, + "learning_rate": 1.63395225464191e-05, + "loss": 0.49603772163391113, + "step": 309 + }, + { + "epoch": 0.08232638427831629, + "grad_norm": 0.9033181815462389, + "learning_rate": 1.6392572944297083e-05, + "loss": 0.47990959882736206, + "step": 310 + }, + { + "epoch": 0.08259195325985925, + "grad_norm": 0.9843185449650845, + "learning_rate": 1.6445623342175066e-05, + "loss": 0.5196831226348877, + "step": 311 + }, + { + "epoch": 0.0828575222414022, + "grad_norm": 0.8589822510995361, + "learning_rate": 1.6498673740053053e-05, + "loss": 0.4664091467857361, + "step": 312 + }, + { + "epoch": 0.08312309122294516, + "grad_norm": 0.9077443936761218, + "learning_rate": 1.6551724137931037e-05, + "loss": 0.4405553936958313, + "step": 313 + }, + { + "epoch": 0.08338866020448811, + "grad_norm": 0.8561334135462362, + "learning_rate": 1.660477453580902e-05, + "loss": 0.46172815561294556, + "step": 314 + }, + { + "epoch": 0.08365422918603108, + "grad_norm": 0.8835708894071636, + "learning_rate": 1.6657824933687004e-05, + "loss": 0.5004327297210693, + "step": 315 + }, + { + "epoch": 0.08391979816757403, + "grad_norm": 0.8452618593185571, + "learning_rate": 1.6710875331564987e-05, + "loss": 0.4727814197540283, + "step": 316 + }, + { + "epoch": 0.08418536714911698, + "grad_norm": 0.7631381381409372, + "learning_rate": 1.676392572944297e-05, + "loss": 0.43602120876312256, + "step": 317 + }, + { + "epoch": 0.08445093613065995, + "grad_norm": 0.9092168864142193, + "learning_rate": 1.6816976127320957e-05, + "loss": 0.5110410451889038, + "step": 318 + }, + { + "epoch": 0.0847165051122029, + "grad_norm": 0.9902301773407237, + "learning_rate": 1.687002652519894e-05, + "loss": 0.4798283278942108, + "step": 319 + }, + { + "epoch": 0.08498207409374585, + "grad_norm": 0.8572923551208312, + "learning_rate": 
1.6923076923076924e-05, + "loss": 0.45690029859542847, + "step": 320 + }, + { + "epoch": 0.0852476430752888, + "grad_norm": 0.8864718165003516, + "learning_rate": 1.6976127320954908e-05, + "loss": 0.4770117998123169, + "step": 321 + }, + { + "epoch": 0.08551321205683177, + "grad_norm": 0.888032985544436, + "learning_rate": 1.702917771883289e-05, + "loss": 0.512240469455719, + "step": 322 + }, + { + "epoch": 0.08577878103837472, + "grad_norm": 0.8665270088700595, + "learning_rate": 1.7082228116710878e-05, + "loss": 0.4696195423603058, + "step": 323 + }, + { + "epoch": 0.08604435001991767, + "grad_norm": 0.8876364903970222, + "learning_rate": 1.713527851458886e-05, + "loss": 0.4779578149318695, + "step": 324 + }, + { + "epoch": 0.08630991900146064, + "grad_norm": 0.9604080935445363, + "learning_rate": 1.7188328912466845e-05, + "loss": 0.48670440912246704, + "step": 325 + }, + { + "epoch": 0.08657548798300359, + "grad_norm": 0.9813156772782552, + "learning_rate": 1.7241379310344828e-05, + "loss": 0.5285798907279968, + "step": 326 + }, + { + "epoch": 0.08684105696454654, + "grad_norm": 0.9264252564283505, + "learning_rate": 1.7294429708222812e-05, + "loss": 0.46095865964889526, + "step": 327 + }, + { + "epoch": 0.08710662594608949, + "grad_norm": 0.8953179311501671, + "learning_rate": 1.73474801061008e-05, + "loss": 0.44342565536499023, + "step": 328 + }, + { + "epoch": 0.08737219492763246, + "grad_norm": 0.9640917124230414, + "learning_rate": 1.7400530503978782e-05, + "loss": 0.48974257707595825, + "step": 329 + }, + { + "epoch": 0.08763776390917541, + "grad_norm": 1.3568266957703046, + "learning_rate": 1.7453580901856765e-05, + "loss": 0.4763977527618408, + "step": 330 + }, + { + "epoch": 0.08790333289071836, + "grad_norm": 1.0231360729141987, + "learning_rate": 1.750663129973475e-05, + "loss": 0.5390856266021729, + "step": 331 + }, + { + "epoch": 0.08816890187226133, + "grad_norm": 0.9254788253309115, + "learning_rate": 1.7559681697612732e-05, + "loss": 
0.4833192825317383, + "step": 332 + }, + { + "epoch": 0.08843447085380428, + "grad_norm": 0.9106057248503829, + "learning_rate": 1.7612732095490716e-05, + "loss": 0.47842955589294434, + "step": 333 + }, + { + "epoch": 0.08870003983534723, + "grad_norm": 0.8653538374375338, + "learning_rate": 1.7665782493368703e-05, + "loss": 0.4543060064315796, + "step": 334 + }, + { + "epoch": 0.08896560881689018, + "grad_norm": 0.9024795887264612, + "learning_rate": 1.7718832891246686e-05, + "loss": 0.4492039978504181, + "step": 335 + }, + { + "epoch": 0.08923117779843315, + "grad_norm": 0.9660730803540603, + "learning_rate": 1.777188328912467e-05, + "loss": 0.4930066466331482, + "step": 336 + }, + { + "epoch": 0.0894967467799761, + "grad_norm": 0.9494811659806174, + "learning_rate": 1.7824933687002653e-05, + "loss": 0.46343356370925903, + "step": 337 + }, + { + "epoch": 0.08976231576151905, + "grad_norm": 0.98824099461907, + "learning_rate": 1.7877984084880636e-05, + "loss": 0.5118839740753174, + "step": 338 + }, + { + "epoch": 0.09002788474306202, + "grad_norm": 0.9759312233085756, + "learning_rate": 1.7931034482758623e-05, + "loss": 0.4659194350242615, + "step": 339 + }, + { + "epoch": 0.09029345372460497, + "grad_norm": 0.868792760549277, + "learning_rate": 1.7984084880636607e-05, + "loss": 0.45929303765296936, + "step": 340 + }, + { + "epoch": 0.09055902270614792, + "grad_norm": 0.9774857416777888, + "learning_rate": 1.803713527851459e-05, + "loss": 0.5072556734085083, + "step": 341 + }, + { + "epoch": 0.09082459168769087, + "grad_norm": 0.8722377179138728, + "learning_rate": 1.8090185676392577e-05, + "loss": 0.42370402812957764, + "step": 342 + }, + { + "epoch": 0.09109016066923384, + "grad_norm": 0.9404121189660462, + "learning_rate": 1.8143236074270557e-05, + "loss": 0.5017818212509155, + "step": 343 + }, + { + "epoch": 0.09135572965077679, + "grad_norm": 1.0279846493738434, + "learning_rate": 1.8196286472148544e-05, + "loss": 0.4746384620666504, + "step": 344 + }, + { + 
"epoch": 0.09162129863231974, + "grad_norm": 1.0016746569872437, + "learning_rate": 1.8249336870026527e-05, + "loss": 0.49020540714263916, + "step": 345 + }, + { + "epoch": 0.09188686761386271, + "grad_norm": 0.8521475505102624, + "learning_rate": 1.830238726790451e-05, + "loss": 0.4569393992424011, + "step": 346 + }, + { + "epoch": 0.09215243659540566, + "grad_norm": 0.9587089968564823, + "learning_rate": 1.8355437665782494e-05, + "loss": 0.46831727027893066, + "step": 347 + }, + { + "epoch": 0.09241800557694861, + "grad_norm": 0.909230845841239, + "learning_rate": 1.8408488063660478e-05, + "loss": 0.4795265197753906, + "step": 348 + }, + { + "epoch": 0.09268357455849156, + "grad_norm": 0.9641043081337674, + "learning_rate": 1.8461538461538465e-05, + "loss": 0.5122503042221069, + "step": 349 + }, + { + "epoch": 0.09294914354003453, + "grad_norm": 0.8617611974669258, + "learning_rate": 1.8514588859416448e-05, + "loss": 0.4190404713153839, + "step": 350 + }, + { + "epoch": 0.09321471252157748, + "grad_norm": 0.9061006884991066, + "learning_rate": 1.856763925729443e-05, + "loss": 0.47778886556625366, + "step": 351 + }, + { + "epoch": 0.09348028150312043, + "grad_norm": 0.9208451846579827, + "learning_rate": 1.8620689655172415e-05, + "loss": 0.45851507782936096, + "step": 352 + }, + { + "epoch": 0.09374585048466338, + "grad_norm": 1.0050481975496854, + "learning_rate": 1.86737400530504e-05, + "loss": 0.4888782501220703, + "step": 353 + }, + { + "epoch": 0.09401141946620635, + "grad_norm": 0.9454138173982718, + "learning_rate": 1.8726790450928382e-05, + "loss": 0.5032983422279358, + "step": 354 + }, + { + "epoch": 0.0942769884477493, + "grad_norm": 0.9130362696106749, + "learning_rate": 1.877984084880637e-05, + "loss": 0.4754604697227478, + "step": 355 + }, + { + "epoch": 0.09454255742929225, + "grad_norm": 0.9970889038933597, + "learning_rate": 1.8832891246684352e-05, + "loss": 0.488397479057312, + "step": 356 + }, + { + "epoch": 0.09480812641083522, + "grad_norm": 
1.222649143916529, + "learning_rate": 1.8885941644562336e-05, + "loss": 0.4775403141975403, + "step": 357 + }, + { + "epoch": 0.09507369539237817, + "grad_norm": 0.9872263151320333, + "learning_rate": 1.893899204244032e-05, + "loss": 0.47063153982162476, + "step": 358 + }, + { + "epoch": 0.09533926437392112, + "grad_norm": 1.0222144168199743, + "learning_rate": 1.8992042440318303e-05, + "loss": 0.4856908321380615, + "step": 359 + }, + { + "epoch": 0.09560483335546408, + "grad_norm": 0.9195037496858368, + "learning_rate": 1.904509283819629e-05, + "loss": 0.440033495426178, + "step": 360 + }, + { + "epoch": 0.09587040233700704, + "grad_norm": 0.9961899484684762, + "learning_rate": 1.9098143236074273e-05, + "loss": 0.4825770854949951, + "step": 361 + }, + { + "epoch": 0.09613597131854999, + "grad_norm": 0.9443841189655576, + "learning_rate": 1.9151193633952256e-05, + "loss": 0.48192232847213745, + "step": 362 + }, + { + "epoch": 0.09640154030009294, + "grad_norm": 0.9065595450317342, + "learning_rate": 1.920424403183024e-05, + "loss": 0.4689444899559021, + "step": 363 + }, + { + "epoch": 0.09666710928163591, + "grad_norm": 0.9970961253516039, + "learning_rate": 1.9257294429708223e-05, + "loss": 0.47120895981788635, + "step": 364 + }, + { + "epoch": 0.09693267826317886, + "grad_norm": 1.0106028234477955, + "learning_rate": 1.931034482758621e-05, + "loss": 0.4968941807746887, + "step": 365 + }, + { + "epoch": 0.09719824724472181, + "grad_norm": 1.115125675989656, + "learning_rate": 1.9363395225464193e-05, + "loss": 0.46982288360595703, + "step": 366 + }, + { + "epoch": 0.09746381622626477, + "grad_norm": 0.9408972278578609, + "learning_rate": 1.9416445623342177e-05, + "loss": 0.4541531205177307, + "step": 367 + }, + { + "epoch": 0.09772938520780773, + "grad_norm": 0.9760564476186651, + "learning_rate": 1.946949602122016e-05, + "loss": 0.45576703548431396, + "step": 368 + }, + { + "epoch": 0.09799495418935068, + "grad_norm": 0.9893999168346334, + "learning_rate": 
1.9522546419098144e-05, + "loss": 0.48060357570648193, + "step": 369 + }, + { + "epoch": 0.09826052317089363, + "grad_norm": 0.9675810264832774, + "learning_rate": 1.9575596816976127e-05, + "loss": 0.47536781430244446, + "step": 370 + }, + { + "epoch": 0.0985260921524366, + "grad_norm": 0.9516181191759193, + "learning_rate": 1.9628647214854114e-05, + "loss": 0.46463894844055176, + "step": 371 + }, + { + "epoch": 0.09879166113397955, + "grad_norm": 1.0082712913027811, + "learning_rate": 1.9681697612732098e-05, + "loss": 0.49570178985595703, + "step": 372 + }, + { + "epoch": 0.0990572301155225, + "grad_norm": 1.0327922438955468, + "learning_rate": 1.973474801061008e-05, + "loss": 0.4764043390750885, + "step": 373 + }, + { + "epoch": 0.09932279909706546, + "grad_norm": 0.9227866290107449, + "learning_rate": 1.9787798408488064e-05, + "loss": 0.43582671880722046, + "step": 374 + }, + { + "epoch": 0.09958836807860842, + "grad_norm": 0.9360238854832598, + "learning_rate": 1.9840848806366048e-05, + "loss": 0.46077725291252136, + "step": 375 + }, + { + "epoch": 0.09985393706015137, + "grad_norm": 0.9607682273492437, + "learning_rate": 1.9893899204244035e-05, + "loss": 0.4794929027557373, + "step": 376 + }, + { + "epoch": 0.10011950604169433, + "grad_norm": 0.9619848398175739, + "learning_rate": 1.9946949602122018e-05, + "loss": 0.43174588680267334, + "step": 377 + }, + { + "epoch": 0.10038507502323729, + "grad_norm": 0.90095462919728, + "learning_rate": 2e-05, + "loss": 0.44885915517807007, + "step": 378 + }, + { + "epoch": 0.10065064400478024, + "grad_norm": 1.0789787198205218, + "learning_rate": 1.9999999036058974e-05, + "loss": 0.520150899887085, + "step": 379 + }, + { + "epoch": 0.1009162129863232, + "grad_norm": 0.9699182604374589, + "learning_rate": 1.9999996144236068e-05, + "loss": 0.5139277577400208, + "step": 380 + }, + { + "epoch": 0.10118178196786615, + "grad_norm": 1.0077278580199993, + "learning_rate": 1.999999132453184e-05, + "loss": 0.48935171961784363, + 
"step": 381 + }, + { + "epoch": 0.10144735094940911, + "grad_norm": 0.9095465340361383, + "learning_rate": 1.999998457694723e-05, + "loss": 0.4805561304092407, + "step": 382 + }, + { + "epoch": 0.10171291993095206, + "grad_norm": 0.9209321398292457, + "learning_rate": 1.9999975901483532e-05, + "loss": 0.4340912997722626, + "step": 383 + }, + { + "epoch": 0.10197848891249502, + "grad_norm": 1.0414639039942946, + "learning_rate": 1.999996529814242e-05, + "loss": 0.48282474279403687, + "step": 384 + }, + { + "epoch": 0.10224405789403798, + "grad_norm": 0.9753320144694753, + "learning_rate": 1.999995276692593e-05, + "loss": 0.4653206169605255, + "step": 385 + }, + { + "epoch": 0.10250962687558093, + "grad_norm": 0.919281113033857, + "learning_rate": 1.999993830783649e-05, + "loss": 0.48501014709472656, + "step": 386 + }, + { + "epoch": 0.10277519585712389, + "grad_norm": 1.0711296444042975, + "learning_rate": 1.9999921920876882e-05, + "loss": 0.48260143399238586, + "step": 387 + }, + { + "epoch": 0.10304076483866684, + "grad_norm": 0.9590085896328235, + "learning_rate": 1.9999903606050267e-05, + "loss": 0.44557270407676697, + "step": 388 + }, + { + "epoch": 0.1033063338202098, + "grad_norm": 1.111282066618818, + "learning_rate": 1.9999883363360175e-05, + "loss": 0.4843652546405792, + "step": 389 + }, + { + "epoch": 0.10357190280175275, + "grad_norm": 0.9708048507544866, + "learning_rate": 1.9999861192810508e-05, + "loss": 0.4536727964878082, + "step": 390 + }, + { + "epoch": 0.1038374717832957, + "grad_norm": 1.0216212958759847, + "learning_rate": 1.9999837094405538e-05, + "loss": 0.49557366967201233, + "step": 391 + }, + { + "epoch": 0.10410304076483867, + "grad_norm": 1.0254795167373827, + "learning_rate": 1.9999811068149917e-05, + "loss": 0.45077240467071533, + "step": 392 + }, + { + "epoch": 0.10436860974638162, + "grad_norm": 0.9857255709196505, + "learning_rate": 1.9999783114048658e-05, + "loss": 0.4554041624069214, + "step": 393 + }, + { + "epoch": 
0.10463417872792458, + "grad_norm": 0.8770920920154472, + "learning_rate": 1.999975323210715e-05, + "loss": 0.43526744842529297, + "step": 394 + }, + { + "epoch": 0.10489974770946753, + "grad_norm": 0.9824982196768539, + "learning_rate": 1.9999721422331154e-05, + "loss": 0.4097936749458313, + "step": 395 + }, + { + "epoch": 0.1051653166910105, + "grad_norm": 1.013432449022695, + "learning_rate": 1.9999687684726803e-05, + "loss": 0.4740130305290222, + "step": 396 + }, + { + "epoch": 0.10543088567255345, + "grad_norm": 0.9786752992542405, + "learning_rate": 1.9999652019300604e-05, + "loss": 0.43374374508857727, + "step": 397 + }, + { + "epoch": 0.1056964546540964, + "grad_norm": 0.9323415402935509, + "learning_rate": 1.999961442605943e-05, + "loss": 0.4423784911632538, + "step": 398 + }, + { + "epoch": 0.10596202363563936, + "grad_norm": 1.0497518439124596, + "learning_rate": 1.999957490501053e-05, + "loss": 0.4660544693470001, + "step": 399 + }, + { + "epoch": 0.10622759261718231, + "grad_norm": 1.11742327964835, + "learning_rate": 1.999953345616152e-05, + "loss": 0.4579896628856659, + "step": 400 + }, + { + "epoch": 0.10649316159872527, + "grad_norm": 1.0653029752390735, + "learning_rate": 1.9999490079520395e-05, + "loss": 0.4634096920490265, + "step": 401 + }, + { + "epoch": 0.10675873058026822, + "grad_norm": 0.9969566988589958, + "learning_rate": 1.9999444775095517e-05, + "loss": 0.45374077558517456, + "step": 402 + }, + { + "epoch": 0.10702429956181118, + "grad_norm": 1.1298291912896017, + "learning_rate": 1.9999397542895615e-05, + "loss": 0.49752670526504517, + "step": 403 + }, + { + "epoch": 0.10728986854335414, + "grad_norm": 1.049244919494092, + "learning_rate": 1.99993483829298e-05, + "loss": 0.4539335370063782, + "step": 404 + }, + { + "epoch": 0.10755543752489709, + "grad_norm": 1.0017841795942442, + "learning_rate": 1.999929729520755e-05, + "loss": 0.4665772616863251, + "step": 405 + }, + { + "epoch": 0.10782100650644005, + "grad_norm": 
1.023688686658119, + "learning_rate": 1.9999244279738713e-05, + "loss": 0.4850832223892212, + "step": 406 + }, + { + "epoch": 0.108086575487983, + "grad_norm": 0.9960763191436038, + "learning_rate": 1.9999189336533508e-05, + "loss": 0.43974876403808594, + "step": 407 + }, + { + "epoch": 0.10835214446952596, + "grad_norm": 1.0378626233602128, + "learning_rate": 1.9999132465602526e-05, + "loss": 0.46823856234550476, + "step": 408 + }, + { + "epoch": 0.10861771345106891, + "grad_norm": 1.0461372802003532, + "learning_rate": 1.9999073666956734e-05, + "loss": 0.49704545736312866, + "step": 409 + }, + { + "epoch": 0.10888328243261187, + "grad_norm": 1.03380477635781, + "learning_rate": 1.999901294060747e-05, + "loss": 0.3863454759120941, + "step": 410 + }, + { + "epoch": 0.10914885141415483, + "grad_norm": 1.1280569204620268, + "learning_rate": 1.9998950286566438e-05, + "loss": 0.4903780221939087, + "step": 411 + }, + { + "epoch": 0.10941442039569778, + "grad_norm": 0.9546134462956446, + "learning_rate": 1.9998885704845716e-05, + "loss": 0.4312375485897064, + "step": 412 + }, + { + "epoch": 0.10967998937724074, + "grad_norm": 0.9382591225300354, + "learning_rate": 1.9998819195457756e-05, + "loss": 0.4350954294204712, + "step": 413 + }, + { + "epoch": 0.1099455583587837, + "grad_norm": 0.9201016144754837, + "learning_rate": 1.999875075841538e-05, + "loss": 0.4364873766899109, + "step": 414 + }, + { + "epoch": 0.11021112734032665, + "grad_norm": 0.9578414566062486, + "learning_rate": 1.999868039373178e-05, + "loss": 0.42079728841781616, + "step": 415 + }, + { + "epoch": 0.1104766963218696, + "grad_norm": 1.0011321946551845, + "learning_rate": 1.9998608101420527e-05, + "loss": 0.4396737515926361, + "step": 416 + }, + { + "epoch": 0.11074226530341257, + "grad_norm": 0.9922478693245596, + "learning_rate": 1.9998533881495552e-05, + "loss": 0.44765806198120117, + "step": 417 + }, + { + "epoch": 0.11100783428495552, + "grad_norm": 1.0219437952159112, + "learning_rate": 
1.999845773397117e-05, + "loss": 0.46199291944503784, + "step": 418 + }, + { + "epoch": 0.11127340326649847, + "grad_norm": 0.9510961467421052, + "learning_rate": 1.9998379658862058e-05, + "loss": 0.44561129808425903, + "step": 419 + }, + { + "epoch": 0.11153897224804143, + "grad_norm": 1.0559368690309399, + "learning_rate": 1.9998299656183263e-05, + "loss": 0.46025681495666504, + "step": 420 + }, + { + "epoch": 0.11180454122958439, + "grad_norm": 0.9881679042322009, + "learning_rate": 1.999821772595022e-05, + "loss": 0.4408613443374634, + "step": 421 + }, + { + "epoch": 0.11207011021112734, + "grad_norm": 0.9620122842513851, + "learning_rate": 1.999813386817871e-05, + "loss": 0.4846842586994171, + "step": 422 + }, + { + "epoch": 0.11233567919267029, + "grad_norm": 0.9697081207450757, + "learning_rate": 1.999804808288491e-05, + "loss": 0.44503283500671387, + "step": 423 + }, + { + "epoch": 0.11260124817421326, + "grad_norm": 0.9687765160951803, + "learning_rate": 1.9997960370085355e-05, + "loss": 0.4090060293674469, + "step": 424 + }, + { + "epoch": 0.11286681715575621, + "grad_norm": 0.9575575943579401, + "learning_rate": 1.999787072979696e-05, + "loss": 0.43246471881866455, + "step": 425 + }, + { + "epoch": 0.11313238613729916, + "grad_norm": 1.001604978030575, + "learning_rate": 1.9997779162036996e-05, + "loss": 0.46283262968063354, + "step": 426 + }, + { + "epoch": 0.11339795511884213, + "grad_norm": 0.9108113962903395, + "learning_rate": 1.999768566682313e-05, + "loss": 0.3866165578365326, + "step": 427 + }, + { + "epoch": 0.11366352410038508, + "grad_norm": 0.9595506331685858, + "learning_rate": 1.9997590244173374e-05, + "loss": 0.4501144289970398, + "step": 428 + }, + { + "epoch": 0.11392909308192803, + "grad_norm": 0.9153639565172541, + "learning_rate": 1.9997492894106127e-05, + "loss": 0.43005290627479553, + "step": 429 + }, + { + "epoch": 0.11419466206347098, + "grad_norm": 0.9635360081712412, + "learning_rate": 1.9997393616640165e-05, + "loss": 
0.4427964985370636, + "step": 430 + }, + { + "epoch": 0.11446023104501395, + "grad_norm": 1.0560533392763956, + "learning_rate": 1.999729241179462e-05, + "loss": 0.4690951108932495, + "step": 431 + }, + { + "epoch": 0.1147258000265569, + "grad_norm": 0.9559285214931015, + "learning_rate": 1.9997189279589003e-05, + "loss": 0.456949919462204, + "step": 432 + }, + { + "epoch": 0.11499136900809985, + "grad_norm": 0.9851459681291062, + "learning_rate": 1.99970842200432e-05, + "loss": 0.456052303314209, + "step": 433 + }, + { + "epoch": 0.11525693798964282, + "grad_norm": 0.9609923633405658, + "learning_rate": 1.9996977233177466e-05, + "loss": 0.43220120668411255, + "step": 434 + }, + { + "epoch": 0.11552250697118577, + "grad_norm": 0.9022181145862976, + "learning_rate": 1.9996868319012422e-05, + "loss": 0.4237494170665741, + "step": 435 + }, + { + "epoch": 0.11578807595272872, + "grad_norm": 1.1387519975876466, + "learning_rate": 1.9996757477569072e-05, + "loss": 0.4713878631591797, + "step": 436 + }, + { + "epoch": 0.11605364493427167, + "grad_norm": 1.026114633188765, + "learning_rate": 1.9996644708868776e-05, + "loss": 0.4561111330986023, + "step": 437 + }, + { + "epoch": 0.11631921391581464, + "grad_norm": 1.0425252904592188, + "learning_rate": 1.9996530012933285e-05, + "loss": 0.468253493309021, + "step": 438 + }, + { + "epoch": 0.11658478289735759, + "grad_norm": 0.9323050726416767, + "learning_rate": 1.9996413389784704e-05, + "loss": 0.4815019369125366, + "step": 439 + }, + { + "epoch": 0.11685035187890054, + "grad_norm": 0.9369313249225236, + "learning_rate": 1.9996294839445518e-05, + "loss": 0.4235987663269043, + "step": 440 + }, + { + "epoch": 0.1171159208604435, + "grad_norm": 0.9217309559918773, + "learning_rate": 1.999617436193858e-05, + "loss": 0.40562817454338074, + "step": 441 + }, + { + "epoch": 0.11738148984198646, + "grad_norm": 1.1384168500780398, + "learning_rate": 1.999605195728712e-05, + "loss": 0.424539715051651, + "step": 442 + }, + { + "epoch": 
0.11764705882352941, + "grad_norm": 0.9616123874834243, + "learning_rate": 1.9995927625514736e-05, + "loss": 0.43677473068237305, + "step": 443 + }, + { + "epoch": 0.11791262780507236, + "grad_norm": 0.9761533315060044, + "learning_rate": 1.9995801366645396e-05, + "loss": 0.47325971722602844, + "step": 444 + }, + { + "epoch": 0.11817819678661533, + "grad_norm": 0.9447069768738408, + "learning_rate": 1.9995673180703443e-05, + "loss": 0.4206562638282776, + "step": 445 + }, + { + "epoch": 0.11844376576815828, + "grad_norm": 0.9743544240614231, + "learning_rate": 1.999554306771359e-05, + "loss": 0.4492834210395813, + "step": 446 + }, + { + "epoch": 0.11870933474970123, + "grad_norm": 1.0629000505790311, + "learning_rate": 1.9995411027700917e-05, + "loss": 0.4445284605026245, + "step": 447 + }, + { + "epoch": 0.1189749037312442, + "grad_norm": 0.9911650776890225, + "learning_rate": 1.9995277060690885e-05, + "loss": 0.4038352370262146, + "step": 448 + }, + { + "epoch": 0.11924047271278715, + "grad_norm": 0.9418518804089067, + "learning_rate": 1.9995141166709318e-05, + "loss": 0.4261324405670166, + "step": 449 + }, + { + "epoch": 0.1195060416943301, + "grad_norm": 1.067611227425969, + "learning_rate": 1.9995003345782416e-05, + "loss": 0.44187062978744507, + "step": 450 + }, + { + "epoch": 0.11977161067587305, + "grad_norm": 0.9191915914869351, + "learning_rate": 1.9994863597936752e-05, + "loss": 0.44672587513923645, + "step": 451 + }, + { + "epoch": 0.12003717965741602, + "grad_norm": 0.9882052007755191, + "learning_rate": 1.999472192319926e-05, + "loss": 0.44322314858436584, + "step": 452 + }, + { + "epoch": 0.12030274863895897, + "grad_norm": 0.9882289435866314, + "learning_rate": 1.9994578321597258e-05, + "loss": 0.4396611154079437, + "step": 453 + }, + { + "epoch": 0.12056831762050192, + "grad_norm": 0.9831868773412876, + "learning_rate": 1.9994432793158433e-05, + "loss": 0.4487733542919159, + "step": 454 + }, + { + "epoch": 0.12083388660204489, + "grad_norm": 
0.9360753951175719, + "learning_rate": 1.999428533791084e-05, + "loss": 0.3969653248786926, + "step": 455 + }, + { + "epoch": 0.12109945558358784, + "grad_norm": 0.9662346637828156, + "learning_rate": 1.9994135955882906e-05, + "loss": 0.39312344789505005, + "step": 456 + }, + { + "epoch": 0.12136502456513079, + "grad_norm": 0.9019524086641805, + "learning_rate": 1.9993984647103425e-05, + "loss": 0.3979804217815399, + "step": 457 + }, + { + "epoch": 0.12163059354667374, + "grad_norm": 1.0970468981958466, + "learning_rate": 1.9993831411601573e-05, + "loss": 0.4430229365825653, + "step": 458 + }, + { + "epoch": 0.12189616252821671, + "grad_norm": 0.994492352252997, + "learning_rate": 1.9993676249406895e-05, + "loss": 0.4511718451976776, + "step": 459 + }, + { + "epoch": 0.12216173150975966, + "grad_norm": 1.091979336298699, + "learning_rate": 1.9993519160549298e-05, + "loss": 0.4686455726623535, + "step": 460 + }, + { + "epoch": 0.12242730049130261, + "grad_norm": 1.0158374042593608, + "learning_rate": 1.9993360145059073e-05, + "loss": 0.4501730501651764, + "step": 461 + }, + { + "epoch": 0.12269286947284558, + "grad_norm": 0.8530053413909426, + "learning_rate": 1.999319920296687e-05, + "loss": 0.40718767046928406, + "step": 462 + }, + { + "epoch": 0.12295843845438853, + "grad_norm": 1.1181007301257784, + "learning_rate": 1.9993036334303716e-05, + "loss": 0.47313761711120605, + "step": 463 + }, + { + "epoch": 0.12322400743593148, + "grad_norm": 0.9710975932515886, + "learning_rate": 1.9992871539101018e-05, + "loss": 0.47417378425598145, + "step": 464 + }, + { + "epoch": 0.12348957641747443, + "grad_norm": 0.9297582414898758, + "learning_rate": 1.999270481739054e-05, + "loss": 0.44206154346466064, + "step": 465 + }, + { + "epoch": 0.1237551453990174, + "grad_norm": 0.8745553533375581, + "learning_rate": 1.9992536169204427e-05, + "loss": 0.3800848722457886, + "step": 466 + }, + { + "epoch": 0.12402071438056035, + "grad_norm": 0.9337162704530373, + "learning_rate": 
1.9992365594575194e-05, + "loss": 0.40339407324790955, + "step": 467 + }, + { + "epoch": 0.1242862833621033, + "grad_norm": 0.945328490567385, + "learning_rate": 1.999219309353572e-05, + "loss": 0.45280492305755615, + "step": 468 + }, + { + "epoch": 0.12455185234364627, + "grad_norm": 1.0911195899085697, + "learning_rate": 1.9992018666119266e-05, + "loss": 0.4600910544395447, + "step": 469 + }, + { + "epoch": 0.12481742132518922, + "grad_norm": 0.9649890056306747, + "learning_rate": 1.9991842312359458e-05, + "loss": 0.4475003480911255, + "step": 470 + }, + { + "epoch": 0.12508299030673217, + "grad_norm": 1.0493048741226816, + "learning_rate": 1.9991664032290297e-05, + "loss": 0.45377033948898315, + "step": 471 + }, + { + "epoch": 0.12534855928827512, + "grad_norm": 0.9964208438270044, + "learning_rate": 1.9991483825946147e-05, + "loss": 0.4397522509098053, + "step": 472 + }, + { + "epoch": 0.12561412826981808, + "grad_norm": 0.9309535511597795, + "learning_rate": 1.9991301693361756e-05, + "loss": 0.4258221387863159, + "step": 473 + }, + { + "epoch": 0.12587969725136103, + "grad_norm": 0.9120842027423138, + "learning_rate": 1.9991117634572234e-05, + "loss": 0.40272068977355957, + "step": 474 + }, + { + "epoch": 0.126145266232904, + "grad_norm": 0.8761120829975514, + "learning_rate": 1.9990931649613067e-05, + "loss": 0.3721206784248352, + "step": 475 + }, + { + "epoch": 0.12641083521444696, + "grad_norm": 0.9997105907953329, + "learning_rate": 1.9990743738520115e-05, + "loss": 0.4530203938484192, + "step": 476 + }, + { + "epoch": 0.1266764041959899, + "grad_norm": 0.999446109489731, + "learning_rate": 1.999055390132959e-05, + "loss": 0.4281614422798157, + "step": 477 + }, + { + "epoch": 0.12694197317753286, + "grad_norm": 1.3617327829527315, + "learning_rate": 1.999036213807811e-05, + "loss": 0.41965895891189575, + "step": 478 + }, + { + "epoch": 0.12720754215907581, + "grad_norm": 0.9525189428273744, + "learning_rate": 1.9990168448802633e-05, + "loss": 
0.40055203437805176, + "step": 479 + }, + { + "epoch": 0.12747311114061877, + "grad_norm": 1.0868137290392272, + "learning_rate": 1.99899728335405e-05, + "loss": 0.4266522526741028, + "step": 480 + }, + { + "epoch": 0.12773868012216172, + "grad_norm": 1.028316280940819, + "learning_rate": 1.9989775292329425e-05, + "loss": 0.42291250824928284, + "step": 481 + }, + { + "epoch": 0.1280042491037047, + "grad_norm": 1.0319881226067493, + "learning_rate": 1.9989575825207494e-05, + "loss": 0.41346436738967896, + "step": 482 + }, + { + "epoch": 0.12826981808524765, + "grad_norm": 1.0162482863207583, + "learning_rate": 1.998937443221316e-05, + "loss": 0.4092825651168823, + "step": 483 + }, + { + "epoch": 0.1285353870667906, + "grad_norm": 0.9789070022917183, + "learning_rate": 1.998917111338525e-05, + "loss": 0.39763280749320984, + "step": 484 + }, + { + "epoch": 0.12880095604833355, + "grad_norm": 1.1639998102533433, + "learning_rate": 1.9988965868762956e-05, + "loss": 0.45523273944854736, + "step": 485 + }, + { + "epoch": 0.1290665250298765, + "grad_norm": 0.9737102573843942, + "learning_rate": 1.9988758698385854e-05, + "loss": 0.40181300044059753, + "step": 486 + }, + { + "epoch": 0.12933209401141946, + "grad_norm": 1.0269411713354706, + "learning_rate": 1.9988549602293884e-05, + "loss": 0.42487743496894836, + "step": 487 + }, + { + "epoch": 0.1295976629929624, + "grad_norm": 0.9805378587174307, + "learning_rate": 1.998833858052735e-05, + "loss": 0.41672298312187195, + "step": 488 + }, + { + "epoch": 0.1298632319745054, + "grad_norm": 0.9804335652831319, + "learning_rate": 1.998812563312694e-05, + "loss": 0.36750108003616333, + "step": 489 + }, + { + "epoch": 0.13012880095604834, + "grad_norm": 1.0991024476796578, + "learning_rate": 1.9987910760133712e-05, + "loss": 0.49290573596954346, + "step": 490 + }, + { + "epoch": 0.1303943699375913, + "grad_norm": 0.9956647709409898, + "learning_rate": 1.9987693961589084e-05, + "loss": 0.460039347410202, + "step": 491 + }, + { + 
"epoch": 0.13065993891913424, + "grad_norm": 1.269757897267166, + "learning_rate": 1.998747523753485e-05, + "loss": 0.4471668303012848, + "step": 492 + }, + { + "epoch": 0.1309255079006772, + "grad_norm": 0.9411513149719377, + "learning_rate": 1.9987254588013184e-05, + "loss": 0.395844966173172, + "step": 493 + }, + { + "epoch": 0.13119107688222015, + "grad_norm": 0.9546844808839872, + "learning_rate": 1.9987032013066623e-05, + "loss": 0.4465745985507965, + "step": 494 + }, + { + "epoch": 0.1314566458637631, + "grad_norm": 1.0929917252775374, + "learning_rate": 1.9986807512738075e-05, + "loss": 0.43123912811279297, + "step": 495 + }, + { + "epoch": 0.13172221484530608, + "grad_norm": 0.9741124155963404, + "learning_rate": 1.9986581087070824e-05, + "loss": 0.40066564083099365, + "step": 496 + }, + { + "epoch": 0.13198778382684903, + "grad_norm": 0.9421948045046618, + "learning_rate": 1.9986352736108515e-05, + "loss": 0.38514643907546997, + "step": 497 + }, + { + "epoch": 0.13225335280839198, + "grad_norm": 0.9713567699891517, + "learning_rate": 1.9986122459895182e-05, + "loss": 0.37397241592407227, + "step": 498 + }, + { + "epoch": 0.13251892178993493, + "grad_norm": 0.9697777712481016, + "learning_rate": 1.9985890258475215e-05, + "loss": 0.44865745306015015, + "step": 499 + }, + { + "epoch": 0.1327844907714779, + "grad_norm": 1.000823551239605, + "learning_rate": 1.9985656131893374e-05, + "loss": 0.4161406457424164, + "step": 500 + }, + { + "epoch": 0.13305005975302084, + "grad_norm": 1.049045844462056, + "learning_rate": 1.9985420080194804e-05, + "loss": 0.41364359855651855, + "step": 501 + }, + { + "epoch": 0.1333156287345638, + "grad_norm": 0.9766347522178017, + "learning_rate": 1.9985182103425007e-05, + "loss": 0.38466009497642517, + "step": 502 + }, + { + "epoch": 0.13358119771610677, + "grad_norm": 0.9820108788569575, + "learning_rate": 1.9984942201629868e-05, + "loss": 0.4189472794532776, + "step": 503 + }, + { + "epoch": 0.13384676669764972, + "grad_norm": 
1.0124943582595707, + "learning_rate": 1.998470037485563e-05, + "loss": 0.4088754653930664, + "step": 504 + }, + { + "epoch": 0.13411233567919267, + "grad_norm": 0.9404621165531668, + "learning_rate": 1.9984456623148923e-05, + "loss": 0.4197084307670593, + "step": 505 + }, + { + "epoch": 0.13437790466073563, + "grad_norm": 1.022677047132229, + "learning_rate": 1.998421094655673e-05, + "loss": 0.4318644404411316, + "step": 506 + }, + { + "epoch": 0.13464347364227858, + "grad_norm": 0.9443470782499029, + "learning_rate": 1.9983963345126423e-05, + "loss": 0.38180238008499146, + "step": 507 + }, + { + "epoch": 0.13490904262382153, + "grad_norm": 0.9655473739081939, + "learning_rate": 1.9983713818905733e-05, + "loss": 0.38704103231430054, + "step": 508 + }, + { + "epoch": 0.13517461160536448, + "grad_norm": 1.050357567916831, + "learning_rate": 1.998346236794276e-05, + "loss": 0.4206693768501282, + "step": 509 + }, + { + "epoch": 0.13544018058690746, + "grad_norm": 1.1108901361228778, + "learning_rate": 1.9983208992285993e-05, + "loss": 0.42818987369537354, + "step": 510 + }, + { + "epoch": 0.1357057495684504, + "grad_norm": 1.0771548955106338, + "learning_rate": 1.9982953691984274e-05, + "loss": 0.44592660665512085, + "step": 511 + }, + { + "epoch": 0.13597131854999336, + "grad_norm": 1.006125968429414, + "learning_rate": 1.9982696467086815e-05, + "loss": 0.4272580146789551, + "step": 512 + }, + { + "epoch": 0.13623688753153632, + "grad_norm": 1.084212872761102, + "learning_rate": 1.9982437317643218e-05, + "loss": 0.4416295289993286, + "step": 513 + }, + { + "epoch": 0.13650245651307927, + "grad_norm": 1.1040865905907058, + "learning_rate": 1.998217624370343e-05, + "loss": 0.45108669996261597, + "step": 514 + }, + { + "epoch": 0.13676802549462222, + "grad_norm": 0.9866796372680723, + "learning_rate": 1.9981913245317802e-05, + "loss": 0.40311864018440247, + "step": 515 + }, + { + "epoch": 0.13703359447616517, + "grad_norm": 1.041531014011416, + "learning_rate": 
1.9981648322537017e-05, + "loss": 0.4388020932674408, + "step": 516 + }, + { + "epoch": 0.13729916345770815, + "grad_norm": 1.069295153220874, + "learning_rate": 1.9981381475412162e-05, + "loss": 0.42741361260414124, + "step": 517 + }, + { + "epoch": 0.1375647324392511, + "grad_norm": 0.8562984414004653, + "learning_rate": 1.9981112703994677e-05, + "loss": 0.3766555190086365, + "step": 518 + }, + { + "epoch": 0.13783030142079405, + "grad_norm": 0.9297024970383198, + "learning_rate": 1.998084200833638e-05, + "loss": 0.38618308305740356, + "step": 519 + }, + { + "epoch": 0.138095870402337, + "grad_norm": 1.0033450202172107, + "learning_rate": 1.9980569388489457e-05, + "loss": 0.4553264379501343, + "step": 520 + }, + { + "epoch": 0.13836143938387996, + "grad_norm": 1.024202819723292, + "learning_rate": 1.9980294844506468e-05, + "loss": 0.44632673263549805, + "step": 521 + }, + { + "epoch": 0.1386270083654229, + "grad_norm": 1.0907023510727254, + "learning_rate": 1.998001837644033e-05, + "loss": 0.4285067617893219, + "step": 522 + }, + { + "epoch": 0.13889257734696586, + "grad_norm": 0.9721672428790065, + "learning_rate": 1.9979739984344365e-05, + "loss": 0.39360538125038147, + "step": 523 + }, + { + "epoch": 0.13915814632850884, + "grad_norm": 0.9475835393492287, + "learning_rate": 1.9979459668272226e-05, + "loss": 0.4007593095302582, + "step": 524 + }, + { + "epoch": 0.1394237153100518, + "grad_norm": 1.028990364637073, + "learning_rate": 1.9979177428277955e-05, + "loss": 0.40176767110824585, + "step": 525 + }, + { + "epoch": 0.13968928429159475, + "grad_norm": 1.0167293750004343, + "learning_rate": 1.9978893264415978e-05, + "loss": 0.4190528392791748, + "step": 526 + }, + { + "epoch": 0.1399548532731377, + "grad_norm": 0.9871913820335487, + "learning_rate": 1.9978607176741063e-05, + "loss": 0.4139288067817688, + "step": 527 + }, + { + "epoch": 0.14022042225468065, + "grad_norm": 0.8610694360554231, + "learning_rate": 1.9978319165308373e-05, + "loss": 
0.3666151463985443, + "step": 528 + }, + { + "epoch": 0.1404859912362236, + "grad_norm": 1.016794526359022, + "learning_rate": 1.997802923017343e-05, + "loss": 0.44621142745018005, + "step": 529 + }, + { + "epoch": 0.14075156021776655, + "grad_norm": 0.9742602007181285, + "learning_rate": 1.9977737371392134e-05, + "loss": 0.4162977635860443, + "step": 530 + }, + { + "epoch": 0.14101712919930953, + "grad_norm": 1.0386051117102446, + "learning_rate": 1.997744358902075e-05, + "loss": 0.438882052898407, + "step": 531 + }, + { + "epoch": 0.14128269818085248, + "grad_norm": 0.9131334625730753, + "learning_rate": 1.997714788311591e-05, + "loss": 0.43381333351135254, + "step": 532 + }, + { + "epoch": 0.14154826716239544, + "grad_norm": 1.0341262373297713, + "learning_rate": 1.9976850253734633e-05, + "loss": 0.41925039887428284, + "step": 533 + }, + { + "epoch": 0.1418138361439384, + "grad_norm": 1.0366031704059997, + "learning_rate": 1.997655070093429e-05, + "loss": 0.40469998121261597, + "step": 534 + }, + { + "epoch": 0.14207940512548134, + "grad_norm": 1.069653848503876, + "learning_rate": 1.9976249224772638e-05, + "loss": 0.4252749979496002, + "step": 535 + }, + { + "epoch": 0.1423449741070243, + "grad_norm": 0.9131599330211423, + "learning_rate": 1.9975945825307788e-05, + "loss": 0.42437341809272766, + "step": 536 + }, + { + "epoch": 0.14261054308856724, + "grad_norm": 0.9295944144104017, + "learning_rate": 1.9975640502598243e-05, + "loss": 0.3435184955596924, + "step": 537 + }, + { + "epoch": 0.14287611207011022, + "grad_norm": 1.135805935036872, + "learning_rate": 1.9975333256702864e-05, + "loss": 0.4677535593509674, + "step": 538 + }, + { + "epoch": 0.14314168105165317, + "grad_norm": 0.9857610455714647, + "learning_rate": 1.9975024087680873e-05, + "loss": 0.3860551118850708, + "step": 539 + }, + { + "epoch": 0.14340725003319613, + "grad_norm": 1.0260051612127887, + "learning_rate": 1.9974712995591887e-05, + "loss": 0.4067271649837494, + "step": 540 + }, + { + 
"epoch": 0.14367281901473908, + "grad_norm": 1.0673102525592195, + "learning_rate": 1.9974399980495877e-05, + "loss": 0.42236536741256714, + "step": 541 + }, + { + "epoch": 0.14393838799628203, + "grad_norm": 0.9825710114440017, + "learning_rate": 1.9974085042453188e-05, + "loss": 0.45230624079704285, + "step": 542 + }, + { + "epoch": 0.14420395697782498, + "grad_norm": 1.0223761508252163, + "learning_rate": 1.997376818152453e-05, + "loss": 0.428194522857666, + "step": 543 + }, + { + "epoch": 0.14446952595936793, + "grad_norm": 1.0337438279048081, + "learning_rate": 1.9973449397771004e-05, + "loss": 0.40774789452552795, + "step": 544 + }, + { + "epoch": 0.1447350949409109, + "grad_norm": 0.9168779980285519, + "learning_rate": 1.9973128691254054e-05, + "loss": 0.4086815118789673, + "step": 545 + }, + { + "epoch": 0.14500066392245387, + "grad_norm": 0.9934439062572693, + "learning_rate": 1.997280606203552e-05, + "loss": 0.4045162796974182, + "step": 546 + }, + { + "epoch": 0.14526623290399682, + "grad_norm": 1.0110955437735047, + "learning_rate": 1.9972481510177594e-05, + "loss": 0.40463268756866455, + "step": 547 + }, + { + "epoch": 0.14553180188553977, + "grad_norm": 1.0029896014566093, + "learning_rate": 1.9972155035742847e-05, + "loss": 0.46733587980270386, + "step": 548 + }, + { + "epoch": 0.14579737086708272, + "grad_norm": 0.9683751197048177, + "learning_rate": 1.997182663879422e-05, + "loss": 0.45210930705070496, + "step": 549 + }, + { + "epoch": 0.14606293984862567, + "grad_norm": 0.9559484778346481, + "learning_rate": 1.9971496319395022e-05, + "loss": 0.39798587560653687, + "step": 550 + }, + { + "epoch": 0.14632850883016862, + "grad_norm": 1.0582410708312875, + "learning_rate": 1.9971164077608937e-05, + "loss": 0.4166080057621002, + "step": 551 + }, + { + "epoch": 0.1465940778117116, + "grad_norm": 0.99705391441119, + "learning_rate": 1.9970829913500017e-05, + "loss": 0.3995435833930969, + "step": 552 + }, + { + "epoch": 0.14685964679325456, + "grad_norm": 
0.9693599664680953, + "learning_rate": 1.9970493827132686e-05, + "loss": 0.39335039258003235, + "step": 553 + }, + { + "epoch": 0.1471252157747975, + "grad_norm": 1.0653128556742777, + "learning_rate": 1.9970155818571733e-05, + "loss": 0.3923008441925049, + "step": 554 + }, + { + "epoch": 0.14739078475634046, + "grad_norm": 1.1000528384874784, + "learning_rate": 1.996981588788233e-05, + "loss": 0.42148759961128235, + "step": 555 + }, + { + "epoch": 0.1476563537378834, + "grad_norm": 0.9532704289154984, + "learning_rate": 1.9969474035130005e-05, + "loss": 0.36099517345428467, + "step": 556 + }, + { + "epoch": 0.14792192271942636, + "grad_norm": 0.9498609858415961, + "learning_rate": 1.9969130260380663e-05, + "loss": 0.39650559425354004, + "step": 557 + }, + { + "epoch": 0.14818749170096931, + "grad_norm": 0.9667452630427784, + "learning_rate": 1.9968784563700586e-05, + "loss": 0.36410078406333923, + "step": 558 + }, + { + "epoch": 0.1484530606825123, + "grad_norm": 1.002419821858965, + "learning_rate": 1.996843694515641e-05, + "loss": 0.41312888264656067, + "step": 559 + }, + { + "epoch": 0.14871862966405525, + "grad_norm": 1.1088153047335336, + "learning_rate": 1.9968087404815162e-05, + "loss": 0.3895263373851776, + "step": 560 + }, + { + "epoch": 0.1489841986455982, + "grad_norm": 1.2422388501205763, + "learning_rate": 1.9967735942744226e-05, + "loss": 0.4400597810745239, + "step": 561 + }, + { + "epoch": 0.14924976762714115, + "grad_norm": 1.1300700300497077, + "learning_rate": 1.9967382559011356e-05, + "loss": 0.36712852120399475, + "step": 562 + }, + { + "epoch": 0.1495153366086841, + "grad_norm": 1.0425502358891738, + "learning_rate": 1.9967027253684685e-05, + "loss": 0.4043564200401306, + "step": 563 + }, + { + "epoch": 0.14978090559022705, + "grad_norm": 1.101160625764444, + "learning_rate": 1.9966670026832707e-05, + "loss": 0.45233044028282166, + "step": 564 + }, + { + "epoch": 0.15004647457177, + "grad_norm": 1.3277254520379258, + "learning_rate": 
1.9966310878524297e-05, + "loss": 0.441600501537323, + "step": 565 + }, + { + "epoch": 0.15031204355331299, + "grad_norm": 1.0833095900878238, + "learning_rate": 1.9965949808828687e-05, + "loss": 0.4268038868904114, + "step": 566 + }, + { + "epoch": 0.15057761253485594, + "grad_norm": 1.1492448156590855, + "learning_rate": 1.9965586817815494e-05, + "loss": 0.41927874088287354, + "step": 567 + }, + { + "epoch": 0.1508431815163989, + "grad_norm": 1.026170307581087, + "learning_rate": 1.9965221905554695e-05, + "loss": 0.41488781571388245, + "step": 568 + }, + { + "epoch": 0.15110875049794184, + "grad_norm": 0.9559142330236491, + "learning_rate": 1.9964855072116642e-05, + "loss": 0.3624749779701233, + "step": 569 + }, + { + "epoch": 0.1513743194794848, + "grad_norm": 1.254830306735622, + "learning_rate": 1.996448631757206e-05, + "loss": 0.45119866728782654, + "step": 570 + }, + { + "epoch": 0.15163988846102774, + "grad_norm": 1.095837461898702, + "learning_rate": 1.996411564199203e-05, + "loss": 0.41389739513397217, + "step": 571 + }, + { + "epoch": 0.1519054574425707, + "grad_norm": 0.9684460814064966, + "learning_rate": 1.996374304544802e-05, + "loss": 0.3640916347503662, + "step": 572 + }, + { + "epoch": 0.15217102642411368, + "grad_norm": 1.0711015344753547, + "learning_rate": 1.9963368528011867e-05, + "loss": 0.45648565888404846, + "step": 573 + }, + { + "epoch": 0.15243659540565663, + "grad_norm": 0.9722794055909949, + "learning_rate": 1.9962992089755765e-05, + "loss": 0.4335980713367462, + "step": 574 + }, + { + "epoch": 0.15270216438719958, + "grad_norm": 1.158400874054287, + "learning_rate": 1.996261373075229e-05, + "loss": 0.3908158540725708, + "step": 575 + }, + { + "epoch": 0.15296773336874253, + "grad_norm": 0.9311953954584888, + "learning_rate": 1.996223345107439e-05, + "loss": 0.36533305048942566, + "step": 576 + }, + { + "epoch": 0.15323330235028548, + "grad_norm": 0.9771467412652409, + "learning_rate": 1.9961851250795372e-05, + "loss": 
0.407212495803833, + "step": 577 + }, + { + "epoch": 0.15349887133182843, + "grad_norm": 0.9988499065644934, + "learning_rate": 1.996146712998892e-05, + "loss": 0.4266315698623657, + "step": 578 + }, + { + "epoch": 0.1537644403133714, + "grad_norm": 0.9843108485081927, + "learning_rate": 1.9961081088729092e-05, + "loss": 0.3806581199169159, + "step": 579 + }, + { + "epoch": 0.15403000929491437, + "grad_norm": 0.9497423806639163, + "learning_rate": 1.9960693127090312e-05, + "loss": 0.40962716937065125, + "step": 580 + }, + { + "epoch": 0.15429557827645732, + "grad_norm": 0.94680923059909, + "learning_rate": 1.996030324514737e-05, + "loss": 0.4195394515991211, + "step": 581 + }, + { + "epoch": 0.15456114725800027, + "grad_norm": 1.0211843119224446, + "learning_rate": 1.995991144297543e-05, + "loss": 0.4366803765296936, + "step": 582 + }, + { + "epoch": 0.15482671623954322, + "grad_norm": 1.1779341722116263, + "learning_rate": 1.995951772065004e-05, + "loss": 0.44951680302619934, + "step": 583 + }, + { + "epoch": 0.15509228522108617, + "grad_norm": 1.1165714790353467, + "learning_rate": 1.9959122078247088e-05, + "loss": 0.42920851707458496, + "step": 584 + }, + { + "epoch": 0.15535785420262913, + "grad_norm": 1.3260467831670406, + "learning_rate": 1.9958724515842856e-05, + "loss": 0.3805098533630371, + "step": 585 + }, + { + "epoch": 0.15562342318417208, + "grad_norm": 1.1544212798945541, + "learning_rate": 1.995832503351399e-05, + "loss": 0.439333438873291, + "step": 586 + }, + { + "epoch": 0.15588899216571506, + "grad_norm": 0.9414235863159184, + "learning_rate": 1.9957923631337505e-05, + "loss": 0.38338547945022583, + "step": 587 + }, + { + "epoch": 0.156154561147258, + "grad_norm": 0.9711288321476074, + "learning_rate": 1.9957520309390786e-05, + "loss": 0.40603697299957275, + "step": 588 + }, + { + "epoch": 0.15642013012880096, + "grad_norm": 0.9468286962292546, + "learning_rate": 1.9957115067751594e-05, + "loss": 0.42816999554634094, + "step": 589 + }, + { + 
"epoch": 0.1566856991103439, + "grad_norm": 0.979497417166178, + "learning_rate": 1.9956707906498046e-05, + "loss": 0.42367884516716003, + "step": 590 + }, + { + "epoch": 0.15695126809188686, + "grad_norm": 1.1158588594509518, + "learning_rate": 1.995629882570864e-05, + "loss": 0.4349297881126404, + "step": 591 + }, + { + "epoch": 0.15721683707342982, + "grad_norm": 0.9762108745852242, + "learning_rate": 1.995588782546225e-05, + "loss": 0.37990960478782654, + "step": 592 + }, + { + "epoch": 0.15748240605497277, + "grad_norm": 0.9495653219493333, + "learning_rate": 1.9955474905838102e-05, + "loss": 0.4085468649864197, + "step": 593 + }, + { + "epoch": 0.15774797503651575, + "grad_norm": 0.9419429879365407, + "learning_rate": 1.995506006691581e-05, + "loss": 0.41362464427948, + "step": 594 + }, + { + "epoch": 0.1580135440180587, + "grad_norm": 1.002559702640921, + "learning_rate": 1.9954643308775342e-05, + "loss": 0.3830018937587738, + "step": 595 + }, + { + "epoch": 0.15827911299960165, + "grad_norm": 1.1505182326275074, + "learning_rate": 1.995422463149705e-05, + "loss": 0.48350822925567627, + "step": 596 + }, + { + "epoch": 0.1585446819811446, + "grad_norm": 0.9889824166630486, + "learning_rate": 1.995380403516165e-05, + "loss": 0.4215185344219208, + "step": 597 + }, + { + "epoch": 0.15881025096268755, + "grad_norm": 1.06826056700577, + "learning_rate": 1.9953381519850224e-05, + "loss": 0.42061948776245117, + "step": 598 + }, + { + "epoch": 0.1590758199442305, + "grad_norm": 1.032451381790901, + "learning_rate": 1.995295708564423e-05, + "loss": 0.38956254720687866, + "step": 599 + }, + { + "epoch": 0.15934138892577346, + "grad_norm": 1.0492553607775368, + "learning_rate": 1.9952530732625492e-05, + "loss": 0.3864685893058777, + "step": 600 + }, + { + "epoch": 0.15960695790731644, + "grad_norm": 0.9770856461072062, + "learning_rate": 1.9952102460876214e-05, + "loss": 0.395724356174469, + "step": 601 + }, + { + "epoch": 0.1598725268888594, + "grad_norm": 
1.04245602393598, + "learning_rate": 1.995167227047895e-05, + "loss": 0.4220300316810608, + "step": 602 + }, + { + "epoch": 0.16013809587040234, + "grad_norm": 1.1406615370546667, + "learning_rate": 1.9951240161516643e-05, + "loss": 0.4129142165184021, + "step": 603 + }, + { + "epoch": 0.1604036648519453, + "grad_norm": 0.983753356740355, + "learning_rate": 1.9950806134072595e-05, + "loss": 0.3951375484466553, + "step": 604 + }, + { + "epoch": 0.16066923383348825, + "grad_norm": 1.0214548083454909, + "learning_rate": 1.9950370188230486e-05, + "loss": 0.4117582142353058, + "step": 605 + }, + { + "epoch": 0.1609348028150312, + "grad_norm": 1.0340746201961049, + "learning_rate": 1.994993232407436e-05, + "loss": 0.3920668363571167, + "step": 606 + }, + { + "epoch": 0.16120037179657415, + "grad_norm": 0.9768399206450091, + "learning_rate": 1.9949492541688626e-05, + "loss": 0.3756999373435974, + "step": 607 + }, + { + "epoch": 0.16146594077811713, + "grad_norm": 1.0034054922110034, + "learning_rate": 1.9949050841158078e-05, + "loss": 0.41009610891342163, + "step": 608 + }, + { + "epoch": 0.16173150975966008, + "grad_norm": 0.9847346075479474, + "learning_rate": 1.994860722256786e-05, + "loss": 0.3986571729183197, + "step": 609 + }, + { + "epoch": 0.16199707874120303, + "grad_norm": 0.9978440495541314, + "learning_rate": 1.994816168600351e-05, + "loss": 0.3903341591358185, + "step": 610 + }, + { + "epoch": 0.16226264772274598, + "grad_norm": 0.9992231775305654, + "learning_rate": 1.994771423155091e-05, + "loss": 0.39725261926651, + "step": 611 + }, + { + "epoch": 0.16252821670428894, + "grad_norm": 0.9446936558476315, + "learning_rate": 1.994726485929633e-05, + "loss": 0.39461129903793335, + "step": 612 + }, + { + "epoch": 0.1627937856858319, + "grad_norm": 1.0162077284831286, + "learning_rate": 1.99468135693264e-05, + "loss": 0.41346144676208496, + "step": 613 + }, + { + "epoch": 0.16305935466737484, + "grad_norm": 1.0305116850266922, + "learning_rate": 
1.9946360361728127e-05, + "loss": 0.41148197650909424, + "step": 614 + }, + { + "epoch": 0.16332492364891782, + "grad_norm": 0.9678436330540818, + "learning_rate": 1.9945905236588884e-05, + "loss": 0.38204139471054077, + "step": 615 + }, + { + "epoch": 0.16359049263046077, + "grad_norm": 0.9830320911733957, + "learning_rate": 1.9945448193996412e-05, + "loss": 0.41496896743774414, + "step": 616 + }, + { + "epoch": 0.16385606161200372, + "grad_norm": 0.9327494941136337, + "learning_rate": 1.994498923403882e-05, + "loss": 0.38998982310295105, + "step": 617 + }, + { + "epoch": 0.16412163059354667, + "grad_norm": 1.0310759290486786, + "learning_rate": 1.99445283568046e-05, + "loss": 0.39018991589546204, + "step": 618 + }, + { + "epoch": 0.16438719957508963, + "grad_norm": 1.1133251353738367, + "learning_rate": 1.9944065562382594e-05, + "loss": 0.41579991579055786, + "step": 619 + }, + { + "epoch": 0.16465276855663258, + "grad_norm": 1.1413714641323347, + "learning_rate": 1.9943600850862027e-05, + "loss": 0.426283061504364, + "step": 620 + }, + { + "epoch": 0.16491833753817553, + "grad_norm": 1.0537239280428552, + "learning_rate": 1.9943134222332493e-05, + "loss": 0.418672651052475, + "step": 621 + }, + { + "epoch": 0.1651839065197185, + "grad_norm": 1.0177048245128393, + "learning_rate": 1.9942665676883946e-05, + "loss": 0.4014776349067688, + "step": 622 + }, + { + "epoch": 0.16544947550126146, + "grad_norm": 0.9703989792649265, + "learning_rate": 1.994219521460672e-05, + "loss": 0.3714776933193207, + "step": 623 + }, + { + "epoch": 0.1657150444828044, + "grad_norm": 1.005321267739283, + "learning_rate": 1.9941722835591514e-05, + "loss": 0.39415785670280457, + "step": 624 + }, + { + "epoch": 0.16598061346434737, + "grad_norm": 1.739817458909074, + "learning_rate": 1.9941248539929395e-05, + "loss": 0.3706223964691162, + "step": 625 + }, + { + "epoch": 0.16624618244589032, + "grad_norm": 0.9887487099192142, + "learning_rate": 1.9940772327711807e-05, + "loss": 
0.4167429208755493, + "step": 626 + }, + { + "epoch": 0.16651175142743327, + "grad_norm": 1.0502993213264278, + "learning_rate": 1.9940294199030553e-05, + "loss": 0.38234227895736694, + "step": 627 + }, + { + "epoch": 0.16677732040897622, + "grad_norm": 0.9929957655695576, + "learning_rate": 1.9939814153977813e-05, + "loss": 0.4139519929885864, + "step": 628 + }, + { + "epoch": 0.1670428893905192, + "grad_norm": 1.0428716869119874, + "learning_rate": 1.9939332192646136e-05, + "loss": 0.44490402936935425, + "step": 629 + }, + { + "epoch": 0.16730845837206215, + "grad_norm": 0.9723220719956404, + "learning_rate": 1.993884831512843e-05, + "loss": 0.3870658278465271, + "step": 630 + }, + { + "epoch": 0.1675740273536051, + "grad_norm": 0.9337218443909966, + "learning_rate": 1.993836252151799e-05, + "loss": 0.3308948278427124, + "step": 631 + }, + { + "epoch": 0.16783959633514806, + "grad_norm": 1.1119638169858157, + "learning_rate": 1.993787481190847e-05, + "loss": 0.3727487623691559, + "step": 632 + }, + { + "epoch": 0.168105165316691, + "grad_norm": 1.0025380900585623, + "learning_rate": 1.9937385186393888e-05, + "loss": 0.4277465343475342, + "step": 633 + }, + { + "epoch": 0.16837073429823396, + "grad_norm": 1.2120120873899203, + "learning_rate": 1.9936893645068647e-05, + "loss": 0.4276485741138458, + "step": 634 + }, + { + "epoch": 0.1686363032797769, + "grad_norm": 1.000070161461063, + "learning_rate": 1.9936400188027502e-05, + "loss": 0.374578058719635, + "step": 635 + }, + { + "epoch": 0.1689018722613199, + "grad_norm": 1.113556890943216, + "learning_rate": 1.993590481536559e-05, + "loss": 0.4583400785923004, + "step": 636 + }, + { + "epoch": 0.16916744124286284, + "grad_norm": 0.9731147624235688, + "learning_rate": 1.9935407527178417e-05, + "loss": 0.3734489679336548, + "step": 637 + }, + { + "epoch": 0.1694330102244058, + "grad_norm": 1.0110441212525507, + "learning_rate": 1.9934908323561846e-05, + "loss": 0.39524513483047485, + "step": 638 + }, + { + "epoch": 
0.16969857920594875, + "grad_norm": 1.0264447655460065, + "learning_rate": 1.9934407204612124e-05, + "loss": 0.42300352454185486, + "step": 639 + }, + { + "epoch": 0.1699641481874917, + "grad_norm": 0.9950374891978715, + "learning_rate": 1.9933904170425858e-05, + "loss": 0.4152276813983917, + "step": 640 + }, + { + "epoch": 0.17022971716903465, + "grad_norm": 1.230783330329369, + "learning_rate": 1.9933399221100026e-05, + "loss": 0.43046653270721436, + "step": 641 + }, + { + "epoch": 0.1704952861505776, + "grad_norm": 1.0095783418631343, + "learning_rate": 1.993289235673198e-05, + "loss": 0.4134339392185211, + "step": 642 + }, + { + "epoch": 0.17076085513212058, + "grad_norm": 1.0051407398693462, + "learning_rate": 1.9932383577419432e-05, + "loss": 0.44028693437576294, + "step": 643 + }, + { + "epoch": 0.17102642411366353, + "grad_norm": 1.0208746920457954, + "learning_rate": 1.9931872883260473e-05, + "loss": 0.3790222704410553, + "step": 644 + }, + { + "epoch": 0.17129199309520649, + "grad_norm": 1.041462978505965, + "learning_rate": 1.9931360274353556e-05, + "loss": 0.3683086633682251, + "step": 645 + }, + { + "epoch": 0.17155756207674944, + "grad_norm": 1.0400069352454702, + "learning_rate": 1.993084575079751e-05, + "loss": 0.3630594313144684, + "step": 646 + }, + { + "epoch": 0.1718231310582924, + "grad_norm": 1.0694046561659416, + "learning_rate": 1.993032931269153e-05, + "loss": 0.4398641884326935, + "step": 647 + }, + { + "epoch": 0.17208870003983534, + "grad_norm": 1.107156801944608, + "learning_rate": 1.992981096013517e-05, + "loss": 0.42222845554351807, + "step": 648 + }, + { + "epoch": 0.1723542690213783, + "grad_norm": 1.043160064840446, + "learning_rate": 1.992929069322837e-05, + "loss": 0.38966643810272217, + "step": 649 + }, + { + "epoch": 0.17261983800292127, + "grad_norm": 1.0607803195691352, + "learning_rate": 1.992876851207143e-05, + "loss": 0.4394804835319519, + "step": 650 + }, + { + "epoch": 0.17288540698446422, + "grad_norm": 
0.9714467718451273, + "learning_rate": 1.9928244416765022e-05, + "loss": 0.3475287854671478, + "step": 651 + }, + { + "epoch": 0.17315097596600718, + "grad_norm": 0.9848879046616053, + "learning_rate": 1.992771840741018e-05, + "loss": 0.40047168731689453, + "step": 652 + }, + { + "epoch": 0.17341654494755013, + "grad_norm": 1.0744593937096147, + "learning_rate": 1.9927190484108315e-05, + "loss": 0.4028981328010559, + "step": 653 + }, + { + "epoch": 0.17368211392909308, + "grad_norm": 1.010491020672817, + "learning_rate": 1.9926660646961208e-05, + "loss": 0.3891482949256897, + "step": 654 + }, + { + "epoch": 0.17394768291063603, + "grad_norm": 1.1163232689680433, + "learning_rate": 1.9926128896071e-05, + "loss": 0.4570680856704712, + "step": 655 + }, + { + "epoch": 0.17421325189217898, + "grad_norm": 0.9509061944047602, + "learning_rate": 1.992559523154021e-05, + "loss": 0.392758309841156, + "step": 656 + }, + { + "epoch": 0.17447882087372196, + "grad_norm": 0.9648168194829144, + "learning_rate": 1.992505965347172e-05, + "loss": 0.39552047848701477, + "step": 657 + }, + { + "epoch": 0.17474438985526491, + "grad_norm": 1.045434666464082, + "learning_rate": 1.992452216196879e-05, + "loss": 0.4412619173526764, + "step": 658 + }, + { + "epoch": 0.17500995883680787, + "grad_norm": 1.033655605856329, + "learning_rate": 1.9923982757135028e-05, + "loss": 0.4075942635536194, + "step": 659 + }, + { + "epoch": 0.17527552781835082, + "grad_norm": 1.0660210414475448, + "learning_rate": 1.9923441439074434e-05, + "loss": 0.44615018367767334, + "step": 660 + }, + { + "epoch": 0.17554109679989377, + "grad_norm": 0.9504988883268379, + "learning_rate": 1.992289820789137e-05, + "loss": 0.3957441449165344, + "step": 661 + }, + { + "epoch": 0.17580666578143672, + "grad_norm": 0.9513339400965243, + "learning_rate": 1.992235306369056e-05, + "loss": 0.4014820158481598, + "step": 662 + }, + { + "epoch": 0.17607223476297967, + "grad_norm": 0.9988043316582222, + "learning_rate": 
1.9921806006577102e-05, + "loss": 0.39478158950805664, + "step": 663 + }, + { + "epoch": 0.17633780374452265, + "grad_norm": 1.0278124558587338, + "learning_rate": 1.9921257036656463e-05, + "loss": 0.45742082595825195, + "step": 664 + }, + { + "epoch": 0.1766033727260656, + "grad_norm": 0.9674516471555401, + "learning_rate": 1.9920706154034477e-05, + "loss": 0.36519041657447815, + "step": 665 + }, + { + "epoch": 0.17686894170760856, + "grad_norm": 1.0086354363577679, + "learning_rate": 1.992015335881735e-05, + "loss": 0.40599358081817627, + "step": 666 + }, + { + "epoch": 0.1771345106891515, + "grad_norm": 0.958585892866014, + "learning_rate": 1.991959865111165e-05, + "loss": 0.4064781367778778, + "step": 667 + }, + { + "epoch": 0.17740007967069446, + "grad_norm": 0.9430583774727941, + "learning_rate": 1.991904203102432e-05, + "loss": 0.4076484143733978, + "step": 668 + }, + { + "epoch": 0.1776656486522374, + "grad_norm": 1.1044553051326549, + "learning_rate": 1.9918483498662678e-05, + "loss": 0.42157143354415894, + "step": 669 + }, + { + "epoch": 0.17793121763378036, + "grad_norm": 1.005923050768092, + "learning_rate": 1.9917923054134388e-05, + "loss": 0.3814900517463684, + "step": 670 + }, + { + "epoch": 0.17819678661532334, + "grad_norm": 1.0156953904207233, + "learning_rate": 1.9917360697547506e-05, + "loss": 0.4211175739765167, + "step": 671 + }, + { + "epoch": 0.1784623555968663, + "grad_norm": 1.0530805044024834, + "learning_rate": 1.991679642901045e-05, + "loss": 0.3975893259048462, + "step": 672 + }, + { + "epoch": 0.17872792457840925, + "grad_norm": 0.9633270935214763, + "learning_rate": 1.9916230248631993e-05, + "loss": 0.36090826988220215, + "step": 673 + }, + { + "epoch": 0.1789934935599522, + "grad_norm": 0.9408638333666679, + "learning_rate": 1.99156621565213e-05, + "loss": 0.36511334776878357, + "step": 674 + }, + { + "epoch": 0.17925906254149515, + "grad_norm": 1.0839117569759185, + "learning_rate": 1.9915092152787888e-05, + "loss": 
0.4131924510002136, + "step": 675 + }, + { + "epoch": 0.1795246315230381, + "grad_norm": 1.1407281463751517, + "learning_rate": 1.9914520237541644e-05, + "loss": 0.4283728301525116, + "step": 676 + }, + { + "epoch": 0.17979020050458105, + "grad_norm": 0.9751873028047018, + "learning_rate": 1.991394641089283e-05, + "loss": 0.3855544924736023, + "step": 677 + }, + { + "epoch": 0.18005576948612403, + "grad_norm": 1.3517309919327671, + "learning_rate": 1.9913370672952074e-05, + "loss": 0.41288501024246216, + "step": 678 + }, + { + "epoch": 0.180321338467667, + "grad_norm": 1.1127679640996702, + "learning_rate": 1.9912793023830365e-05, + "loss": 0.3824073076248169, + "step": 679 + }, + { + "epoch": 0.18058690744920994, + "grad_norm": 1.0055812841256684, + "learning_rate": 1.9912213463639077e-05, + "loss": 0.39005106687545776, + "step": 680 + }, + { + "epoch": 0.1808524764307529, + "grad_norm": 1.0115332151563563, + "learning_rate": 1.9911631992489933e-05, + "loss": 0.3521374464035034, + "step": 681 + }, + { + "epoch": 0.18111804541229584, + "grad_norm": 0.983790464571211, + "learning_rate": 1.9911048610495037e-05, + "loss": 0.337347149848938, + "step": 682 + }, + { + "epoch": 0.1813836143938388, + "grad_norm": 1.1534370397304132, + "learning_rate": 1.9910463317766864e-05, + "loss": 0.4349983334541321, + "step": 683 + }, + { + "epoch": 0.18164918337538175, + "grad_norm": 1.059114838428009, + "learning_rate": 1.9909876114418242e-05, + "loss": 0.3783540427684784, + "step": 684 + }, + { + "epoch": 0.18191475235692472, + "grad_norm": 1.0050293498117582, + "learning_rate": 1.9909287000562383e-05, + "loss": 0.4065130054950714, + "step": 685 + }, + { + "epoch": 0.18218032133846768, + "grad_norm": 1.0122618604087057, + "learning_rate": 1.990869597631286e-05, + "loss": 0.3876315653324127, + "step": 686 + }, + { + "epoch": 0.18244589032001063, + "grad_norm": 0.9622962910168786, + "learning_rate": 1.9908103041783615e-05, + "loss": 0.3716024160385132, + "step": 687 + }, + { + 
"epoch": 0.18271145930155358, + "grad_norm": 1.086778230300176, + "learning_rate": 1.990750819708896e-05, + "loss": 0.4096733331680298, + "step": 688 + }, + { + "epoch": 0.18297702828309653, + "grad_norm": 1.131269280292305, + "learning_rate": 1.9906911442343567e-05, + "loss": 0.41432395577430725, + "step": 689 + }, + { + "epoch": 0.18324259726463948, + "grad_norm": 1.1182736792418642, + "learning_rate": 1.9906312777662493e-05, + "loss": 0.3934200406074524, + "step": 690 + }, + { + "epoch": 0.18350816624618244, + "grad_norm": 1.0493015785833109, + "learning_rate": 1.9905712203161148e-05, + "loss": 0.4246784746646881, + "step": 691 + }, + { + "epoch": 0.18377373522772542, + "grad_norm": 1.1362836227785695, + "learning_rate": 1.9905109718955323e-05, + "loss": 0.40027567744255066, + "step": 692 + }, + { + "epoch": 0.18403930420926837, + "grad_norm": 1.056262242708622, + "learning_rate": 1.990450532516116e-05, + "loss": 0.4162583351135254, + "step": 693 + }, + { + "epoch": 0.18430487319081132, + "grad_norm": 1.05760814074371, + "learning_rate": 1.990389902189518e-05, + "loss": 0.4133074879646301, + "step": 694 + }, + { + "epoch": 0.18457044217235427, + "grad_norm": 1.0438921885629904, + "learning_rate": 1.9903290809274277e-05, + "loss": 0.333192378282547, + "step": 695 + }, + { + "epoch": 0.18483601115389722, + "grad_norm": 0.9814281867123515, + "learning_rate": 1.9902680687415704e-05, + "loss": 0.39349496364593506, + "step": 696 + }, + { + "epoch": 0.18510158013544017, + "grad_norm": 1.0366332083029342, + "learning_rate": 1.9902068656437086e-05, + "loss": 0.39678412675857544, + "step": 697 + }, + { + "epoch": 0.18536714911698313, + "grad_norm": 1.0003960978434148, + "learning_rate": 1.9901454716456415e-05, + "loss": 0.3553932011127472, + "step": 698 + }, + { + "epoch": 0.18563271809852608, + "grad_norm": 1.0876315802223169, + "learning_rate": 1.990083886759205e-05, + "loss": 0.4264630079269409, + "step": 699 + }, + { + "epoch": 0.18589828708006906, + "grad_norm": 
1.0135520655053032, + "learning_rate": 1.9900221109962726e-05, + "loss": 0.3883950412273407, + "step": 700 + }, + { + "epoch": 0.186163856061612, + "grad_norm": 1.0408639715408188, + "learning_rate": 1.989960144368753e-05, + "loss": 0.38465407490730286, + "step": 701 + }, + { + "epoch": 0.18642942504315496, + "grad_norm": 2.2198594223984065, + "learning_rate": 1.9898979868885933e-05, + "loss": 0.39897871017456055, + "step": 702 + }, + { + "epoch": 0.1866949940246979, + "grad_norm": 1.120873004114704, + "learning_rate": 1.9898356385677762e-05, + "loss": 0.4386023283004761, + "step": 703 + }, + { + "epoch": 0.18696056300624087, + "grad_norm": 1.0254606123190075, + "learning_rate": 1.989773099418322e-05, + "loss": 0.42621874809265137, + "step": 704 + }, + { + "epoch": 0.18722613198778382, + "grad_norm": 1.0153284696458207, + "learning_rate": 1.9897103694522877e-05, + "loss": 0.3811546266078949, + "step": 705 + }, + { + "epoch": 0.18749170096932677, + "grad_norm": 1.0634877610237485, + "learning_rate": 1.989647448681767e-05, + "loss": 0.4018982946872711, + "step": 706 + }, + { + "epoch": 0.18775726995086975, + "grad_norm": 1.0316038713106725, + "learning_rate": 1.9895843371188897e-05, + "loss": 0.3920126259326935, + "step": 707 + }, + { + "epoch": 0.1880228389324127, + "grad_norm": 0.9767495366810068, + "learning_rate": 1.9895210347758233e-05, + "loss": 0.3598487973213196, + "step": 708 + }, + { + "epoch": 0.18828840791395565, + "grad_norm": 1.0286682270198635, + "learning_rate": 1.9894575416647717e-05, + "loss": 0.4204316735267639, + "step": 709 + }, + { + "epoch": 0.1885539768954986, + "grad_norm": 0.9653709480495668, + "learning_rate": 1.9893938577979755e-05, + "loss": 0.33814263343811035, + "step": 710 + }, + { + "epoch": 0.18881954587704156, + "grad_norm": 0.9588770367914977, + "learning_rate": 1.9893299831877124e-05, + "loss": 0.3788227140903473, + "step": 711 + }, + { + "epoch": 0.1890851148585845, + "grad_norm": 0.9974371582936609, + "learning_rate": 
1.989265917846297e-05, + "loss": 0.38141176104545593, + "step": 712 + }, + { + "epoch": 0.18935068384012746, + "grad_norm": 1.0051109402301954, + "learning_rate": 1.9892016617860793e-05, + "loss": 0.3757280707359314, + "step": 713 + }, + { + "epoch": 0.18961625282167044, + "grad_norm": 0.9863956856856875, + "learning_rate": 1.989137215019448e-05, + "loss": 0.37819087505340576, + "step": 714 + }, + { + "epoch": 0.1898818218032134, + "grad_norm": 1.1797000402703188, + "learning_rate": 1.9890725775588277e-05, + "loss": 0.46046000719070435, + "step": 715 + }, + { + "epoch": 0.19014739078475634, + "grad_norm": 0.9967163493181064, + "learning_rate": 1.9890077494166792e-05, + "loss": 0.33967363834381104, + "step": 716 + }, + { + "epoch": 0.1904129597662993, + "grad_norm": 0.9620841339155507, + "learning_rate": 1.988942730605501e-05, + "loss": 0.36672675609588623, + "step": 717 + }, + { + "epoch": 0.19067852874784225, + "grad_norm": 1.0666183498740949, + "learning_rate": 1.9888775211378278e-05, + "loss": 0.38705015182495117, + "step": 718 + }, + { + "epoch": 0.1909440977293852, + "grad_norm": 1.0696051052523068, + "learning_rate": 1.9888121210262313e-05, + "loss": 0.35257095098495483, + "step": 719 + }, + { + "epoch": 0.19120966671092815, + "grad_norm": 1.0337108803934987, + "learning_rate": 1.9887465302833194e-05, + "loss": 0.3803965449333191, + "step": 720 + }, + { + "epoch": 0.19147523569247113, + "grad_norm": 1.0097965015220993, + "learning_rate": 1.988680748921738e-05, + "loss": 0.38166487216949463, + "step": 721 + }, + { + "epoch": 0.19174080467401408, + "grad_norm": 0.971159209120872, + "learning_rate": 1.988614776954169e-05, + "loss": 0.4017483592033386, + "step": 722 + }, + { + "epoch": 0.19200637365555703, + "grad_norm": 1.0651840937747212, + "learning_rate": 1.98854861439333e-05, + "loss": 0.4343035817146301, + "step": 723 + }, + { + "epoch": 0.19227194263709999, + "grad_norm": 1.0527178531986199, + "learning_rate": 1.9884822612519773e-05, + "loss": 
0.4017031192779541, + "step": 724 + }, + { + "epoch": 0.19253751161864294, + "grad_norm": 0.9558335625340557, + "learning_rate": 1.988415717542903e-05, + "loss": 0.32294636964797974, + "step": 725 + }, + { + "epoch": 0.1928030806001859, + "grad_norm": 1.018550638071552, + "learning_rate": 1.988348983278935e-05, + "loss": 0.34661561250686646, + "step": 726 + }, + { + "epoch": 0.19306864958172884, + "grad_norm": 1.1264464061553692, + "learning_rate": 1.98828205847294e-05, + "loss": 0.3588724434375763, + "step": 727 + }, + { + "epoch": 0.19333421856327182, + "grad_norm": 1.151476031768393, + "learning_rate": 1.9882149431378194e-05, + "loss": 0.45439180731773376, + "step": 728 + }, + { + "epoch": 0.19359978754481477, + "grad_norm": 1.092854672146059, + "learning_rate": 1.988147637286513e-05, + "loss": 0.3916742205619812, + "step": 729 + }, + { + "epoch": 0.19386535652635772, + "grad_norm": 1.1073017625666908, + "learning_rate": 1.988080140931996e-05, + "loss": 0.3838115334510803, + "step": 730 + }, + { + "epoch": 0.19413092550790068, + "grad_norm": 1.0305888563782257, + "learning_rate": 1.9880124540872813e-05, + "loss": 0.3803096413612366, + "step": 731 + }, + { + "epoch": 0.19439649448944363, + "grad_norm": 1.0697488639709387, + "learning_rate": 1.987944576765418e-05, + "loss": 0.4180675446987152, + "step": 732 + }, + { + "epoch": 0.19466206347098658, + "grad_norm": 0.968492149308095, + "learning_rate": 1.987876508979492e-05, + "loss": 0.34485924243927, + "step": 733 + }, + { + "epoch": 0.19492763245252953, + "grad_norm": 1.0301319893667387, + "learning_rate": 1.987808250742626e-05, + "loss": 0.3696223795413971, + "step": 734 + }, + { + "epoch": 0.1951932014340725, + "grad_norm": 1.0070871597151176, + "learning_rate": 1.9877398020679796e-05, + "loss": 0.39920324087142944, + "step": 735 + }, + { + "epoch": 0.19545877041561546, + "grad_norm": 0.9772548764362861, + "learning_rate": 1.987671162968748e-05, + "loss": 0.33534419536590576, + "step": 736 + }, + { + "epoch": 
0.19572433939715841, + "grad_norm": 0.955184588375953, + "learning_rate": 1.9876023334581657e-05, + "loss": 0.3698185682296753, + "step": 737 + }, + { + "epoch": 0.19598990837870137, + "grad_norm": 1.0108475553340988, + "learning_rate": 1.9875333135495e-05, + "loss": 0.37388375401496887, + "step": 738 + }, + { + "epoch": 0.19625547736024432, + "grad_norm": 0.9685434293396273, + "learning_rate": 1.9874641032560594e-05, + "loss": 0.3285469114780426, + "step": 739 + }, + { + "epoch": 0.19652104634178727, + "grad_norm": 1.01794140535256, + "learning_rate": 1.9873947025911854e-05, + "loss": 0.3539549708366394, + "step": 740 + }, + { + "epoch": 0.19678661532333022, + "grad_norm": 1.0943847325994938, + "learning_rate": 1.9873251115682577e-05, + "loss": 0.4707021117210388, + "step": 741 + }, + { + "epoch": 0.1970521843048732, + "grad_norm": 0.9783865509799976, + "learning_rate": 1.987255330200693e-05, + "loss": 0.3871781826019287, + "step": 742 + }, + { + "epoch": 0.19731775328641615, + "grad_norm": 1.0462206197157178, + "learning_rate": 1.9871853585019446e-05, + "loss": 0.3890243172645569, + "step": 743 + }, + { + "epoch": 0.1975833222679591, + "grad_norm": 0.9914096392216383, + "learning_rate": 1.9871151964855013e-05, + "loss": 0.34914374351501465, + "step": 744 + }, + { + "epoch": 0.19784889124950206, + "grad_norm": 1.0157439665946277, + "learning_rate": 1.9870448441648905e-05, + "loss": 0.41009777784347534, + "step": 745 + }, + { + "epoch": 0.198114460231045, + "grad_norm": 1.0725931773033663, + "learning_rate": 1.9869743015536747e-05, + "loss": 0.39449363946914673, + "step": 746 + }, + { + "epoch": 0.19838002921258796, + "grad_norm": 1.081644116196219, + "learning_rate": 1.9869035686654538e-05, + "loss": 0.3530065417289734, + "step": 747 + }, + { + "epoch": 0.1986455981941309, + "grad_norm": 1.1338420898560146, + "learning_rate": 1.986832645513864e-05, + "loss": 0.4255196154117584, + "step": 748 + }, + { + "epoch": 0.1989111671756739, + "grad_norm": 
1.0625457917520444, + "learning_rate": 1.9867615321125796e-05, + "loss": 0.3921143114566803, + "step": 749 + }, + { + "epoch": 0.19917673615721684, + "grad_norm": 1.1076371778966394, + "learning_rate": 1.986690228475309e-05, + "loss": 0.4157381057739258, + "step": 750 + }, + { + "epoch": 0.1994423051387598, + "grad_norm": 0.9887260401437288, + "learning_rate": 1.986618734615799e-05, + "loss": 0.3922047019004822, + "step": 751 + }, + { + "epoch": 0.19970787412030275, + "grad_norm": 1.2477225666156357, + "learning_rate": 1.9865470505478335e-05, + "loss": 0.4378710985183716, + "step": 752 + }, + { + "epoch": 0.1999734431018457, + "grad_norm": 0.9960415180367619, + "learning_rate": 1.986475176285232e-05, + "loss": 0.3636753261089325, + "step": 753 + }, + { + "epoch": 0.20023901208338865, + "grad_norm": 1.0691751577172293, + "learning_rate": 1.986403111841851e-05, + "loss": 0.3509834408760071, + "step": 754 + }, + { + "epoch": 0.2005045810649316, + "grad_norm": 0.9490438891131449, + "learning_rate": 1.986330857231583e-05, + "loss": 0.3539624512195587, + "step": 755 + }, + { + "epoch": 0.20077015004647458, + "grad_norm": 1.002849163142055, + "learning_rate": 1.9862584124683587e-05, + "loss": 0.417904257774353, + "step": 756 + }, + { + "epoch": 0.20103571902801753, + "grad_norm": 0.9438738740406134, + "learning_rate": 1.9861857775661442e-05, + "loss": 0.3602277636528015, + "step": 757 + }, + { + "epoch": 0.2013012880095605, + "grad_norm": 1.0703002408877305, + "learning_rate": 1.986112952538943e-05, + "loss": 0.41064661741256714, + "step": 758 + }, + { + "epoch": 0.20156685699110344, + "grad_norm": 0.9789269746167363, + "learning_rate": 1.9860399374007944e-05, + "loss": 0.36313754320144653, + "step": 759 + }, + { + "epoch": 0.2018324259726464, + "grad_norm": 1.0711706181502203, + "learning_rate": 1.9859667321657755e-05, + "loss": 0.39497628808021545, + "step": 760 + }, + { + "epoch": 0.20209799495418934, + "grad_norm": 1.0173001682725575, + "learning_rate": 
1.9858933368479987e-05, + "loss": 0.405613511800766, + "step": 761 + }, + { + "epoch": 0.2023635639357323, + "grad_norm": 0.9881458101524105, + "learning_rate": 1.9858197514616142e-05, + "loss": 0.39093440771102905, + "step": 762 + }, + { + "epoch": 0.20262913291727527, + "grad_norm": 1.0330584509521943, + "learning_rate": 1.9857459760208084e-05, + "loss": 0.39908382296562195, + "step": 763 + }, + { + "epoch": 0.20289470189881822, + "grad_norm": 0.9416263868211369, + "learning_rate": 1.9856720105398038e-05, + "loss": 0.36787620186805725, + "step": 764 + }, + { + "epoch": 0.20316027088036118, + "grad_norm": 1.0128388377672763, + "learning_rate": 1.985597855032861e-05, + "loss": 0.390550822019577, + "step": 765 + }, + { + "epoch": 0.20342583986190413, + "grad_norm": 1.115759431869763, + "learning_rate": 1.9855235095142754e-05, + "loss": 0.4191611409187317, + "step": 766 + }, + { + "epoch": 0.20369140884344708, + "grad_norm": 1.1288935622655036, + "learning_rate": 1.985448973998381e-05, + "loss": 0.4060766100883484, + "step": 767 + }, + { + "epoch": 0.20395697782499003, + "grad_norm": 1.055264696895727, + "learning_rate": 1.985374248499546e-05, + "loss": 0.3906163275241852, + "step": 768 + }, + { + "epoch": 0.20422254680653298, + "grad_norm": 1.0101644212894914, + "learning_rate": 1.9852993330321774e-05, + "loss": 0.3926839828491211, + "step": 769 + }, + { + "epoch": 0.20448811578807596, + "grad_norm": 1.0474151984911524, + "learning_rate": 1.9852242276107182e-05, + "loss": 0.37276068329811096, + "step": 770 + }, + { + "epoch": 0.20475368476961892, + "grad_norm": 0.9531396793135881, + "learning_rate": 1.9851489322496476e-05, + "loss": 0.3765360414981842, + "step": 771 + }, + { + "epoch": 0.20501925375116187, + "grad_norm": 1.0017274873228423, + "learning_rate": 1.9850734469634815e-05, + "loss": 0.35091257095336914, + "step": 772 + }, + { + "epoch": 0.20528482273270482, + "grad_norm": 1.1164065944268338, + "learning_rate": 1.9849977717667725e-05, + "loss": 
0.4259791076183319, + "step": 773 + }, + { + "epoch": 0.20555039171424777, + "grad_norm": 0.9939508272565134, + "learning_rate": 1.9849219066741102e-05, + "loss": 0.3563114404678345, + "step": 774 + }, + { + "epoch": 0.20581596069579072, + "grad_norm": 1.0814350606971046, + "learning_rate": 1.9848458517001203e-05, + "loss": 0.4148223102092743, + "step": 775 + }, + { + "epoch": 0.20608152967733367, + "grad_norm": 1.0296405515766518, + "learning_rate": 1.9847696068594655e-05, + "loss": 0.3817785382270813, + "step": 776 + }, + { + "epoch": 0.20634709865887665, + "grad_norm": 1.115875170640065, + "learning_rate": 1.984693172166845e-05, + "loss": 0.41741886734962463, + "step": 777 + }, + { + "epoch": 0.2066126676404196, + "grad_norm": 1.0479957521256793, + "learning_rate": 1.9846165476369938e-05, + "loss": 0.34800025820732117, + "step": 778 + }, + { + "epoch": 0.20687823662196256, + "grad_norm": 1.0122784392492805, + "learning_rate": 1.9845397332846848e-05, + "loss": 0.38093405961990356, + "step": 779 + }, + { + "epoch": 0.2071438056035055, + "grad_norm": 1.0953515150858002, + "learning_rate": 1.9844627291247268e-05, + "loss": 0.40733009576797485, + "step": 780 + }, + { + "epoch": 0.20740937458504846, + "grad_norm": 1.1011295166986532, + "learning_rate": 1.9843855351719655e-05, + "loss": 0.3829066753387451, + "step": 781 + }, + { + "epoch": 0.2076749435665914, + "grad_norm": 1.0316161170996605, + "learning_rate": 1.9843081514412827e-05, + "loss": 0.3574868440628052, + "step": 782 + }, + { + "epoch": 0.20794051254813437, + "grad_norm": 1.071531696766489, + "learning_rate": 1.984230577947597e-05, + "loss": 0.3675144612789154, + "step": 783 + }, + { + "epoch": 0.20820608152967734, + "grad_norm": 0.9982781618225591, + "learning_rate": 1.9841528147058638e-05, + "loss": 0.36120525002479553, + "step": 784 + }, + { + "epoch": 0.2084716505112203, + "grad_norm": 1.0016427535647234, + "learning_rate": 1.984074861731075e-05, + "loss": 0.3651392459869385, + "step": 785 + }, + { + 
"epoch": 0.20873721949276325, + "grad_norm": 1.1254815799645344, + "learning_rate": 1.983996719038259e-05, + "loss": 0.4204651117324829, + "step": 786 + }, + { + "epoch": 0.2090027884743062, + "grad_norm": 1.0600310007301286, + "learning_rate": 1.9839183866424806e-05, + "loss": 0.4452149271965027, + "step": 787 + }, + { + "epoch": 0.20926835745584915, + "grad_norm": 1.000047138771705, + "learning_rate": 1.9838398645588418e-05, + "loss": 0.3931270241737366, + "step": 788 + }, + { + "epoch": 0.2095339264373921, + "grad_norm": 1.0009892054118905, + "learning_rate": 1.98376115280248e-05, + "loss": 0.3680538535118103, + "step": 789 + }, + { + "epoch": 0.20979949541893506, + "grad_norm": 0.9848864128393906, + "learning_rate": 1.9836822513885704e-05, + "loss": 0.3766820728778839, + "step": 790 + }, + { + "epoch": 0.21006506440047804, + "grad_norm": 1.0494510099931045, + "learning_rate": 1.9836031603323245e-05, + "loss": 0.3602439761161804, + "step": 791 + }, + { + "epoch": 0.210330633382021, + "grad_norm": 0.9790632198207762, + "learning_rate": 1.98352387964899e-05, + "loss": 0.38925549387931824, + "step": 792 + }, + { + "epoch": 0.21059620236356394, + "grad_norm": 1.0121548586068807, + "learning_rate": 1.9834444093538504e-05, + "loss": 0.3569640517234802, + "step": 793 + }, + { + "epoch": 0.2108617713451069, + "grad_norm": 1.0171085592107372, + "learning_rate": 1.9833647494622275e-05, + "loss": 0.3543340265750885, + "step": 794 + }, + { + "epoch": 0.21112734032664984, + "grad_norm": 1.0426744340585967, + "learning_rate": 1.983284899989479e-05, + "loss": 0.37313222885131836, + "step": 795 + }, + { + "epoch": 0.2113929093081928, + "grad_norm": 1.0940501026222131, + "learning_rate": 1.983204860950998e-05, + "loss": 0.3874257802963257, + "step": 796 + }, + { + "epoch": 0.21165847828973575, + "grad_norm": 1.005805069630653, + "learning_rate": 1.983124632362216e-05, + "loss": 0.3815164864063263, + "step": 797 + }, + { + "epoch": 0.21192404727127873, + "grad_norm": 
1.0879143214156584, + "learning_rate": 1.9830442142386e-05, + "loss": 0.39476731419563293, + "step": 798 + }, + { + "epoch": 0.21218961625282168, + "grad_norm": 1.0888281701524323, + "learning_rate": 1.9829636065956527e-05, + "loss": 0.399338036775589, + "step": 799 + }, + { + "epoch": 0.21245518523436463, + "grad_norm": 1.0679987938098825, + "learning_rate": 1.9828828094489157e-05, + "loss": 0.3940344452857971, + "step": 800 + }, + { + "epoch": 0.21272075421590758, + "grad_norm": 1.0124680733329086, + "learning_rate": 1.9828018228139647e-05, + "loss": 0.35597044229507446, + "step": 801 + }, + { + "epoch": 0.21298632319745053, + "grad_norm": 1.197291261672491, + "learning_rate": 1.9827206467064133e-05, + "loss": 0.4309435784816742, + "step": 802 + }, + { + "epoch": 0.21325189217899349, + "grad_norm": 1.0158009285134544, + "learning_rate": 1.9826392811419113e-05, + "loss": 0.37327438592910767, + "step": 803 + }, + { + "epoch": 0.21351746116053644, + "grad_norm": 0.9944187944281718, + "learning_rate": 1.9825577261361454e-05, + "loss": 0.35214242339134216, + "step": 804 + }, + { + "epoch": 0.21378303014207942, + "grad_norm": 1.1575422458756877, + "learning_rate": 1.982475981704838e-05, + "loss": 0.41114968061447144, + "step": 805 + }, + { + "epoch": 0.21404859912362237, + "grad_norm": 0.9719994027948292, + "learning_rate": 1.9823940478637486e-05, + "loss": 0.3632299304008484, + "step": 806 + }, + { + "epoch": 0.21431416810516532, + "grad_norm": 1.1699036102992622, + "learning_rate": 1.9823119246286727e-05, + "loss": 0.39640772342681885, + "step": 807 + }, + { + "epoch": 0.21457973708670827, + "grad_norm": 1.002397111320771, + "learning_rate": 1.9822296120154433e-05, + "loss": 0.39356929063796997, + "step": 808 + }, + { + "epoch": 0.21484530606825122, + "grad_norm": 1.061754718166072, + "learning_rate": 1.9821471100399294e-05, + "loss": 0.3710761070251465, + "step": 809 + }, + { + "epoch": 0.21511087504979418, + "grad_norm": 0.9713246248834058, + "learning_rate": 
1.9820644187180354e-05, + "loss": 0.35515087842941284, + "step": 810 + }, + { + "epoch": 0.21537644403133713, + "grad_norm": 1.0166244205196049, + "learning_rate": 1.981981538065704e-05, + "loss": 0.3803205192089081, + "step": 811 + }, + { + "epoch": 0.2156420130128801, + "grad_norm": 1.0421456761704733, + "learning_rate": 1.9818984680989134e-05, + "loss": 0.40275394916534424, + "step": 812 + }, + { + "epoch": 0.21590758199442306, + "grad_norm": 1.0872785008811605, + "learning_rate": 1.9818152088336786e-05, + "loss": 0.3711051344871521, + "step": 813 + }, + { + "epoch": 0.216173150975966, + "grad_norm": 1.0872190904032264, + "learning_rate": 1.9817317602860512e-05, + "loss": 0.4198985695838928, + "step": 814 + }, + { + "epoch": 0.21643871995750896, + "grad_norm": 0.9931448766878032, + "learning_rate": 1.9816481224721185e-05, + "loss": 0.38333773612976074, + "step": 815 + }, + { + "epoch": 0.21670428893905191, + "grad_norm": 1.1679000778390602, + "learning_rate": 1.9815642954080055e-05, + "loss": 0.3959774971008301, + "step": 816 + }, + { + "epoch": 0.21696985792059487, + "grad_norm": 1.1013876458182361, + "learning_rate": 1.9814802791098728e-05, + "loss": 0.3475337326526642, + "step": 817 + }, + { + "epoch": 0.21723542690213782, + "grad_norm": 1.06867842878894, + "learning_rate": 1.981396073593918e-05, + "loss": 0.369370698928833, + "step": 818 + }, + { + "epoch": 0.2175009958836808, + "grad_norm": 1.085763343280496, + "learning_rate": 1.9813116788763744e-05, + "loss": 0.3515776991844177, + "step": 819 + }, + { + "epoch": 0.21776656486522375, + "grad_norm": 1.0780206278908893, + "learning_rate": 1.9812270949735124e-05, + "loss": 0.3637402355670929, + "step": 820 + }, + { + "epoch": 0.2180321338467667, + "grad_norm": 1.0342672695189807, + "learning_rate": 1.9811423219016395e-05, + "loss": 0.3930947780609131, + "step": 821 + }, + { + "epoch": 0.21829770282830965, + "grad_norm": 1.102521832922822, + "learning_rate": 1.981057359677098e-05, + "loss": 
0.40081048011779785, + "step": 822 + }, + { + "epoch": 0.2185632718098526, + "grad_norm": 1.0386373096164698, + "learning_rate": 1.9809722083162682e-05, + "loss": 0.3831724226474762, + "step": 823 + }, + { + "epoch": 0.21882884079139556, + "grad_norm": 1.0516274934858763, + "learning_rate": 1.9808868678355662e-05, + "loss": 0.3919270932674408, + "step": 824 + }, + { + "epoch": 0.2190944097729385, + "grad_norm": 1.0623138704484363, + "learning_rate": 1.9808013382514448e-05, + "loss": 0.41782522201538086, + "step": 825 + }, + { + "epoch": 0.2193599787544815, + "grad_norm": 1.0570337251212087, + "learning_rate": 1.9807156195803926e-05, + "loss": 0.3751329779624939, + "step": 826 + }, + { + "epoch": 0.21962554773602444, + "grad_norm": 1.0009279652164118, + "learning_rate": 1.9806297118389353e-05, + "loss": 0.36451685428619385, + "step": 827 + }, + { + "epoch": 0.2198911167175674, + "grad_norm": 1.1911804759546862, + "learning_rate": 1.9805436150436352e-05, + "loss": 0.3924056887626648, + "step": 828 + }, + { + "epoch": 0.22015668569911034, + "grad_norm": 0.9887238598202497, + "learning_rate": 1.9804573292110906e-05, + "loss": 0.34744757413864136, + "step": 829 + }, + { + "epoch": 0.2204222546806533, + "grad_norm": 1.1506637434477502, + "learning_rate": 1.980370854357936e-05, + "loss": 0.4162982702255249, + "step": 830 + }, + { + "epoch": 0.22068782366219625, + "grad_norm": 1.103994708633239, + "learning_rate": 1.9802841905008434e-05, + "loss": 0.36572596430778503, + "step": 831 + }, + { + "epoch": 0.2209533926437392, + "grad_norm": 1.0028116020560682, + "learning_rate": 1.98019733765652e-05, + "loss": 0.3535170555114746, + "step": 832 + }, + { + "epoch": 0.22121896162528218, + "grad_norm": 1.061392974987333, + "learning_rate": 1.9801102958417107e-05, + "loss": 0.3906480073928833, + "step": 833 + }, + { + "epoch": 0.22148453060682513, + "grad_norm": 1.0646039703833918, + "learning_rate": 1.980023065073195e-05, + "loss": 0.34185755252838135, + "step": 834 + }, + { + 
"epoch": 0.22175009958836808, + "grad_norm": 1.1983506875652454, + "learning_rate": 1.9799356453677913e-05, + "loss": 0.4216359853744507, + "step": 835 + }, + { + "epoch": 0.22201566856991103, + "grad_norm": 1.038756499639493, + "learning_rate": 1.979848036742352e-05, + "loss": 0.365469366312027, + "step": 836 + }, + { + "epoch": 0.222281237551454, + "grad_norm": 1.0128951338762324, + "learning_rate": 1.9797602392137678e-05, + "loss": 0.3570204973220825, + "step": 837 + }, + { + "epoch": 0.22254680653299694, + "grad_norm": 1.0221196075964396, + "learning_rate": 1.9796722527989646e-05, + "loss": 0.3929975926876068, + "step": 838 + }, + { + "epoch": 0.2228123755145399, + "grad_norm": 1.1512146064832047, + "learning_rate": 1.979584077514905e-05, + "loss": 0.39064258337020874, + "step": 839 + }, + { + "epoch": 0.22307794449608287, + "grad_norm": 1.0559333522375243, + "learning_rate": 1.9794957133785884e-05, + "loss": 0.3626471757888794, + "step": 840 + }, + { + "epoch": 0.22334351347762582, + "grad_norm": 1.0867316997584564, + "learning_rate": 1.9794071604070506e-05, + "loss": 0.4337238371372223, + "step": 841 + }, + { + "epoch": 0.22360908245916877, + "grad_norm": 0.9358033183445809, + "learning_rate": 1.9793184186173632e-05, + "loss": 0.3361967206001282, + "step": 842 + }, + { + "epoch": 0.22387465144071172, + "grad_norm": 0.961043072021178, + "learning_rate": 1.9792294880266346e-05, + "loss": 0.3429332971572876, + "step": 843 + }, + { + "epoch": 0.22414022042225468, + "grad_norm": 1.012773989217256, + "learning_rate": 1.97914036865201e-05, + "loss": 0.39196616411209106, + "step": 844 + }, + { + "epoch": 0.22440578940379763, + "grad_norm": 1.1250916546708978, + "learning_rate": 1.9790510605106697e-05, + "loss": 0.3763045072555542, + "step": 845 + }, + { + "epoch": 0.22467135838534058, + "grad_norm": 1.1139610172600873, + "learning_rate": 1.978961563619832e-05, + "loss": 0.41614070534706116, + "step": 846 + }, + { + "epoch": 0.22493692736688356, + "grad_norm": 
1.065347693165354, + "learning_rate": 1.9788718779967506e-05, + "loss": 0.3834165334701538, + "step": 847 + }, + { + "epoch": 0.2252024963484265, + "grad_norm": 0.9834992911039661, + "learning_rate": 1.978782003658716e-05, + "loss": 0.3552364110946655, + "step": 848 + }, + { + "epoch": 0.22546806532996946, + "grad_norm": 1.0365749744504318, + "learning_rate": 1.9786919406230544e-05, + "loss": 0.3857925534248352, + "step": 849 + }, + { + "epoch": 0.22573363431151242, + "grad_norm": 1.0779836727772776, + "learning_rate": 1.9786016889071294e-05, + "loss": 0.3501393795013428, + "step": 850 + }, + { + "epoch": 0.22599920329305537, + "grad_norm": 1.1363104904390704, + "learning_rate": 1.9785112485283404e-05, + "loss": 0.36280643939971924, + "step": 851 + }, + { + "epoch": 0.22626477227459832, + "grad_norm": 1.1791591930929934, + "learning_rate": 1.978420619504123e-05, + "loss": 0.3713894486427307, + "step": 852 + }, + { + "epoch": 0.22653034125614127, + "grad_norm": 1.0682718312185442, + "learning_rate": 1.97832980185195e-05, + "loss": 0.3668733537197113, + "step": 853 + }, + { + "epoch": 0.22679591023768425, + "grad_norm": 1.06232834606136, + "learning_rate": 1.978238795589329e-05, + "loss": 0.4054701626300812, + "step": 854 + }, + { + "epoch": 0.2270614792192272, + "grad_norm": 1.1024819375758403, + "learning_rate": 1.9781476007338058e-05, + "loss": 0.3824681043624878, + "step": 855 + }, + { + "epoch": 0.22732704820077015, + "grad_norm": 1.0604830101195206, + "learning_rate": 1.978056217302961e-05, + "loss": 0.4009544253349304, + "step": 856 + }, + { + "epoch": 0.2275926171823131, + "grad_norm": 1.0150812264671392, + "learning_rate": 1.9779646453144133e-05, + "loss": 0.34773316979408264, + "step": 857 + }, + { + "epoch": 0.22785818616385606, + "grad_norm": 1.0737509474924387, + "learning_rate": 1.977872884785815e-05, + "loss": 0.4067278206348419, + "step": 858 + }, + { + "epoch": 0.228123755145399, + "grad_norm": 1.0566398666110703, + "learning_rate": 
1.9777809357348584e-05, + "loss": 0.3843458890914917, + "step": 859 + }, + { + "epoch": 0.22838932412694196, + "grad_norm": 1.083451143522079, + "learning_rate": 1.977688798179269e-05, + "loss": 0.4261704683303833, + "step": 860 + }, + { + "epoch": 0.22865489310848494, + "grad_norm": 1.0145015740681522, + "learning_rate": 1.9775964721368098e-05, + "loss": 0.39109086990356445, + "step": 861 + }, + { + "epoch": 0.2289204620900279, + "grad_norm": 1.1472642326588585, + "learning_rate": 1.9775039576252807e-05, + "loss": 0.39436954259872437, + "step": 862 + }, + { + "epoch": 0.22918603107157084, + "grad_norm": 0.9770870267905873, + "learning_rate": 1.9774112546625168e-05, + "loss": 0.3787967562675476, + "step": 863 + }, + { + "epoch": 0.2294516000531138, + "grad_norm": 1.5071435779935147, + "learning_rate": 1.9773183632663907e-05, + "loss": 0.3729320466518402, + "step": 864 + }, + { + "epoch": 0.22971716903465675, + "grad_norm": 1.0048578103437809, + "learning_rate": 1.9772252834548108e-05, + "loss": 0.3817081153392792, + "step": 865 + }, + { + "epoch": 0.2299827380161997, + "grad_norm": 0.9709592169890221, + "learning_rate": 1.9771320152457212e-05, + "loss": 0.3362218737602234, + "step": 866 + }, + { + "epoch": 0.23024830699774265, + "grad_norm": 1.0194192402395448, + "learning_rate": 1.9770385586571033e-05, + "loss": 0.37274059653282166, + "step": 867 + }, + { + "epoch": 0.23051387597928563, + "grad_norm": 1.058710969457703, + "learning_rate": 1.9769449137069746e-05, + "loss": 0.3832330107688904, + "step": 868 + }, + { + "epoch": 0.23077944496082858, + "grad_norm": 0.9857605594513371, + "learning_rate": 1.9768510804133886e-05, + "loss": 0.37420010566711426, + "step": 869 + }, + { + "epoch": 0.23104501394237154, + "grad_norm": 1.0333482020677847, + "learning_rate": 1.976757058794435e-05, + "loss": 0.35314565896987915, + "step": 870 + }, + { + "epoch": 0.2313105829239145, + "grad_norm": 1.0404097802666386, + "learning_rate": 1.97666284886824e-05, + "loss": 
0.34667372703552246, + "step": 871 + }, + { + "epoch": 0.23157615190545744, + "grad_norm": 1.1826768759617956, + "learning_rate": 1.976568450652967e-05, + "loss": 0.3465980589389801, + "step": 872 + }, + { + "epoch": 0.2318417208870004, + "grad_norm": 1.6479387485919323, + "learning_rate": 1.9764738641668137e-05, + "loss": 0.40539389848709106, + "step": 873 + }, + { + "epoch": 0.23210728986854334, + "grad_norm": 1.090454596374008, + "learning_rate": 1.976379089428016e-05, + "loss": 0.35154545307159424, + "step": 874 + }, + { + "epoch": 0.23237285885008632, + "grad_norm": 1.1033163387519414, + "learning_rate": 1.9762841264548453e-05, + "loss": 0.39748087525367737, + "step": 875 + }, + { + "epoch": 0.23263842783162927, + "grad_norm": 1.0600221119400453, + "learning_rate": 1.976188975265609e-05, + "loss": 0.41628387570381165, + "step": 876 + }, + { + "epoch": 0.23290399681317223, + "grad_norm": 1.0805125037340586, + "learning_rate": 1.976093635878652e-05, + "loss": 0.4076233208179474, + "step": 877 + }, + { + "epoch": 0.23316956579471518, + "grad_norm": 0.9221839355888705, + "learning_rate": 1.9759981083123533e-05, + "loss": 0.3262259364128113, + "step": 878 + }, + { + "epoch": 0.23343513477625813, + "grad_norm": 1.1690018828805817, + "learning_rate": 1.9759023925851302e-05, + "loss": 0.36561673879623413, + "step": 879 + }, + { + "epoch": 0.23370070375780108, + "grad_norm": 1.083829918240926, + "learning_rate": 1.9758064887154358e-05, + "loss": 0.36661773920059204, + "step": 880 + }, + { + "epoch": 0.23396627273934403, + "grad_norm": 1.0655263771494812, + "learning_rate": 1.9757103967217587e-05, + "loss": 0.34671685099601746, + "step": 881 + }, + { + "epoch": 0.234231841720887, + "grad_norm": 1.0056372913167473, + "learning_rate": 1.9756141166226246e-05, + "loss": 0.3486331105232239, + "step": 882 + }, + { + "epoch": 0.23449741070242996, + "grad_norm": 1.1177836982205323, + "learning_rate": 1.9755176484365953e-05, + "loss": 0.3883505165576935, + "step": 883 + }, + { + 
"epoch": 0.23476297968397292, + "grad_norm": 1.0548520245203914, + "learning_rate": 1.9754209921822683e-05, + "loss": 0.3832106590270996, + "step": 884 + }, + { + "epoch": 0.23502854866551587, + "grad_norm": 1.078830112662993, + "learning_rate": 1.975324147878278e-05, + "loss": 0.37876033782958984, + "step": 885 + }, + { + "epoch": 0.23529411764705882, + "grad_norm": 1.0689289829128008, + "learning_rate": 1.975227115543295e-05, + "loss": 0.38931846618652344, + "step": 886 + }, + { + "epoch": 0.23555968662860177, + "grad_norm": 0.956721500767322, + "learning_rate": 1.9751298951960258e-05, + "loss": 0.3581021726131439, + "step": 887 + }, + { + "epoch": 0.23582525561014472, + "grad_norm": 1.0206944172292924, + "learning_rate": 1.9750324868552133e-05, + "loss": 0.35196465253829956, + "step": 888 + }, + { + "epoch": 0.2360908245916877, + "grad_norm": 0.9996206423870837, + "learning_rate": 1.974934890539637e-05, + "loss": 0.3635658025741577, + "step": 889 + }, + { + "epoch": 0.23635639357323066, + "grad_norm": 0.9523927655707425, + "learning_rate": 1.9748371062681122e-05, + "loss": 0.345594197511673, + "step": 890 + }, + { + "epoch": 0.2366219625547736, + "grad_norm": 1.0443032231121456, + "learning_rate": 1.97473913405949e-05, + "loss": 0.357181191444397, + "step": 891 + }, + { + "epoch": 0.23688753153631656, + "grad_norm": 1.0008000126392016, + "learning_rate": 1.974640973932659e-05, + "loss": 0.3264622986316681, + "step": 892 + }, + { + "epoch": 0.2371531005178595, + "grad_norm": 0.9731630083329554, + "learning_rate": 1.9745426259065434e-05, + "loss": 0.37950894236564636, + "step": 893 + }, + { + "epoch": 0.23741866949940246, + "grad_norm": 1.1493289415276364, + "learning_rate": 1.9744440900001027e-05, + "loss": 0.37400782108306885, + "step": 894 + }, + { + "epoch": 0.23768423848094541, + "grad_norm": 1.0325785235739895, + "learning_rate": 1.974345366232334e-05, + "loss": 0.3455463945865631, + "step": 895 + }, + { + "epoch": 0.2379498074624884, + "grad_norm": 
1.1059511993758653, + "learning_rate": 1.9742464546222702e-05, + "loss": 0.3605351150035858, + "step": 896 + }, + { + "epoch": 0.23821537644403135, + "grad_norm": 0.9763906212855142, + "learning_rate": 1.97414735518898e-05, + "loss": 0.3839051127433777, + "step": 897 + }, + { + "epoch": 0.2384809454255743, + "grad_norm": 1.0304758127284366, + "learning_rate": 1.974048067951569e-05, + "loss": 0.34562867879867554, + "step": 898 + }, + { + "epoch": 0.23874651440711725, + "grad_norm": 1.1332867443652592, + "learning_rate": 1.9739485929291778e-05, + "loss": 0.3986506760120392, + "step": 899 + }, + { + "epoch": 0.2390120833886602, + "grad_norm": 1.1598961775072092, + "learning_rate": 1.9738489301409848e-05, + "loss": 0.3955162465572357, + "step": 900 + }, + { + "epoch": 0.23927765237020315, + "grad_norm": 1.080226447361195, + "learning_rate": 1.9737490796062036e-05, + "loss": 0.370066374540329, + "step": 901 + }, + { + "epoch": 0.2395432213517461, + "grad_norm": 1.0637004733407822, + "learning_rate": 1.973649041344084e-05, + "loss": 0.3777826726436615, + "step": 902 + }, + { + "epoch": 0.23980879033328908, + "grad_norm": 1.1358293788080334, + "learning_rate": 1.9735488153739128e-05, + "loss": 0.327572226524353, + "step": 903 + }, + { + "epoch": 0.24007435931483204, + "grad_norm": 1.071729158749965, + "learning_rate": 1.973448401715011e-05, + "loss": 0.3921743929386139, + "step": 904 + }, + { + "epoch": 0.240339928296375, + "grad_norm": 1.0635179670685195, + "learning_rate": 1.973347800386739e-05, + "loss": 0.3683379888534546, + "step": 905 + }, + { + "epoch": 0.24060549727791794, + "grad_norm": 1.023832589054702, + "learning_rate": 1.9732470114084905e-05, + "loss": 0.390872597694397, + "step": 906 + }, + { + "epoch": 0.2408710662594609, + "grad_norm": 1.0814023137489452, + "learning_rate": 1.9731460347996964e-05, + "loss": 0.3772459626197815, + "step": 907 + }, + { + "epoch": 0.24113663524100384, + "grad_norm": 1.0280982913686894, + "learning_rate": 
1.973044870579824e-05, + "loss": 0.37990954518318176, + "step": 908 + }, + { + "epoch": 0.2414022042225468, + "grad_norm": 1.0035238419205756, + "learning_rate": 1.972943518768377e-05, + "loss": 0.3380817770957947, + "step": 909 + }, + { + "epoch": 0.24166777320408978, + "grad_norm": 0.9879847056007396, + "learning_rate": 1.9728419793848935e-05, + "loss": 0.3348115384578705, + "step": 910 + }, + { + "epoch": 0.24193334218563273, + "grad_norm": 1.0561235323428824, + "learning_rate": 1.9727402524489505e-05, + "loss": 0.36936551332473755, + "step": 911 + }, + { + "epoch": 0.24219891116717568, + "grad_norm": 1.0744513063457712, + "learning_rate": 1.9726383379801593e-05, + "loss": 0.3871539235115051, + "step": 912 + }, + { + "epoch": 0.24246448014871863, + "grad_norm": 1.0904556770971818, + "learning_rate": 1.9725362359981676e-05, + "loss": 0.37087059020996094, + "step": 913 + }, + { + "epoch": 0.24273004913026158, + "grad_norm": 0.9802916629421812, + "learning_rate": 1.9724339465226595e-05, + "loss": 0.35582688450813293, + "step": 914 + }, + { + "epoch": 0.24299561811180453, + "grad_norm": 1.0947021466091125, + "learning_rate": 1.9723314695733557e-05, + "loss": 0.38500669598579407, + "step": 915 + }, + { + "epoch": 0.2432611870933475, + "grad_norm": 0.9834121517145057, + "learning_rate": 1.9722288051700116e-05, + "loss": 0.32470762729644775, + "step": 916 + }, + { + "epoch": 0.24352675607489047, + "grad_norm": 1.0805011919993295, + "learning_rate": 1.9721259533324207e-05, + "loss": 0.3822774589061737, + "step": 917 + }, + { + "epoch": 0.24379232505643342, + "grad_norm": 0.9937398719966192, + "learning_rate": 1.972022914080411e-05, + "loss": 0.38374873995780945, + "step": 918 + }, + { + "epoch": 0.24405789403797637, + "grad_norm": 1.0550770033370775, + "learning_rate": 1.9719196874338472e-05, + "loss": 0.3419352173805237, + "step": 919 + }, + { + "epoch": 0.24432346301951932, + "grad_norm": 1.0164630853495407, + "learning_rate": 1.9718162734126308e-05, + "loss": 
0.3294275403022766, + "step": 920 + }, + { + "epoch": 0.24458903200106227, + "grad_norm": 1.0668295499881337, + "learning_rate": 1.9717126720366982e-05, + "loss": 0.3585365414619446, + "step": 921 + }, + { + "epoch": 0.24485460098260522, + "grad_norm": 1.0609325079201495, + "learning_rate": 1.9716088833260225e-05, + "loss": 0.38130316138267517, + "step": 922 + }, + { + "epoch": 0.24512016996414818, + "grad_norm": 1.0577067392982809, + "learning_rate": 1.9715049073006133e-05, + "loss": 0.3745136260986328, + "step": 923 + }, + { + "epoch": 0.24538573894569116, + "grad_norm": 1.0457228779122651, + "learning_rate": 1.971400743980516e-05, + "loss": 0.3771660327911377, + "step": 924 + }, + { + "epoch": 0.2456513079272341, + "grad_norm": 1.0133861698501567, + "learning_rate": 1.971296393385812e-05, + "loss": 0.29661691188812256, + "step": 925 + }, + { + "epoch": 0.24591687690877706, + "grad_norm": 0.9516714902458889, + "learning_rate": 1.9711918555366184e-05, + "loss": 0.33783960342407227, + "step": 926 + }, + { + "epoch": 0.24618244589032, + "grad_norm": 1.2469460687001952, + "learning_rate": 1.971087130453089e-05, + "loss": 0.42983683943748474, + "step": 927 + }, + { + "epoch": 0.24644801487186296, + "grad_norm": 0.9725914261438413, + "learning_rate": 1.9709822181554142e-05, + "loss": 0.32242363691329956, + "step": 928 + }, + { + "epoch": 0.24671358385340592, + "grad_norm": 1.0989308968162201, + "learning_rate": 1.970877118663819e-05, + "loss": 0.3576955795288086, + "step": 929 + }, + { + "epoch": 0.24697915283494887, + "grad_norm": 1.116595385391156, + "learning_rate": 1.9707718319985663e-05, + "loss": 0.4185359477996826, + "step": 930 + }, + { + "epoch": 0.24724472181649185, + "grad_norm": 1.1178442474909813, + "learning_rate": 1.970666358179953e-05, + "loss": 0.35377705097198486, + "step": 931 + }, + { + "epoch": 0.2475102907980348, + "grad_norm": 1.1350743092525455, + "learning_rate": 1.9705606972283143e-05, + "loss": 0.3860151171684265, + "step": 932 + }, + { + 
"epoch": 0.24777585977957775, + "grad_norm": 1.1915035264404457, + "learning_rate": 1.9704548491640195e-05, + "loss": 0.39463168382644653, + "step": 933 + }, + { + "epoch": 0.2480414287611207, + "grad_norm": 1.0462444044755623, + "learning_rate": 1.9703488140074752e-05, + "loss": 0.3670084774494171, + "step": 934 + }, + { + "epoch": 0.24830699774266365, + "grad_norm": 1.2914788702644175, + "learning_rate": 1.9702425917791242e-05, + "loss": 0.388730525970459, + "step": 935 + }, + { + "epoch": 0.2485725667242066, + "grad_norm": 1.128517931307855, + "learning_rate": 1.970136182499444e-05, + "loss": 0.38767656683921814, + "step": 936 + }, + { + "epoch": 0.24883813570574956, + "grad_norm": 1.0771582387425684, + "learning_rate": 1.9700295861889497e-05, + "loss": 0.35394930839538574, + "step": 937 + }, + { + "epoch": 0.24910370468729254, + "grad_norm": 1.0639329095738126, + "learning_rate": 1.9699228028681917e-05, + "loss": 0.3360324501991272, + "step": 938 + }, + { + "epoch": 0.2493692736688355, + "grad_norm": 1.116621384383513, + "learning_rate": 1.9698158325577563e-05, + "loss": 0.390169233083725, + "step": 939 + }, + { + "epoch": 0.24963484265037844, + "grad_norm": 1.108635788765439, + "learning_rate": 1.9697086752782666e-05, + "loss": 0.3921571671962738, + "step": 940 + }, + { + "epoch": 0.2499004116319214, + "grad_norm": 1.0665933445619122, + "learning_rate": 1.9696013310503808e-05, + "loss": 0.3795739710330963, + "step": 941 + }, + { + "epoch": 0.25016598061346434, + "grad_norm": 1.2202319167117164, + "learning_rate": 1.9694937998947935e-05, + "loss": 0.3891025185585022, + "step": 942 + }, + { + "epoch": 0.2504315495950073, + "grad_norm": 0.9751921056908068, + "learning_rate": 1.9693860818322357e-05, + "loss": 0.3548225164413452, + "step": 943 + }, + { + "epoch": 0.25069711857655025, + "grad_norm": 1.0555900207888067, + "learning_rate": 1.9692781768834747e-05, + "loss": 0.3696819543838501, + "step": 944 + }, + { + "epoch": 0.2509626875580932, + "grad_norm": 
1.1322184210541604, + "learning_rate": 1.9691700850693126e-05, + "loss": 0.3906037211418152, + "step": 945 + }, + { + "epoch": 0.25122825653963615, + "grad_norm": 1.072434154806742, + "learning_rate": 1.9690618064105883e-05, + "loss": 0.38181206583976746, + "step": 946 + }, + { + "epoch": 0.2514938255211791, + "grad_norm": 1.0644124497842522, + "learning_rate": 1.9689533409281765e-05, + "loss": 0.36904582381248474, + "step": 947 + }, + { + "epoch": 0.25175939450272206, + "grad_norm": 1.097105891991116, + "learning_rate": 1.9688446886429885e-05, + "loss": 0.3635823130607605, + "step": 948 + }, + { + "epoch": 0.25202496348426506, + "grad_norm": 0.9954310874837226, + "learning_rate": 1.9687358495759713e-05, + "loss": 0.3527260422706604, + "step": 949 + }, + { + "epoch": 0.252290532465808, + "grad_norm": 1.1902017812011518, + "learning_rate": 1.968626823748107e-05, + "loss": 0.3781110346317291, + "step": 950 + }, + { + "epoch": 0.25255610144735097, + "grad_norm": 1.0346217070487125, + "learning_rate": 1.968517611180415e-05, + "loss": 0.3931560814380646, + "step": 951 + }, + { + "epoch": 0.2528216704288939, + "grad_norm": 1.0783245371828571, + "learning_rate": 1.9684082118939503e-05, + "loss": 0.39111074805259705, + "step": 952 + }, + { + "epoch": 0.25308723941043687, + "grad_norm": 1.2090013193363973, + "learning_rate": 1.9682986259098037e-05, + "loss": 0.385967880487442, + "step": 953 + }, + { + "epoch": 0.2533528083919798, + "grad_norm": 1.0103878099057118, + "learning_rate": 1.9681888532491022e-05, + "loss": 0.34006553888320923, + "step": 954 + }, + { + "epoch": 0.2536183773735228, + "grad_norm": 1.0077784550534965, + "learning_rate": 1.9680788939330086e-05, + "loss": 0.36069998145103455, + "step": 955 + }, + { + "epoch": 0.2538839463550657, + "grad_norm": 1.090649670414093, + "learning_rate": 1.9679687479827212e-05, + "loss": 0.3354898691177368, + "step": 956 + }, + { + "epoch": 0.2541495153366087, + "grad_norm": 1.0691933766101984, + "learning_rate": 
1.9678584154194756e-05, + "loss": 0.35667335987091064, + "step": 957 + }, + { + "epoch": 0.25441508431815163, + "grad_norm": 1.2652121820599898, + "learning_rate": 1.9677478962645422e-05, + "loss": 0.4003029465675354, + "step": 958 + }, + { + "epoch": 0.2546806532996946, + "grad_norm": 1.0313200756086844, + "learning_rate": 1.9676371905392278e-05, + "loss": 0.34397056698799133, + "step": 959 + }, + { + "epoch": 0.25494622228123753, + "grad_norm": 1.0544706314753822, + "learning_rate": 1.9675262982648757e-05, + "loss": 0.35319578647613525, + "step": 960 + }, + { + "epoch": 0.2552117912627805, + "grad_norm": 1.0179000224070893, + "learning_rate": 1.967415219462864e-05, + "loss": 0.34840327501296997, + "step": 961 + }, + { + "epoch": 0.25547736024432344, + "grad_norm": 0.9360325612494472, + "learning_rate": 1.9673039541546076e-05, + "loss": 0.3298989534378052, + "step": 962 + }, + { + "epoch": 0.25574292922586644, + "grad_norm": 1.0904225305922717, + "learning_rate": 1.9671925023615572e-05, + "loss": 0.38438719511032104, + "step": 963 + }, + { + "epoch": 0.2560084982074094, + "grad_norm": 1.128608711014793, + "learning_rate": 1.9670808641051994e-05, + "loss": 0.3834493160247803, + "step": 964 + }, + { + "epoch": 0.25627406718895235, + "grad_norm": 1.0456501331264114, + "learning_rate": 1.9669690394070564e-05, + "loss": 0.3713288903236389, + "step": 965 + }, + { + "epoch": 0.2565396361704953, + "grad_norm": 1.0864184401996346, + "learning_rate": 1.966857028288687e-05, + "loss": 0.37564241886138916, + "step": 966 + }, + { + "epoch": 0.25680520515203825, + "grad_norm": 1.0329676619050974, + "learning_rate": 1.9667448307716857e-05, + "loss": 0.30162689089775085, + "step": 967 + }, + { + "epoch": 0.2570707741335812, + "grad_norm": 1.0948768995323135, + "learning_rate": 1.9666324468776826e-05, + "loss": 0.35969680547714233, + "step": 968 + }, + { + "epoch": 0.25733634311512416, + "grad_norm": 1.206651724690857, + "learning_rate": 1.9665198766283444e-05, + "loss": 
0.40947285294532776, + "step": 969 + }, + { + "epoch": 0.2576019120966671, + "grad_norm": 1.0651964473806064, + "learning_rate": 1.9664071200453726e-05, + "loss": 0.35868343710899353, + "step": 970 + }, + { + "epoch": 0.25786748107821006, + "grad_norm": 1.1330033214419297, + "learning_rate": 1.966294177150506e-05, + "loss": 0.3569234311580658, + "step": 971 + }, + { + "epoch": 0.258133050059753, + "grad_norm": 1.1641224987322216, + "learning_rate": 1.9661810479655184e-05, + "loss": 0.3381764888763428, + "step": 972 + }, + { + "epoch": 0.25839861904129596, + "grad_norm": 1.535927577191984, + "learning_rate": 1.9660677325122196e-05, + "loss": 0.39847785234451294, + "step": 973 + }, + { + "epoch": 0.2586641880228389, + "grad_norm": 0.9608622914302752, + "learning_rate": 1.965954230812456e-05, + "loss": 0.33162468671798706, + "step": 974 + }, + { + "epoch": 0.25892975700438187, + "grad_norm": 1.0421688584245348, + "learning_rate": 1.9658405428881087e-05, + "loss": 0.3627605438232422, + "step": 975 + }, + { + "epoch": 0.2591953259859248, + "grad_norm": 1.0501672081861986, + "learning_rate": 1.9657266687610965e-05, + "loss": 0.3253796100616455, + "step": 976 + }, + { + "epoch": 0.2594608949674678, + "grad_norm": 1.0198628618780734, + "learning_rate": 1.9656126084533716e-05, + "loss": 0.3341265916824341, + "step": 977 + }, + { + "epoch": 0.2597264639490108, + "grad_norm": 1.0202967346949672, + "learning_rate": 1.9654983619869242e-05, + "loss": 0.3714970052242279, + "step": 978 + }, + { + "epoch": 0.25999203293055373, + "grad_norm": 1.0333982958482495, + "learning_rate": 1.9653839293837798e-05, + "loss": 0.3360912501811981, + "step": 979 + }, + { + "epoch": 0.2602576019120967, + "grad_norm": 1.0322459892827835, + "learning_rate": 1.9652693106659995e-05, + "loss": 0.3780854642391205, + "step": 980 + }, + { + "epoch": 0.26052317089363963, + "grad_norm": 1.1062219940451128, + "learning_rate": 1.9651545058556803e-05, + "loss": 0.33595478534698486, + "step": 981 + }, + { + 
"epoch": 0.2607887398751826, + "grad_norm": 1.111464982167328, + "learning_rate": 1.965039514974955e-05, + "loss": 0.3608357012271881, + "step": 982 + }, + { + "epoch": 0.26105430885672554, + "grad_norm": 1.0024532391943957, + "learning_rate": 1.964924338045993e-05, + "loss": 0.3807666599750519, + "step": 983 + }, + { + "epoch": 0.2613198778382685, + "grad_norm": 1.0213030373156555, + "learning_rate": 1.964808975090999e-05, + "loss": 0.3551647663116455, + "step": 984 + }, + { + "epoch": 0.26158544681981144, + "grad_norm": 1.0761922389740786, + "learning_rate": 1.9646934261322135e-05, + "loss": 0.3771904706954956, + "step": 985 + }, + { + "epoch": 0.2618510158013544, + "grad_norm": 1.1925998045571422, + "learning_rate": 1.964577691191913e-05, + "loss": 0.41103222966194153, + "step": 986 + }, + { + "epoch": 0.26211658478289734, + "grad_norm": 1.0270282722515527, + "learning_rate": 1.9644617702924093e-05, + "loss": 0.34439292550086975, + "step": 987 + }, + { + "epoch": 0.2623821537644403, + "grad_norm": 1.1578988390038234, + "learning_rate": 1.9643456634560515e-05, + "loss": 0.41214391589164734, + "step": 988 + }, + { + "epoch": 0.26264772274598325, + "grad_norm": 0.9879567855265076, + "learning_rate": 1.9642293707052232e-05, + "loss": 0.3186502754688263, + "step": 989 + }, + { + "epoch": 0.2629132917275262, + "grad_norm": 1.039224300824638, + "learning_rate": 1.9641128920623438e-05, + "loss": 0.3534559905529022, + "step": 990 + }, + { + "epoch": 0.2631788607090692, + "grad_norm": 1.0867820667103292, + "learning_rate": 1.96399622754987e-05, + "loss": 0.35217320919036865, + "step": 991 + }, + { + "epoch": 0.26344442969061216, + "grad_norm": 0.954421559413849, + "learning_rate": 1.9638793771902924e-05, + "loss": 0.31661587953567505, + "step": 992 + }, + { + "epoch": 0.2637099986721551, + "grad_norm": 0.9881195075112362, + "learning_rate": 1.9637623410061392e-05, + "loss": 0.32468482851982117, + "step": 993 + }, + { + "epoch": 0.26397556765369806, + "grad_norm": 
1.0355017939200293, + "learning_rate": 1.9636451190199727e-05, + "loss": 0.346771776676178, + "step": 994 + }, + { + "epoch": 0.264241136635241, + "grad_norm": 1.0997948902450267, + "learning_rate": 1.9635277112543928e-05, + "loss": 0.36409270763397217, + "step": 995 + }, + { + "epoch": 0.26450670561678397, + "grad_norm": 1.2132528670947562, + "learning_rate": 1.963410117732034e-05, + "loss": 0.404967725276947, + "step": 996 + }, + { + "epoch": 0.2647722745983269, + "grad_norm": 1.1962964423617835, + "learning_rate": 1.9632923384755666e-05, + "loss": 0.39506661891937256, + "step": 997 + }, + { + "epoch": 0.26503784357986987, + "grad_norm": 1.1967751692769375, + "learning_rate": 1.9631743735076972e-05, + "loss": 0.3833203911781311, + "step": 998 + }, + { + "epoch": 0.2653034125614128, + "grad_norm": 1.083140773107417, + "learning_rate": 1.9630562228511682e-05, + "loss": 0.34522518515586853, + "step": 999 + }, + { + "epoch": 0.2655689815429558, + "grad_norm": 1.1367328076589556, + "learning_rate": 1.962937886528758e-05, + "loss": 0.3818400800228119, + "step": 1000 + }, + { + "epoch": 0.2658345505244987, + "grad_norm": 1.2496699132911573, + "learning_rate": 1.9628193645632796e-05, + "loss": 0.40827828645706177, + "step": 1001 + }, + { + "epoch": 0.2661001195060417, + "grad_norm": 1.0406728708542907, + "learning_rate": 1.962700656977583e-05, + "loss": 0.3448852002620697, + "step": 1002 + }, + { + "epoch": 0.26636568848758463, + "grad_norm": 1.1035895986897222, + "learning_rate": 1.9625817637945542e-05, + "loss": 0.36560773849487305, + "step": 1003 + }, + { + "epoch": 0.2666312574691276, + "grad_norm": 1.1637977684704512, + "learning_rate": 1.962462685037114e-05, + "loss": 0.38305893540382385, + "step": 1004 + }, + { + "epoch": 0.2668968264506706, + "grad_norm": 1.0320363555261158, + "learning_rate": 1.962343420728219e-05, + "loss": 0.3562568426132202, + "step": 1005 + }, + { + "epoch": 0.26716239543221354, + "grad_norm": 1.18312934129538, + "learning_rate": 
1.9622239708908626e-05, + "loss": 0.37458860874176025, + "step": 1006 + }, + { + "epoch": 0.2674279644137565, + "grad_norm": 1.058042672523148, + "learning_rate": 1.9621043355480726e-05, + "loss": 0.35852503776550293, + "step": 1007 + }, + { + "epoch": 0.26769353339529944, + "grad_norm": 1.0975239398171568, + "learning_rate": 1.961984514722914e-05, + "loss": 0.4056578278541565, + "step": 1008 + }, + { + "epoch": 0.2679591023768424, + "grad_norm": 1.1773057151207822, + "learning_rate": 1.9618645084384863e-05, + "loss": 0.4531296491622925, + "step": 1009 + }, + { + "epoch": 0.26822467135838535, + "grad_norm": 0.9095840908563808, + "learning_rate": 1.9617443167179256e-05, + "loss": 0.3356376886367798, + "step": 1010 + }, + { + "epoch": 0.2684902403399283, + "grad_norm": 1.09880831555839, + "learning_rate": 1.9616239395844033e-05, + "loss": 0.38045161962509155, + "step": 1011 + }, + { + "epoch": 0.26875580932147125, + "grad_norm": 1.028451509847456, + "learning_rate": 1.9615033770611268e-05, + "loss": 0.3549511730670929, + "step": 1012 + }, + { + "epoch": 0.2690213783030142, + "grad_norm": 1.0546213631772847, + "learning_rate": 1.9613826291713393e-05, + "loss": 0.33363252878189087, + "step": 1013 + }, + { + "epoch": 0.26928694728455715, + "grad_norm": 0.9539256345754278, + "learning_rate": 1.961261695938319e-05, + "loss": 0.3443339467048645, + "step": 1014 + }, + { + "epoch": 0.2695525162661001, + "grad_norm": 0.9897755385014708, + "learning_rate": 1.9611405773853807e-05, + "loss": 0.3258364796638489, + "step": 1015 + }, + { + "epoch": 0.26981808524764306, + "grad_norm": 1.0357196980681809, + "learning_rate": 1.961019273535875e-05, + "loss": 0.357122540473938, + "step": 1016 + }, + { + "epoch": 0.270083654229186, + "grad_norm": 0.9668495504097999, + "learning_rate": 1.9608977844131875e-05, + "loss": 0.32092082500457764, + "step": 1017 + }, + { + "epoch": 0.27034922321072896, + "grad_norm": 1.0067299219043435, + "learning_rate": 1.96077611004074e-05, + "loss": 
0.36354511976242065, + "step": 1018 + }, + { + "epoch": 0.27061479219227197, + "grad_norm": 1.0982243281899924, + "learning_rate": 1.9606542504419895e-05, + "loss": 0.37128758430480957, + "step": 1019 + }, + { + "epoch": 0.2708803611738149, + "grad_norm": 1.1112959838703056, + "learning_rate": 1.9605322056404294e-05, + "loss": 0.3732859790325165, + "step": 1020 + }, + { + "epoch": 0.2711459301553579, + "grad_norm": 1.0058814849372155, + "learning_rate": 1.9604099756595885e-05, + "loss": 0.32642674446105957, + "step": 1021 + }, + { + "epoch": 0.2714114991369008, + "grad_norm": 1.10371255398192, + "learning_rate": 1.9602875605230313e-05, + "loss": 0.376791775226593, + "step": 1022 + }, + { + "epoch": 0.2716770681184438, + "grad_norm": 1.0603007725295257, + "learning_rate": 1.960164960254358e-05, + "loss": 0.34514784812927246, + "step": 1023 + }, + { + "epoch": 0.27194263709998673, + "grad_norm": 1.225533197470795, + "learning_rate": 1.9600421748772044e-05, + "loss": 0.3752189576625824, + "step": 1024 + }, + { + "epoch": 0.2722082060815297, + "grad_norm": 1.0783483670765837, + "learning_rate": 1.959919204415242e-05, + "loss": 0.33100831508636475, + "step": 1025 + }, + { + "epoch": 0.27247377506307263, + "grad_norm": 1.1910668751599112, + "learning_rate": 1.9597960488921785e-05, + "loss": 0.42713654041290283, + "step": 1026 + }, + { + "epoch": 0.2727393440446156, + "grad_norm": 1.110777223027095, + "learning_rate": 1.9596727083317565e-05, + "loss": 0.3746519684791565, + "step": 1027 + }, + { + "epoch": 0.27300491302615854, + "grad_norm": 1.1133725792972708, + "learning_rate": 1.9595491827577543e-05, + "loss": 0.39962098002433777, + "step": 1028 + }, + { + "epoch": 0.2732704820077015, + "grad_norm": 1.0544310192284179, + "learning_rate": 1.9594254721939866e-05, + "loss": 0.35112401843070984, + "step": 1029 + }, + { + "epoch": 0.27353605098924444, + "grad_norm": 1.0749153592990304, + "learning_rate": 1.9593015766643037e-05, + "loss": 0.3648139238357544, + "step": 1030 + 
}, + { + "epoch": 0.2738016199707874, + "grad_norm": 1.0268996180520502, + "learning_rate": 1.9591774961925902e-05, + "loss": 0.31544098258018494, + "step": 1031 + }, + { + "epoch": 0.27406718895233034, + "grad_norm": 1.1260952074052377, + "learning_rate": 1.959053230802768e-05, + "loss": 0.3593738079071045, + "step": 1032 + }, + { + "epoch": 0.27433275793387335, + "grad_norm": 1.1009303195981317, + "learning_rate": 1.958928780518794e-05, + "loss": 0.39784368872642517, + "step": 1033 + }, + { + "epoch": 0.2745983269154163, + "grad_norm": 1.1304731324804922, + "learning_rate": 1.9588041453646606e-05, + "loss": 0.3869936168193817, + "step": 1034 + }, + { + "epoch": 0.27486389589695925, + "grad_norm": 0.9803124730292929, + "learning_rate": 1.958679325364396e-05, + "loss": 0.31108593940734863, + "step": 1035 + }, + { + "epoch": 0.2751294648785022, + "grad_norm": 1.098791994520666, + "learning_rate": 1.958554320542064e-05, + "loss": 0.3917708098888397, + "step": 1036 + }, + { + "epoch": 0.27539503386004516, + "grad_norm": 0.9969159455112034, + "learning_rate": 1.958429130921764e-05, + "loss": 0.36782944202423096, + "step": 1037 + }, + { + "epoch": 0.2756606028415881, + "grad_norm": 0.9381100088398062, + "learning_rate": 1.9583037565276314e-05, + "loss": 0.36196422576904297, + "step": 1038 + }, + { + "epoch": 0.27592617182313106, + "grad_norm": 1.0783473143219733, + "learning_rate": 1.9581781973838368e-05, + "loss": 0.32208555936813354, + "step": 1039 + }, + { + "epoch": 0.276191740804674, + "grad_norm": 0.9653316626874986, + "learning_rate": 1.958052453514586e-05, + "loss": 0.33451759815216064, + "step": 1040 + }, + { + "epoch": 0.27645730978621696, + "grad_norm": 1.0328342572912144, + "learning_rate": 1.9579265249441216e-05, + "loss": 0.3228047788143158, + "step": 1041 + }, + { + "epoch": 0.2767228787677599, + "grad_norm": 1.0944658380016739, + "learning_rate": 1.957800411696721e-05, + "loss": 0.36992791295051575, + "step": 1042 + }, + { + "epoch": 0.27698844774930287, 
+ "grad_norm": 0.9799580951396849, + "learning_rate": 1.9576741137966967e-05, + "loss": 0.3072342276573181, + "step": 1043 + }, + { + "epoch": 0.2772540167308458, + "grad_norm": 1.0637046756594408, + "learning_rate": 1.9575476312683985e-05, + "loss": 0.3372080326080322, + "step": 1044 + }, + { + "epoch": 0.27751958571238877, + "grad_norm": 1.0509701364189301, + "learning_rate": 1.95742096413621e-05, + "loss": 0.34725332260131836, + "step": 1045 + }, + { + "epoch": 0.2777851546939317, + "grad_norm": 1.1053591471100805, + "learning_rate": 1.9572941124245516e-05, + "loss": 0.36714982986450195, + "step": 1046 + }, + { + "epoch": 0.27805072367547473, + "grad_norm": 1.208127444221669, + "learning_rate": 1.957167076157878e-05, + "loss": 0.4163498282432556, + "step": 1047 + }, + { + "epoch": 0.2783162926570177, + "grad_norm": 1.1861975128714084, + "learning_rate": 1.9570398553606815e-05, + "loss": 0.40059348940849304, + "step": 1048 + }, + { + "epoch": 0.27858186163856063, + "grad_norm": 1.085993120538819, + "learning_rate": 1.956912450057488e-05, + "loss": 0.3622320294380188, + "step": 1049 + }, + { + "epoch": 0.2788474306201036, + "grad_norm": 1.1326017870689584, + "learning_rate": 1.9567848602728595e-05, + "loss": 0.35159534215927124, + "step": 1050 + }, + { + "epoch": 0.27911299960164654, + "grad_norm": 0.9516936878211085, + "learning_rate": 1.9566570860313944e-05, + "loss": 0.3093762993812561, + "step": 1051 + }, + { + "epoch": 0.2793785685831895, + "grad_norm": 1.040326152894859, + "learning_rate": 1.9565291273577255e-05, + "loss": 0.341474324464798, + "step": 1052 + }, + { + "epoch": 0.27964413756473244, + "grad_norm": 1.0885626452470811, + "learning_rate": 1.9564009842765225e-05, + "loss": 0.35376566648483276, + "step": 1053 + }, + { + "epoch": 0.2799097065462754, + "grad_norm": 1.09154548256864, + "learning_rate": 1.9562726568124892e-05, + "loss": 0.3487662374973297, + "step": 1054 + }, + { + "epoch": 0.28017527552781835, + "grad_norm": 1.014222924008021, + 
"learning_rate": 1.956144144990366e-05, + "loss": 0.3610745370388031, + "step": 1055 + }, + { + "epoch": 0.2804408445093613, + "grad_norm": 0.9789890869027496, + "learning_rate": 1.9560154488349284e-05, + "loss": 0.33230137825012207, + "step": 1056 + }, + { + "epoch": 0.28070641349090425, + "grad_norm": 1.0104241821081763, + "learning_rate": 1.9558865683709875e-05, + "loss": 0.310351699590683, + "step": 1057 + }, + { + "epoch": 0.2809719824724472, + "grad_norm": 1.1188708821966176, + "learning_rate": 1.9557575036233897e-05, + "loss": 0.39930224418640137, + "step": 1058 + }, + { + "epoch": 0.28123755145399015, + "grad_norm": 1.0498907782820184, + "learning_rate": 1.955628254617017e-05, + "loss": 0.3345295488834381, + "step": 1059 + }, + { + "epoch": 0.2815031204355331, + "grad_norm": 1.1059864789744056, + "learning_rate": 1.9554988213767875e-05, + "loss": 0.37963107228279114, + "step": 1060 + }, + { + "epoch": 0.2817686894170761, + "grad_norm": 1.0825219178132603, + "learning_rate": 1.9553692039276545e-05, + "loss": 0.3923654854297638, + "step": 1061 + }, + { + "epoch": 0.28203425839861906, + "grad_norm": 1.0736283126776336, + "learning_rate": 1.9552394022946068e-05, + "loss": 0.363646924495697, + "step": 1062 + }, + { + "epoch": 0.282299827380162, + "grad_norm": 1.1051684289136041, + "learning_rate": 1.9551094165026677e-05, + "loss": 0.35486382246017456, + "step": 1063 + }, + { + "epoch": 0.28256539636170497, + "grad_norm": 1.0845117937449689, + "learning_rate": 1.954979246576898e-05, + "loss": 0.35215455293655396, + "step": 1064 + }, + { + "epoch": 0.2828309653432479, + "grad_norm": 1.1587243435425785, + "learning_rate": 1.9548488925423924e-05, + "loss": 0.3936809003353119, + "step": 1065 + }, + { + "epoch": 0.28309653432479087, + "grad_norm": 1.0399965264634783, + "learning_rate": 1.9547183544242817e-05, + "loss": 0.36852866411209106, + "step": 1066 + }, + { + "epoch": 0.2833621033063338, + "grad_norm": 1.0679817467710029, + "learning_rate": 
1.954587632247732e-05, + "loss": 0.3552001714706421, + "step": 1067 + }, + { + "epoch": 0.2836276722878768, + "grad_norm": 1.1330169189394568, + "learning_rate": 1.9544567260379455e-05, + "loss": 0.3684498965740204, + "step": 1068 + }, + { + "epoch": 0.2838932412694197, + "grad_norm": 0.9857931835351914, + "learning_rate": 1.9543256358201586e-05, + "loss": 0.3367026448249817, + "step": 1069 + }, + { + "epoch": 0.2841588102509627, + "grad_norm": 1.0677692738667734, + "learning_rate": 1.9541943616196443e-05, + "loss": 0.3702335059642792, + "step": 1070 + }, + { + "epoch": 0.28442437923250563, + "grad_norm": 1.1114119189633371, + "learning_rate": 1.9540629034617108e-05, + "loss": 0.3430984318256378, + "step": 1071 + }, + { + "epoch": 0.2846899482140486, + "grad_norm": 1.1406170357402363, + "learning_rate": 1.953931261371702e-05, + "loss": 0.36514735221862793, + "step": 1072 + }, + { + "epoch": 0.28495551719559153, + "grad_norm": 1.0428104806049732, + "learning_rate": 1.9537994353749963e-05, + "loss": 0.3524945080280304, + "step": 1073 + }, + { + "epoch": 0.2852210861771345, + "grad_norm": 1.0283973360981475, + "learning_rate": 1.9536674254970088e-05, + "loss": 0.32405683398246765, + "step": 1074 + }, + { + "epoch": 0.2854866551586775, + "grad_norm": 1.0649875575316718, + "learning_rate": 1.9535352317631888e-05, + "loss": 0.30863165855407715, + "step": 1075 + }, + { + "epoch": 0.28575222414022045, + "grad_norm": 1.0647565002745494, + "learning_rate": 1.953402854199022e-05, + "loss": 0.34343889355659485, + "step": 1076 + }, + { + "epoch": 0.2860177931217634, + "grad_norm": 1.2339349330872973, + "learning_rate": 1.9532702928300292e-05, + "loss": 0.3639434576034546, + "step": 1077 + }, + { + "epoch": 0.28628336210330635, + "grad_norm": 1.0888261251069975, + "learning_rate": 1.9531375476817667e-05, + "loss": 0.3380300998687744, + "step": 1078 + }, + { + "epoch": 0.2865489310848493, + "grad_norm": 1.1078839119175599, + "learning_rate": 1.9530046187798267e-05, + "loss": 
0.3323265016078949, + "step": 1079 + }, + { + "epoch": 0.28681450006639225, + "grad_norm": 1.0529271541493659, + "learning_rate": 1.9528715061498355e-05, + "loss": 0.3439220190048218, + "step": 1080 + }, + { + "epoch": 0.2870800690479352, + "grad_norm": 1.088357435010649, + "learning_rate": 1.952738209817456e-05, + "loss": 0.36376965045928955, + "step": 1081 + }, + { + "epoch": 0.28734563802947816, + "grad_norm": 1.0188116446188513, + "learning_rate": 1.952604729808386e-05, + "loss": 0.3281211853027344, + "step": 1082 + }, + { + "epoch": 0.2876112070110211, + "grad_norm": 1.0999135645201878, + "learning_rate": 1.9524710661483594e-05, + "loss": 0.3538089990615845, + "step": 1083 + }, + { + "epoch": 0.28787677599256406, + "grad_norm": 1.1475903462769852, + "learning_rate": 1.9523372188631442e-05, + "loss": 0.3982803225517273, + "step": 1084 + }, + { + "epoch": 0.288142344974107, + "grad_norm": 1.11408923860859, + "learning_rate": 1.9522031879785453e-05, + "loss": 0.3958810567855835, + "step": 1085 + }, + { + "epoch": 0.28840791395564996, + "grad_norm": 1.191451776763126, + "learning_rate": 1.9520689735204016e-05, + "loss": 0.40133988857269287, + "step": 1086 + }, + { + "epoch": 0.2886734829371929, + "grad_norm": 1.048862195613205, + "learning_rate": 1.9519345755145886e-05, + "loss": 0.32411646842956543, + "step": 1087 + }, + { + "epoch": 0.28893905191873587, + "grad_norm": 1.210003646730205, + "learning_rate": 1.9517999939870166e-05, + "loss": 0.38678207993507385, + "step": 1088 + }, + { + "epoch": 0.2892046209002789, + "grad_norm": 1.0663258874668164, + "learning_rate": 1.951665228963631e-05, + "loss": 0.36829686164855957, + "step": 1089 + }, + { + "epoch": 0.2894701898818218, + "grad_norm": 0.9884592653808488, + "learning_rate": 1.9515302804704134e-05, + "loss": 0.38631704449653625, + "step": 1090 + }, + { + "epoch": 0.2897357588633648, + "grad_norm": 1.1934503112083867, + "learning_rate": 1.9513951485333798e-05, + "loss": 0.39288902282714844, + "step": 1091 + }, + 
{ + "epoch": 0.29000132784490773, + "grad_norm": 1.0804742457342014, + "learning_rate": 1.9512598331785822e-05, + "loss": 0.3655658960342407, + "step": 1092 + }, + { + "epoch": 0.2902668968264507, + "grad_norm": 0.9929300268939649, + "learning_rate": 1.9511243344321076e-05, + "loss": 0.3263852596282959, + "step": 1093 + }, + { + "epoch": 0.29053246580799363, + "grad_norm": 1.1166275426043832, + "learning_rate": 1.9509886523200792e-05, + "loss": 0.37939125299453735, + "step": 1094 + }, + { + "epoch": 0.2907980347895366, + "grad_norm": 1.074761796186792, + "learning_rate": 1.9508527868686543e-05, + "loss": 0.34218865633010864, + "step": 1095 + }, + { + "epoch": 0.29106360377107954, + "grad_norm": 1.036633851483027, + "learning_rate": 1.9507167381040263e-05, + "loss": 0.368261456489563, + "step": 1096 + }, + { + "epoch": 0.2913291727526225, + "grad_norm": 1.083724731335207, + "learning_rate": 1.950580506052424e-05, + "loss": 0.36133286356925964, + "step": 1097 + }, + { + "epoch": 0.29159474173416544, + "grad_norm": 1.0542758401630365, + "learning_rate": 1.9504440907401113e-05, + "loss": 0.3667418658733368, + "step": 1098 + }, + { + "epoch": 0.2918603107157084, + "grad_norm": 0.9961595646698646, + "learning_rate": 1.950307492193387e-05, + "loss": 0.34444570541381836, + "step": 1099 + }, + { + "epoch": 0.29212587969725134, + "grad_norm": 1.1203470867439278, + "learning_rate": 1.9501707104385863e-05, + "loss": 0.41261589527130127, + "step": 1100 + }, + { + "epoch": 0.2923914486787943, + "grad_norm": 1.0847270622391922, + "learning_rate": 1.9500337455020788e-05, + "loss": 0.3762981593608856, + "step": 1101 + }, + { + "epoch": 0.29265701766033725, + "grad_norm": 1.108635996430537, + "learning_rate": 1.9498965974102697e-05, + "loss": 0.3527417480945587, + "step": 1102 + }, + { + "epoch": 0.29292258664188026, + "grad_norm": 1.1555485155020386, + "learning_rate": 1.9497592661895996e-05, + "loss": 0.34812286496162415, + "step": 1103 + }, + { + "epoch": 0.2931881556234232, + 
"grad_norm": 0.9844968948580171, + "learning_rate": 1.9496217518665444e-05, + "loss": 0.33663398027420044, + "step": 1104 + }, + { + "epoch": 0.29345372460496616, + "grad_norm": 0.997090208380272, + "learning_rate": 1.9494840544676156e-05, + "loss": 0.3632991313934326, + "step": 1105 + }, + { + "epoch": 0.2937192935865091, + "grad_norm": 1.3515018592791732, + "learning_rate": 1.9493461740193587e-05, + "loss": 0.37389490008354187, + "step": 1106 + }, + { + "epoch": 0.29398486256805206, + "grad_norm": 1.204356467911551, + "learning_rate": 1.949208110548356e-05, + "loss": 0.3634020686149597, + "step": 1107 + }, + { + "epoch": 0.294250431549595, + "grad_norm": 1.0778805299295515, + "learning_rate": 1.9490698640812247e-05, + "loss": 0.36032742261886597, + "step": 1108 + }, + { + "epoch": 0.29451600053113797, + "grad_norm": 1.1504972318858309, + "learning_rate": 1.9489314346446164e-05, + "loss": 0.3385765552520752, + "step": 1109 + }, + { + "epoch": 0.2947815695126809, + "grad_norm": 1.0946200184976398, + "learning_rate": 1.9487928222652195e-05, + "loss": 0.3751915991306305, + "step": 1110 + }, + { + "epoch": 0.29504713849422387, + "grad_norm": 1.0903856446796527, + "learning_rate": 1.9486540269697564e-05, + "loss": 0.36069825291633606, + "step": 1111 + }, + { + "epoch": 0.2953127074757668, + "grad_norm": 1.009573568422265, + "learning_rate": 1.948515048784985e-05, + "loss": 0.32703787088394165, + "step": 1112 + }, + { + "epoch": 0.2955782764573098, + "grad_norm": 0.9196963642088989, + "learning_rate": 1.948375887737699e-05, + "loss": 0.312494158744812, + "step": 1113 + }, + { + "epoch": 0.2958438454388527, + "grad_norm": 0.9880564768480579, + "learning_rate": 1.9482365438547272e-05, + "loss": 0.30626165866851807, + "step": 1114 + }, + { + "epoch": 0.2961094144203957, + "grad_norm": 1.07827456569524, + "learning_rate": 1.948097017162933e-05, + "loss": 0.3625817894935608, + "step": 1115 + }, + { + "epoch": 0.29637498340193863, + "grad_norm": 1.1789711489550672, + 
"learning_rate": 1.9479573076892152e-05, + "loss": 0.38403773307800293, + "step": 1116 + }, + { + "epoch": 0.2966405523834816, + "grad_norm": 1.0638061154391991, + "learning_rate": 1.9478174154605093e-05, + "loss": 0.3645164966583252, + "step": 1117 + }, + { + "epoch": 0.2969061213650246, + "grad_norm": 1.0428170431433939, + "learning_rate": 1.9476773405037836e-05, + "loss": 0.3714389503002167, + "step": 1118 + }, + { + "epoch": 0.29717169034656754, + "grad_norm": 1.1488169814057956, + "learning_rate": 1.9475370828460436e-05, + "loss": 0.39809900522232056, + "step": 1119 + }, + { + "epoch": 0.2974372593281105, + "grad_norm": 1.0702503358715294, + "learning_rate": 1.9473966425143292e-05, + "loss": 0.3698490262031555, + "step": 1120 + }, + { + "epoch": 0.29770282830965344, + "grad_norm": 1.0166542138266799, + "learning_rate": 1.947256019535716e-05, + "loss": 0.3072658181190491, + "step": 1121 + }, + { + "epoch": 0.2979683972911964, + "grad_norm": 1.0479599499698302, + "learning_rate": 1.947115213937314e-05, + "loss": 0.3294365406036377, + "step": 1122 + }, + { + "epoch": 0.29823396627273935, + "grad_norm": 1.007749929257712, + "learning_rate": 1.9469742257462684e-05, + "loss": 0.34933674335479736, + "step": 1123 + }, + { + "epoch": 0.2984995352542823, + "grad_norm": 1.133473784296847, + "learning_rate": 1.946833054989761e-05, + "loss": 0.34586772322654724, + "step": 1124 + }, + { + "epoch": 0.29876510423582525, + "grad_norm": 1.0225090189343862, + "learning_rate": 1.9466917016950076e-05, + "loss": 0.33158159255981445, + "step": 1125 + }, + { + "epoch": 0.2990306732173682, + "grad_norm": 1.0162208348084125, + "learning_rate": 1.946550165889259e-05, + "loss": 0.32665887475013733, + "step": 1126 + }, + { + "epoch": 0.29929624219891116, + "grad_norm": 1.1065475895733048, + "learning_rate": 1.946408447599802e-05, + "loss": 0.3333032429218292, + "step": 1127 + }, + { + "epoch": 0.2995618111804541, + "grad_norm": 1.0958997421479173, + "learning_rate": 
1.9462665468539582e-05, + "loss": 0.3747228980064392, + "step": 1128 + }, + { + "epoch": 0.29982738016199706, + "grad_norm": 0.9447906277138843, + "learning_rate": 1.9461244636790845e-05, + "loss": 0.34040436148643494, + "step": 1129 + }, + { + "epoch": 0.30009294914354, + "grad_norm": 1.0062775259583612, + "learning_rate": 1.9459821981025723e-05, + "loss": 0.3279584050178528, + "step": 1130 + }, + { + "epoch": 0.30035851812508296, + "grad_norm": 1.136819731097147, + "learning_rate": 1.9458397501518496e-05, + "loss": 0.33507707715034485, + "step": 1131 + }, + { + "epoch": 0.30062408710662597, + "grad_norm": 0.9978141677663763, + "learning_rate": 1.945697119854378e-05, + "loss": 0.3511529862880707, + "step": 1132 + }, + { + "epoch": 0.3008896560881689, + "grad_norm": 1.1038696900269844, + "learning_rate": 1.945554307237655e-05, + "loss": 0.33260345458984375, + "step": 1133 + }, + { + "epoch": 0.3011552250697119, + "grad_norm": 1.1267244347055163, + "learning_rate": 1.9454113123292133e-05, + "loss": 0.37698423862457275, + "step": 1134 + }, + { + "epoch": 0.3014207940512548, + "grad_norm": 1.0482054605062838, + "learning_rate": 1.945268135156621e-05, + "loss": 0.34843316674232483, + "step": 1135 + }, + { + "epoch": 0.3016863630327978, + "grad_norm": 1.1518938911568848, + "learning_rate": 1.9451247757474805e-05, + "loss": 0.38723987340927124, + "step": 1136 + }, + { + "epoch": 0.30195193201434073, + "grad_norm": 1.0597410032778982, + "learning_rate": 1.9449812341294302e-05, + "loss": 0.3836795389652252, + "step": 1137 + }, + { + "epoch": 0.3022175009958837, + "grad_norm": 0.9828275773453091, + "learning_rate": 1.9448375103301424e-05, + "loss": 0.3362433612346649, + "step": 1138 + }, + { + "epoch": 0.30248306997742663, + "grad_norm": 1.0750556057741842, + "learning_rate": 1.9446936043773264e-05, + "loss": 0.3615792393684387, + "step": 1139 + }, + { + "epoch": 0.3027486389589696, + "grad_norm": 1.0233339727957385, + "learning_rate": 1.944549516298725e-05, + "loss": 
0.33693915605545044, + "step": 1140 + }, + { + "epoch": 0.30301420794051254, + "grad_norm": 1.0074205515838075, + "learning_rate": 1.9444052461221167e-05, + "loss": 0.32611170411109924, + "step": 1141 + }, + { + "epoch": 0.3032797769220555, + "grad_norm": 1.0257687736898828, + "learning_rate": 1.9442607938753153e-05, + "loss": 0.3504132032394409, + "step": 1142 + }, + { + "epoch": 0.30354534590359844, + "grad_norm": 1.081217851264946, + "learning_rate": 1.944116159586169e-05, + "loss": 0.3598168194293976, + "step": 1143 + }, + { + "epoch": 0.3038109148851414, + "grad_norm": 1.025673115447757, + "learning_rate": 1.9439713432825625e-05, + "loss": 0.33447909355163574, + "step": 1144 + }, + { + "epoch": 0.30407648386668434, + "grad_norm": 0.9795127759513904, + "learning_rate": 1.943826344992414e-05, + "loss": 0.34026333689689636, + "step": 1145 + }, + { + "epoch": 0.30434205284822735, + "grad_norm": 1.070042442644686, + "learning_rate": 1.9436811647436772e-05, + "loss": 0.323203980922699, + "step": 1146 + }, + { + "epoch": 0.3046076218297703, + "grad_norm": 1.0588861737680213, + "learning_rate": 1.943535802564342e-05, + "loss": 0.332398921251297, + "step": 1147 + }, + { + "epoch": 0.30487319081131325, + "grad_norm": 1.175168490214782, + "learning_rate": 1.9433902584824316e-05, + "loss": 0.3882995545864105, + "step": 1148 + }, + { + "epoch": 0.3051387597928562, + "grad_norm": 1.093435738226519, + "learning_rate": 1.943244532526006e-05, + "loss": 0.35262739658355713, + "step": 1149 + }, + { + "epoch": 0.30540432877439916, + "grad_norm": 1.1043029209432185, + "learning_rate": 1.9430986247231586e-05, + "loss": 0.39694511890411377, + "step": 1150 + }, + { + "epoch": 0.3056698977559421, + "grad_norm": 1.1276348856512544, + "learning_rate": 1.9429525351020197e-05, + "loss": 0.3692580759525299, + "step": 1151 + }, + { + "epoch": 0.30593546673748506, + "grad_norm": 1.1284903074468042, + "learning_rate": 1.9428062636907526e-05, + "loss": 0.3685402572154999, + "step": 1152 + }, + 
{ + "epoch": 0.306201035719028, + "grad_norm": 1.1120189967723886, + "learning_rate": 1.9426598105175575e-05, + "loss": 0.37557253241539, + "step": 1153 + }, + { + "epoch": 0.30646660470057097, + "grad_norm": 0.9544414078231065, + "learning_rate": 1.9425131756106687e-05, + "loss": 0.3323203921318054, + "step": 1154 + }, + { + "epoch": 0.3067321736821139, + "grad_norm": 1.085159318227953, + "learning_rate": 1.9423663589983554e-05, + "loss": 0.37262290716171265, + "step": 1155 + }, + { + "epoch": 0.30699774266365687, + "grad_norm": 1.138203326668225, + "learning_rate": 1.9422193607089224e-05, + "loss": 0.36621618270874023, + "step": 1156 + }, + { + "epoch": 0.3072633116451998, + "grad_norm": 1.0326975743253168, + "learning_rate": 1.942072180770709e-05, + "loss": 0.3844982385635376, + "step": 1157 + }, + { + "epoch": 0.3075288806267428, + "grad_norm": 0.9983252957319158, + "learning_rate": 1.94192481921209e-05, + "loss": 0.3229531943798065, + "step": 1158 + }, + { + "epoch": 0.3077944496082857, + "grad_norm": 1.0805327657153956, + "learning_rate": 1.9417772760614745e-05, + "loss": 0.34862661361694336, + "step": 1159 + }, + { + "epoch": 0.30806001858982873, + "grad_norm": 1.0329581193958253, + "learning_rate": 1.941629551347308e-05, + "loss": 0.35496509075164795, + "step": 1160 + }, + { + "epoch": 0.3083255875713717, + "grad_norm": 1.051163133463375, + "learning_rate": 1.9414816450980686e-05, + "loss": 0.3695065975189209, + "step": 1161 + }, + { + "epoch": 0.30859115655291464, + "grad_norm": 1.0254769076684076, + "learning_rate": 1.9413335573422723e-05, + "loss": 0.3472525179386139, + "step": 1162 + }, + { + "epoch": 0.3088567255344576, + "grad_norm": 1.008969123299064, + "learning_rate": 1.9411852881084683e-05, + "loss": 0.3447483479976654, + "step": 1163 + }, + { + "epoch": 0.30912229451600054, + "grad_norm": 0.9333424416365893, + "learning_rate": 1.941036837425241e-05, + "loss": 0.31047824025154114, + "step": 1164 + }, + { + "epoch": 0.3093878634975435, + 
"grad_norm": 1.0570471012152007, + "learning_rate": 1.9408882053212094e-05, + "loss": 0.34502410888671875, + "step": 1165 + }, + { + "epoch": 0.30965343247908644, + "grad_norm": 1.1849442151759089, + "learning_rate": 1.940739391825029e-05, + "loss": 0.3663109540939331, + "step": 1166 + }, + { + "epoch": 0.3099190014606294, + "grad_norm": 1.1136723468346887, + "learning_rate": 1.9405903969653887e-05, + "loss": 0.3635792136192322, + "step": 1167 + }, + { + "epoch": 0.31018457044217235, + "grad_norm": 1.0769441486287206, + "learning_rate": 1.940441220771013e-05, + "loss": 0.359528124332428, + "step": 1168 + }, + { + "epoch": 0.3104501394237153, + "grad_norm": 1.043185528474707, + "learning_rate": 1.9402918632706618e-05, + "loss": 0.32566630840301514, + "step": 1169 + }, + { + "epoch": 0.31071570840525825, + "grad_norm": 1.0286897614370414, + "learning_rate": 1.940142324493129e-05, + "loss": 0.34758460521698, + "step": 1170 + }, + { + "epoch": 0.3109812773868012, + "grad_norm": 1.0148570847451444, + "learning_rate": 1.9399926044672438e-05, + "loss": 0.3484055995941162, + "step": 1171 + }, + { + "epoch": 0.31124684636834415, + "grad_norm": 1.1806099587394492, + "learning_rate": 1.93984270322187e-05, + "loss": 0.41958773136138916, + "step": 1172 + }, + { + "epoch": 0.3115124153498871, + "grad_norm": 1.085314216258339, + "learning_rate": 1.9396926207859085e-05, + "loss": 0.3578398525714874, + "step": 1173 + }, + { + "epoch": 0.3117779843314301, + "grad_norm": 1.0721505496116728, + "learning_rate": 1.9395423571882917e-05, + "loss": 0.38140422105789185, + "step": 1174 + }, + { + "epoch": 0.31204355331297307, + "grad_norm": 1.1224661464468277, + "learning_rate": 1.9393919124579898e-05, + "loss": 0.3782861828804016, + "step": 1175 + }, + { + "epoch": 0.312309122294516, + "grad_norm": 1.0482874367837718, + "learning_rate": 1.939241286624006e-05, + "loss": 0.3211040496826172, + "step": 1176 + }, + { + "epoch": 0.31257469127605897, + "grad_norm": 0.9909015391020882, + 
"learning_rate": 1.9390904797153795e-05, + "loss": 0.3090783953666687, + "step": 1177 + }, + { + "epoch": 0.3128402602576019, + "grad_norm": 1.0203166402095418, + "learning_rate": 1.938939491761184e-05, + "loss": 0.3542889654636383, + "step": 1178 + }, + { + "epoch": 0.3131058292391449, + "grad_norm": 1.016567110972503, + "learning_rate": 1.9387883227905285e-05, + "loss": 0.369164377450943, + "step": 1179 + }, + { + "epoch": 0.3133713982206878, + "grad_norm": 1.1492868354113897, + "learning_rate": 1.9386369728325562e-05, + "loss": 0.35200801491737366, + "step": 1180 + }, + { + "epoch": 0.3136369672022308, + "grad_norm": 1.1332626811675575, + "learning_rate": 1.9384854419164454e-05, + "loss": 0.3696276843547821, + "step": 1181 + }, + { + "epoch": 0.31390253618377373, + "grad_norm": 0.9856387823657043, + "learning_rate": 1.9383337300714104e-05, + "loss": 0.3403652012348175, + "step": 1182 + }, + { + "epoch": 0.3141681051653167, + "grad_norm": 0.9608300998441986, + "learning_rate": 1.9381818373266987e-05, + "loss": 0.3307063579559326, + "step": 1183 + }, + { + "epoch": 0.31443367414685963, + "grad_norm": 1.002604353314113, + "learning_rate": 1.9380297637115933e-05, + "loss": 0.3223465085029602, + "step": 1184 + }, + { + "epoch": 0.3146992431284026, + "grad_norm": 1.1668926481270334, + "learning_rate": 1.9378775092554124e-05, + "loss": 0.4013838768005371, + "step": 1185 + }, + { + "epoch": 0.31496481210994554, + "grad_norm": 1.2376602965184098, + "learning_rate": 1.9377250739875095e-05, + "loss": 0.3596574664115906, + "step": 1186 + }, + { + "epoch": 0.3152303810914885, + "grad_norm": 1.0683740579575798, + "learning_rate": 1.937572457937271e-05, + "loss": 0.41639968752861023, + "step": 1187 + }, + { + "epoch": 0.3154959500730315, + "grad_norm": 0.950341293536979, + "learning_rate": 1.9374196611341212e-05, + "loss": 0.3001318573951721, + "step": 1188 + }, + { + "epoch": 0.31576151905457445, + "grad_norm": 1.0390515723802394, + "learning_rate": 1.937266683607516e-05, + 
"loss": 0.33238667249679565, + "step": 1189 + }, + { + "epoch": 0.3160270880361174, + "grad_norm": 1.0559788990716998, + "learning_rate": 1.9371135253869483e-05, + "loss": 0.33638086915016174, + "step": 1190 + }, + { + "epoch": 0.31629265701766035, + "grad_norm": 1.0736881782093415, + "learning_rate": 1.9369601865019452e-05, + "loss": 0.34445878863334656, + "step": 1191 + }, + { + "epoch": 0.3165582259992033, + "grad_norm": 1.116672373820781, + "learning_rate": 1.9368066669820684e-05, + "loss": 0.33554553985595703, + "step": 1192 + }, + { + "epoch": 0.31682379498074625, + "grad_norm": 1.2940820576034424, + "learning_rate": 1.936652966856915e-05, + "loss": 0.3668493628501892, + "step": 1193 + }, + { + "epoch": 0.3170893639622892, + "grad_norm": 1.1460266164336763, + "learning_rate": 1.9364990861561163e-05, + "loss": 0.3813396990299225, + "step": 1194 + }, + { + "epoch": 0.31735493294383216, + "grad_norm": 1.048871056336621, + "learning_rate": 1.936345024909339e-05, + "loss": 0.33625900745391846, + "step": 1195 + }, + { + "epoch": 0.3176205019253751, + "grad_norm": 1.0238786804477913, + "learning_rate": 1.9361907831462836e-05, + "loss": 0.31131428480148315, + "step": 1196 + }, + { + "epoch": 0.31788607090691806, + "grad_norm": 0.9751456398999766, + "learning_rate": 1.936036360896687e-05, + "loss": 0.32571589946746826, + "step": 1197 + }, + { + "epoch": 0.318151639888461, + "grad_norm": 1.1296061558872548, + "learning_rate": 1.9358817581903193e-05, + "loss": 0.36207717657089233, + "step": 1198 + }, + { + "epoch": 0.31841720887000396, + "grad_norm": 1.062344543153862, + "learning_rate": 1.9357269750569864e-05, + "loss": 0.3743855059146881, + "step": 1199 + }, + { + "epoch": 0.3186827778515469, + "grad_norm": 1.1254060799620074, + "learning_rate": 1.9355720115265283e-05, + "loss": 0.3862137794494629, + "step": 1200 + }, + { + "epoch": 0.31894834683308987, + "grad_norm": 1.1135871061204583, + "learning_rate": 1.935416867628821e-05, + "loss": 0.33353424072265625, + 
"step": 1201 + }, + { + "epoch": 0.3192139158146329, + "grad_norm": 9.759113022509682, + "learning_rate": 1.9352615433937733e-05, + "loss": 0.3277953267097473, + "step": 1202 + }, + { + "epoch": 0.3194794847961758, + "grad_norm": 1.104737565124737, + "learning_rate": 1.9351060388513304e-05, + "loss": 0.38247692584991455, + "step": 1203 + }, + { + "epoch": 0.3197450537777188, + "grad_norm": 1.0645482624060865, + "learning_rate": 1.9349503540314724e-05, + "loss": 0.3330709934234619, + "step": 1204 + }, + { + "epoch": 0.32001062275926173, + "grad_norm": 1.1382102351287038, + "learning_rate": 1.9347944889642125e-05, + "loss": 0.3809449076652527, + "step": 1205 + }, + { + "epoch": 0.3202761917408047, + "grad_norm": 0.9591245399492223, + "learning_rate": 1.9346384436796e-05, + "loss": 0.33623188734054565, + "step": 1206 + }, + { + "epoch": 0.32054176072234764, + "grad_norm": 1.0414583731283242, + "learning_rate": 1.9344822182077184e-05, + "loss": 0.35465264320373535, + "step": 1207 + }, + { + "epoch": 0.3208073297038906, + "grad_norm": 1.0419539507532576, + "learning_rate": 1.9343258125786866e-05, + "loss": 0.3532233238220215, + "step": 1208 + }, + { + "epoch": 0.32107289868543354, + "grad_norm": 0.972348986123494, + "learning_rate": 1.9341692268226572e-05, + "loss": 0.3498903512954712, + "step": 1209 + }, + { + "epoch": 0.3213384676669765, + "grad_norm": 1.057700016356479, + "learning_rate": 1.9340124609698185e-05, + "loss": 0.36124879121780396, + "step": 1210 + }, + { + "epoch": 0.32160403664851944, + "grad_norm": 1.1891126233384992, + "learning_rate": 1.933855515050393e-05, + "loss": 0.38535434007644653, + "step": 1211 + }, + { + "epoch": 0.3218696056300624, + "grad_norm": 1.1201736183139164, + "learning_rate": 1.9336983890946383e-05, + "loss": 0.39999911189079285, + "step": 1212 + }, + { + "epoch": 0.32213517461160535, + "grad_norm": 1.1396977359685507, + "learning_rate": 1.9335410831328457e-05, + "loss": 0.3519791066646576, + "step": 1213 + }, + { + "epoch": 
0.3224007435931483, + "grad_norm": 1.1624196201646915, + "learning_rate": 1.9333835971953424e-05, + "loss": 0.35882368683815, + "step": 1214 + }, + { + "epoch": 0.32266631257469125, + "grad_norm": 1.2089532713833613, + "learning_rate": 1.93322593131249e-05, + "loss": 0.36132001876831055, + "step": 1215 + }, + { + "epoch": 0.32293188155623426, + "grad_norm": 1.0741169297687752, + "learning_rate": 1.9330680855146845e-05, + "loss": 0.36840832233428955, + "step": 1216 + }, + { + "epoch": 0.3231974505377772, + "grad_norm": 1.1553079333487188, + "learning_rate": 1.9329100598323563e-05, + "loss": 0.3755963444709778, + "step": 1217 + }, + { + "epoch": 0.32346301951932016, + "grad_norm": 1.1792888887437214, + "learning_rate": 1.9327518542959717e-05, + "loss": 0.400601863861084, + "step": 1218 + }, + { + "epoch": 0.3237285885008631, + "grad_norm": 1.0342294479515497, + "learning_rate": 1.93259346893603e-05, + "loss": 0.3100128769874573, + "step": 1219 + }, + { + "epoch": 0.32399415748240606, + "grad_norm": 1.0633052239431813, + "learning_rate": 1.9324349037830665e-05, + "loss": 0.3439880609512329, + "step": 1220 + }, + { + "epoch": 0.324259726463949, + "grad_norm": 1.1634088151631976, + "learning_rate": 1.9322761588676505e-05, + "loss": 0.3612631559371948, + "step": 1221 + }, + { + "epoch": 0.32452529544549197, + "grad_norm": 1.1292400605185824, + "learning_rate": 1.9321172342203863e-05, + "loss": 0.38202327489852905, + "step": 1222 + }, + { + "epoch": 0.3247908644270349, + "grad_norm": 1.0253004653890312, + "learning_rate": 1.9319581298719127e-05, + "loss": 0.3405265808105469, + "step": 1223 + }, + { + "epoch": 0.32505643340857787, + "grad_norm": 1.1499639639111883, + "learning_rate": 1.931798845852903e-05, + "loss": 0.4110907018184662, + "step": 1224 + }, + { + "epoch": 0.3253220023901208, + "grad_norm": 1.2758168253168263, + "learning_rate": 1.9316393821940654e-05, + "loss": 0.3007548451423645, + "step": 1225 + }, + { + "epoch": 0.3255875713716638, + "grad_norm": 
2.5438383009304673, + "learning_rate": 1.9314797389261426e-05, + "loss": 0.32769858837127686, + "step": 1226 + }, + { + "epoch": 0.3258531403532067, + "grad_norm": 1.0370704182885782, + "learning_rate": 1.931319916079912e-05, + "loss": 0.3619830310344696, + "step": 1227 + }, + { + "epoch": 0.3261187093347497, + "grad_norm": 1.2983573666738066, + "learning_rate": 1.9311599136861853e-05, + "loss": 0.3470210134983063, + "step": 1228 + }, + { + "epoch": 0.32638427831629263, + "grad_norm": 1.145435126731274, + "learning_rate": 1.9309997317758093e-05, + "loss": 0.3471665382385254, + "step": 1229 + }, + { + "epoch": 0.32664984729783564, + "grad_norm": 1.0757592201920594, + "learning_rate": 1.930839370379665e-05, + "loss": 0.3717760443687439, + "step": 1230 + }, + { + "epoch": 0.3269154162793786, + "grad_norm": 1.1173068015382108, + "learning_rate": 1.9306788295286687e-05, + "loss": 0.37279975414276123, + "step": 1231 + }, + { + "epoch": 0.32718098526092154, + "grad_norm": 1.1523781527891401, + "learning_rate": 1.93051810925377e-05, + "loss": 0.3884522020816803, + "step": 1232 + }, + { + "epoch": 0.3274465542424645, + "grad_norm": 1.1200431222189422, + "learning_rate": 1.9303572095859545e-05, + "loss": 0.4277604818344116, + "step": 1233 + }, + { + "epoch": 0.32771212322400745, + "grad_norm": 1.1197023145386935, + "learning_rate": 1.9301961305562415e-05, + "loss": 0.2888818681240082, + "step": 1234 + }, + { + "epoch": 0.3279776922055504, + "grad_norm": 1.0271311895282893, + "learning_rate": 1.9300348721956854e-05, + "loss": 0.3134511709213257, + "step": 1235 + }, + { + "epoch": 0.32824326118709335, + "grad_norm": 1.0800984792046815, + "learning_rate": 1.9298734345353745e-05, + "loss": 0.38525280356407166, + "step": 1236 + }, + { + "epoch": 0.3285088301686363, + "grad_norm": 1.134011749036063, + "learning_rate": 1.9297118176064324e-05, + "loss": 0.3692918121814728, + "step": 1237 + }, + { + "epoch": 0.32877439915017925, + "grad_norm": 1.0348260315377988, + "learning_rate": 
1.9295500214400165e-05, + "loss": 0.3443421721458435, + "step": 1238 + }, + { + "epoch": 0.3290399681317222, + "grad_norm": 1.0129455663017488, + "learning_rate": 1.9293880460673197e-05, + "loss": 0.3228621184825897, + "step": 1239 + }, + { + "epoch": 0.32930553711326516, + "grad_norm": 1.0116024279908165, + "learning_rate": 1.9292258915195688e-05, + "loss": 0.330943763256073, + "step": 1240 + }, + { + "epoch": 0.3295711060948081, + "grad_norm": 1.1814587344422625, + "learning_rate": 1.929063557828025e-05, + "loss": 0.356637567281723, + "step": 1241 + }, + { + "epoch": 0.32983667507635106, + "grad_norm": 0.9888159780201056, + "learning_rate": 1.9289010450239843e-05, + "loss": 0.3481113910675049, + "step": 1242 + }, + { + "epoch": 0.330102244057894, + "grad_norm": 1.1876931030431213, + "learning_rate": 1.928738353138778e-05, + "loss": 0.36579906940460205, + "step": 1243 + }, + { + "epoch": 0.330367813039437, + "grad_norm": 1.0281454378567854, + "learning_rate": 1.9285754822037705e-05, + "loss": 0.33025234937667847, + "step": 1244 + }, + { + "epoch": 0.33063338202097997, + "grad_norm": 1.0936673160473642, + "learning_rate": 1.9284124322503613e-05, + "loss": 0.34848469495773315, + "step": 1245 + }, + { + "epoch": 0.3308989510025229, + "grad_norm": 1.1232405017277023, + "learning_rate": 1.928249203309985e-05, + "loss": 0.3523876368999481, + "step": 1246 + }, + { + "epoch": 0.3311645199840659, + "grad_norm": 1.140153458583263, + "learning_rate": 1.92808579541411e-05, + "loss": 0.3695565462112427, + "step": 1247 + }, + { + "epoch": 0.3314300889656088, + "grad_norm": 1.0267337296320096, + "learning_rate": 1.9279222085942396e-05, + "loss": 0.3557945191860199, + "step": 1248 + }, + { + "epoch": 0.3316956579471518, + "grad_norm": 1.0261133198060035, + "learning_rate": 1.9277584428819113e-05, + "loss": 0.3015502989292145, + "step": 1249 + }, + { + "epoch": 0.33196122692869473, + "grad_norm": 0.9384869314897972, + "learning_rate": 1.9275944983086964e-05, + "loss": 
0.31333664059638977, + "step": 1250 + }, + { + "epoch": 0.3322267959102377, + "grad_norm": 1.103154580638619, + "learning_rate": 1.9274303749062028e-05, + "loss": 0.36595287919044495, + "step": 1251 + }, + { + "epoch": 0.33249236489178063, + "grad_norm": 1.0573816777840739, + "learning_rate": 1.9272660727060705e-05, + "loss": 0.3400266170501709, + "step": 1252 + }, + { + "epoch": 0.3327579338733236, + "grad_norm": 1.0994664368429343, + "learning_rate": 1.927101591739976e-05, + "loss": 0.3642529547214508, + "step": 1253 + }, + { + "epoch": 0.33302350285486654, + "grad_norm": 1.08059410662081, + "learning_rate": 1.926936932039628e-05, + "loss": 0.3418777287006378, + "step": 1254 + }, + { + "epoch": 0.3332890718364095, + "grad_norm": 1.0881678177934593, + "learning_rate": 1.9267720936367723e-05, + "loss": 0.33382388949394226, + "step": 1255 + }, + { + "epoch": 0.33355464081795244, + "grad_norm": 1.1227567600503816, + "learning_rate": 1.926607076563187e-05, + "loss": 0.36257779598236084, + "step": 1256 + }, + { + "epoch": 0.3338202097994954, + "grad_norm": 1.5546101865012443, + "learning_rate": 1.926441880850686e-05, + "loss": 0.3018002510070801, + "step": 1257 + }, + { + "epoch": 0.3340857787810384, + "grad_norm": 1.0263747105982135, + "learning_rate": 1.9262765065311165e-05, + "loss": 0.3373662233352661, + "step": 1258 + }, + { + "epoch": 0.33435134776258135, + "grad_norm": 1.0001644182280367, + "learning_rate": 1.9261109536363613e-05, + "loss": 0.3555397391319275, + "step": 1259 + }, + { + "epoch": 0.3346169167441243, + "grad_norm": 1.1519069907937776, + "learning_rate": 1.925945222198336e-05, + "loss": 0.3004256784915924, + "step": 1260 + }, + { + "epoch": 0.33488248572566726, + "grad_norm": 2.328412351070072, + "learning_rate": 1.925779312248993e-05, + "loss": 0.33299940824508667, + "step": 1261 + }, + { + "epoch": 0.3351480547072102, + "grad_norm": 1.0617967738999583, + "learning_rate": 1.9256132238203166e-05, + "loss": 0.3715725541114807, + "step": 1262 + }, + { 
+ "epoch": 0.33541362368875316, + "grad_norm": 1.0140049717249513, + "learning_rate": 1.9254469569443274e-05, + "loss": 0.35133951902389526, + "step": 1263 + }, + { + "epoch": 0.3356791926702961, + "grad_norm": 0.9980129680534503, + "learning_rate": 1.92528051165308e-05, + "loss": 0.3328818380832672, + "step": 1264 + }, + { + "epoch": 0.33594476165183906, + "grad_norm": 1.0764552464682182, + "learning_rate": 1.925113887978662e-05, + "loss": 0.3665468692779541, + "step": 1265 + }, + { + "epoch": 0.336210330633382, + "grad_norm": 1.0446302802374996, + "learning_rate": 1.9249470859531976e-05, + "loss": 0.3489571511745453, + "step": 1266 + }, + { + "epoch": 0.33647589961492497, + "grad_norm": 1.0629721705272823, + "learning_rate": 1.9247801056088433e-05, + "loss": 0.30038982629776, + "step": 1267 + }, + { + "epoch": 0.3367414685964679, + "grad_norm": 1.1798569183028156, + "learning_rate": 1.9246129469777918e-05, + "loss": 0.4163355827331543, + "step": 1268 + }, + { + "epoch": 0.33700703757801087, + "grad_norm": 1.0428552063046848, + "learning_rate": 1.924445610092269e-05, + "loss": 0.33687612414360046, + "step": 1269 + }, + { + "epoch": 0.3372726065595538, + "grad_norm": 1.0466869124167506, + "learning_rate": 1.924278094984535e-05, + "loss": 0.3448297679424286, + "step": 1270 + }, + { + "epoch": 0.3375381755410968, + "grad_norm": 1.0979384797680924, + "learning_rate": 1.9241104016868853e-05, + "loss": 0.35257208347320557, + "step": 1271 + }, + { + "epoch": 0.3378037445226398, + "grad_norm": 1.0794393535441016, + "learning_rate": 1.9239425302316487e-05, + "loss": 0.34880566596984863, + "step": 1272 + }, + { + "epoch": 0.33806931350418273, + "grad_norm": 1.1081978913885613, + "learning_rate": 1.9237744806511895e-05, + "loss": 0.33643782138824463, + "step": 1273 + }, + { + "epoch": 0.3383348824857257, + "grad_norm": 1.0185962864877929, + "learning_rate": 1.9236062529779057e-05, + "loss": 0.32345050573349, + "step": 1274 + }, + { + "epoch": 0.33860045146726864, + 
"grad_norm": 1.0547576972102612, + "learning_rate": 1.9234378472442286e-05, + "loss": 0.33983978629112244, + "step": 1275 + }, + { + "epoch": 0.3388660204488116, + "grad_norm": 1.0305326470674594, + "learning_rate": 1.923269263482626e-05, + "loss": 0.32825571298599243, + "step": 1276 + }, + { + "epoch": 0.33913158943035454, + "grad_norm": 1.0836151603415423, + "learning_rate": 1.923100501725598e-05, + "loss": 0.3434044122695923, + "step": 1277 + }, + { + "epoch": 0.3393971584118975, + "grad_norm": 1.1293248576076373, + "learning_rate": 1.9229315620056805e-05, + "loss": 0.3463204503059387, + "step": 1278 + }, + { + "epoch": 0.33966272739344044, + "grad_norm": 1.0476463818396518, + "learning_rate": 1.9227624443554425e-05, + "loss": 0.3608240485191345, + "step": 1279 + }, + { + "epoch": 0.3399282963749834, + "grad_norm": 1.111712780266586, + "learning_rate": 1.9225931488074882e-05, + "loss": 0.36131763458251953, + "step": 1280 + }, + { + "epoch": 0.34019386535652635, + "grad_norm": 0.9948222919660873, + "learning_rate": 1.922423675394456e-05, + "loss": 0.3270101547241211, + "step": 1281 + }, + { + "epoch": 0.3404594343380693, + "grad_norm": 1.1047356141038558, + "learning_rate": 1.922254024149018e-05, + "loss": 0.3551778495311737, + "step": 1282 + }, + { + "epoch": 0.34072500331961225, + "grad_norm": 1.1057498393465535, + "learning_rate": 1.9220841951038815e-05, + "loss": 0.3686622381210327, + "step": 1283 + }, + { + "epoch": 0.3409905723011552, + "grad_norm": 1.0810198379819234, + "learning_rate": 1.921914188291787e-05, + "loss": 0.35161536931991577, + "step": 1284 + }, + { + "epoch": 0.34125614128269816, + "grad_norm": 1.1489267376414198, + "learning_rate": 1.92174400374551e-05, + "loss": 0.3549870550632477, + "step": 1285 + }, + { + "epoch": 0.34152171026424116, + "grad_norm": 1.0904860537070935, + "learning_rate": 1.9215736414978593e-05, + "loss": 0.36780738830566406, + "step": 1286 + }, + { + "epoch": 0.3417872792457841, + "grad_norm": 1.132171748367688, + 
"learning_rate": 1.9214031015816803e-05, + "loss": 0.36060047149658203, + "step": 1287 + }, + { + "epoch": 0.34205284822732707, + "grad_norm": 1.0753334155968608, + "learning_rate": 1.9212323840298502e-05, + "loss": 0.32578715682029724, + "step": 1288 + }, + { + "epoch": 0.34231841720887, + "grad_norm": 1.0380534929488934, + "learning_rate": 1.9210614888752813e-05, + "loss": 0.3505493402481079, + "step": 1289 + }, + { + "epoch": 0.34258398619041297, + "grad_norm": 1.0227959332298084, + "learning_rate": 1.9208904161509203e-05, + "loss": 0.32681795954704285, + "step": 1290 + }, + { + "epoch": 0.3428495551719559, + "grad_norm": 1.0227973616384467, + "learning_rate": 1.9207191658897473e-05, + "loss": 0.34808459877967834, + "step": 1291 + }, + { + "epoch": 0.3431151241534989, + "grad_norm": 1.0810974703490968, + "learning_rate": 1.920547738124779e-05, + "loss": 0.3588678240776062, + "step": 1292 + }, + { + "epoch": 0.3433806931350418, + "grad_norm": 1.2030053357742059, + "learning_rate": 1.9203761328890626e-05, + "loss": 0.3528832495212555, + "step": 1293 + }, + { + "epoch": 0.3436462621165848, + "grad_norm": 1.35729757891191, + "learning_rate": 1.9202043502156833e-05, + "loss": 0.33549001812934875, + "step": 1294 + }, + { + "epoch": 0.34391183109812773, + "grad_norm": 1.0986147605525078, + "learning_rate": 1.920032390137758e-05, + "loss": 0.3466021418571472, + "step": 1295 + }, + { + "epoch": 0.3441774000796707, + "grad_norm": 1.0492164389172054, + "learning_rate": 1.9198602526884388e-05, + "loss": 0.35646146535873413, + "step": 1296 + }, + { + "epoch": 0.34444296906121363, + "grad_norm": 1.0348991752364494, + "learning_rate": 1.9196879379009112e-05, + "loss": 0.3442128300666809, + "step": 1297 + }, + { + "epoch": 0.3447085380427566, + "grad_norm": 1.083291442034964, + "learning_rate": 1.9195154458083962e-05, + "loss": 0.3854391872882843, + "step": 1298 + }, + { + "epoch": 0.34497410702429954, + "grad_norm": 1.202325074766952, + "learning_rate": 1.9193427764441477e-05, 
+ "loss": 0.376137375831604, + "step": 1299 + }, + { + "epoch": 0.34523967600584254, + "grad_norm": 1.1591691335477168, + "learning_rate": 1.9191699298414547e-05, + "loss": 0.3115769028663635, + "step": 1300 + }, + { + "epoch": 0.3455052449873855, + "grad_norm": 1.125127529667975, + "learning_rate": 1.9189969060336396e-05, + "loss": 0.32553282380104065, + "step": 1301 + }, + { + "epoch": 0.34577081396892845, + "grad_norm": 1.2442677252107, + "learning_rate": 1.9188237050540597e-05, + "loss": 0.39529356360435486, + "step": 1302 + }, + { + "epoch": 0.3460363829504714, + "grad_norm": 1.016155926476122, + "learning_rate": 1.9186503269361063e-05, + "loss": 0.3027458190917969, + "step": 1303 + }, + { + "epoch": 0.34630195193201435, + "grad_norm": 1.2178145504108082, + "learning_rate": 1.918476771713204e-05, + "loss": 0.39317795634269714, + "step": 1304 + }, + { + "epoch": 0.3465675209135573, + "grad_norm": 1.1358253756284789, + "learning_rate": 1.918303039418813e-05, + "loss": 0.3730325698852539, + "step": 1305 + }, + { + "epoch": 0.34683308989510025, + "grad_norm": 1.0835224567793253, + "learning_rate": 1.918129130086426e-05, + "loss": 0.34862780570983887, + "step": 1306 + }, + { + "epoch": 0.3470986588766432, + "grad_norm": 1.106131252801308, + "learning_rate": 1.9179550437495707e-05, + "loss": 0.32139018177986145, + "step": 1307 + }, + { + "epoch": 0.34736422785818616, + "grad_norm": 1.118754726003564, + "learning_rate": 1.91778078044181e-05, + "loss": 0.37246090173721313, + "step": 1308 + }, + { + "epoch": 0.3476297968397291, + "grad_norm": 1.035507147337034, + "learning_rate": 1.9176063401967386e-05, + "loss": 0.30985957384109497, + "step": 1309 + }, + { + "epoch": 0.34789536582127206, + "grad_norm": 1.1303664709170593, + "learning_rate": 1.917431723047987e-05, + "loss": 0.3713758587837219, + "step": 1310 + }, + { + "epoch": 0.348160934802815, + "grad_norm": 1.076206973404712, + "learning_rate": 1.9172569290292193e-05, + "loss": 0.3465833067893982, + "step": 1311 + 
}, + { + "epoch": 0.34842650378435797, + "grad_norm": 1.1789932919731194, + "learning_rate": 1.917081958174134e-05, + "loss": 0.34807220101356506, + "step": 1312 + }, + { + "epoch": 0.3486920727659009, + "grad_norm": 1.0178456651378849, + "learning_rate": 1.9169068105164627e-05, + "loss": 0.3369640111923218, + "step": 1313 + }, + { + "epoch": 0.3489576417474439, + "grad_norm": 1.1714339652663717, + "learning_rate": 1.9167314860899724e-05, + "loss": 0.3521544337272644, + "step": 1314 + }, + { + "epoch": 0.3492232107289869, + "grad_norm": 0.9756562815370131, + "learning_rate": 1.9165559849284635e-05, + "loss": 0.3256300687789917, + "step": 1315 + }, + { + "epoch": 0.34948877971052983, + "grad_norm": 1.1173269078403432, + "learning_rate": 1.9163803070657706e-05, + "loss": 0.32401931285858154, + "step": 1316 + }, + { + "epoch": 0.3497543486920728, + "grad_norm": 1.104564951170044, + "learning_rate": 1.916204452535762e-05, + "loss": 0.372749924659729, + "step": 1317 + }, + { + "epoch": 0.35001991767361573, + "grad_norm": 1.053240444697934, + "learning_rate": 1.9160284213723407e-05, + "loss": 0.35853224992752075, + "step": 1318 + }, + { + "epoch": 0.3502854866551587, + "grad_norm": 1.048325144857422, + "learning_rate": 1.9158522136094433e-05, + "loss": 0.32850801944732666, + "step": 1319 + }, + { + "epoch": 0.35055105563670164, + "grad_norm": 1.1274703494911789, + "learning_rate": 1.9156758292810404e-05, + "loss": 0.3548474907875061, + "step": 1320 + }, + { + "epoch": 0.3508166246182446, + "grad_norm": 1.10371779317482, + "learning_rate": 1.9154992684211372e-05, + "loss": 0.38709041476249695, + "step": 1321 + }, + { + "epoch": 0.35108219359978754, + "grad_norm": 1.1369910570736041, + "learning_rate": 1.9153225310637726e-05, + "loss": 0.40369266271591187, + "step": 1322 + }, + { + "epoch": 0.3513477625813305, + "grad_norm": 1.179710362637603, + "learning_rate": 1.9151456172430186e-05, + "loss": 0.3570155203342438, + "step": 1323 + }, + { + "epoch": 0.35161333156287344, + 
"grad_norm": 1.0315056954444073, + "learning_rate": 1.9149685269929833e-05, + "loss": 0.34426411986351013, + "step": 1324 + }, + { + "epoch": 0.3518789005444164, + "grad_norm": 1.0980268876500368, + "learning_rate": 1.9147912603478066e-05, + "loss": 0.35666006803512573, + "step": 1325 + }, + { + "epoch": 0.35214446952595935, + "grad_norm": 1.0320732816254274, + "learning_rate": 1.9146138173416643e-05, + "loss": 0.36225512623786926, + "step": 1326 + }, + { + "epoch": 0.3524100385075023, + "grad_norm": 1.0499655117353668, + "learning_rate": 1.9144361980087643e-05, + "loss": 0.3312349319458008, + "step": 1327 + }, + { + "epoch": 0.3526756074890453, + "grad_norm": 1.0828461821707789, + "learning_rate": 1.9142584023833506e-05, + "loss": 0.3590523302555084, + "step": 1328 + }, + { + "epoch": 0.35294117647058826, + "grad_norm": 1.2432343198034153, + "learning_rate": 1.9140804304996997e-05, + "loss": 0.341480016708374, + "step": 1329 + }, + { + "epoch": 0.3532067454521312, + "grad_norm": 1.0165353851066345, + "learning_rate": 1.913902282392122e-05, + "loss": 0.37246501445770264, + "step": 1330 + }, + { + "epoch": 0.35347231443367416, + "grad_norm": 1.0959834963108057, + "learning_rate": 1.913723958094963e-05, + "loss": 0.33834031224250793, + "step": 1331 + }, + { + "epoch": 0.3537378834152171, + "grad_norm": 1.0066884605687934, + "learning_rate": 1.913545457642601e-05, + "loss": 0.29285067319869995, + "step": 1332 + }, + { + "epoch": 0.35400345239676007, + "grad_norm": 1.0768479974972798, + "learning_rate": 1.913366781069449e-05, + "loss": 0.2903720736503601, + "step": 1333 + }, + { + "epoch": 0.354269021378303, + "grad_norm": 1.1311334028851072, + "learning_rate": 1.913187928409954e-05, + "loss": 0.36428314447402954, + "step": 1334 + }, + { + "epoch": 0.35453459035984597, + "grad_norm": 1.0473346547130091, + "learning_rate": 1.9130088996985967e-05, + "loss": 0.3379477560520172, + "step": 1335 + }, + { + "epoch": 0.3548001593413889, + "grad_norm": 1.0963924260325884, + 
"learning_rate": 1.912829694969891e-05, + "loss": 0.35286659002304077, + "step": 1336 + }, + { + "epoch": 0.3550657283229319, + "grad_norm": 1.1930831242867357, + "learning_rate": 1.9126503142583864e-05, + "loss": 0.3670174479484558, + "step": 1337 + }, + { + "epoch": 0.3553312973044748, + "grad_norm": 1.1294601866875984, + "learning_rate": 1.9124707575986642e-05, + "loss": 0.3422902226448059, + "step": 1338 + }, + { + "epoch": 0.3555968662860178, + "grad_norm": 0.9984746022499613, + "learning_rate": 1.912291025025342e-05, + "loss": 0.29778385162353516, + "step": 1339 + }, + { + "epoch": 0.35586243526756073, + "grad_norm": 1.1907673127670892, + "learning_rate": 1.91211111657307e-05, + "loss": 0.36249661445617676, + "step": 1340 + }, + { + "epoch": 0.3561280042491037, + "grad_norm": 1.1054946723600563, + "learning_rate": 1.9119310322765315e-05, + "loss": 0.340925395488739, + "step": 1341 + }, + { + "epoch": 0.3563935732306467, + "grad_norm": 1.1964466720866056, + "learning_rate": 1.9117507721704455e-05, + "loss": 0.35674089193344116, + "step": 1342 + }, + { + "epoch": 0.35665914221218964, + "grad_norm": 1.1077144979302902, + "learning_rate": 1.9115703362895636e-05, + "loss": 0.3602067828178406, + "step": 1343 + }, + { + "epoch": 0.3569247111937326, + "grad_norm": 1.1669501112510636, + "learning_rate": 1.9113897246686716e-05, + "loss": 0.35211697220802307, + "step": 1344 + }, + { + "epoch": 0.35719028017527554, + "grad_norm": 1.1098565168791754, + "learning_rate": 1.91120893734259e-05, + "loss": 0.3706115484237671, + "step": 1345 + }, + { + "epoch": 0.3574558491568185, + "grad_norm": 0.955637908965499, + "learning_rate": 1.9110279743461717e-05, + "loss": 0.3365110754966736, + "step": 1346 + }, + { + "epoch": 0.35772141813836145, + "grad_norm": 1.2071736385011052, + "learning_rate": 1.9108468357143047e-05, + "loss": 0.40012121200561523, + "step": 1347 + }, + { + "epoch": 0.3579869871199044, + "grad_norm": 1.1409634140225444, + "learning_rate": 1.91066552148191e-05, + 
"loss": 0.4003351926803589, + "step": 1348 + }, + { + "epoch": 0.35825255610144735, + "grad_norm": 1.0613274196364288, + "learning_rate": 1.910484031683943e-05, + "loss": 0.3574616014957428, + "step": 1349 + }, + { + "epoch": 0.3585181250829903, + "grad_norm": 1.0904662824068834, + "learning_rate": 1.910302366355393e-05, + "loss": 0.3345073461532593, + "step": 1350 + }, + { + "epoch": 0.35878369406453325, + "grad_norm": 1.0532412802136695, + "learning_rate": 1.910120525531283e-05, + "loss": 0.3467676341533661, + "step": 1351 + }, + { + "epoch": 0.3590492630460762, + "grad_norm": 1.0529131768701299, + "learning_rate": 1.9099385092466695e-05, + "loss": 0.32433655858039856, + "step": 1352 + }, + { + "epoch": 0.35931483202761916, + "grad_norm": 1.0442908892383016, + "learning_rate": 1.909756317536643e-05, + "loss": 0.3366447985172272, + "step": 1353 + }, + { + "epoch": 0.3595804010091621, + "grad_norm": 1.0770054348386777, + "learning_rate": 1.909573950436328e-05, + "loss": 0.310118168592453, + "step": 1354 + }, + { + "epoch": 0.35984596999070506, + "grad_norm": 1.4782002462322321, + "learning_rate": 1.909391407980883e-05, + "loss": 0.3503451943397522, + "step": 1355 + }, + { + "epoch": 0.36011153897224807, + "grad_norm": 1.0889726916887852, + "learning_rate": 1.9092086902054996e-05, + "loss": 0.3375343978404999, + "step": 1356 + }, + { + "epoch": 0.360377107953791, + "grad_norm": 0.9368081121032712, + "learning_rate": 1.909025797145404e-05, + "loss": 0.3056451082229614, + "step": 1357 + }, + { + "epoch": 0.360642676935334, + "grad_norm": 0.9554491579006472, + "learning_rate": 1.9088427288358556e-05, + "loss": 0.3063391447067261, + "step": 1358 + }, + { + "epoch": 0.3609082459168769, + "grad_norm": 0.9358824747825566, + "learning_rate": 1.908659485312148e-05, + "loss": 0.3055405616760254, + "step": 1359 + }, + { + "epoch": 0.3611738148984199, + "grad_norm": 1.1828231629690173, + "learning_rate": 1.908476066609608e-05, + "loss": 0.38323235511779785, + "step": 1360 + }, 
+ { + "epoch": 0.36143938387996283, + "grad_norm": 1.0971994038941366, + "learning_rate": 1.908292472763597e-05, + "loss": 0.33526092767715454, + "step": 1361 + }, + { + "epoch": 0.3617049528615058, + "grad_norm": 1.0449346093027478, + "learning_rate": 1.9081087038095094e-05, + "loss": 0.34485238790512085, + "step": 1362 + }, + { + "epoch": 0.36197052184304873, + "grad_norm": 1.0943982229718532, + "learning_rate": 1.907924759782774e-05, + "loss": 0.2963239252567291, + "step": 1363 + }, + { + "epoch": 0.3622360908245917, + "grad_norm": 1.2033822452903298, + "learning_rate": 1.9077406407188532e-05, + "loss": 0.3536864221096039, + "step": 1364 + }, + { + "epoch": 0.36250165980613464, + "grad_norm": 1.1739216512613182, + "learning_rate": 1.907556346653242e-05, + "loss": 0.3724798858165741, + "step": 1365 + }, + { + "epoch": 0.3627672287876776, + "grad_norm": 1.2035474175290464, + "learning_rate": 1.9073718776214717e-05, + "loss": 0.36241161823272705, + "step": 1366 + }, + { + "epoch": 0.36303279776922054, + "grad_norm": 1.2262905723198394, + "learning_rate": 1.9071872336591042e-05, + "loss": 0.3484225273132324, + "step": 1367 + }, + { + "epoch": 0.3632983667507635, + "grad_norm": 1.11285184075262, + "learning_rate": 1.9070024148017375e-05, + "loss": 0.33606311678886414, + "step": 1368 + }, + { + "epoch": 0.36356393573230644, + "grad_norm": 1.076908267109863, + "learning_rate": 1.906817421085002e-05, + "loss": 0.3263503909111023, + "step": 1369 + }, + { + "epoch": 0.36382950471384945, + "grad_norm": 1.126388175466026, + "learning_rate": 1.906632252544563e-05, + "loss": 0.33454492688179016, + "step": 1370 + }, + { + "epoch": 0.3640950736953924, + "grad_norm": 1.1264022314316273, + "learning_rate": 1.9064469092161185e-05, + "loss": 0.34858438372612, + "step": 1371 + }, + { + "epoch": 0.36436064267693535, + "grad_norm": 1.0527021112264499, + "learning_rate": 1.9062613911354005e-05, + "loss": 0.3466234505176544, + "step": 1372 + }, + { + "epoch": 0.3646262116584783, + 
"grad_norm": 1.0325760706581486, + "learning_rate": 1.9060756983381743e-05, + "loss": 0.33574312925338745, + "step": 1373 + }, + { + "epoch": 0.36489178064002126, + "grad_norm": 1.0321788657369535, + "learning_rate": 1.90588983086024e-05, + "loss": 0.3012363016605377, + "step": 1374 + }, + { + "epoch": 0.3651573496215642, + "grad_norm": 1.0033389586223882, + "learning_rate": 1.90570378873743e-05, + "loss": 0.3050191402435303, + "step": 1375 + }, + { + "epoch": 0.36542291860310716, + "grad_norm": 1.0078763869776561, + "learning_rate": 1.905517572005611e-05, + "loss": 0.35090070962905884, + "step": 1376 + }, + { + "epoch": 0.3656884875846501, + "grad_norm": 1.011051809727729, + "learning_rate": 1.9053311807006845e-05, + "loss": 0.3276262581348419, + "step": 1377 + }, + { + "epoch": 0.36595405656619306, + "grad_norm": 1.300904148134606, + "learning_rate": 1.9051446148585833e-05, + "loss": 0.3303500711917877, + "step": 1378 + }, + { + "epoch": 0.366219625547736, + "grad_norm": 1.113413634877815, + "learning_rate": 1.9049578745152754e-05, + "loss": 0.3748486042022705, + "step": 1379 + }, + { + "epoch": 0.36648519452927897, + "grad_norm": 0.8707302355459249, + "learning_rate": 1.9047709597067628e-05, + "loss": 0.30339744687080383, + "step": 1380 + }, + { + "epoch": 0.3667507635108219, + "grad_norm": 1.0245709544347914, + "learning_rate": 1.9045838704690796e-05, + "loss": 0.31811147928237915, + "step": 1381 + }, + { + "epoch": 0.36701633249236487, + "grad_norm": 1.1759156162745943, + "learning_rate": 1.9043966068382945e-05, + "loss": 0.3541119694709778, + "step": 1382 + }, + { + "epoch": 0.3672819014739078, + "grad_norm": 1.0874467494483675, + "learning_rate": 1.9042091688505104e-05, + "loss": 0.36639657616615295, + "step": 1383 + }, + { + "epoch": 0.36754747045545083, + "grad_norm": 1.0242460437241268, + "learning_rate": 1.9040215565418628e-05, + "loss": 0.35859787464141846, + "step": 1384 + }, + { + "epoch": 0.3678130394369938, + "grad_norm": 1.017105790679022, + 
"learning_rate": 1.9038337699485207e-05, + "loss": 0.3210521340370178, + "step": 1385 + }, + { + "epoch": 0.36807860841853673, + "grad_norm": 1.0362268895966902, + "learning_rate": 1.9036458091066875e-05, + "loss": 0.3207433819770813, + "step": 1386 + }, + { + "epoch": 0.3683441774000797, + "grad_norm": 0.9948382455278952, + "learning_rate": 1.9034576740526e-05, + "loss": 0.3475082218647003, + "step": 1387 + }, + { + "epoch": 0.36860974638162264, + "grad_norm": 1.167057707852143, + "learning_rate": 1.903269364822528e-05, + "loss": 0.33252987265586853, + "step": 1388 + }, + { + "epoch": 0.3688753153631656, + "grad_norm": 1.0281516525035093, + "learning_rate": 1.903080881452776e-05, + "loss": 0.32200103998184204, + "step": 1389 + }, + { + "epoch": 0.36914088434470854, + "grad_norm": 1.0752934055327636, + "learning_rate": 1.9028922239796803e-05, + "loss": 0.34780022501945496, + "step": 1390 + }, + { + "epoch": 0.3694064533262515, + "grad_norm": 1.1028643639363398, + "learning_rate": 1.902703392439613e-05, + "loss": 0.35411912202835083, + "step": 1391 + }, + { + "epoch": 0.36967202230779445, + "grad_norm": 1.6627965093255739, + "learning_rate": 1.9025143868689773e-05, + "loss": 0.35232803225517273, + "step": 1392 + }, + { + "epoch": 0.3699375912893374, + "grad_norm": 1.168292115519334, + "learning_rate": 1.9023252073042128e-05, + "loss": 0.38561391830444336, + "step": 1393 + }, + { + "epoch": 0.37020316027088035, + "grad_norm": 0.9982322437598163, + "learning_rate": 1.9021358537817897e-05, + "loss": 0.3184170126914978, + "step": 1394 + }, + { + "epoch": 0.3704687292524233, + "grad_norm": 1.0557333187102689, + "learning_rate": 1.9019463263382142e-05, + "loss": 0.32455068826675415, + "step": 1395 + }, + { + "epoch": 0.37073429823396625, + "grad_norm": 1.0862364532602506, + "learning_rate": 1.901756625010024e-05, + "loss": 0.32998934388160706, + "step": 1396 + }, + { + "epoch": 0.3709998672155092, + "grad_norm": 1.1350071137219766, + "learning_rate": 
1.901566749833792e-05, + "loss": 0.3361780643463135, + "step": 1397 + }, + { + "epoch": 0.37126543619705216, + "grad_norm": 1.1483051699341575, + "learning_rate": 1.9013767008461236e-05, + "loss": 0.3618711829185486, + "step": 1398 + }, + { + "epoch": 0.37153100517859516, + "grad_norm": 1.1250978483748488, + "learning_rate": 1.901186478083658e-05, + "loss": 0.3904131054878235, + "step": 1399 + }, + { + "epoch": 0.3717965741601381, + "grad_norm": 1.0885741580509858, + "learning_rate": 1.9009960815830676e-05, + "loss": 0.35742759704589844, + "step": 1400 + }, + { + "epoch": 0.37206214314168107, + "grad_norm": 1.073570835222054, + "learning_rate": 1.9008055113810595e-05, + "loss": 0.32880812883377075, + "step": 1401 + }, + { + "epoch": 0.372327712123224, + "grad_norm": 1.0645240727318732, + "learning_rate": 1.9006147675143724e-05, + "loss": 0.3379839360713959, + "step": 1402 + }, + { + "epoch": 0.37259328110476697, + "grad_norm": 1.1363528922504198, + "learning_rate": 1.90042385001978e-05, + "loss": 0.3635789453983307, + "step": 1403 + }, + { + "epoch": 0.3728588500863099, + "grad_norm": 1.1103620354136925, + "learning_rate": 1.900232758934089e-05, + "loss": 0.3462461233139038, + "step": 1404 + }, + { + "epoch": 0.3731244190678529, + "grad_norm": 1.1087128591527484, + "learning_rate": 1.900041494294139e-05, + "loss": 0.34578579664230347, + "step": 1405 + }, + { + "epoch": 0.3733899880493958, + "grad_norm": 1.1067984269435176, + "learning_rate": 1.899850056136804e-05, + "loss": 0.36266931891441345, + "step": 1406 + }, + { + "epoch": 0.3736555570309388, + "grad_norm": 1.089685836132972, + "learning_rate": 1.899658444498991e-05, + "loss": 0.34019365906715393, + "step": 1407 + }, + { + "epoch": 0.37392112601248173, + "grad_norm": 1.0009475991478056, + "learning_rate": 1.8994666594176404e-05, + "loss": 0.3057953119277954, + "step": 1408 + }, + { + "epoch": 0.3741866949940247, + "grad_norm": 1.1008245937613312, + "learning_rate": 1.8992747009297265e-05, + "loss": 
0.3663131892681122, + "step": 1409 + }, + { + "epoch": 0.37445226397556763, + "grad_norm": 1.0696938984110862, + "learning_rate": 1.8990825690722557e-05, + "loss": 0.3402065634727478, + "step": 1410 + }, + { + "epoch": 0.3747178329571106, + "grad_norm": 1.017664192724319, + "learning_rate": 1.8988902638822693e-05, + "loss": 0.3437868654727936, + "step": 1411 + }, + { + "epoch": 0.37498340193865354, + "grad_norm": 1.2246388577961873, + "learning_rate": 1.8986977853968416e-05, + "loss": 0.40972524881362915, + "step": 1412 + }, + { + "epoch": 0.37524897092019655, + "grad_norm": 1.0293557658064552, + "learning_rate": 1.89850513365308e-05, + "loss": 0.3237977921962738, + "step": 1413 + }, + { + "epoch": 0.3755145399017395, + "grad_norm": 0.9581631299919097, + "learning_rate": 1.8983123086881254e-05, + "loss": 0.3146173357963562, + "step": 1414 + }, + { + "epoch": 0.37578010888328245, + "grad_norm": 0.9942979474502576, + "learning_rate": 1.8981193105391524e-05, + "loss": 0.33485543727874756, + "step": 1415 + }, + { + "epoch": 0.3760456778648254, + "grad_norm": 1.0963696340494955, + "learning_rate": 1.8979261392433685e-05, + "loss": 0.36379897594451904, + "step": 1416 + }, + { + "epoch": 0.37631124684636835, + "grad_norm": 0.902828061805848, + "learning_rate": 1.8977327948380154e-05, + "loss": 0.2737882137298584, + "step": 1417 + }, + { + "epoch": 0.3765768158279113, + "grad_norm": 1.1168765744666191, + "learning_rate": 1.897539277360367e-05, + "loss": 0.3554575443267822, + "step": 1418 + }, + { + "epoch": 0.37684238480945426, + "grad_norm": 1.0021058464909711, + "learning_rate": 1.897345586847731e-05, + "loss": 0.3297621011734009, + "step": 1419 + }, + { + "epoch": 0.3771079537909972, + "grad_norm": 1.1638469907551372, + "learning_rate": 1.8971517233374497e-05, + "loss": 0.32272985577583313, + "step": 1420 + }, + { + "epoch": 0.37737352277254016, + "grad_norm": 1.0280583772355378, + "learning_rate": 1.8969576868668967e-05, + "loss": 0.32175642251968384, + "step": 1421 + 
}, + { + "epoch": 0.3776390917540831, + "grad_norm": 1.1136468557030246, + "learning_rate": 1.8967634774734807e-05, + "loss": 0.35973137617111206, + "step": 1422 + }, + { + "epoch": 0.37790466073562606, + "grad_norm": 1.1892680335343753, + "learning_rate": 1.8965690951946424e-05, + "loss": 0.3385169506072998, + "step": 1423 + }, + { + "epoch": 0.378170229717169, + "grad_norm": 1.1245023779822048, + "learning_rate": 1.8963745400678564e-05, + "loss": 0.3683067560195923, + "step": 1424 + }, + { + "epoch": 0.37843579869871197, + "grad_norm": 1.1630069521478075, + "learning_rate": 1.896179812130631e-05, + "loss": 0.3711622357368469, + "step": 1425 + }, + { + "epoch": 0.3787013676802549, + "grad_norm": 1.015020556732164, + "learning_rate": 1.895984911420507e-05, + "loss": 0.30416572093963623, + "step": 1426 + }, + { + "epoch": 0.3789669366617979, + "grad_norm": 1.079958708031102, + "learning_rate": 1.8957898379750598e-05, + "loss": 0.3439522385597229, + "step": 1427 + }, + { + "epoch": 0.3792325056433409, + "grad_norm": 1.1382084488728177, + "learning_rate": 1.895594591831896e-05, + "loss": 0.3663806617259979, + "step": 1428 + }, + { + "epoch": 0.37949807462488383, + "grad_norm": 1.0501527452156108, + "learning_rate": 1.895399173028658e-05, + "loss": 0.32132354378700256, + "step": 1429 + }, + { + "epoch": 0.3797636436064268, + "grad_norm": 0.9916462964383544, + "learning_rate": 1.8952035816030196e-05, + "loss": 0.3040635585784912, + "step": 1430 + }, + { + "epoch": 0.38002921258796973, + "grad_norm": 1.1155299107557486, + "learning_rate": 1.8950078175926886e-05, + "loss": 0.3548869788646698, + "step": 1431 + }, + { + "epoch": 0.3802947815695127, + "grad_norm": 1.1280933582225339, + "learning_rate": 1.894811881035406e-05, + "loss": 0.3114319443702698, + "step": 1432 + }, + { + "epoch": 0.38056035055105564, + "grad_norm": 1.151174980739505, + "learning_rate": 1.894615771968946e-05, + "loss": 0.3589673936367035, + "step": 1433 + }, + { + "epoch": 0.3808259195325986, + 
"grad_norm": 1.1074661491088642, + "learning_rate": 1.894419490431116e-05, + "loss": 0.3073863983154297, + "step": 1434 + }, + { + "epoch": 0.38109148851414154, + "grad_norm": 1.0689323921068359, + "learning_rate": 1.8942230364597572e-05, + "loss": 0.32474076747894287, + "step": 1435 + }, + { + "epoch": 0.3813570574956845, + "grad_norm": 2.6127931856999314, + "learning_rate": 1.8940264100927432e-05, + "loss": 0.3363546133041382, + "step": 1436 + }, + { + "epoch": 0.38162262647722744, + "grad_norm": 0.9995665434586938, + "learning_rate": 1.8938296113679814e-05, + "loss": 0.33679312467575073, + "step": 1437 + }, + { + "epoch": 0.3818881954587704, + "grad_norm": 1.0113319573344832, + "learning_rate": 1.8936326403234125e-05, + "loss": 0.33171382546424866, + "step": 1438 + }, + { + "epoch": 0.38215376444031335, + "grad_norm": 1.0880785150495547, + "learning_rate": 1.8934354969970097e-05, + "loss": 0.3717402219772339, + "step": 1439 + }, + { + "epoch": 0.3824193334218563, + "grad_norm": 1.1102375952968466, + "learning_rate": 1.8932381814267802e-05, + "loss": 0.335337370634079, + "step": 1440 + }, + { + "epoch": 0.3826849024033993, + "grad_norm": 1.010201255539417, + "learning_rate": 1.893040693650764e-05, + "loss": 0.32745444774627686, + "step": 1441 + }, + { + "epoch": 0.38295047138494226, + "grad_norm": 1.045820108792802, + "learning_rate": 1.892843033707035e-05, + "loss": 0.34863507747650146, + "step": 1442 + }, + { + "epoch": 0.3832160403664852, + "grad_norm": 1.0344465763282014, + "learning_rate": 1.8926452016336987e-05, + "loss": 0.3428313732147217, + "step": 1443 + }, + { + "epoch": 0.38348160934802816, + "grad_norm": 0.9882681324904586, + "learning_rate": 1.8924471974688956e-05, + "loss": 0.3223801851272583, + "step": 1444 + }, + { + "epoch": 0.3837471783295711, + "grad_norm": 1.2003387152989082, + "learning_rate": 1.8922490212507983e-05, + "loss": 0.33248746395111084, + "step": 1445 + }, + { + "epoch": 0.38401274731111407, + "grad_norm": 1.0404747226700646, + 
"learning_rate": 1.8920506730176125e-05, + "loss": 0.3472076654434204, + "step": 1446 + }, + { + "epoch": 0.384278316292657, + "grad_norm": 1.229166058737197, + "learning_rate": 1.891852152807578e-05, + "loss": 0.4385136365890503, + "step": 1447 + }, + { + "epoch": 0.38454388527419997, + "grad_norm": 1.0444838405880497, + "learning_rate": 1.8916534606589666e-05, + "loss": 0.36871540546417236, + "step": 1448 + }, + { + "epoch": 0.3848094542557429, + "grad_norm": 1.0803859921763799, + "learning_rate": 1.8914545966100843e-05, + "loss": 0.3136710524559021, + "step": 1449 + }, + { + "epoch": 0.3850750232372859, + "grad_norm": 1.0902031451870209, + "learning_rate": 1.891255560699269e-05, + "loss": 0.3236457109451294, + "step": 1450 + }, + { + "epoch": 0.3853405922188288, + "grad_norm": 0.9936714818929803, + "learning_rate": 1.8910563529648933e-05, + "loss": 0.3176822066307068, + "step": 1451 + }, + { + "epoch": 0.3856061612003718, + "grad_norm": 1.0635659473367998, + "learning_rate": 1.890856973445362e-05, + "loss": 0.3531719744205475, + "step": 1452 + }, + { + "epoch": 0.38587173018191473, + "grad_norm": 0.9470574553293423, + "learning_rate": 1.8906574221791127e-05, + "loss": 0.2911416292190552, + "step": 1453 + }, + { + "epoch": 0.3861372991634577, + "grad_norm": 1.0992858203425024, + "learning_rate": 1.890457699204617e-05, + "loss": 0.3522392511367798, + "step": 1454 + }, + { + "epoch": 0.3864028681450007, + "grad_norm": 1.1706910837372075, + "learning_rate": 1.8902578045603787e-05, + "loss": 0.3724471628665924, + "step": 1455 + }, + { + "epoch": 0.38666843712654364, + "grad_norm": 1.1807687078274312, + "learning_rate": 1.890057738284935e-05, + "loss": 0.2935449481010437, + "step": 1456 + }, + { + "epoch": 0.3869340061080866, + "grad_norm": 1.1181603604376231, + "learning_rate": 1.8898575004168568e-05, + "loss": 0.3413137197494507, + "step": 1457 + }, + { + "epoch": 0.38719957508962954, + "grad_norm": 1.1002740783107277, + "learning_rate": 1.8896570909947477e-05, + 
"loss": 0.32282277941703796, + "step": 1458 + }, + { + "epoch": 0.3874651440711725, + "grad_norm": 1.0071931608273124, + "learning_rate": 1.8894565100572435e-05, + "loss": 0.3285476565361023, + "step": 1459 + }, + { + "epoch": 0.38773071305271545, + "grad_norm": 1.010871057653593, + "learning_rate": 1.8892557576430147e-05, + "loss": 0.29517480731010437, + "step": 1460 + }, + { + "epoch": 0.3879962820342584, + "grad_norm": 0.9710184588467288, + "learning_rate": 1.8890548337907636e-05, + "loss": 0.2913149297237396, + "step": 1461 + }, + { + "epoch": 0.38826185101580135, + "grad_norm": 1.096024980027641, + "learning_rate": 1.8888537385392258e-05, + "loss": 0.32154160737991333, + "step": 1462 + }, + { + "epoch": 0.3885274199973443, + "grad_norm": 1.157775550745099, + "learning_rate": 1.88865247192717e-05, + "loss": 0.30677905678749084, + "step": 1463 + }, + { + "epoch": 0.38879298897888726, + "grad_norm": 1.1509749466488566, + "learning_rate": 1.888451033993399e-05, + "loss": 0.37568169832229614, + "step": 1464 + }, + { + "epoch": 0.3890585579604302, + "grad_norm": 1.0554287268781006, + "learning_rate": 1.8882494247767465e-05, + "loss": 0.34972083568573, + "step": 1465 + }, + { + "epoch": 0.38932412694197316, + "grad_norm": 1.1253148629548142, + "learning_rate": 1.888047644316081e-05, + "loss": 0.3198736906051636, + "step": 1466 + }, + { + "epoch": 0.3895896959235161, + "grad_norm": 1.0268445477998984, + "learning_rate": 1.887845692650303e-05, + "loss": 0.3405846953392029, + "step": 1467 + }, + { + "epoch": 0.38985526490505906, + "grad_norm": 1.1800981831391237, + "learning_rate": 1.8876435698183465e-05, + "loss": 0.3600257337093353, + "step": 1468 + }, + { + "epoch": 0.39012083388660207, + "grad_norm": 1.042232512137109, + "learning_rate": 1.887441275859179e-05, + "loss": 0.32415103912353516, + "step": 1469 + }, + { + "epoch": 0.390386402868145, + "grad_norm": 1.1736259107415346, + "learning_rate": 1.8872388108117995e-05, + "loss": 0.3450891673564911, + "step": 1470 + 
}, + { + "epoch": 0.390651971849688, + "grad_norm": 1.0534871304087963, + "learning_rate": 1.8870361747152416e-05, + "loss": 0.3210057318210602, + "step": 1471 + }, + { + "epoch": 0.3909175408312309, + "grad_norm": 1.1749127166764717, + "learning_rate": 1.8868333676085707e-05, + "loss": 0.3615706264972687, + "step": 1472 + }, + { + "epoch": 0.3911831098127739, + "grad_norm": 1.0750237065987462, + "learning_rate": 1.8866303895308856e-05, + "loss": 0.34149813652038574, + "step": 1473 + }, + { + "epoch": 0.39144867879431683, + "grad_norm": 0.91786674858188, + "learning_rate": 1.8864272405213188e-05, + "loss": 0.2795295715332031, + "step": 1474 + }, + { + "epoch": 0.3917142477758598, + "grad_norm": 1.1110559595870293, + "learning_rate": 1.8862239206190337e-05, + "loss": 0.3459053933620453, + "step": 1475 + }, + { + "epoch": 0.39197981675740273, + "grad_norm": 1.1048084354602663, + "learning_rate": 1.8860204298632294e-05, + "loss": 0.3531072735786438, + "step": 1476 + }, + { + "epoch": 0.3922453857389457, + "grad_norm": 1.128095083544478, + "learning_rate": 1.8858167682931357e-05, + "loss": 0.3788977265357971, + "step": 1477 + }, + { + "epoch": 0.39251095472048864, + "grad_norm": 1.3263027090109385, + "learning_rate": 1.8856129359480163e-05, + "loss": 0.3210671544075012, + "step": 1478 + }, + { + "epoch": 0.3927765237020316, + "grad_norm": 1.0773816671223826, + "learning_rate": 1.8854089328671673e-05, + "loss": 0.3442102074623108, + "step": 1479 + }, + { + "epoch": 0.39304209268357454, + "grad_norm": 1.0501956367137624, + "learning_rate": 1.885204759089919e-05, + "loss": 0.29128211736679077, + "step": 1480 + }, + { + "epoch": 0.3933076616651175, + "grad_norm": 1.1403330671915806, + "learning_rate": 1.885000414655633e-05, + "loss": 0.3601154088973999, + "step": 1481 + }, + { + "epoch": 0.39357323064666044, + "grad_norm": 1.032058056545269, + "learning_rate": 1.8847958996037042e-05, + "loss": 0.3173052668571472, + "step": 1482 + }, + { + "epoch": 0.39383879962820345, + 
"grad_norm": 1.0840123249628424, + "learning_rate": 1.8845912139735616e-05, + "loss": 0.32759106159210205, + "step": 1483 + }, + { + "epoch": 0.3941043686097464, + "grad_norm": 1.0868479290241493, + "learning_rate": 1.8843863578046657e-05, + "loss": 0.3213586211204529, + "step": 1484 + }, + { + "epoch": 0.39436993759128935, + "grad_norm": 1.0263834848721582, + "learning_rate": 1.8841813311365105e-05, + "loss": 0.342970073223114, + "step": 1485 + }, + { + "epoch": 0.3946355065728323, + "grad_norm": 1.1467746465148738, + "learning_rate": 1.883976134008622e-05, + "loss": 0.3852401375770569, + "step": 1486 + }, + { + "epoch": 0.39490107555437526, + "grad_norm": 1.0974253808771965, + "learning_rate": 1.883770766460561e-05, + "loss": 0.2965390682220459, + "step": 1487 + }, + { + "epoch": 0.3951666445359182, + "grad_norm": 1.1655078685340161, + "learning_rate": 1.883565228531919e-05, + "loss": 0.3899655044078827, + "step": 1488 + }, + { + "epoch": 0.39543221351746116, + "grad_norm": 1.1086105484757183, + "learning_rate": 1.8833595202623222e-05, + "loss": 0.339199423789978, + "step": 1489 + }, + { + "epoch": 0.3956977824990041, + "grad_norm": 1.049526058190211, + "learning_rate": 1.8831536416914278e-05, + "loss": 0.3121682405471802, + "step": 1490 + }, + { + "epoch": 0.39596335148054707, + "grad_norm": 1.073417591294797, + "learning_rate": 1.8829475928589272e-05, + "loss": 0.31947991251945496, + "step": 1491 + }, + { + "epoch": 0.39622892046209, + "grad_norm": 1.1660176936819076, + "learning_rate": 1.882741373804544e-05, + "loss": 0.3569333553314209, + "step": 1492 + }, + { + "epoch": 0.39649448944363297, + "grad_norm": 1.1521030930761056, + "learning_rate": 1.882534984568035e-05, + "loss": 0.3739020526409149, + "step": 1493 + }, + { + "epoch": 0.3967600584251759, + "grad_norm": 1.0930221251915908, + "learning_rate": 1.882328425189189e-05, + "loss": 0.34350353479385376, + "step": 1494 + }, + { + "epoch": 0.3970256274067189, + "grad_norm": 1.0780622136577362, + 
"learning_rate": 1.882121695707829e-05, + "loss": 0.3103981614112854, + "step": 1495 + }, + { + "epoch": 0.3972911963882618, + "grad_norm": 1.066229649085828, + "learning_rate": 1.8819147961638104e-05, + "loss": 0.33847716450691223, + "step": 1496 + }, + { + "epoch": 0.39755676536980483, + "grad_norm": 0.943119049120047, + "learning_rate": 1.8817077265970196e-05, + "loss": 0.3080996870994568, + "step": 1497 + }, + { + "epoch": 0.3978223343513478, + "grad_norm": 0.9758181744675688, + "learning_rate": 1.8815004870473777e-05, + "loss": 0.3247831463813782, + "step": 1498 + }, + { + "epoch": 0.39808790333289074, + "grad_norm": 0.9965389459031595, + "learning_rate": 1.8812930775548387e-05, + "loss": 0.2919698655605316, + "step": 1499 + }, + { + "epoch": 0.3983534723144337, + "grad_norm": 1.1815639690812958, + "learning_rate": 1.8810854981593883e-05, + "loss": 0.3627319931983948, + "step": 1500 + }, + { + "epoch": 0.39861904129597664, + "grad_norm": 1.0245222516327634, + "learning_rate": 1.880877748901045e-05, + "loss": 0.3619319796562195, + "step": 1501 + }, + { + "epoch": 0.3988846102775196, + "grad_norm": 1.0294076265521692, + "learning_rate": 1.8806698298198608e-05, + "loss": 0.3393789827823639, + "step": 1502 + }, + { + "epoch": 0.39915017925906254, + "grad_norm": 1.1375999694611314, + "learning_rate": 1.88046174095592e-05, + "loss": 0.3736116886138916, + "step": 1503 + }, + { + "epoch": 0.3994157482406055, + "grad_norm": 0.9615847393601772, + "learning_rate": 1.8802534823493395e-05, + "loss": 0.32829388976097107, + "step": 1504 + }, + { + "epoch": 0.39968131722214845, + "grad_norm": 1.004520084683698, + "learning_rate": 1.8800450540402694e-05, + "loss": 0.340041846036911, + "step": 1505 + }, + { + "epoch": 0.3999468862036914, + "grad_norm": 1.6423190284198783, + "learning_rate": 1.8798364560688917e-05, + "loss": 0.2830736041069031, + "step": 1506 + }, + { + "epoch": 0.40021245518523435, + "grad_norm": 1.126838308447994, + "learning_rate": 1.8796276884754224e-05, + 
"loss": 0.33011579513549805, + "step": 1507 + }, + { + "epoch": 0.4004780241667773, + "grad_norm": 1.0024833819275993, + "learning_rate": 1.8794187513001088e-05, + "loss": 0.2893834114074707, + "step": 1508 + }, + { + "epoch": 0.40074359314832025, + "grad_norm": 1.0682148927963429, + "learning_rate": 1.8792096445832317e-05, + "loss": 0.3590015172958374, + "step": 1509 + }, + { + "epoch": 0.4010091621298632, + "grad_norm": 1.1883404603513603, + "learning_rate": 1.8790003683651045e-05, + "loss": 0.3968508541584015, + "step": 1510 + }, + { + "epoch": 0.4012747311114062, + "grad_norm": 1.1506641785596874, + "learning_rate": 1.878790922686073e-05, + "loss": 0.324398934841156, + "step": 1511 + }, + { + "epoch": 0.40154030009294917, + "grad_norm": 1.0455658872732225, + "learning_rate": 1.8785813075865164e-05, + "loss": 0.35111895203590393, + "step": 1512 + }, + { + "epoch": 0.4018058690744921, + "grad_norm": 1.055231257150353, + "learning_rate": 1.8783715231068452e-05, + "loss": 0.28124356269836426, + "step": 1513 + }, + { + "epoch": 0.40207143805603507, + "grad_norm": 1.0070468428923411, + "learning_rate": 1.878161569287504e-05, + "loss": 0.28962311148643494, + "step": 1514 + }, + { + "epoch": 0.402337007037578, + "grad_norm": 1.0934983041480315, + "learning_rate": 1.877951446168969e-05, + "loss": 0.3646606206893921, + "step": 1515 + }, + { + "epoch": 0.402602576019121, + "grad_norm": 1.1065863254454682, + "learning_rate": 1.8777411537917497e-05, + "loss": 0.2815355360507965, + "step": 1516 + }, + { + "epoch": 0.4028681450006639, + "grad_norm": 1.1372178900816394, + "learning_rate": 1.877530692196388e-05, + "loss": 0.33208370208740234, + "step": 1517 + }, + { + "epoch": 0.4031337139822069, + "grad_norm": 1.0968319662456871, + "learning_rate": 1.8773200614234587e-05, + "loss": 0.33741289377212524, + "step": 1518 + }, + { + "epoch": 0.40339928296374983, + "grad_norm": 1.1178822197952292, + "learning_rate": 1.877109261513568e-05, + "loss": 0.31304073333740234, + "step": 
1519 + }, + { + "epoch": 0.4036648519452928, + "grad_norm": 1.264796618244999, + "learning_rate": 1.8768982925073566e-05, + "loss": 0.32556387782096863, + "step": 1520 + }, + { + "epoch": 0.40393042092683573, + "grad_norm": 1.1057344226732335, + "learning_rate": 1.8766871544454963e-05, + "loss": 0.3584224581718445, + "step": 1521 + }, + { + "epoch": 0.4041959899083787, + "grad_norm": 1.0109621512685618, + "learning_rate": 1.8764758473686918e-05, + "loss": 0.2864416837692261, + "step": 1522 + }, + { + "epoch": 0.40446155888992164, + "grad_norm": 1.0390539229722413, + "learning_rate": 1.8762643713176815e-05, + "loss": 0.28925320506095886, + "step": 1523 + }, + { + "epoch": 0.4047271278714646, + "grad_norm": 1.022628245189221, + "learning_rate": 1.876052726333235e-05, + "loss": 0.30940550565719604, + "step": 1524 + }, + { + "epoch": 0.4049926968530076, + "grad_norm": 1.1648500528958037, + "learning_rate": 1.875840912456155e-05, + "loss": 0.3463154733181, + "step": 1525 + }, + { + "epoch": 0.40525826583455055, + "grad_norm": 1.1823420506345301, + "learning_rate": 1.8756289297272764e-05, + "loss": 0.3349658250808716, + "step": 1526 + }, + { + "epoch": 0.4055238348160935, + "grad_norm": 1.0511817500052025, + "learning_rate": 1.8754167781874674e-05, + "loss": 0.32588714361190796, + "step": 1527 + }, + { + "epoch": 0.40578940379763645, + "grad_norm": 1.0750045197041278, + "learning_rate": 1.875204457877628e-05, + "loss": 0.33787310123443604, + "step": 1528 + }, + { + "epoch": 0.4060549727791794, + "grad_norm": 1.0444881434472735, + "learning_rate": 1.8749919688386912e-05, + "loss": 0.3223261833190918, + "step": 1529 + }, + { + "epoch": 0.40632054176072235, + "grad_norm": 1.2251483540500576, + "learning_rate": 1.8747793111116226e-05, + "loss": 0.38505882024765015, + "step": 1530 + }, + { + "epoch": 0.4065861107422653, + "grad_norm": 1.077913563059366, + "learning_rate": 1.8745664847374197e-05, + "loss": 0.33071833848953247, + "step": 1531 + }, + { + "epoch": 
0.40685167972380826, + "grad_norm": 1.2405893427169952, + "learning_rate": 1.874353489757113e-05, + "loss": 0.36603987216949463, + "step": 1532 + }, + { + "epoch": 0.4071172487053512, + "grad_norm": 0.9982674001932202, + "learning_rate": 1.874140326211766e-05, + "loss": 0.3103085160255432, + "step": 1533 + }, + { + "epoch": 0.40738281768689416, + "grad_norm": 1.1470515997968143, + "learning_rate": 1.873926994142473e-05, + "loss": 0.3471127152442932, + "step": 1534 + }, + { + "epoch": 0.4076483866684371, + "grad_norm": 1.0759117431352352, + "learning_rate": 1.873713493590363e-05, + "loss": 0.33152899146080017, + "step": 1535 + }, + { + "epoch": 0.40791395564998006, + "grad_norm": 1.0887192073538825, + "learning_rate": 1.8734998245965958e-05, + "loss": 0.340177059173584, + "step": 1536 + }, + { + "epoch": 0.408179524631523, + "grad_norm": 1.175803638176176, + "learning_rate": 1.8732859872023644e-05, + "loss": 0.3331618010997772, + "step": 1537 + }, + { + "epoch": 0.40844509361306597, + "grad_norm": 1.0971311272588662, + "learning_rate": 1.8730719814488937e-05, + "loss": 0.3911997675895691, + "step": 1538 + }, + { + "epoch": 0.408710662594609, + "grad_norm": 1.0986179012488992, + "learning_rate": 1.8728578073774427e-05, + "loss": 0.3699817955493927, + "step": 1539 + }, + { + "epoch": 0.4089762315761519, + "grad_norm": 1.086312859301249, + "learning_rate": 1.8726434650293e-05, + "loss": 0.31567275524139404, + "step": 1540 + }, + { + "epoch": 0.4092418005576949, + "grad_norm": 1.1099279461258769, + "learning_rate": 1.8724289544457897e-05, + "loss": 0.3387305438518524, + "step": 1541 + }, + { + "epoch": 0.40950736953923783, + "grad_norm": 1.6366665349052443, + "learning_rate": 1.8722142756682663e-05, + "loss": 0.3460234999656677, + "step": 1542 + }, + { + "epoch": 0.4097729385207808, + "grad_norm": 1.1109783591024025, + "learning_rate": 1.8719994287381173e-05, + "loss": 0.35653382539749146, + "step": 1543 + }, + { + "epoch": 0.41003850750232373, + "grad_norm": 
1.1054235252004945, + "learning_rate": 1.8717844136967626e-05, + "loss": 0.3828277885913849, + "step": 1544 + }, + { + "epoch": 0.4103040764838667, + "grad_norm": 1.0929819002464054, + "learning_rate": 1.871569230585655e-05, + "loss": 0.35883858799934387, + "step": 1545 + }, + { + "epoch": 0.41056964546540964, + "grad_norm": 0.988264800308937, + "learning_rate": 1.8713538794462783e-05, + "loss": 0.27414464950561523, + "step": 1546 + }, + { + "epoch": 0.4108352144469526, + "grad_norm": 1.0216234157414708, + "learning_rate": 1.871138360320151e-05, + "loss": 0.2924337387084961, + "step": 1547 + }, + { + "epoch": 0.41110078342849554, + "grad_norm": 1.1264719097344291, + "learning_rate": 1.8709226732488216e-05, + "loss": 0.34270918369293213, + "step": 1548 + }, + { + "epoch": 0.4113663524100385, + "grad_norm": 1.056133674601812, + "learning_rate": 1.870706818273872e-05, + "loss": 0.33866482973098755, + "step": 1549 + }, + { + "epoch": 0.41163192139158145, + "grad_norm": 1.0578429496037574, + "learning_rate": 1.8704907954369176e-05, + "loss": 0.3350633382797241, + "step": 1550 + }, + { + "epoch": 0.4118974903731244, + "grad_norm": 1.0981882806330738, + "learning_rate": 1.870274604779604e-05, + "loss": 0.32763785123825073, + "step": 1551 + }, + { + "epoch": 0.41216305935466735, + "grad_norm": 1.1235534336905566, + "learning_rate": 1.8700582463436102e-05, + "loss": 0.3130378723144531, + "step": 1552 + }, + { + "epoch": 0.41242862833621036, + "grad_norm": 1.1311593123986747, + "learning_rate": 1.8698417201706484e-05, + "loss": 0.34318777918815613, + "step": 1553 + }, + { + "epoch": 0.4126941973177533, + "grad_norm": 1.038517953287962, + "learning_rate": 1.8696250263024617e-05, + "loss": 0.3250104784965515, + "step": 1554 + }, + { + "epoch": 0.41295976629929626, + "grad_norm": 1.1047081419569766, + "learning_rate": 1.869408164780826e-05, + "loss": 0.3409217298030853, + "step": 1555 + }, + { + "epoch": 0.4132253352808392, + "grad_norm": 0.9892429720688775, + "learning_rate": 
1.86919113564755e-05, + "loss": 0.2885017395019531, + "step": 1556 + }, + { + "epoch": 0.41349090426238216, + "grad_norm": 0.9861078966083267, + "learning_rate": 1.8689739389444744e-05, + "loss": 0.31912562251091003, + "step": 1557 + }, + { + "epoch": 0.4137564732439251, + "grad_norm": 1.0037060940033242, + "learning_rate": 1.8687565747134716e-05, + "loss": 0.29874011874198914, + "step": 1558 + }, + { + "epoch": 0.41402204222546807, + "grad_norm": 1.0308167425812278, + "learning_rate": 1.8685390429964473e-05, + "loss": 0.3132701516151428, + "step": 1559 + }, + { + "epoch": 0.414287611207011, + "grad_norm": 1.0029824533275895, + "learning_rate": 1.868321343835339e-05, + "loss": 0.31158843636512756, + "step": 1560 + }, + { + "epoch": 0.41455318018855397, + "grad_norm": 0.959841401113078, + "learning_rate": 1.8681034772721167e-05, + "loss": 0.30490344762802124, + "step": 1561 + }, + { + "epoch": 0.4148187491700969, + "grad_norm": 1.1053356359227535, + "learning_rate": 1.867885443348782e-05, + "loss": 0.3150998055934906, + "step": 1562 + }, + { + "epoch": 0.4150843181516399, + "grad_norm": 1.0578010897773087, + "learning_rate": 1.86766724210737e-05, + "loss": 0.3391645550727844, + "step": 1563 + }, + { + "epoch": 0.4153498871331828, + "grad_norm": 1.1317933031731224, + "learning_rate": 1.8674488735899466e-05, + "loss": 0.35013002157211304, + "step": 1564 + }, + { + "epoch": 0.4156154561147258, + "grad_norm": 1.1514144052665038, + "learning_rate": 1.867230337838611e-05, + "loss": 0.3455789387226105, + "step": 1565 + }, + { + "epoch": 0.41588102509626873, + "grad_norm": 1.0985743755307058, + "learning_rate": 1.8670116348954945e-05, + "loss": 0.3179319500923157, + "step": 1566 + }, + { + "epoch": 0.41614659407781174, + "grad_norm": 1.046997092909125, + "learning_rate": 1.8667927648027596e-05, + "loss": 0.3628920018672943, + "step": 1567 + }, + { + "epoch": 0.4164121630593547, + "grad_norm": 1.1175553372657145, + "learning_rate": 1.8665737276026033e-05, + "loss": 
0.33599400520324707, + "step": 1568 + }, + { + "epoch": 0.41667773204089764, + "grad_norm": 1.0741100001694928, + "learning_rate": 1.8663545233372524e-05, + "loss": 0.31519144773483276, + "step": 1569 + }, + { + "epoch": 0.4169433010224406, + "grad_norm": 1.0564388001425704, + "learning_rate": 1.8661351520489667e-05, + "loss": 0.3326237201690674, + "step": 1570 + }, + { + "epoch": 0.41720887000398355, + "grad_norm": 1.0506499046982631, + "learning_rate": 1.865915613780039e-05, + "loss": 0.35254499316215515, + "step": 1571 + }, + { + "epoch": 0.4174744389855265, + "grad_norm": 1.134962500533026, + "learning_rate": 1.8656959085727936e-05, + "loss": 0.36689436435699463, + "step": 1572 + }, + { + "epoch": 0.41774000796706945, + "grad_norm": 1.104702895545828, + "learning_rate": 1.8654760364695873e-05, + "loss": 0.3113600015640259, + "step": 1573 + }, + { + "epoch": 0.4180055769486124, + "grad_norm": 1.0072243279377031, + "learning_rate": 1.865255997512808e-05, + "loss": 0.3336432874202728, + "step": 1574 + }, + { + "epoch": 0.41827114593015535, + "grad_norm": 1.1762721663897004, + "learning_rate": 1.8650357917448774e-05, + "loss": 0.3657492995262146, + "step": 1575 + }, + { + "epoch": 0.4185367149116983, + "grad_norm": 1.1286123264778107, + "learning_rate": 1.864815419208248e-05, + "loss": 0.3087846338748932, + "step": 1576 + }, + { + "epoch": 0.41880228389324126, + "grad_norm": 1.059893684126419, + "learning_rate": 1.8645948799454058e-05, + "loss": 0.31422343850135803, + "step": 1577 + }, + { + "epoch": 0.4190678528747842, + "grad_norm": 1.0232345658393134, + "learning_rate": 1.8643741739988672e-05, + "loss": 0.3172760009765625, + "step": 1578 + }, + { + "epoch": 0.41933342185632716, + "grad_norm": 1.131569038679809, + "learning_rate": 1.8641533014111824e-05, + "loss": 0.36819136142730713, + "step": 1579 + }, + { + "epoch": 0.4195989908378701, + "grad_norm": 1.0215370560204735, + "learning_rate": 1.863932262224933e-05, + "loss": 0.29081088304519653, + "step": 1580 + 
}, + { + "epoch": 0.4198645598194131, + "grad_norm": 1.0406040134422527, + "learning_rate": 1.8637110564827325e-05, + "loss": 0.3209632635116577, + "step": 1581 + }, + { + "epoch": 0.42013012880095607, + "grad_norm": 1.9161132832998955, + "learning_rate": 1.863489684227227e-05, + "loss": 0.3357914686203003, + "step": 1582 + }, + { + "epoch": 0.420395697782499, + "grad_norm": 1.0469990353974015, + "learning_rate": 1.8632681455010937e-05, + "loss": 0.285677969455719, + "step": 1583 + }, + { + "epoch": 0.420661266764042, + "grad_norm": 1.1491447855439996, + "learning_rate": 1.8630464403470435e-05, + "loss": 0.377876341342926, + "step": 1584 + }, + { + "epoch": 0.4209268357455849, + "grad_norm": 1.0642007656116979, + "learning_rate": 1.8628245688078187e-05, + "loss": 0.3141768276691437, + "step": 1585 + }, + { + "epoch": 0.4211924047271279, + "grad_norm": 1.078787810404599, + "learning_rate": 1.8626025309261927e-05, + "loss": 0.34249693155288696, + "step": 1586 + }, + { + "epoch": 0.42145797370867083, + "grad_norm": 1.1583509747022063, + "learning_rate": 1.8623803267449722e-05, + "loss": 0.32564717531204224, + "step": 1587 + }, + { + "epoch": 0.4217235426902138, + "grad_norm": 1.0623179841052965, + "learning_rate": 1.8621579563069957e-05, + "loss": 0.3425004184246063, + "step": 1588 + }, + { + "epoch": 0.42198911167175673, + "grad_norm": 1.05392590229203, + "learning_rate": 1.8619354196551333e-05, + "loss": 0.3676222562789917, + "step": 1589 + }, + { + "epoch": 0.4222546806532997, + "grad_norm": 0.9612536546184688, + "learning_rate": 1.8617127168322877e-05, + "loss": 0.28915971517562866, + "step": 1590 + }, + { + "epoch": 0.42252024963484264, + "grad_norm": 1.1293248025877465, + "learning_rate": 1.8614898478813933e-05, + "loss": 0.3387221097946167, + "step": 1591 + }, + { + "epoch": 0.4227858186163856, + "grad_norm": 1.0804518757125117, + "learning_rate": 1.8612668128454164e-05, + "loss": 0.33886784315109253, + "step": 1592 + }, + { + "epoch": 0.42305138759792854, + 
"grad_norm": 1.0780507904890781, + "learning_rate": 1.8610436117673557e-05, + "loss": 0.3364121913909912, + "step": 1593 + }, + { + "epoch": 0.4233169565794715, + "grad_norm": 1.0590527240631433, + "learning_rate": 1.8608202446902418e-05, + "loss": 0.3661370873451233, + "step": 1594 + }, + { + "epoch": 0.4235825255610145, + "grad_norm": 1.254416564930449, + "learning_rate": 1.8605967116571372e-05, + "loss": 0.2980557680130005, + "step": 1595 + }, + { + "epoch": 0.42384809454255745, + "grad_norm": 1.180518248335952, + "learning_rate": 1.8603730127111363e-05, + "loss": 0.36112043261528015, + "step": 1596 + }, + { + "epoch": 0.4241136635241004, + "grad_norm": 0.9967676484164163, + "learning_rate": 1.860149147895366e-05, + "loss": 0.30641958117485046, + "step": 1597 + }, + { + "epoch": 0.42437923250564336, + "grad_norm": 1.06006138769355, + "learning_rate": 1.8599251172529836e-05, + "loss": 0.3312561511993408, + "step": 1598 + }, + { + "epoch": 0.4246448014871863, + "grad_norm": 1.070580032885208, + "learning_rate": 1.859700920827181e-05, + "loss": 0.3757131099700928, + "step": 1599 + }, + { + "epoch": 0.42491037046872926, + "grad_norm": 1.0514692584176801, + "learning_rate": 1.8594765586611805e-05, + "loss": 0.3225080370903015, + "step": 1600 + }, + { + "epoch": 0.4251759394502722, + "grad_norm": 1.0857454483782787, + "learning_rate": 1.859252030798236e-05, + "loss": 0.35943928360939026, + "step": 1601 + }, + { + "epoch": 0.42544150843181516, + "grad_norm": 0.9907794348406631, + "learning_rate": 1.859027337281633e-05, + "loss": 0.29319390654563904, + "step": 1602 + }, + { + "epoch": 0.4257070774133581, + "grad_norm": 1.1441852776057728, + "learning_rate": 1.8588024781546914e-05, + "loss": 0.32320237159729004, + "step": 1603 + }, + { + "epoch": 0.42597264639490107, + "grad_norm": 1.1070076098385897, + "learning_rate": 1.8585774534607606e-05, + "loss": 0.3381520211696625, + "step": 1604 + }, + { + "epoch": 0.426238215376444, + "grad_norm": 0.9826840529093485, + 
"learning_rate": 1.858352263243223e-05, + "loss": 0.30010825395584106, + "step": 1605 + }, + { + "epoch": 0.42650378435798697, + "grad_norm": 0.9805553200940528, + "learning_rate": 1.8581269075454918e-05, + "loss": 0.26282748579978943, + "step": 1606 + }, + { + "epoch": 0.4267693533395299, + "grad_norm": 1.0395702570014627, + "learning_rate": 1.857901386411014e-05, + "loss": 0.33613401651382446, + "step": 1607 + }, + { + "epoch": 0.4270349223210729, + "grad_norm": 1.1625768546626036, + "learning_rate": 1.8576756998832667e-05, + "loss": 0.34522315859794617, + "step": 1608 + }, + { + "epoch": 0.4273004913026159, + "grad_norm": 1.0776480516530333, + "learning_rate": 1.8574498480057598e-05, + "loss": 0.3253153860569, + "step": 1609 + }, + { + "epoch": 0.42756606028415883, + "grad_norm": 1.177683979502923, + "learning_rate": 1.8572238308220347e-05, + "loss": 0.32180655002593994, + "step": 1610 + }, + { + "epoch": 0.4278316292657018, + "grad_norm": 1.2444289754345055, + "learning_rate": 1.856997648375665e-05, + "loss": 0.3274008333683014, + "step": 1611 + }, + { + "epoch": 0.42809719824724474, + "grad_norm": 1.006782047196068, + "learning_rate": 1.8567713007102565e-05, + "loss": 0.3196510374546051, + "step": 1612 + }, + { + "epoch": 0.4283627672287877, + "grad_norm": 1.0069133029708661, + "learning_rate": 1.8565447878694455e-05, + "loss": 0.2759617567062378, + "step": 1613 + }, + { + "epoch": 0.42862833621033064, + "grad_norm": 1.1572573238869637, + "learning_rate": 1.8563181098969017e-05, + "loss": 0.35069289803504944, + "step": 1614 + }, + { + "epoch": 0.4288939051918736, + "grad_norm": 1.1400434606874466, + "learning_rate": 1.8560912668363253e-05, + "loss": 0.3388484716415405, + "step": 1615 + }, + { + "epoch": 0.42915947417341654, + "grad_norm": 1.0338736294243014, + "learning_rate": 1.8558642587314496e-05, + "loss": 0.34116029739379883, + "step": 1616 + }, + { + "epoch": 0.4294250431549595, + "grad_norm": 1.0487376701262667, + "learning_rate": 
1.8556370856260387e-05, + "loss": 0.30212706327438354, + "step": 1617 + }, + { + "epoch": 0.42969061213650245, + "grad_norm": 1.0633174136084793, + "learning_rate": 1.855409747563889e-05, + "loss": 0.32250338792800903, + "step": 1618 + }, + { + "epoch": 0.4299561811180454, + "grad_norm": 1.132237618998821, + "learning_rate": 1.8551822445888285e-05, + "loss": 0.35972943902015686, + "step": 1619 + }, + { + "epoch": 0.43022175009958835, + "grad_norm": 0.9921112897877987, + "learning_rate": 1.8549545767447174e-05, + "loss": 0.3112533390522003, + "step": 1620 + }, + { + "epoch": 0.4304873190811313, + "grad_norm": 1.0331176116114555, + "learning_rate": 1.854726744075447e-05, + "loss": 0.3044458031654358, + "step": 1621 + }, + { + "epoch": 0.43075288806267426, + "grad_norm": 1.0421498129424722, + "learning_rate": 1.8544987466249412e-05, + "loss": 0.3261772096157074, + "step": 1622 + }, + { + "epoch": 0.43101845704421726, + "grad_norm": 1.3249821498842442, + "learning_rate": 1.8542705844371544e-05, + "loss": 0.3485907018184662, + "step": 1623 + }, + { + "epoch": 0.4312840260257602, + "grad_norm": 2.6643478315387576, + "learning_rate": 1.8540422575560747e-05, + "loss": 0.3016113340854645, + "step": 1624 + }, + { + "epoch": 0.43154959500730317, + "grad_norm": 1.021133157663628, + "learning_rate": 1.8538137660257198e-05, + "loss": 0.35383081436157227, + "step": 1625 + }, + { + "epoch": 0.4318151639888461, + "grad_norm": 1.170997891522692, + "learning_rate": 1.8535851098901406e-05, + "loss": 0.32015109062194824, + "step": 1626 + }, + { + "epoch": 0.43208073297038907, + "grad_norm": 1.1526156179794622, + "learning_rate": 1.8533562891934195e-05, + "loss": 0.3801743984222412, + "step": 1627 + }, + { + "epoch": 0.432346301951932, + "grad_norm": 1.0686097183664227, + "learning_rate": 1.85312730397967e-05, + "loss": 0.33140939474105835, + "step": 1628 + }, + { + "epoch": 0.432611870933475, + "grad_norm": 1.232101025230023, + "learning_rate": 1.8528981542930382e-05, + "loss": 
0.4052904546260834, + "step": 1629 + }, + { + "epoch": 0.4328774399150179, + "grad_norm": 1.0850305465298753, + "learning_rate": 1.8526688401777014e-05, + "loss": 0.3661607801914215, + "step": 1630 + }, + { + "epoch": 0.4331430088965609, + "grad_norm": 1.0520968780833948, + "learning_rate": 1.852439361677868e-05, + "loss": 0.33260756731033325, + "step": 1631 + }, + { + "epoch": 0.43340857787810383, + "grad_norm": 1.0137607762513057, + "learning_rate": 1.85220971883778e-05, + "loss": 0.30222776532173157, + "step": 1632 + }, + { + "epoch": 0.4336741468596468, + "grad_norm": 1.1138822281677037, + "learning_rate": 1.8519799117017086e-05, + "loss": 0.3444751799106598, + "step": 1633 + }, + { + "epoch": 0.43393971584118973, + "grad_norm": 1.0896517914007275, + "learning_rate": 1.8517499403139586e-05, + "loss": 0.33887404203414917, + "step": 1634 + }, + { + "epoch": 0.4342052848227327, + "grad_norm": 0.9260010903737679, + "learning_rate": 1.8515198047188652e-05, + "loss": 0.287893146276474, + "step": 1635 + }, + { + "epoch": 0.43447085380427564, + "grad_norm": 1.0080783350179279, + "learning_rate": 1.8512895049607965e-05, + "loss": 0.32236215472221375, + "step": 1636 + }, + { + "epoch": 0.43473642278581864, + "grad_norm": 1.0861808896793093, + "learning_rate": 1.8510590410841515e-05, + "loss": 0.30670079588890076, + "step": 1637 + }, + { + "epoch": 0.4350019917673616, + "grad_norm": 1.045996826542631, + "learning_rate": 1.8508284131333604e-05, + "loss": 0.34104713797569275, + "step": 1638 + }, + { + "epoch": 0.43526756074890455, + "grad_norm": 1.13616869746559, + "learning_rate": 1.8505976211528857e-05, + "loss": 0.3402378559112549, + "step": 1639 + }, + { + "epoch": 0.4355331297304475, + "grad_norm": 1.1414650328718847, + "learning_rate": 1.8503666651872217e-05, + "loss": 0.35236096382141113, + "step": 1640 + }, + { + "epoch": 0.43579869871199045, + "grad_norm": 1.1137846416322885, + "learning_rate": 1.850135545280894e-05, + "loss": 0.3385634422302246, + "step": 1641 + 
}, + { + "epoch": 0.4360642676935334, + "grad_norm": 1.0049349552180111, + "learning_rate": 1.849904261478459e-05, + "loss": 0.32222414016723633, + "step": 1642 + }, + { + "epoch": 0.43632983667507635, + "grad_norm": 1.1246487142505726, + "learning_rate": 1.8496728138245062e-05, + "loss": 0.3251120448112488, + "step": 1643 + }, + { + "epoch": 0.4365954056566193, + "grad_norm": 1.3230672810485753, + "learning_rate": 1.8494412023636563e-05, + "loss": 0.3199063837528229, + "step": 1644 + }, + { + "epoch": 0.43686097463816226, + "grad_norm": 1.031106173264746, + "learning_rate": 1.8492094271405605e-05, + "loss": 0.3470883071422577, + "step": 1645 + }, + { + "epoch": 0.4371265436197052, + "grad_norm": 1.1420067933967792, + "learning_rate": 1.848977488199903e-05, + "loss": 0.319596529006958, + "step": 1646 + }, + { + "epoch": 0.43739211260124816, + "grad_norm": 1.172387725238046, + "learning_rate": 1.848745385586398e-05, + "loss": 0.3445591628551483, + "step": 1647 + }, + { + "epoch": 0.4376576815827911, + "grad_norm": 1.0622512502557289, + "learning_rate": 1.848513119344793e-05, + "loss": 0.35861149430274963, + "step": 1648 + }, + { + "epoch": 0.43792325056433407, + "grad_norm": 1.3423176489021205, + "learning_rate": 1.8482806895198658e-05, + "loss": 0.36727622151374817, + "step": 1649 + }, + { + "epoch": 0.438188819545877, + "grad_norm": 1.0985203266462633, + "learning_rate": 1.848048096156426e-05, + "loss": 0.3505704402923584, + "step": 1650 + }, + { + "epoch": 0.43845438852742, + "grad_norm": 1.050005044594017, + "learning_rate": 1.8478153392993154e-05, + "loss": 0.3508742153644562, + "step": 1651 + }, + { + "epoch": 0.438719957508963, + "grad_norm": 1.0688095584032915, + "learning_rate": 1.8475824189934063e-05, + "loss": 0.32757264375686646, + "step": 1652 + }, + { + "epoch": 0.43898552649050593, + "grad_norm": 1.0768843323365103, + "learning_rate": 1.8473493352836032e-05, + "loss": 0.3117530643939972, + "step": 1653 + }, + { + "epoch": 0.4392510954720489, + 
"grad_norm": 1.1751248406507369, + "learning_rate": 1.8471160882148417e-05, + "loss": 0.3506043553352356, + "step": 1654 + }, + { + "epoch": 0.43951666445359183, + "grad_norm": 1.1247697965204402, + "learning_rate": 1.8468826778320892e-05, + "loss": 0.33997148275375366, + "step": 1655 + }, + { + "epoch": 0.4397822334351348, + "grad_norm": 1.007133328419329, + "learning_rate": 1.8466491041803446e-05, + "loss": 0.30060335993766785, + "step": 1656 + }, + { + "epoch": 0.44004780241667774, + "grad_norm": 0.9546594059496064, + "learning_rate": 1.846415367304638e-05, + "loss": 0.3057805597782135, + "step": 1657 + }, + { + "epoch": 0.4403133713982207, + "grad_norm": 1.006954520739026, + "learning_rate": 1.846181467250031e-05, + "loss": 0.30772098898887634, + "step": 1658 + }, + { + "epoch": 0.44057894037976364, + "grad_norm": 1.043209753174748, + "learning_rate": 1.845947404061617e-05, + "loss": 0.3183813989162445, + "step": 1659 + }, + { + "epoch": 0.4408445093613066, + "grad_norm": 1.0413807475941115, + "learning_rate": 1.8457131777845204e-05, + "loss": 0.2986184358596802, + "step": 1660 + }, + { + "epoch": 0.44111007834284954, + "grad_norm": 1.0330249735438937, + "learning_rate": 1.8454787884638973e-05, + "loss": 0.33342432975769043, + "step": 1661 + }, + { + "epoch": 0.4413756473243925, + "grad_norm": 1.6337494282252796, + "learning_rate": 1.8452442361449353e-05, + "loss": 0.33435192704200745, + "step": 1662 + }, + { + "epoch": 0.44164121630593545, + "grad_norm": 1.1084487395338765, + "learning_rate": 1.8450095208728537e-05, + "loss": 0.31596100330352783, + "step": 1663 + }, + { + "epoch": 0.4419067852874784, + "grad_norm": 1.0372033094770008, + "learning_rate": 1.8447746426929022e-05, + "loss": 0.29850512742996216, + "step": 1664 + }, + { + "epoch": 0.4421723542690214, + "grad_norm": 1.1891933812209383, + "learning_rate": 1.8445396016503628e-05, + "loss": 0.34898555278778076, + "step": 1665 + }, + { + "epoch": 0.44243792325056436, + "grad_norm": 1.0486597661615855, + 
"learning_rate": 1.8443043977905484e-05, + "loss": 0.283272385597229, + "step": 1666 + }, + { + "epoch": 0.4427034922321073, + "grad_norm": 1.041766578180328, + "learning_rate": 1.844069031158804e-05, + "loss": 0.32765433192253113, + "step": 1667 + }, + { + "epoch": 0.44296906121365026, + "grad_norm": 1.1465241668847563, + "learning_rate": 1.8438335018005052e-05, + "loss": 0.347957044839859, + "step": 1668 + }, + { + "epoch": 0.4432346301951932, + "grad_norm": 1.1330493919292772, + "learning_rate": 1.8435978097610594e-05, + "loss": 0.36188018321990967, + "step": 1669 + }, + { + "epoch": 0.44350019917673617, + "grad_norm": 1.1541714860130494, + "learning_rate": 1.843361955085905e-05, + "loss": 0.35944315791130066, + "step": 1670 + }, + { + "epoch": 0.4437657681582791, + "grad_norm": 1.0564596521414393, + "learning_rate": 1.8431259378205122e-05, + "loss": 0.33441367745399475, + "step": 1671 + }, + { + "epoch": 0.44403133713982207, + "grad_norm": 1.1043363461383413, + "learning_rate": 1.8428897580103827e-05, + "loss": 0.3157849907875061, + "step": 1672 + }, + { + "epoch": 0.444296906121365, + "grad_norm": 1.0760645254646117, + "learning_rate": 1.8426534157010486e-05, + "loss": 0.33416497707366943, + "step": 1673 + }, + { + "epoch": 0.444562475102908, + "grad_norm": 1.1629646905519946, + "learning_rate": 1.842416910938074e-05, + "loss": 0.3611617684364319, + "step": 1674 + }, + { + "epoch": 0.4448280440844509, + "grad_norm": 1.079831089952362, + "learning_rate": 1.8421802437670546e-05, + "loss": 0.3030395805835724, + "step": 1675 + }, + { + "epoch": 0.4450936130659939, + "grad_norm": 0.9867988845558019, + "learning_rate": 1.8419434142336167e-05, + "loss": 0.30281510949134827, + "step": 1676 + }, + { + "epoch": 0.44535918204753683, + "grad_norm": 1.2041533085675928, + "learning_rate": 1.8417064223834184e-05, + "loss": 0.3489738404750824, + "step": 1677 + }, + { + "epoch": 0.4456247510290798, + "grad_norm": 1.0320394434428715, + "learning_rate": 1.8414692682621487e-05, + 
"loss": 0.30453425645828247, + "step": 1678 + }, + { + "epoch": 0.44589032001062273, + "grad_norm": 0.9586890082829097, + "learning_rate": 1.841231951915528e-05, + "loss": 0.28717339038848877, + "step": 1679 + }, + { + "epoch": 0.44615588899216574, + "grad_norm": 1.0685350052372018, + "learning_rate": 1.840994473389309e-05, + "loss": 0.3227912187576294, + "step": 1680 + }, + { + "epoch": 0.4464214579737087, + "grad_norm": 1.0774879432227336, + "learning_rate": 1.8407568327292737e-05, + "loss": 0.3575928807258606, + "step": 1681 + }, + { + "epoch": 0.44668702695525164, + "grad_norm": 1.0240612597420884, + "learning_rate": 1.840519029981237e-05, + "loss": 0.35601454973220825, + "step": 1682 + }, + { + "epoch": 0.4469525959367946, + "grad_norm": 1.1829639598617365, + "learning_rate": 1.8402810651910444e-05, + "loss": 0.34867429733276367, + "step": 1683 + }, + { + "epoch": 0.44721816491833755, + "grad_norm": 1.0185115495756123, + "learning_rate": 1.8400429384045724e-05, + "loss": 0.3333359360694885, + "step": 1684 + }, + { + "epoch": 0.4474837338998805, + "grad_norm": 1.1658514468774803, + "learning_rate": 1.8398046496677296e-05, + "loss": 0.3269057273864746, + "step": 1685 + }, + { + "epoch": 0.44774930288142345, + "grad_norm": 1.0186865264151983, + "learning_rate": 1.839566199026455e-05, + "loss": 0.3507213890552521, + "step": 1686 + }, + { + "epoch": 0.4480148718629664, + "grad_norm": 1.0962029873559684, + "learning_rate": 1.8393275865267185e-05, + "loss": 0.32935822010040283, + "step": 1687 + }, + { + "epoch": 0.44828044084450935, + "grad_norm": 1.168811125319112, + "learning_rate": 1.8390888122145225e-05, + "loss": 0.3780096769332886, + "step": 1688 + }, + { + "epoch": 0.4485460098260523, + "grad_norm": 1.08432540630583, + "learning_rate": 1.8388498761358997e-05, + "loss": 0.3412250578403473, + "step": 1689 + }, + { + "epoch": 0.44881157880759526, + "grad_norm": 1.0725143861051711, + "learning_rate": 1.838610778336914e-05, + "loss": 0.33751022815704346, + "step": 
1690 + }, + { + "epoch": 0.4490771477891382, + "grad_norm": 1.113628501747759, + "learning_rate": 1.8383715188636608e-05, + "loss": 0.35736170411109924, + "step": 1691 + }, + { + "epoch": 0.44934271677068116, + "grad_norm": 1.0608679340591776, + "learning_rate": 1.8381320977622664e-05, + "loss": 0.3133913278579712, + "step": 1692 + }, + { + "epoch": 0.4496082857522241, + "grad_norm": 1.0696112323301112, + "learning_rate": 1.8378925150788886e-05, + "loss": 0.2890821099281311, + "step": 1693 + }, + { + "epoch": 0.4498738547337671, + "grad_norm": 1.0759892831738864, + "learning_rate": 1.8376527708597155e-05, + "loss": 0.34016966819763184, + "step": 1694 + }, + { + "epoch": 0.45013942371531007, + "grad_norm": 1.0933611032669988, + "learning_rate": 1.8374128651509676e-05, + "loss": 0.3502900302410126, + "step": 1695 + }, + { + "epoch": 0.450404992696853, + "grad_norm": 1.1956521483077693, + "learning_rate": 1.8371727979988957e-05, + "loss": 0.31828251481056213, + "step": 1696 + }, + { + "epoch": 0.450670561678396, + "grad_norm": 1.1739995891800665, + "learning_rate": 1.836932569449782e-05, + "loss": 0.33322471380233765, + "step": 1697 + }, + { + "epoch": 0.4509361306599389, + "grad_norm": 0.977715581129718, + "learning_rate": 1.8366921795499394e-05, + "loss": 0.28489458560943604, + "step": 1698 + }, + { + "epoch": 0.4512016996414819, + "grad_norm": 1.0351592490047028, + "learning_rate": 1.8364516283457127e-05, + "loss": 0.3125787079334259, + "step": 1699 + }, + { + "epoch": 0.45146726862302483, + "grad_norm": 1.6801930060854708, + "learning_rate": 1.8362109158834767e-05, + "loss": 0.3352596163749695, + "step": 1700 + }, + { + "epoch": 0.4517328376045678, + "grad_norm": 1.0152758212914303, + "learning_rate": 1.8359700422096385e-05, + "loss": 0.2986747622489929, + "step": 1701 + }, + { + "epoch": 0.45199840658611073, + "grad_norm": 1.0704573865215896, + "learning_rate": 1.8357290073706355e-05, + "loss": 0.3276829123497009, + "step": 1702 + }, + { + "epoch": 
0.4522639755676537, + "grad_norm": 1.05119725558451, + "learning_rate": 1.8354878114129368e-05, + "loss": 0.3183029890060425, + "step": 1703 + }, + { + "epoch": 0.45252954454919664, + "grad_norm": 1.0595099003295023, + "learning_rate": 1.835246454383041e-05, + "loss": 0.32149460911750793, + "step": 1704 + }, + { + "epoch": 0.4527951135307396, + "grad_norm": 1.0365725372264356, + "learning_rate": 1.8350049363274802e-05, + "loss": 0.2963859438896179, + "step": 1705 + }, + { + "epoch": 0.45306068251228254, + "grad_norm": 1.132218144997021, + "learning_rate": 1.8347632572928154e-05, + "loss": 0.35251080989837646, + "step": 1706 + }, + { + "epoch": 0.4533262514938255, + "grad_norm": 1.1840188868504486, + "learning_rate": 1.8345214173256395e-05, + "loss": 0.3585474491119385, + "step": 1707 + }, + { + "epoch": 0.4535918204753685, + "grad_norm": 1.1792148584627284, + "learning_rate": 1.834279416472577e-05, + "loss": 0.32339078187942505, + "step": 1708 + }, + { + "epoch": 0.45385738945691145, + "grad_norm": 1.030916532610971, + "learning_rate": 1.8340372547802822e-05, + "loss": 0.3473295569419861, + "step": 1709 + }, + { + "epoch": 0.4541229584384544, + "grad_norm": 1.149162033618886, + "learning_rate": 1.833794932295441e-05, + "loss": 0.35146117210388184, + "step": 1710 + }, + { + "epoch": 0.45438852741999736, + "grad_norm": 1.080751163824508, + "learning_rate": 1.833552449064771e-05, + "loss": 0.29697534441947937, + "step": 1711 + }, + { + "epoch": 0.4546540964015403, + "grad_norm": 1.0590764839143914, + "learning_rate": 1.8333098051350197e-05, + "loss": 0.30980685353279114, + "step": 1712 + }, + { + "epoch": 0.45491966538308326, + "grad_norm": 1.2023264217964575, + "learning_rate": 1.8330670005529657e-05, + "loss": 0.3271983861923218, + "step": 1713 + }, + { + "epoch": 0.4551852343646262, + "grad_norm": 1.061456665590969, + "learning_rate": 1.8328240353654193e-05, + "loss": 0.3421804904937744, + "step": 1714 + }, + { + "epoch": 0.45545080334616916, + "grad_norm": 
0.988281834877126, + "learning_rate": 1.8325809096192207e-05, + "loss": 0.2949771285057068, + "step": 1715 + }, + { + "epoch": 0.4557163723277121, + "grad_norm": 1.1467541005281106, + "learning_rate": 1.832337623361242e-05, + "loss": 0.35578668117523193, + "step": 1716 + }, + { + "epoch": 0.45598194130925507, + "grad_norm": 1.099618839558401, + "learning_rate": 1.832094176638387e-05, + "loss": 0.3714647889137268, + "step": 1717 + }, + { + "epoch": 0.456247510290798, + "grad_norm": 1.116087725713372, + "learning_rate": 1.8318505694975877e-05, + "loss": 0.36253875494003296, + "step": 1718 + }, + { + "epoch": 0.45651307927234097, + "grad_norm": 1.0310426822464949, + "learning_rate": 1.8316068019858093e-05, + "loss": 0.3148016035556793, + "step": 1719 + }, + { + "epoch": 0.4567786482538839, + "grad_norm": 1.0869949789046671, + "learning_rate": 1.8313628741500476e-05, + "loss": 0.3420512080192566, + "step": 1720 + }, + { + "epoch": 0.4570442172354269, + "grad_norm": 1.0955610437646774, + "learning_rate": 1.831118786037329e-05, + "loss": 0.2941698431968689, + "step": 1721 + }, + { + "epoch": 0.4573097862169699, + "grad_norm": 0.9987507632564111, + "learning_rate": 1.83087453769471e-05, + "loss": 0.3033481240272522, + "step": 1722 + }, + { + "epoch": 0.45757535519851283, + "grad_norm": 1.0508818993675257, + "learning_rate": 1.8306301291692798e-05, + "loss": 0.3405943810939789, + "step": 1723 + }, + { + "epoch": 0.4578409241800558, + "grad_norm": 1.0291343903638976, + "learning_rate": 1.8303855605081567e-05, + "loss": 0.32217931747436523, + "step": 1724 + }, + { + "epoch": 0.45810649316159874, + "grad_norm": 1.1797464113481113, + "learning_rate": 1.8301408317584913e-05, + "loss": 0.3627573847770691, + "step": 1725 + }, + { + "epoch": 0.4583720621431417, + "grad_norm": 1.1425882725361838, + "learning_rate": 1.829895942967464e-05, + "loss": 0.3512224853038788, + "step": 1726 + }, + { + "epoch": 0.45863763112468464, + "grad_norm": 1.1358093316461328, + "learning_rate": 
1.8296508941822868e-05, + "loss": 0.35433265566825867, + "step": 1727 + }, + { + "epoch": 0.4589032001062276, + "grad_norm": 1.1217406683513973, + "learning_rate": 1.829405685450202e-05, + "loss": 0.33105185627937317, + "step": 1728 + }, + { + "epoch": 0.45916876908777055, + "grad_norm": 1.0087946676492725, + "learning_rate": 1.829160316818483e-05, + "loss": 0.31765925884246826, + "step": 1729 + }, + { + "epoch": 0.4594343380693135, + "grad_norm": 1.0268902541251206, + "learning_rate": 1.8289147883344338e-05, + "loss": 0.3276101350784302, + "step": 1730 + }, + { + "epoch": 0.45969990705085645, + "grad_norm": 2.1185922480389676, + "learning_rate": 1.8286691000453895e-05, + "loss": 0.2921130061149597, + "step": 1731 + }, + { + "epoch": 0.4599654760323994, + "grad_norm": 0.9680106013727008, + "learning_rate": 1.828423251998716e-05, + "loss": 0.3025062382221222, + "step": 1732 + }, + { + "epoch": 0.46023104501394235, + "grad_norm": 1.0299077884479195, + "learning_rate": 1.82817724424181e-05, + "loss": 0.3128702640533447, + "step": 1733 + }, + { + "epoch": 0.4604966139954853, + "grad_norm": 0.9957682350134235, + "learning_rate": 1.8279310768220987e-05, + "loss": 0.31156033277511597, + "step": 1734 + }, + { + "epoch": 0.46076218297702826, + "grad_norm": 1.0327514294429654, + "learning_rate": 1.82768474978704e-05, + "loss": 0.30409976840019226, + "step": 1735 + }, + { + "epoch": 0.46102775195857126, + "grad_norm": 1.0533664417585449, + "learning_rate": 1.827438263184124e-05, + "loss": 0.305557519197464, + "step": 1736 + }, + { + "epoch": 0.4612933209401142, + "grad_norm": 1.1216722893854725, + "learning_rate": 1.827191617060869e-05, + "loss": 0.36079999804496765, + "step": 1737 + }, + { + "epoch": 0.46155888992165717, + "grad_norm": 1.0546022345807051, + "learning_rate": 1.8269448114648264e-05, + "loss": 0.3341830372810364, + "step": 1738 + }, + { + "epoch": 0.4618244589032001, + "grad_norm": 1.0085785444907966, + "learning_rate": 1.8266978464435764e-05, + "loss": 
0.3222450017929077, + "step": 1739 + }, + { + "epoch": 0.46209002788474307, + "grad_norm": 1.112818872130856, + "learning_rate": 1.826450722044732e-05, + "loss": 0.34665441513061523, + "step": 1740 + }, + { + "epoch": 0.462355596866286, + "grad_norm": 1.1112300040840664, + "learning_rate": 1.8262034383159357e-05, + "loss": 0.31024169921875, + "step": 1741 + }, + { + "epoch": 0.462621165847829, + "grad_norm": 1.2322752248386413, + "learning_rate": 1.8259559953048606e-05, + "loss": 0.2950369119644165, + "step": 1742 + }, + { + "epoch": 0.4628867348293719, + "grad_norm": 1.109045795536776, + "learning_rate": 1.8257083930592102e-05, + "loss": 0.3378523886203766, + "step": 1743 + }, + { + "epoch": 0.4631523038109149, + "grad_norm": 0.9899845397184047, + "learning_rate": 1.8254606316267204e-05, + "loss": 0.2930060923099518, + "step": 1744 + }, + { + "epoch": 0.46341787279245783, + "grad_norm": 1.079619676645024, + "learning_rate": 1.8252127110551564e-05, + "loss": 0.3236517012119293, + "step": 1745 + }, + { + "epoch": 0.4636834417740008, + "grad_norm": 0.9852877201201444, + "learning_rate": 1.824964631392314e-05, + "loss": 0.3010406196117401, + "step": 1746 + }, + { + "epoch": 0.46394901075554373, + "grad_norm": 1.0095585954453505, + "learning_rate": 1.8247163926860204e-05, + "loss": 0.3269607424736023, + "step": 1747 + }, + { + "epoch": 0.4642145797370867, + "grad_norm": 1.0474961373680607, + "learning_rate": 1.8244679949841328e-05, + "loss": 0.3437904715538025, + "step": 1748 + }, + { + "epoch": 0.46448014871862964, + "grad_norm": 1.1512723462780612, + "learning_rate": 1.8242194383345394e-05, + "loss": 0.37820738554000854, + "step": 1749 + }, + { + "epoch": 0.46474571770017264, + "grad_norm": 1.0989334641357904, + "learning_rate": 1.8239707227851592e-05, + "loss": 0.3365899920463562, + "step": 1750 + }, + { + "epoch": 0.4650112866817156, + "grad_norm": 0.9943228703349263, + "learning_rate": 1.8237218483839414e-05, + "loss": 0.30418774485588074, + "step": 1751 + }, + { 
+ "epoch": 0.46527685566325855, + "grad_norm": 0.9379554406122236, + "learning_rate": 1.823472815178866e-05, + "loss": 0.2923222780227661, + "step": 1752 + }, + { + "epoch": 0.4655424246448015, + "grad_norm": 1.1096787188742467, + "learning_rate": 1.823223623217944e-05, + "loss": 0.3358995020389557, + "step": 1753 + }, + { + "epoch": 0.46580799362634445, + "grad_norm": 1.0997620749237405, + "learning_rate": 1.822974272549216e-05, + "loss": 0.3413343131542206, + "step": 1754 + }, + { + "epoch": 0.4660735626078874, + "grad_norm": 1.0873990469892099, + "learning_rate": 1.822724763220755e-05, + "loss": 0.33553364872932434, + "step": 1755 + }, + { + "epoch": 0.46633913158943036, + "grad_norm": 1.0957210856960815, + "learning_rate": 1.8224750952806626e-05, + "loss": 0.35896626114845276, + "step": 1756 + }, + { + "epoch": 0.4666047005709733, + "grad_norm": 1.1032076691430248, + "learning_rate": 1.8222252687770718e-05, + "loss": 0.35345566272735596, + "step": 1757 + }, + { + "epoch": 0.46687026955251626, + "grad_norm": 1.0034635235769087, + "learning_rate": 1.8219752837581466e-05, + "loss": 0.3146013617515564, + "step": 1758 + }, + { + "epoch": 0.4671358385340592, + "grad_norm": 1.0191336075935247, + "learning_rate": 1.8217251402720807e-05, + "loss": 0.33270642161369324, + "step": 1759 + }, + { + "epoch": 0.46740140751560216, + "grad_norm": 1.030475428136688, + "learning_rate": 1.821474838367099e-05, + "loss": 0.3172033727169037, + "step": 1760 + }, + { + "epoch": 0.4676669764971451, + "grad_norm": 1.6535016363051902, + "learning_rate": 1.8212243780914578e-05, + "loss": 0.3277033567428589, + "step": 1761 + }, + { + "epoch": 0.46793254547868807, + "grad_norm": 1.1570228647748637, + "learning_rate": 1.820973759493441e-05, + "loss": 0.3523799777030945, + "step": 1762 + }, + { + "epoch": 0.468198114460231, + "grad_norm": 1.0907259849913267, + "learning_rate": 1.8207229826213664e-05, + "loss": 0.32437676191329956, + "step": 1763 + }, + { + "epoch": 0.468463683441774, + 
"grad_norm": 1.1347618214788342, + "learning_rate": 1.82047204752358e-05, + "loss": 0.34185051918029785, + "step": 1764 + }, + { + "epoch": 0.468729252423317, + "grad_norm": 1.0561382700570243, + "learning_rate": 1.8202209542484594e-05, + "loss": 0.32034197449684143, + "step": 1765 + }, + { + "epoch": 0.46899482140485993, + "grad_norm": 1.097207173265362, + "learning_rate": 1.8199697028444125e-05, + "loss": 0.30969515442848206, + "step": 1766 + }, + { + "epoch": 0.4692603903864029, + "grad_norm": 0.9320632629292236, + "learning_rate": 1.8197182933598776e-05, + "loss": 0.24751389026641846, + "step": 1767 + }, + { + "epoch": 0.46952595936794583, + "grad_norm": 1.2001835130139573, + "learning_rate": 1.8194667258433235e-05, + "loss": 0.3859948217868805, + "step": 1768 + }, + { + "epoch": 0.4697915283494888, + "grad_norm": 1.0989779617923678, + "learning_rate": 1.819215000343249e-05, + "loss": 0.29364967346191406, + "step": 1769 + }, + { + "epoch": 0.47005709733103174, + "grad_norm": 1.1161641657952082, + "learning_rate": 1.8189631169081845e-05, + "loss": 0.3560323715209961, + "step": 1770 + }, + { + "epoch": 0.4703226663125747, + "grad_norm": 1.6505675097600017, + "learning_rate": 1.8187110755866898e-05, + "loss": 0.3458098769187927, + "step": 1771 + }, + { + "epoch": 0.47058823529411764, + "grad_norm": 1.0148526914708587, + "learning_rate": 1.8184588764273555e-05, + "loss": 0.32131001353263855, + "step": 1772 + }, + { + "epoch": 0.4708538042756606, + "grad_norm": 1.0453234866463608, + "learning_rate": 1.8182065194788024e-05, + "loss": 0.3011054992675781, + "step": 1773 + }, + { + "epoch": 0.47111937325720354, + "grad_norm": 1.1076832582073854, + "learning_rate": 1.8179540047896827e-05, + "loss": 0.3314674496650696, + "step": 1774 + }, + { + "epoch": 0.4713849422387465, + "grad_norm": 1.0853788387965118, + "learning_rate": 1.8177013324086774e-05, + "loss": 0.3437536060810089, + "step": 1775 + }, + { + "epoch": 0.47165051122028945, + "grad_norm": 1.166112048160084, + 
"learning_rate": 1.8174485023844993e-05, + "loss": 0.36137935519218445, + "step": 1776 + }, + { + "epoch": 0.4719160802018324, + "grad_norm": 1.0726359370167762, + "learning_rate": 1.8171955147658905e-05, + "loss": 0.34018874168395996, + "step": 1777 + }, + { + "epoch": 0.4721816491833754, + "grad_norm": 1.0596665602066746, + "learning_rate": 1.8169423696016245e-05, + "loss": 0.33298587799072266, + "step": 1778 + }, + { + "epoch": 0.47244721816491836, + "grad_norm": 1.1107712039752602, + "learning_rate": 1.816689066940505e-05, + "loss": 0.3649418354034424, + "step": 1779 + }, + { + "epoch": 0.4727127871464613, + "grad_norm": 1.0148859742506888, + "learning_rate": 1.8164356068313646e-05, + "loss": 0.32419171929359436, + "step": 1780 + }, + { + "epoch": 0.47297835612800426, + "grad_norm": 1.047167823612948, + "learning_rate": 1.8161819893230688e-05, + "loss": 0.288555383682251, + "step": 1781 + }, + { + "epoch": 0.4732439251095472, + "grad_norm": 1.005455205363293, + "learning_rate": 1.815928214464511e-05, + "loss": 0.3231011629104614, + "step": 1782 + }, + { + "epoch": 0.47350949409109017, + "grad_norm": 1.0470674131364166, + "learning_rate": 1.815674282304617e-05, + "loss": 0.29310134053230286, + "step": 1783 + }, + { + "epoch": 0.4737750630726331, + "grad_norm": 1.0390137248114197, + "learning_rate": 1.815420192892341e-05, + "loss": 0.32683852314949036, + "step": 1784 + }, + { + "epoch": 0.47404063205417607, + "grad_norm": 1.0353379429668699, + "learning_rate": 1.8151659462766685e-05, + "loss": 0.3200969099998474, + "step": 1785 + }, + { + "epoch": 0.474306201035719, + "grad_norm": 1.051359679014311, + "learning_rate": 1.814911542506616e-05, + "loss": 0.3091360032558441, + "step": 1786 + }, + { + "epoch": 0.474571770017262, + "grad_norm": 1.1630088603070372, + "learning_rate": 1.814656981631229e-05, + "loss": 0.3679049611091614, + "step": 1787 + }, + { + "epoch": 0.4748373389988049, + "grad_norm": 1.1065634125772459, + "learning_rate": 1.814402263699584e-05, + 
"loss": 0.290119469165802, + "step": 1788 + }, + { + "epoch": 0.4751029079803479, + "grad_norm": 1.0987492456650414, + "learning_rate": 1.8141473887607874e-05, + "loss": 0.31878861784935, + "step": 1789 + }, + { + "epoch": 0.47536847696189083, + "grad_norm": 1.1254389921885528, + "learning_rate": 1.8138923568639763e-05, + "loss": 0.35820287466049194, + "step": 1790 + }, + { + "epoch": 0.4756340459434338, + "grad_norm": 1.0046454439717083, + "learning_rate": 1.8136371680583176e-05, + "loss": 0.2924647629261017, + "step": 1791 + }, + { + "epoch": 0.4758996149249768, + "grad_norm": 1.2202907606610718, + "learning_rate": 1.8133818223930092e-05, + "loss": 0.3799927234649658, + "step": 1792 + }, + { + "epoch": 0.47616518390651974, + "grad_norm": 1.1097316301591598, + "learning_rate": 1.8131263199172783e-05, + "loss": 0.3505420386791229, + "step": 1793 + }, + { + "epoch": 0.4764307528880627, + "grad_norm": 1.1021438648339534, + "learning_rate": 1.8128706606803823e-05, + "loss": 0.3291688859462738, + "step": 1794 + }, + { + "epoch": 0.47669632186960564, + "grad_norm": 1.0814065231113215, + "learning_rate": 1.8126148447316104e-05, + "loss": 0.34079697728157043, + "step": 1795 + }, + { + "epoch": 0.4769618908511486, + "grad_norm": 1.2185578909639558, + "learning_rate": 1.8123588721202802e-05, + "loss": 0.2898064851760864, + "step": 1796 + }, + { + "epoch": 0.47722745983269155, + "grad_norm": 1.0448194415877836, + "learning_rate": 1.8121027428957402e-05, + "loss": 0.32089224457740784, + "step": 1797 + }, + { + "epoch": 0.4774930288142345, + "grad_norm": 1.903396083379018, + "learning_rate": 1.8118464571073697e-05, + "loss": 0.3402039408683777, + "step": 1798 + }, + { + "epoch": 0.47775859779577745, + "grad_norm": 1.1693256768707747, + "learning_rate": 1.8115900148045767e-05, + "loss": 0.29904159903526306, + "step": 1799 + }, + { + "epoch": 0.4780241667773204, + "grad_norm": 1.0688058843932313, + "learning_rate": 1.8113334160368007e-05, + "loss": 0.34074240922927856, + "step": 
1800 + }, + { + "epoch": 0.47828973575886335, + "grad_norm": 1.0404364284009804, + "learning_rate": 1.811076660853511e-05, + "loss": 0.28566253185272217, + "step": 1801 + }, + { + "epoch": 0.4785553047404063, + "grad_norm": 1.0267154270839738, + "learning_rate": 1.8108197493042065e-05, + "loss": 0.34523358941078186, + "step": 1802 + }, + { + "epoch": 0.47882087372194926, + "grad_norm": 1.0082361251695107, + "learning_rate": 1.8105626814384173e-05, + "loss": 0.3261171281337738, + "step": 1803 + }, + { + "epoch": 0.4790864427034922, + "grad_norm": 1.0353580811121572, + "learning_rate": 1.8103054573057027e-05, + "loss": 0.2915942966938019, + "step": 1804 + }, + { + "epoch": 0.47935201168503516, + "grad_norm": 1.117140176261941, + "learning_rate": 1.810048076955653e-05, + "loss": 0.2999255657196045, + "step": 1805 + }, + { + "epoch": 0.47961758066657817, + "grad_norm": 1.0967176640726466, + "learning_rate": 1.8097905404378874e-05, + "loss": 0.3294594883918762, + "step": 1806 + }, + { + "epoch": 0.4798831496481211, + "grad_norm": 1.025641731681811, + "learning_rate": 1.8095328478020563e-05, + "loss": 0.30720093846321106, + "step": 1807 + }, + { + "epoch": 0.4801487186296641, + "grad_norm": 1.0583824100775536, + "learning_rate": 1.8092749990978395e-05, + "loss": 0.31076985597610474, + "step": 1808 + }, + { + "epoch": 0.480414287611207, + "grad_norm": 1.0650372083327142, + "learning_rate": 1.8090169943749477e-05, + "loss": 0.3182013928890228, + "step": 1809 + }, + { + "epoch": 0.48067985659275, + "grad_norm": 1.1560421045272382, + "learning_rate": 1.8087588336831206e-05, + "loss": 0.325716108083725, + "step": 1810 + }, + { + "epoch": 0.48094542557429293, + "grad_norm": 1.034822212222003, + "learning_rate": 1.8085005170721287e-05, + "loss": 0.3148769736289978, + "step": 1811 + }, + { + "epoch": 0.4812109945558359, + "grad_norm": 0.9998987744353804, + "learning_rate": 1.8082420445917727e-05, + "loss": 0.30645644664764404, + "step": 1812 + }, + { + "epoch": 
0.48147656353737883, + "grad_norm": 0.9765412034449941, + "learning_rate": 1.807983416291883e-05, + "loss": 0.2978900969028473, + "step": 1813 + }, + { + "epoch": 0.4817421325189218, + "grad_norm": 1.1281577444413164, + "learning_rate": 1.8077246322223194e-05, + "loss": 0.34340181946754456, + "step": 1814 + }, + { + "epoch": 0.48200770150046474, + "grad_norm": 1.0940690010095575, + "learning_rate": 1.8074656924329733e-05, + "loss": 0.3272106349468231, + "step": 1815 + }, + { + "epoch": 0.4822732704820077, + "grad_norm": 1.0823130111098402, + "learning_rate": 1.807206596973765e-05, + "loss": 0.31061962246894836, + "step": 1816 + }, + { + "epoch": 0.48253883946355064, + "grad_norm": 1.1134329507970786, + "learning_rate": 1.8069473458946445e-05, + "loss": 0.28947243094444275, + "step": 1817 + }, + { + "epoch": 0.4828044084450936, + "grad_norm": 1.066867737773279, + "learning_rate": 1.8066879392455932e-05, + "loss": 0.35057532787323, + "step": 1818 + }, + { + "epoch": 0.48306997742663654, + "grad_norm": 1.5202577425125505, + "learning_rate": 1.8064283770766212e-05, + "loss": 0.31032001972198486, + "step": 1819 + }, + { + "epoch": 0.48333554640817955, + "grad_norm": 1.1166414917810035, + "learning_rate": 1.8061686594377685e-05, + "loss": 0.3802293539047241, + "step": 1820 + }, + { + "epoch": 0.4836011153897225, + "grad_norm": 1.122052528401037, + "learning_rate": 1.8059087863791066e-05, + "loss": 0.3306402564048767, + "step": 1821 + }, + { + "epoch": 0.48386668437126545, + "grad_norm": 1.051177925612534, + "learning_rate": 1.8056487579507352e-05, + "loss": 0.32170724868774414, + "step": 1822 + }, + { + "epoch": 0.4841322533528084, + "grad_norm": 1.0182895505748566, + "learning_rate": 1.8053885742027854e-05, + "loss": 0.35058924555778503, + "step": 1823 + }, + { + "epoch": 0.48439782233435136, + "grad_norm": 1.079491665486815, + "learning_rate": 1.8051282351854168e-05, + "loss": 0.3796595335006714, + "step": 1824 + }, + { + "epoch": 0.4846633913158943, + "grad_norm": 
1.0882057457557335, + "learning_rate": 1.8048677409488205e-05, + "loss": 0.28997284173965454, + "step": 1825 + }, + { + "epoch": 0.48492896029743726, + "grad_norm": 1.7307038017833063, + "learning_rate": 1.804607091543216e-05, + "loss": 0.35110151767730713, + "step": 1826 + }, + { + "epoch": 0.4851945292789802, + "grad_norm": 1.1036882170711018, + "learning_rate": 1.8043462870188535e-05, + "loss": 0.3194088637828827, + "step": 1827 + }, + { + "epoch": 0.48546009826052317, + "grad_norm": 1.0664676604065728, + "learning_rate": 1.8040853274260137e-05, + "loss": 0.28777945041656494, + "step": 1828 + }, + { + "epoch": 0.4857256672420661, + "grad_norm": 1.0702584286398438, + "learning_rate": 1.803824212815006e-05, + "loss": 0.3642069697380066, + "step": 1829 + }, + { + "epoch": 0.48599123622360907, + "grad_norm": 1.0626897024145745, + "learning_rate": 1.80356294323617e-05, + "loss": 0.32396575808525085, + "step": 1830 + }, + { + "epoch": 0.486256805205152, + "grad_norm": 1.205959051296984, + "learning_rate": 1.8033015187398758e-05, + "loss": 0.36421436071395874, + "step": 1831 + }, + { + "epoch": 0.486522374186695, + "grad_norm": 1.0011906322370974, + "learning_rate": 1.8030399393765227e-05, + "loss": 0.3170832395553589, + "step": 1832 + }, + { + "epoch": 0.4867879431682379, + "grad_norm": 0.9739220394650455, + "learning_rate": 1.8027782051965408e-05, + "loss": 0.3003416359424591, + "step": 1833 + }, + { + "epoch": 0.48705351214978093, + "grad_norm": 1.0701369618567955, + "learning_rate": 1.802516316250388e-05, + "loss": 0.30362898111343384, + "step": 1834 + }, + { + "epoch": 0.4873190811313239, + "grad_norm": 1.0466563888798912, + "learning_rate": 1.802254272588555e-05, + "loss": 0.32721444964408875, + "step": 1835 + }, + { + "epoch": 0.48758465011286684, + "grad_norm": 1.345049864677536, + "learning_rate": 1.8019920742615596e-05, + "loss": 0.317483514547348, + "step": 1836 + }, + { + "epoch": 0.4878502190944098, + "grad_norm": 1.0589953518283157, + "learning_rate": 
1.801729721319951e-05, + "loss": 0.2928479015827179, + "step": 1837 + }, + { + "epoch": 0.48811578807595274, + "grad_norm": 1.1098495840377043, + "learning_rate": 1.8014672138143073e-05, + "loss": 0.3425772190093994, + "step": 1838 + }, + { + "epoch": 0.4883813570574957, + "grad_norm": 1.0286414092040284, + "learning_rate": 1.801204551795238e-05, + "loss": 0.334087997674942, + "step": 1839 + }, + { + "epoch": 0.48864692603903864, + "grad_norm": 1.0797374159140127, + "learning_rate": 1.80094173531338e-05, + "loss": 0.3186641335487366, + "step": 1840 + }, + { + "epoch": 0.4889124950205816, + "grad_norm": 1.0361897985848911, + "learning_rate": 1.800678764419401e-05, + "loss": 0.3153733015060425, + "step": 1841 + }, + { + "epoch": 0.48917806400212455, + "grad_norm": 1.070217807683518, + "learning_rate": 1.8004156391640004e-05, + "loss": 0.3323214054107666, + "step": 1842 + }, + { + "epoch": 0.4894436329836675, + "grad_norm": 0.9455521865874897, + "learning_rate": 1.8001523595979043e-05, + "loss": 0.2856762409210205, + "step": 1843 + }, + { + "epoch": 0.48970920196521045, + "grad_norm": 1.0256135363684138, + "learning_rate": 1.79988892577187e-05, + "loss": 0.32493725419044495, + "step": 1844 + }, + { + "epoch": 0.4899747709467534, + "grad_norm": 1.1082860888483268, + "learning_rate": 1.7996253377366846e-05, + "loss": 0.350448876619339, + "step": 1845 + }, + { + "epoch": 0.49024033992829635, + "grad_norm": 1.096249407467401, + "learning_rate": 1.7993615955431648e-05, + "loss": 0.32246965169906616, + "step": 1846 + }, + { + "epoch": 0.4905059089098393, + "grad_norm": 0.9715072313794847, + "learning_rate": 1.799097699242157e-05, + "loss": 0.302636057138443, + "step": 1847 + }, + { + "epoch": 0.4907714778913823, + "grad_norm": 1.1573319310132777, + "learning_rate": 1.7988336488845374e-05, + "loss": 0.34280693531036377, + "step": 1848 + }, + { + "epoch": 0.49103704687292526, + "grad_norm": 1.1205814585182334, + "learning_rate": 1.7985694445212118e-05, + "loss": 
0.3650673031806946, + "step": 1849 + }, + { + "epoch": 0.4913026158544682, + "grad_norm": 1.1348057531260405, + "learning_rate": 1.798305086203115e-05, + "loss": 0.33800822496414185, + "step": 1850 + }, + { + "epoch": 0.49156818483601117, + "grad_norm": 1.0428655272942455, + "learning_rate": 1.7980405739812134e-05, + "loss": 0.31522083282470703, + "step": 1851 + }, + { + "epoch": 0.4918337538175541, + "grad_norm": 1.177464907100392, + "learning_rate": 1.7977759079065003e-05, + "loss": 0.3374335765838623, + "step": 1852 + }, + { + "epoch": 0.49209932279909707, + "grad_norm": 1.060278247692231, + "learning_rate": 1.7975110880300018e-05, + "loss": 0.33803191781044006, + "step": 1853 + }, + { + "epoch": 0.49236489178064, + "grad_norm": 1.0982376140773644, + "learning_rate": 1.797246114402771e-05, + "loss": 0.37764933705329895, + "step": 1854 + }, + { + "epoch": 0.492630460762183, + "grad_norm": 0.9654297547716862, + "learning_rate": 1.796980987075892e-05, + "loss": 0.3075840473175049, + "step": 1855 + }, + { + "epoch": 0.4928960297437259, + "grad_norm": 0.9768928030686648, + "learning_rate": 1.7967157061004782e-05, + "loss": 0.306305855512619, + "step": 1856 + }, + { + "epoch": 0.4931615987252689, + "grad_norm": 1.0225684543938522, + "learning_rate": 1.796450271527673e-05, + "loss": 0.3474302291870117, + "step": 1857 + }, + { + "epoch": 0.49342716770681183, + "grad_norm": 1.0243106870487633, + "learning_rate": 1.7961846834086483e-05, + "loss": 0.31059685349464417, + "step": 1858 + }, + { + "epoch": 0.4936927366883548, + "grad_norm": 1.0236396527349367, + "learning_rate": 1.795918941794607e-05, + "loss": 0.346218079328537, + "step": 1859 + }, + { + "epoch": 0.49395830566989773, + "grad_norm": 0.9969229384493907, + "learning_rate": 1.7956530467367805e-05, + "loss": 0.28371214866638184, + "step": 1860 + }, + { + "epoch": 0.4942238746514407, + "grad_norm": 0.8979156608776232, + "learning_rate": 1.7953869982864306e-05, + "loss": 0.27775150537490845, + "step": 1861 + }, + { 
+ "epoch": 0.4944894436329837, + "grad_norm": 1.279703247293047, + "learning_rate": 1.795120796494848e-05, + "loss": 0.328782856464386, + "step": 1862 + }, + { + "epoch": 0.49475501261452665, + "grad_norm": 1.0950381369417217, + "learning_rate": 1.7948544414133534e-05, + "loss": 0.33220064640045166, + "step": 1863 + }, + { + "epoch": 0.4950205815960696, + "grad_norm": 1.0528449584388764, + "learning_rate": 1.794587933093297e-05, + "loss": 0.32681554555892944, + "step": 1864 + }, + { + "epoch": 0.49528615057761255, + "grad_norm": 1.1023465974826758, + "learning_rate": 1.7943212715860586e-05, + "loss": 0.32202866673469543, + "step": 1865 + }, + { + "epoch": 0.4955517195591555, + "grad_norm": 2.266456857585339, + "learning_rate": 1.7940544569430468e-05, + "loss": 0.3051350712776184, + "step": 1866 + }, + { + "epoch": 0.49581728854069845, + "grad_norm": 1.1617568134775966, + "learning_rate": 1.793787489215701e-05, + "loss": 0.3924705386161804, + "step": 1867 + }, + { + "epoch": 0.4960828575222414, + "grad_norm": 1.018817969430421, + "learning_rate": 1.793520368455489e-05, + "loss": 0.30267882347106934, + "step": 1868 + }, + { + "epoch": 0.49634842650378436, + "grad_norm": 1.0585020042998596, + "learning_rate": 1.793253094713909e-05, + "loss": 0.3150729238986969, + "step": 1869 + }, + { + "epoch": 0.4966139954853273, + "grad_norm": 1.314679145900761, + "learning_rate": 1.7929856680424872e-05, + "loss": 0.33814147114753723, + "step": 1870 + }, + { + "epoch": 0.49687956446687026, + "grad_norm": 1.010460021909887, + "learning_rate": 1.7927180884927814e-05, + "loss": 0.31929856538772583, + "step": 1871 + }, + { + "epoch": 0.4971451334484132, + "grad_norm": 1.1376790681693039, + "learning_rate": 1.7924503561163775e-05, + "loss": 0.3797461688518524, + "step": 1872 + }, + { + "epoch": 0.49741070242995616, + "grad_norm": 1.057594588942085, + "learning_rate": 1.792182470964891e-05, + "loss": 0.3056377172470093, + "step": 1873 + }, + { + "epoch": 0.4976762714114991, + 
"grad_norm": 1.1254473942016883, + "learning_rate": 1.7919144330899668e-05, + "loss": 0.3526398539543152, + "step": 1874 + }, + { + "epoch": 0.49794184039304207, + "grad_norm": 1.0289140670533532, + "learning_rate": 1.79164624254328e-05, + "loss": 0.3183595538139343, + "step": 1875 + }, + { + "epoch": 0.4982074093745851, + "grad_norm": 1.1908370019011798, + "learning_rate": 1.791377899376534e-05, + "loss": 0.3604113459587097, + "step": 1876 + }, + { + "epoch": 0.498472978356128, + "grad_norm": 1.1651856770093412, + "learning_rate": 1.7911094036414623e-05, + "loss": 0.3219848573207855, + "step": 1877 + }, + { + "epoch": 0.498738547337671, + "grad_norm": 1.0586801467718077, + "learning_rate": 1.7908407553898282e-05, + "loss": 0.28773394227027893, + "step": 1878 + }, + { + "epoch": 0.49900411631921393, + "grad_norm": 1.0649509880321448, + "learning_rate": 1.7905719546734233e-05, + "loss": 0.31453996896743774, + "step": 1879 + }, + { + "epoch": 0.4992696853007569, + "grad_norm": 0.9878415524405192, + "learning_rate": 1.7903030015440696e-05, + "loss": 0.2947153151035309, + "step": 1880 + }, + { + "epoch": 0.49953525428229983, + "grad_norm": 1.0652111521233423, + "learning_rate": 1.7900338960536178e-05, + "loss": 0.313723087310791, + "step": 1881 + }, + { + "epoch": 0.4998008232638428, + "grad_norm": 1.0853994840945123, + "learning_rate": 1.7897646382539485e-05, + "loss": 0.3385108709335327, + "step": 1882 + }, + { + "epoch": 0.5000663922453857, + "grad_norm": 1.0993457819479324, + "learning_rate": 1.7894952281969712e-05, + "loss": 0.31417039036750793, + "step": 1883 + }, + { + "epoch": 0.5003319612269287, + "grad_norm": 1.1452192213941934, + "learning_rate": 1.7892256659346253e-05, + "loss": 0.3555717468261719, + "step": 1884 + }, + { + "epoch": 0.5005975302084716, + "grad_norm": 1.1989261836629121, + "learning_rate": 1.7889559515188793e-05, + "loss": 0.3724518120288849, + "step": 1885 + }, + { + "epoch": 0.5008630991900146, + "grad_norm": 1.0516015708006068, + 
"learning_rate": 1.7886860850017306e-05, + "loss": 0.32646167278289795, + "step": 1886 + }, + { + "epoch": 0.5011286681715575, + "grad_norm": 1.079300223054909, + "learning_rate": 1.7884160664352062e-05, + "loss": 0.31072959303855896, + "step": 1887 + }, + { + "epoch": 0.5013942371531005, + "grad_norm": 0.9518526173941219, + "learning_rate": 1.7881458958713628e-05, + "loss": 0.26987242698669434, + "step": 1888 + }, + { + "epoch": 0.5016598061346434, + "grad_norm": 0.9908294117764815, + "learning_rate": 1.787875573362286e-05, + "loss": 0.30105817317962646, + "step": 1889 + }, + { + "epoch": 0.5019253751161864, + "grad_norm": 1.0444226583374554, + "learning_rate": 1.7876050989600908e-05, + "loss": 0.31277188658714294, + "step": 1890 + }, + { + "epoch": 0.5021909440977294, + "grad_norm": 1.0192470233304842, + "learning_rate": 1.7873344727169214e-05, + "loss": 0.31068161129951477, + "step": 1891 + }, + { + "epoch": 0.5024565130792723, + "grad_norm": 1.0797105219167356, + "learning_rate": 1.7870636946849512e-05, + "loss": 0.3491121530532837, + "step": 1892 + }, + { + "epoch": 0.5027220820608153, + "grad_norm": 1.0753654491775293, + "learning_rate": 1.7867927649163838e-05, + "loss": 0.3223581612110138, + "step": 1893 + }, + { + "epoch": 0.5029876510423582, + "grad_norm": 1.1295999155195493, + "learning_rate": 1.7865216834634506e-05, + "loss": 0.345224529504776, + "step": 1894 + }, + { + "epoch": 0.5032532200239012, + "grad_norm": 1.1419032071310418, + "learning_rate": 1.7862504503784123e-05, + "loss": 0.3408205211162567, + "step": 1895 + }, + { + "epoch": 0.5035187890054441, + "grad_norm": 0.9713066472066385, + "learning_rate": 1.7859790657135608e-05, + "loss": 0.2680068016052246, + "step": 1896 + }, + { + "epoch": 0.5037843579869872, + "grad_norm": 0.9186813995364894, + "learning_rate": 1.7857075295212148e-05, + "loss": 0.29733535647392273, + "step": 1897 + }, + { + "epoch": 0.5040499269685301, + "grad_norm": 1.1196248802118025, + "learning_rate": 1.785435841853724e-05, 
+ "loss": 0.34820133447647095, + "step": 1898 + }, + { + "epoch": 0.5043154959500731, + "grad_norm": 1.134445876132798, + "learning_rate": 1.785164002763466e-05, + "loss": 0.3306594491004944, + "step": 1899 + }, + { + "epoch": 0.504581064931616, + "grad_norm": 1.0579272410020724, + "learning_rate": 1.7848920123028482e-05, + "loss": 0.3166846036911011, + "step": 1900 + }, + { + "epoch": 0.504846633913159, + "grad_norm": 1.2213509498849395, + "learning_rate": 1.784619870524308e-05, + "loss": 0.3406408727169037, + "step": 1901 + }, + { + "epoch": 0.5051122028947019, + "grad_norm": 1.0410168562106317, + "learning_rate": 1.78434757748031e-05, + "loss": 0.36358171701431274, + "step": 1902 + }, + { + "epoch": 0.5053777718762449, + "grad_norm": 1.0510382236040618, + "learning_rate": 1.7840751332233498e-05, + "loss": 0.34045761823654175, + "step": 1903 + }, + { + "epoch": 0.5056433408577878, + "grad_norm": 1.0566120463915532, + "learning_rate": 1.783802537805951e-05, + "loss": 0.3442475199699402, + "step": 1904 + }, + { + "epoch": 0.5059089098393308, + "grad_norm": 1.1632822330113848, + "learning_rate": 1.7835297912806675e-05, + "loss": 0.3488585650920868, + "step": 1905 + }, + { + "epoch": 0.5061744788208737, + "grad_norm": 1.098650773563784, + "learning_rate": 1.7832568937000808e-05, + "loss": 0.3340107500553131, + "step": 1906 + }, + { + "epoch": 0.5064400478024167, + "grad_norm": 1.0195614065654457, + "learning_rate": 1.7829838451168027e-05, + "loss": 0.3206177353858948, + "step": 1907 + }, + { + "epoch": 0.5067056167839596, + "grad_norm": 1.0219563874782234, + "learning_rate": 1.782710645583473e-05, + "loss": 0.2851010262966156, + "step": 1908 + }, + { + "epoch": 0.5069711857655026, + "grad_norm": 1.0249326570563306, + "learning_rate": 1.782437295152763e-05, + "loss": 0.31850844621658325, + "step": 1909 + }, + { + "epoch": 0.5072367547470455, + "grad_norm": 1.0890541355083159, + "learning_rate": 1.7821637938773704e-05, + "loss": 0.3343108892440796, + "step": 1910 + }, 
+ { + "epoch": 0.5075023237285885, + "grad_norm": 1.1131994842325255, + "learning_rate": 1.781890141810023e-05, + "loss": 0.3423745930194855, + "step": 1911 + }, + { + "epoch": 0.5077678927101315, + "grad_norm": 1.057536319451762, + "learning_rate": 1.7816163390034775e-05, + "loss": 0.30980780720710754, + "step": 1912 + }, + { + "epoch": 0.5080334616916744, + "grad_norm": 1.0099692843485935, + "learning_rate": 1.7813423855105203e-05, + "loss": 0.31217479705810547, + "step": 1913 + }, + { + "epoch": 0.5082990306732174, + "grad_norm": 1.0721675523916532, + "learning_rate": 1.7810682813839664e-05, + "loss": 0.34741947054862976, + "step": 1914 + }, + { + "epoch": 0.5085645996547603, + "grad_norm": 1.1098427332228447, + "learning_rate": 1.7807940266766595e-05, + "loss": 0.32275527715682983, + "step": 1915 + }, + { + "epoch": 0.5088301686363033, + "grad_norm": 1.1130434711054393, + "learning_rate": 1.7805196214414728e-05, + "loss": 0.32760411500930786, + "step": 1916 + }, + { + "epoch": 0.5090957376178462, + "grad_norm": 1.1445787919507704, + "learning_rate": 1.7802450657313086e-05, + "loss": 0.3877720832824707, + "step": 1917 + }, + { + "epoch": 0.5093613065993892, + "grad_norm": 1.1135916509560913, + "learning_rate": 1.779970359599098e-05, + "loss": 0.33458876609802246, + "step": 1918 + }, + { + "epoch": 0.5096268755809321, + "grad_norm": 0.9826034605244246, + "learning_rate": 1.7796955030978007e-05, + "loss": 0.30603206157684326, + "step": 1919 + }, + { + "epoch": 0.5098924445624751, + "grad_norm": 0.9902684589377142, + "learning_rate": 1.7794204962804063e-05, + "loss": 0.2920286953449249, + "step": 1920 + }, + { + "epoch": 0.510158013544018, + "grad_norm": 1.1034173597508874, + "learning_rate": 1.7791453391999325e-05, + "loss": 0.32407981157302856, + "step": 1921 + }, + { + "epoch": 0.510423582525561, + "grad_norm": 1.3200648964540613, + "learning_rate": 1.7788700319094263e-05, + "loss": 0.30423563718795776, + "step": 1922 + }, + { + "epoch": 0.5106891515071039, + 
"grad_norm": 1.1213502448496324, + "learning_rate": 1.7785945744619642e-05, + "loss": 0.34691399335861206, + "step": 1923 + }, + { + "epoch": 0.5109547204886469, + "grad_norm": 1.0498801582672959, + "learning_rate": 1.7783189669106503e-05, + "loss": 0.3217603266239166, + "step": 1924 + }, + { + "epoch": 0.5112202894701899, + "grad_norm": 1.1943957961346587, + "learning_rate": 1.7780432093086198e-05, + "loss": 0.365132212638855, + "step": 1925 + }, + { + "epoch": 0.5114858584517329, + "grad_norm": 0.9783494867108459, + "learning_rate": 1.7777673017090344e-05, + "loss": 0.29662930965423584, + "step": 1926 + }, + { + "epoch": 0.5117514274332758, + "grad_norm": 1.0707541061431447, + "learning_rate": 1.7774912441650857e-05, + "loss": 0.3324819803237915, + "step": 1927 + }, + { + "epoch": 0.5120169964148188, + "grad_norm": 1.0040789031204058, + "learning_rate": 1.7772150367299953e-05, + "loss": 0.29331067204475403, + "step": 1928 + }, + { + "epoch": 0.5122825653963617, + "grad_norm": 1.064062495235822, + "learning_rate": 1.7769386794570117e-05, + "loss": 0.3158259987831116, + "step": 1929 + }, + { + "epoch": 0.5125481343779047, + "grad_norm": 1.020159871349018, + "learning_rate": 1.7766621723994145e-05, + "loss": 0.2824791967868805, + "step": 1930 + }, + { + "epoch": 0.5128137033594476, + "grad_norm": 1.0493215169042918, + "learning_rate": 1.7763855156105097e-05, + "loss": 0.2690732777118683, + "step": 1931 + }, + { + "epoch": 0.5130792723409906, + "grad_norm": 1.043157004637876, + "learning_rate": 1.7761087091436346e-05, + "loss": 0.31360942125320435, + "step": 1932 + }, + { + "epoch": 0.5133448413225336, + "grad_norm": 0.9858891902519169, + "learning_rate": 1.7758317530521535e-05, + "loss": 0.28334349393844604, + "step": 1933 + }, + { + "epoch": 0.5136104103040765, + "grad_norm": 1.1739380172138798, + "learning_rate": 1.7755546473894604e-05, + "loss": 0.3857404589653015, + "step": 1934 + }, + { + "epoch": 0.5138759792856195, + "grad_norm": 1.0280582546011092, + 
"learning_rate": 1.7752773922089784e-05, + "loss": 0.2852492332458496, + "step": 1935 + }, + { + "epoch": 0.5141415482671624, + "grad_norm": 1.003050995152578, + "learning_rate": 1.7749999875641585e-05, + "loss": 0.2959831953048706, + "step": 1936 + }, + { + "epoch": 0.5144071172487054, + "grad_norm": 1.100974201889633, + "learning_rate": 1.7747224335084815e-05, + "loss": 0.3129635453224182, + "step": 1937 + }, + { + "epoch": 0.5146726862302483, + "grad_norm": 1.0336946735940622, + "learning_rate": 1.774444730095456e-05, + "loss": 0.31391531229019165, + "step": 1938 + }, + { + "epoch": 0.5149382552117913, + "grad_norm": 1.0155253897885985, + "learning_rate": 1.7741668773786202e-05, + "loss": 0.30274757742881775, + "step": 1939 + }, + { + "epoch": 0.5152038241933342, + "grad_norm": 1.026561688701391, + "learning_rate": 1.7738888754115413e-05, + "loss": 0.29162222146987915, + "step": 1940 + }, + { + "epoch": 0.5154693931748772, + "grad_norm": 1.045931473256506, + "learning_rate": 1.7736107242478143e-05, + "loss": 0.30358970165252686, + "step": 1941 + }, + { + "epoch": 0.5157349621564201, + "grad_norm": 1.11915386227621, + "learning_rate": 1.7733324239410634e-05, + "loss": 0.32268065214157104, + "step": 1942 + }, + { + "epoch": 0.5160005311379631, + "grad_norm": 1.0626040245012975, + "learning_rate": 1.7730539745449417e-05, + "loss": 0.31925222277641296, + "step": 1943 + }, + { + "epoch": 0.516266100119506, + "grad_norm": 1.1170224886553113, + "learning_rate": 1.7727753761131312e-05, + "loss": 0.32883748412132263, + "step": 1944 + }, + { + "epoch": 0.516531669101049, + "grad_norm": 1.101510406621582, + "learning_rate": 1.7724966286993425e-05, + "loss": 0.3212829530239105, + "step": 1945 + }, + { + "epoch": 0.5167972380825919, + "grad_norm": 1.1477333753851342, + "learning_rate": 1.772217732357314e-05, + "loss": 0.32909759879112244, + "step": 1946 + }, + { + "epoch": 0.5170628070641349, + "grad_norm": 33.3722959000957, + "learning_rate": 1.7719386871408147e-05, + 
"loss": 0.3451213538646698, + "step": 1947 + }, + { + "epoch": 0.5173283760456778, + "grad_norm": 1.0792459943819739, + "learning_rate": 1.7716594931036402e-05, + "loss": 0.318422794342041, + "step": 1948 + }, + { + "epoch": 0.5175939450272208, + "grad_norm": 1.1243494025490273, + "learning_rate": 1.7713801502996166e-05, + "loss": 0.3165292739868164, + "step": 1949 + }, + { + "epoch": 0.5178595140087637, + "grad_norm": 1.1353818628503742, + "learning_rate": 1.7711006587825975e-05, + "loss": 0.3116700351238251, + "step": 1950 + }, + { + "epoch": 0.5181250829903067, + "grad_norm": 1.2005138291757869, + "learning_rate": 1.7708210186064656e-05, + "loss": 0.32102686166763306, + "step": 1951 + }, + { + "epoch": 0.5183906519718496, + "grad_norm": 1.079523368082095, + "learning_rate": 1.7705412298251323e-05, + "loss": 0.33025500178337097, + "step": 1952 + }, + { + "epoch": 0.5186562209533926, + "grad_norm": 1.2087703844513067, + "learning_rate": 1.7702612924925377e-05, + "loss": 0.36113062500953674, + "step": 1953 + }, + { + "epoch": 0.5189217899349357, + "grad_norm": 1.1242566727618883, + "learning_rate": 1.7699812066626503e-05, + "loss": 0.3092479109764099, + "step": 1954 + }, + { + "epoch": 0.5191873589164786, + "grad_norm": 1.117146005158035, + "learning_rate": 1.769700972389467e-05, + "loss": 0.3389117419719696, + "step": 1955 + }, + { + "epoch": 0.5194529278980216, + "grad_norm": 1.1525168535902064, + "learning_rate": 1.7694205897270147e-05, + "loss": 0.3225803077220917, + "step": 1956 + }, + { + "epoch": 0.5197184968795645, + "grad_norm": 1.0237361691251219, + "learning_rate": 1.7691400587293467e-05, + "loss": 0.3226786255836487, + "step": 1957 + }, + { + "epoch": 0.5199840658611075, + "grad_norm": 1.0060672564491426, + "learning_rate": 1.7688593794505466e-05, + "loss": 0.27708399295806885, + "step": 1958 + }, + { + "epoch": 0.5202496348426504, + "grad_norm": 1.0763214880079806, + "learning_rate": 1.768578551944726e-05, + "loss": 0.36100950837135315, + "step": 1959 
+ }, + { + "epoch": 0.5205152038241934, + "grad_norm": 1.043549985204807, + "learning_rate": 1.768297576266025e-05, + "loss": 0.3138211965560913, + "step": 1960 + }, + { + "epoch": 0.5207807728057363, + "grad_norm": 1.0618046264640966, + "learning_rate": 1.7680164524686128e-05, + "loss": 0.33959656953811646, + "step": 1961 + }, + { + "epoch": 0.5210463417872793, + "grad_norm": 0.9826913420332539, + "learning_rate": 1.7677351806066863e-05, + "loss": 0.3093605637550354, + "step": 1962 + }, + { + "epoch": 0.5213119107688222, + "grad_norm": 1.13307401094871, + "learning_rate": 1.7674537607344717e-05, + "loss": 0.3098641633987427, + "step": 1963 + }, + { + "epoch": 0.5215774797503652, + "grad_norm": 1.0810255128706003, + "learning_rate": 1.767172192906223e-05, + "loss": 0.35172683000564575, + "step": 1964 + }, + { + "epoch": 0.5218430487319081, + "grad_norm": 1.0729896509671073, + "learning_rate": 1.7668904771762242e-05, + "loss": 0.3535798192024231, + "step": 1965 + }, + { + "epoch": 0.5221086177134511, + "grad_norm": 1.2521081937006913, + "learning_rate": 1.766608613598785e-05, + "loss": 0.36183854937553406, + "step": 1966 + }, + { + "epoch": 0.522374186694994, + "grad_norm": 1.0735439944400962, + "learning_rate": 1.7663266022282473e-05, + "loss": 0.35995131731033325, + "step": 1967 + }, + { + "epoch": 0.522639755676537, + "grad_norm": 1.117054454049305, + "learning_rate": 1.766044443118978e-05, + "loss": 0.38672733306884766, + "step": 1968 + }, + { + "epoch": 0.5229053246580799, + "grad_norm": 1.0862044019422723, + "learning_rate": 1.765762136325375e-05, + "loss": 0.3389524221420288, + "step": 1969 + }, + { + "epoch": 0.5231708936396229, + "grad_norm": 0.9847521483407152, + "learning_rate": 1.7654796819018635e-05, + "loss": 0.3325779139995575, + "step": 1970 + }, + { + "epoch": 0.5234364626211658, + "grad_norm": 1.014607581135561, + "learning_rate": 1.7651970799028976e-05, + "loss": 0.328407347202301, + "step": 1971 + }, + { + "epoch": 0.5237020316027088, + 
"grad_norm": 0.9793310107257689, + "learning_rate": 1.764914330382959e-05, + "loss": 0.3050537705421448, + "step": 1972 + }, + { + "epoch": 0.5239676005842517, + "grad_norm": 1.1408686145630131, + "learning_rate": 1.7646314333965588e-05, + "loss": 0.35500285029411316, + "step": 1973 + }, + { + "epoch": 0.5242331695657947, + "grad_norm": 1.1035893819341516, + "learning_rate": 1.7643483889982364e-05, + "loss": 0.30319780111312866, + "step": 1974 + }, + { + "epoch": 0.5244987385473376, + "grad_norm": 1.0161223434375823, + "learning_rate": 1.7640651972425592e-05, + "loss": 0.315757691860199, + "step": 1975 + }, + { + "epoch": 0.5247643075288806, + "grad_norm": 1.0278713767432786, + "learning_rate": 1.7637818581841234e-05, + "loss": 0.28562331199645996, + "step": 1976 + }, + { + "epoch": 0.5250298765104235, + "grad_norm": 1.017204404946826, + "learning_rate": 1.763498371877553e-05, + "loss": 0.29798296093940735, + "step": 1977 + }, + { + "epoch": 0.5252954454919665, + "grad_norm": 1.1245986087835715, + "learning_rate": 1.763214738377501e-05, + "loss": 0.2923639416694641, + "step": 1978 + }, + { + "epoch": 0.5255610144735094, + "grad_norm": 1.0282257211254215, + "learning_rate": 1.7629309577386492e-05, + "loss": 0.2858009934425354, + "step": 1979 + }, + { + "epoch": 0.5258265834550524, + "grad_norm": 1.1185725636940211, + "learning_rate": 1.7626470300157064e-05, + "loss": 0.3615952134132385, + "step": 1980 + }, + { + "epoch": 0.5260921524365954, + "grad_norm": 1.1357118701340632, + "learning_rate": 1.762362955263411e-05, + "loss": 0.36142098903656006, + "step": 1981 + }, + { + "epoch": 0.5263577214181384, + "grad_norm": 1.1305105783283786, + "learning_rate": 1.762078733536529e-05, + "loss": 0.3335961699485779, + "step": 1982 + }, + { + "epoch": 0.5266232903996814, + "grad_norm": 1.2367655641806865, + "learning_rate": 1.761794364889855e-05, + "loss": 0.34549272060394287, + "step": 1983 + }, + { + "epoch": 0.5268888593812243, + "grad_norm": 1.1166612317693478, + 
"learning_rate": 1.761509849378212e-05, + "loss": 0.3177812993526459, + "step": 1984 + }, + { + "epoch": 0.5271544283627673, + "grad_norm": 1.1485560676920734, + "learning_rate": 1.7612251870564515e-05, + "loss": 0.33191388845443726, + "step": 1985 + }, + { + "epoch": 0.5274199973443102, + "grad_norm": 1.0807821541967428, + "learning_rate": 1.7609403779794523e-05, + "loss": 0.30732038617134094, + "step": 1986 + }, + { + "epoch": 0.5276855663258532, + "grad_norm": 1.1038043700347457, + "learning_rate": 1.7606554222021226e-05, + "loss": 0.33012068271636963, + "step": 1987 + }, + { + "epoch": 0.5279511353073961, + "grad_norm": 1.2233212729045404, + "learning_rate": 1.760370319779399e-05, + "loss": 0.3396066427230835, + "step": 1988 + }, + { + "epoch": 0.5282167042889391, + "grad_norm": 1.0755028443639627, + "learning_rate": 1.7600850707662454e-05, + "loss": 0.29053401947021484, + "step": 1989 + }, + { + "epoch": 0.528482273270482, + "grad_norm": 1.0859289781343007, + "learning_rate": 1.7597996752176545e-05, + "loss": 0.32927206158638, + "step": 1990 + }, + { + "epoch": 0.528747842252025, + "grad_norm": 1.0494460781018915, + "learning_rate": 1.759514133188647e-05, + "loss": 0.309224933385849, + "step": 1991 + }, + { + "epoch": 0.5290134112335679, + "grad_norm": 1.0870307368096292, + "learning_rate": 1.7592284447342725e-05, + "loss": 0.31973862648010254, + "step": 1992 + }, + { + "epoch": 0.5292789802151109, + "grad_norm": 1.0491029702582455, + "learning_rate": 1.758942609909608e-05, + "loss": 0.3331080377101898, + "step": 1993 + }, + { + "epoch": 0.5295445491966538, + "grad_norm": 1.0710245753206995, + "learning_rate": 1.7586566287697592e-05, + "loss": 0.32755160331726074, + "step": 1994 + }, + { + "epoch": 0.5298101181781968, + "grad_norm": 1.0377451052992368, + "learning_rate": 1.7583705013698602e-05, + "loss": 0.31942498683929443, + "step": 1995 + }, + { + "epoch": 0.5300756871597397, + "grad_norm": 1.1665695354682926, + "learning_rate": 1.7580842277650723e-05, + 
"loss": 0.3199199438095093, + "step": 1996 + }, + { + "epoch": 0.5303412561412827, + "grad_norm": 0.9680761404148592, + "learning_rate": 1.7577978080105864e-05, + "loss": 0.28153708577156067, + "step": 1997 + }, + { + "epoch": 0.5306068251228256, + "grad_norm": 1.0336529884327843, + "learning_rate": 1.7575112421616203e-05, + "loss": 0.3050921559333801, + "step": 1998 + }, + { + "epoch": 0.5308723941043686, + "grad_norm": 1.0836881519572394, + "learning_rate": 1.7572245302734208e-05, + "loss": 0.3242149353027344, + "step": 1999 + }, + { + "epoch": 0.5311379630859115, + "grad_norm": 0.9889139549595165, + "learning_rate": 1.7569376724012622e-05, + "loss": 0.29947227239608765, + "step": 2000 + }, + { + "epoch": 0.5314035320674545, + "grad_norm": 1.132976441688301, + "learning_rate": 1.756650668600448e-05, + "loss": 0.3229755163192749, + "step": 2001 + }, + { + "epoch": 0.5316691010489975, + "grad_norm": 1.0802391073518836, + "learning_rate": 1.7563635189263086e-05, + "loss": 0.3544544577598572, + "step": 2002 + }, + { + "epoch": 0.5319346700305404, + "grad_norm": 1.0996284853033707, + "learning_rate": 1.756076223434203e-05, + "loss": 0.32807621359825134, + "step": 2003 + }, + { + "epoch": 0.5322002390120834, + "grad_norm": 0.9920629294688551, + "learning_rate": 1.7557887821795192e-05, + "loss": 0.3057190477848053, + "step": 2004 + }, + { + "epoch": 0.5324658079936263, + "grad_norm": 1.0234244423063892, + "learning_rate": 1.7555011952176716e-05, + "loss": 0.29419198632240295, + "step": 2005 + }, + { + "epoch": 0.5327313769751693, + "grad_norm": 0.9799120327217228, + "learning_rate": 1.755213462604104e-05, + "loss": 0.3232089877128601, + "step": 2006 + }, + { + "epoch": 0.5329969459567122, + "grad_norm": 1.0186576745896931, + "learning_rate": 1.7549255843942875e-05, + "loss": 0.29784274101257324, + "step": 2007 + }, + { + "epoch": 0.5332625149382552, + "grad_norm": 1.0470325382276877, + "learning_rate": 1.7546375606437216e-05, + "loss": 0.31421899795532227, + "step": 
2008 + }, + { + "epoch": 0.5335280839197981, + "grad_norm": 1.0641694414781755, + "learning_rate": 1.7543493914079345e-05, + "loss": 0.30681121349334717, + "step": 2009 + }, + { + "epoch": 0.5337936529013412, + "grad_norm": 1.0092085906510277, + "learning_rate": 1.7540610767424813e-05, + "loss": 0.3114027976989746, + "step": 2010 + }, + { + "epoch": 0.5340592218828841, + "grad_norm": 1.0064230726553411, + "learning_rate": 1.753772616702946e-05, + "loss": 0.3030378520488739, + "step": 2011 + }, + { + "epoch": 0.5343247908644271, + "grad_norm": 1.1096181297712675, + "learning_rate": 1.75348401134494e-05, + "loss": 0.30272024869918823, + "step": 2012 + }, + { + "epoch": 0.53459035984597, + "grad_norm": 1.049795668852804, + "learning_rate": 1.7531952607241033e-05, + "loss": 0.35117241740226746, + "step": 2013 + }, + { + "epoch": 0.534855928827513, + "grad_norm": 1.2552056089457548, + "learning_rate": 1.7529063648961035e-05, + "loss": 0.297889769077301, + "step": 2014 + }, + { + "epoch": 0.5351214978090559, + "grad_norm": 1.1238332501182418, + "learning_rate": 1.752617323916636e-05, + "loss": 0.32858210802078247, + "step": 2015 + }, + { + "epoch": 0.5353870667905989, + "grad_norm": 1.117582559290418, + "learning_rate": 1.7523281378414246e-05, + "loss": 0.3095484673976898, + "step": 2016 + }, + { + "epoch": 0.5356526357721418, + "grad_norm": 1.1072331793921826, + "learning_rate": 1.752038806726222e-05, + "loss": 0.34490731358528137, + "step": 2017 + }, + { + "epoch": 0.5359182047536848, + "grad_norm": 1.1427367564985542, + "learning_rate": 1.751749330626806e-05, + "loss": 0.35144859552383423, + "step": 2018 + }, + { + "epoch": 0.5361837737352277, + "grad_norm": 1.0337528414474293, + "learning_rate": 1.751459709598985e-05, + "loss": 0.26337549090385437, + "step": 2019 + }, + { + "epoch": 0.5364493427167707, + "grad_norm": 1.0719958558069054, + "learning_rate": 1.7511699436985952e-05, + "loss": 0.3235297203063965, + "step": 2020 + }, + { + "epoch": 0.5367149116983136, + 
"grad_norm": 1.1655117185465573, + "learning_rate": 1.7508800329814993e-05, + "loss": 0.35195302963256836, + "step": 2021 + }, + { + "epoch": 0.5369804806798566, + "grad_norm": 1.0547432431007058, + "learning_rate": 1.7505899775035887e-05, + "loss": 0.3226467967033386, + "step": 2022 + }, + { + "epoch": 0.5372460496613995, + "grad_norm": 1.0406958245289468, + "learning_rate": 1.750299777320783e-05, + "loss": 0.30616605281829834, + "step": 2023 + }, + { + "epoch": 0.5375116186429425, + "grad_norm": 1.074902411593199, + "learning_rate": 1.7500094324890294e-05, + "loss": 0.3007400333881378, + "step": 2024 + }, + { + "epoch": 0.5377771876244855, + "grad_norm": 1.1883491645763606, + "learning_rate": 1.7497189430643025e-05, + "loss": 0.35409432649612427, + "step": 2025 + }, + { + "epoch": 0.5380427566060284, + "grad_norm": 1.6951314154408594, + "learning_rate": 1.7494283091026053e-05, + "loss": 0.33718281984329224, + "step": 2026 + }, + { + "epoch": 0.5383083255875714, + "grad_norm": 1.0940933435725269, + "learning_rate": 1.749137530659969e-05, + "loss": 0.3589650094509125, + "step": 2027 + }, + { + "epoch": 0.5385738945691143, + "grad_norm": 1.1114345705753812, + "learning_rate": 1.7488466077924525e-05, + "loss": 0.35314273834228516, + "step": 2028 + }, + { + "epoch": 0.5388394635506573, + "grad_norm": 1.017869922891923, + "learning_rate": 1.7485555405561412e-05, + "loss": 0.28393587470054626, + "step": 2029 + }, + { + "epoch": 0.5391050325322002, + "grad_norm": 1.0276825009259218, + "learning_rate": 1.7482643290071503e-05, + "loss": 0.3262496292591095, + "step": 2030 + }, + { + "epoch": 0.5393706015137432, + "grad_norm": 1.122887144479208, + "learning_rate": 1.7479729732016218e-05, + "loss": 0.3549670875072479, + "step": 2031 + }, + { + "epoch": 0.5396361704952861, + "grad_norm": 1.0211791251004596, + "learning_rate": 1.7476814731957253e-05, + "loss": 0.30668947100639343, + "step": 2032 + }, + { + "epoch": 0.5399017394768291, + "grad_norm": 0.9278865240006526, + 
"learning_rate": 1.747389829045659e-05, + "loss": 0.2942228317260742, + "step": 2033 + }, + { + "epoch": 0.540167308458372, + "grad_norm": 1.023956047651912, + "learning_rate": 1.7470980408076484e-05, + "loss": 0.3166583478450775, + "step": 2034 + }, + { + "epoch": 0.540432877439915, + "grad_norm": 1.1503051826481139, + "learning_rate": 1.7468061085379467e-05, + "loss": 0.35149675607681274, + "step": 2035 + }, + { + "epoch": 0.5406984464214579, + "grad_norm": 1.1081467050264138, + "learning_rate": 1.7465140322928353e-05, + "loss": 0.32645004987716675, + "step": 2036 + }, + { + "epoch": 0.5409640154030009, + "grad_norm": 1.1656339653416823, + "learning_rate": 1.7462218121286224e-05, + "loss": 0.3078027367591858, + "step": 2037 + }, + { + "epoch": 0.5412295843845439, + "grad_norm": 1.0310810248927436, + "learning_rate": 1.7459294481016452e-05, + "loss": 0.28726300597190857, + "step": 2038 + }, + { + "epoch": 0.5414951533660869, + "grad_norm": 1.028103971871598, + "learning_rate": 1.7456369402682675e-05, + "loss": 0.29330572485923767, + "step": 2039 + }, + { + "epoch": 0.5417607223476298, + "grad_norm": 1.176742297493161, + "learning_rate": 1.7453442886848818e-05, + "loss": 0.3151019215583801, + "step": 2040 + }, + { + "epoch": 0.5420262913291728, + "grad_norm": 1.0830810759861134, + "learning_rate": 1.745051493407908e-05, + "loss": 0.3267561197280884, + "step": 2041 + }, + { + "epoch": 0.5422918603107157, + "grad_norm": 1.0462822233377385, + "learning_rate": 1.7447585544937933e-05, + "loss": 0.2834410071372986, + "step": 2042 + }, + { + "epoch": 0.5425574292922587, + "grad_norm": 0.9922210453154783, + "learning_rate": 1.7444654719990128e-05, + "loss": 0.29896080493927, + "step": 2043 + }, + { + "epoch": 0.5428229982738016, + "grad_norm": 1.0716195406510356, + "learning_rate": 1.7441722459800695e-05, + "loss": 0.3084600865840912, + "step": 2044 + }, + { + "epoch": 0.5430885672553446, + "grad_norm": 1.100381998832612, + "learning_rate": 1.743878876493494e-05, + "loss": 
0.3178163170814514, + "step": 2045 + }, + { + "epoch": 0.5433541362368876, + "grad_norm": 1.1512124937535644, + "learning_rate": 1.743585363595844e-05, + "loss": 0.32886385917663574, + "step": 2046 + }, + { + "epoch": 0.5436197052184305, + "grad_norm": 1.0499932799675828, + "learning_rate": 1.743291707343706e-05, + "loss": 0.31810784339904785, + "step": 2047 + }, + { + "epoch": 0.5438852741999735, + "grad_norm": 0.994229574171737, + "learning_rate": 1.7429979077936928e-05, + "loss": 0.3003198504447937, + "step": 2048 + }, + { + "epoch": 0.5441508431815164, + "grad_norm": 1.1622503660754158, + "learning_rate": 1.7427039650024462e-05, + "loss": 0.33889323472976685, + "step": 2049 + }, + { + "epoch": 0.5444164121630594, + "grad_norm": 1.062972427778211, + "learning_rate": 1.7424098790266343e-05, + "loss": 0.3238763213157654, + "step": 2050 + }, + { + "epoch": 0.5446819811446023, + "grad_norm": 1.3651581380225686, + "learning_rate": 1.742115649922954e-05, + "loss": 0.34304776787757874, + "step": 2051 + }, + { + "epoch": 0.5449475501261453, + "grad_norm": 1.1192647204238841, + "learning_rate": 1.741821277748128e-05, + "loss": 0.31528347730636597, + "step": 2052 + }, + { + "epoch": 0.5452131191076882, + "grad_norm": 1.0728286121769783, + "learning_rate": 1.7415267625589094e-05, + "loss": 0.2992726266384125, + "step": 2053 + }, + { + "epoch": 0.5454786880892312, + "grad_norm": 1.0217638219637288, + "learning_rate": 1.741232104412076e-05, + "loss": 0.31706419587135315, + "step": 2054 + }, + { + "epoch": 0.5457442570707741, + "grad_norm": 1.8373163603702176, + "learning_rate": 1.7409373033644355e-05, + "loss": 0.2887676954269409, + "step": 2055 + }, + { + "epoch": 0.5460098260523171, + "grad_norm": 1.1434290988558236, + "learning_rate": 1.740642359472821e-05, + "loss": 0.3410964906215668, + "step": 2056 + }, + { + "epoch": 0.54627539503386, + "grad_norm": 1.0501323660770627, + "learning_rate": 1.740347272794095e-05, + "loss": 0.3711693286895752, + "step": 2057 + }, + { + 
"epoch": 0.546540964015403, + "grad_norm": 1.10922453334831, + "learning_rate": 1.7400520433851457e-05, + "loss": 0.3512499928474426, + "step": 2058 + }, + { + "epoch": 0.5468065329969459, + "grad_norm": 1.0790222544341648, + "learning_rate": 1.739756671302891e-05, + "loss": 0.3136678636074066, + "step": 2059 + }, + { + "epoch": 0.5470721019784889, + "grad_norm": 1.0417668658369865, + "learning_rate": 1.7394611566042748e-05, + "loss": 0.2983730435371399, + "step": 2060 + }, + { + "epoch": 0.5473376709600318, + "grad_norm": 1.1233530419836393, + "learning_rate": 1.7391654993462686e-05, + "loss": 0.36603933572769165, + "step": 2061 + }, + { + "epoch": 0.5476032399415748, + "grad_norm": 1.1758952832381078, + "learning_rate": 1.7388696995858717e-05, + "loss": 0.3651789128780365, + "step": 2062 + }, + { + "epoch": 0.5478688089231177, + "grad_norm": 1.2065493864331982, + "learning_rate": 1.7385737573801108e-05, + "loss": 0.30580615997314453, + "step": 2063 + }, + { + "epoch": 0.5481343779046607, + "grad_norm": 0.981372496476623, + "learning_rate": 1.7382776727860406e-05, + "loss": 0.2630755305290222, + "step": 2064 + }, + { + "epoch": 0.5483999468862036, + "grad_norm": 1.0020540486713174, + "learning_rate": 1.7379814458607416e-05, + "loss": 0.2947537899017334, + "step": 2065 + }, + { + "epoch": 0.5486655158677467, + "grad_norm": 1.034048631807644, + "learning_rate": 1.737685076661324e-05, + "loss": 0.3119455873966217, + "step": 2066 + }, + { + "epoch": 0.5489310848492897, + "grad_norm": 1.052273536899897, + "learning_rate": 1.7373885652449237e-05, + "loss": 0.3162347972393036, + "step": 2067 + }, + { + "epoch": 0.5491966538308326, + "grad_norm": 1.2320011234530202, + "learning_rate": 1.7370919116687047e-05, + "loss": 0.34120452404022217, + "step": 2068 + }, + { + "epoch": 0.5494622228123756, + "grad_norm": 1.095244169583748, + "learning_rate": 1.7367951159898583e-05, + "loss": 0.3126780092716217, + "step": 2069 + }, + { + "epoch": 0.5497277917939185, + "grad_norm": 
0.9591128480333501, + "learning_rate": 1.7364981782656033e-05, + "loss": 0.2833349406719208, + "step": 2070 + }, + { + "epoch": 0.5499933607754615, + "grad_norm": 1.0921809927618633, + "learning_rate": 1.7362010985531855e-05, + "loss": 0.31617453694343567, + "step": 2071 + }, + { + "epoch": 0.5502589297570044, + "grad_norm": 1.0809700153666713, + "learning_rate": 1.735903876909879e-05, + "loss": 0.31372442841529846, + "step": 2072 + }, + { + "epoch": 0.5505244987385474, + "grad_norm": 1.1616077591637106, + "learning_rate": 1.735606513392984e-05, + "loss": 0.3500489592552185, + "step": 2073 + }, + { + "epoch": 0.5507900677200903, + "grad_norm": 1.0373404262028456, + "learning_rate": 1.735309008059829e-05, + "loss": 0.3219031095504761, + "step": 2074 + }, + { + "epoch": 0.5510556367016333, + "grad_norm": 1.0701365395287485, + "learning_rate": 1.7350113609677694e-05, + "loss": 0.32419610023498535, + "step": 2075 + }, + { + "epoch": 0.5513212056831762, + "grad_norm": 1.1054492395059694, + "learning_rate": 1.7347135721741874e-05, + "loss": 0.34804612398147583, + "step": 2076 + }, + { + "epoch": 0.5515867746647192, + "grad_norm": 1.09814942010155, + "learning_rate": 1.7344156417364946e-05, + "loss": 0.33105939626693726, + "step": 2077 + }, + { + "epoch": 0.5518523436462621, + "grad_norm": 1.0139790776190714, + "learning_rate": 1.7341175697121273e-05, + "loss": 0.3426011800765991, + "step": 2078 + }, + { + "epoch": 0.5521179126278051, + "grad_norm": 1.1120942872149455, + "learning_rate": 1.7338193561585507e-05, + "loss": 0.33207643032073975, + "step": 2079 + }, + { + "epoch": 0.552383481609348, + "grad_norm": 0.9807946500665143, + "learning_rate": 1.7335210011332573e-05, + "loss": 0.31849467754364014, + "step": 2080 + }, + { + "epoch": 0.552649050590891, + "grad_norm": 1.081622565959563, + "learning_rate": 1.7332225046937655e-05, + "loss": 0.3549337685108185, + "step": 2081 + }, + { + "epoch": 0.5529146195724339, + "grad_norm": 0.9652343930669623, + "learning_rate": 
1.7329238668976224e-05, + "loss": 0.2850857377052307, + "step": 2082 + }, + { + "epoch": 0.5531801885539769, + "grad_norm": 1.1370461672740964, + "learning_rate": 1.732625087802402e-05, + "loss": 0.3277609348297119, + "step": 2083 + }, + { + "epoch": 0.5534457575355198, + "grad_norm": 1.0712095451099939, + "learning_rate": 1.732326167465705e-05, + "loss": 0.2951444983482361, + "step": 2084 + }, + { + "epoch": 0.5537113265170628, + "grad_norm": 1.0893938459197319, + "learning_rate": 1.7320271059451597e-05, + "loss": 0.36634138226509094, + "step": 2085 + }, + { + "epoch": 0.5539768954986057, + "grad_norm": 1.060256238160636, + "learning_rate": 1.7317279032984222e-05, + "loss": 0.3407907783985138, + "step": 2086 + }, + { + "epoch": 0.5542424644801487, + "grad_norm": 1.0563310141876696, + "learning_rate": 1.7314285595831747e-05, + "loss": 0.34038978815078735, + "step": 2087 + }, + { + "epoch": 0.5545080334616916, + "grad_norm": 1.0558109709205228, + "learning_rate": 1.7311290748571273e-05, + "loss": 0.337898313999176, + "step": 2088 + }, + { + "epoch": 0.5547736024432346, + "grad_norm": 1.1543867929059073, + "learning_rate": 1.7308294491780175e-05, + "loss": 0.3250765800476074, + "step": 2089 + }, + { + "epoch": 0.5550391714247775, + "grad_norm": 1.101568217376945, + "learning_rate": 1.730529682603609e-05, + "loss": 0.31562721729278564, + "step": 2090 + }, + { + "epoch": 0.5553047404063205, + "grad_norm": 1.2678079753749867, + "learning_rate": 1.730229775191693e-05, + "loss": 0.32757896184921265, + "step": 2091 + }, + { + "epoch": 0.5555703093878634, + "grad_norm": 1.1010819086774664, + "learning_rate": 1.7299297270000894e-05, + "loss": 0.35861605405807495, + "step": 2092 + }, + { + "epoch": 0.5558358783694064, + "grad_norm": 1.0999873688088635, + "learning_rate": 1.7296295380866425e-05, + "loss": 0.3383220434188843, + "step": 2093 + }, + { + "epoch": 0.5561014473509495, + "grad_norm": 1.1431134206724336, + "learning_rate": 1.7293292085092263e-05, + "loss": 
0.30144187808036804, + "step": 2094 + }, + { + "epoch": 0.5563670163324924, + "grad_norm": 1.0354659821546437, + "learning_rate": 1.72902873832574e-05, + "loss": 0.2626546323299408, + "step": 2095 + }, + { + "epoch": 0.5566325853140354, + "grad_norm": 1.0939710377386638, + "learning_rate": 1.7287281275941112e-05, + "loss": 0.3289363980293274, + "step": 2096 + }, + { + "epoch": 0.5568981542955783, + "grad_norm": 0.9797533003070389, + "learning_rate": 1.7284273763722943e-05, + "loss": 0.26631784439086914, + "step": 2097 + }, + { + "epoch": 0.5571637232771213, + "grad_norm": 1.0035421194069876, + "learning_rate": 1.7281264847182697e-05, + "loss": 0.3051939606666565, + "step": 2098 + }, + { + "epoch": 0.5574292922586642, + "grad_norm": 1.0515034870910809, + "learning_rate": 1.7278254526900468e-05, + "loss": 0.34456121921539307, + "step": 2099 + }, + { + "epoch": 0.5576948612402072, + "grad_norm": 1.2038994359149542, + "learning_rate": 1.72752428034566e-05, + "loss": 0.2747807502746582, + "step": 2100 + }, + { + "epoch": 0.5579604302217501, + "grad_norm": 2.186270123050143, + "learning_rate": 1.7272229677431723e-05, + "loss": 0.31111812591552734, + "step": 2101 + }, + { + "epoch": 0.5582259992032931, + "grad_norm": 1.0150701360001215, + "learning_rate": 1.7269215149406737e-05, + "loss": 0.29648226499557495, + "step": 2102 + }, + { + "epoch": 0.558491568184836, + "grad_norm": 0.9846402594569152, + "learning_rate": 1.72661992199628e-05, + "loss": 0.28303876519203186, + "step": 2103 + }, + { + "epoch": 0.558757137166379, + "grad_norm": 1.1069492435421613, + "learning_rate": 1.726318188968135e-05, + "loss": 0.30540165305137634, + "step": 2104 + }, + { + "epoch": 0.5590227061479219, + "grad_norm": 1.2177152582591586, + "learning_rate": 1.726016315914409e-05, + "loss": 0.31810393929481506, + "step": 2105 + }, + { + "epoch": 0.5592882751294649, + "grad_norm": 1.134577587954556, + "learning_rate": 1.7257143028933004e-05, + "loss": 0.33605068922042847, + "step": 2106 + }, + { + 
"epoch": 0.5595538441110078, + "grad_norm": 1.089019585879268, + "learning_rate": 1.725412149963033e-05, + "loss": 0.3340590298175812, + "step": 2107 + }, + { + "epoch": 0.5598194130925508, + "grad_norm": 0.9872121137775324, + "learning_rate": 1.7251098571818586e-05, + "loss": 0.29560500383377075, + "step": 2108 + }, + { + "epoch": 0.5600849820740937, + "grad_norm": 1.0964006197085026, + "learning_rate": 1.7248074246080555e-05, + "loss": 0.30100107192993164, + "step": 2109 + }, + { + "epoch": 0.5603505510556367, + "grad_norm": 1.1506338140671328, + "learning_rate": 1.7245048522999294e-05, + "loss": 0.35551172494888306, + "step": 2110 + }, + { + "epoch": 0.5606161200371796, + "grad_norm": 1.0513397818607815, + "learning_rate": 1.724202140315812e-05, + "loss": 0.3182663023471832, + "step": 2111 + }, + { + "epoch": 0.5608816890187226, + "grad_norm": 1.092960095111009, + "learning_rate": 1.723899288714064e-05, + "loss": 0.3160201609134674, + "step": 2112 + }, + { + "epoch": 0.5611472580002655, + "grad_norm": 1.0656744789709975, + "learning_rate": 1.72359629755307e-05, + "loss": 0.3126063942909241, + "step": 2113 + }, + { + "epoch": 0.5614128269818085, + "grad_norm": 1.0376603045942787, + "learning_rate": 1.723293166891244e-05, + "loss": 0.3222552239894867, + "step": 2114 + }, + { + "epoch": 0.5616783959633515, + "grad_norm": 1.1154320347150413, + "learning_rate": 1.722989896787026e-05, + "loss": 0.33601805567741394, + "step": 2115 + }, + { + "epoch": 0.5619439649448944, + "grad_norm": 1.0241046952841495, + "learning_rate": 1.722686487298883e-05, + "loss": 0.28679755330085754, + "step": 2116 + }, + { + "epoch": 0.5622095339264374, + "grad_norm": 0.9498185678215705, + "learning_rate": 1.722382938485308e-05, + "loss": 0.2895340323448181, + "step": 2117 + }, + { + "epoch": 0.5624751029079803, + "grad_norm": 1.3753225282493697, + "learning_rate": 1.7220792504048227e-05, + "loss": 0.310183048248291, + "step": 2118 + }, + { + "epoch": 0.5627406718895233, + "grad_norm": 
0.9776305745351022, + "learning_rate": 1.7217754231159737e-05, + "loss": 0.2768586277961731, + "step": 2119 + }, + { + "epoch": 0.5630062408710662, + "grad_norm": 0.9838874956474448, + "learning_rate": 1.7214714566773358e-05, + "loss": 0.2785574793815613, + "step": 2120 + }, + { + "epoch": 0.5632718098526092, + "grad_norm": 1.1815363465765012, + "learning_rate": 1.72116735114751e-05, + "loss": 0.30544358491897583, + "step": 2121 + }, + { + "epoch": 0.5635373788341522, + "grad_norm": 1.0704755380783626, + "learning_rate": 1.7208631065851243e-05, + "loss": 0.31662559509277344, + "step": 2122 + }, + { + "epoch": 0.5638029478156952, + "grad_norm": 0.9893085866675072, + "learning_rate": 1.7205587230488335e-05, + "loss": 0.31466105580329895, + "step": 2123 + }, + { + "epoch": 0.5640685167972381, + "grad_norm": 1.1520731756820097, + "learning_rate": 1.720254200597319e-05, + "loss": 0.3471367359161377, + "step": 2124 + }, + { + "epoch": 0.5643340857787811, + "grad_norm": 1.056530578075146, + "learning_rate": 1.7199495392892892e-05, + "loss": 0.3325269818305969, + "step": 2125 + }, + { + "epoch": 0.564599654760324, + "grad_norm": 1.1040662937900534, + "learning_rate": 1.7196447391834797e-05, + "loss": 0.32423460483551025, + "step": 2126 + }, + { + "epoch": 0.564865223741867, + "grad_norm": 1.0403895710374138, + "learning_rate": 1.7193398003386514e-05, + "loss": 0.3083527088165283, + "step": 2127 + }, + { + "epoch": 0.5651307927234099, + "grad_norm": 1.1794029606730059, + "learning_rate": 1.7190347228135933e-05, + "loss": 0.3418716490268707, + "step": 2128 + }, + { + "epoch": 0.5653963617049529, + "grad_norm": 1.0509473075306943, + "learning_rate": 1.7187295066671214e-05, + "loss": 0.33037957549095154, + "step": 2129 + }, + { + "epoch": 0.5656619306864958, + "grad_norm": 1.229094630243538, + "learning_rate": 1.7184241519580767e-05, + "loss": 0.3383673131465912, + "step": 2130 + }, + { + "epoch": 0.5659274996680388, + "grad_norm": 0.9364933789266218, + "learning_rate": 
1.718118658745329e-05, + "loss": 0.27756133675575256, + "step": 2131 + }, + { + "epoch": 0.5661930686495817, + "grad_norm": 1.1307081535546069, + "learning_rate": 1.717813027087773e-05, + "loss": 0.2987852692604065, + "step": 2132 + }, + { + "epoch": 0.5664586376311247, + "grad_norm": 1.0924971268375117, + "learning_rate": 1.717507257044331e-05, + "loss": 0.30016621947288513, + "step": 2133 + }, + { + "epoch": 0.5667242066126676, + "grad_norm": 1.0923612277165435, + "learning_rate": 1.7172013486739528e-05, + "loss": 0.31592345237731934, + "step": 2134 + }, + { + "epoch": 0.5669897755942106, + "grad_norm": 1.0932899901018698, + "learning_rate": 1.716895302035613e-05, + "loss": 0.3500048816204071, + "step": 2135 + }, + { + "epoch": 0.5672553445757536, + "grad_norm": 1.0529476139624208, + "learning_rate": 1.7165891171883134e-05, + "loss": 0.32069307565689087, + "step": 2136 + }, + { + "epoch": 0.5675209135572965, + "grad_norm": 1.10329279559138, + "learning_rate": 1.7162827941910837e-05, + "loss": 0.3100130558013916, + "step": 2137 + }, + { + "epoch": 0.5677864825388395, + "grad_norm": 1.080836142172887, + "learning_rate": 1.715976333102979e-05, + "loss": 0.3205985128879547, + "step": 2138 + }, + { + "epoch": 0.5680520515203824, + "grad_norm": 1.0861679281182697, + "learning_rate": 1.715669733983081e-05, + "loss": 0.3243224024772644, + "step": 2139 + }, + { + "epoch": 0.5683176205019254, + "grad_norm": 1.0818895017967487, + "learning_rate": 1.7153629968904997e-05, + "loss": 0.3278832733631134, + "step": 2140 + }, + { + "epoch": 0.5685831894834683, + "grad_norm": 0.9949896264020713, + "learning_rate": 1.7150561218843693e-05, + "loss": 0.29137033224105835, + "step": 2141 + }, + { + "epoch": 0.5688487584650113, + "grad_norm": 1.0470808838345107, + "learning_rate": 1.7147491090238516e-05, + "loss": 0.3065168857574463, + "step": 2142 + }, + { + "epoch": 0.5691143274465542, + "grad_norm": 1.0368441449557109, + "learning_rate": 1.7144419583681354e-05, + "loss": 
0.3367912173271179, + "step": 2143 + }, + { + "epoch": 0.5693798964280972, + "grad_norm": 1.086220090850542, + "learning_rate": 1.7141346699764357e-05, + "loss": 0.32278239727020264, + "step": 2144 + }, + { + "epoch": 0.5696454654096401, + "grad_norm": 1.080765529331453, + "learning_rate": 1.713827243907994e-05, + "loss": 0.2887166440486908, + "step": 2145 + }, + { + "epoch": 0.5699110343911831, + "grad_norm": 1.1353258061614586, + "learning_rate": 1.713519680222079e-05, + "loss": 0.33214619755744934, + "step": 2146 + }, + { + "epoch": 0.570176603372726, + "grad_norm": 1.1145274058321384, + "learning_rate": 1.7132119789779846e-05, + "loss": 0.2865470051765442, + "step": 2147 + }, + { + "epoch": 0.570442172354269, + "grad_norm": 1.1145678631141913, + "learning_rate": 1.7129041402350317e-05, + "loss": 0.32746967673301697, + "step": 2148 + }, + { + "epoch": 0.5707077413358119, + "grad_norm": 1.0454330804264187, + "learning_rate": 1.712596164052569e-05, + "loss": 0.3029513359069824, + "step": 2149 + }, + { + "epoch": 0.570973310317355, + "grad_norm": 0.9779058393705973, + "learning_rate": 1.7122880504899698e-05, + "loss": 0.3052698075771332, + "step": 2150 + }, + { + "epoch": 0.5712388792988979, + "grad_norm": 1.055591157713499, + "learning_rate": 1.7119797996066355e-05, + "loss": 0.29221272468566895, + "step": 2151 + }, + { + "epoch": 0.5715044482804409, + "grad_norm": 1.0014263274293047, + "learning_rate": 1.711671411461993e-05, + "loss": 0.3165368139743805, + "step": 2152 + }, + { + "epoch": 0.5717700172619838, + "grad_norm": 1.0763149059705845, + "learning_rate": 1.7113628861154953e-05, + "loss": 0.30877187848091125, + "step": 2153 + }, + { + "epoch": 0.5720355862435268, + "grad_norm": 1.0826550246568385, + "learning_rate": 1.711054223626623e-05, + "loss": 0.2985781729221344, + "step": 2154 + }, + { + "epoch": 0.5723011552250697, + "grad_norm": 1.1063225967671673, + "learning_rate": 1.7107454240548825e-05, + "loss": 0.3449699878692627, + "step": 2155 + }, + { + 
"epoch": 0.5725667242066127, + "grad_norm": 1.0430022801820942, + "learning_rate": 1.7104364874598066e-05, + "loss": 0.3219606578350067, + "step": 2156 + }, + { + "epoch": 0.5728322931881557, + "grad_norm": 1.0017795464639185, + "learning_rate": 1.710127413900955e-05, + "loss": 0.3059350550174713, + "step": 2157 + }, + { + "epoch": 0.5730978621696986, + "grad_norm": 1.0027463566346577, + "learning_rate": 1.7098182034379132e-05, + "loss": 0.29461371898651123, + "step": 2158 + }, + { + "epoch": 0.5733634311512416, + "grad_norm": 1.0159484116581767, + "learning_rate": 1.709508856130293e-05, + "loss": 0.2998795509338379, + "step": 2159 + }, + { + "epoch": 0.5736290001327845, + "grad_norm": 1.0092216110834475, + "learning_rate": 1.7091993720377336e-05, + "loss": 0.28214582800865173, + "step": 2160 + }, + { + "epoch": 0.5738945691143275, + "grad_norm": 1.2106483053766084, + "learning_rate": 1.708889751219899e-05, + "loss": 0.3036864697933197, + "step": 2161 + }, + { + "epoch": 0.5741601380958704, + "grad_norm": 1.1139097359759478, + "learning_rate": 1.7085799937364815e-05, + "loss": 0.34146320819854736, + "step": 2162 + }, + { + "epoch": 0.5744257070774134, + "grad_norm": 1.0631963944232283, + "learning_rate": 1.708270099647198e-05, + "loss": 0.33996909856796265, + "step": 2163 + }, + { + "epoch": 0.5746912760589563, + "grad_norm": 1.0779467399705778, + "learning_rate": 1.7079600690117924e-05, + "loss": 0.3308744728565216, + "step": 2164 + }, + { + "epoch": 0.5749568450404993, + "grad_norm": 1.0447240453690412, + "learning_rate": 1.707649901890035e-05, + "loss": 0.2945587933063507, + "step": 2165 + }, + { + "epoch": 0.5752224140220422, + "grad_norm": 1.0321317558144223, + "learning_rate": 1.7073395983417227e-05, + "loss": 0.30348697304725647, + "step": 2166 + }, + { + "epoch": 0.5754879830035852, + "grad_norm": 1.025806147580304, + "learning_rate": 1.707029158426678e-05, + "loss": 0.28789055347442627, + "step": 2167 + }, + { + "epoch": 0.5757535519851281, + "grad_norm": 
1.168965754707192, + "learning_rate": 1.7067185822047502e-05, + "loss": 0.3026643693447113, + "step": 2168 + }, + { + "epoch": 0.5760191209666711, + "grad_norm": 1.1108861255752682, + "learning_rate": 1.7064078697358147e-05, + "loss": 0.34021061658859253, + "step": 2169 + }, + { + "epoch": 0.576284689948214, + "grad_norm": 1.1062563353075296, + "learning_rate": 1.7060970210797735e-05, + "loss": 0.32793867588043213, + "step": 2170 + }, + { + "epoch": 0.576550258929757, + "grad_norm": 1.1692826638365306, + "learning_rate": 1.705786036296554e-05, + "loss": 0.36144691705703735, + "step": 2171 + }, + { + "epoch": 0.5768158279112999, + "grad_norm": 1.1177501875227254, + "learning_rate": 1.7054749154461105e-05, + "loss": 0.3630291223526001, + "step": 2172 + }, + { + "epoch": 0.5770813968928429, + "grad_norm": 1.144365708172633, + "learning_rate": 1.705163658588424e-05, + "loss": 0.34964969754219055, + "step": 2173 + }, + { + "epoch": 0.5773469658743858, + "grad_norm": 1.0298961015626151, + "learning_rate": 1.7048522657835004e-05, + "loss": 0.2877815067768097, + "step": 2174 + }, + { + "epoch": 0.5776125348559288, + "grad_norm": 1.1148926749607628, + "learning_rate": 1.7045407370913732e-05, + "loss": 0.3185664713382721, + "step": 2175 + }, + { + "epoch": 0.5778781038374717, + "grad_norm": 1.0393243287048395, + "learning_rate": 1.704229072572101e-05, + "loss": 0.3035257160663605, + "step": 2176 + }, + { + "epoch": 0.5781436728190147, + "grad_norm": 1.048139429574759, + "learning_rate": 1.7039172722857695e-05, + "loss": 0.325702965259552, + "step": 2177 + }, + { + "epoch": 0.5784092418005577, + "grad_norm": 1.1046410504333486, + "learning_rate": 1.7036053362924896e-05, + "loss": 0.32837462425231934, + "step": 2178 + }, + { + "epoch": 0.5786748107821007, + "grad_norm": 1.066094854816524, + "learning_rate": 1.703293264652399e-05, + "loss": 0.3430028259754181, + "step": 2179 + }, + { + "epoch": 0.5789403797636437, + "grad_norm": 1.1007701198247044, + "learning_rate": 
1.702981057425662e-05, + "loss": 0.32792964577674866, + "step": 2180 + }, + { + "epoch": 0.5792059487451866, + "grad_norm": 0.9964902607677808, + "learning_rate": 1.7026687146724675e-05, + "loss": 0.3037140965461731, + "step": 2181 + }, + { + "epoch": 0.5794715177267296, + "grad_norm": 0.9962684392556416, + "learning_rate": 1.7023562364530322e-05, + "loss": 0.33083540201187134, + "step": 2182 + }, + { + "epoch": 0.5797370867082725, + "grad_norm": 0.9979777099745417, + "learning_rate": 1.702043622827598e-05, + "loss": 0.3108663260936737, + "step": 2183 + }, + { + "epoch": 0.5800026556898155, + "grad_norm": 0.9618495492417584, + "learning_rate": 1.7017308738564336e-05, + "loss": 0.2939792573451996, + "step": 2184 + }, + { + "epoch": 0.5802682246713584, + "grad_norm": 1.1315656989934186, + "learning_rate": 1.7014179895998322e-05, + "loss": 0.3686106503009796, + "step": 2185 + }, + { + "epoch": 0.5805337936529014, + "grad_norm": 1.0524191997810952, + "learning_rate": 1.7011049701181152e-05, + "loss": 0.3497159779071808, + "step": 2186 + }, + { + "epoch": 0.5807993626344443, + "grad_norm": 1.0989364128809138, + "learning_rate": 1.7007918154716286e-05, + "loss": 0.31730401515960693, + "step": 2187 + }, + { + "epoch": 0.5810649316159873, + "grad_norm": 1.0000330799865447, + "learning_rate": 1.7004785257207456e-05, + "loss": 0.3064701557159424, + "step": 2188 + }, + { + "epoch": 0.5813305005975302, + "grad_norm": 1.1111458283716926, + "learning_rate": 1.7001651009258635e-05, + "loss": 0.37174129486083984, + "step": 2189 + }, + { + "epoch": 0.5815960695790732, + "grad_norm": 1.068050904458805, + "learning_rate": 1.699851541147408e-05, + "loss": 0.3548140823841095, + "step": 2190 + }, + { + "epoch": 0.5818616385606161, + "grad_norm": 1.2340650081251097, + "learning_rate": 1.6995378464458292e-05, + "loss": 0.3486049473285675, + "step": 2191 + }, + { + "epoch": 0.5821272075421591, + "grad_norm": 1.996025853729682, + "learning_rate": 1.6992240168816037e-05, + "loss": 
0.3083210587501526, + "step": 2192 + }, + { + "epoch": 0.582392776523702, + "grad_norm": 1.0284637251594817, + "learning_rate": 1.6989100525152346e-05, + "loss": 0.3006829619407654, + "step": 2193 + }, + { + "epoch": 0.582658345505245, + "grad_norm": 1.103386023825705, + "learning_rate": 1.6985959534072502e-05, + "loss": 0.32856425642967224, + "step": 2194 + }, + { + "epoch": 0.5829239144867879, + "grad_norm": 1.1293873964177752, + "learning_rate": 1.6982817196182052e-05, + "loss": 0.3382526934146881, + "step": 2195 + }, + { + "epoch": 0.5831894834683309, + "grad_norm": 1.0326113865244562, + "learning_rate": 1.69796735120868e-05, + "loss": 0.3311583399772644, + "step": 2196 + }, + { + "epoch": 0.5834550524498738, + "grad_norm": 1.0267321140886136, + "learning_rate": 1.6976528482392815e-05, + "loss": 0.312778115272522, + "step": 2197 + }, + { + "epoch": 0.5837206214314168, + "grad_norm": 1.0148067463802801, + "learning_rate": 1.697338210770642e-05, + "loss": 0.2996736466884613, + "step": 2198 + }, + { + "epoch": 0.5839861904129597, + "grad_norm": 1.1885772355333009, + "learning_rate": 1.6970234388634192e-05, + "loss": 0.344571590423584, + "step": 2199 + }, + { + "epoch": 0.5842517593945027, + "grad_norm": 0.9183671512098872, + "learning_rate": 1.6967085325782984e-05, + "loss": 0.25299468636512756, + "step": 2200 + }, + { + "epoch": 0.5845173283760456, + "grad_norm": 1.042142544774348, + "learning_rate": 1.6963934919759896e-05, + "loss": 0.3080691695213318, + "step": 2201 + }, + { + "epoch": 0.5847828973575886, + "grad_norm": 1.0216299822000434, + "learning_rate": 1.6960783171172286e-05, + "loss": 0.27491697669029236, + "step": 2202 + }, + { + "epoch": 0.5850484663391315, + "grad_norm": 1.1629234714983534, + "learning_rate": 1.6957630080627772e-05, + "loss": 0.3422500193119049, + "step": 2203 + }, + { + "epoch": 0.5853140353206745, + "grad_norm": 1.0832524871656921, + "learning_rate": 1.695447564873424e-05, + "loss": 0.27703234553337097, + "step": 2204 + }, + { + 
"epoch": 0.5855796043022174, + "grad_norm": 1.0275000328668338, + "learning_rate": 1.6951319876099825e-05, + "loss": 0.3088543117046356, + "step": 2205 + }, + { + "epoch": 0.5858451732837605, + "grad_norm": 1.0671359142705343, + "learning_rate": 1.694816276333292e-05, + "loss": 0.29875609278678894, + "step": 2206 + }, + { + "epoch": 0.5861107422653035, + "grad_norm": 1.0185982306074886, + "learning_rate": 1.6945004311042176e-05, + "loss": 0.30804386734962463, + "step": 2207 + }, + { + "epoch": 0.5863763112468464, + "grad_norm": 1.081134235929082, + "learning_rate": 1.694184451983651e-05, + "loss": 0.3324572741985321, + "step": 2208 + }, + { + "epoch": 0.5866418802283894, + "grad_norm": 1.0822730402391103, + "learning_rate": 1.6938683390325096e-05, + "loss": 0.30302488803863525, + "step": 2209 + }, + { + "epoch": 0.5869074492099323, + "grad_norm": 1.1499037543983048, + "learning_rate": 1.6935520923117355e-05, + "loss": 0.3264358341693878, + "step": 2210 + }, + { + "epoch": 0.5871730181914753, + "grad_norm": 1.1305858167915457, + "learning_rate": 1.693235711882298e-05, + "loss": 0.3172164261341095, + "step": 2211 + }, + { + "epoch": 0.5874385871730182, + "grad_norm": 0.9910314790510931, + "learning_rate": 1.6929191978051908e-05, + "loss": 0.300851047039032, + "step": 2212 + }, + { + "epoch": 0.5877041561545612, + "grad_norm": 1.1122516205102002, + "learning_rate": 1.6926025501414352e-05, + "loss": 0.2887764871120453, + "step": 2213 + }, + { + "epoch": 0.5879697251361041, + "grad_norm": 1.0991421920944897, + "learning_rate": 1.692285768952076e-05, + "loss": 0.3246796727180481, + "step": 2214 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 1.1069795382063548, + "learning_rate": 1.6919688542981852e-05, + "loss": 0.30595412850379944, + "step": 2215 + }, + { + "epoch": 0.58850086309919, + "grad_norm": 1.068918741300791, + "learning_rate": 1.6916518062408604e-05, + "loss": 0.2885501980781555, + "step": 2216 + }, + { + "epoch": 0.588766432080733, + "grad_norm": 
1.066918066226772, + "learning_rate": 1.6913346248412245e-05, + "loss": 0.34449082612991333, + "step": 2217 + }, + { + "epoch": 0.5890320010622759, + "grad_norm": 1.0585511422631098, + "learning_rate": 1.6910173101604267e-05, + "loss": 0.29410409927368164, + "step": 2218 + }, + { + "epoch": 0.5892975700438189, + "grad_norm": 1.1710793080996782, + "learning_rate": 1.690699862259641e-05, + "loss": 0.3250378370285034, + "step": 2219 + }, + { + "epoch": 0.5895631390253618, + "grad_norm": 1.3327292763951073, + "learning_rate": 1.690382281200068e-05, + "loss": 0.34420648217201233, + "step": 2220 + }, + { + "epoch": 0.5898287080069048, + "grad_norm": 1.1196949637967406, + "learning_rate": 1.6900645670429338e-05, + "loss": 0.33951860666275024, + "step": 2221 + }, + { + "epoch": 0.5900942769884477, + "grad_norm": 1.064177847952839, + "learning_rate": 1.6897467198494892e-05, + "loss": 0.35045644640922546, + "step": 2222 + }, + { + "epoch": 0.5903598459699907, + "grad_norm": 1.0378256375427404, + "learning_rate": 1.689428739681012e-05, + "loss": 0.3262789845466614, + "step": 2223 + }, + { + "epoch": 0.5906254149515336, + "grad_norm": 1.0662878016953237, + "learning_rate": 1.689110626598805e-05, + "loss": 0.2959234118461609, + "step": 2224 + }, + { + "epoch": 0.5908909839330766, + "grad_norm": 1.040953230887288, + "learning_rate": 1.6887923806641965e-05, + "loss": 0.3185187876224518, + "step": 2225 + }, + { + "epoch": 0.5911565529146195, + "grad_norm": 0.9754385668000993, + "learning_rate": 1.6884740019385403e-05, + "loss": 0.2861860692501068, + "step": 2226 + }, + { + "epoch": 0.5914221218961625, + "grad_norm": 1.0067160421449919, + "learning_rate": 1.6881554904832163e-05, + "loss": 0.28718897700309753, + "step": 2227 + }, + { + "epoch": 0.5916876908777055, + "grad_norm": 1.0412433017248806, + "learning_rate": 1.68783684635963e-05, + "loss": 0.2919235825538635, + "step": 2228 + }, + { + "epoch": 0.5919532598592484, + "grad_norm": 0.9981457951279066, + "learning_rate": 
1.687518069629212e-05, + "loss": 0.29265689849853516, + "step": 2229 + }, + { + "epoch": 0.5922188288407914, + "grad_norm": 1.105624159979672, + "learning_rate": 1.6871991603534183e-05, + "loss": 0.3257937431335449, + "step": 2230 + }, + { + "epoch": 0.5924843978223343, + "grad_norm": 0.9776528734928177, + "learning_rate": 1.6868801185937318e-05, + "loss": 0.30709922313690186, + "step": 2231 + }, + { + "epoch": 0.5927499668038773, + "grad_norm": 1.0470693079191735, + "learning_rate": 1.6865609444116594e-05, + "loss": 0.34016695618629456, + "step": 2232 + }, + { + "epoch": 0.5930155357854202, + "grad_norm": 3.119158292180646, + "learning_rate": 1.686241637868734e-05, + "loss": 0.27988332509994507, + "step": 2233 + }, + { + "epoch": 0.5932811047669632, + "grad_norm": 1.0478488923431404, + "learning_rate": 1.685922199026514e-05, + "loss": 0.33241748809814453, + "step": 2234 + }, + { + "epoch": 0.5935466737485062, + "grad_norm": 1.131470783603603, + "learning_rate": 1.685602627946584e-05, + "loss": 0.29636645317077637, + "step": 2235 + }, + { + "epoch": 0.5938122427300492, + "grad_norm": 1.0270882549188534, + "learning_rate": 1.6852829246905532e-05, + "loss": 0.32173705101013184, + "step": 2236 + }, + { + "epoch": 0.5940778117115921, + "grad_norm": 1.0825392737706068, + "learning_rate": 1.6849630893200567e-05, + "loss": 0.318726122379303, + "step": 2237 + }, + { + "epoch": 0.5943433806931351, + "grad_norm": 1.0382165285294276, + "learning_rate": 1.684643121896755e-05, + "loss": 0.3085494339466095, + "step": 2238 + }, + { + "epoch": 0.594608949674678, + "grad_norm": 1.0527313536489507, + "learning_rate": 1.684323022482334e-05, + "loss": 0.3402160406112671, + "step": 2239 + }, + { + "epoch": 0.594874518656221, + "grad_norm": 1.0380085019224927, + "learning_rate": 1.684002791138505e-05, + "loss": 0.28099578619003296, + "step": 2240 + }, + { + "epoch": 0.5951400876377639, + "grad_norm": 1.0821564922133853, + "learning_rate": 1.6836824279270053e-05, + "loss": 
0.3049670159816742, + "step": 2241 + }, + { + "epoch": 0.5954056566193069, + "grad_norm": 1.0644252940512267, + "learning_rate": 1.6833619329095966e-05, + "loss": 0.2999834716320038, + "step": 2242 + }, + { + "epoch": 0.5956712256008498, + "grad_norm": 1.0828247808996563, + "learning_rate": 1.6830413061480663e-05, + "loss": 0.2976648509502411, + "step": 2243 + }, + { + "epoch": 0.5959367945823928, + "grad_norm": 0.9516700397999099, + "learning_rate": 1.6827205477042282e-05, + "loss": 0.2937200963497162, + "step": 2244 + }, + { + "epoch": 0.5962023635639357, + "grad_norm": 0.9800041770842799, + "learning_rate": 1.6823996576399208e-05, + "loss": 0.27944231033325195, + "step": 2245 + }, + { + "epoch": 0.5964679325454787, + "grad_norm": 1.2497901059935828, + "learning_rate": 1.6820786360170073e-05, + "loss": 0.37821248173713684, + "step": 2246 + }, + { + "epoch": 0.5967335015270216, + "grad_norm": 1.0764913922139379, + "learning_rate": 1.681757482897377e-05, + "loss": 0.31929296255111694, + "step": 2247 + }, + { + "epoch": 0.5969990705085646, + "grad_norm": 1.0997353700477965, + "learning_rate": 1.6814361983429446e-05, + "loss": 0.29905542731285095, + "step": 2248 + }, + { + "epoch": 0.5972646394901076, + "grad_norm": 1.1012066663218303, + "learning_rate": 1.6811147824156503e-05, + "loss": 0.31056714057922363, + "step": 2249 + }, + { + "epoch": 0.5975302084716505, + "grad_norm": 1.0740873036211436, + "learning_rate": 1.6807932351774585e-05, + "loss": 0.3311445415019989, + "step": 2250 + }, + { + "epoch": 0.5977957774531935, + "grad_norm": 0.9539008733822649, + "learning_rate": 1.6804715566903603e-05, + "loss": 0.28413334488868713, + "step": 2251 + }, + { + "epoch": 0.5980613464347364, + "grad_norm": 1.068533794622215, + "learning_rate": 1.6801497470163717e-05, + "loss": 0.27681154012680054, + "step": 2252 + }, + { + "epoch": 0.5983269154162794, + "grad_norm": 1.0654200190327086, + "learning_rate": 1.679827806217533e-05, + "loss": 0.290216863155365, + "step": 2253 + }, 
+ { + "epoch": 0.5985924843978223, + "grad_norm": 1.1041469834048565, + "learning_rate": 1.6795057343559115e-05, + "loss": 0.31263259053230286, + "step": 2254 + }, + { + "epoch": 0.5988580533793653, + "grad_norm": 1.126601485756597, + "learning_rate": 1.6791835314935984e-05, + "loss": 0.31527474522590637, + "step": 2255 + }, + { + "epoch": 0.5991236223609082, + "grad_norm": 1.078203294441185, + "learning_rate": 1.6788611976927104e-05, + "loss": 0.308803915977478, + "step": 2256 + }, + { + "epoch": 0.5993891913424512, + "grad_norm": 1.0503773076355036, + "learning_rate": 1.6785387330153898e-05, + "loss": 0.3038686215877533, + "step": 2257 + }, + { + "epoch": 0.5996547603239941, + "grad_norm": 1.0216209005739547, + "learning_rate": 1.6782161375238045e-05, + "loss": 0.32485973834991455, + "step": 2258 + }, + { + "epoch": 0.5999203293055371, + "grad_norm": 1.182450532742011, + "learning_rate": 1.6778934112801467e-05, + "loss": 0.32350587844848633, + "step": 2259 + }, + { + "epoch": 0.60018589828708, + "grad_norm": 1.0888151703509321, + "learning_rate": 1.6775705543466337e-05, + "loss": 0.31593745946884155, + "step": 2260 + }, + { + "epoch": 0.600451467268623, + "grad_norm": 1.0882766479814592, + "learning_rate": 1.6772475667855098e-05, + "loss": 0.3266843855381012, + "step": 2261 + }, + { + "epoch": 0.6007170362501659, + "grad_norm": 1.1815872316974045, + "learning_rate": 1.676924448659042e-05, + "loss": 0.3334394693374634, + "step": 2262 + }, + { + "epoch": 0.600982605231709, + "grad_norm": 1.1019346354795203, + "learning_rate": 1.676601200029524e-05, + "loss": 0.29688704013824463, + "step": 2263 + }, + { + "epoch": 0.6012481742132519, + "grad_norm": 1.0675092497220116, + "learning_rate": 1.6762778209592744e-05, + "loss": 0.3163599967956543, + "step": 2264 + }, + { + "epoch": 0.6015137431947949, + "grad_norm": 3.310146638883422, + "learning_rate": 1.675954311510637e-05, + "loss": 0.3001909554004669, + "step": 2265 + }, + { + "epoch": 0.6017793121763378, + "grad_norm": 
1.052342150287052, + "learning_rate": 1.6756306717459804e-05, + "loss": 0.306442528963089, + "step": 2266 + }, + { + "epoch": 0.6020448811578808, + "grad_norm": 1.0462245388504205, + "learning_rate": 1.6753069017276988e-05, + "loss": 0.32714736461639404, + "step": 2267 + }, + { + "epoch": 0.6023104501394237, + "grad_norm": 1.1462408299032063, + "learning_rate": 1.6749830015182106e-05, + "loss": 0.3276352286338806, + "step": 2268 + }, + { + "epoch": 0.6025760191209667, + "grad_norm": 1.196238497855594, + "learning_rate": 1.6746589711799607e-05, + "loss": 0.3151017427444458, + "step": 2269 + }, + { + "epoch": 0.6028415881025097, + "grad_norm": 1.0342963680315473, + "learning_rate": 1.674334810775418e-05, + "loss": 0.30252715945243835, + "step": 2270 + }, + { + "epoch": 0.6031071570840526, + "grad_norm": 1.013150034994447, + "learning_rate": 1.674010520367077e-05, + "loss": 0.28994205594062805, + "step": 2271 + }, + { + "epoch": 0.6033727260655956, + "grad_norm": 1.060884408167446, + "learning_rate": 1.6736861000174566e-05, + "loss": 0.31821542978286743, + "step": 2272 + }, + { + "epoch": 0.6036382950471385, + "grad_norm": 1.0745731746159097, + "learning_rate": 1.6733615497891018e-05, + "loss": 0.33488404750823975, + "step": 2273 + }, + { + "epoch": 0.6039038640286815, + "grad_norm": 1.1687722013665731, + "learning_rate": 1.6730368697445815e-05, + "loss": 0.32545825839042664, + "step": 2274 + }, + { + "epoch": 0.6041694330102244, + "grad_norm": 1.0959659967153625, + "learning_rate": 1.6727120599464904e-05, + "loss": 0.3229105770587921, + "step": 2275 + }, + { + "epoch": 0.6044350019917674, + "grad_norm": 1.0190980223229251, + "learning_rate": 1.672387120457448e-05, + "loss": 0.29090648889541626, + "step": 2276 + }, + { + "epoch": 0.6047005709733103, + "grad_norm": 1.0135966931724694, + "learning_rate": 1.6720620513400993e-05, + "loss": 0.3102695345878601, + "step": 2277 + }, + { + "epoch": 0.6049661399548533, + "grad_norm": 0.9853472262099896, + "learning_rate": 
1.6717368526571133e-05, + "loss": 0.3104533851146698, + "step": 2278 + }, + { + "epoch": 0.6052317089363962, + "grad_norm": 1.0624907138843722, + "learning_rate": 1.671411524471184e-05, + "loss": 0.3340798616409302, + "step": 2279 + }, + { + "epoch": 0.6054972779179392, + "grad_norm": 0.9362556276145145, + "learning_rate": 1.6710860668450318e-05, + "loss": 0.2807982563972473, + "step": 2280 + }, + { + "epoch": 0.6057628468994821, + "grad_norm": 1.0604829312359818, + "learning_rate": 1.6707604798414005e-05, + "loss": 0.28892064094543457, + "step": 2281 + }, + { + "epoch": 0.6060284158810251, + "grad_norm": 1.1005771261022437, + "learning_rate": 1.6704347635230594e-05, + "loss": 0.29660698771476746, + "step": 2282 + }, + { + "epoch": 0.606293984862568, + "grad_norm": 1.0826898129560842, + "learning_rate": 1.6701089179528032e-05, + "loss": 0.32079893350601196, + "step": 2283 + }, + { + "epoch": 0.606559553844111, + "grad_norm": 1.0711524337358722, + "learning_rate": 1.6697829431934508e-05, + "loss": 0.3464012145996094, + "step": 2284 + }, + { + "epoch": 0.6068251228256539, + "grad_norm": 1.113831391037599, + "learning_rate": 1.669456839307846e-05, + "loss": 0.3378494381904602, + "step": 2285 + }, + { + "epoch": 0.6070906918071969, + "grad_norm": 1.1314381443012484, + "learning_rate": 1.6691306063588583e-05, + "loss": 0.2856704294681549, + "step": 2286 + }, + { + "epoch": 0.6073562607887398, + "grad_norm": 1.117095467957477, + "learning_rate": 1.6688042444093816e-05, + "loss": 0.317970871925354, + "step": 2287 + }, + { + "epoch": 0.6076218297702828, + "grad_norm": 0.9765740214705895, + "learning_rate": 1.6684777535223338e-05, + "loss": 0.3067381978034973, + "step": 2288 + }, + { + "epoch": 0.6078873987518257, + "grad_norm": 0.9795122588790717, + "learning_rate": 1.6681511337606594e-05, + "loss": 0.28682243824005127, + "step": 2289 + }, + { + "epoch": 0.6081529677333687, + "grad_norm": 1.0967806384391572, + "learning_rate": 1.667824385187327e-05, + "loss": 
0.30516478419303894, + "step": 2290 + }, + { + "epoch": 0.6084185367149118, + "grad_norm": 1.2090889717256932, + "learning_rate": 1.6674975078653284e-05, + "loss": 0.3114034831523895, + "step": 2291 + }, + { + "epoch": 0.6086841056964547, + "grad_norm": 1.045779035897072, + "learning_rate": 1.6671705018576837e-05, + "loss": 0.3119916617870331, + "step": 2292 + }, + { + "epoch": 0.6089496746779977, + "grad_norm": 1.0110290976394836, + "learning_rate": 1.666843367227434e-05, + "loss": 0.2695278823375702, + "step": 2293 + }, + { + "epoch": 0.6092152436595406, + "grad_norm": 1.1042693591067085, + "learning_rate": 1.6665161040376483e-05, + "loss": 0.32162508368492126, + "step": 2294 + }, + { + "epoch": 0.6094808126410836, + "grad_norm": 1.1533266295102853, + "learning_rate": 1.6661887123514183e-05, + "loss": 0.3115222752094269, + "step": 2295 + }, + { + "epoch": 0.6097463816226265, + "grad_norm": 1.1903173397636237, + "learning_rate": 1.6658611922318618e-05, + "loss": 0.3239362835884094, + "step": 2296 + }, + { + "epoch": 0.6100119506041695, + "grad_norm": 1.0224008240467277, + "learning_rate": 1.66553354374212e-05, + "loss": 0.29716256260871887, + "step": 2297 + }, + { + "epoch": 0.6102775195857124, + "grad_norm": 1.1579823586849616, + "learning_rate": 1.6652057669453606e-05, + "loss": 0.3337557911872864, + "step": 2298 + }, + { + "epoch": 0.6105430885672554, + "grad_norm": 1.0726602627394455, + "learning_rate": 1.6648778619047747e-05, + "loss": 0.30258649587631226, + "step": 2299 + }, + { + "epoch": 0.6108086575487983, + "grad_norm": 1.0836532202857172, + "learning_rate": 1.6645498286835784e-05, + "loss": 0.3151426315307617, + "step": 2300 + }, + { + "epoch": 0.6110742265303413, + "grad_norm": 0.9639622977001232, + "learning_rate": 1.664221667345013e-05, + "loss": 0.274954617023468, + "step": 2301 + }, + { + "epoch": 0.6113397955118842, + "grad_norm": 1.0454921478368049, + "learning_rate": 1.6638933779523437e-05, + "loss": 0.3055363893508911, + "step": 2302 + }, + { + 
"epoch": 0.6116053644934272, + "grad_norm": 1.0132221767482874, + "learning_rate": 1.663564960568861e-05, + "loss": 0.30296921730041504, + "step": 2303 + }, + { + "epoch": 0.6118709334749701, + "grad_norm": 1.0766188111034134, + "learning_rate": 1.66323641525788e-05, + "loss": 0.3118343651294708, + "step": 2304 + }, + { + "epoch": 0.6121365024565131, + "grad_norm": 1.164685781665666, + "learning_rate": 1.6629077420827405e-05, + "loss": 0.3277447819709778, + "step": 2305 + }, + { + "epoch": 0.612402071438056, + "grad_norm": 1.11996036014055, + "learning_rate": 1.6625789411068063e-05, + "loss": 0.307643860578537, + "step": 2306 + }, + { + "epoch": 0.612667640419599, + "grad_norm": 1.0752891079202938, + "learning_rate": 1.6622500123934665e-05, + "loss": 0.3043777346611023, + "step": 2307 + }, + { + "epoch": 0.6129332094011419, + "grad_norm": 1.1229566611504027, + "learning_rate": 1.6619209560061352e-05, + "loss": 0.28634852170944214, + "step": 2308 + }, + { + "epoch": 0.6131987783826849, + "grad_norm": 1.1746890844036781, + "learning_rate": 1.6615917720082503e-05, + "loss": 0.33200016617774963, + "step": 2309 + }, + { + "epoch": 0.6134643473642278, + "grad_norm": 1.0620493011215435, + "learning_rate": 1.661262460463274e-05, + "loss": 0.26568055152893066, + "step": 2310 + }, + { + "epoch": 0.6137299163457708, + "grad_norm": 1.0408157138123326, + "learning_rate": 1.6609330214346945e-05, + "loss": 0.2772855758666992, + "step": 2311 + }, + { + "epoch": 0.6139954853273137, + "grad_norm": 1.2060076126932109, + "learning_rate": 1.6606034549860236e-05, + "loss": 0.3330409824848175, + "step": 2312 + }, + { + "epoch": 0.6142610543088567, + "grad_norm": 1.0235644562455184, + "learning_rate": 1.6602737611807975e-05, + "loss": 0.27702978253364563, + "step": 2313 + }, + { + "epoch": 0.6145266232903996, + "grad_norm": 1.1266755606893777, + "learning_rate": 1.6599439400825775e-05, + "loss": 0.29985183477401733, + "step": 2314 + }, + { + "epoch": 0.6147921922719426, + "grad_norm": 
1.0266522277907775, + "learning_rate": 1.659613991754949e-05, + "loss": 0.2666100859642029, + "step": 2315 + }, + { + "epoch": 0.6150577612534855, + "grad_norm": 1.0676553477298287, + "learning_rate": 1.6592839162615223e-05, + "loss": 0.2968613803386688, + "step": 2316 + }, + { + "epoch": 0.6153233302350285, + "grad_norm": 1.26155090118547, + "learning_rate": 1.6589537136659326e-05, + "loss": 0.2693714499473572, + "step": 2317 + }, + { + "epoch": 0.6155888992165715, + "grad_norm": 1.1411779960646509, + "learning_rate": 1.658623384031838e-05, + "loss": 0.3192713260650635, + "step": 2318 + }, + { + "epoch": 0.6158544681981145, + "grad_norm": 1.099028639770974, + "learning_rate": 1.658292927422923e-05, + "loss": 0.2958469092845917, + "step": 2319 + }, + { + "epoch": 0.6161200371796575, + "grad_norm": 1.0613129939040433, + "learning_rate": 1.657962343902895e-05, + "loss": 0.28580743074417114, + "step": 2320 + }, + { + "epoch": 0.6163856061612004, + "grad_norm": 1.2105545865052383, + "learning_rate": 1.6576316335354875e-05, + "loss": 0.34325680136680603, + "step": 2321 + }, + { + "epoch": 0.6166511751427434, + "grad_norm": 1.076014963599046, + "learning_rate": 1.657300796384457e-05, + "loss": 0.3220894932746887, + "step": 2322 + }, + { + "epoch": 0.6169167441242863, + "grad_norm": 1.003861259990267, + "learning_rate": 1.656969832513585e-05, + "loss": 0.2934642434120178, + "step": 2323 + }, + { + "epoch": 0.6171823131058293, + "grad_norm": 1.0182182491222724, + "learning_rate": 1.656638741986677e-05, + "loss": 0.3066999912261963, + "step": 2324 + }, + { + "epoch": 0.6174478820873722, + "grad_norm": 1.0780285957414313, + "learning_rate": 1.6563075248675645e-05, + "loss": 0.2947896122932434, + "step": 2325 + }, + { + "epoch": 0.6177134510689152, + "grad_norm": 1.1567241875430703, + "learning_rate": 1.6559761812201018e-05, + "loss": 0.33616161346435547, + "step": 2326 + }, + { + "epoch": 0.6179790200504581, + "grad_norm": 1.0754490235924812, + "learning_rate": 
1.6556447111081678e-05, + "loss": 0.29555875062942505, + "step": 2327 + }, + { + "epoch": 0.6182445890320011, + "grad_norm": 1.0070791342344025, + "learning_rate": 1.655313114595666e-05, + "loss": 0.276498019695282, + "step": 2328 + }, + { + "epoch": 0.618510158013544, + "grad_norm": 1.0894248364537533, + "learning_rate": 1.6549813917465242e-05, + "loss": 0.3081165552139282, + "step": 2329 + }, + { + "epoch": 0.618775726995087, + "grad_norm": 1.2153046006588315, + "learning_rate": 1.654649542624695e-05, + "loss": 0.3610053062438965, + "step": 2330 + }, + { + "epoch": 0.6190412959766299, + "grad_norm": 1.0676492266011808, + "learning_rate": 1.654317567294155e-05, + "loss": 0.2775106430053711, + "step": 2331 + }, + { + "epoch": 0.6193068649581729, + "grad_norm": 4.371469554540211, + "learning_rate": 1.653985465818905e-05, + "loss": 0.2915893793106079, + "step": 2332 + }, + { + "epoch": 0.6195724339397158, + "grad_norm": 1.0032536414224313, + "learning_rate": 1.6536532382629696e-05, + "loss": 0.30868977308273315, + "step": 2333 + }, + { + "epoch": 0.6198380029212588, + "grad_norm": 1.1011191125099704, + "learning_rate": 1.6533208846903996e-05, + "loss": 0.3083038330078125, + "step": 2334 + }, + { + "epoch": 0.6201035719028017, + "grad_norm": 0.9895882037041855, + "learning_rate": 1.652988405165268e-05, + "loss": 0.25192466378211975, + "step": 2335 + }, + { + "epoch": 0.6203691408843447, + "grad_norm": 1.1020677364796136, + "learning_rate": 1.6526557997516737e-05, + "loss": 0.32154130935668945, + "step": 2336 + }, + { + "epoch": 0.6206347098658876, + "grad_norm": 1.1174587266065723, + "learning_rate": 1.6523230685137382e-05, + "loss": 0.2860945165157318, + "step": 2337 + }, + { + "epoch": 0.6209002788474306, + "grad_norm": 1.1647384960602913, + "learning_rate": 1.6519902115156084e-05, + "loss": 0.3279789984226227, + "step": 2338 + }, + { + "epoch": 0.6211658478289735, + "grad_norm": 1.062678685453679, + "learning_rate": 1.6516572288214555e-05, + "loss": 
0.3082200884819031, + "step": 2339 + }, + { + "epoch": 0.6214314168105165, + "grad_norm": 1.1253285275737313, + "learning_rate": 1.6513241204954745e-05, + "loss": 0.29032304883003235, + "step": 2340 + }, + { + "epoch": 0.6216969857920595, + "grad_norm": 1.004918906125766, + "learning_rate": 1.6509908866018843e-05, + "loss": 0.3096848130226135, + "step": 2341 + }, + { + "epoch": 0.6219625547736024, + "grad_norm": 1.021047856460921, + "learning_rate": 1.6506575272049294e-05, + "loss": 0.309989333152771, + "step": 2342 + }, + { + "epoch": 0.6222281237551454, + "grad_norm": 1.119097166323709, + "learning_rate": 1.6503240423688768e-05, + "loss": 0.311350554227829, + "step": 2343 + }, + { + "epoch": 0.6224936927366883, + "grad_norm": 1.0659510240862446, + "learning_rate": 1.6499904321580187e-05, + "loss": 0.3313952386379242, + "step": 2344 + }, + { + "epoch": 0.6227592617182313, + "grad_norm": 1.0702797293760455, + "learning_rate": 1.649656696636671e-05, + "loss": 0.2984781265258789, + "step": 2345 + }, + { + "epoch": 0.6230248306997742, + "grad_norm": 1.0312282361562104, + "learning_rate": 1.6493228358691748e-05, + "loss": 0.3058238625526428, + "step": 2346 + }, + { + "epoch": 0.6232903996813173, + "grad_norm": 1.0462474005488736, + "learning_rate": 1.6489888499198935e-05, + "loss": 0.33439138531684875, + "step": 2347 + }, + { + "epoch": 0.6235559686628602, + "grad_norm": 1.0386002000588619, + "learning_rate": 1.6486547388532157e-05, + "loss": 0.2883133292198181, + "step": 2348 + }, + { + "epoch": 0.6238215376444032, + "grad_norm": 0.9997410916606129, + "learning_rate": 1.648320502733555e-05, + "loss": 0.30258435010910034, + "step": 2349 + }, + { + "epoch": 0.6240871066259461, + "grad_norm": 1.0226158069339855, + "learning_rate": 1.6479861416253476e-05, + "loss": 0.316353440284729, + "step": 2350 + }, + { + "epoch": 0.6243526756074891, + "grad_norm": 1.0638089423798769, + "learning_rate": 1.647651655593054e-05, + "loss": 0.3230556547641754, + "step": 2351 + }, + { + 
"epoch": 0.624618244589032, + "grad_norm": 1.2043111611037318, + "learning_rate": 1.6473170447011593e-05, + "loss": 0.3327128291130066, + "step": 2352 + }, + { + "epoch": 0.624883813570575, + "grad_norm": 1.081123131766037, + "learning_rate": 1.6469823090141733e-05, + "loss": 0.3152993619441986, + "step": 2353 + }, + { + "epoch": 0.6251493825521179, + "grad_norm": 1.0655193061859811, + "learning_rate": 1.6466474485966286e-05, + "loss": 0.26792511343955994, + "step": 2354 + }, + { + "epoch": 0.6254149515336609, + "grad_norm": 1.121022507517606, + "learning_rate": 1.6463124635130824e-05, + "loss": 0.31665652990341187, + "step": 2355 + }, + { + "epoch": 0.6256805205152038, + "grad_norm": 1.0108098757868682, + "learning_rate": 1.645977353828115e-05, + "loss": 0.29573655128479004, + "step": 2356 + }, + { + "epoch": 0.6259460894967468, + "grad_norm": 1.0973823257435635, + "learning_rate": 1.6456421196063334e-05, + "loss": 0.3210436999797821, + "step": 2357 + }, + { + "epoch": 0.6262116584782897, + "grad_norm": 1.2424369194288305, + "learning_rate": 1.6453067609123656e-05, + "loss": 0.2837316691875458, + "step": 2358 + }, + { + "epoch": 0.6264772274598327, + "grad_norm": 1.0217734190114693, + "learning_rate": 1.6449712778108645e-05, + "loss": 0.2885812520980835, + "step": 2359 + }, + { + "epoch": 0.6267427964413756, + "grad_norm": 1.1369177274860889, + "learning_rate": 1.6446356703665078e-05, + "loss": 0.34908249974250793, + "step": 2360 + }, + { + "epoch": 0.6270083654229186, + "grad_norm": 0.9942151080492051, + "learning_rate": 1.6442999386439967e-05, + "loss": 0.30398470163345337, + "step": 2361 + }, + { + "epoch": 0.6272739344044616, + "grad_norm": 0.9838105681310805, + "learning_rate": 1.6439640827080565e-05, + "loss": 0.2780487537384033, + "step": 2362 + }, + { + "epoch": 0.6275395033860045, + "grad_norm": 0.956534505955689, + "learning_rate": 1.6436281026234357e-05, + "loss": 0.2575770616531372, + "step": 2363 + }, + { + "epoch": 0.6278050723675475, + "grad_norm": 
0.9675911826739493, + "learning_rate": 1.6432919984549077e-05, + "loss": 0.2888547480106354, + "step": 2364 + }, + { + "epoch": 0.6280706413490904, + "grad_norm": 1.2303845977564731, + "learning_rate": 1.6429557702672694e-05, + "loss": 0.3259009122848511, + "step": 2365 + }, + { + "epoch": 0.6283362103306334, + "grad_norm": 1.3923197622537806, + "learning_rate": 1.6426194181253415e-05, + "loss": 0.2899959683418274, + "step": 2366 + }, + { + "epoch": 0.6286017793121763, + "grad_norm": 1.058685915432802, + "learning_rate": 1.6422829420939688e-05, + "loss": 0.28471851348876953, + "step": 2367 + }, + { + "epoch": 0.6288673482937193, + "grad_norm": 1.0822140266216713, + "learning_rate": 1.64194634223802e-05, + "loss": 0.2958947420120239, + "step": 2368 + }, + { + "epoch": 0.6291329172752622, + "grad_norm": 1.1251439755337522, + "learning_rate": 1.6416096186223872e-05, + "loss": 0.3089750111103058, + "step": 2369 + }, + { + "epoch": 0.6293984862568052, + "grad_norm": 1.0517657351777636, + "learning_rate": 1.641272771311987e-05, + "loss": 0.31597089767456055, + "step": 2370 + }, + { + "epoch": 0.6296640552383481, + "grad_norm": 1.237586073778816, + "learning_rate": 1.6409358003717598e-05, + "loss": 0.2968488931655884, + "step": 2371 + }, + { + "epoch": 0.6299296242198911, + "grad_norm": 1.0062603647307793, + "learning_rate": 1.6405987058666694e-05, + "loss": 0.27532660961151123, + "step": 2372 + }, + { + "epoch": 0.630195193201434, + "grad_norm": 1.0061271713511417, + "learning_rate": 1.6402614878617037e-05, + "loss": 0.2800731956958771, + "step": 2373 + }, + { + "epoch": 0.630460762182977, + "grad_norm": 1.0867786948587836, + "learning_rate": 1.6399241464218744e-05, + "loss": 0.31728652119636536, + "step": 2374 + }, + { + "epoch": 0.63072633116452, + "grad_norm": 1.0634834793994077, + "learning_rate": 1.6395866816122167e-05, + "loss": 0.2776367664337158, + "step": 2375 + }, + { + "epoch": 0.630991900146063, + "grad_norm": 1.2696308030410766, + "learning_rate": 
1.63924909349779e-05, + "loss": 0.3308418095111847, + "step": 2376 + }, + { + "epoch": 0.6312574691276059, + "grad_norm": 1.027144235831433, + "learning_rate": 1.6389113821436775e-05, + "loss": 0.31589487195014954, + "step": 2377 + }, + { + "epoch": 0.6315230381091489, + "grad_norm": 0.9983142729953255, + "learning_rate": 1.6385735476149855e-05, + "loss": 0.27181899547576904, + "step": 2378 + }, + { + "epoch": 0.6317886070906918, + "grad_norm": 1.0656862561919935, + "learning_rate": 1.638235589976845e-05, + "loss": 0.2603747546672821, + "step": 2379 + }, + { + "epoch": 0.6320541760722348, + "grad_norm": 1.0543823342651422, + "learning_rate": 1.63789750929441e-05, + "loss": 0.29050707817077637, + "step": 2380 + }, + { + "epoch": 0.6323197450537777, + "grad_norm": 1.0310549396867945, + "learning_rate": 1.6375593056328586e-05, + "loss": 0.2979413866996765, + "step": 2381 + }, + { + "epoch": 0.6325853140353207, + "grad_norm": 1.0460005843129836, + "learning_rate": 1.6372209790573926e-05, + "loss": 0.30875420570373535, + "step": 2382 + }, + { + "epoch": 0.6328508830168637, + "grad_norm": 0.9698416111844145, + "learning_rate": 1.6368825296332366e-05, + "loss": 0.2755935788154602, + "step": 2383 + }, + { + "epoch": 0.6331164519984066, + "grad_norm": 1.1336778567410772, + "learning_rate": 1.6365439574256406e-05, + "loss": 0.3459136486053467, + "step": 2384 + }, + { + "epoch": 0.6333820209799496, + "grad_norm": 1.116018329054477, + "learning_rate": 1.6362052624998767e-05, + "loss": 0.29043829441070557, + "step": 2385 + }, + { + "epoch": 0.6336475899614925, + "grad_norm": 1.123039696178655, + "learning_rate": 1.635866444921242e-05, + "loss": 0.321551114320755, + "step": 2386 + }, + { + "epoch": 0.6339131589430355, + "grad_norm": 1.0451682936950502, + "learning_rate": 1.6355275047550553e-05, + "loss": 0.28478139638900757, + "step": 2387 + }, + { + "epoch": 0.6341787279245784, + "grad_norm": 1.060617338056141, + "learning_rate": 1.6351884420666616e-05, + "loss": 
0.30913087725639343, + "step": 2388 + }, + { + "epoch": 0.6344442969061214, + "grad_norm": 1.0996519301974148, + "learning_rate": 1.6348492569214275e-05, + "loss": 0.328342467546463, + "step": 2389 + }, + { + "epoch": 0.6347098658876643, + "grad_norm": 1.0657562962668374, + "learning_rate": 1.634509949384744e-05, + "loss": 0.3291119933128357, + "step": 2390 + }, + { + "epoch": 0.6349754348692073, + "grad_norm": 1.0805286951038287, + "learning_rate": 1.6341705195220257e-05, + "loss": 0.3542378544807434, + "step": 2391 + }, + { + "epoch": 0.6352410038507502, + "grad_norm": 1.1387422668526126, + "learning_rate": 1.63383096739871e-05, + "loss": 0.3167935609817505, + "step": 2392 + }, + { + "epoch": 0.6355065728322932, + "grad_norm": 0.9614211236141011, + "learning_rate": 1.63349129308026e-05, + "loss": 0.27623263001441956, + "step": 2393 + }, + { + "epoch": 0.6357721418138361, + "grad_norm": 1.1351525352268206, + "learning_rate": 1.6331514966321596e-05, + "loss": 0.3615761399269104, + "step": 2394 + }, + { + "epoch": 0.6360377107953791, + "grad_norm": 1.1430561223010627, + "learning_rate": 1.632811578119918e-05, + "loss": 0.3503292500972748, + "step": 2395 + }, + { + "epoch": 0.636303279776922, + "grad_norm": 1.0400637290516392, + "learning_rate": 1.6324715376090673e-05, + "loss": 0.2994767129421234, + "step": 2396 + }, + { + "epoch": 0.636568848758465, + "grad_norm": 1.2836743734514182, + "learning_rate": 1.6321313751651638e-05, + "loss": 0.29903143644332886, + "step": 2397 + }, + { + "epoch": 0.6368344177400079, + "grad_norm": 1.0273086079776361, + "learning_rate": 1.6317910908537865e-05, + "loss": 0.310536652803421, + "step": 2398 + }, + { + "epoch": 0.6370999867215509, + "grad_norm": 1.2820707601171073, + "learning_rate": 1.6314506847405382e-05, + "loss": 0.32584354281425476, + "step": 2399 + }, + { + "epoch": 0.6373655557030938, + "grad_norm": 1.186095937719991, + "learning_rate": 1.6311101568910448e-05, + "loss": 0.3536352217197418, + "step": 2400 + }, + { + 
"epoch": 0.6376311246846368, + "grad_norm": 1.0361661707144088, + "learning_rate": 1.6307695073709565e-05, + "loss": 0.3198434114456177, + "step": 2401 + }, + { + "epoch": 0.6378966936661797, + "grad_norm": 0.8809138916670839, + "learning_rate": 1.6304287362459462e-05, + "loss": 0.264182448387146, + "step": 2402 + }, + { + "epoch": 0.6381622626477228, + "grad_norm": 1.0526335869529386, + "learning_rate": 1.6300878435817115e-05, + "loss": 0.31182044744491577, + "step": 2403 + }, + { + "epoch": 0.6384278316292658, + "grad_norm": 1.0495886453587215, + "learning_rate": 1.6297468294439708e-05, + "loss": 0.28221404552459717, + "step": 2404 + }, + { + "epoch": 0.6386934006108087, + "grad_norm": 1.0211141314743026, + "learning_rate": 1.6294056938984693e-05, + "loss": 0.27788785099983215, + "step": 2405 + }, + { + "epoch": 0.6389589695923517, + "grad_norm": 1.068610455564362, + "learning_rate": 1.6290644370109728e-05, + "loss": 0.3300796151161194, + "step": 2406 + }, + { + "epoch": 0.6392245385738946, + "grad_norm": 1.0949996094795582, + "learning_rate": 1.628723058847272e-05, + "loss": 0.32170963287353516, + "step": 2407 + }, + { + "epoch": 0.6394901075554376, + "grad_norm": 1.1320309851276869, + "learning_rate": 1.628381559473181e-05, + "loss": 0.3243589997291565, + "step": 2408 + }, + { + "epoch": 0.6397556765369805, + "grad_norm": 1.4458945786524546, + "learning_rate": 1.6280399389545358e-05, + "loss": 0.311046838760376, + "step": 2409 + }, + { + "epoch": 0.6400212455185235, + "grad_norm": 1.0237689913585555, + "learning_rate": 1.6276981973571973e-05, + "loss": 0.2642543911933899, + "step": 2410 + }, + { + "epoch": 0.6402868145000664, + "grad_norm": 1.1424399755044237, + "learning_rate": 1.62735633474705e-05, + "loss": 0.3593730926513672, + "step": 2411 + }, + { + "epoch": 0.6405523834816094, + "grad_norm": 1.1145611429504636, + "learning_rate": 1.62701435119e-05, + "loss": 0.3147425353527069, + "step": 2412 + }, + { + "epoch": 0.6408179524631523, + "grad_norm": 
1.1400749315540035, + "learning_rate": 1.6266722467519783e-05, + "loss": 0.32639142870903015, + "step": 2413 + }, + { + "epoch": 0.6410835214446953, + "grad_norm": 1.1011849489387644, + "learning_rate": 1.626330021498938e-05, + "loss": 0.32113659381866455, + "step": 2414 + }, + { + "epoch": 0.6413490904262382, + "grad_norm": 1.0371621680767618, + "learning_rate": 1.6259876754968568e-05, + "loss": 0.3188290297985077, + "step": 2415 + }, + { + "epoch": 0.6416146594077812, + "grad_norm": 1.076893351246201, + "learning_rate": 1.625645208811734e-05, + "loss": 0.3145543932914734, + "step": 2416 + }, + { + "epoch": 0.6418802283893241, + "grad_norm": 1.1368093372185335, + "learning_rate": 1.6253026215095943e-05, + "loss": 0.30433323979377747, + "step": 2417 + }, + { + "epoch": 0.6421457973708671, + "grad_norm": 1.1042321396184265, + "learning_rate": 1.6249599136564837e-05, + "loss": 0.30946728587150574, + "step": 2418 + }, + { + "epoch": 0.64241136635241, + "grad_norm": 0.991248414026241, + "learning_rate": 1.6246170853184726e-05, + "loss": 0.26245906949043274, + "step": 2419 + }, + { + "epoch": 0.642676935333953, + "grad_norm": 1.1213671588278835, + "learning_rate": 1.624274136561654e-05, + "loss": 0.31468862295150757, + "step": 2420 + }, + { + "epoch": 0.6429425043154959, + "grad_norm": 1.0200744973975597, + "learning_rate": 1.6239310674521443e-05, + "loss": 0.28946155309677124, + "step": 2421 + }, + { + "epoch": 0.6432080732970389, + "grad_norm": 1.1088143851501708, + "learning_rate": 1.6235878780560835e-05, + "loss": 0.26272106170654297, + "step": 2422 + }, + { + "epoch": 0.6434736422785818, + "grad_norm": 1.1185700160494145, + "learning_rate": 1.6232445684396347e-05, + "loss": 0.3094574213027954, + "step": 2423 + }, + { + "epoch": 0.6437392112601248, + "grad_norm": 0.9377280048944331, + "learning_rate": 1.6229011386689832e-05, + "loss": 0.2503833770751953, + "step": 2424 + }, + { + "epoch": 0.6440047802416677, + "grad_norm": 0.9657663244207705, + "learning_rate": 
1.6225575888103387e-05, + "loss": 0.2655009627342224, + "step": 2425 + }, + { + "epoch": 0.6442703492232107, + "grad_norm": 1.123117061290067, + "learning_rate": 1.6222139189299336e-05, + "loss": 0.2819611728191376, + "step": 2426 + }, + { + "epoch": 0.6445359182047536, + "grad_norm": 1.0859641118248262, + "learning_rate": 1.6218701290940232e-05, + "loss": 0.2956068217754364, + "step": 2427 + }, + { + "epoch": 0.6448014871862966, + "grad_norm": 1.2445728810553593, + "learning_rate": 1.6215262193688862e-05, + "loss": 0.3330997824668884, + "step": 2428 + }, + { + "epoch": 0.6450670561678395, + "grad_norm": 1.0073602881165937, + "learning_rate": 1.6211821898208242e-05, + "loss": 0.25897055864334106, + "step": 2429 + }, + { + "epoch": 0.6453326251493825, + "grad_norm": 1.1228221759016932, + "learning_rate": 1.6208380405161623e-05, + "loss": 0.3119947016239166, + "step": 2430 + }, + { + "epoch": 0.6455981941309256, + "grad_norm": 1.143631742936843, + "learning_rate": 1.6204937715212482e-05, + "loss": 0.30833956599235535, + "step": 2431 + }, + { + "epoch": 0.6458637631124685, + "grad_norm": 1.1584271404994573, + "learning_rate": 1.620149382902453e-05, + "loss": 0.2935214638710022, + "step": 2432 + }, + { + "epoch": 0.6461293320940115, + "grad_norm": 1.6063755788258844, + "learning_rate": 1.619804874726171e-05, + "loss": 0.24297356605529785, + "step": 2433 + }, + { + "epoch": 0.6463949010755544, + "grad_norm": 1.14218339304969, + "learning_rate": 1.6194602470588186e-05, + "loss": 0.319774866104126, + "step": 2434 + }, + { + "epoch": 0.6466604700570974, + "grad_norm": 1.1751618225153557, + "learning_rate": 1.6191154999668368e-05, + "loss": 0.29197463393211365, + "step": 2435 + }, + { + "epoch": 0.6469260390386403, + "grad_norm": 1.1008916130088804, + "learning_rate": 1.6187706335166882e-05, + "loss": 0.2939727306365967, + "step": 2436 + }, + { + "epoch": 0.6471916080201833, + "grad_norm": 1.0935449463761302, + "learning_rate": 1.6184256477748595e-05, + "loss": 
0.2941162586212158, + "step": 2437 + }, + { + "epoch": 0.6474571770017262, + "grad_norm": 1.1336931987797143, + "learning_rate": 1.6180805428078593e-05, + "loss": 0.2823144197463989, + "step": 2438 + }, + { + "epoch": 0.6477227459832692, + "grad_norm": 1.0912252779984561, + "learning_rate": 1.61773531868222e-05, + "loss": 0.30048274993896484, + "step": 2439 + }, + { + "epoch": 0.6479883149648121, + "grad_norm": 1.183044095349839, + "learning_rate": 1.617389975464497e-05, + "loss": 0.30927354097366333, + "step": 2440 + }, + { + "epoch": 0.6482538839463551, + "grad_norm": 1.166570736507726, + "learning_rate": 1.6170445132212678e-05, + "loss": 0.34835004806518555, + "step": 2441 + }, + { + "epoch": 0.648519452927898, + "grad_norm": 1.0325781129961564, + "learning_rate": 1.616698932019134e-05, + "loss": 0.2890225648880005, + "step": 2442 + }, + { + "epoch": 0.648785021909441, + "grad_norm": 1.1182329319338478, + "learning_rate": 1.6163532319247195e-05, + "loss": 0.31410521268844604, + "step": 2443 + }, + { + "epoch": 0.6490505908909839, + "grad_norm": 0.9213656240638256, + "learning_rate": 1.616007413004671e-05, + "loss": 0.267375111579895, + "step": 2444 + }, + { + "epoch": 0.6493161598725269, + "grad_norm": 1.1587177777274813, + "learning_rate": 1.6156614753256583e-05, + "loss": 0.3300023376941681, + "step": 2445 + }, + { + "epoch": 0.6495817288540698, + "grad_norm": 1.0295072511714587, + "learning_rate": 1.615315418954374e-05, + "loss": 0.2822847366333008, + "step": 2446 + }, + { + "epoch": 0.6498472978356128, + "grad_norm": 1.1626615137060834, + "learning_rate": 1.6149692439575348e-05, + "loss": 0.3093401789665222, + "step": 2447 + }, + { + "epoch": 0.6501128668171557, + "grad_norm": 1.0475923101386018, + "learning_rate": 1.6146229504018777e-05, + "loss": 0.2892506718635559, + "step": 2448 + }, + { + "epoch": 0.6503784357986987, + "grad_norm": 0.9972012319936079, + "learning_rate": 1.6142765383541643e-05, + "loss": 0.2805558741092682, + "step": 2449 + }, + { + 
"epoch": 0.6506440047802416, + "grad_norm": 1.0535842654025462, + "learning_rate": 1.6139300078811794e-05, + "loss": 0.29852935671806335, + "step": 2450 + }, + { + "epoch": 0.6509095737617846, + "grad_norm": 1.193949473615032, + "learning_rate": 1.6135833590497295e-05, + "loss": 0.3567991256713867, + "step": 2451 + }, + { + "epoch": 0.6511751427433276, + "grad_norm": 1.1265709697559396, + "learning_rate": 1.6132365919266442e-05, + "loss": 0.29564782977104187, + "step": 2452 + }, + { + "epoch": 0.6514407117248705, + "grad_norm": 1.011180050217134, + "learning_rate": 1.612889706578777e-05, + "loss": 0.30027297139167786, + "step": 2453 + }, + { + "epoch": 0.6517062807064135, + "grad_norm": 1.0908136110597069, + "learning_rate": 1.6125427030730027e-05, + "loss": 0.3318096697330475, + "step": 2454 + }, + { + "epoch": 0.6519718496879564, + "grad_norm": 1.0728958387824694, + "learning_rate": 1.612195581476219e-05, + "loss": 0.30962997674942017, + "step": 2455 + }, + { + "epoch": 0.6522374186694994, + "grad_norm": 1.2969539714019946, + "learning_rate": 1.6118483418553476e-05, + "loss": 0.3152836859226227, + "step": 2456 + }, + { + "epoch": 0.6525029876510423, + "grad_norm": 1.0160215490589632, + "learning_rate": 1.6115009842773322e-05, + "loss": 0.26117920875549316, + "step": 2457 + }, + { + "epoch": 0.6527685566325853, + "grad_norm": 0.9780826840488046, + "learning_rate": 1.6111535088091388e-05, + "loss": 0.2705717384815216, + "step": 2458 + }, + { + "epoch": 0.6530341256141283, + "grad_norm": 1.112935626593024, + "learning_rate": 1.6108059155177568e-05, + "loss": 0.3281205892562866, + "step": 2459 + }, + { + "epoch": 0.6532996945956713, + "grad_norm": 1.0805050021999307, + "learning_rate": 1.6104582044701983e-05, + "loss": 0.3300125002861023, + "step": 2460 + }, + { + "epoch": 0.6535652635772142, + "grad_norm": 1.0596352955938992, + "learning_rate": 1.6101103757334973e-05, + "loss": 0.29286977648735046, + "step": 2461 + }, + { + "epoch": 0.6538308325587572, + 
"grad_norm": 1.114611766363321, + "learning_rate": 1.6097624293747115e-05, + "loss": 0.2920498847961426, + "step": 2462 + }, + { + "epoch": 0.6540964015403001, + "grad_norm": 1.0455118881549736, + "learning_rate": 1.609414365460921e-05, + "loss": 0.31018689274787903, + "step": 2463 + }, + { + "epoch": 0.6543619705218431, + "grad_norm": 1.0028130278859915, + "learning_rate": 1.609066184059228e-05, + "loss": 0.26806512475013733, + "step": 2464 + }, + { + "epoch": 0.654627539503386, + "grad_norm": 1.0385768164913443, + "learning_rate": 1.608717885236758e-05, + "loss": 0.29770639538764954, + "step": 2465 + }, + { + "epoch": 0.654893108484929, + "grad_norm": 1.0811683391440958, + "learning_rate": 1.6083694690606592e-05, + "loss": 0.36161965131759644, + "step": 2466 + }, + { + "epoch": 0.6551586774664719, + "grad_norm": 1.1455214370068598, + "learning_rate": 1.6080209355981016e-05, + "loss": 0.36114081740379333, + "step": 2467 + }, + { + "epoch": 0.6554242464480149, + "grad_norm": 0.9911085328884063, + "learning_rate": 1.6076722849162786e-05, + "loss": 0.28924882411956787, + "step": 2468 + }, + { + "epoch": 0.6556898154295578, + "grad_norm": 1.1198872767040324, + "learning_rate": 1.6073235170824058e-05, + "loss": 0.3088049292564392, + "step": 2469 + }, + { + "epoch": 0.6559553844111008, + "grad_norm": 1.062389027957873, + "learning_rate": 1.6069746321637216e-05, + "loss": 0.2684907615184784, + "step": 2470 + }, + { + "epoch": 0.6562209533926437, + "grad_norm": 0.9850175058697045, + "learning_rate": 1.6066256302274873e-05, + "loss": 0.2674641013145447, + "step": 2471 + }, + { + "epoch": 0.6564865223741867, + "grad_norm": 1.0658104164235327, + "learning_rate": 1.6062765113409854e-05, + "loss": 0.2865106165409088, + "step": 2472 + }, + { + "epoch": 0.6567520913557297, + "grad_norm": 1.1117203943537428, + "learning_rate": 1.605927275571523e-05, + "loss": 0.33163607120513916, + "step": 2473 + }, + { + "epoch": 0.6570176603372726, + "grad_norm": 1.1177244627769223, + 
"learning_rate": 1.6055779229864276e-05, + "loss": 0.32725927233695984, + "step": 2474 + }, + { + "epoch": 0.6572832293188156, + "grad_norm": 1.171322314473831, + "learning_rate": 1.605228453653051e-05, + "loss": 0.31537747383117676, + "step": 2475 + }, + { + "epoch": 0.6575487983003585, + "grad_norm": 1.0855461390356589, + "learning_rate": 1.604878867638767e-05, + "loss": 0.29331761598587036, + "step": 2476 + }, + { + "epoch": 0.6578143672819015, + "grad_norm": 1.0342424424241736, + "learning_rate": 1.6045291650109706e-05, + "loss": 0.315193772315979, + "step": 2477 + }, + { + "epoch": 0.6580799362634444, + "grad_norm": 1.2286540067411784, + "learning_rate": 1.6041793458370812e-05, + "loss": 0.3595796227455139, + "step": 2478 + }, + { + "epoch": 0.6583455052449874, + "grad_norm": 1.0251892797499218, + "learning_rate": 1.6038294101845394e-05, + "loss": 0.3069949150085449, + "step": 2479 + }, + { + "epoch": 0.6586110742265303, + "grad_norm": 1.1576253586981062, + "learning_rate": 1.603479358120809e-05, + "loss": 0.3154812455177307, + "step": 2480 + }, + { + "epoch": 0.6588766432080733, + "grad_norm": 1.1008921076459075, + "learning_rate": 1.6031291897133756e-05, + "loss": 0.3005039691925049, + "step": 2481 + }, + { + "epoch": 0.6591422121896162, + "grad_norm": 1.1463594149599334, + "learning_rate": 1.6027789050297476e-05, + "loss": 0.2885095775127411, + "step": 2482 + }, + { + "epoch": 0.6594077811711592, + "grad_norm": 1.002066881102099, + "learning_rate": 1.602428504137456e-05, + "loss": 0.291950523853302, + "step": 2483 + }, + { + "epoch": 0.6596733501527021, + "grad_norm": 1.0919380790727968, + "learning_rate": 1.6020779871040538e-05, + "loss": 0.31630760431289673, + "step": 2484 + }, + { + "epoch": 0.6599389191342451, + "grad_norm": 1.0827567425634856, + "learning_rate": 1.6017273539971167e-05, + "loss": 0.29767507314682007, + "step": 2485 + }, + { + "epoch": 0.660204488115788, + "grad_norm": 1.036820980968177, + "learning_rate": 1.601376604884242e-05, + 
"loss": 0.2882775664329529, + "step": 2486 + }, + { + "epoch": 0.6604700570973311, + "grad_norm": 1.0885135950320362, + "learning_rate": 1.601025739833051e-05, + "loss": 0.325736403465271, + "step": 2487 + }, + { + "epoch": 0.660735626078874, + "grad_norm": 1.048580856774253, + "learning_rate": 1.6006747589111854e-05, + "loss": 0.3007255792617798, + "step": 2488 + }, + { + "epoch": 0.661001195060417, + "grad_norm": 1.146836506523448, + "learning_rate": 1.6003236621863107e-05, + "loss": 0.33199968934059143, + "step": 2489 + }, + { + "epoch": 0.6612667640419599, + "grad_norm": 1.1430196866694278, + "learning_rate": 1.5999724497261138e-05, + "loss": 0.3784569799900055, + "step": 2490 + }, + { + "epoch": 0.6615323330235029, + "grad_norm": 1.0506667031587968, + "learning_rate": 1.5996211215983052e-05, + "loss": 0.28146931529045105, + "step": 2491 + }, + { + "epoch": 0.6617979020050458, + "grad_norm": 1.0621415260673002, + "learning_rate": 1.599269677870616e-05, + "loss": 0.32187730073928833, + "step": 2492 + }, + { + "epoch": 0.6620634709865888, + "grad_norm": 1.0631524880676668, + "learning_rate": 1.5989181186108003e-05, + "loss": 0.3021823465824127, + "step": 2493 + }, + { + "epoch": 0.6623290399681317, + "grad_norm": 1.0248198480240434, + "learning_rate": 1.5985664438866354e-05, + "loss": 0.3309648334980011, + "step": 2494 + }, + { + "epoch": 0.6625946089496747, + "grad_norm": 1.0183038789118495, + "learning_rate": 1.598214653765919e-05, + "loss": 0.2939694821834564, + "step": 2495 + }, + { + "epoch": 0.6628601779312177, + "grad_norm": 1.0091208408649601, + "learning_rate": 1.597862748316473e-05, + "loss": 0.31219810247421265, + "step": 2496 + }, + { + "epoch": 0.6631257469127606, + "grad_norm": 1.3669850946739606, + "learning_rate": 1.5975107276061405e-05, + "loss": 0.29435622692108154, + "step": 2497 + }, + { + "epoch": 0.6633913158943036, + "grad_norm": 1.0359724885535866, + "learning_rate": 1.5971585917027864e-05, + "loss": 0.27167004346847534, + "step": 2498 + 
}, + { + "epoch": 0.6636568848758465, + "grad_norm": 1.121619558624798, + "learning_rate": 1.5968063406742988e-05, + "loss": 0.3360658884048462, + "step": 2499 + }, + { + "epoch": 0.6639224538573895, + "grad_norm": 1.0767207810238415, + "learning_rate": 1.596453974588587e-05, + "loss": 0.2994089424610138, + "step": 2500 + }, + { + "epoch": 0.6641880228389324, + "grad_norm": 1.0997593865705806, + "learning_rate": 1.596101493513584e-05, + "loss": 0.32302889227867126, + "step": 2501 + }, + { + "epoch": 0.6644535918204754, + "grad_norm": 1.1249891187970829, + "learning_rate": 1.595748897517243e-05, + "loss": 0.3122987747192383, + "step": 2502 + }, + { + "epoch": 0.6647191608020183, + "grad_norm": 1.014108779554691, + "learning_rate": 1.5953961866675408e-05, + "loss": 0.2746438980102539, + "step": 2503 + }, + { + "epoch": 0.6649847297835613, + "grad_norm": 1.0758059481680302, + "learning_rate": 1.5950433610324758e-05, + "loss": 0.3043097257614136, + "step": 2504 + }, + { + "epoch": 0.6652502987651042, + "grad_norm": 1.2204942135197403, + "learning_rate": 1.594690420680069e-05, + "loss": 0.3208698332309723, + "step": 2505 + }, + { + "epoch": 0.6655158677466472, + "grad_norm": 1.1502218188727449, + "learning_rate": 1.5943373656783628e-05, + "loss": 0.317341148853302, + "step": 2506 + }, + { + "epoch": 0.6657814367281901, + "grad_norm": 1.1223078751349502, + "learning_rate": 1.5939841960954218e-05, + "loss": 0.3250347673892975, + "step": 2507 + }, + { + "epoch": 0.6660470057097331, + "grad_norm": 1.066903715567463, + "learning_rate": 1.5936309119993333e-05, + "loss": 0.32255828380584717, + "step": 2508 + }, + { + "epoch": 0.666312574691276, + "grad_norm": 1.0591506680476068, + "learning_rate": 1.593277513458206e-05, + "loss": 0.3247614800930023, + "step": 2509 + }, + { + "epoch": 0.666578143672819, + "grad_norm": 1.087253896768941, + "learning_rate": 1.5929240005401715e-05, + "loss": 0.34171730279922485, + "step": 2510 + }, + { + "epoch": 0.6668437126543619, + "grad_norm": 
1.092874100004657, + "learning_rate": 1.5925703733133823e-05, + "loss": 0.30671584606170654, + "step": 2511 + }, + { + "epoch": 0.6671092816359049, + "grad_norm": 1.1250075389065, + "learning_rate": 1.5922166318460138e-05, + "loss": 0.3387908339500427, + "step": 2512 + }, + { + "epoch": 0.6673748506174478, + "grad_norm": 1.0272141820522305, + "learning_rate": 1.5918627762062635e-05, + "loss": 0.2772873044013977, + "step": 2513 + }, + { + "epoch": 0.6676404195989908, + "grad_norm": 1.0802689739154336, + "learning_rate": 1.59150880646235e-05, + "loss": 0.31555238366127014, + "step": 2514 + }, + { + "epoch": 0.6679059885805337, + "grad_norm": 0.9930963010924009, + "learning_rate": 1.5911547226825154e-05, + "loss": 0.2821594476699829, + "step": 2515 + }, + { + "epoch": 0.6681715575620768, + "grad_norm": 1.098936156337469, + "learning_rate": 1.5908005249350217e-05, + "loss": 0.3176054358482361, + "step": 2516 + }, + { + "epoch": 0.6684371265436198, + "grad_norm": 1.083365844116071, + "learning_rate": 1.590446213288155e-05, + "loss": 0.28484907746315, + "step": 2517 + }, + { + "epoch": 0.6687026955251627, + "grad_norm": 1.0028500327966023, + "learning_rate": 1.590091787810222e-05, + "loss": 0.25227850675582886, + "step": 2518 + }, + { + "epoch": 0.6689682645067057, + "grad_norm": 0.993931866088294, + "learning_rate": 1.5897372485695514e-05, + "loss": 0.276819109916687, + "step": 2519 + }, + { + "epoch": 0.6692338334882486, + "grad_norm": 1.1883846939575156, + "learning_rate": 1.589382595634495e-05, + "loss": 0.27944183349609375, + "step": 2520 + }, + { + "epoch": 0.6694994024697916, + "grad_norm": 1.0217591474349375, + "learning_rate": 1.589027829073425e-05, + "loss": 0.295337975025177, + "step": 2521 + }, + { + "epoch": 0.6697649714513345, + "grad_norm": 1.0940479681497102, + "learning_rate": 1.5886729489547365e-05, + "loss": 0.31168580055236816, + "step": 2522 + }, + { + "epoch": 0.6700305404328775, + "grad_norm": 1.0847233646991081, + "learning_rate": 
1.5883179553468465e-05, + "loss": 0.34520941972732544, + "step": 2523 + }, + { + "epoch": 0.6702961094144204, + "grad_norm": 1.0941539012056998, + "learning_rate": 1.587962848318193e-05, + "loss": 0.3121863901615143, + "step": 2524 + }, + { + "epoch": 0.6705616783959634, + "grad_norm": 1.2414605611463847, + "learning_rate": 1.587607627937237e-05, + "loss": 0.3450377583503723, + "step": 2525 + }, + { + "epoch": 0.6708272473775063, + "grad_norm": 1.0575484463097053, + "learning_rate": 1.58725229427246e-05, + "loss": 0.33431196212768555, + "step": 2526 + }, + { + "epoch": 0.6710928163590493, + "grad_norm": 2.8101197900274433, + "learning_rate": 1.5868968473923675e-05, + "loss": 0.2753226161003113, + "step": 2527 + }, + { + "epoch": 0.6713583853405922, + "grad_norm": 1.1171540013343635, + "learning_rate": 1.586541287365484e-05, + "loss": 0.31394219398498535, + "step": 2528 + }, + { + "epoch": 0.6716239543221352, + "grad_norm": 1.0940027543433968, + "learning_rate": 1.586185614260358e-05, + "loss": 0.352859765291214, + "step": 2529 + }, + { + "epoch": 0.6718895233036781, + "grad_norm": 1.158790754412002, + "learning_rate": 1.5858298281455592e-05, + "loss": 0.3182204067707062, + "step": 2530 + }, + { + "epoch": 0.6721550922852211, + "grad_norm": 1.0901686159979078, + "learning_rate": 1.5854739290896785e-05, + "loss": 0.3107008934020996, + "step": 2531 + }, + { + "epoch": 0.672420661266764, + "grad_norm": 1.0367853416177613, + "learning_rate": 1.5851179171613294e-05, + "loss": 0.2737328112125397, + "step": 2532 + }, + { + "epoch": 0.672686230248307, + "grad_norm": 1.070700914663809, + "learning_rate": 1.5847617924291466e-05, + "loss": 0.2744509279727936, + "step": 2533 + }, + { + "epoch": 0.6729517992298499, + "grad_norm": 1.0763385778363233, + "learning_rate": 1.584405554961787e-05, + "loss": 0.3149082660675049, + "step": 2534 + }, + { + "epoch": 0.6732173682113929, + "grad_norm": 1.1199335422347676, + "learning_rate": 1.584049204827929e-05, + "loss": 
0.32643741369247437, + "step": 2535 + }, + { + "epoch": 0.6734829371929358, + "grad_norm": 1.1153920819002263, + "learning_rate": 1.583692742096272e-05, + "loss": 0.31901559233665466, + "step": 2536 + }, + { + "epoch": 0.6737485061744788, + "grad_norm": 1.037012713250851, + "learning_rate": 1.583336166835539e-05, + "loss": 0.3020802140235901, + "step": 2537 + }, + { + "epoch": 0.6740140751560217, + "grad_norm": 0.9884255382698084, + "learning_rate": 1.5829794791144723e-05, + "loss": 0.29683804512023926, + "step": 2538 + }, + { + "epoch": 0.6742796441375647, + "grad_norm": 1.0549080502640127, + "learning_rate": 1.582622679001838e-05, + "loss": 0.2898966073989868, + "step": 2539 + }, + { + "epoch": 0.6745452131191076, + "grad_norm": 1.0628349250468347, + "learning_rate": 1.582265766566422e-05, + "loss": 0.2665000855922699, + "step": 2540 + }, + { + "epoch": 0.6748107821006506, + "grad_norm": 1.1059852721256176, + "learning_rate": 1.581908741877034e-05, + "loss": 0.2987207770347595, + "step": 2541 + }, + { + "epoch": 0.6750763510821935, + "grad_norm": 1.1051901132495052, + "learning_rate": 1.5815516050025032e-05, + "loss": 0.32591086626052856, + "step": 2542 + }, + { + "epoch": 0.6753419200637365, + "grad_norm": 0.9752097662975195, + "learning_rate": 1.581194356011682e-05, + "loss": 0.28181299567222595, + "step": 2543 + }, + { + "epoch": 0.6756074890452796, + "grad_norm": 1.0983389872703522, + "learning_rate": 1.5808369949734433e-05, + "loss": 0.3256041407585144, + "step": 2544 + }, + { + "epoch": 0.6758730580268225, + "grad_norm": 1.1228012917357884, + "learning_rate": 1.5804795219566825e-05, + "loss": 0.3079703152179718, + "step": 2545 + }, + { + "epoch": 0.6761386270083655, + "grad_norm": 1.1504916593616519, + "learning_rate": 1.580121937030316e-05, + "loss": 0.3364162743091583, + "step": 2546 + }, + { + "epoch": 0.6764041959899084, + "grad_norm": 1.046870504650359, + "learning_rate": 1.5797642402632816e-05, + "loss": 0.2774898111820221, + "step": 2547 + }, + { + 
"epoch": 0.6766697649714514, + "grad_norm": 1.1108782100380157, + "learning_rate": 1.5794064317245396e-05, + "loss": 0.33260244131088257, + "step": 2548 + }, + { + "epoch": 0.6769353339529943, + "grad_norm": 1.16229568793775, + "learning_rate": 1.5790485114830708e-05, + "loss": 0.3327571153640747, + "step": 2549 + }, + { + "epoch": 0.6772009029345373, + "grad_norm": 1.1256526679188055, + "learning_rate": 1.5786904796078783e-05, + "loss": 0.28527912497520447, + "step": 2550 + }, + { + "epoch": 0.6774664719160802, + "grad_norm": 1.1757868172389025, + "learning_rate": 1.5783323361679865e-05, + "loss": 0.3100908100605011, + "step": 2551 + }, + { + "epoch": 0.6777320408976232, + "grad_norm": 1.1187226402475792, + "learning_rate": 1.577974081232441e-05, + "loss": 0.3434574007987976, + "step": 2552 + }, + { + "epoch": 0.6779976098791661, + "grad_norm": 1.0691671390255433, + "learning_rate": 1.5776157148703094e-05, + "loss": 0.3151341676712036, + "step": 2553 + }, + { + "epoch": 0.6782631788607091, + "grad_norm": 1.1432839314923735, + "learning_rate": 1.5772572371506803e-05, + "loss": 0.33334124088287354, + "step": 2554 + }, + { + "epoch": 0.678528747842252, + "grad_norm": 0.9718187941404679, + "learning_rate": 1.576898648142664e-05, + "loss": 0.26933547854423523, + "step": 2555 + }, + { + "epoch": 0.678794316823795, + "grad_norm": 1.0146251280063243, + "learning_rate": 1.576539947915392e-05, + "loss": 0.3087029755115509, + "step": 2556 + }, + { + "epoch": 0.6790598858053379, + "grad_norm": 2.0746649121309244, + "learning_rate": 1.576181136538018e-05, + "loss": 0.32620540261268616, + "step": 2557 + }, + { + "epoch": 0.6793254547868809, + "grad_norm": 1.0462752825892652, + "learning_rate": 1.575822214079716e-05, + "loss": 0.29112139344215393, + "step": 2558 + }, + { + "epoch": 0.6795910237684238, + "grad_norm": 1.108770761520566, + "learning_rate": 1.5754631806096822e-05, + "loss": 0.3394843339920044, + "step": 2559 + }, + { + "epoch": 0.6798565927499668, + "grad_norm": 
1.0789431162979184, + "learning_rate": 1.5751040361971342e-05, + "loss": 0.32754629850387573, + "step": 2560 + }, + { + "epoch": 0.6801221617315097, + "grad_norm": 1.055729440740922, + "learning_rate": 1.574744780911311e-05, + "loss": 0.2829592823982239, + "step": 2561 + }, + { + "epoch": 0.6803877307130527, + "grad_norm": 3.1916720491195423, + "learning_rate": 1.5743854148214724e-05, + "loss": 0.2718046307563782, + "step": 2562 + }, + { + "epoch": 0.6806532996945956, + "grad_norm": 1.0355755791413483, + "learning_rate": 1.5740259379969002e-05, + "loss": 0.29244256019592285, + "step": 2563 + }, + { + "epoch": 0.6809188686761386, + "grad_norm": 1.0678189150114252, + "learning_rate": 1.5736663505068972e-05, + "loss": 0.2925388514995575, + "step": 2564 + }, + { + "epoch": 0.6811844376576816, + "grad_norm": 1.109826571766002, + "learning_rate": 1.5733066524207875e-05, + "loss": 0.26742440462112427, + "step": 2565 + }, + { + "epoch": 0.6814500066392245, + "grad_norm": 1.0365586719986022, + "learning_rate": 1.5729468438079167e-05, + "loss": 0.33688807487487793, + "step": 2566 + }, + { + "epoch": 0.6817155756207675, + "grad_norm": 1.0939355325909954, + "learning_rate": 1.5725869247376514e-05, + "loss": 0.2953096330165863, + "step": 2567 + }, + { + "epoch": 0.6819811446023104, + "grad_norm": 1.081510188555139, + "learning_rate": 1.5722268952793806e-05, + "loss": 0.321500301361084, + "step": 2568 + }, + { + "epoch": 0.6822467135838534, + "grad_norm": 1.1427798210793014, + "learning_rate": 1.5718667555025127e-05, + "loss": 0.29148590564727783, + "step": 2569 + }, + { + "epoch": 0.6825122825653963, + "grad_norm": 1.0849106130015975, + "learning_rate": 1.5715065054764792e-05, + "loss": 0.26887139678001404, + "step": 2570 + }, + { + "epoch": 0.6827778515469393, + "grad_norm": 0.9118900514894542, + "learning_rate": 1.5711461452707316e-05, + "loss": 0.2698139250278473, + "step": 2571 + }, + { + "epoch": 0.6830434205284823, + "grad_norm": 0.9420578172190551, + "learning_rate": 
1.5707856749547433e-05, + "loss": 0.264956533908844, + "step": 2572 + }, + { + "epoch": 0.6833089895100253, + "grad_norm": 1.0786584040903482, + "learning_rate": 1.5704250945980085e-05, + "loss": 0.32535314559936523, + "step": 2573 + }, + { + "epoch": 0.6835745584915682, + "grad_norm": 1.1132312438200667, + "learning_rate": 1.5700644042700432e-05, + "loss": 0.30529654026031494, + "step": 2574 + }, + { + "epoch": 0.6838401274731112, + "grad_norm": 0.9518994724553314, + "learning_rate": 1.569703604040384e-05, + "loss": 0.27253150939941406, + "step": 2575 + }, + { + "epoch": 0.6841056964546541, + "grad_norm": 1.0559070796873817, + "learning_rate": 1.5693426939785886e-05, + "loss": 0.27451053261756897, + "step": 2576 + }, + { + "epoch": 0.6843712654361971, + "grad_norm": 1.1393124405849042, + "learning_rate": 1.5689816741542374e-05, + "loss": 0.33280283212661743, + "step": 2577 + }, + { + "epoch": 0.68463683441774, + "grad_norm": 1.1306113061745138, + "learning_rate": 1.5686205446369293e-05, + "loss": 0.2911887764930725, + "step": 2578 + }, + { + "epoch": 0.684902403399283, + "grad_norm": 1.0940465986734231, + "learning_rate": 1.5682593054962866e-05, + "loss": 0.2950279116630554, + "step": 2579 + }, + { + "epoch": 0.6851679723808259, + "grad_norm": 1.0911163136563768, + "learning_rate": 1.5678979568019518e-05, + "loss": 0.3267458975315094, + "step": 2580 + }, + { + "epoch": 0.6854335413623689, + "grad_norm": 1.2739312763430675, + "learning_rate": 1.5675364986235887e-05, + "loss": 0.3209132254123688, + "step": 2581 + }, + { + "epoch": 0.6856991103439118, + "grad_norm": 1.1101887519376679, + "learning_rate": 1.5671749310308818e-05, + "loss": 0.3186662197113037, + "step": 2582 + }, + { + "epoch": 0.6859646793254548, + "grad_norm": 0.9652854961372175, + "learning_rate": 1.566813254093538e-05, + "loss": 0.24875827133655548, + "step": 2583 + }, + { + "epoch": 0.6862302483069977, + "grad_norm": 1.0684425959326884, + "learning_rate": 1.5664514678812835e-05, + "loss": 
0.26657983660697937, + "step": 2584 + }, + { + "epoch": 0.6864958172885407, + "grad_norm": 1.0670123202559558, + "learning_rate": 1.5660895724638666e-05, + "loss": 0.2889682650566101, + "step": 2585 + }, + { + "epoch": 0.6867613862700837, + "grad_norm": 1.2310590689373582, + "learning_rate": 1.5657275679110564e-05, + "loss": 0.32035061717033386, + "step": 2586 + }, + { + "epoch": 0.6870269552516266, + "grad_norm": 0.9946580402808185, + "learning_rate": 1.5653654542926435e-05, + "loss": 0.2844264507293701, + "step": 2587 + }, + { + "epoch": 0.6872925242331696, + "grad_norm": 1.0738818938413612, + "learning_rate": 1.5650032316784388e-05, + "loss": 0.27645713090896606, + "step": 2588 + }, + { + "epoch": 0.6875580932147125, + "grad_norm": 1.0078062598096618, + "learning_rate": 1.5646409001382745e-05, + "loss": 0.29902809858322144, + "step": 2589 + }, + { + "epoch": 0.6878236621962555, + "grad_norm": 1.0662439819494403, + "learning_rate": 1.564278459742004e-05, + "loss": 0.28179824352264404, + "step": 2590 + }, + { + "epoch": 0.6880892311777984, + "grad_norm": 0.9959782320912598, + "learning_rate": 1.563915910559502e-05, + "loss": 0.30527305603027344, + "step": 2591 + }, + { + "epoch": 0.6883548001593414, + "grad_norm": 0.9640464455731136, + "learning_rate": 1.5635532526606625e-05, + "loss": 0.29411792755126953, + "step": 2592 + }, + { + "epoch": 0.6886203691408843, + "grad_norm": 1.0659796212639145, + "learning_rate": 1.563190486115403e-05, + "loss": 0.32294154167175293, + "step": 2593 + }, + { + "epoch": 0.6888859381224273, + "grad_norm": 1.0983041505312465, + "learning_rate": 1.5628276109936594e-05, + "loss": 0.31873172521591187, + "step": 2594 + }, + { + "epoch": 0.6891515071039702, + "grad_norm": 1.2163401358885952, + "learning_rate": 1.5624646273653908e-05, + "loss": 0.37790048122406006, + "step": 2595 + }, + { + "epoch": 0.6894170760855132, + "grad_norm": 1.0271206309222516, + "learning_rate": 1.5621015353005754e-05, + "loss": 0.27596205472946167, + "step": 2596 
+ }, + { + "epoch": 0.6896826450670561, + "grad_norm": 1.2915034278595348, + "learning_rate": 1.5617383348692135e-05, + "loss": 0.30952686071395874, + "step": 2597 + }, + { + "epoch": 0.6899482140485991, + "grad_norm": 1.089414433310086, + "learning_rate": 1.5613750261413256e-05, + "loss": 0.2933235764503479, + "step": 2598 + }, + { + "epoch": 0.690213783030142, + "grad_norm": 1.1151043496896997, + "learning_rate": 1.5610116091869538e-05, + "loss": 0.2961776554584503, + "step": 2599 + }, + { + "epoch": 0.6904793520116851, + "grad_norm": 1.0596230408388436, + "learning_rate": 1.56064808407616e-05, + "loss": 0.2843313217163086, + "step": 2600 + }, + { + "epoch": 0.690744920993228, + "grad_norm": 1.0545406618996236, + "learning_rate": 1.560284450879028e-05, + "loss": 0.29366564750671387, + "step": 2601 + }, + { + "epoch": 0.691010489974771, + "grad_norm": 1.028254286030692, + "learning_rate": 1.5599207096656614e-05, + "loss": 0.32668614387512207, + "step": 2602 + }, + { + "epoch": 0.6912760589563139, + "grad_norm": 1.1962201821774399, + "learning_rate": 1.5595568605061858e-05, + "loss": 0.344367653131485, + "step": 2603 + }, + { + "epoch": 0.6915416279378569, + "grad_norm": 1.2250839657368426, + "learning_rate": 1.5591929034707468e-05, + "loss": 0.2875809371471405, + "step": 2604 + }, + { + "epoch": 0.6918071969193998, + "grad_norm": 0.9717157700868733, + "learning_rate": 1.5588288386295113e-05, + "loss": 0.2688799202442169, + "step": 2605 + }, + { + "epoch": 0.6920727659009428, + "grad_norm": 1.2520016236289049, + "learning_rate": 1.558464666052667e-05, + "loss": 0.28575828671455383, + "step": 2606 + }, + { + "epoch": 0.6923383348824858, + "grad_norm": 1.0741907315089707, + "learning_rate": 1.5581003858104203e-05, + "loss": 0.2800632119178772, + "step": 2607 + }, + { + "epoch": 0.6926039038640287, + "grad_norm": 1.096176752690496, + "learning_rate": 1.5577359979730022e-05, + "loss": 0.3066416382789612, + "step": 2608 + }, + { + "epoch": 0.6928694728455717, + 
"grad_norm": 1.0146792499875503, + "learning_rate": 1.5573715026106617e-05, + "loss": 0.3164110779762268, + "step": 2609 + }, + { + "epoch": 0.6931350418271146, + "grad_norm": 1.0292100354922897, + "learning_rate": 1.5570068997936686e-05, + "loss": 0.2908422350883484, + "step": 2610 + }, + { + "epoch": 0.6934006108086576, + "grad_norm": 0.9996966110923509, + "learning_rate": 1.5566421895923148e-05, + "loss": 0.29055240750312805, + "step": 2611 + }, + { + "epoch": 0.6936661797902005, + "grad_norm": 1.1296077877181152, + "learning_rate": 1.556277372076912e-05, + "loss": 0.3247227370738983, + "step": 2612 + }, + { + "epoch": 0.6939317487717435, + "grad_norm": 1.0869397458201258, + "learning_rate": 1.555912447317792e-05, + "loss": 0.29944315552711487, + "step": 2613 + }, + { + "epoch": 0.6941973177532864, + "grad_norm": 1.140637727836958, + "learning_rate": 1.5555474153853092e-05, + "loss": 0.2984931170940399, + "step": 2614 + }, + { + "epoch": 0.6944628867348294, + "grad_norm": 1.0644561032518303, + "learning_rate": 1.5551822763498364e-05, + "loss": 0.301285982131958, + "step": 2615 + }, + { + "epoch": 0.6947284557163723, + "grad_norm": 1.0271314049069311, + "learning_rate": 1.5548170302817683e-05, + "loss": 0.2862967252731323, + "step": 2616 + }, + { + "epoch": 0.6949940246979153, + "grad_norm": 1.0216494335731472, + "learning_rate": 1.5544516772515207e-05, + "loss": 0.3071482181549072, + "step": 2617 + }, + { + "epoch": 0.6952595936794582, + "grad_norm": 1.153798162838472, + "learning_rate": 1.5540862173295285e-05, + "loss": 0.33668914437294006, + "step": 2618 + }, + { + "epoch": 0.6955251626610012, + "grad_norm": 1.0451730984690786, + "learning_rate": 1.5537206505862486e-05, + "loss": 0.32204627990722656, + "step": 2619 + }, + { + "epoch": 0.6957907316425441, + "grad_norm": 1.083101648134336, + "learning_rate": 1.5533549770921576e-05, + "loss": 0.30210041999816895, + "step": 2620 + }, + { + "epoch": 0.6960563006240871, + "grad_norm": 1.1518417167078652, + 
"learning_rate": 1.5529891969177535e-05, + "loss": 0.3116886019706726, + "step": 2621 + }, + { + "epoch": 0.69632186960563, + "grad_norm": 1.1473344970327815, + "learning_rate": 1.5526233101335543e-05, + "loss": 0.3460058867931366, + "step": 2622 + }, + { + "epoch": 0.696587438587173, + "grad_norm": 1.0477810576486106, + "learning_rate": 1.552257316810098e-05, + "loss": 0.30080512166023254, + "step": 2623 + }, + { + "epoch": 0.6968530075687159, + "grad_norm": 1.1107090823955428, + "learning_rate": 1.5518912170179447e-05, + "loss": 0.3381347954273224, + "step": 2624 + }, + { + "epoch": 0.6971185765502589, + "grad_norm": 1.0737064011248665, + "learning_rate": 1.5515250108276733e-05, + "loss": 0.30345672369003296, + "step": 2625 + }, + { + "epoch": 0.6973841455318018, + "grad_norm": 1.1809134250993814, + "learning_rate": 1.5511586983098847e-05, + "loss": 0.3002641797065735, + "step": 2626 + }, + { + "epoch": 0.6976497145133448, + "grad_norm": 0.9975793486319376, + "learning_rate": 1.5507922795351992e-05, + "loss": 0.2848126292228699, + "step": 2627 + }, + { + "epoch": 0.6979152834948879, + "grad_norm": 1.1203755244922207, + "learning_rate": 1.5504257545742585e-05, + "loss": 0.32360371947288513, + "step": 2628 + }, + { + "epoch": 0.6981808524764308, + "grad_norm": 1.0674295201271842, + "learning_rate": 1.5500591234977237e-05, + "loss": 0.2970595955848694, + "step": 2629 + }, + { + "epoch": 0.6984464214579738, + "grad_norm": 1.1343972682519483, + "learning_rate": 1.5496923863762773e-05, + "loss": 0.35431474447250366, + "step": 2630 + }, + { + "epoch": 0.6987119904395167, + "grad_norm": 1.027377246814574, + "learning_rate": 1.549325543280622e-05, + "loss": 0.30133551359176636, + "step": 2631 + }, + { + "epoch": 0.6989775594210597, + "grad_norm": 1.066148832325447, + "learning_rate": 1.5489585942814807e-05, + "loss": 0.3013160824775696, + "step": 2632 + }, + { + "epoch": 0.6992431284026026, + "grad_norm": 1.1981871164483473, + "learning_rate": 1.5485915394495967e-05, + 
"loss": 0.3291313052177429, + "step": 2633 + }, + { + "epoch": 0.6995086973841456, + "grad_norm": 1.3083774012082008, + "learning_rate": 1.5482243788557336e-05, + "loss": 0.32308053970336914, + "step": 2634 + }, + { + "epoch": 0.6997742663656885, + "grad_norm": 1.0802428984314951, + "learning_rate": 1.5478571125706762e-05, + "loss": 0.321450412273407, + "step": 2635 + }, + { + "epoch": 0.7000398353472315, + "grad_norm": 1.1144035500723286, + "learning_rate": 1.547489740665229e-05, + "loss": 0.30871254205703735, + "step": 2636 + }, + { + "epoch": 0.7003054043287744, + "grad_norm": 1.1599776854022048, + "learning_rate": 1.5471222632102168e-05, + "loss": 0.29414835572242737, + "step": 2637 + }, + { + "epoch": 0.7005709733103174, + "grad_norm": 1.019484878273918, + "learning_rate": 1.546754680276485e-05, + "loss": 0.2841604948043823, + "step": 2638 + }, + { + "epoch": 0.7008365422918603, + "grad_norm": 1.039625714192533, + "learning_rate": 1.546386991934899e-05, + "loss": 0.2895316183567047, + "step": 2639 + }, + { + "epoch": 0.7011021112734033, + "grad_norm": 1.0418724746200432, + "learning_rate": 1.546019198256345e-05, + "loss": 0.310278058052063, + "step": 2640 + }, + { + "epoch": 0.7013676802549462, + "grad_norm": 1.1737622034955963, + "learning_rate": 1.5456512993117297e-05, + "loss": 0.3000732660293579, + "step": 2641 + }, + { + "epoch": 0.7016332492364892, + "grad_norm": 1.034060473081883, + "learning_rate": 1.545283295171979e-05, + "loss": 0.2650133967399597, + "step": 2642 + }, + { + "epoch": 0.7018988182180321, + "grad_norm": 1.1833814596994714, + "learning_rate": 1.5449151859080395e-05, + "loss": 0.3414345681667328, + "step": 2643 + }, + { + "epoch": 0.7021643871995751, + "grad_norm": 0.9407765615747015, + "learning_rate": 1.5445469715908793e-05, + "loss": 0.26955321431159973, + "step": 2644 + }, + { + "epoch": 0.702429956181118, + "grad_norm": 1.0775826100815478, + "learning_rate": 1.5441786522914855e-05, + "loss": 0.3028743863105774, + "step": 2645 + }, + 
{ + "epoch": 0.702695525162661, + "grad_norm": 1.1630883359211883, + "learning_rate": 1.5438102280808653e-05, + "loss": 0.28710106015205383, + "step": 2646 + }, + { + "epoch": 0.7029610941442039, + "grad_norm": 1.0828201415955274, + "learning_rate": 1.543441699030047e-05, + "loss": 0.33343076705932617, + "step": 2647 + }, + { + "epoch": 0.7032266631257469, + "grad_norm": 2.8774903725783445, + "learning_rate": 1.543073065210078e-05, + "loss": 0.27760642766952515, + "step": 2648 + }, + { + "epoch": 0.7034922321072898, + "grad_norm": 1.0939125975780095, + "learning_rate": 1.5427043266920276e-05, + "loss": 0.2844334840774536, + "step": 2649 + }, + { + "epoch": 0.7037578010888328, + "grad_norm": 1.0671776711844796, + "learning_rate": 1.542335483546983e-05, + "loss": 0.28979432582855225, + "step": 2650 + }, + { + "epoch": 0.7040233700703757, + "grad_norm": 1.1018820862649594, + "learning_rate": 1.5419665358460537e-05, + "loss": 0.313267320394516, + "step": 2651 + }, + { + "epoch": 0.7042889390519187, + "grad_norm": 1.122792570050495, + "learning_rate": 1.5415974836603676e-05, + "loss": 0.26702141761779785, + "step": 2652 + }, + { + "epoch": 0.7045545080334616, + "grad_norm": 1.084104909381419, + "learning_rate": 1.5412283270610752e-05, + "loss": 0.3256012499332428, + "step": 2653 + }, + { + "epoch": 0.7048200770150046, + "grad_norm": 1.1096374178765924, + "learning_rate": 1.540859066119344e-05, + "loss": 0.3035642206668854, + "step": 2654 + }, + { + "epoch": 0.7050856459965475, + "grad_norm": 1.1410920430169775, + "learning_rate": 1.5404897009063636e-05, + "loss": 0.32206645607948303, + "step": 2655 + }, + { + "epoch": 0.7053512149780906, + "grad_norm": 0.9596610334229038, + "learning_rate": 1.5401202314933436e-05, + "loss": 0.3023940920829773, + "step": 2656 + }, + { + "epoch": 0.7056167839596336, + "grad_norm": 0.9678878502259071, + "learning_rate": 1.539750657951513e-05, + "loss": 0.2839987277984619, + "step": 2657 + }, + { + "epoch": 0.7058823529411765, + 
"grad_norm": 0.9744312269236198, + "learning_rate": 1.5393809803521213e-05, + "loss": 0.2488149106502533, + "step": 2658 + }, + { + "epoch": 0.7061479219227195, + "grad_norm": 1.0311988168007409, + "learning_rate": 1.539011198766438e-05, + "loss": 0.27156201004981995, + "step": 2659 + }, + { + "epoch": 0.7064134909042624, + "grad_norm": 1.0925039664890526, + "learning_rate": 1.5386413132657528e-05, + "loss": 0.3038437068462372, + "step": 2660 + }, + { + "epoch": 0.7066790598858054, + "grad_norm": 0.9713190505037098, + "learning_rate": 1.5382713239213746e-05, + "loss": 0.27626922726631165, + "step": 2661 + }, + { + "epoch": 0.7069446288673483, + "grad_norm": 1.9675808121081846, + "learning_rate": 1.537901230804634e-05, + "loss": 0.27338162064552307, + "step": 2662 + }, + { + "epoch": 0.7072101978488913, + "grad_norm": 0.9540020890839573, + "learning_rate": 1.5375310339868798e-05, + "loss": 0.2635098099708557, + "step": 2663 + }, + { + "epoch": 0.7074757668304342, + "grad_norm": 1.1274430903932144, + "learning_rate": 1.537160733539482e-05, + "loss": 0.3245551288127899, + "step": 2664 + }, + { + "epoch": 0.7077413358119772, + "grad_norm": 1.1100804783644485, + "learning_rate": 1.53679032953383e-05, + "loss": 0.3226238787174225, + "step": 2665 + }, + { + "epoch": 0.7080069047935201, + "grad_norm": 1.0972084780717322, + "learning_rate": 1.536419822041333e-05, + "loss": 0.31588318943977356, + "step": 2666 + }, + { + "epoch": 0.7082724737750631, + "grad_norm": 1.031778059845932, + "learning_rate": 1.536049211133421e-05, + "loss": 0.2494429647922516, + "step": 2667 + }, + { + "epoch": 0.708538042756606, + "grad_norm": 1.1110915785079796, + "learning_rate": 1.5356784968815436e-05, + "loss": 0.30966901779174805, + "step": 2668 + }, + { + "epoch": 0.708803611738149, + "grad_norm": 1.1803956993815392, + "learning_rate": 1.5353076793571692e-05, + "loss": 0.29383328557014465, + "step": 2669 + }, + { + "epoch": 0.7090691807196919, + "grad_norm": 1.086625008831518, + 
"learning_rate": 1.5349367586317875e-05, + "loss": 0.30337825417518616, + "step": 2670 + }, + { + "epoch": 0.7093347497012349, + "grad_norm": 1.0049086741144315, + "learning_rate": 1.5345657347769082e-05, + "loss": 0.28128665685653687, + "step": 2671 + }, + { + "epoch": 0.7096003186827778, + "grad_norm": 1.1819105498956106, + "learning_rate": 1.5341946078640594e-05, + "loss": 0.35167062282562256, + "step": 2672 + }, + { + "epoch": 0.7098658876643208, + "grad_norm": 1.0441531577784944, + "learning_rate": 1.533823377964791e-05, + "loss": 0.30409517884254456, + "step": 2673 + }, + { + "epoch": 0.7101314566458637, + "grad_norm": 1.013441954819978, + "learning_rate": 1.5334520451506706e-05, + "loss": 0.2667735815048218, + "step": 2674 + }, + { + "epoch": 0.7103970256274067, + "grad_norm": 1.130854753100919, + "learning_rate": 1.5330806094932876e-05, + "loss": 0.290219247341156, + "step": 2675 + }, + { + "epoch": 0.7106625946089496, + "grad_norm": 1.120803532670259, + "learning_rate": 1.5327090710642503e-05, + "loss": 0.33118927478790283, + "step": 2676 + }, + { + "epoch": 0.7109281635904926, + "grad_norm": 1.2896959817209073, + "learning_rate": 1.5323374299351867e-05, + "loss": 0.34287041425704956, + "step": 2677 + }, + { + "epoch": 0.7111937325720356, + "grad_norm": 1.0183367847991263, + "learning_rate": 1.531965686177745e-05, + "loss": 0.27093711495399475, + "step": 2678 + }, + { + "epoch": 0.7114593015535785, + "grad_norm": 1.0913550671130643, + "learning_rate": 1.531593839863593e-05, + "loss": 0.2987911105155945, + "step": 2679 + }, + { + "epoch": 0.7117248705351215, + "grad_norm": 1.0145664449432468, + "learning_rate": 1.5312218910644185e-05, + "loss": 0.2914583086967468, + "step": 2680 + }, + { + "epoch": 0.7119904395166644, + "grad_norm": 1.0712171950199525, + "learning_rate": 1.530849839851928e-05, + "loss": 0.34159964323043823, + "step": 2681 + }, + { + "epoch": 0.7122560084982074, + "grad_norm": 1.0132523095253043, + "learning_rate": 1.5304776862978496e-05, + 
"loss": 0.28327372670173645, + "step": 2682 + }, + { + "epoch": 0.7125215774797503, + "grad_norm": 1.0473430655235008, + "learning_rate": 1.5301054304739292e-05, + "loss": 0.2902851104736328, + "step": 2683 + }, + { + "epoch": 0.7127871464612934, + "grad_norm": 1.106440530120003, + "learning_rate": 1.5297330724519344e-05, + "loss": 0.3192726969718933, + "step": 2684 + }, + { + "epoch": 0.7130527154428363, + "grad_norm": 1.0682705697817987, + "learning_rate": 1.5293606123036508e-05, + "loss": 0.30242764949798584, + "step": 2685 + }, + { + "epoch": 0.7133182844243793, + "grad_norm": 1.0059439200202651, + "learning_rate": 1.528988050100884e-05, + "loss": 0.2718653082847595, + "step": 2686 + }, + { + "epoch": 0.7135838534059222, + "grad_norm": 1.019566462631627, + "learning_rate": 1.52861538591546e-05, + "loss": 0.3014821708202362, + "step": 2687 + }, + { + "epoch": 0.7138494223874652, + "grad_norm": 1.1473508187880241, + "learning_rate": 1.528242619819224e-05, + "loss": 0.3378177881240845, + "step": 2688 + }, + { + "epoch": 0.7141149913690081, + "grad_norm": 1.0632179838195628, + "learning_rate": 1.5278697518840415e-05, + "loss": 0.29286471009254456, + "step": 2689 + }, + { + "epoch": 0.7143805603505511, + "grad_norm": 1.1140242619678895, + "learning_rate": 1.527496782181796e-05, + "loss": 0.3371768593788147, + "step": 2690 + }, + { + "epoch": 0.714646129332094, + "grad_norm": 1.0421377750374783, + "learning_rate": 1.5271237107843925e-05, + "loss": 0.30571556091308594, + "step": 2691 + }, + { + "epoch": 0.714911698313637, + "grad_norm": 1.0650624138184501, + "learning_rate": 1.526750537763754e-05, + "loss": 0.33064618706703186, + "step": 2692 + }, + { + "epoch": 0.7151772672951799, + "grad_norm": 1.0787164498543842, + "learning_rate": 1.5263772631918242e-05, + "loss": 0.3369274139404297, + "step": 2693 + }, + { + "epoch": 0.7154428362767229, + "grad_norm": 1.079249778019668, + "learning_rate": 1.5260038871405663e-05, + "loss": 0.2422705739736557, + "step": 2694 + }, + 
{ + "epoch": 0.7157084052582658, + "grad_norm": 1.3990281605221084, + "learning_rate": 1.5256304096819628e-05, + "loss": 0.35786008834838867, + "step": 2695 + }, + { + "epoch": 0.7159739742398088, + "grad_norm": 1.0368618301698236, + "learning_rate": 1.5252568308880155e-05, + "loss": 0.2853243052959442, + "step": 2696 + }, + { + "epoch": 0.7162395432213517, + "grad_norm": 1.1300838792843926, + "learning_rate": 1.5248831508307459e-05, + "loss": 0.2903040051460266, + "step": 2697 + }, + { + "epoch": 0.7165051122028947, + "grad_norm": 1.0779989148221412, + "learning_rate": 1.5245093695821954e-05, + "loss": 0.3375359773635864, + "step": 2698 + }, + { + "epoch": 0.7167706811844377, + "grad_norm": 0.9828776196369989, + "learning_rate": 1.5241354872144242e-05, + "loss": 0.27855974435806274, + "step": 2699 + }, + { + "epoch": 0.7170362501659806, + "grad_norm": 1.0672391327565405, + "learning_rate": 1.5237615037995129e-05, + "loss": 0.32226768136024475, + "step": 2700 + }, + { + "epoch": 0.7173018191475236, + "grad_norm": 1.1089458515112456, + "learning_rate": 1.5233874194095606e-05, + "loss": 0.32856303453445435, + "step": 2701 + }, + { + "epoch": 0.7175673881290665, + "grad_norm": 1.15556869357308, + "learning_rate": 1.5230132341166868e-05, + "loss": 0.31619006395339966, + "step": 2702 + }, + { + "epoch": 0.7178329571106095, + "grad_norm": 1.09474796019269, + "learning_rate": 1.5226389479930296e-05, + "loss": 0.29736411571502686, + "step": 2703 + }, + { + "epoch": 0.7180985260921524, + "grad_norm": 1.0969127487202406, + "learning_rate": 1.5222645611107477e-05, + "loss": 0.2767728865146637, + "step": 2704 + }, + { + "epoch": 0.7183640950736954, + "grad_norm": 1.054074095850648, + "learning_rate": 1.5218900735420174e-05, + "loss": 0.30994221568107605, + "step": 2705 + }, + { + "epoch": 0.7186296640552383, + "grad_norm": 1.0931807335310835, + "learning_rate": 1.5215154853590362e-05, + "loss": 0.3419484496116638, + "step": 2706 + }, + { + "epoch": 0.7188952330367813, + 
"grad_norm": 1.0503021732812985, + "learning_rate": 1.5211407966340203e-05, + "loss": 0.3063664436340332, + "step": 2707 + }, + { + "epoch": 0.7191608020183242, + "grad_norm": 1.0345938706194526, + "learning_rate": 1.520766007439205e-05, + "loss": 0.2856604754924774, + "step": 2708 + }, + { + "epoch": 0.7194263709998672, + "grad_norm": 0.9757823992785323, + "learning_rate": 1.5203911178468453e-05, + "loss": 0.23257851600646973, + "step": 2709 + }, + { + "epoch": 0.7196919399814101, + "grad_norm": 1.0292145399058534, + "learning_rate": 1.5200161279292154e-05, + "loss": 0.31451839208602905, + "step": 2710 + }, + { + "epoch": 0.7199575089629531, + "grad_norm": 1.1017577588578753, + "learning_rate": 1.5196410377586095e-05, + "loss": 0.30298277735710144, + "step": 2711 + }, + { + "epoch": 0.7202230779444961, + "grad_norm": 1.0759590578514124, + "learning_rate": 1.5192658474073398e-05, + "loss": 0.28654640913009644, + "step": 2712 + }, + { + "epoch": 0.7204886469260391, + "grad_norm": 1.1189221983197806, + "learning_rate": 1.5188905569477391e-05, + "loss": 0.3148455023765564, + "step": 2713 + }, + { + "epoch": 0.720754215907582, + "grad_norm": 1.079970608729249, + "learning_rate": 1.5185151664521585e-05, + "loss": 0.3004840612411499, + "step": 2714 + }, + { + "epoch": 0.721019784889125, + "grad_norm": 1.206470642332625, + "learning_rate": 1.518139675992969e-05, + "loss": 0.3378010392189026, + "step": 2715 + }, + { + "epoch": 0.721285353870668, + "grad_norm": 1.0802971688897103, + "learning_rate": 1.517764085642561e-05, + "loss": 0.3084215223789215, + "step": 2716 + }, + { + "epoch": 0.7215509228522109, + "grad_norm": 1.1196175790564493, + "learning_rate": 1.517388395473344e-05, + "loss": 0.3434324264526367, + "step": 2717 + }, + { + "epoch": 0.7218164918337538, + "grad_norm": 1.2084125695848371, + "learning_rate": 1.517012605557746e-05, + "loss": 0.2862265706062317, + "step": 2718 + }, + { + "epoch": 0.7220820608152968, + "grad_norm": 0.9574562560549519, + 
"learning_rate": 1.5166367159682156e-05, + "loss": 0.2760370671749115, + "step": 2719 + }, + { + "epoch": 0.7223476297968398, + "grad_norm": 1.0623260792686084, + "learning_rate": 1.5162607267772194e-05, + "loss": 0.26659202575683594, + "step": 2720 + }, + { + "epoch": 0.7226131987783827, + "grad_norm": 1.069380288412464, + "learning_rate": 1.5158846380572439e-05, + "loss": 0.31900978088378906, + "step": 2721 + }, + { + "epoch": 0.7228787677599257, + "grad_norm": 0.9775730121294547, + "learning_rate": 1.5155084498807941e-05, + "loss": 0.2983658015727997, + "step": 2722 + }, + { + "epoch": 0.7231443367414686, + "grad_norm": 1.0202126383266699, + "learning_rate": 1.5151321623203953e-05, + "loss": 0.3086162805557251, + "step": 2723 + }, + { + "epoch": 0.7234099057230116, + "grad_norm": 1.2685875339489936, + "learning_rate": 1.5147557754485908e-05, + "loss": 0.3233461380004883, + "step": 2724 + }, + { + "epoch": 0.7236754747045545, + "grad_norm": 1.1386667332230644, + "learning_rate": 1.5143792893379441e-05, + "loss": 0.2979195713996887, + "step": 2725 + }, + { + "epoch": 0.7239410436860975, + "grad_norm": 0.9598628443474388, + "learning_rate": 1.5140027040610367e-05, + "loss": 0.27854713797569275, + "step": 2726 + }, + { + "epoch": 0.7242066126676404, + "grad_norm": 1.0735596908703036, + "learning_rate": 1.5136260196904704e-05, + "loss": 0.293560266494751, + "step": 2727 + }, + { + "epoch": 0.7244721816491834, + "grad_norm": 1.1273149809893865, + "learning_rate": 1.513249236298865e-05, + "loss": 0.3033742308616638, + "step": 2728 + }, + { + "epoch": 0.7247377506307263, + "grad_norm": 1.1425183002588892, + "learning_rate": 1.51287235395886e-05, + "loss": 0.27958324551582336, + "step": 2729 + }, + { + "epoch": 0.7250033196122693, + "grad_norm": 1.022839475112705, + "learning_rate": 1.512495372743114e-05, + "loss": 0.3063122034072876, + "step": 2730 + }, + { + "epoch": 0.7252688885938122, + "grad_norm": 1.0524007495354166, + "learning_rate": 1.5121182927243043e-05, + 
"loss": 0.29126864671707153, + "step": 2731 + }, + { + "epoch": 0.7255344575753552, + "grad_norm": 1.0517432179455284, + "learning_rate": 1.5117411139751279e-05, + "loss": 0.27507084608078003, + "step": 2732 + }, + { + "epoch": 0.7258000265568981, + "grad_norm": 1.1167955582078537, + "learning_rate": 1.5113638365682996e-05, + "loss": 0.3432404398918152, + "step": 2733 + }, + { + "epoch": 0.7260655955384411, + "grad_norm": 1.0687371329401973, + "learning_rate": 1.5109864605765552e-05, + "loss": 0.27633196115493774, + "step": 2734 + }, + { + "epoch": 0.726331164519984, + "grad_norm": 1.0811244514830984, + "learning_rate": 1.5106089860726474e-05, + "loss": 0.274509072303772, + "step": 2735 + }, + { + "epoch": 0.726596733501527, + "grad_norm": 0.97012581020674, + "learning_rate": 1.5102314131293494e-05, + "loss": 0.26650723814964294, + "step": 2736 + }, + { + "epoch": 0.7268623024830699, + "grad_norm": 0.9681782432226156, + "learning_rate": 1.5098537418194524e-05, + "loss": 0.24476298689842224, + "step": 2737 + }, + { + "epoch": 0.7271278714646129, + "grad_norm": 1.1154772400244737, + "learning_rate": 1.5094759722157671e-05, + "loss": 0.3337150812149048, + "step": 2738 + }, + { + "epoch": 0.7273934404461558, + "grad_norm": 1.0187825093211873, + "learning_rate": 1.509098104391123e-05, + "loss": 0.3147660195827484, + "step": 2739 + }, + { + "epoch": 0.7276590094276989, + "grad_norm": 0.969229068573487, + "learning_rate": 1.5087201384183687e-05, + "loss": 0.2613281309604645, + "step": 2740 + }, + { + "epoch": 0.7279245784092419, + "grad_norm": 1.0641712204852296, + "learning_rate": 1.5083420743703717e-05, + "loss": 0.2773926854133606, + "step": 2741 + }, + { + "epoch": 0.7281901473907848, + "grad_norm": 1.0826759541494775, + "learning_rate": 1.5079639123200179e-05, + "loss": 0.30515575408935547, + "step": 2742 + }, + { + "epoch": 0.7284557163723278, + "grad_norm": 1.0619554532285063, + "learning_rate": 1.5075856523402128e-05, + "loss": 0.3174355626106262, + "step": 2743 + 
}, + { + "epoch": 0.7287212853538707, + "grad_norm": 0.9676487172589012, + "learning_rate": 1.5072072945038802e-05, + "loss": 0.25163760781288147, + "step": 2744 + }, + { + "epoch": 0.7289868543354137, + "grad_norm": 1.009992458232401, + "learning_rate": 1.5068288388839634e-05, + "loss": 0.28822118043899536, + "step": 2745 + }, + { + "epoch": 0.7292524233169566, + "grad_norm": 1.1623698216562623, + "learning_rate": 1.5064502855534237e-05, + "loss": 0.3129134476184845, + "step": 2746 + }, + { + "epoch": 0.7295179922984996, + "grad_norm": 1.0993962878508883, + "learning_rate": 1.5060716345852423e-05, + "loss": 0.332313597202301, + "step": 2747 + }, + { + "epoch": 0.7297835612800425, + "grad_norm": 1.1989932540466257, + "learning_rate": 1.5056928860524181e-05, + "loss": 0.3425176739692688, + "step": 2748 + }, + { + "epoch": 0.7300491302615855, + "grad_norm": 1.006044605592889, + "learning_rate": 1.5053140400279693e-05, + "loss": 0.2737991511821747, + "step": 2749 + }, + { + "epoch": 0.7303146992431284, + "grad_norm": 0.963162900300573, + "learning_rate": 1.5049350965849337e-05, + "loss": 0.27506589889526367, + "step": 2750 + }, + { + "epoch": 0.7305802682246714, + "grad_norm": 0.9901021314780329, + "learning_rate": 1.5045560557963663e-05, + "loss": 0.25581830739974976, + "step": 2751 + }, + { + "epoch": 0.7308458372062143, + "grad_norm": 1.0977147554610498, + "learning_rate": 1.5041769177353423e-05, + "loss": 0.31746333837509155, + "step": 2752 + }, + { + "epoch": 0.7311114061877573, + "grad_norm": 1.142455577048558, + "learning_rate": 1.5037976824749545e-05, + "loss": 0.3119337260723114, + "step": 2753 + }, + { + "epoch": 0.7313769751693002, + "grad_norm": 1.0824713857839723, + "learning_rate": 1.5034183500883153e-05, + "loss": 0.3330266773700714, + "step": 2754 + }, + { + "epoch": 0.7316425441508432, + "grad_norm": 1.1870819737785345, + "learning_rate": 1.5030389206485554e-05, + "loss": 0.2794867753982544, + "step": 2755 + }, + { + "epoch": 0.7319081131323861, + 
"grad_norm": 1.0826714009199063, + "learning_rate": 1.5026593942288248e-05, + "loss": 0.33273079991340637, + "step": 2756 + }, + { + "epoch": 0.7321736821139291, + "grad_norm": 1.1000195904608074, + "learning_rate": 1.502279770902291e-05, + "loss": 0.30673256516456604, + "step": 2757 + }, + { + "epoch": 0.732439251095472, + "grad_norm": 1.1311236734843304, + "learning_rate": 1.5019000507421412e-05, + "loss": 0.3126910924911499, + "step": 2758 + }, + { + "epoch": 0.732704820077015, + "grad_norm": 1.1665747930638253, + "learning_rate": 1.5015202338215811e-05, + "loss": 0.35423290729522705, + "step": 2759 + }, + { + "epoch": 0.7329703890585579, + "grad_norm": 1.0691634248957984, + "learning_rate": 1.5011403202138346e-05, + "loss": 0.31541377305984497, + "step": 2760 + }, + { + "epoch": 0.7332359580401009, + "grad_norm": 3.4446251175420257, + "learning_rate": 1.5007603099921451e-05, + "loss": 0.31460440158843994, + "step": 2761 + }, + { + "epoch": 0.7335015270216438, + "grad_norm": 1.0828016056563536, + "learning_rate": 1.5003802032297735e-05, + "loss": 0.2786293923854828, + "step": 2762 + }, + { + "epoch": 0.7337670960031868, + "grad_norm": 1.1025311021139896, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.27977997064590454, + "step": 2763 + }, + { + "epoch": 0.7340326649847297, + "grad_norm": 1.1136339551828278, + "learning_rate": 1.4996197003761237e-05, + "loss": 0.2933383584022522, + "step": 2764 + }, + { + "epoch": 0.7342982339662727, + "grad_norm": 1.0743056930311463, + "learning_rate": 1.4992393044314617e-05, + "loss": 0.30623573064804077, + "step": 2765 + }, + { + "epoch": 0.7345638029478156, + "grad_norm": 1.112681662128017, + "learning_rate": 1.4988588122393497e-05, + "loss": 0.28665077686309814, + "step": 2766 + }, + { + "epoch": 0.7348293719293586, + "grad_norm": 1.0268941907147413, + "learning_rate": 1.4984782238731422e-05, + "loss": 0.3245697021484375, + "step": 2767 + }, + { + "epoch": 0.7350949409109017, + "grad_norm": 1.118864717612721, + 
"learning_rate": 1.4980975394062122e-05, + "loss": 0.29477447271347046, + "step": 2768 + }, + { + "epoch": 0.7353605098924446, + "grad_norm": 1.009879072463833, + "learning_rate": 1.4977167589119508e-05, + "loss": 0.29174134135246277, + "step": 2769 + }, + { + "epoch": 0.7356260788739876, + "grad_norm": 1.010733766191454, + "learning_rate": 1.4973358824637687e-05, + "loss": 0.29473474621772766, + "step": 2770 + }, + { + "epoch": 0.7358916478555305, + "grad_norm": 1.3454647120520804, + "learning_rate": 1.4969549101350938e-05, + "loss": 0.3095156252384186, + "step": 2771 + }, + { + "epoch": 0.7361572168370735, + "grad_norm": 1.0578448721867733, + "learning_rate": 1.4965738419993733e-05, + "loss": 0.26295265555381775, + "step": 2772 + }, + { + "epoch": 0.7364227858186164, + "grad_norm": 1.0590497560307077, + "learning_rate": 1.4961926781300723e-05, + "loss": 0.2989509701728821, + "step": 2773 + }, + { + "epoch": 0.7366883548001594, + "grad_norm": 1.0783454816561941, + "learning_rate": 1.4958114186006756e-05, + "loss": 0.31087079644203186, + "step": 2774 + }, + { + "epoch": 0.7369539237817023, + "grad_norm": 1.0953647378016445, + "learning_rate": 1.4954300634846845e-05, + "loss": 0.3063197433948517, + "step": 2775 + }, + { + "epoch": 0.7372194927632453, + "grad_norm": 1.0858506486148067, + "learning_rate": 1.4950486128556208e-05, + "loss": 0.3149424195289612, + "step": 2776 + }, + { + "epoch": 0.7374850617447882, + "grad_norm": 1.0199984929310564, + "learning_rate": 1.4946670667870224e-05, + "loss": 0.2724878191947937, + "step": 2777 + }, + { + "epoch": 0.7377506307263312, + "grad_norm": 1.0033150283887489, + "learning_rate": 1.4942854253524479e-05, + "loss": 0.2556690275669098, + "step": 2778 + }, + { + "epoch": 0.7380161997078741, + "grad_norm": 1.0594159401263619, + "learning_rate": 1.4939036886254727e-05, + "loss": 0.2704542875289917, + "step": 2779 + }, + { + "epoch": 0.7382817686894171, + "grad_norm": 1.052456117640013, + "learning_rate": 1.4935218566796918e-05, 
+ "loss": 0.26762163639068604, + "step": 2780 + }, + { + "epoch": 0.73854733767096, + "grad_norm": 1.1328164222449624, + "learning_rate": 1.4931399295887172e-05, + "loss": 0.3376831114292145, + "step": 2781 + }, + { + "epoch": 0.738812906652503, + "grad_norm": 1.0695003562166123, + "learning_rate": 1.4927579074261803e-05, + "loss": 0.2980082631111145, + "step": 2782 + }, + { + "epoch": 0.7390784756340459, + "grad_norm": 1.0340858480290613, + "learning_rate": 1.4923757902657306e-05, + "loss": 0.27693796157836914, + "step": 2783 + }, + { + "epoch": 0.7393440446155889, + "grad_norm": 1.0204290883803, + "learning_rate": 1.4919935781810353e-05, + "loss": 0.3109282851219177, + "step": 2784 + }, + { + "epoch": 0.7396096135971318, + "grad_norm": 1.12631585013599, + "learning_rate": 1.4916112712457807e-05, + "loss": 0.3123949468135834, + "step": 2785 + }, + { + "epoch": 0.7398751825786748, + "grad_norm": 1.143039341014623, + "learning_rate": 1.4912288695336709e-05, + "loss": 0.3232062757015228, + "step": 2786 + }, + { + "epoch": 0.7401407515602177, + "grad_norm": 1.0315778016896975, + "learning_rate": 1.4908463731184287e-05, + "loss": 0.2685563862323761, + "step": 2787 + }, + { + "epoch": 0.7404063205417607, + "grad_norm": 1.076569860938466, + "learning_rate": 1.4904637820737945e-05, + "loss": 0.25752881169319153, + "step": 2788 + }, + { + "epoch": 0.7406718895233037, + "grad_norm": 1.2236263687690485, + "learning_rate": 1.4900810964735279e-05, + "loss": 0.2887497544288635, + "step": 2789 + }, + { + "epoch": 0.7409374585048466, + "grad_norm": 1.126755867019387, + "learning_rate": 1.489698316391406e-05, + "loss": 0.28804779052734375, + "step": 2790 + }, + { + "epoch": 0.7412030274863896, + "grad_norm": 1.0931262335064922, + "learning_rate": 1.489315441901224e-05, + "loss": 0.2684408724308014, + "step": 2791 + }, + { + "epoch": 0.7414685964679325, + "grad_norm": 1.0509233991385625, + "learning_rate": 1.4889324730767959e-05, + "loss": 0.31945526599884033, + "step": 2792 + }, + 
{ + "epoch": 0.7417341654494755, + "grad_norm": 1.3391113530092205, + "learning_rate": 1.488549409991953e-05, + "loss": 0.34446024894714355, + "step": 2793 + }, + { + "epoch": 0.7419997344310184, + "grad_norm": 1.094751814978447, + "learning_rate": 1.488166252720546e-05, + "loss": 0.28849151730537415, + "step": 2794 + }, + { + "epoch": 0.7422653034125614, + "grad_norm": 1.0431424597135226, + "learning_rate": 1.4877830013364429e-05, + "loss": 0.2793633043766022, + "step": 2795 + }, + { + "epoch": 0.7425308723941043, + "grad_norm": 1.1811188011136542, + "learning_rate": 1.4873996559135298e-05, + "loss": 0.3211687505245209, + "step": 2796 + }, + { + "epoch": 0.7427964413756474, + "grad_norm": 1.004634818722801, + "learning_rate": 1.4870162165257114e-05, + "loss": 0.26225876808166504, + "step": 2797 + }, + { + "epoch": 0.7430620103571903, + "grad_norm": 1.7885293848946355, + "learning_rate": 1.4866326832469105e-05, + "loss": 0.3100029528141022, + "step": 2798 + }, + { + "epoch": 0.7433275793387333, + "grad_norm": 1.0428487423040855, + "learning_rate": 1.4862490561510675e-05, + "loss": 0.29399827122688293, + "step": 2799 + }, + { + "epoch": 0.7435931483202762, + "grad_norm": 0.9886298200418341, + "learning_rate": 1.4858653353121412e-05, + "loss": 0.27357399463653564, + "step": 2800 + }, + { + "epoch": 0.7438587173018192, + "grad_norm": 1.1101962385134683, + "learning_rate": 1.4854815208041087e-05, + "loss": 0.34575730562210083, + "step": 2801 + }, + { + "epoch": 0.7441242862833621, + "grad_norm": 1.0351474931606812, + "learning_rate": 1.4850976127009644e-05, + "loss": 0.28487247228622437, + "step": 2802 + }, + { + "epoch": 0.7443898552649051, + "grad_norm": 1.0283492066128257, + "learning_rate": 1.484713611076722e-05, + "loss": 0.264443576335907, + "step": 2803 + }, + { + "epoch": 0.744655424246448, + "grad_norm": 1.085429543255666, + "learning_rate": 1.4843295160054116e-05, + "loss": 0.32750973105430603, + "step": 2804 + }, + { + "epoch": 0.744920993227991, + 
"grad_norm": 1.0136013055294886, + "learning_rate": 1.4839453275610827e-05, + "loss": 0.24080191552639008, + "step": 2805 + }, + { + "epoch": 0.7451865622095339, + "grad_norm": 1.1486643921382949, + "learning_rate": 1.4835610458178025e-05, + "loss": 0.31667011976242065, + "step": 2806 + }, + { + "epoch": 0.7454521311910769, + "grad_norm": 1.0103490185384167, + "learning_rate": 1.4831766708496553e-05, + "loss": 0.2754175066947937, + "step": 2807 + }, + { + "epoch": 0.7457177001726198, + "grad_norm": 1.0607394107689443, + "learning_rate": 1.482792202730745e-05, + "loss": 0.2890132963657379, + "step": 2808 + }, + { + "epoch": 0.7459832691541628, + "grad_norm": 1.049970305589495, + "learning_rate": 1.4824076415351918e-05, + "loss": 0.3402877748012543, + "step": 2809 + }, + { + "epoch": 0.7462488381357057, + "grad_norm": 1.0879104018503691, + "learning_rate": 1.4820229873371347e-05, + "loss": 0.3167210519313812, + "step": 2810 + }, + { + "epoch": 0.7465144071172487, + "grad_norm": 0.9983910427341833, + "learning_rate": 1.4816382402107308e-05, + "loss": 0.2653643786907196, + "step": 2811 + }, + { + "epoch": 0.7467799760987917, + "grad_norm": 1.2191167585139304, + "learning_rate": 1.4812534002301547e-05, + "loss": 0.3202674984931946, + "step": 2812 + }, + { + "epoch": 0.7470455450803346, + "grad_norm": 1.0461975743299208, + "learning_rate": 1.4808684674695985e-05, + "loss": 0.2942724823951721, + "step": 2813 + }, + { + "epoch": 0.7473111140618776, + "grad_norm": 1.0581736193326858, + "learning_rate": 1.480483442003273e-05, + "loss": 0.28640663623809814, + "step": 2814 + }, + { + "epoch": 0.7475766830434205, + "grad_norm": 0.9932743335315769, + "learning_rate": 1.4800983239054071e-05, + "loss": 0.26214420795440674, + "step": 2815 + }, + { + "epoch": 0.7478422520249635, + "grad_norm": 1.0324489729554576, + "learning_rate": 1.4797131132502464e-05, + "loss": 0.3288992643356323, + "step": 2816 + }, + { + "epoch": 0.7481078210065064, + "grad_norm": 0.9775792939666473, + 
"learning_rate": 1.4793278101120551e-05, + "loss": 0.2622208297252655, + "step": 2817 + }, + { + "epoch": 0.7483733899880494, + "grad_norm": 1.0856486279870832, + "learning_rate": 1.4789424145651152e-05, + "loss": 0.3223533034324646, + "step": 2818 + }, + { + "epoch": 0.7486389589695923, + "grad_norm": 0.9640735701611682, + "learning_rate": 1.4785569266837264e-05, + "loss": 0.25849875807762146, + "step": 2819 + }, + { + "epoch": 0.7489045279511353, + "grad_norm": 1.20204465384733, + "learning_rate": 1.478171346542206e-05, + "loss": 0.3477833569049835, + "step": 2820 + }, + { + "epoch": 0.7491700969326782, + "grad_norm": 1.0577809669167442, + "learning_rate": 1.4777856742148897e-05, + "loss": 0.2799205780029297, + "step": 2821 + }, + { + "epoch": 0.7494356659142212, + "grad_norm": 1.624939710599736, + "learning_rate": 1.4773999097761304e-05, + "loss": 0.2591988444328308, + "step": 2822 + }, + { + "epoch": 0.7497012348957641, + "grad_norm": 1.2869478314125868, + "learning_rate": 1.477014053300299e-05, + "loss": 0.30161747336387634, + "step": 2823 + }, + { + "epoch": 0.7499668038773071, + "grad_norm": 1.0738509532979332, + "learning_rate": 1.4766281048617837e-05, + "loss": 0.28202176094055176, + "step": 2824 + }, + { + "epoch": 0.7502323728588501, + "grad_norm": 1.0042946509670743, + "learning_rate": 1.4762420645349912e-05, + "loss": 0.26074907183647156, + "step": 2825 + }, + { + "epoch": 0.7504979418403931, + "grad_norm": 1.1385436298617553, + "learning_rate": 1.4758559323943455e-05, + "loss": 0.2822819948196411, + "step": 2826 + }, + { + "epoch": 0.750763510821936, + "grad_norm": 1.1069166183989807, + "learning_rate": 1.4754697085142879e-05, + "loss": 0.2704991102218628, + "step": 2827 + }, + { + "epoch": 0.751029079803479, + "grad_norm": 1.1005590878466516, + "learning_rate": 1.4750833929692785e-05, + "loss": 0.2627401053905487, + "step": 2828 + }, + { + "epoch": 0.751294648785022, + "grad_norm": 1.0886740028659867, + "learning_rate": 1.474696985833794e-05, + 
"loss": 0.2898240089416504, + "step": 2829 + }, + { + "epoch": 0.7515602177665649, + "grad_norm": 1.0291450176805186, + "learning_rate": 1.4743104871823291e-05, + "loss": 0.30080029368400574, + "step": 2830 + }, + { + "epoch": 0.7518257867481078, + "grad_norm": 1.0953597523125502, + "learning_rate": 1.473923897089396e-05, + "loss": 0.2950359284877777, + "step": 2831 + }, + { + "epoch": 0.7520913557296508, + "grad_norm": 1.1129882579718784, + "learning_rate": 1.4735372156295253e-05, + "loss": 0.31936827301979065, + "step": 2832 + }, + { + "epoch": 0.7523569247111938, + "grad_norm": 1.1117484749822675, + "learning_rate": 1.4731504428772642e-05, + "loss": 0.2771468460559845, + "step": 2833 + }, + { + "epoch": 0.7526224936927367, + "grad_norm": 1.1332551367729735, + "learning_rate": 1.4727635789071779e-05, + "loss": 0.3135997951030731, + "step": 2834 + }, + { + "epoch": 0.7528880626742797, + "grad_norm": 1.1215560189558773, + "learning_rate": 1.4723766237938495e-05, + "loss": 0.29874372482299805, + "step": 2835 + }, + { + "epoch": 0.7531536316558226, + "grad_norm": 1.0292177835845961, + "learning_rate": 1.4719895776118789e-05, + "loss": 0.249681293964386, + "step": 2836 + }, + { + "epoch": 0.7534192006373656, + "grad_norm": 1.0567186687732057, + "learning_rate": 1.4716024404358847e-05, + "loss": 0.28544771671295166, + "step": 2837 + }, + { + "epoch": 0.7536847696189085, + "grad_norm": 1.1290911495331684, + "learning_rate": 1.4712152123405018e-05, + "loss": 0.32532355189323425, + "step": 2838 + }, + { + "epoch": 0.7539503386004515, + "grad_norm": 1.1212187873017119, + "learning_rate": 1.4708278934003835e-05, + "loss": 0.31663140654563904, + "step": 2839 + }, + { + "epoch": 0.7542159075819944, + "grad_norm": 1.123142254862964, + "learning_rate": 1.4704404836902005e-05, + "loss": 0.30552318692207336, + "step": 2840 + }, + { + "epoch": 0.7544814765635374, + "grad_norm": 1.1574657252500693, + "learning_rate": 1.47005298328464e-05, + "loss": 0.3019601106643677, + "step": 
2841 + }, + { + "epoch": 0.7547470455450803, + "grad_norm": 1.0814580547673966, + "learning_rate": 1.4696653922584084e-05, + "loss": 0.321606308221817, + "step": 2842 + }, + { + "epoch": 0.7550126145266233, + "grad_norm": 1.138590953455986, + "learning_rate": 1.4692777106862281e-05, + "loss": 0.2709462642669678, + "step": 2843 + }, + { + "epoch": 0.7552781835081662, + "grad_norm": 1.1366302949330385, + "learning_rate": 1.46888993864284e-05, + "loss": 0.2882609963417053, + "step": 2844 + }, + { + "epoch": 0.7555437524897092, + "grad_norm": 0.9948609987035232, + "learning_rate": 1.4685020762030019e-05, + "loss": 0.25843000411987305, + "step": 2845 + }, + { + "epoch": 0.7558093214712521, + "grad_norm": 1.1002004205654323, + "learning_rate": 1.4681141234414889e-05, + "loss": 0.30962038040161133, + "step": 2846 + }, + { + "epoch": 0.7560748904527951, + "grad_norm": 1.2025960097123465, + "learning_rate": 1.4677260804330938e-05, + "loss": 0.304874062538147, + "step": 2847 + }, + { + "epoch": 0.756340459434338, + "grad_norm": 1.2287867091921092, + "learning_rate": 1.4673379472526268e-05, + "loss": 0.3425619602203369, + "step": 2848 + }, + { + "epoch": 0.756606028415881, + "grad_norm": 1.0701256182117689, + "learning_rate": 1.4669497239749153e-05, + "loss": 0.3002302050590515, + "step": 2849 + }, + { + "epoch": 0.7568715973974239, + "grad_norm": 1.1005370830207322, + "learning_rate": 1.4665614106748038e-05, + "loss": 0.31008803844451904, + "step": 2850 + }, + { + "epoch": 0.7571371663789669, + "grad_norm": 1.0175712407141912, + "learning_rate": 1.4661730074271551e-05, + "loss": 0.27829408645629883, + "step": 2851 + }, + { + "epoch": 0.7574027353605098, + "grad_norm": 1.0501959661073665, + "learning_rate": 1.4657845143068488e-05, + "loss": 0.25915467739105225, + "step": 2852 + }, + { + "epoch": 0.7576683043420529, + "grad_norm": 1.0719536636155031, + "learning_rate": 1.4653959313887813e-05, + "loss": 0.2843416929244995, + "step": 2853 + }, + { + "epoch": 0.7579338733235959, 
+ "grad_norm": 1.0489373710223147, + "learning_rate": 1.465007258747867e-05, + "loss": 0.2851647138595581, + "step": 2854 + }, + { + "epoch": 0.7581994423051388, + "grad_norm": 1.085754694338766, + "learning_rate": 1.4646184964590378e-05, + "loss": 0.266017884016037, + "step": 2855 + }, + { + "epoch": 0.7584650112866818, + "grad_norm": 1.0789098348141843, + "learning_rate": 1.4642296445972421e-05, + "loss": 0.30142179131507874, + "step": 2856 + }, + { + "epoch": 0.7587305802682247, + "grad_norm": 0.9904299934324251, + "learning_rate": 1.463840703237446e-05, + "loss": 0.2878327965736389, + "step": 2857 + }, + { + "epoch": 0.7589961492497677, + "grad_norm": 1.114310168260114, + "learning_rate": 1.4634516724546326e-05, + "loss": 0.2919169068336487, + "step": 2858 + }, + { + "epoch": 0.7592617182313106, + "grad_norm": 0.9954308342175644, + "learning_rate": 1.4630625523238027e-05, + "loss": 0.2530924081802368, + "step": 2859 + }, + { + "epoch": 0.7595272872128536, + "grad_norm": 1.0858688189416337, + "learning_rate": 1.462673342919974e-05, + "loss": 0.3009106516838074, + "step": 2860 + }, + { + "epoch": 0.7597928561943965, + "grad_norm": 1.1572533440881312, + "learning_rate": 1.4622840443181817e-05, + "loss": 0.3114222288131714, + "step": 2861 + }, + { + "epoch": 0.7600584251759395, + "grad_norm": 1.2224434370177688, + "learning_rate": 1.4618946565934775e-05, + "loss": 0.344540536403656, + "step": 2862 + }, + { + "epoch": 0.7603239941574824, + "grad_norm": 1.0685722656113568, + "learning_rate": 1.4615051798209312e-05, + "loss": 0.263607919216156, + "step": 2863 + }, + { + "epoch": 0.7605895631390254, + "grad_norm": 1.018611353798299, + "learning_rate": 1.4611156140756293e-05, + "loss": 0.2685706317424774, + "step": 2864 + }, + { + "epoch": 0.7608551321205683, + "grad_norm": 1.1431197890714058, + "learning_rate": 1.4607259594326752e-05, + "loss": 0.32342326641082764, + "step": 2865 + }, + { + "epoch": 0.7611207011021113, + "grad_norm": 1.182050624874759, + 
"learning_rate": 1.4603362159671902e-05, + "loss": 0.3088849186897278, + "step": 2866 + }, + { + "epoch": 0.7613862700836542, + "grad_norm": 1.0482348167122462, + "learning_rate": 1.4599463837543114e-05, + "loss": 0.26718589663505554, + "step": 2867 + }, + { + "epoch": 0.7616518390651972, + "grad_norm": 1.0051992534296357, + "learning_rate": 1.4595564628691944e-05, + "loss": 0.29511263966560364, + "step": 2868 + }, + { + "epoch": 0.7619174080467401, + "grad_norm": 1.0974088254649037, + "learning_rate": 1.4591664533870118e-05, + "loss": 0.2940484285354614, + "step": 2869 + }, + { + "epoch": 0.7621829770282831, + "grad_norm": 1.1564456059915547, + "learning_rate": 1.4587763553829521e-05, + "loss": 0.28167295455932617, + "step": 2870 + }, + { + "epoch": 0.762448546009826, + "grad_norm": 1.0590804851451585, + "learning_rate": 1.4583861689322219e-05, + "loss": 0.3362962007522583, + "step": 2871 + }, + { + "epoch": 0.762714114991369, + "grad_norm": 1.1206777555300773, + "learning_rate": 1.4579958941100445e-05, + "loss": 0.3003339171409607, + "step": 2872 + }, + { + "epoch": 0.7629796839729119, + "grad_norm": 1.0572512051509857, + "learning_rate": 1.4576055309916602e-05, + "loss": 0.3191443979740143, + "step": 2873 + }, + { + "epoch": 0.7632452529544549, + "grad_norm": 1.0684782615871369, + "learning_rate": 1.4572150796523265e-05, + "loss": 0.30804574489593506, + "step": 2874 + }, + { + "epoch": 0.7635108219359978, + "grad_norm": 1.0214046475154577, + "learning_rate": 1.4568245401673178e-05, + "loss": 0.32462549209594727, + "step": 2875 + }, + { + "epoch": 0.7637763909175408, + "grad_norm": 1.1357318078490404, + "learning_rate": 1.4564339126119254e-05, + "loss": 0.27751386165618896, + "step": 2876 + }, + { + "epoch": 0.7640419598990837, + "grad_norm": 1.0701221152994065, + "learning_rate": 1.4560431970614578e-05, + "loss": 0.27194011211395264, + "step": 2877 + }, + { + "epoch": 0.7643075288806267, + "grad_norm": 1.134082938487784, + "learning_rate": 
1.4556523935912406e-05, + "loss": 0.28701072931289673, + "step": 2878 + }, + { + "epoch": 0.7645730978621696, + "grad_norm": 1.0814539768930527, + "learning_rate": 1.4552615022766156e-05, + "loss": 0.3278783857822418, + "step": 2879 + }, + { + "epoch": 0.7648386668437126, + "grad_norm": 1.096499511679905, + "learning_rate": 1.4548705231929426e-05, + "loss": 0.3292006254196167, + "step": 2880 + }, + { + "epoch": 0.7651042358252557, + "grad_norm": 1.30563906707581, + "learning_rate": 1.4544794564155971e-05, + "loss": 0.33038759231567383, + "step": 2881 + }, + { + "epoch": 0.7653698048067986, + "grad_norm": 1.0799053745016685, + "learning_rate": 1.4540883020199725e-05, + "loss": 0.29183000326156616, + "step": 2882 + }, + { + "epoch": 0.7656353737883416, + "grad_norm": 1.049945067498866, + "learning_rate": 1.4536970600814789e-05, + "loss": 0.28066399693489075, + "step": 2883 + }, + { + "epoch": 0.7659009427698845, + "grad_norm": 1.0673215015420034, + "learning_rate": 1.4533057306755427e-05, + "loss": 0.2832046151161194, + "step": 2884 + }, + { + "epoch": 0.7661665117514275, + "grad_norm": 1.0799218487874103, + "learning_rate": 1.4529143138776078e-05, + "loss": 0.3006540834903717, + "step": 2885 + }, + { + "epoch": 0.7664320807329704, + "grad_norm": 0.965945374746046, + "learning_rate": 1.4525228097631351e-05, + "loss": 0.2793240547180176, + "step": 2886 + }, + { + "epoch": 0.7666976497145134, + "grad_norm": 1.0791298696355873, + "learning_rate": 1.452131218407602e-05, + "loss": 0.2895192503929138, + "step": 2887 + }, + { + "epoch": 0.7669632186960563, + "grad_norm": 1.1085071656285739, + "learning_rate": 1.4517395398865022e-05, + "loss": 0.27707618474960327, + "step": 2888 + }, + { + "epoch": 0.7672287876775993, + "grad_norm": 0.9801959170871006, + "learning_rate": 1.4513477742753465e-05, + "loss": 0.29167065024375916, + "step": 2889 + }, + { + "epoch": 0.7674943566591422, + "grad_norm": 0.9760628575291594, + "learning_rate": 1.4509559216496631e-05, + "loss": 
0.2670987844467163, + "step": 2890 + }, + { + "epoch": 0.7677599256406852, + "grad_norm": 1.0541213606202946, + "learning_rate": 1.4505639820849968e-05, + "loss": 0.3025206923484802, + "step": 2891 + }, + { + "epoch": 0.7680254946222281, + "grad_norm": 1.0721054101606857, + "learning_rate": 1.4501719556569087e-05, + "loss": 0.3104705512523651, + "step": 2892 + }, + { + "epoch": 0.7682910636037711, + "grad_norm": 1.1715745485021363, + "learning_rate": 1.4497798424409766e-05, + "loss": 0.2972267270088196, + "step": 2893 + }, + { + "epoch": 0.768556632585314, + "grad_norm": 1.3084992927105763, + "learning_rate": 1.4493876425127957e-05, + "loss": 0.34956347942352295, + "step": 2894 + }, + { + "epoch": 0.768822201566857, + "grad_norm": 1.0910589486872886, + "learning_rate": 1.4489953559479775e-05, + "loss": 0.3122873902320862, + "step": 2895 + }, + { + "epoch": 0.7690877705483999, + "grad_norm": 1.0070263080445798, + "learning_rate": 1.4486029828221497e-05, + "loss": 0.29645755887031555, + "step": 2896 + }, + { + "epoch": 0.7693533395299429, + "grad_norm": 1.1312479199974272, + "learning_rate": 1.448210523210958e-05, + "loss": 0.33357223868370056, + "step": 2897 + }, + { + "epoch": 0.7696189085114858, + "grad_norm": 1.0807209302083978, + "learning_rate": 1.4478179771900634e-05, + "loss": 0.2780191898345947, + "step": 2898 + }, + { + "epoch": 0.7698844774930288, + "grad_norm": 1.098992372480737, + "learning_rate": 1.447425344835144e-05, + "loss": 0.31503236293792725, + "step": 2899 + }, + { + "epoch": 0.7701500464745717, + "grad_norm": 1.0152023365250116, + "learning_rate": 1.4470326262218955e-05, + "loss": 0.2843332290649414, + "step": 2900 + }, + { + "epoch": 0.7704156154561147, + "grad_norm": 1.1041753681410225, + "learning_rate": 1.4466398214260286e-05, + "loss": 0.305475652217865, + "step": 2901 + }, + { + "epoch": 0.7706811844376577, + "grad_norm": 1.0159008972115877, + "learning_rate": 1.446246930523272e-05, + "loss": 0.28418007493019104, + "step": 2902 + }, + { + 
"epoch": 0.7709467534192006, + "grad_norm": 2.0289726917266027, + "learning_rate": 1.44585395358937e-05, + "loss": 0.28237032890319824, + "step": 2903 + }, + { + "epoch": 0.7712123224007436, + "grad_norm": 1.1334683720848762, + "learning_rate": 1.4454608907000843e-05, + "loss": 0.33727777004241943, + "step": 2904 + }, + { + "epoch": 0.7714778913822865, + "grad_norm": 1.1393257541232447, + "learning_rate": 1.4450677419311925e-05, + "loss": 0.2977198660373688, + "step": 2905 + }, + { + "epoch": 0.7717434603638295, + "grad_norm": 1.0793508547506123, + "learning_rate": 1.4446745073584891e-05, + "loss": 0.3095981776714325, + "step": 2906 + }, + { + "epoch": 0.7720090293453724, + "grad_norm": 1.138471500425881, + "learning_rate": 1.4442811870577851e-05, + "loss": 0.29808440804481506, + "step": 2907 + }, + { + "epoch": 0.7722745983269154, + "grad_norm": 1.2668271633221484, + "learning_rate": 1.4438877811049079e-05, + "loss": 0.32444530725479126, + "step": 2908 + }, + { + "epoch": 0.7725401673084584, + "grad_norm": 1.0229226464155372, + "learning_rate": 1.443494289575702e-05, + "loss": 0.24782602488994598, + "step": 2909 + }, + { + "epoch": 0.7728057362900014, + "grad_norm": 1.079755307057506, + "learning_rate": 1.4431007125460274e-05, + "loss": 0.31289762258529663, + "step": 2910 + }, + { + "epoch": 0.7730713052715443, + "grad_norm": 1.0928540626872372, + "learning_rate": 1.4427070500917615e-05, + "loss": 0.31444042921066284, + "step": 2911 + }, + { + "epoch": 0.7733368742530873, + "grad_norm": 1.1235251868548595, + "learning_rate": 1.4423133022887973e-05, + "loss": 0.31347882747650146, + "step": 2912 + }, + { + "epoch": 0.7736024432346302, + "grad_norm": 1.1449169077961199, + "learning_rate": 1.4419194692130453e-05, + "loss": 0.3025411367416382, + "step": 2913 + }, + { + "epoch": 0.7738680122161732, + "grad_norm": 0.9734590933720824, + "learning_rate": 1.4415255509404316e-05, + "loss": 0.2954581081867218, + "step": 2914 + }, + { + "epoch": 0.7741335811977161, + 
"grad_norm": 1.051295802747811, + "learning_rate": 1.4411315475468988e-05, + "loss": 0.2675531506538391, + "step": 2915 + }, + { + "epoch": 0.7743991501792591, + "grad_norm": 1.0207923958770302, + "learning_rate": 1.4407374591084064e-05, + "loss": 0.29307854175567627, + "step": 2916 + }, + { + "epoch": 0.774664719160802, + "grad_norm": 0.9134258889524259, + "learning_rate": 1.4403432857009295e-05, + "loss": 0.2805953025817871, + "step": 2917 + }, + { + "epoch": 0.774930288142345, + "grad_norm": 1.1114518211112974, + "learning_rate": 1.439949027400461e-05, + "loss": 0.30805838108062744, + "step": 2918 + }, + { + "epoch": 0.7751958571238879, + "grad_norm": 1.063187320260136, + "learning_rate": 1.4395546842830085e-05, + "loss": 0.31501835584640503, + "step": 2919 + }, + { + "epoch": 0.7754614261054309, + "grad_norm": 1.025310766436644, + "learning_rate": 1.4391602564245975e-05, + "loss": 0.2719186246395111, + "step": 2920 + }, + { + "epoch": 0.7757269950869738, + "grad_norm": 1.0474571998069828, + "learning_rate": 1.4387657439012677e-05, + "loss": 0.29554325342178345, + "step": 2921 + }, + { + "epoch": 0.7759925640685168, + "grad_norm": 1.0103166752174864, + "learning_rate": 1.4383711467890776e-05, + "loss": 0.2993816137313843, + "step": 2922 + }, + { + "epoch": 0.7762581330500598, + "grad_norm": 1.087143911717871, + "learning_rate": 1.4379764651641004e-05, + "loss": 0.3412264883518219, + "step": 2923 + }, + { + "epoch": 0.7765237020316027, + "grad_norm": 1.3163055539647115, + "learning_rate": 1.4375816991024263e-05, + "loss": 0.3137913942337036, + "step": 2924 + }, + { + "epoch": 0.7767892710131457, + "grad_norm": 1.0026858390591848, + "learning_rate": 1.4371868486801611e-05, + "loss": 0.2710151672363281, + "step": 2925 + }, + { + "epoch": 0.7770548399946886, + "grad_norm": 1.060508746597415, + "learning_rate": 1.4367919139734279e-05, + "loss": 0.28521692752838135, + "step": 2926 + }, + { + "epoch": 0.7773204089762316, + "grad_norm": 0.9938687291505847, + 
"learning_rate": 1.4363968950583651e-05, + "loss": 0.2889919579029083, + "step": 2927 + }, + { + "epoch": 0.7775859779577745, + "grad_norm": 1.0641534591195945, + "learning_rate": 1.436001792011128e-05, + "loss": 0.31562381982803345, + "step": 2928 + }, + { + "epoch": 0.7778515469393175, + "grad_norm": 0.980719397790632, + "learning_rate": 1.4356066049078871e-05, + "loss": 0.2747528553009033, + "step": 2929 + }, + { + "epoch": 0.7781171159208604, + "grad_norm": 1.0890864939874727, + "learning_rate": 1.4352113338248303e-05, + "loss": 0.2918938398361206, + "step": 2930 + }, + { + "epoch": 0.7783826849024034, + "grad_norm": 1.1375978489291394, + "learning_rate": 1.4348159788381615e-05, + "loss": 0.3348507285118103, + "step": 2931 + }, + { + "epoch": 0.7786482538839463, + "grad_norm": 1.049930284325584, + "learning_rate": 1.4344205400241e-05, + "loss": 0.27206242084503174, + "step": 2932 + }, + { + "epoch": 0.7789138228654893, + "grad_norm": 1.0635705360778813, + "learning_rate": 1.434025017458882e-05, + "loss": 0.28496092557907104, + "step": 2933 + }, + { + "epoch": 0.7791793918470322, + "grad_norm": 1.1207237235097192, + "learning_rate": 1.4336294112187595e-05, + "loss": 0.3080131411552429, + "step": 2934 + }, + { + "epoch": 0.7794449608285752, + "grad_norm": 1.1562549835000784, + "learning_rate": 1.4332337213800008e-05, + "loss": 0.3116779029369354, + "step": 2935 + }, + { + "epoch": 0.7797105298101181, + "grad_norm": 1.0230593279992428, + "learning_rate": 1.43283794801889e-05, + "loss": 0.26526543498039246, + "step": 2936 + }, + { + "epoch": 0.7799760987916612, + "grad_norm": 1.0768548459396885, + "learning_rate": 1.4324420912117274e-05, + "loss": 0.2829325497150421, + "step": 2937 + }, + { + "epoch": 0.7802416677732041, + "grad_norm": 1.197165846783245, + "learning_rate": 1.43204615103483e-05, + "loss": 0.34146445989608765, + "step": 2938 + }, + { + "epoch": 0.7805072367547471, + "grad_norm": 1.1418950254878286, + "learning_rate": 1.43165012756453e-05, + "loss": 
0.316609650850296, + "step": 2939 + }, + { + "epoch": 0.78077280573629, + "grad_norm": 1.119861281862994, + "learning_rate": 1.4312540208771766e-05, + "loss": 0.3215107321739197, + "step": 2940 + }, + { + "epoch": 0.781038374717833, + "grad_norm": 1.0591732101512668, + "learning_rate": 1.4308578310491342e-05, + "loss": 0.2834000587463379, + "step": 2941 + }, + { + "epoch": 0.781303943699376, + "grad_norm": 1.1186376453102755, + "learning_rate": 1.430461558156783e-05, + "loss": 0.30184993147850037, + "step": 2942 + }, + { + "epoch": 0.7815695126809189, + "grad_norm": 1.1319557052801907, + "learning_rate": 1.4300652022765207e-05, + "loss": 0.3299996256828308, + "step": 2943 + }, + { + "epoch": 0.7818350816624619, + "grad_norm": 1.1269288601015153, + "learning_rate": 1.4296687634847592e-05, + "loss": 0.27565228939056396, + "step": 2944 + }, + { + "epoch": 0.7821006506440048, + "grad_norm": 1.1019395409868211, + "learning_rate": 1.4292722418579278e-05, + "loss": 0.30347493290901184, + "step": 2945 + }, + { + "epoch": 0.7823662196255478, + "grad_norm": 1.125677517693181, + "learning_rate": 1.4288756374724709e-05, + "loss": 0.31469428539276123, + "step": 2946 + }, + { + "epoch": 0.7826317886070907, + "grad_norm": 1.0500101449680372, + "learning_rate": 1.4284789504048493e-05, + "loss": 0.27361029386520386, + "step": 2947 + }, + { + "epoch": 0.7828973575886337, + "grad_norm": 1.057442611584268, + "learning_rate": 1.428082180731539e-05, + "loss": 0.29180705547332764, + "step": 2948 + }, + { + "epoch": 0.7831629265701766, + "grad_norm": 1.0218659697209738, + "learning_rate": 1.4276853285290334e-05, + "loss": 0.281120628118515, + "step": 2949 + }, + { + "epoch": 0.7834284955517196, + "grad_norm": 1.0029783457826962, + "learning_rate": 1.4272883938738406e-05, + "loss": 0.26144471764564514, + "step": 2950 + }, + { + "epoch": 0.7836940645332625, + "grad_norm": 1.0904458839940374, + "learning_rate": 1.4268913768424848e-05, + "loss": 0.3118991255760193, + "step": 2951 + }, + { + 
"epoch": 0.7839596335148055, + "grad_norm": 1.0581869365443632, + "learning_rate": 1.4264942775115065e-05, + "loss": 0.29352328181266785, + "step": 2952 + }, + { + "epoch": 0.7842252024963484, + "grad_norm": 1.025234952757571, + "learning_rate": 1.426097095957461e-05, + "loss": 0.2687748968601227, + "step": 2953 + }, + { + "epoch": 0.7844907714778914, + "grad_norm": 1.0817782920006436, + "learning_rate": 1.4256998322569212e-05, + "loss": 0.3106890916824341, + "step": 2954 + }, + { + "epoch": 0.7847563404594343, + "grad_norm": 1.0039841255701216, + "learning_rate": 1.4253024864864742e-05, + "loss": 0.2522161304950714, + "step": 2955 + }, + { + "epoch": 0.7850219094409773, + "grad_norm": 1.031799618380073, + "learning_rate": 1.424905058722724e-05, + "loss": 0.2994377613067627, + "step": 2956 + }, + { + "epoch": 0.7852874784225202, + "grad_norm": 1.295564211303899, + "learning_rate": 1.4245075490422893e-05, + "loss": 0.3753565549850464, + "step": 2957 + }, + { + "epoch": 0.7855530474040632, + "grad_norm": 1.2386689798654595, + "learning_rate": 1.424109957521806e-05, + "loss": 0.29544737935066223, + "step": 2958 + }, + { + "epoch": 0.7858186163856061, + "grad_norm": 1.0381164701705432, + "learning_rate": 1.423712284237925e-05, + "loss": 0.307847797870636, + "step": 2959 + }, + { + "epoch": 0.7860841853671491, + "grad_norm": 1.1107576873332587, + "learning_rate": 1.4233145292673127e-05, + "loss": 0.31758183240890503, + "step": 2960 + }, + { + "epoch": 0.786349754348692, + "grad_norm": 1.0358601319268448, + "learning_rate": 1.4229166926866517e-05, + "loss": 0.307254433631897, + "step": 2961 + }, + { + "epoch": 0.786615323330235, + "grad_norm": 1.2228062733167704, + "learning_rate": 1.42251877457264e-05, + "loss": 0.3513748049736023, + "step": 2962 + }, + { + "epoch": 0.7868808923117779, + "grad_norm": 1.1359729522705007, + "learning_rate": 1.422120775001992e-05, + "loss": 0.3025718629360199, + "step": 2963 + }, + { + "epoch": 0.7871464612933209, + "grad_norm": 
1.076503168390535, + "learning_rate": 1.4217226940514367e-05, + "loss": 0.2922811508178711, + "step": 2964 + }, + { + "epoch": 0.787412030274864, + "grad_norm": 1.07297262661661, + "learning_rate": 1.42132453179772e-05, + "loss": 0.29599297046661377, + "step": 2965 + }, + { + "epoch": 0.7876775992564069, + "grad_norm": 0.992121967255531, + "learning_rate": 1.4209262883176025e-05, + "loss": 0.28336548805236816, + "step": 2966 + }, + { + "epoch": 0.7879431682379499, + "grad_norm": 1.0655541697156172, + "learning_rate": 1.4205279636878613e-05, + "loss": 0.3100801110267639, + "step": 2967 + }, + { + "epoch": 0.7882087372194928, + "grad_norm": 1.165527486411767, + "learning_rate": 1.4201295579852881e-05, + "loss": 0.33067989349365234, + "step": 2968 + }, + { + "epoch": 0.7884743062010358, + "grad_norm": 1.1896877635723886, + "learning_rate": 1.4197310712866909e-05, + "loss": 0.282347172498703, + "step": 2969 + }, + { + "epoch": 0.7887398751825787, + "grad_norm": 1.0769183433483809, + "learning_rate": 1.419332503668894e-05, + "loss": 0.30585426092147827, + "step": 2970 + }, + { + "epoch": 0.7890054441641217, + "grad_norm": 1.0616062054836604, + "learning_rate": 1.4189338552087351e-05, + "loss": 0.3011561632156372, + "step": 2971 + }, + { + "epoch": 0.7892710131456646, + "grad_norm": 0.9722574451184507, + "learning_rate": 1.4185351259830705e-05, + "loss": 0.2700524926185608, + "step": 2972 + }, + { + "epoch": 0.7895365821272076, + "grad_norm": 1.0849811262666431, + "learning_rate": 1.4181363160687693e-05, + "loss": 0.2963382303714752, + "step": 2973 + }, + { + "epoch": 0.7898021511087505, + "grad_norm": 1.0388990841328773, + "learning_rate": 1.4177374255427183e-05, + "loss": 0.27132824063301086, + "step": 2974 + }, + { + "epoch": 0.7900677200902935, + "grad_norm": 0.9602477794817199, + "learning_rate": 1.417338454481818e-05, + "loss": 0.2539706826210022, + "step": 2975 + }, + { + "epoch": 0.7903332890718364, + "grad_norm": 1.0972216427869486, + "learning_rate": 
1.416939402962986e-05, + "loss": 0.28465601801872253, + "step": 2976 + }, + { + "epoch": 0.7905988580533794, + "grad_norm": 1.1885027397372414, + "learning_rate": 1.4165402710631544e-05, + "loss": 0.3020748198032379, + "step": 2977 + }, + { + "epoch": 0.7908644270349223, + "grad_norm": 1.0709231597298363, + "learning_rate": 1.416141058859271e-05, + "loss": 0.3157690465450287, + "step": 2978 + }, + { + "epoch": 0.7911299960164653, + "grad_norm": 1.0874979641604023, + "learning_rate": 1.4157417664282994e-05, + "loss": 0.2720191776752472, + "step": 2979 + }, + { + "epoch": 0.7913955649980082, + "grad_norm": 1.0670143355557837, + "learning_rate": 1.4153423938472185e-05, + "loss": 0.2931746542453766, + "step": 2980 + }, + { + "epoch": 0.7916611339795512, + "grad_norm": 1.0836941185599118, + "learning_rate": 1.4149429411930226e-05, + "loss": 0.2683875560760498, + "step": 2981 + }, + { + "epoch": 0.7919267029610941, + "grad_norm": 1.0454189872619364, + "learning_rate": 1.4145434085427216e-05, + "loss": 0.2559819519519806, + "step": 2982 + }, + { + "epoch": 0.7921922719426371, + "grad_norm": 1.1028368657772893, + "learning_rate": 1.4141437959733404e-05, + "loss": 0.2845582365989685, + "step": 2983 + }, + { + "epoch": 0.79245784092418, + "grad_norm": 1.05827279827959, + "learning_rate": 1.4137441035619197e-05, + "loss": 0.26766544580459595, + "step": 2984 + }, + { + "epoch": 0.792723409905723, + "grad_norm": 1.2459472391823172, + "learning_rate": 1.4133443313855155e-05, + "loss": 0.32089024782180786, + "step": 2985 + }, + { + "epoch": 0.7929889788872659, + "grad_norm": 1.053106908199776, + "learning_rate": 1.4129444795211993e-05, + "loss": 0.2756182551383972, + "step": 2986 + }, + { + "epoch": 0.7932545478688089, + "grad_norm": 1.231241306668284, + "learning_rate": 1.4125445480460573e-05, + "loss": 0.29487302899360657, + "step": 2987 + }, + { + "epoch": 0.7935201168503518, + "grad_norm": 1.1738297230948855, + "learning_rate": 1.4121445370371922e-05, + "loss": 
0.3362561762332916, + "step": 2988 + }, + { + "epoch": 0.7937856858318948, + "grad_norm": 1.1591988507026376, + "learning_rate": 1.4117444465717209e-05, + "loss": 0.2986692488193512, + "step": 2989 + }, + { + "epoch": 0.7940512548134377, + "grad_norm": 1.0341012671875776, + "learning_rate": 1.4113442767267766e-05, + "loss": 0.2725266218185425, + "step": 2990 + }, + { + "epoch": 0.7943168237949807, + "grad_norm": 1.1125466640148414, + "learning_rate": 1.4109440275795071e-05, + "loss": 0.29827257990837097, + "step": 2991 + }, + { + "epoch": 0.7945823927765236, + "grad_norm": 1.0512885973195232, + "learning_rate": 1.410543699207076e-05, + "loss": 0.2506203055381775, + "step": 2992 + }, + { + "epoch": 0.7948479617580667, + "grad_norm": 0.9867416114744889, + "learning_rate": 1.410143291686661e-05, + "loss": 0.2675034701824188, + "step": 2993 + }, + { + "epoch": 0.7951135307396097, + "grad_norm": 1.1763547306282318, + "learning_rate": 1.4097428050954571e-05, + "loss": 0.34528690576553345, + "step": 2994 + }, + { + "epoch": 0.7953790997211526, + "grad_norm": 1.1374135219725177, + "learning_rate": 1.4093422395106726e-05, + "loss": 0.27551063895225525, + "step": 2995 + }, + { + "epoch": 0.7956446687026956, + "grad_norm": 1.1195982376159075, + "learning_rate": 1.408941595009532e-05, + "loss": 0.3176268935203552, + "step": 2996 + }, + { + "epoch": 0.7959102376842385, + "grad_norm": 1.1804373403956752, + "learning_rate": 1.408540871669275e-05, + "loss": 0.30056723952293396, + "step": 2997 + }, + { + "epoch": 0.7961758066657815, + "grad_norm": 1.124570387942151, + "learning_rate": 1.4081400695671562e-05, + "loss": 0.32109886407852173, + "step": 2998 + }, + { + "epoch": 0.7964413756473244, + "grad_norm": 1.1262740571855958, + "learning_rate": 1.4077391887804457e-05, + "loss": 0.33622005581855774, + "step": 2999 + }, + { + "epoch": 0.7967069446288674, + "grad_norm": 1.1195153536613822, + "learning_rate": 1.4073382293864283e-05, + "loss": 0.3054961860179901, + "step": 3000 + }, + 
{ + "epoch": 0.7969725136104103, + "grad_norm": 1.1210721039096916, + "learning_rate": 1.4069371914624044e-05, + "loss": 0.3022462725639343, + "step": 3001 + }, + { + "epoch": 0.7972380825919533, + "grad_norm": 1.0116555063320039, + "learning_rate": 1.4065360750856891e-05, + "loss": 0.2500512897968292, + "step": 3002 + }, + { + "epoch": 0.7975036515734962, + "grad_norm": 1.233947002119444, + "learning_rate": 1.4061348803336135e-05, + "loss": 0.2960171699523926, + "step": 3003 + }, + { + "epoch": 0.7977692205550392, + "grad_norm": 3.53476121579318, + "learning_rate": 1.4057336072835228e-05, + "loss": 0.2941724359989166, + "step": 3004 + }, + { + "epoch": 0.7980347895365821, + "grad_norm": 1.0143157952003843, + "learning_rate": 1.4053322560127779e-05, + "loss": 0.2827858328819275, + "step": 3005 + }, + { + "epoch": 0.7983003585181251, + "grad_norm": 1.34417890867956, + "learning_rate": 1.4049308265987544e-05, + "loss": 0.32525116205215454, + "step": 3006 + }, + { + "epoch": 0.798565927499668, + "grad_norm": 1.1622605286979444, + "learning_rate": 1.4045293191188431e-05, + "loss": 0.26509979367256165, + "step": 3007 + }, + { + "epoch": 0.798831496481211, + "grad_norm": 1.1649049829769997, + "learning_rate": 1.4041277336504503e-05, + "loss": 0.3462742567062378, + "step": 3008 + }, + { + "epoch": 0.7990970654627539, + "grad_norm": 1.118975693723979, + "learning_rate": 1.4037260702709967e-05, + "loss": 0.2971092164516449, + "step": 3009 + }, + { + "epoch": 0.7993626344442969, + "grad_norm": 1.0541078602131526, + "learning_rate": 1.4033243290579182e-05, + "loss": 0.32359808683395386, + "step": 3010 + }, + { + "epoch": 0.7996282034258398, + "grad_norm": 0.9819968107477214, + "learning_rate": 1.4029225100886657e-05, + "loss": 0.2949031591415405, + "step": 3011 + }, + { + "epoch": 0.7998937724073828, + "grad_norm": 0.9639154080405838, + "learning_rate": 1.4025206134407051e-05, + "loss": 0.29888901114463806, + "step": 3012 + }, + { + "epoch": 0.8001593413889257, + "grad_norm": 
1.0921369087209054, + "learning_rate": 1.4021186391915181e-05, + "loss": 0.2999705672264099, + "step": 3013 + }, + { + "epoch": 0.8004249103704687, + "grad_norm": 1.027092536189555, + "learning_rate": 1.4017165874185996e-05, + "loss": 0.2725638449192047, + "step": 3014 + }, + { + "epoch": 0.8006904793520117, + "grad_norm": 1.6251260873819724, + "learning_rate": 1.4013144581994609e-05, + "loss": 0.2809314727783203, + "step": 3015 + }, + { + "epoch": 0.8009560483335546, + "grad_norm": 1.194026798460289, + "learning_rate": 1.400912251611628e-05, + "loss": 0.30335327982902527, + "step": 3016 + }, + { + "epoch": 0.8012216173150976, + "grad_norm": 1.0526756572542106, + "learning_rate": 1.400509967732641e-05, + "loss": 0.27780598402023315, + "step": 3017 + }, + { + "epoch": 0.8014871862966405, + "grad_norm": 1.0036615790617616, + "learning_rate": 1.400107606640056e-05, + "loss": 0.2865309715270996, + "step": 3018 + }, + { + "epoch": 0.8017527552781835, + "grad_norm": 1.067182271229665, + "learning_rate": 1.3997051684114431e-05, + "loss": 0.2691546082496643, + "step": 3019 + }, + { + "epoch": 0.8020183242597264, + "grad_norm": 1.0174199108878024, + "learning_rate": 1.3993026531243876e-05, + "loss": 0.30289226770401, + "step": 3020 + }, + { + "epoch": 0.8022838932412695, + "grad_norm": 1.1180967643802684, + "learning_rate": 1.3989000608564905e-05, + "loss": 0.2767682671546936, + "step": 3021 + }, + { + "epoch": 0.8025494622228124, + "grad_norm": 1.1982508587685934, + "learning_rate": 1.3984973916853657e-05, + "loss": 0.3423742353916168, + "step": 3022 + }, + { + "epoch": 0.8028150312043554, + "grad_norm": 1.1718790013716964, + "learning_rate": 1.3980946456886439e-05, + "loss": 0.3000536561012268, + "step": 3023 + }, + { + "epoch": 0.8030806001858983, + "grad_norm": 1.1431161282459077, + "learning_rate": 1.3976918229439698e-05, + "loss": 0.3071063756942749, + "step": 3024 + }, + { + "epoch": 0.8033461691674413, + "grad_norm": 1.6885640285561154, + "learning_rate": 
1.397288923529002e-05, + "loss": 0.31261157989501953, + "step": 3025 + }, + { + "epoch": 0.8036117381489842, + "grad_norm": 1.0076153318556622, + "learning_rate": 1.3968859475214156e-05, + "loss": 0.2658939063549042, + "step": 3026 + }, + { + "epoch": 0.8038773071305272, + "grad_norm": 1.0309089161631302, + "learning_rate": 1.3964828949988993e-05, + "loss": 0.2772905230522156, + "step": 3027 + }, + { + "epoch": 0.8041428761120701, + "grad_norm": 1.1271894525974708, + "learning_rate": 1.396079766039157e-05, + "loss": 0.2903479337692261, + "step": 3028 + }, + { + "epoch": 0.8044084450936131, + "grad_norm": 1.2165332424367126, + "learning_rate": 1.3956765607199069e-05, + "loss": 0.35709524154663086, + "step": 3029 + }, + { + "epoch": 0.804674014075156, + "grad_norm": 1.0863328323430816, + "learning_rate": 1.3952732791188828e-05, + "loss": 0.2929389774799347, + "step": 3030 + }, + { + "epoch": 0.804939583056699, + "grad_norm": 0.999480167032172, + "learning_rate": 1.3948699213138321e-05, + "loss": 0.2609884440898895, + "step": 3031 + }, + { + "epoch": 0.805205152038242, + "grad_norm": 1.0946442757602284, + "learning_rate": 1.394466487382518e-05, + "loss": 0.3026544749736786, + "step": 3032 + }, + { + "epoch": 0.8054707210197849, + "grad_norm": 1.0415601836945267, + "learning_rate": 1.394062977402717e-05, + "loss": 0.28281137347221375, + "step": 3033 + }, + { + "epoch": 0.8057362900013278, + "grad_norm": 0.9908513124522437, + "learning_rate": 1.3936593914522214e-05, + "loss": 0.26189178228378296, + "step": 3034 + }, + { + "epoch": 0.8060018589828708, + "grad_norm": 1.0541854732158313, + "learning_rate": 1.3932557296088383e-05, + "loss": 0.27987509965896606, + "step": 3035 + }, + { + "epoch": 0.8062674279644138, + "grad_norm": 0.9961129101435677, + "learning_rate": 1.3928519919503884e-05, + "loss": 0.2857724130153656, + "step": 3036 + }, + { + "epoch": 0.8065329969459567, + "grad_norm": 0.9752377302684325, + "learning_rate": 1.3924481785547076e-05, + "loss": 
0.28102418780326843, + "step": 3037 + }, + { + "epoch": 0.8067985659274997, + "grad_norm": 1.06882045524996, + "learning_rate": 1.3920442894996464e-05, + "loss": 0.30250412225723267, + "step": 3038 + }, + { + "epoch": 0.8070641349090426, + "grad_norm": 0.9854538363943691, + "learning_rate": 1.3916403248630703e-05, + "loss": 0.28951483964920044, + "step": 3039 + }, + { + "epoch": 0.8073297038905856, + "grad_norm": 0.990016753911339, + "learning_rate": 1.3912362847228585e-05, + "loss": 0.28455328941345215, + "step": 3040 + }, + { + "epoch": 0.8075952728721285, + "grad_norm": 1.0887176497400486, + "learning_rate": 1.3908321691569048e-05, + "loss": 0.29541105031967163, + "step": 3041 + }, + { + "epoch": 0.8078608418536715, + "grad_norm": 1.162648796815669, + "learning_rate": 1.3904279782431187e-05, + "loss": 0.3057629466056824, + "step": 3042 + }, + { + "epoch": 0.8081264108352144, + "grad_norm": 1.0909846424659564, + "learning_rate": 1.3900237120594226e-05, + "loss": 0.3204082250595093, + "step": 3043 + }, + { + "epoch": 0.8083919798167574, + "grad_norm": 0.9793203113476959, + "learning_rate": 1.3896193706837551e-05, + "loss": 0.28629523515701294, + "step": 3044 + }, + { + "epoch": 0.8086575487983003, + "grad_norm": 1.1874958252714642, + "learning_rate": 1.389214954194068e-05, + "loss": 0.298164427280426, + "step": 3045 + }, + { + "epoch": 0.8089231177798433, + "grad_norm": 1.005892758898695, + "learning_rate": 1.3888104626683282e-05, + "loss": 0.27309298515319824, + "step": 3046 + }, + { + "epoch": 0.8091886867613862, + "grad_norm": 0.9950263488620656, + "learning_rate": 1.3884058961845166e-05, + "loss": 0.25635263323783875, + "step": 3047 + }, + { + "epoch": 0.8094542557429292, + "grad_norm": 1.002808171969614, + "learning_rate": 1.3880012548206292e-05, + "loss": 0.29926127195358276, + "step": 3048 + }, + { + "epoch": 0.8097198247244722, + "grad_norm": 0.9867331912864394, + "learning_rate": 1.387596538654676e-05, + "loss": 0.26633137464523315, + "step": 3049 + }, + 
{ + "epoch": 0.8099853937060152, + "grad_norm": 1.0757993931692869, + "learning_rate": 1.387191747764681e-05, + "loss": 0.28725534677505493, + "step": 3050 + }, + { + "epoch": 0.8102509626875581, + "grad_norm": 1.4955713597704303, + "learning_rate": 1.3867868822286838e-05, + "loss": 0.3015314042568207, + "step": 3051 + }, + { + "epoch": 0.8105165316691011, + "grad_norm": 1.048643971484194, + "learning_rate": 1.3863819421247375e-05, + "loss": 0.3054691553115845, + "step": 3052 + }, + { + "epoch": 0.810782100650644, + "grad_norm": 1.1596568650600225, + "learning_rate": 1.3859769275309097e-05, + "loss": 0.26315444707870483, + "step": 3053 + }, + { + "epoch": 0.811047669632187, + "grad_norm": 1.024319547072995, + "learning_rate": 1.3855718385252824e-05, + "loss": 0.2973077595233917, + "step": 3054 + }, + { + "epoch": 0.81131323861373, + "grad_norm": 1.1845129171721744, + "learning_rate": 1.385166675185952e-05, + "loss": 0.32824432849884033, + "step": 3055 + }, + { + "epoch": 0.8115788075952729, + "grad_norm": 1.2351976774044444, + "learning_rate": 1.3847614375910292e-05, + "loss": 0.3127811849117279, + "step": 3056 + }, + { + "epoch": 0.8118443765768159, + "grad_norm": 1.0840317870226388, + "learning_rate": 1.384356125818639e-05, + "loss": 0.2631932497024536, + "step": 3057 + }, + { + "epoch": 0.8121099455583588, + "grad_norm": 1.0251225163823416, + "learning_rate": 1.3839507399469213e-05, + "loss": 0.2856106162071228, + "step": 3058 + }, + { + "epoch": 0.8123755145399018, + "grad_norm": 1.2604810760435325, + "learning_rate": 1.3835452800540288e-05, + "loss": 0.28986629843711853, + "step": 3059 + }, + { + "epoch": 0.8126410835214447, + "grad_norm": 1.0804422287227695, + "learning_rate": 1.3831397462181298e-05, + "loss": 0.28411972522735596, + "step": 3060 + }, + { + "epoch": 0.8129066525029877, + "grad_norm": 1.117697190248139, + "learning_rate": 1.3827341385174063e-05, + "loss": 0.3234354853630066, + "step": 3061 + }, + { + "epoch": 0.8131722214845306, + "grad_norm": 
0.9917598533716923, + "learning_rate": 1.3823284570300551e-05, + "loss": 0.24779736995697021, + "step": 3062 + }, + { + "epoch": 0.8134377904660736, + "grad_norm": 1.1743500466494587, + "learning_rate": 1.3819227018342865e-05, + "loss": 0.3306904137134552, + "step": 3063 + }, + { + "epoch": 0.8137033594476165, + "grad_norm": 1.1120224667451313, + "learning_rate": 1.3815168730083254e-05, + "loss": 0.31705451011657715, + "step": 3064 + }, + { + "epoch": 0.8139689284291595, + "grad_norm": 1.1351768868234977, + "learning_rate": 1.3811109706304105e-05, + "loss": 0.29830047488212585, + "step": 3065 + }, + { + "epoch": 0.8142344974107024, + "grad_norm": 1.1496885073051233, + "learning_rate": 1.3807049947787954e-05, + "loss": 0.30605942010879517, + "step": 3066 + }, + { + "epoch": 0.8145000663922454, + "grad_norm": 1.0745429008877887, + "learning_rate": 1.3802989455317475e-05, + "loss": 0.3139193058013916, + "step": 3067 + }, + { + "epoch": 0.8147656353737883, + "grad_norm": 1.0541430221228831, + "learning_rate": 1.3798928229675478e-05, + "loss": 0.3175879716873169, + "step": 3068 + }, + { + "epoch": 0.8150312043553313, + "grad_norm": 1.0450888698469754, + "learning_rate": 1.3794866271644922e-05, + "loss": 0.26391106843948364, + "step": 3069 + }, + { + "epoch": 0.8152967733368742, + "grad_norm": 0.945534402365018, + "learning_rate": 1.3790803582008906e-05, + "loss": 0.24128863215446472, + "step": 3070 + }, + { + "epoch": 0.8155623423184172, + "grad_norm": 1.1627322372772537, + "learning_rate": 1.378674016155067e-05, + "loss": 0.3249368965625763, + "step": 3071 + }, + { + "epoch": 0.8158279112999601, + "grad_norm": 1.0060562228451158, + "learning_rate": 1.3782676011053592e-05, + "loss": 0.2871986925601959, + "step": 3072 + }, + { + "epoch": 0.8160934802815031, + "grad_norm": 1.1624248444882197, + "learning_rate": 1.377861113130119e-05, + "loss": 0.29047372937202454, + "step": 3073 + }, + { + "epoch": 0.816359049263046, + "grad_norm": 1.0925698386610025, + "learning_rate": 
1.3774545523077122e-05, + "loss": 0.3055281341075897, + "step": 3074 + }, + { + "epoch": 0.816624618244589, + "grad_norm": 0.9197098274775629, + "learning_rate": 1.37704791871652e-05, + "loss": 0.2565494179725647, + "step": 3075 + }, + { + "epoch": 0.8168901872261319, + "grad_norm": 1.0377185359248249, + "learning_rate": 1.3766412124349358e-05, + "loss": 0.3016049861907959, + "step": 3076 + }, + { + "epoch": 0.8171557562076749, + "grad_norm": 1.0790995041055653, + "learning_rate": 1.3762344335413677e-05, + "loss": 0.3021200895309448, + "step": 3077 + }, + { + "epoch": 0.817421325189218, + "grad_norm": 1.0643017770253544, + "learning_rate": 1.3758275821142382e-05, + "loss": 0.3024774193763733, + "step": 3078 + }, + { + "epoch": 0.8176868941707609, + "grad_norm": 1.0591328005001268, + "learning_rate": 1.3754206582319836e-05, + "loss": 0.33114269375801086, + "step": 3079 + }, + { + "epoch": 0.8179524631523039, + "grad_norm": 1.0815809107319383, + "learning_rate": 1.3750136619730534e-05, + "loss": 0.27339494228363037, + "step": 3080 + }, + { + "epoch": 0.8182180321338468, + "grad_norm": 1.170674128986789, + "learning_rate": 1.3746065934159123e-05, + "loss": 0.2827128767967224, + "step": 3081 + }, + { + "epoch": 0.8184836011153898, + "grad_norm": 1.1064880736532463, + "learning_rate": 1.3741994526390379e-05, + "loss": 0.2972746193408966, + "step": 3082 + }, + { + "epoch": 0.8187491700969327, + "grad_norm": 1.143548636761381, + "learning_rate": 1.3737922397209222e-05, + "loss": 0.29932117462158203, + "step": 3083 + }, + { + "epoch": 0.8190147390784757, + "grad_norm": 1.0415876434255473, + "learning_rate": 1.3733849547400713e-05, + "loss": 0.28307998180389404, + "step": 3084 + }, + { + "epoch": 0.8192803080600186, + "grad_norm": 1.1070561443231863, + "learning_rate": 1.3729775977750048e-05, + "loss": 0.2885883152484894, + "step": 3085 + }, + { + "epoch": 0.8195458770415616, + "grad_norm": 1.1106477390667713, + "learning_rate": 1.3725701689042564e-05, + "loss": 
0.28837913274765015, + "step": 3086 + }, + { + "epoch": 0.8198114460231045, + "grad_norm": 1.0553526039271008, + "learning_rate": 1.3721626682063733e-05, + "loss": 0.2775058150291443, + "step": 3087 + }, + { + "epoch": 0.8200770150046475, + "grad_norm": 1.153176622627066, + "learning_rate": 1.3717550957599172e-05, + "loss": 0.2813493609428406, + "step": 3088 + }, + { + "epoch": 0.8203425839861904, + "grad_norm": 1.1477738573738745, + "learning_rate": 1.371347451643463e-05, + "loss": 0.2677592933177948, + "step": 3089 + }, + { + "epoch": 0.8206081529677334, + "grad_norm": 1.184705398593534, + "learning_rate": 1.3709397359355998e-05, + "loss": 0.3104957938194275, + "step": 3090 + }, + { + "epoch": 0.8208737219492763, + "grad_norm": 1.1714327280441006, + "learning_rate": 1.3705319487149303e-05, + "loss": 0.29315799474716187, + "step": 3091 + }, + { + "epoch": 0.8211392909308193, + "grad_norm": 1.1179168081295616, + "learning_rate": 1.370124090060071e-05, + "loss": 0.3044348657131195, + "step": 3092 + }, + { + "epoch": 0.8214048599123622, + "grad_norm": 1.1122209585212142, + "learning_rate": 1.3697161600496525e-05, + "loss": 0.2918691635131836, + "step": 3093 + }, + { + "epoch": 0.8216704288939052, + "grad_norm": 1.0702091422822353, + "learning_rate": 1.3693081587623187e-05, + "loss": 0.2887750267982483, + "step": 3094 + }, + { + "epoch": 0.8219359978754481, + "grad_norm": 1.1155429990394359, + "learning_rate": 1.3689000862767274e-05, + "loss": 0.3055661916732788, + "step": 3095 + }, + { + "epoch": 0.8222015668569911, + "grad_norm": 1.0251756704247361, + "learning_rate": 1.3684919426715504e-05, + "loss": 0.271525114774704, + "step": 3096 + }, + { + "epoch": 0.822467135838534, + "grad_norm": 1.1269584199088303, + "learning_rate": 1.3680837280254726e-05, + "loss": 0.3220426142215729, + "step": 3097 + }, + { + "epoch": 0.822732704820077, + "grad_norm": 1.0149552227204566, + "learning_rate": 1.3676754424171935e-05, + "loss": 0.29091203212738037, + "step": 3098 + }, + { + 
"epoch": 0.8229982738016199, + "grad_norm": 1.051328362150218, + "learning_rate": 1.3672670859254252e-05, + "loss": 0.2928692102432251, + "step": 3099 + }, + { + "epoch": 0.8232638427831629, + "grad_norm": 1.0366528987524315, + "learning_rate": 1.3668586586288942e-05, + "loss": 0.28635919094085693, + "step": 3100 + }, + { + "epoch": 0.8235294117647058, + "grad_norm": 1.0374876833794577, + "learning_rate": 1.3664501606063402e-05, + "loss": 0.2912571430206299, + "step": 3101 + }, + { + "epoch": 0.8237949807462488, + "grad_norm": 1.051516198651511, + "learning_rate": 1.3660415919365178e-05, + "loss": 0.2783615291118622, + "step": 3102 + }, + { + "epoch": 0.8240605497277917, + "grad_norm": 1.088921494432588, + "learning_rate": 1.365632952698193e-05, + "loss": 0.3064395785331726, + "step": 3103 + }, + { + "epoch": 0.8243261187093347, + "grad_norm": 1.023130230207284, + "learning_rate": 1.3652242429701477e-05, + "loss": 0.2528907358646393, + "step": 3104 + }, + { + "epoch": 0.8245916876908777, + "grad_norm": 1.0503421945431453, + "learning_rate": 1.3648154628311754e-05, + "loss": 0.2648676633834839, + "step": 3105 + }, + { + "epoch": 0.8248572566724207, + "grad_norm": 1.2732480631249905, + "learning_rate": 1.3644066123600846e-05, + "loss": 0.33425620198249817, + "step": 3106 + }, + { + "epoch": 0.8251228256539637, + "grad_norm": 1.0925062122156084, + "learning_rate": 1.3639976916356965e-05, + "loss": 0.3108072280883789, + "step": 3107 + }, + { + "epoch": 0.8253883946355066, + "grad_norm": 1.0815679409684162, + "learning_rate": 1.3635887007368467e-05, + "loss": 0.2860543131828308, + "step": 3108 + }, + { + "epoch": 0.8256539636170496, + "grad_norm": 1.0711932859903586, + "learning_rate": 1.3631796397423833e-05, + "loss": 0.25440749526023865, + "step": 3109 + }, + { + "epoch": 0.8259195325985925, + "grad_norm": 1.1006663978120534, + "learning_rate": 1.3627705087311687e-05, + "loss": 0.2676115334033966, + "step": 3110 + }, + { + "epoch": 0.8261851015801355, + "grad_norm": 
1.1597529133358384, + "learning_rate": 1.3623613077820788e-05, + "loss": 0.28977078199386597, + "step": 3111 + }, + { + "epoch": 0.8264506705616784, + "grad_norm": 1.1046761011596355, + "learning_rate": 1.361952036974002e-05, + "loss": 0.30161401629447937, + "step": 3112 + }, + { + "epoch": 0.8267162395432214, + "grad_norm": 1.135120464396266, + "learning_rate": 1.3615426963858416e-05, + "loss": 0.28676310181617737, + "step": 3113 + }, + { + "epoch": 0.8269818085247643, + "grad_norm": 1.100109147839879, + "learning_rate": 1.361133286096513e-05, + "loss": 0.2957243323326111, + "step": 3114 + }, + { + "epoch": 0.8272473775063073, + "grad_norm": 1.0691905028493969, + "learning_rate": 1.3607238061849461e-05, + "loss": 0.3036375343799591, + "step": 3115 + }, + { + "epoch": 0.8275129464878502, + "grad_norm": 1.1142331461612014, + "learning_rate": 1.360314256730084e-05, + "loss": 0.31175294518470764, + "step": 3116 + }, + { + "epoch": 0.8277785154693932, + "grad_norm": 1.0665802680669934, + "learning_rate": 1.3599046378108825e-05, + "loss": 0.30212485790252686, + "step": 3117 + }, + { + "epoch": 0.8280440844509361, + "grad_norm": 1.1992776426845386, + "learning_rate": 1.3594949495063117e-05, + "loss": 0.3290692865848541, + "step": 3118 + }, + { + "epoch": 0.8283096534324791, + "grad_norm": 1.007005509411099, + "learning_rate": 1.3590851918953542e-05, + "loss": 0.25952839851379395, + "step": 3119 + }, + { + "epoch": 0.828575222414022, + "grad_norm": 1.0949064818424232, + "learning_rate": 1.3586753650570069e-05, + "loss": 0.27737247943878174, + "step": 3120 + }, + { + "epoch": 0.828840791395565, + "grad_norm": 1.0156990629875267, + "learning_rate": 1.3582654690702795e-05, + "loss": 0.29415374994277954, + "step": 3121 + }, + { + "epoch": 0.8291063603771079, + "grad_norm": 1.066804105313739, + "learning_rate": 1.3578555040141948e-05, + "loss": 0.29197627305984497, + "step": 3122 + }, + { + "epoch": 0.8293719293586509, + "grad_norm": 1.1089730397237387, + "learning_rate": 
1.3574454699677893e-05, + "loss": 0.30318522453308105, + "step": 3123 + }, + { + "epoch": 0.8296374983401938, + "grad_norm": 1.0916871079120407, + "learning_rate": 1.357035367010113e-05, + "loss": 0.3184241056442261, + "step": 3124 + }, + { + "epoch": 0.8299030673217368, + "grad_norm": 1.3286365770942894, + "learning_rate": 1.3566251952202288e-05, + "loss": 0.30330199003219604, + "step": 3125 + }, + { + "epoch": 0.8301686363032797, + "grad_norm": 1.1117453782986153, + "learning_rate": 1.356214954677213e-05, + "loss": 0.25366994738578796, + "step": 3126 + }, + { + "epoch": 0.8304342052848227, + "grad_norm": 1.109752753436135, + "learning_rate": 1.3558046454601552e-05, + "loss": 0.3213343918323517, + "step": 3127 + }, + { + "epoch": 0.8306997742663657, + "grad_norm": 1.0918389418395038, + "learning_rate": 1.355394267648158e-05, + "loss": 0.3012468218803406, + "step": 3128 + }, + { + "epoch": 0.8309653432479086, + "grad_norm": 1.1319633441718049, + "learning_rate": 1.3549838213203374e-05, + "loss": 0.3272971510887146, + "step": 3129 + }, + { + "epoch": 0.8312309122294516, + "grad_norm": 1.0778057413430624, + "learning_rate": 1.354573306555823e-05, + "loss": 0.30032482743263245, + "step": 3130 + }, + { + "epoch": 0.8314964812109945, + "grad_norm": 1.0778331818873157, + "learning_rate": 1.3541627234337567e-05, + "loss": 0.2820669412612915, + "step": 3131 + }, + { + "epoch": 0.8317620501925375, + "grad_norm": 1.0187129279356677, + "learning_rate": 1.3537520720332943e-05, + "loss": 0.2638673782348633, + "step": 3132 + }, + { + "epoch": 0.8320276191740804, + "grad_norm": 1.0843507637886551, + "learning_rate": 1.3533413524336043e-05, + "loss": 0.2766842246055603, + "step": 3133 + }, + { + "epoch": 0.8322931881556235, + "grad_norm": 1.2660530642163288, + "learning_rate": 1.3529305647138689e-05, + "loss": 0.330536425113678, + "step": 3134 + }, + { + "epoch": 0.8325587571371664, + "grad_norm": 1.0925834195413107, + "learning_rate": 1.3525197089532833e-05, + "loss": 
0.30375364422798157, + "step": 3135 + }, + { + "epoch": 0.8328243261187094, + "grad_norm": 1.1657669106128519, + "learning_rate": 1.3521087852310555e-05, + "loss": 0.3092171549797058, + "step": 3136 + }, + { + "epoch": 0.8330898951002523, + "grad_norm": 1.1686338102407274, + "learning_rate": 1.3516977936264062e-05, + "loss": 0.28651195764541626, + "step": 3137 + }, + { + "epoch": 0.8333554640817953, + "grad_norm": 1.0845327487717817, + "learning_rate": 1.3512867342185705e-05, + "loss": 0.2882133722305298, + "step": 3138 + }, + { + "epoch": 0.8336210330633382, + "grad_norm": 1.1325019700739036, + "learning_rate": 1.3508756070867955e-05, + "loss": 0.30633628368377686, + "step": 3139 + }, + { + "epoch": 0.8338866020448812, + "grad_norm": 1.090943303162736, + "learning_rate": 1.3504644123103415e-05, + "loss": 0.2819565236568451, + "step": 3140 + }, + { + "epoch": 0.8341521710264241, + "grad_norm": 1.0804420637943886, + "learning_rate": 1.3500531499684819e-05, + "loss": 0.29544374346733093, + "step": 3141 + }, + { + "epoch": 0.8344177400079671, + "grad_norm": 1.10400689114043, + "learning_rate": 1.3496418201405037e-05, + "loss": 0.29383376240730286, + "step": 3142 + }, + { + "epoch": 0.83468330898951, + "grad_norm": 0.9862964562028984, + "learning_rate": 1.3492304229057062e-05, + "loss": 0.24945983290672302, + "step": 3143 + }, + { + "epoch": 0.834948877971053, + "grad_norm": 1.2055608503616826, + "learning_rate": 1.3488189583434023e-05, + "loss": 0.338919997215271, + "step": 3144 + }, + { + "epoch": 0.835214446952596, + "grad_norm": 1.071166648249549, + "learning_rate": 1.348407426532917e-05, + "loss": 0.29555821418762207, + "step": 3145 + }, + { + "epoch": 0.8354800159341389, + "grad_norm": 1.0650010322896095, + "learning_rate": 1.3479958275535887e-05, + "loss": 0.31038299202919006, + "step": 3146 + }, + { + "epoch": 0.8357455849156818, + "grad_norm": 1.021351909092412, + "learning_rate": 1.347584161484769e-05, + "loss": 0.2595089077949524, + "step": 3147 + }, + { + 
"epoch": 0.8360111538972248, + "grad_norm": 1.1885926674667484, + "learning_rate": 1.3471724284058227e-05, + "loss": 0.3287338614463806, + "step": 3148 + }, + { + "epoch": 0.8362767228787678, + "grad_norm": 1.1997618392346763, + "learning_rate": 1.3467606283961268e-05, + "loss": 0.3109680414199829, + "step": 3149 + }, + { + "epoch": 0.8365422918603107, + "grad_norm": 1.0762954067078139, + "learning_rate": 1.346348761535071e-05, + "loss": 0.2584227919578552, + "step": 3150 + }, + { + "epoch": 0.8368078608418537, + "grad_norm": 1.137771769139511, + "learning_rate": 1.345936827902059e-05, + "loss": 0.3038554787635803, + "step": 3151 + }, + { + "epoch": 0.8370734298233966, + "grad_norm": 1.029659281383911, + "learning_rate": 1.3455248275765067e-05, + "loss": 0.28267812728881836, + "step": 3152 + }, + { + "epoch": 0.8373389988049396, + "grad_norm": 1.163661242492436, + "learning_rate": 1.3451127606378425e-05, + "loss": 0.3328094184398651, + "step": 3153 + }, + { + "epoch": 0.8376045677864825, + "grad_norm": 1.084045978606854, + "learning_rate": 1.3447006271655082e-05, + "loss": 0.3235865533351898, + "step": 3154 + }, + { + "epoch": 0.8378701367680255, + "grad_norm": 1.037100355990568, + "learning_rate": 1.3442884272389583e-05, + "loss": 0.25394493341445923, + "step": 3155 + }, + { + "epoch": 0.8381357057495684, + "grad_norm": 1.1250984496593863, + "learning_rate": 1.3438761609376604e-05, + "loss": 0.29841768741607666, + "step": 3156 + }, + { + "epoch": 0.8384012747311114, + "grad_norm": 1.1999100818775306, + "learning_rate": 1.3434638283410942e-05, + "loss": 0.3161924183368683, + "step": 3157 + }, + { + "epoch": 0.8386668437126543, + "grad_norm": 0.9017579941601053, + "learning_rate": 1.3430514295287526e-05, + "loss": 0.22781039774417877, + "step": 3158 + }, + { + "epoch": 0.8389324126941973, + "grad_norm": 1.0534948555265085, + "learning_rate": 1.3426389645801415e-05, + "loss": 0.2947984039783478, + "step": 3159 + }, + { + "epoch": 0.8391979816757402, + "grad_norm": 
1.0286789238265646, + "learning_rate": 1.342226433574779e-05, + "loss": 0.2827467918395996, + "step": 3160 + }, + { + "epoch": 0.8394635506572832, + "grad_norm": 1.0453932660244052, + "learning_rate": 1.3418138365921962e-05, + "loss": 0.3149232268333435, + "step": 3161 + }, + { + "epoch": 0.8397291196388262, + "grad_norm": 1.2487567497076437, + "learning_rate": 1.3414011737119373e-05, + "loss": 0.33154603838920593, + "step": 3162 + }, + { + "epoch": 0.8399946886203692, + "grad_norm": 1.074983718750332, + "learning_rate": 1.3409884450135581e-05, + "loss": 0.28532034158706665, + "step": 3163 + }, + { + "epoch": 0.8402602576019121, + "grad_norm": 1.0695327636228384, + "learning_rate": 1.3405756505766286e-05, + "loss": 0.2539500892162323, + "step": 3164 + }, + { + "epoch": 0.8405258265834551, + "grad_norm": 1.0653532722719707, + "learning_rate": 1.3401627904807302e-05, + "loss": 0.3023888170719147, + "step": 3165 + }, + { + "epoch": 0.840791395564998, + "grad_norm": 1.0811844194203637, + "learning_rate": 1.3397498648054579e-05, + "loss": 0.3088506758213043, + "step": 3166 + }, + { + "epoch": 0.841056964546541, + "grad_norm": 1.2249048833028835, + "learning_rate": 1.3393368736304184e-05, + "loss": 0.3223467469215393, + "step": 3167 + }, + { + "epoch": 0.841322533528084, + "grad_norm": 1.0772937869709083, + "learning_rate": 1.3389238170352318e-05, + "loss": 0.2541419565677643, + "step": 3168 + }, + { + "epoch": 0.8415881025096269, + "grad_norm": 1.0463826735598363, + "learning_rate": 1.3385106950995308e-05, + "loss": 0.2915497422218323, + "step": 3169 + }, + { + "epoch": 0.8418536714911699, + "grad_norm": 1.1726858597591174, + "learning_rate": 1.3380975079029598e-05, + "loss": 0.2907465994358063, + "step": 3170 + }, + { + "epoch": 0.8421192404727128, + "grad_norm": 1.0581221380369799, + "learning_rate": 1.337684255525177e-05, + "loss": 0.2587417960166931, + "step": 3171 + }, + { + "epoch": 0.8423848094542558, + "grad_norm": 1.1080472137531636, + "learning_rate": 
1.3372709380458522e-05, + "loss": 0.2932469844818115, + "step": 3172 + }, + { + "epoch": 0.8426503784357987, + "grad_norm": 1.2359417241278925, + "learning_rate": 1.3368575555446681e-05, + "loss": 0.31451860070228577, + "step": 3173 + }, + { + "epoch": 0.8429159474173417, + "grad_norm": 1.067745190297883, + "learning_rate": 1.3364441081013205e-05, + "loss": 0.24513742327690125, + "step": 3174 + }, + { + "epoch": 0.8431815163988846, + "grad_norm": 1.0795526820997523, + "learning_rate": 1.3360305957955166e-05, + "loss": 0.29781201481819153, + "step": 3175 + }, + { + "epoch": 0.8434470853804276, + "grad_norm": 1.3176130252584213, + "learning_rate": 1.3356170187069775e-05, + "loss": 0.30925726890563965, + "step": 3176 + }, + { + "epoch": 0.8437126543619705, + "grad_norm": 1.1110632932678028, + "learning_rate": 1.3352033769154347e-05, + "loss": 0.2822851538658142, + "step": 3177 + }, + { + "epoch": 0.8439782233435135, + "grad_norm": 1.0033731418220575, + "learning_rate": 1.3347896705006344e-05, + "loss": 0.2511071264743805, + "step": 3178 + }, + { + "epoch": 0.8442437923250564, + "grad_norm": 1.1921629041957855, + "learning_rate": 1.3343758995423344e-05, + "loss": 0.3002505302429199, + "step": 3179 + }, + { + "epoch": 0.8445093613065994, + "grad_norm": 0.9942107511416755, + "learning_rate": 1.3339620641203043e-05, + "loss": 0.285504549741745, + "step": 3180 + }, + { + "epoch": 0.8447749302881423, + "grad_norm": 1.1880306222164103, + "learning_rate": 1.3335481643143271e-05, + "loss": 0.31988856196403503, + "step": 3181 + }, + { + "epoch": 0.8450404992696853, + "grad_norm": 1.0905691447057935, + "learning_rate": 1.3331342002041973e-05, + "loss": 0.29330819845199585, + "step": 3182 + }, + { + "epoch": 0.8453060682512282, + "grad_norm": 1.049547579497453, + "learning_rate": 1.3327201718697232e-05, + "loss": 0.28694427013397217, + "step": 3183 + }, + { + "epoch": 0.8455716372327712, + "grad_norm": 1.0561569710297949, + "learning_rate": 1.3323060793907239e-05, + "loss": 
0.24912211298942566, + "step": 3184 + }, + { + "epoch": 0.8458372062143141, + "grad_norm": 1.1346018526864223, + "learning_rate": 1.3318919228470315e-05, + "loss": 0.28117647767066956, + "step": 3185 + }, + { + "epoch": 0.8461027751958571, + "grad_norm": 1.2524387900920857, + "learning_rate": 1.3314777023184907e-05, + "loss": 0.3176446557044983, + "step": 3186 + }, + { + "epoch": 0.8463683441774, + "grad_norm": 1.0728463380702977, + "learning_rate": 1.3310634178849583e-05, + "loss": 0.31205689907073975, + "step": 3187 + }, + { + "epoch": 0.846633913158943, + "grad_norm": 1.1500545538779043, + "learning_rate": 1.3306490696263034e-05, + "loss": 0.29942232370376587, + "step": 3188 + }, + { + "epoch": 0.8468994821404859, + "grad_norm": 1.161750107962421, + "learning_rate": 1.3302346576224077e-05, + "loss": 0.3149508833885193, + "step": 3189 + }, + { + "epoch": 0.847165051122029, + "grad_norm": 1.0924626607758976, + "learning_rate": 1.3298201819531646e-05, + "loss": 0.2930619418621063, + "step": 3190 + }, + { + "epoch": 0.847430620103572, + "grad_norm": 1.0958680594537196, + "learning_rate": 1.3294056426984804e-05, + "loss": 0.3089582920074463, + "step": 3191 + }, + { + "epoch": 0.8476961890851149, + "grad_norm": 1.2175163313381927, + "learning_rate": 1.3289910399382733e-05, + "loss": 0.3120991587638855, + "step": 3192 + }, + { + "epoch": 0.8479617580666579, + "grad_norm": 1.0535688994558223, + "learning_rate": 1.3285763737524738e-05, + "loss": 0.2728833258152008, + "step": 3193 + }, + { + "epoch": 0.8482273270482008, + "grad_norm": 1.0457465617551238, + "learning_rate": 1.3281616442210246e-05, + "loss": 0.2833358347415924, + "step": 3194 + }, + { + "epoch": 0.8484928960297438, + "grad_norm": 1.0714039101779447, + "learning_rate": 1.3277468514238803e-05, + "loss": 0.26218950748443604, + "step": 3195 + }, + { + "epoch": 0.8487584650112867, + "grad_norm": 1.0938436245702892, + "learning_rate": 1.3273319954410088e-05, + "loss": 0.3120720386505127, + "step": 3196 + }, + { + 
"epoch": 0.8490240339928297, + "grad_norm": 1.0412833763909957, + "learning_rate": 1.3269170763523892e-05, + "loss": 0.2748696208000183, + "step": 3197 + }, + { + "epoch": 0.8492896029743726, + "grad_norm": 1.0148051769031237, + "learning_rate": 1.326502094238013e-05, + "loss": 0.2892690598964691, + "step": 3198 + }, + { + "epoch": 0.8495551719559156, + "grad_norm": 1.068648430192615, + "learning_rate": 1.3260870491778835e-05, + "loss": 0.26583510637283325, + "step": 3199 + }, + { + "epoch": 0.8498207409374585, + "grad_norm": 1.105620955007001, + "learning_rate": 1.325671941252017e-05, + "loss": 0.31602388620376587, + "step": 3200 + }, + { + "epoch": 0.8500863099190015, + "grad_norm": 1.068517421778971, + "learning_rate": 1.3252567705404409e-05, + "loss": 0.2980017364025116, + "step": 3201 + }, + { + "epoch": 0.8503518789005444, + "grad_norm": 1.0740685936810315, + "learning_rate": 1.3248415371231957e-05, + "loss": 0.27081727981567383, + "step": 3202 + }, + { + "epoch": 0.8506174478820874, + "grad_norm": 1.2590520587844396, + "learning_rate": 1.3244262410803333e-05, + "loss": 0.28895002603530884, + "step": 3203 + }, + { + "epoch": 0.8508830168636303, + "grad_norm": 1.1373552047630993, + "learning_rate": 1.3240108824919176e-05, + "loss": 0.30804315209388733, + "step": 3204 + }, + { + "epoch": 0.8511485858451733, + "grad_norm": 1.1074447190812993, + "learning_rate": 1.3235954614380253e-05, + "loss": 0.28173667192459106, + "step": 3205 + }, + { + "epoch": 0.8514141548267162, + "grad_norm": 1.097058715769224, + "learning_rate": 1.3231799779987445e-05, + "loss": 0.3113047778606415, + "step": 3206 + }, + { + "epoch": 0.8516797238082592, + "grad_norm": 1.0285862677327642, + "learning_rate": 1.3227644322541754e-05, + "loss": 0.247248113155365, + "step": 3207 + }, + { + "epoch": 0.8519452927898021, + "grad_norm": 1.1032823581833329, + "learning_rate": 1.3223488242844309e-05, + "loss": 0.27078187465667725, + "step": 3208 + }, + { + "epoch": 0.8522108617713451, + "grad_norm": 
1.0635139884249352, + "learning_rate": 1.321933154169634e-05, + "loss": 0.2749357223510742, + "step": 3209 + }, + { + "epoch": 0.852476430752888, + "grad_norm": 1.0129100217319345, + "learning_rate": 1.3215174219899224e-05, + "loss": 0.25382956862449646, + "step": 3210 + }, + { + "epoch": 0.852741999734431, + "grad_norm": 1.0528151094235563, + "learning_rate": 1.3211016278254436e-05, + "loss": 0.3237685263156891, + "step": 3211 + }, + { + "epoch": 0.8530075687159739, + "grad_norm": 1.273911241149791, + "learning_rate": 1.3206857717563581e-05, + "loss": 0.2899032235145569, + "step": 3212 + }, + { + "epoch": 0.8532731376975169, + "grad_norm": 1.040323856520164, + "learning_rate": 1.3202698538628376e-05, + "loss": 0.25997933745384216, + "step": 3213 + }, + { + "epoch": 0.8535387066790598, + "grad_norm": 1.121125084608177, + "learning_rate": 1.3198538742250668e-05, + "loss": 0.3228183090686798, + "step": 3214 + }, + { + "epoch": 0.8538042756606028, + "grad_norm": 1.1002230220524851, + "learning_rate": 1.3194378329232413e-05, + "loss": 0.31993368268013, + "step": 3215 + }, + { + "epoch": 0.8540698446421457, + "grad_norm": 1.157115702913611, + "learning_rate": 1.3190217300375694e-05, + "loss": 0.29520007967948914, + "step": 3216 + }, + { + "epoch": 0.8543354136236887, + "grad_norm": 1.0898926058638614, + "learning_rate": 1.3186055656482702e-05, + "loss": 0.31073522567749023, + "step": 3217 + }, + { + "epoch": 0.8546009826052318, + "grad_norm": 1.1465583376043518, + "learning_rate": 1.3181893398355752e-05, + "loss": 0.34354183077812195, + "step": 3218 + }, + { + "epoch": 0.8548665515867747, + "grad_norm": 1.179928846812524, + "learning_rate": 1.3177730526797286e-05, + "loss": 0.27676698565483093, + "step": 3219 + }, + { + "epoch": 0.8551321205683177, + "grad_norm": 1.0792983255501365, + "learning_rate": 1.3173567042609852e-05, + "loss": 0.27313530445098877, + "step": 3220 + }, + { + "epoch": 0.8553976895498606, + "grad_norm": 0.9249374113484707, + "learning_rate": 
1.3169402946596119e-05, + "loss": 0.2517555058002472, + "step": 3221 + }, + { + "epoch": 0.8556632585314036, + "grad_norm": 1.0684778793194236, + "learning_rate": 1.3165238239558878e-05, + "loss": 0.29700207710266113, + "step": 3222 + }, + { + "epoch": 0.8559288275129465, + "grad_norm": 1.1262235464302217, + "learning_rate": 1.3161072922301037e-05, + "loss": 0.3182620704174042, + "step": 3223 + }, + { + "epoch": 0.8561943964944895, + "grad_norm": 1.123570804553303, + "learning_rate": 1.3156906995625615e-05, + "loss": 0.3112961947917938, + "step": 3224 + }, + { + "epoch": 0.8564599654760324, + "grad_norm": 1.1746597736734636, + "learning_rate": 1.3152740460335757e-05, + "loss": 0.3080563545227051, + "step": 3225 + }, + { + "epoch": 0.8567255344575754, + "grad_norm": 1.1646363575237453, + "learning_rate": 1.3148573317234726e-05, + "loss": 0.31197935342788696, + "step": 3226 + }, + { + "epoch": 0.8569911034391183, + "grad_norm": 1.0455051980244612, + "learning_rate": 1.3144405567125886e-05, + "loss": 0.27377086877822876, + "step": 3227 + }, + { + "epoch": 0.8572566724206613, + "grad_norm": 1.050528412475655, + "learning_rate": 1.3140237210812741e-05, + "loss": 0.25303182005882263, + "step": 3228 + }, + { + "epoch": 0.8575222414022042, + "grad_norm": 1.0664458431943622, + "learning_rate": 1.3136068249098899e-05, + "loss": 0.27949726581573486, + "step": 3229 + }, + { + "epoch": 0.8577878103837472, + "grad_norm": 1.0907347405782384, + "learning_rate": 1.3131898682788082e-05, + "loss": 0.278359055519104, + "step": 3230 + }, + { + "epoch": 0.8580533793652901, + "grad_norm": 1.081462335761227, + "learning_rate": 1.312772851268414e-05, + "loss": 0.28507643938064575, + "step": 3231 + }, + { + "epoch": 0.8583189483468331, + "grad_norm": 1.0256133822907842, + "learning_rate": 1.3123557739591026e-05, + "loss": 0.2689790427684784, + "step": 3232 + }, + { + "epoch": 0.858584517328376, + "grad_norm": 1.1569049456144243, + "learning_rate": 1.3119386364312821e-05, + "loss": 
0.31956973671913147, + "step": 3233 + }, + { + "epoch": 0.858850086309919, + "grad_norm": 1.0914807974802394, + "learning_rate": 1.3115214387653711e-05, + "loss": 0.2837323546409607, + "step": 3234 + }, + { + "epoch": 0.8591156552914619, + "grad_norm": 1.0015578039784754, + "learning_rate": 1.3111041810418011e-05, + "loss": 0.2756272554397583, + "step": 3235 + }, + { + "epoch": 0.8593812242730049, + "grad_norm": 1.0283979772106548, + "learning_rate": 1.3106868633410139e-05, + "loss": 0.2664923369884491, + "step": 3236 + }, + { + "epoch": 0.8596467932545478, + "grad_norm": 1.2217960050611696, + "learning_rate": 1.3102694857434637e-05, + "loss": 0.2842246890068054, + "step": 3237 + }, + { + "epoch": 0.8599123622360908, + "grad_norm": 1.0632739499737671, + "learning_rate": 1.3098520483296159e-05, + "loss": 0.3066467344760895, + "step": 3238 + }, + { + "epoch": 0.8601779312176338, + "grad_norm": 1.148754786147734, + "learning_rate": 1.3094345511799478e-05, + "loss": 0.3042510151863098, + "step": 3239 + }, + { + "epoch": 0.8604435001991767, + "grad_norm": 0.9995895975923785, + "learning_rate": 1.3090169943749475e-05, + "loss": 0.2753696143627167, + "step": 3240 + }, + { + "epoch": 0.8607090691807197, + "grad_norm": 1.0325788591675433, + "learning_rate": 1.3085993779951154e-05, + "loss": 0.2561766803264618, + "step": 3241 + }, + { + "epoch": 0.8609746381622626, + "grad_norm": 1.2136300404308455, + "learning_rate": 1.3081817021209626e-05, + "loss": 0.297982782125473, + "step": 3242 + }, + { + "epoch": 0.8612402071438056, + "grad_norm": 1.0615498924909679, + "learning_rate": 1.3077639668330124e-05, + "loss": 0.2961920499801636, + "step": 3243 + }, + { + "epoch": 0.8615057761253485, + "grad_norm": 1.1445145037694135, + "learning_rate": 1.3073461722117991e-05, + "loss": 0.2868857979774475, + "step": 3244 + }, + { + "epoch": 0.8617713451068915, + "grad_norm": 0.9475657969770804, + "learning_rate": 1.3069283183378683e-05, + "loss": 0.22930951416492462, + "step": 3245 + }, + { 
+ "epoch": 0.8620369140884345, + "grad_norm": 1.1416904771862697, + "learning_rate": 1.306510405291778e-05, + "loss": 0.29737964272499084, + "step": 3246 + }, + { + "epoch": 0.8623024830699775, + "grad_norm": 1.0401904023883137, + "learning_rate": 1.3060924331540964e-05, + "loss": 0.2764522433280945, + "step": 3247 + }, + { + "epoch": 0.8625680520515204, + "grad_norm": 0.9863739655208709, + "learning_rate": 1.3056744020054039e-05, + "loss": 0.27608832716941833, + "step": 3248 + }, + { + "epoch": 0.8628336210330634, + "grad_norm": 1.0115944755696356, + "learning_rate": 1.3052563119262915e-05, + "loss": 0.25667035579681396, + "step": 3249 + }, + { + "epoch": 0.8630991900146063, + "grad_norm": 1.1289498412687866, + "learning_rate": 1.3048381629973622e-05, + "loss": 0.3015863597393036, + "step": 3250 + }, + { + "epoch": 0.8633647589961493, + "grad_norm": 1.123802742380982, + "learning_rate": 1.3044199552992307e-05, + "loss": 0.2798422873020172, + "step": 3251 + }, + { + "epoch": 0.8636303279776922, + "grad_norm": 1.1385670465264601, + "learning_rate": 1.304001688912522e-05, + "loss": 0.2856596112251282, + "step": 3252 + }, + { + "epoch": 0.8638958969592352, + "grad_norm": 1.2094473565150297, + "learning_rate": 1.303583363917873e-05, + "loss": 0.30247554183006287, + "step": 3253 + }, + { + "epoch": 0.8641614659407781, + "grad_norm": 1.1517937069448307, + "learning_rate": 1.303164980395932e-05, + "loss": 0.26817965507507324, + "step": 3254 + }, + { + "epoch": 0.8644270349223211, + "grad_norm": 1.197653632931973, + "learning_rate": 1.3027465384273579e-05, + "loss": 0.26919034123420715, + "step": 3255 + }, + { + "epoch": 0.864692603903864, + "grad_norm": 1.1206851183742237, + "learning_rate": 1.3023280380928223e-05, + "loss": 0.29495447874069214, + "step": 3256 + }, + { + "epoch": 0.864958172885407, + "grad_norm": 1.0428738517831404, + "learning_rate": 1.3019094794730063e-05, + "loss": 0.26766717433929443, + "step": 3257 + }, + { + "epoch": 0.86522374186695, + "grad_norm": 
0.9998039586765358, + "learning_rate": 1.3014908626486032e-05, + "loss": 0.2573341131210327, + "step": 3258 + }, + { + "epoch": 0.8654893108484929, + "grad_norm": 1.226366277313196, + "learning_rate": 1.3010721877003177e-05, + "loss": 0.32776498794555664, + "step": 3259 + }, + { + "epoch": 0.8657548798300359, + "grad_norm": 1.1631189448763641, + "learning_rate": 1.3006534547088651e-05, + "loss": 0.3107950687408447, + "step": 3260 + }, + { + "epoch": 0.8660204488115788, + "grad_norm": 1.0476224109192296, + "learning_rate": 1.3002346637549726e-05, + "loss": 0.26143360137939453, + "step": 3261 + }, + { + "epoch": 0.8662860177931218, + "grad_norm": 1.035123297672666, + "learning_rate": 1.2998158149193773e-05, + "loss": 0.25666722655296326, + "step": 3262 + }, + { + "epoch": 0.8665515867746647, + "grad_norm": 1.1492097701405037, + "learning_rate": 1.2993969082828296e-05, + "loss": 0.2982695698738098, + "step": 3263 + }, + { + "epoch": 0.8668171557562077, + "grad_norm": 1.0937256102841277, + "learning_rate": 1.2989779439260888e-05, + "loss": 0.30144304037094116, + "step": 3264 + }, + { + "epoch": 0.8670827247377506, + "grad_norm": 1.0563159913050848, + "learning_rate": 1.2985589219299264e-05, + "loss": 0.30421534180641174, + "step": 3265 + }, + { + "epoch": 0.8673482937192936, + "grad_norm": 1.0698350081311019, + "learning_rate": 1.298139842375125e-05, + "loss": 0.23653842508792877, + "step": 3266 + }, + { + "epoch": 0.8676138627008365, + "grad_norm": 1.2059661362441823, + "learning_rate": 1.2977207053424781e-05, + "loss": 0.284118115901947, + "step": 3267 + }, + { + "epoch": 0.8678794316823795, + "grad_norm": 1.0387152548948486, + "learning_rate": 1.2973015109127907e-05, + "loss": 0.30857348442077637, + "step": 3268 + }, + { + "epoch": 0.8681450006639224, + "grad_norm": 1.0987728632322369, + "learning_rate": 1.2968822591668784e-05, + "loss": 0.2826589047908783, + "step": 3269 + }, + { + "epoch": 0.8684105696454654, + "grad_norm": 1.109218087764862, + "learning_rate": 
1.2964629501855678e-05, + "loss": 0.27634552121162415, + "step": 3270 + }, + { + "epoch": 0.8686761386270083, + "grad_norm": 1.0217259699141916, + "learning_rate": 1.296043584049697e-05, + "loss": 0.25823545455932617, + "step": 3271 + }, + { + "epoch": 0.8689417076085513, + "grad_norm": 1.148249635090711, + "learning_rate": 1.2956241608401145e-05, + "loss": 0.28939294815063477, + "step": 3272 + }, + { + "epoch": 0.8692072765900942, + "grad_norm": 1.0622455952024017, + "learning_rate": 1.2952046806376806e-05, + "loss": 0.3042459785938263, + "step": 3273 + }, + { + "epoch": 0.8694728455716373, + "grad_norm": 1.042505415392428, + "learning_rate": 1.2947851435232658e-05, + "loss": 0.2834415137767792, + "step": 3274 + }, + { + "epoch": 0.8697384145531802, + "grad_norm": 1.144903021800522, + "learning_rate": 1.2943655495777518e-05, + "loss": 0.28226330876350403, + "step": 3275 + }, + { + "epoch": 0.8700039835347232, + "grad_norm": 1.023547316743189, + "learning_rate": 1.2939458988820317e-05, + "loss": 0.2796105742454529, + "step": 3276 + }, + { + "epoch": 0.8702695525162661, + "grad_norm": 0.9903193313068561, + "learning_rate": 1.2935261915170091e-05, + "loss": 0.24790553748607635, + "step": 3277 + }, + { + "epoch": 0.8705351214978091, + "grad_norm": 1.0279177898991045, + "learning_rate": 1.2931064275635987e-05, + "loss": 0.25101587176322937, + "step": 3278 + }, + { + "epoch": 0.870800690479352, + "grad_norm": 1.1728597267839225, + "learning_rate": 1.2926866071027257e-05, + "loss": 0.3060816526412964, + "step": 3279 + }, + { + "epoch": 0.871066259460895, + "grad_norm": 1.1510511467115991, + "learning_rate": 1.2922667302153268e-05, + "loss": 0.3137212097644806, + "step": 3280 + }, + { + "epoch": 0.871331828442438, + "grad_norm": 0.9977159840643061, + "learning_rate": 1.2918467969823497e-05, + "loss": 0.2391548752784729, + "step": 3281 + }, + { + "epoch": 0.8715973974239809, + "grad_norm": 1.2003880700717509, + "learning_rate": 1.2914268074847516e-05, + "loss": 
0.3219330608844757, + "step": 3282 + }, + { + "epoch": 0.8718629664055239, + "grad_norm": 1.126134187698585, + "learning_rate": 1.2910067618035025e-05, + "loss": 0.2934436798095703, + "step": 3283 + }, + { + "epoch": 0.8721285353870668, + "grad_norm": 1.2016016844780073, + "learning_rate": 1.2905866600195815e-05, + "loss": 0.2919486165046692, + "step": 3284 + }, + { + "epoch": 0.8723941043686098, + "grad_norm": 1.1895929482131946, + "learning_rate": 1.2901665022139796e-05, + "loss": 0.2840641438961029, + "step": 3285 + }, + { + "epoch": 0.8726596733501527, + "grad_norm": 1.0215741253911979, + "learning_rate": 1.2897462884676983e-05, + "loss": 0.24151530861854553, + "step": 3286 + }, + { + "epoch": 0.8729252423316957, + "grad_norm": 1.0040194757671277, + "learning_rate": 1.28932601886175e-05, + "loss": 0.24515505135059357, + "step": 3287 + }, + { + "epoch": 0.8731908113132386, + "grad_norm": 1.2173512735867882, + "learning_rate": 1.2889056934771577e-05, + "loss": 0.2561264634132385, + "step": 3288 + }, + { + "epoch": 0.8734563802947816, + "grad_norm": 1.1645401251165897, + "learning_rate": 1.2884853123949547e-05, + "loss": 0.2798641622066498, + "step": 3289 + }, + { + "epoch": 0.8737219492763245, + "grad_norm": 1.2693161910394721, + "learning_rate": 1.288064875696186e-05, + "loss": 0.35207298398017883, + "step": 3290 + }, + { + "epoch": 0.8739875182578675, + "grad_norm": 1.0184365377421387, + "learning_rate": 1.2876443834619066e-05, + "loss": 0.2778821289539337, + "step": 3291 + }, + { + "epoch": 0.8742530872394104, + "grad_norm": 1.044209880952949, + "learning_rate": 1.2872238357731825e-05, + "loss": 0.2691737413406372, + "step": 3292 + }, + { + "epoch": 0.8745186562209534, + "grad_norm": 1.1392637940929287, + "learning_rate": 1.2868032327110904e-05, + "loss": 0.25476595759391785, + "step": 3293 + }, + { + "epoch": 0.8747842252024963, + "grad_norm": 1.012064080488804, + "learning_rate": 1.2863825743567174e-05, + "loss": 0.258474737405777, + "step": 3294 + }, + { + 
"epoch": 0.8750497941840393, + "grad_norm": 1.17733236715245, + "learning_rate": 1.285961860791162e-05, + "loss": 0.32421568036079407, + "step": 3295 + }, + { + "epoch": 0.8753153631655822, + "grad_norm": 1.0747747984737868, + "learning_rate": 1.2855410920955323e-05, + "loss": 0.3090333342552185, + "step": 3296 + }, + { + "epoch": 0.8755809321471252, + "grad_norm": 1.1729934635240566, + "learning_rate": 1.2851202683509476e-05, + "loss": 0.26548707485198975, + "step": 3297 + }, + { + "epoch": 0.8758465011286681, + "grad_norm": 2.497627852681845, + "learning_rate": 1.2846993896385378e-05, + "loss": 0.3002355098724365, + "step": 3298 + }, + { + "epoch": 0.8761120701102111, + "grad_norm": 1.1706582997439863, + "learning_rate": 1.2842784560394433e-05, + "loss": 0.2924933135509491, + "step": 3299 + }, + { + "epoch": 0.876377639091754, + "grad_norm": 1.1544391256229967, + "learning_rate": 1.2838574676348155e-05, + "loss": 0.2886514663696289, + "step": 3300 + }, + { + "epoch": 0.876643208073297, + "grad_norm": 1.1131138367993383, + "learning_rate": 1.2834364245058155e-05, + "loss": 0.29821154475212097, + "step": 3301 + }, + { + "epoch": 0.87690877705484, + "grad_norm": 1.0278540671542709, + "learning_rate": 1.2830153267336159e-05, + "loss": 0.2656530737876892, + "step": 3302 + }, + { + "epoch": 0.877174346036383, + "grad_norm": 1.2018449655833119, + "learning_rate": 1.282594174399399e-05, + "loss": 0.3437826633453369, + "step": 3303 + }, + { + "epoch": 0.877439915017926, + "grad_norm": 1.0564301800372577, + "learning_rate": 1.2821729675843581e-05, + "loss": 0.29773175716400146, + "step": 3304 + }, + { + "epoch": 0.8777054839994689, + "grad_norm": 1.0707167209814024, + "learning_rate": 1.2817517063696973e-05, + "loss": 0.29772818088531494, + "step": 3305 + }, + { + "epoch": 0.8779710529810119, + "grad_norm": 1.1530012432828134, + "learning_rate": 1.2813303908366303e-05, + "loss": 0.3266611099243164, + "step": 3306 + }, + { + "epoch": 0.8782366219625548, + "grad_norm": 
1.0044541774243023, + "learning_rate": 1.2809090210663818e-05, + "loss": 0.26599690318107605, + "step": 3307 + }, + { + "epoch": 0.8785021909440978, + "grad_norm": 1.0142651525790767, + "learning_rate": 1.2804875971401872e-05, + "loss": 0.27988117933273315, + "step": 3308 + }, + { + "epoch": 0.8787677599256407, + "grad_norm": 1.0221522532224918, + "learning_rate": 1.2800661191392916e-05, + "loss": 0.2630334496498108, + "step": 3309 + }, + { + "epoch": 0.8790333289071837, + "grad_norm": 1.022950247187023, + "learning_rate": 1.2796445871449517e-05, + "loss": 0.2628091871738434, + "step": 3310 + }, + { + "epoch": 0.8792988978887266, + "grad_norm": 1.1994310454875075, + "learning_rate": 1.2792230012384333e-05, + "loss": 0.3443898558616638, + "step": 3311 + }, + { + "epoch": 0.8795644668702696, + "grad_norm": 1.0673533832636588, + "learning_rate": 1.2788013615010136e-05, + "loss": 0.2966022491455078, + "step": 3312 + }, + { + "epoch": 0.8798300358518125, + "grad_norm": 1.1030087744198647, + "learning_rate": 1.2783796680139793e-05, + "loss": 0.2995494604110718, + "step": 3313 + }, + { + "epoch": 0.8800956048333555, + "grad_norm": 1.0504434000468303, + "learning_rate": 1.2779579208586283e-05, + "loss": 0.2652590870857239, + "step": 3314 + }, + { + "epoch": 0.8803611738148984, + "grad_norm": 1.1388460976467547, + "learning_rate": 1.2775361201162684e-05, + "loss": 0.3145690858364105, + "step": 3315 + }, + { + "epoch": 0.8806267427964414, + "grad_norm": 1.040210802651612, + "learning_rate": 1.2771142658682175e-05, + "loss": 0.25744086503982544, + "step": 3316 + }, + { + "epoch": 0.8808923117779843, + "grad_norm": 1.1618029117732733, + "learning_rate": 1.2766923581958046e-05, + "loss": 0.3129793405532837, + "step": 3317 + }, + { + "epoch": 0.8811578807595273, + "grad_norm": 1.166975234876197, + "learning_rate": 1.2762703971803684e-05, + "loss": 0.233384907245636, + "step": 3318 + }, + { + "epoch": 0.8814234497410702, + "grad_norm": 0.9242808009438505, + "learning_rate": 
1.2758483829032579e-05, + "loss": 0.2422962635755539, + "step": 3319 + }, + { + "epoch": 0.8816890187226132, + "grad_norm": 1.0844595421589949, + "learning_rate": 1.2754263154458328e-05, + "loss": 0.2801973819732666, + "step": 3320 + }, + { + "epoch": 0.8819545877041561, + "grad_norm": 1.294346594070355, + "learning_rate": 1.2750041948894621e-05, + "loss": 0.30659937858581543, + "step": 3321 + }, + { + "epoch": 0.8822201566856991, + "grad_norm": 1.0921019252616484, + "learning_rate": 1.274582021315526e-05, + "loss": 0.28527066111564636, + "step": 3322 + }, + { + "epoch": 0.882485725667242, + "grad_norm": 1.0598264473011552, + "learning_rate": 1.2741597948054146e-05, + "loss": 0.23065675795078278, + "step": 3323 + }, + { + "epoch": 0.882751294648785, + "grad_norm": 1.0918730747592962, + "learning_rate": 1.2737375154405283e-05, + "loss": 0.2727832794189453, + "step": 3324 + }, + { + "epoch": 0.8830168636303279, + "grad_norm": 1.0789259788038712, + "learning_rate": 1.273315183302277e-05, + "loss": 0.26809507608413696, + "step": 3325 + }, + { + "epoch": 0.8832824326118709, + "grad_norm": 1.1647625824499415, + "learning_rate": 1.2728927984720823e-05, + "loss": 0.3250407576560974, + "step": 3326 + }, + { + "epoch": 0.8835480015934138, + "grad_norm": 1.0915300736309757, + "learning_rate": 1.2724703610313742e-05, + "loss": 0.2651330232620239, + "step": 3327 + }, + { + "epoch": 0.8838135705749568, + "grad_norm": 1.206298710080754, + "learning_rate": 1.2720478710615944e-05, + "loss": 0.27337920665740967, + "step": 3328 + }, + { + "epoch": 0.8840791395564997, + "grad_norm": 1.0282478968996285, + "learning_rate": 1.2716253286441935e-05, + "loss": 0.2664092183113098, + "step": 3329 + }, + { + "epoch": 0.8843447085380428, + "grad_norm": 1.1354570950284573, + "learning_rate": 1.2712027338606323e-05, + "loss": 0.27927765250205994, + "step": 3330 + }, + { + "epoch": 0.8846102775195858, + "grad_norm": 1.1204979208217445, + "learning_rate": 1.270780086792383e-05, + "loss": 
0.27241113781929016, + "step": 3331 + }, + { + "epoch": 0.8848758465011287, + "grad_norm": 1.0795162414965664, + "learning_rate": 1.2703573875209264e-05, + "loss": 0.28279373049736023, + "step": 3332 + }, + { + "epoch": 0.8851414154826717, + "grad_norm": 1.1634487658284207, + "learning_rate": 1.2699346361277538e-05, + "loss": 0.3011108934879303, + "step": 3333 + }, + { + "epoch": 0.8854069844642146, + "grad_norm": 2.772716513531517, + "learning_rate": 1.2695118326943671e-05, + "loss": 0.3071288764476776, + "step": 3334 + }, + { + "epoch": 0.8856725534457576, + "grad_norm": 1.0969950934626527, + "learning_rate": 1.2690889773022778e-05, + "loss": 0.2688761353492737, + "step": 3335 + }, + { + "epoch": 0.8859381224273005, + "grad_norm": 1.1363327585955358, + "learning_rate": 1.2686660700330074e-05, + "loss": 0.2788669466972351, + "step": 3336 + }, + { + "epoch": 0.8862036914088435, + "grad_norm": 1.0884694079711634, + "learning_rate": 1.268243110968087e-05, + "loss": 0.2801516652107239, + "step": 3337 + }, + { + "epoch": 0.8864692603903864, + "grad_norm": 1.0414904749451368, + "learning_rate": 1.2678201001890587e-05, + "loss": 0.2876908779144287, + "step": 3338 + }, + { + "epoch": 0.8867348293719294, + "grad_norm": 1.1731879069090343, + "learning_rate": 1.2673970377774733e-05, + "loss": 0.27709734439849854, + "step": 3339 + }, + { + "epoch": 0.8870003983534723, + "grad_norm": 1.2053408848372587, + "learning_rate": 1.266973923814893e-05, + "loss": 0.3191622793674469, + "step": 3340 + }, + { + "epoch": 0.8872659673350153, + "grad_norm": 1.098682297791164, + "learning_rate": 1.2665507583828889e-05, + "loss": 0.2873385548591614, + "step": 3341 + }, + { + "epoch": 0.8875315363165582, + "grad_norm": 1.1730973936717166, + "learning_rate": 1.2661275415630421e-05, + "loss": 0.2922922372817993, + "step": 3342 + }, + { + "epoch": 0.8877971052981012, + "grad_norm": 1.1127017834272521, + "learning_rate": 1.2657042734369443e-05, + "loss": 0.305694043636322, + "step": 3343 + }, + { + 
"epoch": 0.8880626742796441, + "grad_norm": 1.120364019457983, + "learning_rate": 1.2652809540861958e-05, + "loss": 0.29108062386512756, + "step": 3344 + }, + { + "epoch": 0.8883282432611871, + "grad_norm": 1.076655765525218, + "learning_rate": 1.2648575835924084e-05, + "loss": 0.24170495569705963, + "step": 3345 + }, + { + "epoch": 0.88859381224273, + "grad_norm": 1.4853370236272063, + "learning_rate": 1.2644341620372025e-05, + "loss": 0.2987719476222992, + "step": 3346 + }, + { + "epoch": 0.888859381224273, + "grad_norm": 0.9743774864126274, + "learning_rate": 1.2640106895022088e-05, + "loss": 0.21037599444389343, + "step": 3347 + }, + { + "epoch": 0.889124950205816, + "grad_norm": 1.034527053965976, + "learning_rate": 1.2635871660690677e-05, + "loss": 0.25263655185699463, + "step": 3348 + }, + { + "epoch": 0.8893905191873589, + "grad_norm": 1.2196740502064325, + "learning_rate": 1.2631635918194301e-05, + "loss": 0.30169543623924255, + "step": 3349 + }, + { + "epoch": 0.8896560881689018, + "grad_norm": 1.0624381650731511, + "learning_rate": 1.2627399668349554e-05, + "loss": 0.26982420682907104, + "step": 3350 + }, + { + "epoch": 0.8899216571504448, + "grad_norm": 1.1785068724165282, + "learning_rate": 1.262316291197314e-05, + "loss": 0.3281899690628052, + "step": 3351 + }, + { + "epoch": 0.8901872261319878, + "grad_norm": 1.1157278400935415, + "learning_rate": 1.2618925649881852e-05, + "loss": 0.30140435695648193, + "step": 3352 + }, + { + "epoch": 0.8904527951135307, + "grad_norm": 0.9928732296573972, + "learning_rate": 1.261468788289259e-05, + "loss": 0.22343885898590088, + "step": 3353 + }, + { + "epoch": 0.8907183640950737, + "grad_norm": 1.0410264886026745, + "learning_rate": 1.261044961182234e-05, + "loss": 0.2889901399612427, + "step": 3354 + }, + { + "epoch": 0.8909839330766166, + "grad_norm": 1.0933214790144683, + "learning_rate": 1.260621083748819e-05, + "loss": 0.27896153926849365, + "step": 3355 + }, + { + "epoch": 0.8912495020581596, + "grad_norm": 
1.077111437166839, + "learning_rate": 1.2601971560707328e-05, + "loss": 0.29390811920166016, + "step": 3356 + }, + { + "epoch": 0.8915150710397025, + "grad_norm": 1.0468332572471015, + "learning_rate": 1.2597731782297036e-05, + "loss": 0.2872384190559387, + "step": 3357 + }, + { + "epoch": 0.8917806400212455, + "grad_norm": 1.3094137802442116, + "learning_rate": 1.2593491503074698e-05, + "loss": 0.29753726720809937, + "step": 3358 + }, + { + "epoch": 0.8920462090027885, + "grad_norm": 1.1441306843080605, + "learning_rate": 1.2589250723857782e-05, + "loss": 0.31631946563720703, + "step": 3359 + }, + { + "epoch": 0.8923117779843315, + "grad_norm": 1.1374138683367387, + "learning_rate": 1.2585009445463867e-05, + "loss": 0.2932048738002777, + "step": 3360 + }, + { + "epoch": 0.8925773469658744, + "grad_norm": 1.0483655110874528, + "learning_rate": 1.2580767668710614e-05, + "loss": 0.2902034521102905, + "step": 3361 + }, + { + "epoch": 0.8928429159474174, + "grad_norm": 1.0712531988705474, + "learning_rate": 1.2576525394415795e-05, + "loss": 0.2596299648284912, + "step": 3362 + }, + { + "epoch": 0.8931084849289603, + "grad_norm": 1.1916540375753872, + "learning_rate": 1.2572282623397268e-05, + "loss": 0.29102641344070435, + "step": 3363 + }, + { + "epoch": 0.8933740539105033, + "grad_norm": 1.236954620143465, + "learning_rate": 1.2568039356472985e-05, + "loss": 0.2970406711101532, + "step": 3364 + }, + { + "epoch": 0.8936396228920462, + "grad_norm": 1.1384210267422126, + "learning_rate": 1.2563795594461003e-05, + "loss": 0.2916618585586548, + "step": 3365 + }, + { + "epoch": 0.8939051918735892, + "grad_norm": 1.1769911575713834, + "learning_rate": 1.2559551338179468e-05, + "loss": 0.3217374086380005, + "step": 3366 + }, + { + "epoch": 0.8941707608551321, + "grad_norm": 1.1228623922561494, + "learning_rate": 1.255530658844662e-05, + "loss": 0.3000059425830841, + "step": 3367 + }, + { + "epoch": 0.8944363298366751, + "grad_norm": 1.2170346898517979, + "learning_rate": 
1.2551061346080804e-05, + "loss": 0.2848728895187378, + "step": 3368 + }, + { + "epoch": 0.894701898818218, + "grad_norm": 1.3197542136745113, + "learning_rate": 1.2546815611900442e-05, + "loss": 0.3328903317451477, + "step": 3369 + }, + { + "epoch": 0.894967467799761, + "grad_norm": 1.0838958961687528, + "learning_rate": 1.2542569386724069e-05, + "loss": 0.2920045256614685, + "step": 3370 + }, + { + "epoch": 0.895233036781304, + "grad_norm": 1.0679716869166582, + "learning_rate": 1.2538322671370305e-05, + "loss": 0.30370092391967773, + "step": 3371 + }, + { + "epoch": 0.8954986057628469, + "grad_norm": 1.069215534600395, + "learning_rate": 1.2534075466657866e-05, + "loss": 0.24454624950885773, + "step": 3372 + }, + { + "epoch": 0.8957641747443899, + "grad_norm": 1.172481734803523, + "learning_rate": 1.2529827773405566e-05, + "loss": 0.30908581614494324, + "step": 3373 + }, + { + "epoch": 0.8960297437259328, + "grad_norm": 1.1095939186212227, + "learning_rate": 1.2525579592432304e-05, + "loss": 0.2792360782623291, + "step": 3374 + }, + { + "epoch": 0.8962953127074758, + "grad_norm": 1.0658472517819026, + "learning_rate": 1.2521330924557087e-05, + "loss": 0.285555362701416, + "step": 3375 + }, + { + "epoch": 0.8965608816890187, + "grad_norm": 1.1649386203925687, + "learning_rate": 1.2517081770599002e-05, + "loss": 0.3159451484680176, + "step": 3376 + }, + { + "epoch": 0.8968264506705617, + "grad_norm": 1.2867424735092035, + "learning_rate": 1.2512832131377237e-05, + "loss": 0.35929200053215027, + "step": 3377 + }, + { + "epoch": 0.8970920196521046, + "grad_norm": 1.0781651079446009, + "learning_rate": 1.2508582007711074e-05, + "loss": 0.28624874353408813, + "step": 3378 + }, + { + "epoch": 0.8973575886336476, + "grad_norm": 1.0156684050998903, + "learning_rate": 1.2504331400419884e-05, + "loss": 0.27670109272003174, + "step": 3379 + }, + { + "epoch": 0.8976231576151905, + "grad_norm": 1.0786636895703534, + "learning_rate": 1.2500080310323139e-05, + "loss": 
0.2894589304924011, + "step": 3380 + }, + { + "epoch": 0.8978887265967335, + "grad_norm": 1.1385795160382524, + "learning_rate": 1.2495828738240396e-05, + "loss": 0.31378716230392456, + "step": 3381 + }, + { + "epoch": 0.8981542955782764, + "grad_norm": 1.3149597134232174, + "learning_rate": 1.2491576684991306e-05, + "loss": 0.33676713705062866, + "step": 3382 + }, + { + "epoch": 0.8984198645598194, + "grad_norm": 0.9814689350619926, + "learning_rate": 1.2487324151395618e-05, + "loss": 0.2875351011753082, + "step": 3383 + }, + { + "epoch": 0.8986854335413623, + "grad_norm": 1.1646557221945626, + "learning_rate": 1.2483071138273168e-05, + "loss": 0.29729989171028137, + "step": 3384 + }, + { + "epoch": 0.8989510025229053, + "grad_norm": 1.0864970585536224, + "learning_rate": 1.2478817646443888e-05, + "loss": 0.3227398991584778, + "step": 3385 + }, + { + "epoch": 0.8992165715044482, + "grad_norm": 1.1586445900518523, + "learning_rate": 1.2474563676727803e-05, + "loss": 0.2664690315723419, + "step": 3386 + }, + { + "epoch": 0.8994821404859913, + "grad_norm": 1.1748792923054732, + "learning_rate": 1.2470309229945021e-05, + "loss": 0.29543352127075195, + "step": 3387 + }, + { + "epoch": 0.8997477094675342, + "grad_norm": 0.9899792334789409, + "learning_rate": 1.2466054306915756e-05, + "loss": 0.26658856868743896, + "step": 3388 + }, + { + "epoch": 0.9000132784490772, + "grad_norm": 1.123207894421506, + "learning_rate": 1.2461798908460305e-05, + "loss": 0.2899627387523651, + "step": 3389 + }, + { + "epoch": 0.9002788474306201, + "grad_norm": 1.1137567335053833, + "learning_rate": 1.245754303539906e-05, + "loss": 0.2708336114883423, + "step": 3390 + }, + { + "epoch": 0.9005444164121631, + "grad_norm": 1.1459655330577214, + "learning_rate": 1.2453286688552502e-05, + "loss": 0.28124746680259705, + "step": 3391 + }, + { + "epoch": 0.900809985393706, + "grad_norm": 1.0470005335558448, + "learning_rate": 1.2449029868741202e-05, + "loss": 0.2599399983882904, + "step": 3392 + }, 
+ { + "epoch": 0.901075554375249, + "grad_norm": 0.9576026734877732, + "learning_rate": 1.2444772576785828e-05, + "loss": 0.25035667419433594, + "step": 3393 + }, + { + "epoch": 0.901341123356792, + "grad_norm": 1.1148471766082222, + "learning_rate": 1.2440514813507136e-05, + "loss": 0.2772521376609802, + "step": 3394 + }, + { + "epoch": 0.9016066923383349, + "grad_norm": 1.103787889433512, + "learning_rate": 1.2436256579725969e-05, + "loss": 0.3282839357852936, + "step": 3395 + }, + { + "epoch": 0.9018722613198779, + "grad_norm": 1.080988888326222, + "learning_rate": 1.2431997876263269e-05, + "loss": 0.2507914900779724, + "step": 3396 + }, + { + "epoch": 0.9021378303014208, + "grad_norm": 1.1123927965933749, + "learning_rate": 1.2427738703940055e-05, + "loss": 0.2620914876461029, + "step": 3397 + }, + { + "epoch": 0.9024033992829638, + "grad_norm": 1.0713438905056172, + "learning_rate": 1.2423479063577458e-05, + "loss": 0.26561641693115234, + "step": 3398 + }, + { + "epoch": 0.9026689682645067, + "grad_norm": 1.151582271756571, + "learning_rate": 1.2419218955996677e-05, + "loss": 0.2998678386211395, + "step": 3399 + }, + { + "epoch": 0.9029345372460497, + "grad_norm": 1.0484454707225395, + "learning_rate": 1.2414958382019017e-05, + "loss": 0.2368398755788803, + "step": 3400 + }, + { + "epoch": 0.9032001062275926, + "grad_norm": 1.0429929570241405, + "learning_rate": 1.241069734246586e-05, + "loss": 0.2623558044433594, + "step": 3401 + }, + { + "epoch": 0.9034656752091356, + "grad_norm": 1.0283944167565489, + "learning_rate": 1.2406435838158686e-05, + "loss": 0.2693074941635132, + "step": 3402 + }, + { + "epoch": 0.9037312441906785, + "grad_norm": 1.1211950634171715, + "learning_rate": 1.2402173869919063e-05, + "loss": 0.2933652698993683, + "step": 3403 + }, + { + "epoch": 0.9039968131722215, + "grad_norm": 1.0858313001207585, + "learning_rate": 1.2397911438568651e-05, + "loss": 0.28515487909317017, + "step": 3404 + }, + { + "epoch": 0.9042623821537644, + 
"grad_norm": 1.1243916508543286, + "learning_rate": 1.2393648544929193e-05, + "loss": 0.282942533493042, + "step": 3405 + }, + { + "epoch": 0.9045279511353074, + "grad_norm": 1.112018853789466, + "learning_rate": 1.2389385189822526e-05, + "loss": 0.28300392627716064, + "step": 3406 + }, + { + "epoch": 0.9047935201168503, + "grad_norm": 1.0490322847853841, + "learning_rate": 1.2385121374070577e-05, + "loss": 0.25697019696235657, + "step": 3407 + }, + { + "epoch": 0.9050590890983933, + "grad_norm": 1.15038978087342, + "learning_rate": 1.2380857098495355e-05, + "loss": 0.31156057119369507, + "step": 3408 + }, + { + "epoch": 0.9053246580799362, + "grad_norm": 1.1544066045654053, + "learning_rate": 1.2376592363918967e-05, + "loss": 0.2943422794342041, + "step": 3409 + }, + { + "epoch": 0.9055902270614792, + "grad_norm": 0.9968457114080438, + "learning_rate": 1.2372327171163596e-05, + "loss": 0.2792074680328369, + "step": 3410 + }, + { + "epoch": 0.9058557960430221, + "grad_norm": 1.0328662447203703, + "learning_rate": 1.2368061521051526e-05, + "loss": 0.2547443211078644, + "step": 3411 + }, + { + "epoch": 0.9061213650245651, + "grad_norm": 1.068901181257851, + "learning_rate": 1.2363795414405125e-05, + "loss": 0.25637373328208923, + "step": 3412 + }, + { + "epoch": 0.906386934006108, + "grad_norm": 1.1660475318941728, + "learning_rate": 1.2359528852046844e-05, + "loss": 0.3269123435020447, + "step": 3413 + }, + { + "epoch": 0.906652502987651, + "grad_norm": 1.0197427295072394, + "learning_rate": 1.2355261834799232e-05, + "loss": 0.28538423776626587, + "step": 3414 + }, + { + "epoch": 0.906918071969194, + "grad_norm": 1.1343354993973966, + "learning_rate": 1.2350994363484915e-05, + "loss": 0.2961096167564392, + "step": 3415 + }, + { + "epoch": 0.907183640950737, + "grad_norm": 1.0930595123597455, + "learning_rate": 1.2346726438926613e-05, + "loss": 0.3134537935256958, + "step": 3416 + }, + { + "epoch": 0.90744920993228, + "grad_norm": 1.018679268761631, + 
"learning_rate": 1.2342458061947129e-05, + "loss": 0.2614031434059143, + "step": 3417 + }, + { + "epoch": 0.9077147789138229, + "grad_norm": 1.0403373381004117, + "learning_rate": 1.2338189233369357e-05, + "loss": 0.27166056632995605, + "step": 3418 + }, + { + "epoch": 0.9079803478953659, + "grad_norm": 1.0735839504787106, + "learning_rate": 1.2333919954016277e-05, + "loss": 0.26053497195243835, + "step": 3419 + }, + { + "epoch": 0.9082459168769088, + "grad_norm": 1.1112591016079632, + "learning_rate": 1.2329650224710956e-05, + "loss": 0.3109636902809143, + "step": 3420 + }, + { + "epoch": 0.9085114858584518, + "grad_norm": 1.081828404421451, + "learning_rate": 1.232538004627655e-05, + "loss": 0.2576507329940796, + "step": 3421 + }, + { + "epoch": 0.9087770548399947, + "grad_norm": 1.0981308884589311, + "learning_rate": 1.2321109419536292e-05, + "loss": 0.2525216341018677, + "step": 3422 + }, + { + "epoch": 0.9090426238215377, + "grad_norm": 1.0732531844020532, + "learning_rate": 1.2316838345313517e-05, + "loss": 0.2483336180448532, + "step": 3423 + }, + { + "epoch": 0.9093081928030806, + "grad_norm": 1.1592146270526706, + "learning_rate": 1.2312566824431631e-05, + "loss": 0.26372796297073364, + "step": 3424 + }, + { + "epoch": 0.9095737617846236, + "grad_norm": 1.1537675520237485, + "learning_rate": 1.2308294857714138e-05, + "loss": 0.2933644950389862, + "step": 3425 + }, + { + "epoch": 0.9098393307661665, + "grad_norm": 1.0330883162146767, + "learning_rate": 1.2304022445984618e-05, + "loss": 0.2543371915817261, + "step": 3426 + }, + { + "epoch": 0.9101048997477095, + "grad_norm": 1.1689002717846686, + "learning_rate": 1.2299749590066745e-05, + "loss": 0.29246431589126587, + "step": 3427 + }, + { + "epoch": 0.9103704687292524, + "grad_norm": 1.0141798843769114, + "learning_rate": 1.2295476290784273e-05, + "loss": 0.2475431263446808, + "step": 3428 + }, + { + "epoch": 0.9106360377107954, + "grad_norm": 1.1845034794986053, + "learning_rate": 1.2291202548961042e-05, 
+ "loss": 0.3312363624572754, + "step": 3429 + }, + { + "epoch": 0.9109016066923383, + "grad_norm": 1.0459618447051044, + "learning_rate": 1.2286928365420987e-05, + "loss": 0.25192639231681824, + "step": 3430 + }, + { + "epoch": 0.9111671756738813, + "grad_norm": 1.2038671566275931, + "learning_rate": 1.2282653740988114e-05, + "loss": 0.23189345002174377, + "step": 3431 + }, + { + "epoch": 0.9114327446554242, + "grad_norm": 1.17767221221897, + "learning_rate": 1.2278378676486522e-05, + "loss": 0.2888398766517639, + "step": 3432 + }, + { + "epoch": 0.9116983136369672, + "grad_norm": 1.1295595703903276, + "learning_rate": 1.2274103172740387e-05, + "loss": 0.2857785224914551, + "step": 3433 + }, + { + "epoch": 0.9119638826185101, + "grad_norm": 1.039533312390003, + "learning_rate": 1.2269827230573986e-05, + "loss": 0.23961025476455688, + "step": 3434 + }, + { + "epoch": 0.9122294516000531, + "grad_norm": 1.1192521835175562, + "learning_rate": 1.2265550850811663e-05, + "loss": 0.2791004478931427, + "step": 3435 + }, + { + "epoch": 0.912495020581596, + "grad_norm": 1.052040685054951, + "learning_rate": 1.2261274034277858e-05, + "loss": 0.2875480651855469, + "step": 3436 + }, + { + "epoch": 0.912760589563139, + "grad_norm": 1.12188070500717, + "learning_rate": 1.2256996781797086e-05, + "loss": 0.29422929883003235, + "step": 3437 + }, + { + "epoch": 0.9130261585446819, + "grad_norm": 1.2976046274469295, + "learning_rate": 1.225271909419395e-05, + "loss": 0.27114444971084595, + "step": 3438 + }, + { + "epoch": 0.9132917275262249, + "grad_norm": 1.0684416452719028, + "learning_rate": 1.2248440972293146e-05, + "loss": 0.3007166385650635, + "step": 3439 + }, + { + "epoch": 0.9135572965077678, + "grad_norm": 1.1408150577224654, + "learning_rate": 1.224416241691944e-05, + "loss": 0.28550055623054504, + "step": 3440 + }, + { + "epoch": 0.9138228654893108, + "grad_norm": 1.1159473328967766, + "learning_rate": 1.2239883428897687e-05, + "loss": 0.2861761450767517, + "step": 3441 + 
}, + { + "epoch": 0.9140884344708538, + "grad_norm": 1.1186358936011263, + "learning_rate": 1.2235604009052823e-05, + "loss": 0.3288506865501404, + "step": 3442 + }, + { + "epoch": 0.9143540034523968, + "grad_norm": 1.2101661293343442, + "learning_rate": 1.2231324158209876e-05, + "loss": 0.33189019560813904, + "step": 3443 + }, + { + "epoch": 0.9146195724339398, + "grad_norm": 0.9931883995236199, + "learning_rate": 1.2227043877193947e-05, + "loss": 0.20846885442733765, + "step": 3444 + }, + { + "epoch": 0.9148851414154827, + "grad_norm": 0.9579263575635046, + "learning_rate": 1.2222763166830223e-05, + "loss": 0.25184741616249084, + "step": 3445 + }, + { + "epoch": 0.9151507103970257, + "grad_norm": 1.0775642304955, + "learning_rate": 1.2218482027943977e-05, + "loss": 0.2954701781272888, + "step": 3446 + }, + { + "epoch": 0.9154162793785686, + "grad_norm": 1.055908963813806, + "learning_rate": 1.221420046136056e-05, + "loss": 0.263336718082428, + "step": 3447 + }, + { + "epoch": 0.9156818483601116, + "grad_norm": 1.2181481624195412, + "learning_rate": 1.2209918467905405e-05, + "loss": 0.31178128719329834, + "step": 3448 + }, + { + "epoch": 0.9159474173416545, + "grad_norm": 1.1248939907914326, + "learning_rate": 1.2205636048404037e-05, + "loss": 0.30373090505599976, + "step": 3449 + }, + { + "epoch": 0.9162129863231975, + "grad_norm": 1.1316476755108689, + "learning_rate": 1.2201353203682052e-05, + "loss": 0.31057459115982056, + "step": 3450 + }, + { + "epoch": 0.9164785553047404, + "grad_norm": 1.0432699213656527, + "learning_rate": 1.2197069934565126e-05, + "loss": 0.26834744215011597, + "step": 3451 + }, + { + "epoch": 0.9167441242862834, + "grad_norm": 1.0235490532622333, + "learning_rate": 1.2192786241879033e-05, + "loss": 0.30224066972732544, + "step": 3452 + }, + { + "epoch": 0.9170096932678263, + "grad_norm": 1.1136690118430506, + "learning_rate": 1.2188502126449616e-05, + "loss": 0.28249508142471313, + "step": 3453 + }, + { + "epoch": 0.9172752622493693, + 
"grad_norm": 1.0210144972314754, + "learning_rate": 1.2184217589102798e-05, + "loss": 0.24823793768882751, + "step": 3454 + }, + { + "epoch": 0.9175408312309122, + "grad_norm": 1.1878687209379464, + "learning_rate": 1.2179932630664589e-05, + "loss": 0.32556289434432983, + "step": 3455 + }, + { + "epoch": 0.9178064002124552, + "grad_norm": 1.0899520670240972, + "learning_rate": 1.217564725196108e-05, + "loss": 0.29420584440231323, + "step": 3456 + }, + { + "epoch": 0.9180719691939981, + "grad_norm": 1.028247015068141, + "learning_rate": 1.2171361453818437e-05, + "loss": 0.29294469952583313, + "step": 3457 + }, + { + "epoch": 0.9183375381755411, + "grad_norm": 1.0399893903415627, + "learning_rate": 1.2167075237062918e-05, + "loss": 0.3173823952674866, + "step": 3458 + }, + { + "epoch": 0.918603107157084, + "grad_norm": 1.1571492956528482, + "learning_rate": 1.2162788602520851e-05, + "loss": 0.32950159907341003, + "step": 3459 + }, + { + "epoch": 0.918868676138627, + "grad_norm": 1.0478118037587627, + "learning_rate": 1.2158501551018647e-05, + "loss": 0.3011544942855835, + "step": 3460 + }, + { + "epoch": 0.91913424512017, + "grad_norm": 1.0135067760604335, + "learning_rate": 1.2154214083382802e-05, + "loss": 0.25775954127311707, + "step": 3461 + }, + { + "epoch": 0.9193998141017129, + "grad_norm": 1.0514508898774713, + "learning_rate": 1.214992620043989e-05, + "loss": 0.286748468875885, + "step": 3462 + }, + { + "epoch": 0.9196653830832558, + "grad_norm": 1.1050004366949897, + "learning_rate": 1.214563790301656e-05, + "loss": 0.30588221549987793, + "step": 3463 + }, + { + "epoch": 0.9199309520647988, + "grad_norm": 1.0079666808538812, + "learning_rate": 1.214134919193955e-05, + "loss": 0.23506608605384827, + "step": 3464 + }, + { + "epoch": 0.9201965210463418, + "grad_norm": 1.037364536446331, + "learning_rate": 1.2137060068035672e-05, + "loss": 0.2612350285053253, + "step": 3465 + }, + { + "epoch": 0.9204620900278847, + "grad_norm": 1.0810309706979688, + 
"learning_rate": 1.2132770532131815e-05, + "loss": 0.3268318772315979, + "step": 3466 + }, + { + "epoch": 0.9207276590094277, + "grad_norm": 1.0723394192428657, + "learning_rate": 1.2128480585054951e-05, + "loss": 0.2970179319381714, + "step": 3467 + }, + { + "epoch": 0.9209932279909706, + "grad_norm": 1.0036147426745694, + "learning_rate": 1.2124190227632138e-05, + "loss": 0.2910206615924835, + "step": 3468 + }, + { + "epoch": 0.9212587969725136, + "grad_norm": 1.1089890742219906, + "learning_rate": 1.2119899460690496e-05, + "loss": 0.3000222444534302, + "step": 3469 + }, + { + "epoch": 0.9215243659540565, + "grad_norm": 1.1166450826016983, + "learning_rate": 1.2115608285057242e-05, + "loss": 0.30304765701293945, + "step": 3470 + }, + { + "epoch": 0.9217899349355996, + "grad_norm": 0.9893826238823328, + "learning_rate": 1.2111316701559663e-05, + "loss": 0.26393038034439087, + "step": 3471 + }, + { + "epoch": 0.9220555039171425, + "grad_norm": 1.1384217438340345, + "learning_rate": 1.2107024711025128e-05, + "loss": 0.3111063838005066, + "step": 3472 + }, + { + "epoch": 0.9223210728986855, + "grad_norm": 0.9599961450252364, + "learning_rate": 1.2102732314281073e-05, + "loss": 0.2897321581840515, + "step": 3473 + }, + { + "epoch": 0.9225866418802284, + "grad_norm": 1.1396280258666305, + "learning_rate": 1.2098439512155028e-05, + "loss": 0.2835896611213684, + "step": 3474 + }, + { + "epoch": 0.9228522108617714, + "grad_norm": 1.0165194494005183, + "learning_rate": 1.2094146305474596e-05, + "loss": 0.27648821473121643, + "step": 3475 + }, + { + "epoch": 0.9231177798433143, + "grad_norm": 1.1221504506656363, + "learning_rate": 1.2089852695067457e-05, + "loss": 0.2528097629547119, + "step": 3476 + }, + { + "epoch": 0.9233833488248573, + "grad_norm": 1.1105562286202324, + "learning_rate": 1.2085558681761361e-05, + "loss": 0.2750067412853241, + "step": 3477 + }, + { + "epoch": 0.9236489178064002, + "grad_norm": 1.1199967050670125, + "learning_rate": 1.2081264266384148e-05, 
+ "loss": 0.3115938901901245, + "step": 3478 + }, + { + "epoch": 0.9239144867879432, + "grad_norm": 1.1203071431737686, + "learning_rate": 1.2076969449763734e-05, + "loss": 0.2858419418334961, + "step": 3479 + }, + { + "epoch": 0.9241800557694861, + "grad_norm": 1.051118385350032, + "learning_rate": 1.2072674232728105e-05, + "loss": 0.24990032613277435, + "step": 3480 + }, + { + "epoch": 0.9244456247510291, + "grad_norm": 1.2991104394876676, + "learning_rate": 1.206837861610533e-05, + "loss": 0.23106999695301056, + "step": 3481 + }, + { + "epoch": 0.924711193732572, + "grad_norm": 1.0396779513824141, + "learning_rate": 1.2064082600723546e-05, + "loss": 0.2737967371940613, + "step": 3482 + }, + { + "epoch": 0.924976762714115, + "grad_norm": 1.1890061925781694, + "learning_rate": 1.2059786187410984e-05, + "loss": 0.2810317873954773, + "step": 3483 + }, + { + "epoch": 0.925242331695658, + "grad_norm": 1.1358698893490913, + "learning_rate": 1.2055489376995938e-05, + "loss": 0.30852559208869934, + "step": 3484 + }, + { + "epoch": 0.9255079006772009, + "grad_norm": 1.1003932874354148, + "learning_rate": 1.2051192170306784e-05, + "loss": 0.2956348657608032, + "step": 3485 + }, + { + "epoch": 0.9257734696587439, + "grad_norm": 1.18261367067389, + "learning_rate": 1.204689456817197e-05, + "loss": 0.2825953960418701, + "step": 3486 + }, + { + "epoch": 0.9260390386402868, + "grad_norm": 1.2502616697865143, + "learning_rate": 1.2042596571420025e-05, + "loss": 0.3351168632507324, + "step": 3487 + }, + { + "epoch": 0.9263046076218298, + "grad_norm": 1.2354469073344645, + "learning_rate": 1.2038298180879548e-05, + "loss": 0.2718926668167114, + "step": 3488 + }, + { + "epoch": 0.9265701766033727, + "grad_norm": 1.1387239259181285, + "learning_rate": 1.2033999397379223e-05, + "loss": 0.29036587476730347, + "step": 3489 + }, + { + "epoch": 0.9268357455849157, + "grad_norm": 0.9499049433325992, + "learning_rate": 1.2029700221747804e-05, + "loss": 0.22917689383029938, + "step": 3490 + 
}, + { + "epoch": 0.9271013145664586, + "grad_norm": 1.2322966399012754, + "learning_rate": 1.2025400654814119e-05, + "loss": 0.2963443398475647, + "step": 3491 + }, + { + "epoch": 0.9273668835480016, + "grad_norm": 1.100231072465541, + "learning_rate": 1.2021100697407075e-05, + "loss": 0.2866464853286743, + "step": 3492 + }, + { + "epoch": 0.9276324525295445, + "grad_norm": 1.1717529025248212, + "learning_rate": 1.2016800350355654e-05, + "loss": 0.3069216012954712, + "step": 3493 + }, + { + "epoch": 0.9278980215110875, + "grad_norm": 1.0745448017128252, + "learning_rate": 1.2012499614488913e-05, + "loss": 0.27206870913505554, + "step": 3494 + }, + { + "epoch": 0.9281635904926304, + "grad_norm": 1.0995365532444106, + "learning_rate": 1.2008198490635978e-05, + "loss": 0.32130372524261475, + "step": 3495 + }, + { + "epoch": 0.9284291594741734, + "grad_norm": 1.151015013814654, + "learning_rate": 1.2003896979626061e-05, + "loss": 0.30631259083747864, + "step": 3496 + }, + { + "epoch": 0.9286947284557163, + "grad_norm": 1.125856079122124, + "learning_rate": 1.199959508228844e-05, + "loss": 0.3005716800689697, + "step": 3497 + }, + { + "epoch": 0.9289602974372593, + "grad_norm": 0.9983757548693274, + "learning_rate": 1.1995292799452472e-05, + "loss": 0.2381039410829544, + "step": 3498 + }, + { + "epoch": 0.9292258664188023, + "grad_norm": 1.1338580261514946, + "learning_rate": 1.1990990131947582e-05, + "loss": 0.31764286756515503, + "step": 3499 + }, + { + "epoch": 0.9294914354003453, + "grad_norm": 1.1445030838538803, + "learning_rate": 1.1986687080603273e-05, + "loss": 0.3029370903968811, + "step": 3500 + }, + { + "epoch": 0.9297570043818882, + "grad_norm": 1.0814133109661386, + "learning_rate": 1.198238364624913e-05, + "loss": 0.30967646837234497, + "step": 3501 + }, + { + "epoch": 0.9300225733634312, + "grad_norm": 1.0376796287878236, + "learning_rate": 1.1978079829714799e-05, + "loss": 0.24687506258487701, + "step": 3502 + }, + { + "epoch": 0.9302881423449741, + 
"grad_norm": 1.0529899744692286, + "learning_rate": 1.1973775631830007e-05, + "loss": 0.25909408926963806, + "step": 3503 + }, + { + "epoch": 0.9305537113265171, + "grad_norm": 1.1136411983367804, + "learning_rate": 1.196947105342455e-05, + "loss": 0.281025230884552, + "step": 3504 + }, + { + "epoch": 0.93081928030806, + "grad_norm": 1.2858712177395888, + "learning_rate": 1.1965166095328302e-05, + "loss": 0.33401811122894287, + "step": 3505 + }, + { + "epoch": 0.931084849289603, + "grad_norm": 0.9732764276792689, + "learning_rate": 1.1960860758371208e-05, + "loss": 0.25839388370513916, + "step": 3506 + }, + { + "epoch": 0.931350418271146, + "grad_norm": 0.954364218435113, + "learning_rate": 1.1956555043383286e-05, + "loss": 0.23343560099601746, + "step": 3507 + }, + { + "epoch": 0.9316159872526889, + "grad_norm": 1.176408931412559, + "learning_rate": 1.1952248951194629e-05, + "loss": 0.31106436252593994, + "step": 3508 + }, + { + "epoch": 0.9318815562342319, + "grad_norm": 1.108418204277134, + "learning_rate": 1.1947942482635395e-05, + "loss": 0.29152095317840576, + "step": 3509 + }, + { + "epoch": 0.9321471252157748, + "grad_norm": 1.2651732065185788, + "learning_rate": 1.1943635638535827e-05, + "loss": 0.31517675518989563, + "step": 3510 + }, + { + "epoch": 0.9324126941973178, + "grad_norm": 1.2309480505410157, + "learning_rate": 1.1939328419726231e-05, + "loss": 0.33221137523651123, + "step": 3511 + }, + { + "epoch": 0.9326782631788607, + "grad_norm": 1.2277892053470791, + "learning_rate": 1.193502082703699e-05, + "loss": 0.314359575510025, + "step": 3512 + }, + { + "epoch": 0.9329438321604037, + "grad_norm": 1.129757464324541, + "learning_rate": 1.1930712861298553e-05, + "loss": 0.2879924178123474, + "step": 3513 + }, + { + "epoch": 0.9332094011419466, + "grad_norm": 1.1622909402406336, + "learning_rate": 1.1926404523341443e-05, + "loss": 0.2732955515384674, + "step": 3514 + }, + { + "epoch": 0.9334749701234896, + "grad_norm": 1.1586501434218468, + 
"learning_rate": 1.1922095813996264e-05, + "loss": 0.32156097888946533, + "step": 3515 + }, + { + "epoch": 0.9337405391050325, + "grad_norm": 1.110486475282156, + "learning_rate": 1.1917786734093682e-05, + "loss": 0.2694319486618042, + "step": 3516 + }, + { + "epoch": 0.9340061080865755, + "grad_norm": 1.0871387001943549, + "learning_rate": 1.1913477284464434e-05, + "loss": 0.3049655258655548, + "step": 3517 + }, + { + "epoch": 0.9342716770681184, + "grad_norm": 1.0962864613999421, + "learning_rate": 1.1909167465939334e-05, + "loss": 0.30053725838661194, + "step": 3518 + }, + { + "epoch": 0.9345372460496614, + "grad_norm": 1.0261517334123498, + "learning_rate": 1.1904857279349265e-05, + "loss": 0.2611788809299469, + "step": 3519 + }, + { + "epoch": 0.9348028150312043, + "grad_norm": 1.1400957154071245, + "learning_rate": 1.1900546725525175e-05, + "loss": 0.28344646096229553, + "step": 3520 + }, + { + "epoch": 0.9350683840127473, + "grad_norm": 1.067093022484818, + "learning_rate": 1.1896235805298093e-05, + "loss": 0.2504042685031891, + "step": 3521 + }, + { + "epoch": 0.9353339529942902, + "grad_norm": 1.0534608212516616, + "learning_rate": 1.1891924519499113e-05, + "loss": 0.27877938747406006, + "step": 3522 + }, + { + "epoch": 0.9355995219758332, + "grad_norm": 1.046331705593262, + "learning_rate": 1.1887612868959394e-05, + "loss": 0.28176525235176086, + "step": 3523 + }, + { + "epoch": 0.9358650909573761, + "grad_norm": 1.1750063194789062, + "learning_rate": 1.1883300854510178e-05, + "loss": 0.32376354932785034, + "step": 3524 + }, + { + "epoch": 0.9361306599389191, + "grad_norm": 1.0908366283033504, + "learning_rate": 1.1878988476982772e-05, + "loss": 0.2846054434776306, + "step": 3525 + }, + { + "epoch": 0.936396228920462, + "grad_norm": 1.0507783491664777, + "learning_rate": 1.1874675737208546e-05, + "loss": 0.25711044669151306, + "step": 3526 + }, + { + "epoch": 0.9366617979020051, + "grad_norm": 1.078360429057703, + "learning_rate": 1.1870362636018946e-05, 
+ "loss": 0.2810837924480438, + "step": 3527 + }, + { + "epoch": 0.936927366883548, + "grad_norm": 1.2088151262046463, + "learning_rate": 1.186604917424549e-05, + "loss": 0.3090322017669678, + "step": 3528 + }, + { + "epoch": 0.937192935865091, + "grad_norm": 1.061646146170892, + "learning_rate": 1.1861735352719763e-05, + "loss": 0.2797972559928894, + "step": 3529 + }, + { + "epoch": 0.937458504846634, + "grad_norm": 1.3937474116807773, + "learning_rate": 1.1857421172273415e-05, + "loss": 0.3124893605709076, + "step": 3530 + }, + { + "epoch": 0.9377240738281769, + "grad_norm": 1.1043040217194096, + "learning_rate": 1.1853106633738174e-05, + "loss": 0.28317195177078247, + "step": 3531 + }, + { + "epoch": 0.9379896428097199, + "grad_norm": 1.0483798154842934, + "learning_rate": 1.1848791737945823e-05, + "loss": 0.27804574370384216, + "step": 3532 + }, + { + "epoch": 0.9382552117912628, + "grad_norm": 1.1007797171562173, + "learning_rate": 1.1844476485728236e-05, + "loss": 0.24936731159687042, + "step": 3533 + }, + { + "epoch": 0.9385207807728058, + "grad_norm": 1.16922301793574, + "learning_rate": 1.1840160877917335e-05, + "loss": 0.296974778175354, + "step": 3534 + }, + { + "epoch": 0.9387863497543487, + "grad_norm": 1.1172266681075624, + "learning_rate": 1.1835844915345117e-05, + "loss": 0.3048890233039856, + "step": 3535 + }, + { + "epoch": 0.9390519187358917, + "grad_norm": 1.0372698095624082, + "learning_rate": 1.1831528598843654e-05, + "loss": 0.2703601121902466, + "step": 3536 + }, + { + "epoch": 0.9393174877174346, + "grad_norm": 1.123009081238491, + "learning_rate": 1.1827211929245075e-05, + "loss": 0.30738013982772827, + "step": 3537 + }, + { + "epoch": 0.9395830566989776, + "grad_norm": 1.0660333251952498, + "learning_rate": 1.1822894907381589e-05, + "loss": 0.26538529992103577, + "step": 3538 + }, + { + "epoch": 0.9398486256805205, + "grad_norm": 1.1050453871275616, + "learning_rate": 1.1818577534085462e-05, + "loss": 0.26795464754104614, + "step": 3539 + 
}, + { + "epoch": 0.9401141946620635, + "grad_norm": 1.1533311536850575, + "learning_rate": 1.1814259810189034e-05, + "loss": 0.30891868472099304, + "step": 3540 + }, + { + "epoch": 0.9403797636436064, + "grad_norm": 1.8167204702159565, + "learning_rate": 1.1809941736524713e-05, + "loss": 0.29164037108421326, + "step": 3541 + }, + { + "epoch": 0.9406453326251494, + "grad_norm": 1.0875424396631934, + "learning_rate": 1.180562331392497e-05, + "loss": 0.30322739481925964, + "step": 3542 + }, + { + "epoch": 0.9409109016066923, + "grad_norm": 1.0765622649066557, + "learning_rate": 1.1801304543222349e-05, + "loss": 0.275432288646698, + "step": 3543 + }, + { + "epoch": 0.9411764705882353, + "grad_norm": 1.1566847425916267, + "learning_rate": 1.1796985425249459e-05, + "loss": 0.2788141965866089, + "step": 3544 + }, + { + "epoch": 0.9414420395697782, + "grad_norm": 1.203313197377309, + "learning_rate": 1.1792665960838967e-05, + "loss": 0.24254676699638367, + "step": 3545 + }, + { + "epoch": 0.9417076085513212, + "grad_norm": 1.1050026210111878, + "learning_rate": 1.1788346150823625e-05, + "loss": 0.2803058326244354, + "step": 3546 + }, + { + "epoch": 0.9419731775328641, + "grad_norm": 1.0993090963339842, + "learning_rate": 1.1784025996036232e-05, + "loss": 0.3068317174911499, + "step": 3547 + }, + { + "epoch": 0.9422387465144071, + "grad_norm": 0.9977731134117688, + "learning_rate": 1.1779705497309673e-05, + "loss": 0.23124024271965027, + "step": 3548 + }, + { + "epoch": 0.94250431549595, + "grad_norm": 1.080710306089679, + "learning_rate": 1.177538465547688e-05, + "loss": 0.2815462648868561, + "step": 3549 + }, + { + "epoch": 0.942769884477493, + "grad_norm": 1.1118952137889662, + "learning_rate": 1.1771063471370862e-05, + "loss": 0.29448196291923523, + "step": 3550 + }, + { + "epoch": 0.9430354534590359, + "grad_norm": 1.2691077751501818, + "learning_rate": 1.1766741945824698e-05, + "loss": 0.3176615834236145, + "step": 3551 + }, + { + "epoch": 0.9433010224405789, + 
"grad_norm": 1.1390071879475103, + "learning_rate": 1.1762420079671527e-05, + "loss": 0.29126274585723877, + "step": 3552 + }, + { + "epoch": 0.9435665914221218, + "grad_norm": 1.084504171285626, + "learning_rate": 1.1758097873744547e-05, + "loss": 0.27074337005615234, + "step": 3553 + }, + { + "epoch": 0.9438321604036648, + "grad_norm": 1.0495499557301764, + "learning_rate": 1.175377532887703e-05, + "loss": 0.2756083011627197, + "step": 3554 + }, + { + "epoch": 0.9440977293852079, + "grad_norm": 1.1028881447166687, + "learning_rate": 1.1749452445902315e-05, + "loss": 0.26918384432792664, + "step": 3555 + }, + { + "epoch": 0.9443632983667508, + "grad_norm": 1.0856468025535497, + "learning_rate": 1.17451292256538e-05, + "loss": 0.2550349235534668, + "step": 3556 + }, + { + "epoch": 0.9446288673482938, + "grad_norm": 1.0791996633460945, + "learning_rate": 1.1740805668964954e-05, + "loss": 0.2601481080055237, + "step": 3557 + }, + { + "epoch": 0.9448944363298367, + "grad_norm": 1.1367109564667788, + "learning_rate": 1.1736481776669307e-05, + "loss": 0.2848352789878845, + "step": 3558 + }, + { + "epoch": 0.9451600053113797, + "grad_norm": 1.1168278064757895, + "learning_rate": 1.173215754960045e-05, + "loss": 0.266584575176239, + "step": 3559 + }, + { + "epoch": 0.9454255742929226, + "grad_norm": 0.9979692557530664, + "learning_rate": 1.172783298859205e-05, + "loss": 0.25037410855293274, + "step": 3560 + }, + { + "epoch": 0.9456911432744656, + "grad_norm": 1.1049326363207628, + "learning_rate": 1.1723508094477825e-05, + "loss": 0.30239278078079224, + "step": 3561 + }, + { + "epoch": 0.9459567122560085, + "grad_norm": 1.0413977608943958, + "learning_rate": 1.1719182868091567e-05, + "loss": 0.2893553078174591, + "step": 3562 + }, + { + "epoch": 0.9462222812375515, + "grad_norm": 1.215187947788902, + "learning_rate": 1.1714857310267124e-05, + "loss": 0.2840202748775482, + "step": 3563 + }, + { + "epoch": 0.9464878502190944, + "grad_norm": 1.0615180068139964, + 
"learning_rate": 1.1710531421838422e-05, + "loss": 0.2614031732082367, + "step": 3564 + }, + { + "epoch": 0.9467534192006374, + "grad_norm": 1.0290230331800772, + "learning_rate": 1.1706205203639433e-05, + "loss": 0.267095148563385, + "step": 3565 + }, + { + "epoch": 0.9470189881821803, + "grad_norm": 1.2397291626994196, + "learning_rate": 1.1701878656504206e-05, + "loss": 0.25835227966308594, + "step": 3566 + }, + { + "epoch": 0.9472845571637233, + "grad_norm": 1.1319162410146095, + "learning_rate": 1.1697551781266845e-05, + "loss": 0.27547580003738403, + "step": 3567 + }, + { + "epoch": 0.9475501261452662, + "grad_norm": 1.089656044815204, + "learning_rate": 1.169322457876152e-05, + "loss": 0.251165509223938, + "step": 3568 + }, + { + "epoch": 0.9478156951268092, + "grad_norm": 1.2350323802819905, + "learning_rate": 1.1688897049822467e-05, + "loss": 0.2738516926765442, + "step": 3569 + }, + { + "epoch": 0.9480812641083521, + "grad_norm": 1.0315369616879289, + "learning_rate": 1.1684569195283981e-05, + "loss": 0.2745274305343628, + "step": 3570 + }, + { + "epoch": 0.9483468330898951, + "grad_norm": 1.180099592022995, + "learning_rate": 1.1680241015980423e-05, + "loss": 0.28586819767951965, + "step": 3571 + }, + { + "epoch": 0.948612402071438, + "grad_norm": 1.2233918967574897, + "learning_rate": 1.167591251274621e-05, + "loss": 0.2559577524662018, + "step": 3572 + }, + { + "epoch": 0.948877971052981, + "grad_norm": 1.155824963337958, + "learning_rate": 1.1671583686415833e-05, + "loss": 0.26069143414497375, + "step": 3573 + }, + { + "epoch": 0.949143540034524, + "grad_norm": 1.078529730225554, + "learning_rate": 1.1667254537823838e-05, + "loss": 0.26866453886032104, + "step": 3574 + }, + { + "epoch": 0.9494091090160669, + "grad_norm": 1.0772599867154102, + "learning_rate": 1.166292506780483e-05, + "loss": 0.25285348296165466, + "step": 3575 + }, + { + "epoch": 0.9496746779976099, + "grad_norm": 1.1335172942215501, + "learning_rate": 1.1658595277193479e-05, + 
"loss": 0.3330434262752533, + "step": 3576 + }, + { + "epoch": 0.9499402469791528, + "grad_norm": 1.076438251163932, + "learning_rate": 1.1654265166824522e-05, + "loss": 0.2789473533630371, + "step": 3577 + }, + { + "epoch": 0.9502058159606958, + "grad_norm": 1.2746037306212283, + "learning_rate": 1.164993473753275e-05, + "loss": 0.30984824895858765, + "step": 3578 + }, + { + "epoch": 0.9504713849422387, + "grad_norm": 1.0517088315750878, + "learning_rate": 1.164560399015302e-05, + "loss": 0.23881833255290985, + "step": 3579 + }, + { + "epoch": 0.9507369539237817, + "grad_norm": 1.1012484750770577, + "learning_rate": 1.164127292552025e-05, + "loss": 0.3027937114238739, + "step": 3580 + }, + { + "epoch": 0.9510025229053246, + "grad_norm": 1.1998484228117954, + "learning_rate": 1.1636941544469413e-05, + "loss": 0.2901906371116638, + "step": 3581 + }, + { + "epoch": 0.9512680918868676, + "grad_norm": 1.069491787313744, + "learning_rate": 1.1632609847835556e-05, + "loss": 0.28961148858070374, + "step": 3582 + }, + { + "epoch": 0.9515336608684106, + "grad_norm": 1.0782542825887276, + "learning_rate": 1.1628277836453774e-05, + "loss": 0.2730783224105835, + "step": 3583 + }, + { + "epoch": 0.9517992298499536, + "grad_norm": 1.0952017771476839, + "learning_rate": 1.1623945511159232e-05, + "loss": 0.3195485770702362, + "step": 3584 + }, + { + "epoch": 0.9520647988314965, + "grad_norm": 1.1514370971708257, + "learning_rate": 1.1619612872787144e-05, + "loss": 0.3097516894340515, + "step": 3585 + }, + { + "epoch": 0.9523303678130395, + "grad_norm": 1.0422990071728377, + "learning_rate": 1.1615279922172796e-05, + "loss": 0.2716284692287445, + "step": 3586 + }, + { + "epoch": 0.9525959367945824, + "grad_norm": 0.9669355988334725, + "learning_rate": 1.1610946660151531e-05, + "loss": 0.2601209878921509, + "step": 3587 + }, + { + "epoch": 0.9528615057761254, + "grad_norm": 1.1027425019898653, + "learning_rate": 1.1606613087558748e-05, + "loss": 0.28665289282798767, + "step": 3588 + 
}, + { + "epoch": 0.9531270747576683, + "grad_norm": 1.082078861677668, + "learning_rate": 1.1602279205229912e-05, + "loss": 0.3019893765449524, + "step": 3589 + }, + { + "epoch": 0.9533926437392113, + "grad_norm": 0.9778282797717269, + "learning_rate": 1.1597945014000537e-05, + "loss": 0.2635146677494049, + "step": 3590 + }, + { + "epoch": 0.9536582127207542, + "grad_norm": 1.0527782897227813, + "learning_rate": 1.1593610514706217e-05, + "loss": 0.2704858183860779, + "step": 3591 + }, + { + "epoch": 0.9539237817022972, + "grad_norm": 1.2295509988273574, + "learning_rate": 1.1589275708182581e-05, + "loss": 0.31997931003570557, + "step": 3592 + }, + { + "epoch": 0.9541893506838401, + "grad_norm": 1.1529907760165448, + "learning_rate": 1.1584940595265332e-05, + "loss": 0.2308788150548935, + "step": 3593 + }, + { + "epoch": 0.9544549196653831, + "grad_norm": 1.0980235303762964, + "learning_rate": 1.1580605176790229e-05, + "loss": 0.28886470198631287, + "step": 3594 + }, + { + "epoch": 0.954720488646926, + "grad_norm": 1.313883667721807, + "learning_rate": 1.157626945359309e-05, + "loss": 0.30698686838150024, + "step": 3595 + }, + { + "epoch": 0.954986057628469, + "grad_norm": 1.1087251273709688, + "learning_rate": 1.1571933426509789e-05, + "loss": 0.27475905418395996, + "step": 3596 + }, + { + "epoch": 0.955251626610012, + "grad_norm": 1.1064883207545173, + "learning_rate": 1.1567597096376264e-05, + "loss": 0.2568071484565735, + "step": 3597 + }, + { + "epoch": 0.9555171955915549, + "grad_norm": 1.28706485993144, + "learning_rate": 1.1563260464028507e-05, + "loss": 0.2574060261249542, + "step": 3598 + }, + { + "epoch": 0.9557827645730979, + "grad_norm": 1.193494963897618, + "learning_rate": 1.1558923530302571e-05, + "loss": 0.2847997546195984, + "step": 3599 + }, + { + "epoch": 0.9560483335546408, + "grad_norm": 1.0723094070831873, + "learning_rate": 1.155458629603456e-05, + "loss": 0.2594734728336334, + "step": 3600 + }, + { + "epoch": 0.9563139025361838, + 
"grad_norm": 1.0020160427681732, + "learning_rate": 1.155024876206065e-05, + "loss": 0.2300589680671692, + "step": 3601 + }, + { + "epoch": 0.9565794715177267, + "grad_norm": 1.1475438454718678, + "learning_rate": 1.1545910929217059e-05, + "loss": 0.29174795746803284, + "step": 3602 + }, + { + "epoch": 0.9568450404992697, + "grad_norm": 1.0425930414114217, + "learning_rate": 1.1541572798340076e-05, + "loss": 0.2666400074958801, + "step": 3603 + }, + { + "epoch": 0.9571106094808126, + "grad_norm": 1.0067559469755134, + "learning_rate": 1.1537234370266035e-05, + "loss": 0.24651308357715607, + "step": 3604 + }, + { + "epoch": 0.9573761784623556, + "grad_norm": 1.1542471481522265, + "learning_rate": 1.1532895645831339e-05, + "loss": 0.29991376399993896, + "step": 3605 + }, + { + "epoch": 0.9576417474438985, + "grad_norm": 1.0631305192934537, + "learning_rate": 1.1528556625872443e-05, + "loss": 0.27713578939437866, + "step": 3606 + }, + { + "epoch": 0.9579073164254415, + "grad_norm": 1.0497999275546905, + "learning_rate": 1.1524217311225857e-05, + "loss": 0.26503294706344604, + "step": 3607 + }, + { + "epoch": 0.9581728854069844, + "grad_norm": 1.1479000180189152, + "learning_rate": 1.1519877702728149e-05, + "loss": 0.28627675771713257, + "step": 3608 + }, + { + "epoch": 0.9584384543885274, + "grad_norm": 1.0333891142616893, + "learning_rate": 1.1515537801215944e-05, + "loss": 0.26862916350364685, + "step": 3609 + }, + { + "epoch": 0.9587040233700703, + "grad_norm": 1.2518522451268181, + "learning_rate": 1.1511197607525926e-05, + "loss": 0.29697147011756897, + "step": 3610 + }, + { + "epoch": 0.9589695923516134, + "grad_norm": 1.0668919106736792, + "learning_rate": 1.1506857122494832e-05, + "loss": 0.2980155944824219, + "step": 3611 + }, + { + "epoch": 0.9592351613331563, + "grad_norm": 1.1016644329026075, + "learning_rate": 1.1502516346959458e-05, + "loss": 0.2847440838813782, + "step": 3612 + }, + { + "epoch": 0.9595007303146993, + "grad_norm": 1.1131533712076647, + 
"learning_rate": 1.149817528175665e-05, + "loss": 0.2812016010284424, + "step": 3613 + }, + { + "epoch": 0.9597662992962422, + "grad_norm": 1.0387818826049915, + "learning_rate": 1.1493833927723319e-05, + "loss": 0.26856982707977295, + "step": 3614 + }, + { + "epoch": 0.9600318682777852, + "grad_norm": 1.0595715138301371, + "learning_rate": 1.1489492285696424e-05, + "loss": 0.2651693820953369, + "step": 3615 + }, + { + "epoch": 0.9602974372593281, + "grad_norm": 1.1384265947297394, + "learning_rate": 1.1485150356512986e-05, + "loss": 0.29811644554138184, + "step": 3616 + }, + { + "epoch": 0.9605630062408711, + "grad_norm": 1.0449713925688802, + "learning_rate": 1.1480808141010071e-05, + "loss": 0.2622855007648468, + "step": 3617 + }, + { + "epoch": 0.960828575222414, + "grad_norm": 1.1964334046740135, + "learning_rate": 1.1476465640024814e-05, + "loss": 0.3067246377468109, + "step": 3618 + }, + { + "epoch": 0.961094144203957, + "grad_norm": 1.0999678942020576, + "learning_rate": 1.1472122854394394e-05, + "loss": 0.25928011536598206, + "step": 3619 + }, + { + "epoch": 0.9613597131855, + "grad_norm": 1.0356853160291564, + "learning_rate": 1.146777978495605e-05, + "loss": 0.2574170231819153, + "step": 3620 + }, + { + "epoch": 0.9616252821670429, + "grad_norm": 1.1366453776894136, + "learning_rate": 1.1463436432547073e-05, + "loss": 0.2845388650894165, + "step": 3621 + }, + { + "epoch": 0.9618908511485859, + "grad_norm": 1.1067131961561003, + "learning_rate": 1.145909279800481e-05, + "loss": 0.28735876083374023, + "step": 3622 + }, + { + "epoch": 0.9621564201301288, + "grad_norm": 1.100639151702203, + "learning_rate": 1.1454748882166666e-05, + "loss": 0.25739723443984985, + "step": 3623 + }, + { + "epoch": 0.9624219891116718, + "grad_norm": 1.0743852778260963, + "learning_rate": 1.1450404685870098e-05, + "loss": 0.25144338607788086, + "step": 3624 + }, + { + "epoch": 0.9626875580932147, + "grad_norm": 1.0451944769292063, + "learning_rate": 1.144606020995261e-05, + 
"loss": 0.23981891572475433, + "step": 3625 + }, + { + "epoch": 0.9629531270747577, + "grad_norm": 1.1215387475511582, + "learning_rate": 1.1441715455251764e-05, + "loss": 0.30925339460372925, + "step": 3626 + }, + { + "epoch": 0.9632186960563006, + "grad_norm": 1.1193965021491372, + "learning_rate": 1.1437370422605184e-05, + "loss": 0.2559184432029724, + "step": 3627 + }, + { + "epoch": 0.9634842650378436, + "grad_norm": 1.221260182162867, + "learning_rate": 1.1433025112850542e-05, + "loss": 0.3001229166984558, + "step": 3628 + }, + { + "epoch": 0.9637498340193865, + "grad_norm": 0.9957913669659347, + "learning_rate": 1.1428679526825557e-05, + "loss": 0.24304218590259552, + "step": 3629 + }, + { + "epoch": 0.9640154030009295, + "grad_norm": 1.0405086595778643, + "learning_rate": 1.1424333665368011e-05, + "loss": 0.25677186250686646, + "step": 3630 + }, + { + "epoch": 0.9642809719824724, + "grad_norm": 1.0362119568252992, + "learning_rate": 1.141998752931573e-05, + "loss": 0.2589085102081299, + "step": 3631 + }, + { + "epoch": 0.9645465409640154, + "grad_norm": 1.1004952842028541, + "learning_rate": 1.1415641119506601e-05, + "loss": 0.2588059604167938, + "step": 3632 + }, + { + "epoch": 0.9648121099455583, + "grad_norm": 1.1379378571012249, + "learning_rate": 1.1411294436778562e-05, + "loss": 0.26097869873046875, + "step": 3633 + }, + { + "epoch": 0.9650776789271013, + "grad_norm": 1.2218308438631786, + "learning_rate": 1.1406947481969598e-05, + "loss": 0.26022520661354065, + "step": 3634 + }, + { + "epoch": 0.9653432479086442, + "grad_norm": 1.0737420773814035, + "learning_rate": 1.140260025591775e-05, + "loss": 0.26242876052856445, + "step": 3635 + }, + { + "epoch": 0.9656088168901872, + "grad_norm": 1.1396910340144906, + "learning_rate": 1.1398252759461119e-05, + "loss": 0.30035555362701416, + "step": 3636 + }, + { + "epoch": 0.9658743858717301, + "grad_norm": 1.1365210980452296, + "learning_rate": 1.1393904993437848e-05, + "loss": 0.26388341188430786, + "step": 
3637 + }, + { + "epoch": 0.9661399548532731, + "grad_norm": 1.06242333907382, + "learning_rate": 1.1389556958686132e-05, + "loss": 0.28116434812545776, + "step": 3638 + }, + { + "epoch": 0.966405523834816, + "grad_norm": 1.0513966621960738, + "learning_rate": 1.1385208656044222e-05, + "loss": 0.25372493267059326, + "step": 3639 + }, + { + "epoch": 0.9666710928163591, + "grad_norm": 1.1171784181414381, + "learning_rate": 1.1380860086350422e-05, + "loss": 0.2648317813873291, + "step": 3640 + }, + { + "epoch": 0.966936661797902, + "grad_norm": 1.0508956007113521, + "learning_rate": 1.1376511250443082e-05, + "loss": 0.26981276273727417, + "step": 3641 + }, + { + "epoch": 0.967202230779445, + "grad_norm": 1.1513465918880585, + "learning_rate": 1.1372162149160608e-05, + "loss": 0.2934207618236542, + "step": 3642 + }, + { + "epoch": 0.967467799760988, + "grad_norm": 0.9705407845284122, + "learning_rate": 1.1367812783341454e-05, + "loss": 0.24250900745391846, + "step": 3643 + }, + { + "epoch": 0.9677333687425309, + "grad_norm": 1.0409007473472116, + "learning_rate": 1.1363463153824125e-05, + "loss": 0.2565772235393524, + "step": 3644 + }, + { + "epoch": 0.9679989377240739, + "grad_norm": 1.2386980142351325, + "learning_rate": 1.1359113261447183e-05, + "loss": 0.28407829999923706, + "step": 3645 + }, + { + "epoch": 0.9682645067056168, + "grad_norm": 1.1134220293120092, + "learning_rate": 1.1354763107049234e-05, + "loss": 0.2974489629268646, + "step": 3646 + }, + { + "epoch": 0.9685300756871598, + "grad_norm": 1.1611486704366027, + "learning_rate": 1.1350412691468935e-05, + "loss": 0.27539899945259094, + "step": 3647 + }, + { + "epoch": 0.9687956446687027, + "grad_norm": 1.1777496863563888, + "learning_rate": 1.1346062015544997e-05, + "loss": 0.28256523609161377, + "step": 3648 + }, + { + "epoch": 0.9690612136502457, + "grad_norm": 1.0910813538672366, + "learning_rate": 1.1341711080116176e-05, + "loss": 0.27582883834838867, + "step": 3649 + }, + { + "epoch": 
0.9693267826317886, + "grad_norm": 1.2299419127493794, + "learning_rate": 1.1337359886021285e-05, + "loss": 0.3199389576911926, + "step": 3650 + }, + { + "epoch": 0.9695923516133316, + "grad_norm": 1.078226808322517, + "learning_rate": 1.1333008434099178e-05, + "loss": 0.2922326922416687, + "step": 3651 + }, + { + "epoch": 0.9698579205948745, + "grad_norm": 1.1833154338367669, + "learning_rate": 1.1328656725188767e-05, + "loss": 0.285635381937027, + "step": 3652 + }, + { + "epoch": 0.9701234895764175, + "grad_norm": 1.1606724829825772, + "learning_rate": 1.1324304760129009e-05, + "loss": 0.3347492814064026, + "step": 3653 + }, + { + "epoch": 0.9703890585579604, + "grad_norm": 1.1079831575977723, + "learning_rate": 1.1319952539758912e-05, + "loss": 0.27379873394966125, + "step": 3654 + }, + { + "epoch": 0.9706546275395034, + "grad_norm": 1.2487680540467303, + "learning_rate": 1.1315600064917534e-05, + "loss": 0.27911311388015747, + "step": 3655 + }, + { + "epoch": 0.9709201965210463, + "grad_norm": 1.187492816658345, + "learning_rate": 1.1311247336443982e-05, + "loss": 0.25750118494033813, + "step": 3656 + }, + { + "epoch": 0.9711857655025893, + "grad_norm": 1.1010343448161526, + "learning_rate": 1.1306894355177405e-05, + "loss": 0.28723078966140747, + "step": 3657 + }, + { + "epoch": 0.9714513344841322, + "grad_norm": 1.0378840795289885, + "learning_rate": 1.1302541121957008e-05, + "loss": 0.25269389152526855, + "step": 3658 + }, + { + "epoch": 0.9717169034656752, + "grad_norm": 1.1923604766845932, + "learning_rate": 1.1298187637622046e-05, + "loss": 0.3041607439517975, + "step": 3659 + }, + { + "epoch": 0.9719824724472181, + "grad_norm": 1.0812687625707742, + "learning_rate": 1.1293833903011819e-05, + "loss": 0.2826605439186096, + "step": 3660 + }, + { + "epoch": 0.9722480414287611, + "grad_norm": 1.1010565715724137, + "learning_rate": 1.1289479918965675e-05, + "loss": 0.2830520570278168, + "step": 3661 + }, + { + "epoch": 0.972513610410304, + "grad_norm": 
1.0160541896764337, + "learning_rate": 1.1285125686323011e-05, + "loss": 0.24295952916145325, + "step": 3662 + }, + { + "epoch": 0.972779179391847, + "grad_norm": 1.108181435484162, + "learning_rate": 1.1280771205923269e-05, + "loss": 0.28775808215141296, + "step": 3663 + }, + { + "epoch": 0.97304474837339, + "grad_norm": 0.9715417125511246, + "learning_rate": 1.127641647860595e-05, + "loss": 0.24650296568870544, + "step": 3664 + }, + { + "epoch": 0.9733103173549329, + "grad_norm": 0.9305293200248026, + "learning_rate": 1.1272061505210584e-05, + "loss": 0.22344040870666504, + "step": 3665 + }, + { + "epoch": 0.9735758863364758, + "grad_norm": 1.0859092127038839, + "learning_rate": 1.1267706286576759e-05, + "loss": 0.26920852065086365, + "step": 3666 + }, + { + "epoch": 0.9738414553180188, + "grad_norm": 1.1792674236289236, + "learning_rate": 1.1263350823544115e-05, + "loss": 0.27615875005722046, + "step": 3667 + }, + { + "epoch": 0.9741070242995619, + "grad_norm": 1.0470064037587914, + "learning_rate": 1.1258995116952334e-05, + "loss": 0.2768712043762207, + "step": 3668 + }, + { + "epoch": 0.9743725932811048, + "grad_norm": 1.0568329464095596, + "learning_rate": 1.1254639167641141e-05, + "loss": 0.27764153480529785, + "step": 3669 + }, + { + "epoch": 0.9746381622626478, + "grad_norm": 1.139437307258024, + "learning_rate": 1.1250282976450316e-05, + "loss": 0.27423611283302307, + "step": 3670 + }, + { + "epoch": 0.9749037312441907, + "grad_norm": 1.1238013222894891, + "learning_rate": 1.1245926544219676e-05, + "loss": 0.2626228332519531, + "step": 3671 + }, + { + "epoch": 0.9751693002257337, + "grad_norm": 1.2807555997920204, + "learning_rate": 1.1241569871789096e-05, + "loss": 0.25524014234542847, + "step": 3672 + }, + { + "epoch": 0.9754348692072766, + "grad_norm": 1.1042234540757712, + "learning_rate": 1.1237212959998485e-05, + "loss": 0.30857735872268677, + "step": 3673 + }, + { + "epoch": 0.9757004381888196, + "grad_norm": 1.0235359310129009, + "learning_rate": 
1.1232855809687807e-05, + "loss": 0.25099021196365356, + "step": 3674 + }, + { + "epoch": 0.9759660071703625, + "grad_norm": 1.0116202981123898, + "learning_rate": 1.1228498421697068e-05, + "loss": 0.22664576768875122, + "step": 3675 + }, + { + "epoch": 0.9762315761519055, + "grad_norm": 1.151038777130998, + "learning_rate": 1.1224140796866322e-05, + "loss": 0.24727366864681244, + "step": 3676 + }, + { + "epoch": 0.9764971451334484, + "grad_norm": 1.160849411640656, + "learning_rate": 1.121978293603567e-05, + "loss": 0.2561935782432556, + "step": 3677 + }, + { + "epoch": 0.9767627141149914, + "grad_norm": 1.10648815955184, + "learning_rate": 1.1215424840045254e-05, + "loss": 0.2594214677810669, + "step": 3678 + }, + { + "epoch": 0.9770282830965343, + "grad_norm": 1.130419852826836, + "learning_rate": 1.1211066509735265e-05, + "loss": 0.2383778691291809, + "step": 3679 + }, + { + "epoch": 0.9772938520780773, + "grad_norm": 1.2393377504128167, + "learning_rate": 1.1206707945945934e-05, + "loss": 0.2864387035369873, + "step": 3680 + }, + { + "epoch": 0.9775594210596202, + "grad_norm": 1.2012269867709167, + "learning_rate": 1.1202349149517541e-05, + "loss": 0.30415672063827515, + "step": 3681 + }, + { + "epoch": 0.9778249900411632, + "grad_norm": 1.1590063847406842, + "learning_rate": 1.1197990121290415e-05, + "loss": 0.3030807375907898, + "step": 3682 + }, + { + "epoch": 0.9780905590227061, + "grad_norm": 1.1251124481371277, + "learning_rate": 1.1193630862104922e-05, + "loss": 0.2518938481807709, + "step": 3683 + }, + { + "epoch": 0.9783561280042491, + "grad_norm": 1.2096921428918863, + "learning_rate": 1.1189271372801474e-05, + "loss": 0.25353187322616577, + "step": 3684 + }, + { + "epoch": 0.978621696985792, + "grad_norm": 1.401372369430627, + "learning_rate": 1.1184911654220534e-05, + "loss": 0.30639684200286865, + "step": 3685 + }, + { + "epoch": 0.978887265967335, + "grad_norm": 1.1636733460077495, + "learning_rate": 1.1180551707202602e-05, + "loss": 
0.295099139213562, + "step": 3686 + }, + { + "epoch": 0.979152834948878, + "grad_norm": 1.0596592048702305, + "learning_rate": 1.1176191532588224e-05, + "loss": 0.2428167164325714, + "step": 3687 + }, + { + "epoch": 0.9794184039304209, + "grad_norm": 1.0401088292404943, + "learning_rate": 1.1171831131217989e-05, + "loss": 0.2716362774372101, + "step": 3688 + }, + { + "epoch": 0.9796839729119639, + "grad_norm": 1.1130709970940986, + "learning_rate": 1.1167470503932534e-05, + "loss": 0.28350287675857544, + "step": 3689 + }, + { + "epoch": 0.9799495418935068, + "grad_norm": 1.0214004744947676, + "learning_rate": 1.1163109651572535e-05, + "loss": 0.2776945233345032, + "step": 3690 + }, + { + "epoch": 0.9802151108750498, + "grad_norm": 1.041237294346951, + "learning_rate": 1.115874857497871e-05, + "loss": 0.2712942063808441, + "step": 3691 + }, + { + "epoch": 0.9804806798565927, + "grad_norm": 1.058232702389033, + "learning_rate": 1.1154387274991829e-05, + "loss": 0.2530008852481842, + "step": 3692 + }, + { + "epoch": 0.9807462488381357, + "grad_norm": 1.0327043619893976, + "learning_rate": 1.1150025752452693e-05, + "loss": 0.24889500439167023, + "step": 3693 + }, + { + "epoch": 0.9810118178196786, + "grad_norm": 1.1013842404358833, + "learning_rate": 1.1145664008202158e-05, + "loss": 0.3051255941390991, + "step": 3694 + }, + { + "epoch": 0.9812773868012216, + "grad_norm": 1.0503003262830894, + "learning_rate": 1.1141302043081112e-05, + "loss": 0.24781765043735504, + "step": 3695 + }, + { + "epoch": 0.9815429557827646, + "grad_norm": 1.2510153019418302, + "learning_rate": 1.1136939857930497e-05, + "loss": 0.3021858036518097, + "step": 3696 + }, + { + "epoch": 0.9818085247643076, + "grad_norm": 1.1052947984569603, + "learning_rate": 1.1132577453591284e-05, + "loss": 0.3026372194290161, + "step": 3697 + }, + { + "epoch": 0.9820740937458505, + "grad_norm": 1.2367828155450835, + "learning_rate": 1.1128214830904494e-05, + "loss": 0.31511861085891724, + "step": 3698 + }, + { 
+ "epoch": 0.9823396627273935, + "grad_norm": 1.076549494496895, + "learning_rate": 1.112385199071119e-05, + "loss": 0.27885258197784424, + "step": 3699 + }, + { + "epoch": 0.9826052317089364, + "grad_norm": 1.0546536629749794, + "learning_rate": 1.1119488933852477e-05, + "loss": 0.2724893391132355, + "step": 3700 + }, + { + "epoch": 0.9828708006904794, + "grad_norm": 1.0683428715266594, + "learning_rate": 1.1115125661169503e-05, + "loss": 0.2836218774318695, + "step": 3701 + }, + { + "epoch": 0.9831363696720223, + "grad_norm": 1.1039385208642913, + "learning_rate": 1.111076217350345e-05, + "loss": 0.24220457673072815, + "step": 3702 + }, + { + "epoch": 0.9834019386535653, + "grad_norm": 1.1586770288767172, + "learning_rate": 1.1106398471695554e-05, + "loss": 0.28599557280540466, + "step": 3703 + }, + { + "epoch": 0.9836675076351082, + "grad_norm": 1.0806945340822165, + "learning_rate": 1.110203455658708e-05, + "loss": 0.30559849739074707, + "step": 3704 + }, + { + "epoch": 0.9839330766166512, + "grad_norm": 1.0573640293446354, + "learning_rate": 1.109767042901934e-05, + "loss": 0.2763117551803589, + "step": 3705 + }, + { + "epoch": 0.9841986455981941, + "grad_norm": 0.9563131800944344, + "learning_rate": 1.109330608983369e-05, + "loss": 0.2028101086616516, + "step": 3706 + }, + { + "epoch": 0.9844642145797371, + "grad_norm": 0.9787835815750591, + "learning_rate": 1.1088941539871515e-05, + "loss": 0.25386112928390503, + "step": 3707 + }, + { + "epoch": 0.98472978356128, + "grad_norm": 1.075996733851366, + "learning_rate": 1.1084576779974257e-05, + "loss": 0.2588289976119995, + "step": 3708 + }, + { + "epoch": 0.984995352542823, + "grad_norm": 1.3003014971272602, + "learning_rate": 1.1080211810983385e-05, + "loss": 0.3201071321964264, + "step": 3709 + }, + { + "epoch": 0.985260921524366, + "grad_norm": 1.2030478206249715, + "learning_rate": 1.107584663374042e-05, + "loss": 0.28439003229141235, + "step": 3710 + }, + { + "epoch": 0.9855264905059089, + "grad_norm": 
1.060347062251152, + "learning_rate": 1.1071481249086908e-05, + "loss": 0.2734091579914093, + "step": 3711 + }, + { + "epoch": 0.9857920594874519, + "grad_norm": 1.2115603819692051, + "learning_rate": 1.1067115657864451e-05, + "loss": 0.2917581796646118, + "step": 3712 + }, + { + "epoch": 0.9860576284689948, + "grad_norm": 1.2063997459644484, + "learning_rate": 1.1062749860914681e-05, + "loss": 0.3569914996623993, + "step": 3713 + }, + { + "epoch": 0.9863231974505378, + "grad_norm": 1.127711451799425, + "learning_rate": 1.1058383859079271e-05, + "loss": 0.2574514150619507, + "step": 3714 + }, + { + "epoch": 0.9865887664320807, + "grad_norm": 1.119813552337215, + "learning_rate": 1.1054017653199936e-05, + "loss": 0.3035826086997986, + "step": 3715 + }, + { + "epoch": 0.9868543354136237, + "grad_norm": 1.5863085854725767, + "learning_rate": 1.1049651244118424e-05, + "loss": 0.28067824244499207, + "step": 3716 + }, + { + "epoch": 0.9871199043951666, + "grad_norm": 1.0916600834300794, + "learning_rate": 1.1045284632676535e-05, + "loss": 0.2511579394340515, + "step": 3717 + }, + { + "epoch": 0.9873854733767096, + "grad_norm": 1.2657546371764674, + "learning_rate": 1.1040917819716097e-05, + "loss": 0.3059889078140259, + "step": 3718 + }, + { + "epoch": 0.9876510423582525, + "grad_norm": 1.1224253435238671, + "learning_rate": 1.103655080607898e-05, + "loss": 0.2642200291156769, + "step": 3719 + }, + { + "epoch": 0.9879166113397955, + "grad_norm": 1.0969568004465404, + "learning_rate": 1.1032183592607094e-05, + "loss": 0.2743483781814575, + "step": 3720 + }, + { + "epoch": 0.9881821803213384, + "grad_norm": 1.1317768374698567, + "learning_rate": 1.1027816180142383e-05, + "loss": 0.2597433030605316, + "step": 3721 + }, + { + "epoch": 0.9884477493028814, + "grad_norm": 1.0759312888673545, + "learning_rate": 1.1023448569526834e-05, + "loss": 0.24439337849617004, + "step": 3722 + }, + { + "epoch": 0.9887133182844243, + "grad_norm": 1.0386429343076329, + "learning_rate": 
1.1019080761602473e-05, + "loss": 0.2520195245742798, + "step": 3723 + }, + { + "epoch": 0.9889788872659674, + "grad_norm": 1.0921837996926786, + "learning_rate": 1.1014712757211359e-05, + "loss": 0.2904737889766693, + "step": 3724 + }, + { + "epoch": 0.9892444562475103, + "grad_norm": 1.12008182824954, + "learning_rate": 1.1010344557195588e-05, + "loss": 0.28096869587898254, + "step": 3725 + }, + { + "epoch": 0.9895100252290533, + "grad_norm": 1.8392230806075218, + "learning_rate": 1.1005976162397309e-05, + "loss": 0.317839652299881, + "step": 3726 + }, + { + "epoch": 0.9897755942105962, + "grad_norm": 1.19381185696067, + "learning_rate": 1.100160757365869e-05, + "loss": 0.29213201999664307, + "step": 3727 + }, + { + "epoch": 0.9900411631921392, + "grad_norm": 1.215113877896921, + "learning_rate": 1.0997238791821943e-05, + "loss": 0.27034991979599, + "step": 3728 + }, + { + "epoch": 0.9903067321736821, + "grad_norm": 1.2893524723691567, + "learning_rate": 1.0992869817729317e-05, + "loss": 0.30504971742630005, + "step": 3729 + }, + { + "epoch": 0.9905723011552251, + "grad_norm": 1.109889585740049, + "learning_rate": 1.09885006522231e-05, + "loss": 0.30673110485076904, + "step": 3730 + }, + { + "epoch": 0.990837870136768, + "grad_norm": 1.0963153712692437, + "learning_rate": 1.0984131296145616e-05, + "loss": 0.27990686893463135, + "step": 3731 + }, + { + "epoch": 0.991103439118311, + "grad_norm": 1.0228240366531471, + "learning_rate": 1.0979761750339225e-05, + "loss": 0.24379019439220428, + "step": 3732 + }, + { + "epoch": 0.991369008099854, + "grad_norm": 1.1055702239918885, + "learning_rate": 1.0975392015646323e-05, + "loss": 0.30554595589637756, + "step": 3733 + }, + { + "epoch": 0.9916345770813969, + "grad_norm": 1.062606047652276, + "learning_rate": 1.0971022092909342e-05, + "loss": 0.245269775390625, + "step": 3734 + }, + { + "epoch": 0.9919001460629399, + "grad_norm": 1.0977829197687445, + "learning_rate": 1.0966651982970757e-05, + "loss": 0.2732948064804077, 
+ "step": 3735 + }, + { + "epoch": 0.9921657150444828, + "grad_norm": 0.992060831416128, + "learning_rate": 1.0962281686673071e-05, + "loss": 0.25989004969596863, + "step": 3736 + }, + { + "epoch": 0.9924312840260258, + "grad_norm": 1.1415489224758493, + "learning_rate": 1.0957911204858824e-05, + "loss": 0.32891198992729187, + "step": 3737 + }, + { + "epoch": 0.9926968530075687, + "grad_norm": 1.094277657297916, + "learning_rate": 1.0953540538370591e-05, + "loss": 0.29184675216674805, + "step": 3738 + }, + { + "epoch": 0.9929624219891117, + "grad_norm": 1.1381026162174743, + "learning_rate": 1.094916968805099e-05, + "loss": 0.2784018814563751, + "step": 3739 + }, + { + "epoch": 0.9932279909706546, + "grad_norm": 1.1670677505581852, + "learning_rate": 1.094479865474267e-05, + "loss": 0.26586195826530457, + "step": 3740 + }, + { + "epoch": 0.9934935599521976, + "grad_norm": 0.9575913416137994, + "learning_rate": 1.094042743928831e-05, + "loss": 0.24593298137187958, + "step": 3741 + }, + { + "epoch": 0.9937591289337405, + "grad_norm": 1.065966707682552, + "learning_rate": 1.0936056042530632e-05, + "loss": 0.2462792694568634, + "step": 3742 + }, + { + "epoch": 0.9940246979152835, + "grad_norm": 1.2074020558104472, + "learning_rate": 1.0931684465312388e-05, + "loss": 0.2688900828361511, + "step": 3743 + }, + { + "epoch": 0.9942902668968264, + "grad_norm": 1.099682442025033, + "learning_rate": 1.0927312708476367e-05, + "loss": 0.2842782735824585, + "step": 3744 + }, + { + "epoch": 0.9945558358783694, + "grad_norm": 1.0548829148077135, + "learning_rate": 1.0922940772865393e-05, + "loss": 0.249299556016922, + "step": 3745 + }, + { + "epoch": 0.9948214048599123, + "grad_norm": 1.175705262338143, + "learning_rate": 1.0918568659322325e-05, + "loss": 0.2765413522720337, + "step": 3746 + }, + { + "epoch": 0.9950869738414553, + "grad_norm": 1.1414819691892306, + "learning_rate": 1.0914196368690049e-05, + "loss": 0.29750365018844604, + "step": 3747 + }, + { + "epoch": 
0.9953525428229982, + "grad_norm": 1.153321336461836, + "learning_rate": 1.0909823901811496e-05, + "loss": 0.25272879004478455, + "step": 3748 + }, + { + "epoch": 0.9956181118045412, + "grad_norm": 1.1906489486154657, + "learning_rate": 1.0905451259529626e-05, + "loss": 0.3056861460208893, + "step": 3749 + }, + { + "epoch": 0.9958836807860841, + "grad_norm": 1.1596775625362263, + "learning_rate": 1.090107844268743e-05, + "loss": 0.26723814010620117, + "step": 3750 + }, + { + "epoch": 0.9961492497676271, + "grad_norm": 1.167023454532776, + "learning_rate": 1.0896705452127943e-05, + "loss": 0.29998716711997986, + "step": 3751 + }, + { + "epoch": 0.9964148187491702, + "grad_norm": 1.1519689723038142, + "learning_rate": 1.0892332288694216e-05, + "loss": 0.2690891623497009, + "step": 3752 + }, + { + "epoch": 0.9966803877307131, + "grad_norm": 1.1385088428140973, + "learning_rate": 1.0887958953229349e-05, + "loss": 0.25555333495140076, + "step": 3753 + }, + { + "epoch": 0.996945956712256, + "grad_norm": 1.1617836993376212, + "learning_rate": 1.088358544657647e-05, + "loss": 0.27788421511650085, + "step": 3754 + }, + { + "epoch": 0.997211525693799, + "grad_norm": 1.0981105518173184, + "learning_rate": 1.0879211769578734e-05, + "loss": 0.2566586136817932, + "step": 3755 + }, + { + "epoch": 0.997477094675342, + "grad_norm": 1.1742409056404244, + "learning_rate": 1.0874837923079339e-05, + "loss": 0.3028980493545532, + "step": 3756 + }, + { + "epoch": 0.9977426636568849, + "grad_norm": 1.151070664269376, + "learning_rate": 1.0870463907921512e-05, + "loss": 0.30244824290275574, + "step": 3757 + }, + { + "epoch": 0.9980082326384279, + "grad_norm": 1.0175517300218122, + "learning_rate": 1.086608972494851e-05, + "loss": 0.2610962390899658, + "step": 3758 + }, + { + "epoch": 0.9982738016199708, + "grad_norm": 1.1587347636182326, + "learning_rate": 1.0861715375003623e-05, + "loss": 0.2733536660671234, + "step": 3759 + }, + { + "epoch": 0.9985393706015138, + "grad_norm": 
1.094010099730521, + "learning_rate": 1.0857340858930175e-05, + "loss": 0.2915020287036896, + "step": 3760 + }, + { + "epoch": 0.9988049395830567, + "grad_norm": 1.1164899423303463, + "learning_rate": 1.085296617757152e-05, + "loss": 0.2940186560153961, + "step": 3761 + }, + { + "epoch": 0.9990705085645997, + "grad_norm": 1.1441195343158572, + "learning_rate": 1.0848591331771045e-05, + "loss": 0.3002738952636719, + "step": 3762 + }, + { + "epoch": 0.9993360775461426, + "grad_norm": 1.0530840422742196, + "learning_rate": 1.0844216322372172e-05, + "loss": 0.284588485956192, + "step": 3763 + }, + { + "epoch": 0.9996016465276856, + "grad_norm": 1.0971261053209735, + "learning_rate": 1.0839841150218347e-05, + "loss": 0.29395923018455505, + "step": 3764 + }, + { + "epoch": 0.9998672155092285, + "grad_norm": 1.1355876604442514, + "learning_rate": 1.083546581615305e-05, + "loss": 0.2574613094329834, + "step": 3765 + }, + { + "epoch": 1.0, + "grad_norm": 1.535375625820537, + "learning_rate": 1.0831090321019801e-05, + "loss": 0.177712082862854, + "step": 3766 + }, + { + "epoch": 1.000265568981543, + "grad_norm": 1.1101315935040728, + "learning_rate": 1.0826714665662139e-05, + "loss": 0.29758381843566895, + "step": 3767 + }, + { + "epoch": 1.000531137963086, + "grad_norm": 1.055973006911073, + "learning_rate": 1.0822338850923644e-05, + "loss": 0.23377545177936554, + "step": 3768 + }, + { + "epoch": 1.0007967069446289, + "grad_norm": 1.1573191222761028, + "learning_rate": 1.0817962877647911e-05, + "loss": 0.2505020797252655, + "step": 3769 + }, + { + "epoch": 1.0010622759261718, + "grad_norm": 1.0395021899779042, + "learning_rate": 1.0813586746678584e-05, + "loss": 0.26122647523880005, + "step": 3770 + }, + { + "epoch": 1.0013278449077148, + "grad_norm": 1.1508778318464672, + "learning_rate": 1.0809210458859327e-05, + "loss": 0.27962177991867065, + "step": 3771 + }, + { + "epoch": 1.0015934138892577, + "grad_norm": 1.0479777844917506, + "learning_rate": 1.080483401503384e-05, 
+ "loss": 0.21921640634536743, + "step": 3772 + }, + { + "epoch": 1.0018589828708007, + "grad_norm": 1.1277812491041006, + "learning_rate": 1.0800457416045845e-05, + "loss": 0.24623796343803406, + "step": 3773 + }, + { + "epoch": 1.0021245518523436, + "grad_norm": 1.259401152466985, + "learning_rate": 1.0796080662739098e-05, + "loss": 0.3130728006362915, + "step": 3774 + }, + { + "epoch": 1.0023901208338866, + "grad_norm": 1.1209083810179328, + "learning_rate": 1.0791703755957392e-05, + "loss": 0.2548064589500427, + "step": 3775 + }, + { + "epoch": 1.0026556898154295, + "grad_norm": 1.1167206534835417, + "learning_rate": 1.078732669654454e-05, + "loss": 0.20517288148403168, + "step": 3776 + }, + { + "epoch": 1.0029212587969725, + "grad_norm": 1.1055374385175383, + "learning_rate": 1.0782949485344385e-05, + "loss": 0.2634897530078888, + "step": 3777 + }, + { + "epoch": 1.0031868277785154, + "grad_norm": 1.3696848286677328, + "learning_rate": 1.0778572123200804e-05, + "loss": 0.2743223309516907, + "step": 3778 + }, + { + "epoch": 1.0034523967600584, + "grad_norm": 0.9930991365195264, + "learning_rate": 1.0774194610957695e-05, + "loss": 0.24595436453819275, + "step": 3779 + }, + { + "epoch": 1.0037179657416013, + "grad_norm": 1.0885778480679946, + "learning_rate": 1.0769816949459002e-05, + "loss": 0.2508128881454468, + "step": 3780 + }, + { + "epoch": 1.0039835347231443, + "grad_norm": 1.1243431648812525, + "learning_rate": 1.0765439139548677e-05, + "loss": 0.2326367199420929, + "step": 3781 + }, + { + "epoch": 1.0042491037046872, + "grad_norm": 1.1514050771182385, + "learning_rate": 1.0761061182070716e-05, + "loss": 0.2888404130935669, + "step": 3782 + }, + { + "epoch": 1.0045146726862302, + "grad_norm": 1.1399638718055765, + "learning_rate": 1.0756683077869133e-05, + "loss": 0.2804296612739563, + "step": 3783 + }, + { + "epoch": 1.0047802416677731, + "grad_norm": 1.1286027319524963, + "learning_rate": 1.0752304827787979e-05, + "loss": 0.2644953429698944, + "step": 
3784 + }, + { + "epoch": 1.005045810649316, + "grad_norm": 1.2396532451569051, + "learning_rate": 1.0747926432671323e-05, + "loss": 0.297788143157959, + "step": 3785 + }, + { + "epoch": 1.005311379630859, + "grad_norm": 1.065071455363874, + "learning_rate": 1.0743547893363276e-05, + "loss": 0.2644156515598297, + "step": 3786 + }, + { + "epoch": 1.005576948612402, + "grad_norm": 1.1640867578019738, + "learning_rate": 1.073916921070796e-05, + "loss": 0.23818905651569366, + "step": 3787 + }, + { + "epoch": 1.005842517593945, + "grad_norm": 1.11872081222192, + "learning_rate": 1.0734790385549538e-05, + "loss": 0.2544933259487152, + "step": 3788 + }, + { + "epoch": 1.006108086575488, + "grad_norm": 1.0836442452511366, + "learning_rate": 1.0730411418732198e-05, + "loss": 0.2569275498390198, + "step": 3789 + }, + { + "epoch": 1.0063736555570308, + "grad_norm": 1.0348585374954582, + "learning_rate": 1.0726032311100153e-05, + "loss": 0.2248159945011139, + "step": 3790 + }, + { + "epoch": 1.0066392245385738, + "grad_norm": 1.1242207493876892, + "learning_rate": 1.072165306349764e-05, + "loss": 0.25541940331459045, + "step": 3791 + }, + { + "epoch": 1.0069047935201167, + "grad_norm": 9.328291099250833, + "learning_rate": 1.0717273676768924e-05, + "loss": 0.24429568648338318, + "step": 3792 + }, + { + "epoch": 1.0071703625016597, + "grad_norm": 1.0574884647737486, + "learning_rate": 1.0712894151758306e-05, + "loss": 0.2586621344089508, + "step": 3793 + }, + { + "epoch": 1.0074359314832027, + "grad_norm": 1.165205157800888, + "learning_rate": 1.0708514489310103e-05, + "loss": 0.28685104846954346, + "step": 3794 + }, + { + "epoch": 1.0077015004647458, + "grad_norm": 1.1536672746294196, + "learning_rate": 1.0704134690268661e-05, + "loss": 0.2847924530506134, + "step": 3795 + }, + { + "epoch": 1.0079670694462888, + "grad_norm": 1.1168453704329862, + "learning_rate": 1.0699754755478358e-05, + "loss": 0.24646440148353577, + "step": 3796 + }, + { + "epoch": 1.0082326384278317, + 
"grad_norm": 1.217438590106057, + "learning_rate": 1.0695374685783586e-05, + "loss": 0.22286385297775269, + "step": 3797 + }, + { + "epoch": 1.0084982074093747, + "grad_norm": 1.1352166249232278, + "learning_rate": 1.069099448202878e-05, + "loss": 0.2524179518222809, + "step": 3798 + }, + { + "epoch": 1.0087637763909176, + "grad_norm": 1.109981913009372, + "learning_rate": 1.0686614145058387e-05, + "loss": 0.2625758647918701, + "step": 3799 + }, + { + "epoch": 1.0090293453724606, + "grad_norm": 1.0622342238121125, + "learning_rate": 1.0682233675716884e-05, + "loss": 0.25318068265914917, + "step": 3800 + }, + { + "epoch": 1.0092949143540035, + "grad_norm": 1.073699024276181, + "learning_rate": 1.0677853074848774e-05, + "loss": 0.24224570393562317, + "step": 3801 + }, + { + "epoch": 1.0095604833355465, + "grad_norm": 1.1995813349182267, + "learning_rate": 1.0673472343298588e-05, + "loss": 0.28595417737960815, + "step": 3802 + }, + { + "epoch": 1.0098260523170894, + "grad_norm": 1.1558738404506108, + "learning_rate": 1.0669091481910874e-05, + "loss": 0.26894015073776245, + "step": 3803 + }, + { + "epoch": 1.0100916212986324, + "grad_norm": 1.0901744125075639, + "learning_rate": 1.0664710491530214e-05, + "loss": 0.2605208158493042, + "step": 3804 + }, + { + "epoch": 1.0103571902801753, + "grad_norm": 1.082458382717597, + "learning_rate": 1.0660329373001212e-05, + "loss": 0.2595113515853882, + "step": 3805 + }, + { + "epoch": 1.0106227592617183, + "grad_norm": 1.2467081294979763, + "learning_rate": 1.0655948127168494e-05, + "loss": 0.27478674054145813, + "step": 3806 + }, + { + "epoch": 1.0108883282432612, + "grad_norm": 1.0742167098010935, + "learning_rate": 1.0651566754876715e-05, + "loss": 0.2587064504623413, + "step": 3807 + }, + { + "epoch": 1.0111538972248042, + "grad_norm": 1.0593019665426413, + "learning_rate": 1.064718525697055e-05, + "loss": 0.2420537769794464, + "step": 3808 + }, + { + "epoch": 1.0114194662063472, + "grad_norm": 1.1660072059036033, + 
"learning_rate": 1.0642803634294699e-05, + "loss": 0.29424652457237244, + "step": 3809 + }, + { + "epoch": 1.01168503518789, + "grad_norm": 1.0902934718743655, + "learning_rate": 1.0638421887693887e-05, + "loss": 0.25162142515182495, + "step": 3810 + }, + { + "epoch": 1.011950604169433, + "grad_norm": 1.1456242703963635, + "learning_rate": 1.0634040018012865e-05, + "loss": 0.25661247968673706, + "step": 3811 + }, + { + "epoch": 1.012216173150976, + "grad_norm": 1.0060634238068926, + "learning_rate": 1.0629658026096408e-05, + "loss": 0.2042091339826584, + "step": 3812 + }, + { + "epoch": 1.012481742132519, + "grad_norm": 1.0129340658577524, + "learning_rate": 1.0625275912789307e-05, + "loss": 0.22496266663074493, + "step": 3813 + }, + { + "epoch": 1.012747311114062, + "grad_norm": 1.1382961966722176, + "learning_rate": 1.0620893678936385e-05, + "loss": 0.23609521985054016, + "step": 3814 + }, + { + "epoch": 1.0130128800956049, + "grad_norm": 1.2645443214744188, + "learning_rate": 1.0616511325382486e-05, + "loss": 0.2561722993850708, + "step": 3815 + }, + { + "epoch": 1.0132784490771478, + "grad_norm": 1.1379816472778304, + "learning_rate": 1.0612128852972474e-05, + "loss": 0.2617529630661011, + "step": 3816 + }, + { + "epoch": 1.0135440180586908, + "grad_norm": 1.1862833237483508, + "learning_rate": 1.060774626255124e-05, + "loss": 0.2633543014526367, + "step": 3817 + }, + { + "epoch": 1.0138095870402337, + "grad_norm": 1.0263666085354948, + "learning_rate": 1.0603363554963693e-05, + "loss": 0.19401729106903076, + "step": 3818 + }, + { + "epoch": 1.0140751560217767, + "grad_norm": 1.0891094169836097, + "learning_rate": 1.0598980731054765e-05, + "loss": 0.2583369016647339, + "step": 3819 + }, + { + "epoch": 1.0143407250033196, + "grad_norm": 1.1826598806695992, + "learning_rate": 1.0594597791669419e-05, + "loss": 0.26138922572135925, + "step": 3820 + }, + { + "epoch": 1.0146062939848626, + "grad_norm": 1.1580137447688548, + "learning_rate": 1.0590214737652632e-05, + 
"loss": 0.2506800591945648, + "step": 3821 + }, + { + "epoch": 1.0148718629664055, + "grad_norm": 1.032579662550809, + "learning_rate": 1.0585831569849405e-05, + "loss": 0.21569974720478058, + "step": 3822 + }, + { + "epoch": 1.0151374319479485, + "grad_norm": 1.37079648056154, + "learning_rate": 1.0581448289104759e-05, + "loss": 0.2765602767467499, + "step": 3823 + }, + { + "epoch": 1.0154030009294914, + "grad_norm": 1.2046968903946047, + "learning_rate": 1.0577064896263743e-05, + "loss": 0.25180384516716003, + "step": 3824 + }, + { + "epoch": 1.0156685699110344, + "grad_norm": 1.0796182560924539, + "learning_rate": 1.0572681392171417e-05, + "loss": 0.24164071679115295, + "step": 3825 + }, + { + "epoch": 1.0159341388925773, + "grad_norm": 1.1523354919316235, + "learning_rate": 1.0568297777672875e-05, + "loss": 0.24206972122192383, + "step": 3826 + }, + { + "epoch": 1.0161997078741203, + "grad_norm": 1.115771237946875, + "learning_rate": 1.0563914053613227e-05, + "loss": 0.24563468992710114, + "step": 3827 + }, + { + "epoch": 1.0164652768556632, + "grad_norm": 1.121826691352643, + "learning_rate": 1.0559530220837593e-05, + "loss": 0.23226243257522583, + "step": 3828 + }, + { + "epoch": 1.0167308458372062, + "grad_norm": 1.4499652400392462, + "learning_rate": 1.0555146280191137e-05, + "loss": 0.2245083749294281, + "step": 3829 + }, + { + "epoch": 1.0169964148187491, + "grad_norm": 1.1230707875328865, + "learning_rate": 1.0550762232519023e-05, + "loss": 0.24455049633979797, + "step": 3830 + }, + { + "epoch": 1.017261983800292, + "grad_norm": 1.1434011419253403, + "learning_rate": 1.0546378078666448e-05, + "loss": 0.2540651857852936, + "step": 3831 + }, + { + "epoch": 1.017527552781835, + "grad_norm": 1.222189193306495, + "learning_rate": 1.0541993819478622e-05, + "loss": 0.23392565548419952, + "step": 3832 + }, + { + "epoch": 1.017793121763378, + "grad_norm": 1.239236731837986, + "learning_rate": 1.053760945580078e-05, + "loss": 0.21601927280426025, + "step": 3833 + 
}, + { + "epoch": 1.018058690744921, + "grad_norm": 1.1697918037357793, + "learning_rate": 1.0533224988478176e-05, + "loss": 0.24622616171836853, + "step": 3834 + }, + { + "epoch": 1.018324259726464, + "grad_norm": 1.186224891573799, + "learning_rate": 1.0528840418356086e-05, + "loss": 0.2774650752544403, + "step": 3835 + }, + { + "epoch": 1.0185898287080069, + "grad_norm": 1.1218094293898884, + "learning_rate": 1.0524455746279795e-05, + "loss": 0.22323890030384064, + "step": 3836 + }, + { + "epoch": 1.0188553976895498, + "grad_norm": 1.0569207532138136, + "learning_rate": 1.0520070973094622e-05, + "loss": 0.21901552379131317, + "step": 3837 + }, + { + "epoch": 1.0191209666710928, + "grad_norm": 1.1936231752235407, + "learning_rate": 1.0515686099645901e-05, + "loss": 0.3037784695625305, + "step": 3838 + }, + { + "epoch": 1.0193865356526357, + "grad_norm": 1.0847362828180318, + "learning_rate": 1.0511301126778984e-05, + "loss": 0.22658365964889526, + "step": 3839 + }, + { + "epoch": 1.0196521046341787, + "grad_norm": 1.09040618490447, + "learning_rate": 1.0506916055339237e-05, + "loss": 0.23144160211086273, + "step": 3840 + }, + { + "epoch": 1.0199176736157216, + "grad_norm": 1.28339134317777, + "learning_rate": 1.0502530886172055e-05, + "loss": 0.25658899545669556, + "step": 3841 + }, + { + "epoch": 1.0201832425972646, + "grad_norm": 0.9689646092731519, + "learning_rate": 1.0498145620122845e-05, + "loss": 0.19658756256103516, + "step": 3842 + }, + { + "epoch": 1.0204488115788075, + "grad_norm": 1.0949311372526576, + "learning_rate": 1.049376025803703e-05, + "loss": 0.19045208394527435, + "step": 3843 + }, + { + "epoch": 1.0207143805603505, + "grad_norm": 1.1626763108379607, + "learning_rate": 1.0489374800760066e-05, + "loss": 0.2577810287475586, + "step": 3844 + }, + { + "epoch": 1.0209799495418934, + "grad_norm": 1.1521055149329589, + "learning_rate": 1.048498924913741e-05, + "loss": 0.2807403802871704, + "step": 3845 + }, + { + "epoch": 1.0212455185234364, + 
"grad_norm": 1.2275557893789377, + "learning_rate": 1.0480603604014545e-05, + "loss": 0.2710269093513489, + "step": 3846 + }, + { + "epoch": 1.0215110875049793, + "grad_norm": 1.173604136076929, + "learning_rate": 1.0476217866236974e-05, + "loss": 0.2560620903968811, + "step": 3847 + }, + { + "epoch": 1.0217766564865223, + "grad_norm": 1.1571778426612858, + "learning_rate": 1.0471832036650217e-05, + "loss": 0.2599894404411316, + "step": 3848 + }, + { + "epoch": 1.0220422254680652, + "grad_norm": 1.1339420848197217, + "learning_rate": 1.046744611609981e-05, + "loss": 0.2411944717168808, + "step": 3849 + }, + { + "epoch": 1.0223077944496084, + "grad_norm": 1.1528658942490468, + "learning_rate": 1.0463060105431303e-05, + "loss": 0.25216251611709595, + "step": 3850 + }, + { + "epoch": 1.0225733634311513, + "grad_norm": 1.1884423925105638, + "learning_rate": 1.0458674005490263e-05, + "loss": 0.255629301071167, + "step": 3851 + }, + { + "epoch": 1.0228389324126943, + "grad_norm": 1.0777718220336832, + "learning_rate": 1.0454287817122291e-05, + "loss": 0.24032849073410034, + "step": 3852 + }, + { + "epoch": 1.0231045013942373, + "grad_norm": 1.1154013609024198, + "learning_rate": 1.0449901541172983e-05, + "loss": 0.23188306391239166, + "step": 3853 + }, + { + "epoch": 1.0233700703757802, + "grad_norm": 1.149374478972437, + "learning_rate": 1.0445515178487965e-05, + "loss": 0.2718146741390228, + "step": 3854 + }, + { + "epoch": 1.0236356393573232, + "grad_norm": 1.460691184866812, + "learning_rate": 1.0441128729912876e-05, + "loss": 0.30279839038848877, + "step": 3855 + }, + { + "epoch": 1.023901208338866, + "grad_norm": 1.0711762201816422, + "learning_rate": 1.0436742196293368e-05, + "loss": 0.2185024917125702, + "step": 3856 + }, + { + "epoch": 1.024166777320409, + "grad_norm": 1.2737960148140446, + "learning_rate": 1.0432355578475118e-05, + "loss": 0.2956481873989105, + "step": 3857 + }, + { + "epoch": 1.024432346301952, + "grad_norm": 1.1913794327080105, + 
"learning_rate": 1.0427968877303809e-05, + "loss": 0.28460678458213806, + "step": 3858 + }, + { + "epoch": 1.024697915283495, + "grad_norm": 1.1716718579119476, + "learning_rate": 1.0423582093625146e-05, + "loss": 0.24597057700157166, + "step": 3859 + }, + { + "epoch": 1.024963484265038, + "grad_norm": 0.987642591779768, + "learning_rate": 1.0419195228284856e-05, + "loss": 0.23986583948135376, + "step": 3860 + }, + { + "epoch": 1.0252290532465809, + "grad_norm": 1.0867576400643644, + "learning_rate": 1.0414808282128668e-05, + "loss": 0.2489446997642517, + "step": 3861 + }, + { + "epoch": 1.0254946222281238, + "grad_norm": 1.1200031637603385, + "learning_rate": 1.0410421256002334e-05, + "loss": 0.26777884364128113, + "step": 3862 + }, + { + "epoch": 1.0257601912096668, + "grad_norm": 1.1645962699086565, + "learning_rate": 1.0406034150751625e-05, + "loss": 0.23506489396095276, + "step": 3863 + }, + { + "epoch": 1.0260257601912097, + "grad_norm": 1.1861093965134106, + "learning_rate": 1.040164696722232e-05, + "loss": 0.2526484429836273, + "step": 3864 + }, + { + "epoch": 1.0262913291727527, + "grad_norm": 1.1320109702434422, + "learning_rate": 1.0397259706260216e-05, + "loss": 0.2179267853498459, + "step": 3865 + }, + { + "epoch": 1.0265568981542956, + "grad_norm": 1.0267487594121727, + "learning_rate": 1.0392872368711126e-05, + "loss": 0.2431088387966156, + "step": 3866 + }, + { + "epoch": 1.0268224671358386, + "grad_norm": 1.1394336459602463, + "learning_rate": 1.0388484955420877e-05, + "loss": 0.26101407408714294, + "step": 3867 + }, + { + "epoch": 1.0270880361173815, + "grad_norm": 1.0741553283028158, + "learning_rate": 1.0384097467235308e-05, + "loss": 0.23780573904514313, + "step": 3868 + }, + { + "epoch": 1.0273536050989245, + "grad_norm": 1.467981467949694, + "learning_rate": 1.0379709905000278e-05, + "loss": 0.2469894289970398, + "step": 3869 + }, + { + "epoch": 1.0276191740804674, + "grad_norm": 1.074989572738127, + "learning_rate": 1.0375322269561658e-05, + 
"loss": 0.21271926164627075, + "step": 3870 + }, + { + "epoch": 1.0278847430620104, + "grad_norm": 1.1192343716648714, + "learning_rate": 1.0370934561765331e-05, + "loss": 0.22995726764202118, + "step": 3871 + }, + { + "epoch": 1.0281503120435533, + "grad_norm": 1.2051770162428763, + "learning_rate": 1.0366546782457196e-05, + "loss": 0.27448171377182007, + "step": 3872 + }, + { + "epoch": 1.0284158810250963, + "grad_norm": 1.232887313588547, + "learning_rate": 1.0362158932483165e-05, + "loss": 0.25459539890289307, + "step": 3873 + }, + { + "epoch": 1.0286814500066392, + "grad_norm": 1.1436601222318827, + "learning_rate": 1.0357771012689162e-05, + "loss": 0.23213380575180054, + "step": 3874 + }, + { + "epoch": 1.0289470189881822, + "grad_norm": 1.107979602389345, + "learning_rate": 1.0353383023921127e-05, + "loss": 0.2219776064157486, + "step": 3875 + }, + { + "epoch": 1.0292125879697251, + "grad_norm": 1.2445278934711803, + "learning_rate": 1.0348994967025012e-05, + "loss": 0.27059125900268555, + "step": 3876 + }, + { + "epoch": 1.029478156951268, + "grad_norm": 1.2314072238589235, + "learning_rate": 1.034460684284678e-05, + "loss": 0.26921501755714417, + "step": 3877 + }, + { + "epoch": 1.029743725932811, + "grad_norm": 1.153389282583655, + "learning_rate": 1.0340218652232419e-05, + "loss": 0.24727991223335266, + "step": 3878 + }, + { + "epoch": 1.030009294914354, + "grad_norm": 1.2105369925319034, + "learning_rate": 1.0335830396027912e-05, + "loss": 0.26276054978370667, + "step": 3879 + }, + { + "epoch": 1.030274863895897, + "grad_norm": 1.1222835146983237, + "learning_rate": 1.0331442075079268e-05, + "loss": 0.25906458497047424, + "step": 3880 + }, + { + "epoch": 1.03054043287744, + "grad_norm": 1.1936099182612667, + "learning_rate": 1.0327053690232498e-05, + "loss": 0.2708794176578522, + "step": 3881 + }, + { + "epoch": 1.0308060018589829, + "grad_norm": 1.1283814494585969, + "learning_rate": 1.0322665242333634e-05, + "loss": 0.24968653917312622, + "step": 3882 
+ }, + { + "epoch": 1.0310715708405258, + "grad_norm": 1.1912763351930955, + "learning_rate": 1.0318276732228716e-05, + "loss": 0.2669135332107544, + "step": 3883 + }, + { + "epoch": 1.0313371398220688, + "grad_norm": 1.0733368423352447, + "learning_rate": 1.0313888160763799e-05, + "loss": 0.24173730611801147, + "step": 3884 + }, + { + "epoch": 1.0316027088036117, + "grad_norm": 1.4084549111395024, + "learning_rate": 1.0309499528784948e-05, + "loss": 0.27513059973716736, + "step": 3885 + }, + { + "epoch": 1.0318682777851547, + "grad_norm": 1.163470416419209, + "learning_rate": 1.0305110837138235e-05, + "loss": 0.2512688934803009, + "step": 3886 + }, + { + "epoch": 1.0321338467666976, + "grad_norm": 1.100016135139411, + "learning_rate": 1.0300722086669753e-05, + "loss": 0.2584962844848633, + "step": 3887 + }, + { + "epoch": 1.0323994157482406, + "grad_norm": 1.1125458904355436, + "learning_rate": 1.0296333278225599e-05, + "loss": 0.23692303895950317, + "step": 3888 + }, + { + "epoch": 1.0326649847297835, + "grad_norm": 1.1981051682884363, + "learning_rate": 1.0291944412651884e-05, + "loss": 0.2570871114730835, + "step": 3889 + }, + { + "epoch": 1.0329305537113265, + "grad_norm": 1.1839354606788588, + "learning_rate": 1.028755549079473e-05, + "loss": 0.2896367609500885, + "step": 3890 + }, + { + "epoch": 1.0331961226928694, + "grad_norm": 0.958593784491898, + "learning_rate": 1.0283166513500267e-05, + "loss": 0.19990365207195282, + "step": 3891 + }, + { + "epoch": 1.0334616916744124, + "grad_norm": 1.1157517117826752, + "learning_rate": 1.0278777481614639e-05, + "loss": 0.25235646963119507, + "step": 3892 + }, + { + "epoch": 1.0337272606559553, + "grad_norm": 1.1808927381569394, + "learning_rate": 1.0274388395984003e-05, + "loss": 0.23675012588500977, + "step": 3893 + }, + { + "epoch": 1.0339928296374983, + "grad_norm": 1.1370597202642294, + "learning_rate": 1.026999925745452e-05, + "loss": 0.250516414642334, + "step": 3894 + }, + { + "epoch": 1.0342583986190412, + 
"grad_norm": 1.0692414219621886, + "learning_rate": 1.0265610066872365e-05, + "loss": 0.24573490023612976, + "step": 3895 + }, + { + "epoch": 1.0345239676005842, + "grad_norm": 1.085358990363196, + "learning_rate": 1.026122082508372e-05, + "loss": 0.2473086714744568, + "step": 3896 + }, + { + "epoch": 1.0347895365821271, + "grad_norm": 1.162338198859519, + "learning_rate": 1.0256831532934783e-05, + "loss": 0.26546406745910645, + "step": 3897 + }, + { + "epoch": 1.03505510556367, + "grad_norm": 1.1034436628854154, + "learning_rate": 1.0252442191271754e-05, + "loss": 0.2565246522426605, + "step": 3898 + }, + { + "epoch": 1.035320674545213, + "grad_norm": 1.0272875416109402, + "learning_rate": 1.0248052800940846e-05, + "loss": 0.24923476576805115, + "step": 3899 + }, + { + "epoch": 1.035586243526756, + "grad_norm": 1.1519345059696067, + "learning_rate": 1.0243663362788286e-05, + "loss": 0.3079240322113037, + "step": 3900 + }, + { + "epoch": 1.035851812508299, + "grad_norm": 1.0586971174066726, + "learning_rate": 1.0239273877660302e-05, + "loss": 0.2482951581478119, + "step": 3901 + }, + { + "epoch": 1.036117381489842, + "grad_norm": 1.1495296797401515, + "learning_rate": 1.0234884346403138e-05, + "loss": 0.2626204192638397, + "step": 3902 + }, + { + "epoch": 1.0363829504713848, + "grad_norm": 1.0578834148114886, + "learning_rate": 1.023049476986304e-05, + "loss": 0.23181654512882233, + "step": 3903 + }, + { + "epoch": 1.0366485194529278, + "grad_norm": 1.2527800012652353, + "learning_rate": 1.0226105148886272e-05, + "loss": 0.29164040088653564, + "step": 3904 + }, + { + "epoch": 1.0369140884344707, + "grad_norm": 1.034136654365203, + "learning_rate": 1.0221715484319094e-05, + "loss": 0.22025801241397858, + "step": 3905 + }, + { + "epoch": 1.0371796574160137, + "grad_norm": 1.1162047929812215, + "learning_rate": 1.021732577700779e-05, + "loss": 0.2819385826587677, + "step": 3906 + }, + { + "epoch": 1.0374452263975567, + "grad_norm": 1.0524498644463125, + 
"learning_rate": 1.0212936027798637e-05, + "loss": 0.24709002673625946, + "step": 3907 + }, + { + "epoch": 1.0377107953790998, + "grad_norm": 0.9984579723832369, + "learning_rate": 1.0208546237537928e-05, + "loss": 0.22570034861564636, + "step": 3908 + }, + { + "epoch": 1.0379763643606428, + "grad_norm": 1.1543900299803864, + "learning_rate": 1.0204156407071964e-05, + "loss": 0.25642865896224976, + "step": 3909 + }, + { + "epoch": 1.0382419333421857, + "grad_norm": 1.1657404882715603, + "learning_rate": 1.0199766537247053e-05, + "loss": 0.25970256328582764, + "step": 3910 + }, + { + "epoch": 1.0385075023237287, + "grad_norm": 1.1347864223586095, + "learning_rate": 1.019537662890951e-05, + "loss": 0.2560003101825714, + "step": 3911 + }, + { + "epoch": 1.0387730713052716, + "grad_norm": 1.3160565196765366, + "learning_rate": 1.0190986682905656e-05, + "loss": 0.28138649463653564, + "step": 3912 + }, + { + "epoch": 1.0390386402868146, + "grad_norm": 1.4353879235637104, + "learning_rate": 1.0186596700081825e-05, + "loss": 0.23531222343444824, + "step": 3913 + }, + { + "epoch": 1.0393042092683575, + "grad_norm": 1.1850676655471586, + "learning_rate": 1.018220668128435e-05, + "loss": 0.24912862479686737, + "step": 3914 + }, + { + "epoch": 1.0395697782499005, + "grad_norm": 1.0811585337632708, + "learning_rate": 1.0177816627359575e-05, + "loss": 0.24188724160194397, + "step": 3915 + }, + { + "epoch": 1.0398353472314434, + "grad_norm": 1.2093489820950423, + "learning_rate": 1.0173426539153853e-05, + "loss": 0.2709474563598633, + "step": 3916 + }, + { + "epoch": 1.0401009162129864, + "grad_norm": 1.1793292324294091, + "learning_rate": 1.0169036417513538e-05, + "loss": 0.2400204837322235, + "step": 3917 + }, + { + "epoch": 1.0403664851945293, + "grad_norm": 1.0489256907825586, + "learning_rate": 1.0164646263284993e-05, + "loss": 0.2687132954597473, + "step": 3918 + }, + { + "epoch": 1.0406320541760723, + "grad_norm": 1.1628887826217675, + "learning_rate": 
1.0160256077314592e-05, + "loss": 0.25139346718788147, + "step": 3919 + }, + { + "epoch": 1.0408976231576152, + "grad_norm": 1.1762633281473511, + "learning_rate": 1.0155865860448712e-05, + "loss": 0.25873464345932007, + "step": 3920 + }, + { + "epoch": 1.0411631921391582, + "grad_norm": 1.1207165962030725, + "learning_rate": 1.0151475613533732e-05, + "loss": 0.2510434687137604, + "step": 3921 + }, + { + "epoch": 1.0414287611207012, + "grad_norm": 1.2260247662339232, + "learning_rate": 1.0147085337416036e-05, + "loss": 0.24567106366157532, + "step": 3922 + }, + { + "epoch": 1.041694330102244, + "grad_norm": 1.1642096823951156, + "learning_rate": 1.0142695032942024e-05, + "loss": 0.25028282403945923, + "step": 3923 + }, + { + "epoch": 1.041959899083787, + "grad_norm": 1.140963361472911, + "learning_rate": 1.0138304700958096e-05, + "loss": 0.23542484641075134, + "step": 3924 + }, + { + "epoch": 1.04222546806533, + "grad_norm": 1.2475887570620718, + "learning_rate": 1.0133914342310649e-05, + "loss": 0.28974449634552, + "step": 3925 + }, + { + "epoch": 1.042491037046873, + "grad_norm": 1.0648736453755918, + "learning_rate": 1.0129523957846097e-05, + "loss": 0.23417247831821442, + "step": 3926 + }, + { + "epoch": 1.042756606028416, + "grad_norm": 1.1427047582178407, + "learning_rate": 1.0125133548410852e-05, + "loss": 0.23247018456459045, + "step": 3927 + }, + { + "epoch": 1.0430221750099589, + "grad_norm": 1.1496713132119072, + "learning_rate": 1.0120743114851337e-05, + "loss": 0.23860129714012146, + "step": 3928 + }, + { + "epoch": 1.0432877439915018, + "grad_norm": 1.1567405333157526, + "learning_rate": 1.0116352658013973e-05, + "loss": 0.2609105706214905, + "step": 3929 + }, + { + "epoch": 1.0435533129730448, + "grad_norm": 1.2453984448185509, + "learning_rate": 1.0111962178745187e-05, + "loss": 0.2559507489204407, + "step": 3930 + }, + { + "epoch": 1.0438188819545877, + "grad_norm": 1.2247288020965454, + "learning_rate": 1.0107571677891415e-05, + "loss": 
0.2708527147769928, + "step": 3931 + }, + { + "epoch": 1.0440844509361307, + "grad_norm": 1.2373037230453465, + "learning_rate": 1.0103181156299091e-05, + "loss": 0.25884875655174255, + "step": 3932 + }, + { + "epoch": 1.0443500199176736, + "grad_norm": 1.3022673165052032, + "learning_rate": 1.0098790614814658e-05, + "loss": 0.2631877660751343, + "step": 3933 + }, + { + "epoch": 1.0446155888992166, + "grad_norm": 1.0267097797291302, + "learning_rate": 1.0094400054284559e-05, + "loss": 0.27179086208343506, + "step": 3934 + }, + { + "epoch": 1.0448811578807595, + "grad_norm": 2.1081344450494144, + "learning_rate": 1.0090009475555245e-05, + "loss": 0.21690386533737183, + "step": 3935 + }, + { + "epoch": 1.0451467268623025, + "grad_norm": 1.0188398651288513, + "learning_rate": 1.0085618879473162e-05, + "loss": 0.20192815363407135, + "step": 3936 + }, + { + "epoch": 1.0454122958438454, + "grad_norm": 1.213624997308106, + "learning_rate": 1.0081228266884773e-05, + "loss": 0.2680777907371521, + "step": 3937 + }, + { + "epoch": 1.0456778648253884, + "grad_norm": 1.1871222610891168, + "learning_rate": 1.007683763863653e-05, + "loss": 0.2566579580307007, + "step": 3938 + }, + { + "epoch": 1.0459434338069313, + "grad_norm": 1.1229802475790265, + "learning_rate": 1.0072446995574895e-05, + "loss": 0.2508152723312378, + "step": 3939 + }, + { + "epoch": 1.0462090027884743, + "grad_norm": 1.0850640213400236, + "learning_rate": 1.0068056338546335e-05, + "loss": 0.2880190908908844, + "step": 3940 + }, + { + "epoch": 1.0464745717700172, + "grad_norm": 1.1129549761108044, + "learning_rate": 1.0063665668397316e-05, + "loss": 0.2646787464618683, + "step": 3941 + }, + { + "epoch": 1.0467401407515602, + "grad_norm": 1.1116528447502043, + "learning_rate": 1.0059274985974305e-05, + "loss": 0.2327616810798645, + "step": 3942 + }, + { + "epoch": 1.0470057097331031, + "grad_norm": 1.1644185595792014, + "learning_rate": 1.0054884292123778e-05, + "loss": 0.24756258726119995, + "step": 3943 + }, 
+ { + "epoch": 1.047271278714646, + "grad_norm": 1.1010853288322209, + "learning_rate": 1.0050493587692207e-05, + "loss": 0.23657771944999695, + "step": 3944 + }, + { + "epoch": 1.047536847696189, + "grad_norm": 1.1386107444709148, + "learning_rate": 1.0046102873526068e-05, + "loss": 0.2541351616382599, + "step": 3945 + }, + { + "epoch": 1.047802416677732, + "grad_norm": 1.0912263009271301, + "learning_rate": 1.0041712150471839e-05, + "loss": 0.2330317348241806, + "step": 3946 + }, + { + "epoch": 1.048067985659275, + "grad_norm": 1.0696190454357721, + "learning_rate": 1.0037321419375997e-05, + "loss": 0.23411181569099426, + "step": 3947 + }, + { + "epoch": 1.048333554640818, + "grad_norm": 1.1223872975815399, + "learning_rate": 1.0032930681085028e-05, + "loss": 0.2605017125606537, + "step": 3948 + }, + { + "epoch": 1.0485991236223609, + "grad_norm": 1.1766579775240698, + "learning_rate": 1.0028539936445407e-05, + "loss": 0.28651514649391174, + "step": 3949 + }, + { + "epoch": 1.0488646926039038, + "grad_norm": 1.1469362905517786, + "learning_rate": 1.0024149186303628e-05, + "loss": 0.22912876307964325, + "step": 3950 + }, + { + "epoch": 1.0491302615854468, + "grad_norm": 1.206814749340921, + "learning_rate": 1.001975843150617e-05, + "loss": 0.24032847583293915, + "step": 3951 + }, + { + "epoch": 1.0493958305669897, + "grad_norm": 1.0089656289438405, + "learning_rate": 1.0015367672899521e-05, + "loss": 0.17826229333877563, + "step": 3952 + }, + { + "epoch": 1.0496613995485327, + "grad_norm": 1.1440301784208975, + "learning_rate": 1.0010976911330163e-05, + "loss": 0.2619745433330536, + "step": 3953 + }, + { + "epoch": 1.0499269685300756, + "grad_norm": 1.1124743886634039, + "learning_rate": 1.0006586147644585e-05, + "loss": 0.24104374647140503, + "step": 3954 + }, + { + "epoch": 1.0501925375116186, + "grad_norm": 1.2465051058358483, + "learning_rate": 1.0002195382689277e-05, + "loss": 0.22913998365402222, + "step": 3955 + }, + { + "epoch": 1.0504581064931615, + 
"grad_norm": 1.2288244416278613, + "learning_rate": 9.997804617310724e-06, + "loss": 0.2625126838684082, + "step": 3956 + }, + { + "epoch": 1.0507236754747045, + "grad_norm": 1.1016811290492863, + "learning_rate": 9.993413852355416e-06, + "loss": 0.23098430037498474, + "step": 3957 + }, + { + "epoch": 1.0509892444562474, + "grad_norm": 1.2581954843436995, + "learning_rate": 9.98902308866984e-06, + "loss": 0.2866731882095337, + "step": 3958 + }, + { + "epoch": 1.0512548134377904, + "grad_norm": 1.2595027481112393, + "learning_rate": 9.984632327100482e-06, + "loss": 0.2520306706428528, + "step": 3959 + }, + { + "epoch": 1.0515203824193333, + "grad_norm": 1.2731218614589663, + "learning_rate": 9.980241568493834e-06, + "loss": 0.29688766598701477, + "step": 3960 + }, + { + "epoch": 1.0517859514008763, + "grad_norm": 1.2865298416208544, + "learning_rate": 9.975850813696375e-06, + "loss": 0.2876695990562439, + "step": 3961 + }, + { + "epoch": 1.0520515203824194, + "grad_norm": 1.1190033835182807, + "learning_rate": 9.971460063554595e-06, + "loss": 0.2402629554271698, + "step": 3962 + }, + { + "epoch": 1.0523170893639624, + "grad_norm": 1.288030170241207, + "learning_rate": 9.967069318914977e-06, + "loss": 0.32080164551734924, + "step": 3963 + }, + { + "epoch": 1.0525826583455054, + "grad_norm": 1.3484684025161604, + "learning_rate": 9.962678580624008e-06, + "loss": 0.2642936110496521, + "step": 3964 + }, + { + "epoch": 1.0528482273270483, + "grad_norm": 1.1668064537758471, + "learning_rate": 9.958287849528163e-06, + "loss": 0.255870521068573, + "step": 3965 + }, + { + "epoch": 1.0531137963085913, + "grad_norm": 1.1779058124731279, + "learning_rate": 9.953897126473933e-06, + "loss": 0.2695184350013733, + "step": 3966 + }, + { + "epoch": 1.0533793652901342, + "grad_norm": 1.1937956388734083, + "learning_rate": 9.949506412307795e-06, + "loss": 0.24576464295387268, + "step": 3967 + }, + { + "epoch": 1.0536449342716772, + "grad_norm": 1.210893055599799, + "learning_rate": 
9.945115707876224e-06, + "loss": 0.26517459750175476, + "step": 3968 + }, + { + "epoch": 1.05391050325322, + "grad_norm": 1.261309936483727, + "learning_rate": 9.940725014025696e-06, + "loss": 0.30468082427978516, + "step": 3969 + }, + { + "epoch": 1.054176072234763, + "grad_norm": 1.1007633858966879, + "learning_rate": 9.936334331602687e-06, + "loss": 0.25299298763275146, + "step": 3970 + }, + { + "epoch": 1.054441641216306, + "grad_norm": 1.1621642625136148, + "learning_rate": 9.931943661453668e-06, + "loss": 0.2659488320350647, + "step": 3971 + }, + { + "epoch": 1.054707210197849, + "grad_norm": 1.129768041847351, + "learning_rate": 9.92755300442511e-06, + "loss": 0.25957295298576355, + "step": 3972 + }, + { + "epoch": 1.054972779179392, + "grad_norm": 1.0969185518732962, + "learning_rate": 9.923162361363476e-06, + "loss": 0.2416645884513855, + "step": 3973 + }, + { + "epoch": 1.0552383481609349, + "grad_norm": 1.1032067417924427, + "learning_rate": 9.91877173311523e-06, + "loss": 0.2627662122249603, + "step": 3974 + }, + { + "epoch": 1.0555039171424778, + "grad_norm": 1.1485553701369502, + "learning_rate": 9.91438112052684e-06, + "loss": 0.2876631021499634, + "step": 3975 + }, + { + "epoch": 1.0557694861240208, + "grad_norm": 1.1306607772682384, + "learning_rate": 9.90999052444476e-06, + "loss": 0.28336596488952637, + "step": 3976 + }, + { + "epoch": 1.0560350551055637, + "grad_norm": 1.266085815857313, + "learning_rate": 9.905599945715443e-06, + "loss": 0.2970484495162964, + "step": 3977 + }, + { + "epoch": 1.0563006240871067, + "grad_norm": 1.188464425479595, + "learning_rate": 9.901209385185345e-06, + "loss": 0.27202755212783813, + "step": 3978 + }, + { + "epoch": 1.0565661930686496, + "grad_norm": 1.0823738866829473, + "learning_rate": 9.896818843700912e-06, + "loss": 0.2702459990978241, + "step": 3979 + }, + { + "epoch": 1.0568317620501926, + "grad_norm": 1.2166105195755876, + "learning_rate": 9.89242832210859e-06, + "loss": 0.26057881116867065, + "step": 
3980 + }, + { + "epoch": 1.0570973310317355, + "grad_norm": 1.1526398422075472, + "learning_rate": 9.888037821254816e-06, + "loss": 0.24006876349449158, + "step": 3981 + }, + { + "epoch": 1.0573629000132785, + "grad_norm": 1.0864441989704317, + "learning_rate": 9.883647341986032e-06, + "loss": 0.2437625676393509, + "step": 3982 + }, + { + "epoch": 1.0576284689948214, + "grad_norm": 1.0572722810626467, + "learning_rate": 9.879256885148666e-06, + "loss": 0.24256819486618042, + "step": 3983 + }, + { + "epoch": 1.0578940379763644, + "grad_norm": 1.2008491436753201, + "learning_rate": 9.874866451589151e-06, + "loss": 0.2714581787586212, + "step": 3984 + }, + { + "epoch": 1.0581596069579073, + "grad_norm": 1.1859043120388024, + "learning_rate": 9.870476042153907e-06, + "loss": 0.30309075117111206, + "step": 3985 + }, + { + "epoch": 1.0584251759394503, + "grad_norm": 1.3001941243887445, + "learning_rate": 9.866085657689355e-06, + "loss": 0.2938288450241089, + "step": 3986 + }, + { + "epoch": 1.0586907449209932, + "grad_norm": 1.1041962963159588, + "learning_rate": 9.86169529904191e-06, + "loss": 0.23748518526554108, + "step": 3987 + }, + { + "epoch": 1.0589563139025362, + "grad_norm": 1.2345572480055271, + "learning_rate": 9.857304967057977e-06, + "loss": 0.2883969247341156, + "step": 3988 + }, + { + "epoch": 1.0592218828840791, + "grad_norm": 1.0871048681541509, + "learning_rate": 9.852914662583966e-06, + "loss": 0.28301289677619934, + "step": 3989 + }, + { + "epoch": 1.059487451865622, + "grad_norm": 1.0733060702724175, + "learning_rate": 9.848524386466273e-06, + "loss": 0.22616548836231232, + "step": 3990 + }, + { + "epoch": 1.059753020847165, + "grad_norm": 1.06530549901144, + "learning_rate": 9.844134139551291e-06, + "loss": 0.2282804250717163, + "step": 3991 + }, + { + "epoch": 1.060018589828708, + "grad_norm": 1.154557745213229, + "learning_rate": 9.839743922685408e-06, + "loss": 0.2407834678888321, + "step": 3992 + }, + { + "epoch": 1.060284158810251, + 
"grad_norm": 1.0504099183304738, + "learning_rate": 9.835353736715007e-06, + "loss": 0.22690361738204956, + "step": 3993 + }, + { + "epoch": 1.060549727791794, + "grad_norm": 1.529267187296219, + "learning_rate": 9.830963582486465e-06, + "loss": 0.23291411995887756, + "step": 3994 + }, + { + "epoch": 1.0608152967733369, + "grad_norm": 1.0804914844168854, + "learning_rate": 9.82657346084615e-06, + "loss": 0.24524198472499847, + "step": 3995 + }, + { + "epoch": 1.0610808657548798, + "grad_norm": 1.130929241291739, + "learning_rate": 9.822183372640426e-06, + "loss": 0.22087743878364563, + "step": 3996 + }, + { + "epoch": 1.0613464347364228, + "grad_norm": 1.1374060021264791, + "learning_rate": 9.817793318715652e-06, + "loss": 0.2459079772233963, + "step": 3997 + }, + { + "epoch": 1.0616120037179657, + "grad_norm": 1.1393890830478974, + "learning_rate": 9.813403299918178e-06, + "loss": 0.24429920315742493, + "step": 3998 + }, + { + "epoch": 1.0618775726995087, + "grad_norm": 1.140499707599593, + "learning_rate": 9.809013317094345e-06, + "loss": 0.2332335114479065, + "step": 3999 + }, + { + "epoch": 1.0621431416810516, + "grad_norm": 1.2157908167694267, + "learning_rate": 9.804623371090493e-06, + "loss": 0.2861659526824951, + "step": 4000 + }, + { + "epoch": 1.0624087106625946, + "grad_norm": 1.1293440606459217, + "learning_rate": 9.800233462752949e-06, + "loss": 0.22731532156467438, + "step": 4001 + }, + { + "epoch": 1.0626742796441375, + "grad_norm": 1.127775309467411, + "learning_rate": 9.795843592928036e-06, + "loss": 0.245025634765625, + "step": 4002 + }, + { + "epoch": 1.0629398486256805, + "grad_norm": 1.2380242649872155, + "learning_rate": 9.791453762462075e-06, + "loss": 0.2826273441314697, + "step": 4003 + }, + { + "epoch": 1.0632054176072234, + "grad_norm": 1.1330484645300947, + "learning_rate": 9.787063972201368e-06, + "loss": 0.24737229943275452, + "step": 4004 + }, + { + "epoch": 1.0634709865887664, + "grad_norm": 1.3814870803010457, + "learning_rate": 
9.782674222992214e-06, + "loss": 0.23368477821350098, + "step": 4005 + }, + { + "epoch": 1.0637365555703093, + "grad_norm": 1.2631953536046527, + "learning_rate": 9.778284515680908e-06, + "loss": 0.2754492461681366, + "step": 4006 + }, + { + "epoch": 1.0640021245518523, + "grad_norm": 1.1906091191722363, + "learning_rate": 9.773894851113732e-06, + "loss": 0.2814168334007263, + "step": 4007 + }, + { + "epoch": 1.0642676935333952, + "grad_norm": 1.1594492512554253, + "learning_rate": 9.769505230136962e-06, + "loss": 0.25388047099113464, + "step": 4008 + }, + { + "epoch": 1.0645332625149382, + "grad_norm": 1.2618382745485697, + "learning_rate": 9.765115653596867e-06, + "loss": 0.25435230135917664, + "step": 4009 + }, + { + "epoch": 1.0647988314964811, + "grad_norm": 1.2251032153283614, + "learning_rate": 9.760726122339698e-06, + "loss": 0.265840083360672, + "step": 4010 + }, + { + "epoch": 1.065064400478024, + "grad_norm": 1.1297656349054435, + "learning_rate": 9.756336637211716e-06, + "loss": 0.2533451020717621, + "step": 4011 + }, + { + "epoch": 1.065329969459567, + "grad_norm": 1.0890158421111886, + "learning_rate": 9.751947199059155e-06, + "loss": 0.25214290618896484, + "step": 4012 + }, + { + "epoch": 1.06559553844111, + "grad_norm": 1.0603532415232781, + "learning_rate": 9.74755780872825e-06, + "loss": 0.25039419531822205, + "step": 4013 + }, + { + "epoch": 1.065861107422653, + "grad_norm": 1.0177623632775965, + "learning_rate": 9.74316846706522e-06, + "loss": 0.21251091361045837, + "step": 4014 + }, + { + "epoch": 1.066126676404196, + "grad_norm": 1.123294230398497, + "learning_rate": 9.738779174916281e-06, + "loss": 0.25898969173431396, + "step": 4015 + }, + { + "epoch": 1.0663922453857388, + "grad_norm": 1.1054663361669936, + "learning_rate": 9.734389933127639e-06, + "loss": 0.2655499577522278, + "step": 4016 + }, + { + "epoch": 1.0666578143672818, + "grad_norm": 1.1153507141873742, + "learning_rate": 9.730000742545485e-06, + "loss": 0.2221338450908661, + 
"step": 4017 + }, + { + "epoch": 1.0669233833488247, + "grad_norm": 1.1746716643835395, + "learning_rate": 9.725611604016002e-06, + "loss": 0.2567589581012726, + "step": 4018 + }, + { + "epoch": 1.0671889523303677, + "grad_norm": 1.1090772377521565, + "learning_rate": 9.721222518385361e-06, + "loss": 0.24440976977348328, + "step": 4019 + }, + { + "epoch": 1.0674545213119107, + "grad_norm": 1.061787642846094, + "learning_rate": 9.716833486499735e-06, + "loss": 0.2229192852973938, + "step": 4020 + }, + { + "epoch": 1.0677200902934538, + "grad_norm": 1.1014121727705226, + "learning_rate": 9.712444509205273e-06, + "loss": 0.26231470704078674, + "step": 4021 + }, + { + "epoch": 1.0679856592749968, + "grad_norm": 1.2531191320236732, + "learning_rate": 9.708055587348119e-06, + "loss": 0.25099092721939087, + "step": 4022 + }, + { + "epoch": 1.0682512282565397, + "grad_norm": 1.1402160070516023, + "learning_rate": 9.703666721774403e-06, + "loss": 0.22979633510112762, + "step": 4023 + }, + { + "epoch": 1.0685167972380827, + "grad_norm": 1.09571485621585, + "learning_rate": 9.699277913330252e-06, + "loss": 0.2361093908548355, + "step": 4024 + }, + { + "epoch": 1.0687823662196256, + "grad_norm": 1.0765448804717204, + "learning_rate": 9.694889162861768e-06, + "loss": 0.2390863001346588, + "step": 4025 + }, + { + "epoch": 1.0690479352011686, + "grad_norm": 1.2569917808844517, + "learning_rate": 9.690500471215057e-06, + "loss": 0.24917885661125183, + "step": 4026 + }, + { + "epoch": 1.0693135041827115, + "grad_norm": 1.1387127210628816, + "learning_rate": 9.686111839236206e-06, + "loss": 0.24215272068977356, + "step": 4027 + }, + { + "epoch": 1.0695790731642545, + "grad_norm": 1.2809085503832063, + "learning_rate": 9.681723267771284e-06, + "loss": 0.27874231338500977, + "step": 4028 + }, + { + "epoch": 1.0698446421457974, + "grad_norm": 1.1707122559783085, + "learning_rate": 9.677334757666368e-06, + "loss": 0.24076086282730103, + "step": 4029 + }, + { + "epoch": 
1.0701102111273404, + "grad_norm": 1.1092369229920938, + "learning_rate": 9.672946309767504e-06, + "loss": 0.2444242238998413, + "step": 4030 + }, + { + "epoch": 1.0703757801088833, + "grad_norm": 1.2086874522857378, + "learning_rate": 9.668557924920735e-06, + "loss": 0.2737279236316681, + "step": 4031 + }, + { + "epoch": 1.0706413490904263, + "grad_norm": 1.1006436240463247, + "learning_rate": 9.664169603972091e-06, + "loss": 0.24105575680732727, + "step": 4032 + }, + { + "epoch": 1.0709069180719692, + "grad_norm": 1.336482466569566, + "learning_rate": 9.659781347767584e-06, + "loss": 0.27791836857795715, + "step": 4033 + }, + { + "epoch": 1.0711724870535122, + "grad_norm": 1.1518461528529822, + "learning_rate": 9.655393157153221e-06, + "loss": 0.255472868680954, + "step": 4034 + }, + { + "epoch": 1.0714380560350552, + "grad_norm": 1.371220848551681, + "learning_rate": 9.651005032974994e-06, + "loss": 0.2523707151412964, + "step": 4035 + }, + { + "epoch": 1.071703625016598, + "grad_norm": 1.235756547113907, + "learning_rate": 9.64661697607888e-06, + "loss": 0.24584606289863586, + "step": 4036 + }, + { + "epoch": 1.071969193998141, + "grad_norm": 1.1497174260677319, + "learning_rate": 9.64222898731084e-06, + "loss": 0.25182732939720154, + "step": 4037 + }, + { + "epoch": 1.072234762979684, + "grad_norm": 1.0822892740683951, + "learning_rate": 9.637841067516837e-06, + "loss": 0.254008412361145, + "step": 4038 + }, + { + "epoch": 1.072500331961227, + "grad_norm": 1.080204167750926, + "learning_rate": 9.633453217542806e-06, + "loss": 0.2314324826002121, + "step": 4039 + }, + { + "epoch": 1.07276590094277, + "grad_norm": 1.1139945732367915, + "learning_rate": 9.62906543823467e-06, + "loss": 0.2256058305501938, + "step": 4040 + }, + { + "epoch": 1.0730314699243129, + "grad_norm": 1.283214941862177, + "learning_rate": 9.624677730438344e-06, + "loss": 0.2577894330024719, + "step": 4041 + }, + { + "epoch": 1.0732970389058558, + "grad_norm": 1.0911199623079508, + 
"learning_rate": 9.620290094999723e-06, + "loss": 0.23520560562610626, + "step": 4042 + }, + { + "epoch": 1.0735626078873988, + "grad_norm": 1.1791405346126818, + "learning_rate": 9.615902532764695e-06, + "loss": 0.2472849190235138, + "step": 4043 + }, + { + "epoch": 1.0738281768689417, + "grad_norm": 1.2195787110249676, + "learning_rate": 9.611515044579128e-06, + "loss": 0.25053414702415466, + "step": 4044 + }, + { + "epoch": 1.0740937458504847, + "grad_norm": 1.1090102650773974, + "learning_rate": 9.607127631288879e-06, + "loss": 0.24229007959365845, + "step": 4045 + }, + { + "epoch": 1.0743593148320276, + "grad_norm": 1.4628298980675831, + "learning_rate": 9.602740293739786e-06, + "loss": 0.2793073058128357, + "step": 4046 + }, + { + "epoch": 1.0746248838135706, + "grad_norm": 1.225079236387791, + "learning_rate": 9.598353032777682e-06, + "loss": 0.24547399580478668, + "step": 4047 + }, + { + "epoch": 1.0748904527951135, + "grad_norm": 1.1980997957436126, + "learning_rate": 9.593965849248378e-06, + "loss": 0.2776937186717987, + "step": 4048 + }, + { + "epoch": 1.0751560217766565, + "grad_norm": 1.0781858695117066, + "learning_rate": 9.589578743997668e-06, + "loss": 0.22677727043628693, + "step": 4049 + }, + { + "epoch": 1.0754215907581994, + "grad_norm": 1.4867723677136682, + "learning_rate": 9.585191717871336e-06, + "loss": 0.23254704475402832, + "step": 4050 + }, + { + "epoch": 1.0756871597397424, + "grad_norm": 1.3243435003953368, + "learning_rate": 9.580804771715148e-06, + "loss": 0.2899828255176544, + "step": 4051 + }, + { + "epoch": 1.0759527287212853, + "grad_norm": 1.1397018772236696, + "learning_rate": 9.576417906374856e-06, + "loss": 0.24632850289344788, + "step": 4052 + }, + { + "epoch": 1.0762182977028283, + "grad_norm": 1.2322214200527608, + "learning_rate": 9.572031122696196e-06, + "loss": 0.2661561369895935, + "step": 4053 + }, + { + "epoch": 1.0764838666843712, + "grad_norm": 1.1394013200357536, + "learning_rate": 9.567644421524889e-06, + "loss": 
0.22364279627799988, + "step": 4054 + }, + { + "epoch": 1.0767494356659142, + "grad_norm": 1.5026366502842776, + "learning_rate": 9.563257803706635e-06, + "loss": 0.26748427748680115, + "step": 4055 + }, + { + "epoch": 1.0770150046474571, + "grad_norm": 1.1794922225625246, + "learning_rate": 9.55887127008713e-06, + "loss": 0.22851283848285675, + "step": 4056 + }, + { + "epoch": 1.077280573629, + "grad_norm": 1.1340260741391435, + "learning_rate": 9.554484821512037e-06, + "loss": 0.2456260323524475, + "step": 4057 + }, + { + "epoch": 1.077546142610543, + "grad_norm": 1.2884657617459025, + "learning_rate": 9.55009845882702e-06, + "loss": 0.2556169629096985, + "step": 4058 + }, + { + "epoch": 1.077811711592086, + "grad_norm": 1.274618544457263, + "learning_rate": 9.545712182877714e-06, + "loss": 0.280727744102478, + "step": 4059 + }, + { + "epoch": 1.078077280573629, + "grad_norm": 1.1205087247319334, + "learning_rate": 9.54132599450974e-06, + "loss": 0.25315386056900024, + "step": 4060 + }, + { + "epoch": 1.078342849555172, + "grad_norm": 1.1990539773915618, + "learning_rate": 9.536939894568704e-06, + "loss": 0.21985477209091187, + "step": 4061 + }, + { + "epoch": 1.0786084185367149, + "grad_norm": 1.1575613416248978, + "learning_rate": 9.532553883900196e-06, + "loss": 0.24329043924808502, + "step": 4062 + }, + { + "epoch": 1.0788739875182578, + "grad_norm": 1.173950465827748, + "learning_rate": 9.528167963349786e-06, + "loss": 0.2362256497144699, + "step": 4063 + }, + { + "epoch": 1.0791395564998008, + "grad_norm": 1.1458704347110154, + "learning_rate": 9.523782133763027e-06, + "loss": 0.23685476183891296, + "step": 4064 + }, + { + "epoch": 1.0794051254813437, + "grad_norm": 1.2383774104342302, + "learning_rate": 9.519396395985456e-06, + "loss": 0.26232481002807617, + "step": 4065 + }, + { + "epoch": 1.0796706944628867, + "grad_norm": 1.2768574792534622, + "learning_rate": 9.515010750862594e-06, + "loss": 0.25196313858032227, + "step": 4066 + }, + { + "epoch": 
1.0799362634444296, + "grad_norm": 1.082792256362845, + "learning_rate": 9.510625199239939e-06, + "loss": 0.22520464658737183, + "step": 4067 + }, + { + "epoch": 1.0802018324259726, + "grad_norm": 1.190229461562689, + "learning_rate": 9.506239741962971e-06, + "loss": 0.27422505617141724, + "step": 4068 + }, + { + "epoch": 1.0804674014075155, + "grad_norm": 1.3120430811123187, + "learning_rate": 9.50185437987716e-06, + "loss": 0.2646682560443878, + "step": 4069 + }, + { + "epoch": 1.0807329703890585, + "grad_norm": 1.3425819541318131, + "learning_rate": 9.497469113827949e-06, + "loss": 0.2661365866661072, + "step": 4070 + }, + { + "epoch": 1.0809985393706014, + "grad_norm": 1.1101351469883673, + "learning_rate": 9.493083944660766e-06, + "loss": 0.23156839609146118, + "step": 4071 + }, + { + "epoch": 1.0812641083521444, + "grad_norm": 1.1805541153651362, + "learning_rate": 9.488698873221021e-06, + "loss": 0.25353243947029114, + "step": 4072 + }, + { + "epoch": 1.0815296773336873, + "grad_norm": 1.2862671823918606, + "learning_rate": 9.484313900354099e-06, + "loss": 0.27488404512405396, + "step": 4073 + }, + { + "epoch": 1.0817952463152305, + "grad_norm": 1.4041005997261422, + "learning_rate": 9.479929026905378e-06, + "loss": 0.2580753564834595, + "step": 4074 + }, + { + "epoch": 1.0820608152967734, + "grad_norm": 1.1405056260482733, + "learning_rate": 9.475544253720206e-06, + "loss": 0.2425471544265747, + "step": 4075 + }, + { + "epoch": 1.0823263842783164, + "grad_norm": 1.2040355319488043, + "learning_rate": 9.471159581643918e-06, + "loss": 0.25268295407295227, + "step": 4076 + }, + { + "epoch": 1.0825919532598594, + "grad_norm": 1.1573228524057126, + "learning_rate": 9.466775011521825e-06, + "loss": 0.2683602571487427, + "step": 4077 + }, + { + "epoch": 1.0828575222414023, + "grad_norm": 1.1300610618916742, + "learning_rate": 9.462390544199221e-06, + "loss": 0.24945034086704254, + "step": 4078 + }, + { + "epoch": 1.0831230912229453, + "grad_norm": 
1.1698494765527112, + "learning_rate": 9.458006180521379e-06, + "loss": 0.21784156560897827, + "step": 4079 + }, + { + "epoch": 1.0833886602044882, + "grad_norm": 1.136268907040887, + "learning_rate": 9.453621921333554e-06, + "loss": 0.22704020142555237, + "step": 4080 + }, + { + "epoch": 1.0836542291860312, + "grad_norm": 1.1373990713388034, + "learning_rate": 9.449237767480979e-06, + "loss": 0.2532106637954712, + "step": 4081 + }, + { + "epoch": 1.0839197981675741, + "grad_norm": 1.1568862012297532, + "learning_rate": 9.444853719808864e-06, + "loss": 0.27809134125709534, + "step": 4082 + }, + { + "epoch": 1.084185367149117, + "grad_norm": 1.2102387789201872, + "learning_rate": 9.440469779162407e-06, + "loss": 0.25704264640808105, + "step": 4083 + }, + { + "epoch": 1.08445093613066, + "grad_norm": 1.1827141084910668, + "learning_rate": 9.436085946386778e-06, + "loss": 0.2656276226043701, + "step": 4084 + }, + { + "epoch": 1.084716505112203, + "grad_norm": 1.256991317445651, + "learning_rate": 9.431702222327126e-06, + "loss": 0.277826726436615, + "step": 4085 + }, + { + "epoch": 1.084982074093746, + "grad_norm": 1.2975495041461134, + "learning_rate": 9.427318607828584e-06, + "loss": 0.24656976759433746, + "step": 4086 + }, + { + "epoch": 1.0852476430752889, + "grad_norm": 1.1974770836803283, + "learning_rate": 9.42293510373626e-06, + "loss": 0.2498110830783844, + "step": 4087 + }, + { + "epoch": 1.0855132120568318, + "grad_norm": 1.1492935678310237, + "learning_rate": 9.418551710895243e-06, + "loss": 0.24574093520641327, + "step": 4088 + }, + { + "epoch": 1.0857787810383748, + "grad_norm": 1.2274895872775384, + "learning_rate": 9.414168430150601e-06, + "loss": 0.25271761417388916, + "step": 4089 + }, + { + "epoch": 1.0860443500199177, + "grad_norm": 1.1759358027679858, + "learning_rate": 9.409785262347373e-06, + "loss": 0.29269370436668396, + "step": 4090 + }, + { + "epoch": 1.0863099190014607, + "grad_norm": 1.1247973273146177, + "learning_rate": 
9.405402208330581e-06, + "loss": 0.244449645280838, + "step": 4091 + }, + { + "epoch": 1.0865754879830036, + "grad_norm": 1.186787867713906, + "learning_rate": 9.401019268945237e-06, + "loss": 0.23785406351089478, + "step": 4092 + }, + { + "epoch": 1.0868410569645466, + "grad_norm": 1.1479686632621091, + "learning_rate": 9.39663644503631e-06, + "loss": 0.2493479996919632, + "step": 4093 + }, + { + "epoch": 1.0871066259460895, + "grad_norm": 1.1474347559215512, + "learning_rate": 9.392253737448764e-06, + "loss": 0.23758000135421753, + "step": 4094 + }, + { + "epoch": 1.0873721949276325, + "grad_norm": 1.0946885138749496, + "learning_rate": 9.387871147027528e-06, + "loss": 0.22560475766658783, + "step": 4095 + }, + { + "epoch": 1.0876377639091754, + "grad_norm": 1.1552533162715968, + "learning_rate": 9.383488674617515e-06, + "loss": 0.2558273673057556, + "step": 4096 + }, + { + "epoch": 1.0879033328907184, + "grad_norm": 1.2619180705972233, + "learning_rate": 9.379106321063618e-06, + "loss": 0.2822023034095764, + "step": 4097 + }, + { + "epoch": 1.0881689018722613, + "grad_norm": 1.2076346653444254, + "learning_rate": 9.374724087210698e-06, + "loss": 0.2596978545188904, + "step": 4098 + }, + { + "epoch": 1.0884344708538043, + "grad_norm": 1.6785014002913365, + "learning_rate": 9.370341973903597e-06, + "loss": 0.25353628396987915, + "step": 4099 + }, + { + "epoch": 1.0887000398353472, + "grad_norm": 1.2184499887942242, + "learning_rate": 9.365959981987135e-06, + "loss": 0.2547294497489929, + "step": 4100 + }, + { + "epoch": 1.0889656088168902, + "grad_norm": 1.40658558629773, + "learning_rate": 9.361578112306115e-06, + "loss": 0.2688470780849457, + "step": 4101 + }, + { + "epoch": 1.0892311777984331, + "grad_norm": 1.207208011814592, + "learning_rate": 9.357196365705303e-06, + "loss": 0.25772029161453247, + "step": 4102 + }, + { + "epoch": 1.089496746779976, + "grad_norm": 1.3552039168974384, + "learning_rate": 9.352814743029454e-06, + "loss": 0.2875550091266632, + 
"step": 4103 + }, + { + "epoch": 1.089762315761519, + "grad_norm": 1.4164869081453233, + "learning_rate": 9.34843324512329e-06, + "loss": 0.23085735738277435, + "step": 4104 + }, + { + "epoch": 1.090027884743062, + "grad_norm": 1.2013725541896922, + "learning_rate": 9.34405187283151e-06, + "loss": 0.2607901096343994, + "step": 4105 + }, + { + "epoch": 1.090293453724605, + "grad_norm": 1.1738523720935938, + "learning_rate": 9.339670626998791e-06, + "loss": 0.26165345311164856, + "step": 4106 + }, + { + "epoch": 1.090559022706148, + "grad_norm": 1.1931234826270498, + "learning_rate": 9.335289508469789e-06, + "loss": 0.27884238958358765, + "step": 4107 + }, + { + "epoch": 1.0908245916876909, + "grad_norm": 1.283025870689831, + "learning_rate": 9.33090851808913e-06, + "loss": 0.2689289152622223, + "step": 4108 + }, + { + "epoch": 1.0910901606692338, + "grad_norm": 1.2574326426613287, + "learning_rate": 9.326527656701414e-06, + "loss": 0.2633207440376282, + "step": 4109 + }, + { + "epoch": 1.0913557296507768, + "grad_norm": 1.1611202948336292, + "learning_rate": 9.322146925151226e-06, + "loss": 0.26001888513565063, + "step": 4110 + }, + { + "epoch": 1.0916212986323197, + "grad_norm": 1.1436383156785508, + "learning_rate": 9.31776632428312e-06, + "loss": 0.2739099860191345, + "step": 4111 + }, + { + "epoch": 1.0918868676138627, + "grad_norm": 1.1080458686771364, + "learning_rate": 9.313385854941616e-06, + "loss": 0.24885550141334534, + "step": 4112 + }, + { + "epoch": 1.0921524365954056, + "grad_norm": 1.1643870148920956, + "learning_rate": 9.309005517971222e-06, + "loss": 0.2609873414039612, + "step": 4113 + }, + { + "epoch": 1.0924180055769486, + "grad_norm": 1.427636157796487, + "learning_rate": 9.304625314216415e-06, + "loss": 0.28853538632392883, + "step": 4114 + }, + { + "epoch": 1.0926835745584915, + "grad_norm": 1.072833070391428, + "learning_rate": 9.300245244521647e-06, + "loss": 0.2629924714565277, + "step": 4115 + }, + { + "epoch": 1.0929491435400345, + 
"grad_norm": 1.1804644749067619, + "learning_rate": 9.295865309731342e-06, + "loss": 0.2687820494174957, + "step": 4116 + }, + { + "epoch": 1.0932147125215774, + "grad_norm": 1.0831905202820669, + "learning_rate": 9.2914855106899e-06, + "loss": 0.2293676733970642, + "step": 4117 + }, + { + "epoch": 1.0934802815031204, + "grad_norm": 1.1645005992728827, + "learning_rate": 9.287105848241694e-06, + "loss": 0.25261443853378296, + "step": 4118 + }, + { + "epoch": 1.0937458504846633, + "grad_norm": 1.1209341991417805, + "learning_rate": 9.282726323231077e-06, + "loss": 0.26238197088241577, + "step": 4119 + }, + { + "epoch": 1.0940114194662063, + "grad_norm": 1.1230838898563178, + "learning_rate": 9.278346936502364e-06, + "loss": 0.25718310475349426, + "step": 4120 + }, + { + "epoch": 1.0942769884477492, + "grad_norm": 1.1872711264618019, + "learning_rate": 9.273967688899849e-06, + "loss": 0.23810459673404694, + "step": 4121 + }, + { + "epoch": 1.0945425574292922, + "grad_norm": 1.0680734314830214, + "learning_rate": 9.269588581267804e-06, + "loss": 0.2197081446647644, + "step": 4122 + }, + { + "epoch": 1.0948081264108351, + "grad_norm": 1.1043223190124707, + "learning_rate": 9.265209614450463e-06, + "loss": 0.2429335117340088, + "step": 4123 + }, + { + "epoch": 1.095073695392378, + "grad_norm": 1.1380552272436657, + "learning_rate": 9.260830789292043e-06, + "loss": 0.23028087615966797, + "step": 4124 + }, + { + "epoch": 1.095339264373921, + "grad_norm": 1.2203393500716264, + "learning_rate": 9.25645210663673e-06, + "loss": 0.2783699035644531, + "step": 4125 + }, + { + "epoch": 1.095604833355464, + "grad_norm": 1.1686978964802806, + "learning_rate": 9.25207356732868e-06, + "loss": 0.25055867433547974, + "step": 4126 + }, + { + "epoch": 1.095870402337007, + "grad_norm": 1.2313132067115398, + "learning_rate": 9.247695172212026e-06, + "loss": 0.28629350662231445, + "step": 4127 + }, + { + "epoch": 1.09613597131855, + "grad_norm": 1.2403423880097748, + "learning_rate": 
9.24331692213087e-06, + "loss": 0.2626604735851288, + "step": 4128 + }, + { + "epoch": 1.0964015403000928, + "grad_norm": 1.2478078302425437, + "learning_rate": 9.238938817929288e-06, + "loss": 0.237881600856781, + "step": 4129 + }, + { + "epoch": 1.0966671092816358, + "grad_norm": 1.144955023428898, + "learning_rate": 9.234560860451325e-06, + "loss": 0.2602109909057617, + "step": 4130 + }, + { + "epoch": 1.0969326782631788, + "grad_norm": 1.1775071297104545, + "learning_rate": 9.230183050541001e-06, + "loss": 0.2721475064754486, + "step": 4131 + }, + { + "epoch": 1.0971982472447217, + "grad_norm": 1.7664052681173497, + "learning_rate": 9.225805389042307e-06, + "loss": 0.25844910740852356, + "step": 4132 + }, + { + "epoch": 1.0974638162262647, + "grad_norm": 1.1612334633259545, + "learning_rate": 9.221427876799201e-06, + "loss": 0.26671040058135986, + "step": 4133 + }, + { + "epoch": 1.0977293852078078, + "grad_norm": 1.3116748641368057, + "learning_rate": 9.21705051465562e-06, + "loss": 0.2610115706920624, + "step": 4134 + }, + { + "epoch": 1.0979949541893508, + "grad_norm": 1.1348320206960383, + "learning_rate": 9.212673303455464e-06, + "loss": 0.2518802881240845, + "step": 4135 + }, + { + "epoch": 1.0982605231708937, + "grad_norm": 1.2313324732863455, + "learning_rate": 9.20829624404261e-06, + "loss": 0.28600364923477173, + "step": 4136 + }, + { + "epoch": 1.0985260921524367, + "grad_norm": 1.0787729379648288, + "learning_rate": 9.203919337260903e-06, + "loss": 0.2649504840373993, + "step": 4137 + }, + { + "epoch": 1.0987916611339796, + "grad_norm": 1.0717018301402161, + "learning_rate": 9.199542583954159e-06, + "loss": 0.22613298892974854, + "step": 4138 + }, + { + "epoch": 1.0990572301155226, + "grad_norm": 1.1049408193201318, + "learning_rate": 9.195165984966163e-06, + "loss": 0.22546961903572083, + "step": 4139 + }, + { + "epoch": 1.0993227990970655, + "grad_norm": 1.1132579479037434, + "learning_rate": 9.190789541140675e-06, + "loss": 0.20618169009685516, + 
"step": 4140 + }, + { + "epoch": 1.0995883680786085, + "grad_norm": 1.1910818165933836, + "learning_rate": 9.18641325332142e-06, + "loss": 0.2434382289648056, + "step": 4141 + }, + { + "epoch": 1.0998539370601514, + "grad_norm": 1.0160349259469954, + "learning_rate": 9.182037122352092e-06, + "loss": 0.19114840030670166, + "step": 4142 + }, + { + "epoch": 1.1001195060416944, + "grad_norm": 1.371175220167047, + "learning_rate": 9.17766114907636e-06, + "loss": 0.2793614864349365, + "step": 4143 + }, + { + "epoch": 1.1003850750232373, + "grad_norm": 1.3230746818872392, + "learning_rate": 9.173285334337863e-06, + "loss": 0.2908466160297394, + "step": 4144 + }, + { + "epoch": 1.1006506440047803, + "grad_norm": 1.1707475106499343, + "learning_rate": 9.168909678980199e-06, + "loss": 0.260933518409729, + "step": 4145 + }, + { + "epoch": 1.1009162129863233, + "grad_norm": 1.170079737982666, + "learning_rate": 9.16453418384695e-06, + "loss": 0.2819761037826538, + "step": 4146 + }, + { + "epoch": 1.1011817819678662, + "grad_norm": 1.251357168283767, + "learning_rate": 9.160158849781657e-06, + "loss": 0.25290411710739136, + "step": 4147 + }, + { + "epoch": 1.1014473509494092, + "grad_norm": 1.0782378998536035, + "learning_rate": 9.155783677627831e-06, + "loss": 0.21255841851234436, + "step": 4148 + }, + { + "epoch": 1.101712919930952, + "grad_norm": 0.9808101112826028, + "learning_rate": 9.151408668228958e-06, + "loss": 0.20631751418113708, + "step": 4149 + }, + { + "epoch": 1.101978488912495, + "grad_norm": 1.0273447794760797, + "learning_rate": 9.147033822428484e-06, + "loss": 0.20976273715496063, + "step": 4150 + }, + { + "epoch": 1.102244057894038, + "grad_norm": 1.0193138467531315, + "learning_rate": 9.142659141069828e-06, + "loss": 0.21464477479457855, + "step": 4151 + }, + { + "epoch": 1.102509626875581, + "grad_norm": 1.182770191723374, + "learning_rate": 9.13828462499638e-06, + "loss": 0.2262338101863861, + "step": 4152 + }, + { + "epoch": 1.102775195857124, + 
"grad_norm": 1.2057409707570275, + "learning_rate": 9.133910275051493e-06, + "loss": 0.26331469416618347, + "step": 4153 + }, + { + "epoch": 1.1030407648386669, + "grad_norm": 1.1729382721759571, + "learning_rate": 9.129536092078488e-06, + "loss": 0.26280921697616577, + "step": 4154 + }, + { + "epoch": 1.1033063338202098, + "grad_norm": 1.1474203361843618, + "learning_rate": 9.12516207692066e-06, + "loss": 0.2527182698249817, + "step": 4155 + }, + { + "epoch": 1.1035719028017528, + "grad_norm": 1.114868090084267, + "learning_rate": 9.120788230421267e-06, + "loss": 0.21416455507278442, + "step": 4156 + }, + { + "epoch": 1.1038374717832957, + "grad_norm": 1.149698502937602, + "learning_rate": 9.116414553423535e-06, + "loss": 0.25882014632225037, + "step": 4157 + }, + { + "epoch": 1.1041030407648387, + "grad_norm": 1.1615644224212993, + "learning_rate": 9.112041046770653e-06, + "loss": 0.20510248839855194, + "step": 4158 + }, + { + "epoch": 1.1043686097463816, + "grad_norm": 1.372282887646487, + "learning_rate": 9.107667711305786e-06, + "loss": 0.2348058819770813, + "step": 4159 + }, + { + "epoch": 1.1046341787279246, + "grad_norm": 1.2389958643414019, + "learning_rate": 9.10329454787206e-06, + "loss": 0.24561384320259094, + "step": 4160 + }, + { + "epoch": 1.1048997477094675, + "grad_norm": 1.133562757165387, + "learning_rate": 9.098921557312573e-06, + "loss": 0.23025226593017578, + "step": 4161 + }, + { + "epoch": 1.1051653166910105, + "grad_norm": 1.2483870007074676, + "learning_rate": 9.094548740470375e-06, + "loss": 0.2724589705467224, + "step": 4162 + }, + { + "epoch": 1.1054308856725534, + "grad_norm": 1.2319217483915181, + "learning_rate": 9.090176098188504e-06, + "loss": 0.25196704268455505, + "step": 4163 + }, + { + "epoch": 1.1056964546540964, + "grad_norm": 1.0723466269314343, + "learning_rate": 9.085803631309953e-06, + "loss": 0.22673696279525757, + "step": 4164 + }, + { + "epoch": 1.1059620236356393, + "grad_norm": 1.3129015386402236, + "learning_rate": 
9.081431340677679e-06, + "loss": 0.23913519084453583, + "step": 4165 + }, + { + "epoch": 1.1062275926171823, + "grad_norm": 1.3859005835374885, + "learning_rate": 9.07705922713461e-06, + "loss": 0.2723861336708069, + "step": 4166 + }, + { + "epoch": 1.1064931615987252, + "grad_norm": 1.15651219284811, + "learning_rate": 9.072687291523636e-06, + "loss": 0.262167364358902, + "step": 4167 + }, + { + "epoch": 1.1067587305802682, + "grad_norm": 1.4186208937810438, + "learning_rate": 9.068315534687615e-06, + "loss": 0.2394658625125885, + "step": 4168 + }, + { + "epoch": 1.1070242995618111, + "grad_norm": 1.116555661084851, + "learning_rate": 9.063943957469373e-06, + "loss": 0.2547619938850403, + "step": 4169 + }, + { + "epoch": 1.107289868543354, + "grad_norm": 1.1242129377429575, + "learning_rate": 9.059572560711697e-06, + "loss": 0.24057570099830627, + "step": 4170 + }, + { + "epoch": 1.107555437524897, + "grad_norm": 1.057297781351654, + "learning_rate": 9.055201345257331e-06, + "loss": 0.21729445457458496, + "step": 4171 + }, + { + "epoch": 1.10782100650644, + "grad_norm": 1.2310508574302907, + "learning_rate": 9.05083031194901e-06, + "loss": 0.26590001583099365, + "step": 4172 + }, + { + "epoch": 1.108086575487983, + "grad_norm": 1.2932563576951384, + "learning_rate": 9.04645946162941e-06, + "loss": 0.26114848256111145, + "step": 4173 + }, + { + "epoch": 1.108352144469526, + "grad_norm": 1.1776684059902396, + "learning_rate": 9.04208879514118e-06, + "loss": 0.2255469262599945, + "step": 4174 + }, + { + "epoch": 1.1086177134510689, + "grad_norm": 1.1791871226781019, + "learning_rate": 9.037718313326932e-06, + "loss": 0.2597671151161194, + "step": 4175 + }, + { + "epoch": 1.1088832824326118, + "grad_norm": 1.1140795273935102, + "learning_rate": 9.033348017029247e-06, + "loss": 0.24820469319820404, + "step": 4176 + }, + { + "epoch": 1.1091488514141548, + "grad_norm": 1.2459789693741423, + "learning_rate": 9.028977907090661e-06, + "loss": 0.23886600136756897, + "step": 
4177 + }, + { + "epoch": 1.1094144203956977, + "grad_norm": 1.091274384086243, + "learning_rate": 9.024607984353682e-06, + "loss": 0.24204152822494507, + "step": 4178 + }, + { + "epoch": 1.1096799893772407, + "grad_norm": 1.0934112812518066, + "learning_rate": 9.02023824966078e-06, + "loss": 0.23246638476848602, + "step": 4179 + }, + { + "epoch": 1.1099455583587836, + "grad_norm": 1.124332043141092, + "learning_rate": 9.015868703854386e-06, + "loss": 0.25057342648506165, + "step": 4180 + }, + { + "epoch": 1.1102111273403266, + "grad_norm": 1.117105393632997, + "learning_rate": 9.011499347776902e-06, + "loss": 0.2316257357597351, + "step": 4181 + }, + { + "epoch": 1.1104766963218695, + "grad_norm": 1.4294765240232425, + "learning_rate": 9.007130182270685e-06, + "loss": 0.24824783205986023, + "step": 4182 + }, + { + "epoch": 1.1107422653034125, + "grad_norm": 1.1667528236187257, + "learning_rate": 9.002761208178059e-06, + "loss": 0.25174480676651, + "step": 4183 + }, + { + "epoch": 1.1110078342849554, + "grad_norm": 1.0615254217045484, + "learning_rate": 8.998392426341313e-06, + "loss": 0.22364717721939087, + "step": 4184 + }, + { + "epoch": 1.1112734032664984, + "grad_norm": 1.0478203412338092, + "learning_rate": 8.994023837602694e-06, + "loss": 0.2205432504415512, + "step": 4185 + }, + { + "epoch": 1.1115389722480415, + "grad_norm": 1.4181125559874541, + "learning_rate": 8.989655442804413e-06, + "loss": 0.23303675651550293, + "step": 4186 + }, + { + "epoch": 1.1118045412295845, + "grad_norm": 1.2558407878646785, + "learning_rate": 8.985287242788646e-06, + "loss": 0.3003222644329071, + "step": 4187 + }, + { + "epoch": 1.1120701102111274, + "grad_norm": 1.146183553652687, + "learning_rate": 8.980919238397532e-06, + "loss": 0.2734413146972656, + "step": 4188 + }, + { + "epoch": 1.1123356791926704, + "grad_norm": 1.200748942223162, + "learning_rate": 8.976551430473166e-06, + "loss": 0.24086692929267883, + "step": 4189 + }, + { + "epoch": 1.1126012481742134, + 
"grad_norm": 1.2277073829430902, + "learning_rate": 8.972183819857618e-06, + "loss": 0.2531188130378723, + "step": 4190 + }, + { + "epoch": 1.1128668171557563, + "grad_norm": 1.1067327267341682, + "learning_rate": 8.96781640739291e-06, + "loss": 0.25059640407562256, + "step": 4191 + }, + { + "epoch": 1.1131323861372993, + "grad_norm": 1.1987793097859372, + "learning_rate": 8.963449193921023e-06, + "loss": 0.22427335381507874, + "step": 4192 + }, + { + "epoch": 1.1133979551188422, + "grad_norm": 1.1842662472837817, + "learning_rate": 8.959082180283906e-06, + "loss": 0.28835898637771606, + "step": 4193 + }, + { + "epoch": 1.1136635241003852, + "grad_norm": 1.1161865281550452, + "learning_rate": 8.954715367323468e-06, + "loss": 0.23919034004211426, + "step": 4194 + }, + { + "epoch": 1.1139290930819281, + "grad_norm": 1.186821665962327, + "learning_rate": 8.950348755881578e-06, + "loss": 0.24583986401557922, + "step": 4195 + }, + { + "epoch": 1.114194662063471, + "grad_norm": 1.2519292440490923, + "learning_rate": 8.94598234680007e-06, + "loss": 0.23869696259498596, + "step": 4196 + }, + { + "epoch": 1.114460231045014, + "grad_norm": 1.1662462204488522, + "learning_rate": 8.941616140920734e-06, + "loss": 0.2672434449195862, + "step": 4197 + }, + { + "epoch": 1.114725800026557, + "grad_norm": 1.2253961517889995, + "learning_rate": 8.937250139085322e-06, + "loss": 0.2660336494445801, + "step": 4198 + }, + { + "epoch": 1.1149913690081, + "grad_norm": 1.1608224464613695, + "learning_rate": 8.932884342135552e-06, + "loss": 0.26461780071258545, + "step": 4199 + }, + { + "epoch": 1.1152569379896429, + "grad_norm": 1.1632580978978435, + "learning_rate": 8.928518750913094e-06, + "loss": 0.22947481274604797, + "step": 4200 + }, + { + "epoch": 1.1155225069711858, + "grad_norm": 1.116659758904741, + "learning_rate": 8.924153366259584e-06, + "loss": 0.22715970873832703, + "step": 4201 + }, + { + "epoch": 1.1157880759527288, + "grad_norm": 1.3785482068816968, + "learning_rate": 
8.919788189016618e-06, + "loss": 0.2994215190410614, + "step": 4202 + }, + { + "epoch": 1.1160536449342717, + "grad_norm": 1.158412598714371, + "learning_rate": 8.915423220025747e-06, + "loss": 0.2290656566619873, + "step": 4203 + }, + { + "epoch": 1.1163192139158147, + "grad_norm": 1.093685203516635, + "learning_rate": 8.911058460128489e-06, + "loss": 0.22284844517707825, + "step": 4204 + }, + { + "epoch": 1.1165847828973576, + "grad_norm": 1.0534371355750514, + "learning_rate": 8.906693910166316e-06, + "loss": 0.2095392495393753, + "step": 4205 + }, + { + "epoch": 1.1168503518789006, + "grad_norm": 1.197609739800315, + "learning_rate": 8.902329570980665e-06, + "loss": 0.25098133087158203, + "step": 4206 + }, + { + "epoch": 1.1171159208604435, + "grad_norm": 1.1630125842119448, + "learning_rate": 8.897965443412923e-06, + "loss": 0.24768148362636566, + "step": 4207 + }, + { + "epoch": 1.1173814898419865, + "grad_norm": 1.1213395777051767, + "learning_rate": 8.89360152830445e-06, + "loss": 0.22255480289459229, + "step": 4208 + }, + { + "epoch": 1.1176470588235294, + "grad_norm": 1.2306365389400118, + "learning_rate": 8.889237826496551e-06, + "loss": 0.23721200227737427, + "step": 4209 + }, + { + "epoch": 1.1179126278050724, + "grad_norm": 1.1422779685655824, + "learning_rate": 8.8848743388305e-06, + "loss": 0.25002530217170715, + "step": 4210 + }, + { + "epoch": 1.1181781967866153, + "grad_norm": 1.2862841308153614, + "learning_rate": 8.880511066147524e-06, + "loss": 0.27188029885292053, + "step": 4211 + }, + { + "epoch": 1.1184437657681583, + "grad_norm": 1.1517061730387759, + "learning_rate": 8.876148009288813e-06, + "loss": 0.23056066036224365, + "step": 4212 + }, + { + "epoch": 1.1187093347497012, + "grad_norm": 1.172676602980077, + "learning_rate": 8.87178516909551e-06, + "loss": 0.2336079478263855, + "step": 4213 + }, + { + "epoch": 1.1189749037312442, + "grad_norm": 1.1868473876345316, + "learning_rate": 8.86742254640872e-06, + "loss": 0.27449533343315125, + 
"step": 4214 + }, + { + "epoch": 1.1192404727127871, + "grad_norm": 1.1500112066365369, + "learning_rate": 8.863060142069508e-06, + "loss": 0.24714893102645874, + "step": 4215 + }, + { + "epoch": 1.11950604169433, + "grad_norm": 1.072070573678295, + "learning_rate": 8.858697956918886e-06, + "loss": 0.2155439257621765, + "step": 4216 + }, + { + "epoch": 1.119771610675873, + "grad_norm": 1.1798452175680678, + "learning_rate": 8.854335991797842e-06, + "loss": 0.23189155757427216, + "step": 4217 + }, + { + "epoch": 1.120037179657416, + "grad_norm": 1.0773206236657924, + "learning_rate": 8.849974247547307e-06, + "loss": 0.23413527011871338, + "step": 4218 + }, + { + "epoch": 1.120302748638959, + "grad_norm": 1.1991513784988423, + "learning_rate": 8.845612725008173e-06, + "loss": 0.2569039463996887, + "step": 4219 + }, + { + "epoch": 1.120568317620502, + "grad_norm": 1.1795807532964264, + "learning_rate": 8.84125142502129e-06, + "loss": 0.2699541449546814, + "step": 4220 + }, + { + "epoch": 1.1208338866020449, + "grad_norm": 1.1092727759218166, + "learning_rate": 8.836890348427468e-06, + "loss": 0.27172449231147766, + "step": 4221 + }, + { + "epoch": 1.1210994555835878, + "grad_norm": 1.2315684717645485, + "learning_rate": 8.83252949606747e-06, + "loss": 0.2839444875717163, + "step": 4222 + }, + { + "epoch": 1.1213650245651308, + "grad_norm": 1.1676850588618106, + "learning_rate": 8.828168868782013e-06, + "loss": 0.22782178223133087, + "step": 4223 + }, + { + "epoch": 1.1216305935466737, + "grad_norm": 1.132889704492098, + "learning_rate": 8.82380846741178e-06, + "loss": 0.2567726671695709, + "step": 4224 + }, + { + "epoch": 1.1218961625282167, + "grad_norm": 1.1872540675130212, + "learning_rate": 8.8194482927974e-06, + "loss": 0.25879523158073425, + "step": 4225 + }, + { + "epoch": 1.1221617315097596, + "grad_norm": 1.0193477801534692, + "learning_rate": 8.815088345779466e-06, + "loss": 0.22109058499336243, + "step": 4226 + }, + { + "epoch": 1.1224273004913026, + 
"grad_norm": 1.1414592493281657, + "learning_rate": 8.810728627198526e-06, + "loss": 0.23615925014019012, + "step": 4227 + }, + { + "epoch": 1.1226928694728455, + "grad_norm": 1.160290266155045, + "learning_rate": 8.806369137895081e-06, + "loss": 0.2751353085041046, + "step": 4228 + }, + { + "epoch": 1.1229584384543885, + "grad_norm": 1.2566953981709197, + "learning_rate": 8.802009878709587e-06, + "loss": 0.2361963391304016, + "step": 4229 + }, + { + "epoch": 1.1232240074359314, + "grad_norm": 1.186723455251228, + "learning_rate": 8.79765085048246e-06, + "loss": 0.22435930371284485, + "step": 4230 + }, + { + "epoch": 1.1234895764174744, + "grad_norm": 1.1759467333820823, + "learning_rate": 8.79329205405407e-06, + "loss": 0.2355855256319046, + "step": 4231 + }, + { + "epoch": 1.1237551453990173, + "grad_norm": 1.1450490838951077, + "learning_rate": 8.78893349026474e-06, + "loss": 0.24127572774887085, + "step": 4232 + }, + { + "epoch": 1.1240207143805603, + "grad_norm": 1.222656849347683, + "learning_rate": 8.784575159954748e-06, + "loss": 0.2677989602088928, + "step": 4233 + }, + { + "epoch": 1.1242862833621032, + "grad_norm": 1.109384474337522, + "learning_rate": 8.78021706396433e-06, + "loss": 0.2283135950565338, + "step": 4234 + }, + { + "epoch": 1.1245518523436462, + "grad_norm": 1.1669732456316693, + "learning_rate": 8.775859203133678e-06, + "loss": 0.2686103582382202, + "step": 4235 + }, + { + "epoch": 1.1248174213251891, + "grad_norm": 1.3869789172842044, + "learning_rate": 8.771501578302934e-06, + "loss": 0.2638726234436035, + "step": 4236 + }, + { + "epoch": 1.125082990306732, + "grad_norm": 1.0752600847920544, + "learning_rate": 8.767144190312196e-06, + "loss": 0.2517441511154175, + "step": 4237 + }, + { + "epoch": 1.125348559288275, + "grad_norm": 1.1903096570499558, + "learning_rate": 8.762787040001518e-06, + "loss": 0.2593642771244049, + "step": 4238 + }, + { + "epoch": 1.125614128269818, + "grad_norm": 1.123653942868709, + "learning_rate": 
8.758430128210908e-06, + "loss": 0.23758336901664734, + "step": 4239 + }, + { + "epoch": 1.125879697251361, + "grad_norm": 1.182033088729647, + "learning_rate": 8.754073455780327e-06, + "loss": 0.2557980716228485, + "step": 4240 + }, + { + "epoch": 1.126145266232904, + "grad_norm": 1.1182311632466304, + "learning_rate": 8.74971702354969e-06, + "loss": 0.2484067678451538, + "step": 4241 + }, + { + "epoch": 1.1264108352144468, + "grad_norm": 1.121886097833982, + "learning_rate": 8.745360832358864e-06, + "loss": 0.23103098571300507, + "step": 4242 + }, + { + "epoch": 1.1266764041959898, + "grad_norm": 1.1856800379472048, + "learning_rate": 8.741004883047667e-06, + "loss": 0.2630731463432312, + "step": 4243 + }, + { + "epoch": 1.1269419731775328, + "grad_norm": 1.1814851216743405, + "learning_rate": 8.736649176455885e-06, + "loss": 0.2413114309310913, + "step": 4244 + }, + { + "epoch": 1.1272075421590757, + "grad_norm": 1.1465608986560651, + "learning_rate": 8.732293713423243e-06, + "loss": 0.22463169693946838, + "step": 4245 + }, + { + "epoch": 1.1274731111406187, + "grad_norm": 1.1943136125759177, + "learning_rate": 8.727938494789421e-06, + "loss": 0.23641429841518402, + "step": 4246 + }, + { + "epoch": 1.1277386801221616, + "grad_norm": 1.399290186521162, + "learning_rate": 8.723583521394054e-06, + "loss": 0.2547767162322998, + "step": 4247 + }, + { + "epoch": 1.1280042491037048, + "grad_norm": 1.1274578262359225, + "learning_rate": 8.719228794076733e-06, + "loss": 0.25753074884414673, + "step": 4248 + }, + { + "epoch": 1.1282698180852477, + "grad_norm": 1.2581544322188265, + "learning_rate": 8.714874313676992e-06, + "loss": 0.30602240562438965, + "step": 4249 + }, + { + "epoch": 1.1285353870667907, + "grad_norm": 1.3693509289176364, + "learning_rate": 8.710520081034328e-06, + "loss": 0.28336623311042786, + "step": 4250 + }, + { + "epoch": 1.1288009560483336, + "grad_norm": 1.179198933472593, + "learning_rate": 8.706166096988185e-06, + "loss": 0.24065867066383362, + 
"step": 4251 + }, + { + "epoch": 1.1290665250298766, + "grad_norm": 1.1350442144429624, + "learning_rate": 8.701812362377954e-06, + "loss": 0.25674968957901, + "step": 4252 + }, + { + "epoch": 1.1293320940114195, + "grad_norm": 1.0526431620404462, + "learning_rate": 8.697458878042992e-06, + "loss": 0.21502923965454102, + "step": 4253 + }, + { + "epoch": 1.1295976629929625, + "grad_norm": 1.199807552125115, + "learning_rate": 8.693105644822598e-06, + "loss": 0.26848286390304565, + "step": 4254 + }, + { + "epoch": 1.1298632319745054, + "grad_norm": 1.1632395937948599, + "learning_rate": 8.688752663556022e-06, + "loss": 0.24283824861049652, + "step": 4255 + }, + { + "epoch": 1.1301288009560484, + "grad_norm": 1.231861138079484, + "learning_rate": 8.684399935082468e-06, + "loss": 0.2511506974697113, + "step": 4256 + }, + { + "epoch": 1.1303943699375913, + "grad_norm": 1.1293067099587706, + "learning_rate": 8.68004746024109e-06, + "loss": 0.23932483792304993, + "step": 4257 + }, + { + "epoch": 1.1306599389191343, + "grad_norm": 1.229437521917496, + "learning_rate": 8.675695239870993e-06, + "loss": 0.30030694603919983, + "step": 4258 + }, + { + "epoch": 1.1309255079006773, + "grad_norm": 1.1154596754627621, + "learning_rate": 8.671343274811238e-06, + "loss": 0.24699059128761292, + "step": 4259 + }, + { + "epoch": 1.1311910768822202, + "grad_norm": 1.1288414782501015, + "learning_rate": 8.666991565900827e-06, + "loss": 0.26828041672706604, + "step": 4260 + }, + { + "epoch": 1.1314566458637632, + "grad_norm": 1.0765132569205758, + "learning_rate": 8.662640113978717e-06, + "loss": 0.2372082769870758, + "step": 4261 + }, + { + "epoch": 1.131722214845306, + "grad_norm": 1.2100447285144145, + "learning_rate": 8.658288919883824e-06, + "loss": 0.26367881894111633, + "step": 4262 + }, + { + "epoch": 1.131987783826849, + "grad_norm": 1.1035052537421275, + "learning_rate": 8.653937984455007e-06, + "loss": 0.2287222146987915, + "step": 4263 + }, + { + "epoch": 1.132253352808392, + 
"grad_norm": 1.1417963040520365, + "learning_rate": 8.649587308531067e-06, + "loss": 0.244521826505661, + "step": 4264 + }, + { + "epoch": 1.132518921789935, + "grad_norm": 1.2243689126496846, + "learning_rate": 8.64523689295077e-06, + "loss": 0.26912257075309753, + "step": 4265 + }, + { + "epoch": 1.132784490771478, + "grad_norm": 1.2384832947619873, + "learning_rate": 8.64088673855282e-06, + "loss": 0.23002780973911285, + "step": 4266 + }, + { + "epoch": 1.1330500597530209, + "grad_norm": 1.253742603342847, + "learning_rate": 8.636536846175878e-06, + "loss": 0.2561958432197571, + "step": 4267 + }, + { + "epoch": 1.1333156287345638, + "grad_norm": 1.2156026453092519, + "learning_rate": 8.63218721665855e-06, + "loss": 0.25553008913993835, + "step": 4268 + }, + { + "epoch": 1.1335811977161068, + "grad_norm": 1.1992385112791626, + "learning_rate": 8.627837850839398e-06, + "loss": 0.1992083340883255, + "step": 4269 + }, + { + "epoch": 1.1338467666976497, + "grad_norm": 1.3643398602160783, + "learning_rate": 8.62348874955692e-06, + "loss": 0.23075388371944427, + "step": 4270 + }, + { + "epoch": 1.1341123356791927, + "grad_norm": 1.1072751580070286, + "learning_rate": 8.619139913649582e-06, + "loss": 0.23691913485527039, + "step": 4271 + }, + { + "epoch": 1.1343779046607356, + "grad_norm": 1.2656689209279672, + "learning_rate": 8.61479134395578e-06, + "loss": 0.2536017894744873, + "step": 4272 + }, + { + "epoch": 1.1346434736422786, + "grad_norm": 1.2870409796681632, + "learning_rate": 8.61044304131387e-06, + "loss": 0.3014161288738251, + "step": 4273 + }, + { + "epoch": 1.1349090426238215, + "grad_norm": 1.1669055614665604, + "learning_rate": 8.606095006562156e-06, + "loss": 0.26333582401275635, + "step": 4274 + }, + { + "epoch": 1.1351746116053645, + "grad_norm": 1.2370251285176135, + "learning_rate": 8.601747240538883e-06, + "loss": 0.23796264827251434, + "step": 4275 + }, + { + "epoch": 1.1354401805869074, + "grad_norm": 1.1989417705813543, + "learning_rate": 
8.597399744082251e-06, + "loss": 0.23737141489982605, + "step": 4276 + }, + { + "epoch": 1.1357057495684504, + "grad_norm": 1.1281376384049915, + "learning_rate": 8.593052518030407e-06, + "loss": 0.21073032915592194, + "step": 4277 + }, + { + "epoch": 1.1359713185499933, + "grad_norm": 1.2935455290015059, + "learning_rate": 8.588705563221444e-06, + "loss": 0.2597163915634155, + "step": 4278 + }, + { + "epoch": 1.1362368875315363, + "grad_norm": 1.137636804234172, + "learning_rate": 8.584358880493402e-06, + "loss": 0.24541154503822327, + "step": 4279 + }, + { + "epoch": 1.1365024565130792, + "grad_norm": 1.1331800338594176, + "learning_rate": 8.580012470684273e-06, + "loss": 0.19294027984142303, + "step": 4280 + }, + { + "epoch": 1.1367680254946222, + "grad_norm": 1.2387583554091215, + "learning_rate": 8.575666334631994e-06, + "loss": 0.26909738779067993, + "step": 4281 + }, + { + "epoch": 1.1370335944761651, + "grad_norm": 1.2850664046416893, + "learning_rate": 8.571320473174444e-06, + "loss": 0.2550502121448517, + "step": 4282 + }, + { + "epoch": 1.137299163457708, + "grad_norm": 1.138070930000495, + "learning_rate": 8.566974887149461e-06, + "loss": 0.2256634682416916, + "step": 4283 + }, + { + "epoch": 1.137564732439251, + "grad_norm": 1.3289753418379673, + "learning_rate": 8.562629577394817e-06, + "loss": 0.26154983043670654, + "step": 4284 + }, + { + "epoch": 1.137830301420794, + "grad_norm": 1.2426566834274124, + "learning_rate": 8.558284544748239e-06, + "loss": 0.24685145914554596, + "step": 4285 + }, + { + "epoch": 1.138095870402337, + "grad_norm": 1.177162412641928, + "learning_rate": 8.553939790047396e-06, + "loss": 0.2584421932697296, + "step": 4286 + }, + { + "epoch": 1.13836143938388, + "grad_norm": 1.2486541463378953, + "learning_rate": 8.549595314129907e-06, + "loss": 0.24582788348197937, + "step": 4287 + }, + { + "epoch": 1.1386270083654229, + "grad_norm": 1.1978925998644077, + "learning_rate": 8.545251117833334e-06, + "loss": 0.26023977994918823, + 
"step": 4288 + }, + { + "epoch": 1.1388925773469658, + "grad_norm": 1.2566090334130535, + "learning_rate": 8.54090720199519e-06, + "loss": 0.25575515627861023, + "step": 4289 + }, + { + "epoch": 1.1391581463285088, + "grad_norm": 1.2234599227483165, + "learning_rate": 8.53656356745293e-06, + "loss": 0.2784460783004761, + "step": 4290 + }, + { + "epoch": 1.1394237153100517, + "grad_norm": 1.11922615590049, + "learning_rate": 8.532220215043953e-06, + "loss": 0.24723297357559204, + "step": 4291 + }, + { + "epoch": 1.1396892842915947, + "grad_norm": 1.1960822646368614, + "learning_rate": 8.52787714560561e-06, + "loss": 0.24694418907165527, + "step": 4292 + }, + { + "epoch": 1.1399548532731376, + "grad_norm": 1.2073723964066632, + "learning_rate": 8.52353435997519e-06, + "loss": 0.19976040720939636, + "step": 4293 + }, + { + "epoch": 1.1402204222546806, + "grad_norm": 1.0875644999756633, + "learning_rate": 8.519191858989932e-06, + "loss": 0.21742458641529083, + "step": 4294 + }, + { + "epoch": 1.1404859912362235, + "grad_norm": 1.2040315384402727, + "learning_rate": 8.514849643487018e-06, + "loss": 0.26382917165756226, + "step": 4295 + }, + { + "epoch": 1.1407515602177665, + "grad_norm": 1.3073789721234685, + "learning_rate": 8.510507714303577e-06, + "loss": 0.30778488516807556, + "step": 4296 + }, + { + "epoch": 1.1410171291993096, + "grad_norm": 1.0727267660957265, + "learning_rate": 8.506166072276681e-06, + "loss": 0.20894449949264526, + "step": 4297 + }, + { + "epoch": 1.1412826981808526, + "grad_norm": 1.2119089915252295, + "learning_rate": 8.50182471824335e-06, + "loss": 0.2389567494392395, + "step": 4298 + }, + { + "epoch": 1.1415482671623955, + "grad_norm": 1.0286533711803312, + "learning_rate": 8.497483653040545e-06, + "loss": 0.20531126856803894, + "step": 4299 + }, + { + "epoch": 1.1418138361439385, + "grad_norm": 1.2153067733576255, + "learning_rate": 8.49314287750517e-06, + "loss": 0.2577363848686218, + "step": 4300 + }, + { + "epoch": 1.1420794051254815, + 
"grad_norm": 1.211343687077752, + "learning_rate": 8.488802392474076e-06, + "loss": 0.24225997924804688, + "step": 4301 + }, + { + "epoch": 1.1423449741070244, + "grad_norm": 1.2698570110354703, + "learning_rate": 8.484462198784058e-06, + "loss": 0.26494917273521423, + "step": 4302 + }, + { + "epoch": 1.1426105430885674, + "grad_norm": 1.2988704892129896, + "learning_rate": 8.480122297271855e-06, + "loss": 0.24903994798660278, + "step": 4303 + }, + { + "epoch": 1.1428761120701103, + "grad_norm": 1.1681075442122268, + "learning_rate": 8.475782688774147e-06, + "loss": 0.25291907787323, + "step": 4304 + }, + { + "epoch": 1.1431416810516533, + "grad_norm": 1.1301459507046017, + "learning_rate": 8.47144337412756e-06, + "loss": 0.22958475351333618, + "step": 4305 + }, + { + "epoch": 1.1434072500331962, + "grad_norm": 1.175766015682232, + "learning_rate": 8.46710435416866e-06, + "loss": 0.2305452972650528, + "step": 4306 + }, + { + "epoch": 1.1436728190147392, + "grad_norm": 1.2105790475425935, + "learning_rate": 8.462765629733965e-06, + "loss": 0.25028055906295776, + "step": 4307 + }, + { + "epoch": 1.1439383879962821, + "grad_norm": 1.2809924485725674, + "learning_rate": 8.458427201659926e-06, + "loss": 0.24873222410678864, + "step": 4308 + }, + { + "epoch": 1.144203956977825, + "grad_norm": 1.2345010944986379, + "learning_rate": 8.454089070782943e-06, + "loss": 0.23396535217761993, + "step": 4309 + }, + { + "epoch": 1.144469525959368, + "grad_norm": 1.1955062282547588, + "learning_rate": 8.449751237939354e-06, + "loss": 0.27120494842529297, + "step": 4310 + }, + { + "epoch": 1.144735094940911, + "grad_norm": 1.182924840045628, + "learning_rate": 8.445413703965441e-06, + "loss": 0.2734759449958801, + "step": 4311 + }, + { + "epoch": 1.145000663922454, + "grad_norm": 1.1584309667252248, + "learning_rate": 8.441076469697434e-06, + "loss": 0.25353512167930603, + "step": 4312 + }, + { + "epoch": 1.1452662329039969, + "grad_norm": 1.1913513856414861, + "learning_rate": 
8.436739535971497e-06, + "loss": 0.23851020634174347, + "step": 4313 + }, + { + "epoch": 1.1455318018855398, + "grad_norm": 1.2006838398252668, + "learning_rate": 8.432402903623741e-06, + "loss": 0.26320093870162964, + "step": 4314 + }, + { + "epoch": 1.1457973708670828, + "grad_norm": 1.1065666799118796, + "learning_rate": 8.428066573490211e-06, + "loss": 0.23859955370426178, + "step": 4315 + }, + { + "epoch": 1.1460629398486257, + "grad_norm": 1.197716796975668, + "learning_rate": 8.423730546406911e-06, + "loss": 0.2636772096157074, + "step": 4316 + }, + { + "epoch": 1.1463285088301687, + "grad_norm": 1.2459962038175347, + "learning_rate": 8.419394823209773e-06, + "loss": 0.2656415104866028, + "step": 4317 + }, + { + "epoch": 1.1465940778117116, + "grad_norm": 1.2225993542972535, + "learning_rate": 8.41505940473467e-06, + "loss": 0.2872830033302307, + "step": 4318 + }, + { + "epoch": 1.1468596467932546, + "grad_norm": 1.4653362839323858, + "learning_rate": 8.410724291817422e-06, + "loss": 0.229783833026886, + "step": 4319 + }, + { + "epoch": 1.1471252157747975, + "grad_norm": 4.273944826146497, + "learning_rate": 8.406389485293786e-06, + "loss": 0.24418675899505615, + "step": 4320 + }, + { + "epoch": 1.1473907847563405, + "grad_norm": 1.2385236183806463, + "learning_rate": 8.402054985999464e-06, + "loss": 0.2535584270954132, + "step": 4321 + }, + { + "epoch": 1.1476563537378834, + "grad_norm": 1.2116145926695832, + "learning_rate": 8.397720794770093e-06, + "loss": 0.23207828402519226, + "step": 4322 + }, + { + "epoch": 1.1479219227194264, + "grad_norm": 1.8129143471218838, + "learning_rate": 8.393386912441257e-06, + "loss": 0.27990391850471497, + "step": 4323 + }, + { + "epoch": 1.1481874917009693, + "grad_norm": 1.059877272327032, + "learning_rate": 8.38905333984847e-06, + "loss": 0.2098318189382553, + "step": 4324 + }, + { + "epoch": 1.1484530606825123, + "grad_norm": 1.1462464609840002, + "learning_rate": 8.384720077827204e-06, + "loss": 0.25303804874420166, + 
"step": 4325 + }, + { + "epoch": 1.1487186296640552, + "grad_norm": 1.0794728099252306, + "learning_rate": 8.380387127212858e-06, + "loss": 0.23481838405132294, + "step": 4326 + }, + { + "epoch": 1.1489841986455982, + "grad_norm": 1.1782142095551065, + "learning_rate": 8.376054488840771e-06, + "loss": 0.24842356145381927, + "step": 4327 + }, + { + "epoch": 1.1492497676271411, + "grad_norm": 1.136832039914945, + "learning_rate": 8.37172216354623e-06, + "loss": 0.23927366733551025, + "step": 4328 + }, + { + "epoch": 1.149515336608684, + "grad_norm": 1.1577812724546028, + "learning_rate": 8.367390152164448e-06, + "loss": 0.23836453258991241, + "step": 4329 + }, + { + "epoch": 1.149780905590227, + "grad_norm": 1.2492179140984832, + "learning_rate": 8.36305845553059e-06, + "loss": 0.2562161982059479, + "step": 4330 + }, + { + "epoch": 1.15004647457177, + "grad_norm": 1.120151700121908, + "learning_rate": 8.358727074479755e-06, + "loss": 0.21255920827388763, + "step": 4331 + }, + { + "epoch": 1.150312043553313, + "grad_norm": 1.1011600870179878, + "learning_rate": 8.354396009846985e-06, + "loss": 0.24200043082237244, + "step": 4332 + }, + { + "epoch": 1.150577612534856, + "grad_norm": 1.1644551235897023, + "learning_rate": 8.35006526246725e-06, + "loss": 0.23582379519939423, + "step": 4333 + }, + { + "epoch": 1.1508431815163989, + "grad_norm": 1.093546349726341, + "learning_rate": 8.34573483317548e-06, + "loss": 0.21554499864578247, + "step": 4334 + }, + { + "epoch": 1.1511087504979418, + "grad_norm": 1.2460346716976907, + "learning_rate": 8.341404722806525e-06, + "loss": 0.2789759039878845, + "step": 4335 + }, + { + "epoch": 1.1513743194794848, + "grad_norm": 1.212813860768853, + "learning_rate": 8.337074932195175e-06, + "loss": 0.24677832424640656, + "step": 4336 + }, + { + "epoch": 1.1516398884610277, + "grad_norm": 1.2351497128261646, + "learning_rate": 8.332745462176166e-06, + "loss": 0.28122392296791077, + "step": 4337 + }, + { + "epoch": 1.1519054574425707, + 
"grad_norm": 1.2447069177647443, + "learning_rate": 8.328416313584169e-06, + "loss": 0.23219403624534607, + "step": 4338 + }, + { + "epoch": 1.1521710264241136, + "grad_norm": 1.1258797089625292, + "learning_rate": 8.324087487253792e-06, + "loss": 0.19928379356861115, + "step": 4339 + }, + { + "epoch": 1.1524365954056566, + "grad_norm": 1.2737910298174706, + "learning_rate": 8.31975898401958e-06, + "loss": 0.27730467915534973, + "step": 4340 + }, + { + "epoch": 1.1527021643871995, + "grad_norm": 1.3906235348842741, + "learning_rate": 8.315430804716022e-06, + "loss": 0.25462737679481506, + "step": 4341 + }, + { + "epoch": 1.1529677333687425, + "grad_norm": 1.1703737499238527, + "learning_rate": 8.311102950177533e-06, + "loss": 0.2363007366657257, + "step": 4342 + }, + { + "epoch": 1.1532333023502854, + "grad_norm": 1.2498285131266695, + "learning_rate": 8.306775421238482e-06, + "loss": 0.2648352384567261, + "step": 4343 + }, + { + "epoch": 1.1534988713318284, + "grad_norm": 1.394847110607811, + "learning_rate": 8.302448218733158e-06, + "loss": 0.25645309686660767, + "step": 4344 + }, + { + "epoch": 1.1537644403133713, + "grad_norm": 1.2178564426244172, + "learning_rate": 8.298121343495797e-06, + "loss": 0.22962522506713867, + "step": 4345 + }, + { + "epoch": 1.1540300092949143, + "grad_norm": 1.132403649349265, + "learning_rate": 8.293794796360569e-06, + "loss": 0.21269623935222626, + "step": 4346 + }, + { + "epoch": 1.1542955782764572, + "grad_norm": 1.1646919704485588, + "learning_rate": 8.289468578161581e-06, + "loss": 0.2518436014652252, + "step": 4347 + }, + { + "epoch": 1.1545611472580002, + "grad_norm": 1.193830808481187, + "learning_rate": 8.285142689732877e-06, + "loss": 0.2318439483642578, + "step": 4348 + }, + { + "epoch": 1.1548267162395431, + "grad_norm": 1.0953821300718658, + "learning_rate": 8.280817131908438e-06, + "loss": 0.2278512567281723, + "step": 4349 + }, + { + "epoch": 1.155092285221086, + "grad_norm": 1.3446091578493078, + "learning_rate": 
8.27649190552218e-06, + "loss": 0.2521114945411682, + "step": 4350 + }, + { + "epoch": 1.155357854202629, + "grad_norm": 1.1722019112748296, + "learning_rate": 8.272167011407955e-06, + "loss": 0.2565760016441345, + "step": 4351 + }, + { + "epoch": 1.155623423184172, + "grad_norm": 1.3209067321897832, + "learning_rate": 8.267842450399552e-06, + "loss": 0.2603546679019928, + "step": 4352 + }, + { + "epoch": 1.155888992165715, + "grad_norm": 1.1697050726438265, + "learning_rate": 8.263518223330698e-06, + "loss": 0.2175855189561844, + "step": 4353 + }, + { + "epoch": 1.156154561147258, + "grad_norm": 1.1937135661774867, + "learning_rate": 8.25919433103505e-06, + "loss": 0.24521774053573608, + "step": 4354 + }, + { + "epoch": 1.1564201301288008, + "grad_norm": 1.3267445452853517, + "learning_rate": 8.254870774346203e-06, + "loss": 0.29673823714256287, + "step": 4355 + }, + { + "epoch": 1.1566856991103438, + "grad_norm": 1.260162624950344, + "learning_rate": 8.25054755409769e-06, + "loss": 0.26994144916534424, + "step": 4356 + }, + { + "epoch": 1.1569512680918868, + "grad_norm": 1.1578908727655277, + "learning_rate": 8.246224671122974e-06, + "loss": 0.2545935809612274, + "step": 4357 + }, + { + "epoch": 1.1572168370734297, + "grad_norm": 1.1469888258961152, + "learning_rate": 8.241902126255458e-06, + "loss": 0.23589034378528595, + "step": 4358 + }, + { + "epoch": 1.1574824060549727, + "grad_norm": 1.229284708155894, + "learning_rate": 8.237579920328478e-06, + "loss": 0.2617190480232239, + "step": 4359 + }, + { + "epoch": 1.1577479750365158, + "grad_norm": 1.2741716320060574, + "learning_rate": 8.233258054175302e-06, + "loss": 0.3092418313026428, + "step": 4360 + }, + { + "epoch": 1.1580135440180588, + "grad_norm": 1.1377305602079475, + "learning_rate": 8.228936528629138e-06, + "loss": 0.22873908281326294, + "step": 4361 + }, + { + "epoch": 1.1582791129996017, + "grad_norm": 1.0592847205754, + "learning_rate": 8.224615344523123e-06, + "loss": 0.22549089789390564, + 
"step": 4362 + }, + { + "epoch": 1.1585446819811447, + "grad_norm": 1.0288617285826194, + "learning_rate": 8.22029450269033e-06, + "loss": 0.19141459465026855, + "step": 4363 + }, + { + "epoch": 1.1588102509626876, + "grad_norm": 1.1679333849265336, + "learning_rate": 8.21597400396377e-06, + "loss": 0.24277547001838684, + "step": 4364 + }, + { + "epoch": 1.1590758199442306, + "grad_norm": 1.1463053400858605, + "learning_rate": 8.21165384917638e-06, + "loss": 0.2429513931274414, + "step": 4365 + }, + { + "epoch": 1.1593413889257735, + "grad_norm": 1.0775583631999657, + "learning_rate": 8.207334039161035e-06, + "loss": 0.24710172414779663, + "step": 4366 + }, + { + "epoch": 1.1596069579073165, + "grad_norm": 1.1226530732908067, + "learning_rate": 8.203014574750546e-06, + "loss": 0.2553783357143402, + "step": 4367 + }, + { + "epoch": 1.1598725268888594, + "grad_norm": 1.1664625510577165, + "learning_rate": 8.198695456777653e-06, + "loss": 0.2558436095714569, + "step": 4368 + }, + { + "epoch": 1.1601380958704024, + "grad_norm": 1.093371491828669, + "learning_rate": 8.19437668607503e-06, + "loss": 0.20780377089977264, + "step": 4369 + }, + { + "epoch": 1.1604036648519453, + "grad_norm": 1.0184271240235683, + "learning_rate": 8.190058263475288e-06, + "loss": 0.22397254407405853, + "step": 4370 + }, + { + "epoch": 1.1606692338334883, + "grad_norm": 1.1123966470918765, + "learning_rate": 8.185740189810967e-06, + "loss": 0.2763773798942566, + "step": 4371 + }, + { + "epoch": 1.1609348028150313, + "grad_norm": 1.234569017856286, + "learning_rate": 8.181422465914541e-06, + "loss": 0.2801940441131592, + "step": 4372 + }, + { + "epoch": 1.1612003717965742, + "grad_norm": 1.3078225086374202, + "learning_rate": 8.177105092618413e-06, + "loss": 0.20949441194534302, + "step": 4373 + }, + { + "epoch": 1.1614659407781172, + "grad_norm": 1.020800458401727, + "learning_rate": 8.172788070754927e-06, + "loss": 0.24503354728221893, + "step": 4374 + }, + { + "epoch": 1.16173150975966, + 
"grad_norm": 1.212252624187319, + "learning_rate": 8.16847140115635e-06, + "loss": 0.256147563457489, + "step": 4375 + }, + { + "epoch": 1.161997078741203, + "grad_norm": 1.079933692504349, + "learning_rate": 8.164155084654886e-06, + "loss": 0.2178848683834076, + "step": 4376 + }, + { + "epoch": 1.162262647722746, + "grad_norm": 1.0121292441974634, + "learning_rate": 8.159839122082668e-06, + "loss": 0.22624582052230835, + "step": 4377 + }, + { + "epoch": 1.162528216704289, + "grad_norm": 1.0294597777179986, + "learning_rate": 8.155523514271764e-06, + "loss": 0.2184191346168518, + "step": 4378 + }, + { + "epoch": 1.162793785685832, + "grad_norm": 1.2825595051682412, + "learning_rate": 8.151208262054175e-06, + "loss": 0.2623840868473053, + "step": 4379 + }, + { + "epoch": 1.1630593546673749, + "grad_norm": 1.2529929341607686, + "learning_rate": 8.14689336626183e-06, + "loss": 0.27181199193000793, + "step": 4380 + }, + { + "epoch": 1.1633249236489178, + "grad_norm": 1.282994089786083, + "learning_rate": 8.142578827726587e-06, + "loss": 0.2791554629802704, + "step": 4381 + }, + { + "epoch": 1.1635904926304608, + "grad_norm": 1.221608581014812, + "learning_rate": 8.13826464728024e-06, + "loss": 0.2466641068458557, + "step": 4382 + }, + { + "epoch": 1.1638560616120037, + "grad_norm": 0.9724735599541757, + "learning_rate": 8.133950825754511e-06, + "loss": 0.1951724737882614, + "step": 4383 + }, + { + "epoch": 1.1641216305935467, + "grad_norm": 1.2462068833977051, + "learning_rate": 8.129637363981056e-06, + "loss": 0.2520062029361725, + "step": 4384 + }, + { + "epoch": 1.1643871995750896, + "grad_norm": 1.230128345167748, + "learning_rate": 8.12532426279146e-06, + "loss": 0.24101334810256958, + "step": 4385 + }, + { + "epoch": 1.1646527685566326, + "grad_norm": 1.244671245504639, + "learning_rate": 8.121011523017235e-06, + "loss": 0.2741190791130066, + "step": 4386 + }, + { + "epoch": 1.1649183375381755, + "grad_norm": 1.1570746383559662, + "learning_rate": 
8.116699145489822e-06, + "loss": 0.2575281858444214, + "step": 4387 + }, + { + "epoch": 1.1651839065197185, + "grad_norm": 1.157233381368316, + "learning_rate": 8.112387131040608e-06, + "loss": 0.2557298243045807, + "step": 4388 + }, + { + "epoch": 1.1654494755012614, + "grad_norm": 1.2560692108341776, + "learning_rate": 8.108075480500892e-06, + "loss": 0.27485036849975586, + "step": 4389 + }, + { + "epoch": 1.1657150444828044, + "grad_norm": 1.2517544472207511, + "learning_rate": 8.103764194701909e-06, + "loss": 0.26458340883255005, + "step": 4390 + }, + { + "epoch": 1.1659806134643473, + "grad_norm": 1.2310585386329624, + "learning_rate": 8.099453274474827e-06, + "loss": 0.2281840592622757, + "step": 4391 + }, + { + "epoch": 1.1662461824458903, + "grad_norm": 1.2367230880082285, + "learning_rate": 8.095142720650739e-06, + "loss": 0.24956555664539337, + "step": 4392 + }, + { + "epoch": 1.1665117514274332, + "grad_norm": 1.109202461245095, + "learning_rate": 8.090832534060671e-06, + "loss": 0.22619420289993286, + "step": 4393 + }, + { + "epoch": 1.1667773204089762, + "grad_norm": 1.2922206575995636, + "learning_rate": 8.086522715535571e-06, + "loss": 0.2780688405036926, + "step": 4394 + }, + { + "epoch": 1.1670428893905191, + "grad_norm": 1.2699378735794575, + "learning_rate": 8.082213265906323e-06, + "loss": 0.2600886821746826, + "step": 4395 + }, + { + "epoch": 1.167308458372062, + "grad_norm": 1.244234758234162, + "learning_rate": 8.077904186003736e-06, + "loss": 0.25049078464508057, + "step": 4396 + }, + { + "epoch": 1.167574027353605, + "grad_norm": 1.2327544821473595, + "learning_rate": 8.073595476658558e-06, + "loss": 0.27745798230171204, + "step": 4397 + }, + { + "epoch": 1.167839596335148, + "grad_norm": 1.1682547274263488, + "learning_rate": 8.069287138701452e-06, + "loss": 0.2191929668188095, + "step": 4398 + }, + { + "epoch": 1.168105165316691, + "grad_norm": 1.297306908163856, + "learning_rate": 8.064979172963014e-06, + "loss": 0.24307313561439514, + 
"step": 4399 + }, + { + "epoch": 1.168370734298234, + "grad_norm": 1.1837345133145987, + "learning_rate": 8.060671580273772e-06, + "loss": 0.23036238551139832, + "step": 4400 + }, + { + "epoch": 1.1686363032797769, + "grad_norm": 1.096627050675377, + "learning_rate": 8.056364361464176e-06, + "loss": 0.2394433617591858, + "step": 4401 + }, + { + "epoch": 1.1689018722613198, + "grad_norm": 1.183557399538609, + "learning_rate": 8.052057517364608e-06, + "loss": 0.24099211394786835, + "step": 4402 + }, + { + "epoch": 1.1691674412428628, + "grad_norm": 1.1293667282926971, + "learning_rate": 8.047751048805376e-06, + "loss": 0.22036173939704895, + "step": 4403 + }, + { + "epoch": 1.1694330102244057, + "grad_norm": 1.185484128157471, + "learning_rate": 8.043444956616717e-06, + "loss": 0.22400429844856262, + "step": 4404 + }, + { + "epoch": 1.1696985792059487, + "grad_norm": 1.0594769241160498, + "learning_rate": 8.039139241628792e-06, + "loss": 0.21649131178855896, + "step": 4405 + }, + { + "epoch": 1.1699641481874916, + "grad_norm": 1.150957898906185, + "learning_rate": 8.034833904671698e-06, + "loss": 0.23412205278873444, + "step": 4406 + }, + { + "epoch": 1.1702297171690346, + "grad_norm": 1.2025485392569255, + "learning_rate": 8.030528946575453e-06, + "loss": 0.23822304606437683, + "step": 4407 + }, + { + "epoch": 1.1704952861505775, + "grad_norm": 1.2929661052617345, + "learning_rate": 8.026224368169998e-06, + "loss": 0.29250186681747437, + "step": 4408 + }, + { + "epoch": 1.1707608551321207, + "grad_norm": 1.4098437716027425, + "learning_rate": 8.021920170285205e-06, + "loss": 0.26794207096099854, + "step": 4409 + }, + { + "epoch": 1.1710264241136636, + "grad_norm": 1.2469013694849018, + "learning_rate": 8.017616353750874e-06, + "loss": 0.2573787271976471, + "step": 4410 + }, + { + "epoch": 1.1712919930952066, + "grad_norm": 1.1835378975512396, + "learning_rate": 8.01331291939673e-06, + "loss": 0.2744356691837311, + "step": 4411 + }, + { + "epoch": 1.1715575620767495, 
+ "grad_norm": 1.4542599881672131, + "learning_rate": 8.009009868052424e-06, + "loss": 0.2582886815071106, + "step": 4412 + }, + { + "epoch": 1.1718231310582925, + "grad_norm": 1.1766031171819216, + "learning_rate": 8.004707200547534e-06, + "loss": 0.2553568482398987, + "step": 4413 + }, + { + "epoch": 1.1720887000398355, + "grad_norm": 1.144579662849428, + "learning_rate": 8.00040491771156e-06, + "loss": 0.2670289874076843, + "step": 4414 + }, + { + "epoch": 1.1723542690213784, + "grad_norm": 1.1520006084984327, + "learning_rate": 7.99610302037394e-06, + "loss": 0.215460866689682, + "step": 4415 + }, + { + "epoch": 1.1726198380029214, + "grad_norm": 1.2764670908026035, + "learning_rate": 7.991801509364023e-06, + "loss": 0.26481571793556213, + "step": 4416 + }, + { + "epoch": 1.1728854069844643, + "grad_norm": 1.0239999030663398, + "learning_rate": 7.98750038551109e-06, + "loss": 0.2060776650905609, + "step": 4417 + }, + { + "epoch": 1.1731509759660073, + "grad_norm": 1.147707044406535, + "learning_rate": 7.983199649644349e-06, + "loss": 0.2401561588048935, + "step": 4418 + }, + { + "epoch": 1.1734165449475502, + "grad_norm": 1.3064882111410037, + "learning_rate": 7.978899302592927e-06, + "loss": 0.2545842230319977, + "step": 4419 + }, + { + "epoch": 1.1736821139290932, + "grad_norm": 1.199445262296627, + "learning_rate": 7.974599345185884e-06, + "loss": 0.29925093054771423, + "step": 4420 + }, + { + "epoch": 1.1739476829106361, + "grad_norm": 1.7583031900565322, + "learning_rate": 7.9702997782522e-06, + "loss": 0.23944757878780365, + "step": 4421 + }, + { + "epoch": 1.174213251892179, + "grad_norm": 1.057746400765015, + "learning_rate": 7.96600060262078e-06, + "loss": 0.23745761811733246, + "step": 4422 + }, + { + "epoch": 1.174478820873722, + "grad_norm": 1.1164780002442092, + "learning_rate": 7.961701819120453e-06, + "loss": 0.22170330584049225, + "step": 4423 + }, + { + "epoch": 1.174744389855265, + "grad_norm": 1.2607094160663312, + "learning_rate": 
7.95740342857998e-06, + "loss": 0.2645890712738037, + "step": 4424 + }, + { + "epoch": 1.175009958836808, + "grad_norm": 1.2171129338535713, + "learning_rate": 7.953105431828032e-06, + "loss": 0.25232207775115967, + "step": 4425 + }, + { + "epoch": 1.1752755278183509, + "grad_norm": 1.20503293579659, + "learning_rate": 7.948807829693219e-06, + "loss": 0.2656644880771637, + "step": 4426 + }, + { + "epoch": 1.1755410967998938, + "grad_norm": 1.069230366230624, + "learning_rate": 7.944510623004063e-06, + "loss": 0.25290653109550476, + "step": 4427 + }, + { + "epoch": 1.1758066657814368, + "grad_norm": 1.1825821036814732, + "learning_rate": 7.940213812589018e-06, + "loss": 0.27464741468429565, + "step": 4428 + }, + { + "epoch": 1.1760722347629797, + "grad_norm": 1.4910942744639428, + "learning_rate": 7.935917399276455e-06, + "loss": 0.2562064528465271, + "step": 4429 + }, + { + "epoch": 1.1763378037445227, + "grad_norm": 1.2720371671465533, + "learning_rate": 7.931621383894676e-06, + "loss": 0.267793208360672, + "step": 4430 + }, + { + "epoch": 1.1766033727260656, + "grad_norm": 1.1490167098873316, + "learning_rate": 7.9273257672719e-06, + "loss": 0.23651085793972015, + "step": 4431 + }, + { + "epoch": 1.1768689417076086, + "grad_norm": 1.0804412076412697, + "learning_rate": 7.923030550236267e-06, + "loss": 0.23691008985042572, + "step": 4432 + }, + { + "epoch": 1.1771345106891515, + "grad_norm": 1.1540873295746452, + "learning_rate": 7.918735733615852e-06, + "loss": 0.24495704472064972, + "step": 4433 + }, + { + "epoch": 1.1774000796706945, + "grad_norm": 1.4423069413713672, + "learning_rate": 7.91444131823864e-06, + "loss": 0.25423017144203186, + "step": 4434 + }, + { + "epoch": 1.1776656486522374, + "grad_norm": 1.1113893983435537, + "learning_rate": 7.910147304932548e-06, + "loss": 0.22870390117168427, + "step": 4435 + }, + { + "epoch": 1.1779312176337804, + "grad_norm": 1.0473620824498977, + "learning_rate": 7.905853694525405e-06, + "loss": 0.23037508130073547, + 
"step": 4436 + }, + { + "epoch": 1.1781967866153233, + "grad_norm": 1.2886040363623328, + "learning_rate": 7.901560487844973e-06, + "loss": 0.31184864044189453, + "step": 4437 + }, + { + "epoch": 1.1784623555968663, + "grad_norm": 1.302197101799982, + "learning_rate": 7.89726768571893e-06, + "loss": 0.24140426516532898, + "step": 4438 + }, + { + "epoch": 1.1787279245784092, + "grad_norm": 1.2134032336682008, + "learning_rate": 7.892975288974877e-06, + "loss": 0.25602301955223083, + "step": 4439 + }, + { + "epoch": 1.1789934935599522, + "grad_norm": 1.1868063067331378, + "learning_rate": 7.888683298440339e-06, + "loss": 0.2717514932155609, + "step": 4440 + }, + { + "epoch": 1.1792590625414952, + "grad_norm": 1.1670818939848298, + "learning_rate": 7.884391714942757e-06, + "loss": 0.252475380897522, + "step": 4441 + }, + { + "epoch": 1.179524631523038, + "grad_norm": 1.161546405047816, + "learning_rate": 7.880100539309506e-06, + "loss": 0.24777942895889282, + "step": 4442 + }, + { + "epoch": 1.179790200504581, + "grad_norm": 1.194146333188245, + "learning_rate": 7.875809772367867e-06, + "loss": 0.25111010670661926, + "step": 4443 + }, + { + "epoch": 1.180055769486124, + "grad_norm": 1.163412583383914, + "learning_rate": 7.87151941494505e-06, + "loss": 0.26183217763900757, + "step": 4444 + }, + { + "epoch": 1.180321338467667, + "grad_norm": 1.2974065116766642, + "learning_rate": 7.867229467868189e-06, + "loss": 0.27538490295410156, + "step": 4445 + }, + { + "epoch": 1.18058690744921, + "grad_norm": 1.078206017492716, + "learning_rate": 7.862939931964333e-06, + "loss": 0.2192106693983078, + "step": 4446 + }, + { + "epoch": 1.1808524764307529, + "grad_norm": 1.2415747879020278, + "learning_rate": 7.858650808060453e-06, + "loss": 0.26506057381629944, + "step": 4447 + }, + { + "epoch": 1.1811180454122958, + "grad_norm": 1.103375758703505, + "learning_rate": 7.854362096983443e-06, + "loss": 0.2345719337463379, + "step": 4448 + }, + { + "epoch": 1.1813836143938388, + 
"grad_norm": 1.1651284585435833, + "learning_rate": 7.850073799560114e-06, + "loss": 0.21404311060905457, + "step": 4449 + }, + { + "epoch": 1.1816491833753817, + "grad_norm": 1.1572235550991925, + "learning_rate": 7.8457859166172e-06, + "loss": 0.24332138895988464, + "step": 4450 + }, + { + "epoch": 1.1819147523569247, + "grad_norm": 1.1687901862394692, + "learning_rate": 7.841498448981354e-06, + "loss": 0.25025150179862976, + "step": 4451 + }, + { + "epoch": 1.1821803213384676, + "grad_norm": 1.167419454587793, + "learning_rate": 7.837211397479152e-06, + "loss": 0.21918940544128418, + "step": 4452 + }, + { + "epoch": 1.1824458903200106, + "grad_norm": 1.1517463754639392, + "learning_rate": 7.832924762937083e-06, + "loss": 0.24976079165935516, + "step": 4453 + }, + { + "epoch": 1.1827114593015535, + "grad_norm": 1.1165052000707918, + "learning_rate": 7.828638546181565e-06, + "loss": 0.21146243810653687, + "step": 4454 + }, + { + "epoch": 1.1829770282830965, + "grad_norm": 1.1110608449393633, + "learning_rate": 7.824352748038924e-06, + "loss": 0.22921445965766907, + "step": 4455 + }, + { + "epoch": 1.1832425972646394, + "grad_norm": 1.1833669908026252, + "learning_rate": 7.820067369335413e-06, + "loss": 0.24401478469371796, + "step": 4456 + }, + { + "epoch": 1.1835081662461824, + "grad_norm": 1.2543977272663969, + "learning_rate": 7.815782410897209e-06, + "loss": 0.2717207074165344, + "step": 4457 + }, + { + "epoch": 1.1837737352277253, + "grad_norm": 1.0934075655453726, + "learning_rate": 7.81149787355039e-06, + "loss": 0.20752058923244476, + "step": 4458 + }, + { + "epoch": 1.1840393042092683, + "grad_norm": 1.3448722481333402, + "learning_rate": 7.807213758120965e-06, + "loss": 0.31095850467681885, + "step": 4459 + }, + { + "epoch": 1.1843048731908112, + "grad_norm": 1.1769654791590503, + "learning_rate": 7.802930065434874e-06, + "loss": 0.23761102557182312, + "step": 4460 + }, + { + "epoch": 1.1845704421723542, + "grad_norm": 1.3225327364557968, + 
"learning_rate": 7.798646796317952e-06, + "loss": 0.2509460151195526, + "step": 4461 + }, + { + "epoch": 1.1848360111538971, + "grad_norm": 1.472525937697874, + "learning_rate": 7.794363951595966e-06, + "loss": 0.25903213024139404, + "step": 4462 + }, + { + "epoch": 1.18510158013544, + "grad_norm": 1.1904413554334654, + "learning_rate": 7.790081532094596e-06, + "loss": 0.23304736614227295, + "step": 4463 + }, + { + "epoch": 1.185367149116983, + "grad_norm": 1.311875765456408, + "learning_rate": 7.785799538639445e-06, + "loss": 0.28707265853881836, + "step": 4464 + }, + { + "epoch": 1.185632718098526, + "grad_norm": 1.0202920254712324, + "learning_rate": 7.781517972056028e-06, + "loss": 0.20282745361328125, + "step": 4465 + }, + { + "epoch": 1.185898287080069, + "grad_norm": 1.2606153791729335, + "learning_rate": 7.777236833169782e-06, + "loss": 0.24056631326675415, + "step": 4466 + }, + { + "epoch": 1.186163856061612, + "grad_norm": 1.4946194524955894, + "learning_rate": 7.772956122806058e-06, + "loss": 0.2677255868911743, + "step": 4467 + }, + { + "epoch": 1.1864294250431549, + "grad_norm": 1.2681064192856966, + "learning_rate": 7.768675841790124e-06, + "loss": 0.22032876312732697, + "step": 4468 + }, + { + "epoch": 1.1866949940246978, + "grad_norm": 1.3138325978828467, + "learning_rate": 7.764395990947177e-06, + "loss": 0.2980336546897888, + "step": 4469 + }, + { + "epoch": 1.1869605630062408, + "grad_norm": 1.2624280680532078, + "learning_rate": 7.760116571102314e-06, + "loss": 0.2562638521194458, + "step": 4470 + }, + { + "epoch": 1.1872261319877837, + "grad_norm": 1.2207997545500016, + "learning_rate": 7.755837583080561e-06, + "loss": 0.262576699256897, + "step": 4471 + }, + { + "epoch": 1.1874917009693267, + "grad_norm": 1.2672893771429377, + "learning_rate": 7.751559027706858e-06, + "loss": 0.2654029130935669, + "step": 4472 + }, + { + "epoch": 1.1877572699508698, + "grad_norm": 1.2996444615622489, + "learning_rate": 7.747280905806051e-06, + "loss": 
0.2946662902832031, + "step": 4473 + }, + { + "epoch": 1.1880228389324128, + "grad_norm": 1.193974235945654, + "learning_rate": 7.743003218202921e-06, + "loss": 0.25140905380249023, + "step": 4474 + }, + { + "epoch": 1.1882884079139557, + "grad_norm": 1.2240016583398612, + "learning_rate": 7.738725965722149e-06, + "loss": 0.2601654529571533, + "step": 4475 + }, + { + "epoch": 1.1885539768954987, + "grad_norm": 1.9675422662507516, + "learning_rate": 7.73444914918834e-06, + "loss": 0.2639954090118408, + "step": 4476 + }, + { + "epoch": 1.1888195458770416, + "grad_norm": 1.174151986382161, + "learning_rate": 7.730172769426014e-06, + "loss": 0.23391291499137878, + "step": 4477 + }, + { + "epoch": 1.1890851148585846, + "grad_norm": 2.254589386622623, + "learning_rate": 7.725896827259613e-06, + "loss": 0.2912144958972931, + "step": 4478 + }, + { + "epoch": 1.1893506838401275, + "grad_norm": 1.0905445077469016, + "learning_rate": 7.72162132351348e-06, + "loss": 0.23867549002170563, + "step": 4479 + }, + { + "epoch": 1.1896162528216705, + "grad_norm": 1.1124853975848743, + "learning_rate": 7.717346259011888e-06, + "loss": 0.22434742748737335, + "step": 4480 + }, + { + "epoch": 1.1898818218032134, + "grad_norm": 1.2440839352544732, + "learning_rate": 7.713071634579017e-06, + "loss": 0.2504398822784424, + "step": 4481 + }, + { + "epoch": 1.1901473907847564, + "grad_norm": 1.1759629506533034, + "learning_rate": 7.70879745103896e-06, + "loss": 0.24887195229530334, + "step": 4482 + }, + { + "epoch": 1.1904129597662994, + "grad_norm": 1.2603454999195398, + "learning_rate": 7.704523709215732e-06, + "loss": 0.2730141580104828, + "step": 4483 + }, + { + "epoch": 1.1906785287478423, + "grad_norm": 1.2285382464481551, + "learning_rate": 7.70025040993326e-06, + "loss": 0.22197315096855164, + "step": 4484 + }, + { + "epoch": 1.1909440977293853, + "grad_norm": 1.2004564929121084, + "learning_rate": 7.695977554015387e-06, + "loss": 0.2852731943130493, + "step": 4485 + }, + { + "epoch": 
1.1912096667109282, + "grad_norm": 1.2815387200597224, + "learning_rate": 7.691705142285863e-06, + "loss": 0.2577238976955414, + "step": 4486 + }, + { + "epoch": 1.1914752356924712, + "grad_norm": 1.066499567502605, + "learning_rate": 7.68743317556837e-06, + "loss": 0.23510503768920898, + "step": 4487 + }, + { + "epoch": 1.191740804674014, + "grad_norm": 1.557745891642732, + "learning_rate": 7.683161654686486e-06, + "loss": 0.2553985118865967, + "step": 4488 + }, + { + "epoch": 1.192006373655557, + "grad_norm": 1.1965147913981737, + "learning_rate": 7.67889058046371e-06, + "loss": 0.2778642475605011, + "step": 4489 + }, + { + "epoch": 1.1922719426371, + "grad_norm": 1.1622951487110165, + "learning_rate": 7.674619953723455e-06, + "loss": 0.24740618467330933, + "step": 4490 + }, + { + "epoch": 1.192537511618643, + "grad_norm": 1.1598996003550786, + "learning_rate": 7.670349775289047e-06, + "loss": 0.2453901171684265, + "step": 4491 + }, + { + "epoch": 1.192803080600186, + "grad_norm": 1.1444233008842855, + "learning_rate": 7.666080045983726e-06, + "loss": 0.2336064875125885, + "step": 4492 + }, + { + "epoch": 1.1930686495817289, + "grad_norm": 1.18047841753512, + "learning_rate": 7.661810766630648e-06, + "loss": 0.2375800907611847, + "step": 4493 + }, + { + "epoch": 1.1933342185632718, + "grad_norm": 1.1241813274405275, + "learning_rate": 7.657541938052876e-06, + "loss": 0.21272733807563782, + "step": 4494 + }, + { + "epoch": 1.1935997875448148, + "grad_norm": 1.1531042348696576, + "learning_rate": 7.65327356107339e-06, + "loss": 0.26597708463668823, + "step": 4495 + }, + { + "epoch": 1.1938653565263577, + "grad_norm": 1.1715955143508257, + "learning_rate": 7.649005636515088e-06, + "loss": 0.267806738615036, + "step": 4496 + }, + { + "epoch": 1.1941309255079007, + "grad_norm": 1.1812545197713797, + "learning_rate": 7.64473816520077e-06, + "loss": 0.2260194569826126, + "step": 4497 + }, + { + "epoch": 1.1943964944894436, + "grad_norm": 1.298416110387325, + 
"learning_rate": 7.640471147953157e-06, + "loss": 0.24523532390594482, + "step": 4498 + }, + { + "epoch": 1.1946620634709866, + "grad_norm": 1.1020194586485352, + "learning_rate": 7.636204585594879e-06, + "loss": 0.23230910301208496, + "step": 4499 + }, + { + "epoch": 1.1949276324525295, + "grad_norm": 1.1141631171804318, + "learning_rate": 7.631938478948478e-06, + "loss": 0.23322705924510956, + "step": 4500 + }, + { + "epoch": 1.1951932014340725, + "grad_norm": 1.3011711597097497, + "learning_rate": 7.6276728288364086e-06, + "loss": 0.25614386796951294, + "step": 4501 + }, + { + "epoch": 1.1954587704156154, + "grad_norm": 1.2188058731839337, + "learning_rate": 7.62340763608104e-06, + "loss": 0.22921821475028992, + "step": 4502 + }, + { + "epoch": 1.1957243393971584, + "grad_norm": 1.1538976889459698, + "learning_rate": 7.619142901504649e-06, + "loss": 0.25528913736343384, + "step": 4503 + }, + { + "epoch": 1.1959899083787013, + "grad_norm": 1.1730292690453887, + "learning_rate": 7.614878625929425e-06, + "loss": 0.2528502643108368, + "step": 4504 + }, + { + "epoch": 1.1962554773602443, + "grad_norm": 1.2636827238002009, + "learning_rate": 7.610614810177474e-06, + "loss": 0.2519027590751648, + "step": 4505 + }, + { + "epoch": 1.1965210463417872, + "grad_norm": 1.3563109831905724, + "learning_rate": 7.606351455070808e-06, + "loss": 0.2895655333995819, + "step": 4506 + }, + { + "epoch": 1.1967866153233302, + "grad_norm": 1.2317858842714817, + "learning_rate": 7.6020885614313515e-06, + "loss": 0.24588793516159058, + "step": 4507 + }, + { + "epoch": 1.1970521843048731, + "grad_norm": 1.3148149004868621, + "learning_rate": 7.597826130080938e-06, + "loss": 0.2996830940246582, + "step": 4508 + }, + { + "epoch": 1.197317753286416, + "grad_norm": 1.2289139982746875, + "learning_rate": 7.593564161841318e-06, + "loss": 0.2654343247413635, + "step": 4509 + }, + { + "epoch": 1.197583322267959, + "grad_norm": 1.2104660234722762, + "learning_rate": 7.589302657534144e-06, + "loss": 
0.24949109554290771, + "step": 4510 + }, + { + "epoch": 1.197848891249502, + "grad_norm": 1.1785955409512114, + "learning_rate": 7.5850416179809886e-06, + "loss": 0.23205731809139252, + "step": 4511 + }, + { + "epoch": 1.198114460231045, + "grad_norm": 3.351023225066079, + "learning_rate": 7.580781044003324e-06, + "loss": 0.232904314994812, + "step": 4512 + }, + { + "epoch": 1.198380029212588, + "grad_norm": 1.0569352775404934, + "learning_rate": 7.576520936422542e-06, + "loss": 0.25071364641189575, + "step": 4513 + }, + { + "epoch": 1.1986455981941309, + "grad_norm": 1.3613643273685416, + "learning_rate": 7.572261296059944e-06, + "loss": 0.2574467658996582, + "step": 4514 + }, + { + "epoch": 1.1989111671756738, + "grad_norm": 1.1866331959407248, + "learning_rate": 7.568002123736735e-06, + "loss": 0.23134055733680725, + "step": 4515 + }, + { + "epoch": 1.1991767361572168, + "grad_norm": 1.093870770411857, + "learning_rate": 7.5637434202740334e-06, + "loss": 0.22163332998752594, + "step": 4516 + }, + { + "epoch": 1.1994423051387597, + "grad_norm": 1.182308432196374, + "learning_rate": 7.559485186492868e-06, + "loss": 0.2665749788284302, + "step": 4517 + }, + { + "epoch": 1.1997078741203027, + "grad_norm": 1.0758759053634162, + "learning_rate": 7.555227423214174e-06, + "loss": 0.2237103432416916, + "step": 4518 + }, + { + "epoch": 1.1999734431018456, + "grad_norm": 1.2216323349035507, + "learning_rate": 7.550970131258801e-06, + "loss": 0.23287461698055267, + "step": 4519 + }, + { + "epoch": 1.2002390120833886, + "grad_norm": 1.1237156855078405, + "learning_rate": 7.5467133114475025e-06, + "loss": 0.2296323925256729, + "step": 4520 + }, + { + "epoch": 1.2005045810649315, + "grad_norm": 1.0900498705064874, + "learning_rate": 7.542456964600944e-06, + "loss": 0.21358339488506317, + "step": 4521 + }, + { + "epoch": 1.2007701500464747, + "grad_norm": 1.2516498821908515, + "learning_rate": 7.5382010915396954e-06, + "loss": 0.2355872094631195, + "step": 4522 + }, + { + 
"epoch": 1.2010357190280176, + "grad_norm": 1.2039029354448443, + "learning_rate": 7.5339456930842455e-06, + "loss": 0.25397661328315735, + "step": 4523 + }, + { + "epoch": 1.2013012880095606, + "grad_norm": 1.1762399479435963, + "learning_rate": 7.52969077005498e-06, + "loss": 0.26658257842063904, + "step": 4524 + }, + { + "epoch": 1.2015668569911035, + "grad_norm": 1.1889790145170218, + "learning_rate": 7.525436323272201e-06, + "loss": 0.27207136154174805, + "step": 4525 + }, + { + "epoch": 1.2018324259726465, + "grad_norm": 1.1867510172835751, + "learning_rate": 7.521182353556114e-06, + "loss": 0.25889313220977783, + "step": 4526 + }, + { + "epoch": 1.2020979949541895, + "grad_norm": 1.3095753328357655, + "learning_rate": 7.516928861726834e-06, + "loss": 0.272185742855072, + "step": 4527 + }, + { + "epoch": 1.2023635639357324, + "grad_norm": 1.156226984644319, + "learning_rate": 7.512675848604385e-06, + "loss": 0.25371503829956055, + "step": 4528 + }, + { + "epoch": 1.2026291329172754, + "grad_norm": 1.2028831911106082, + "learning_rate": 7.5084233150086964e-06, + "loss": 0.2554902732372284, + "step": 4529 + }, + { + "epoch": 1.2028947018988183, + "grad_norm": 1.1714528701705076, + "learning_rate": 7.50417126175961e-06, + "loss": 0.22007369995117188, + "step": 4530 + }, + { + "epoch": 1.2031602708803613, + "grad_norm": 1.2057968317835202, + "learning_rate": 7.499919689676861e-06, + "loss": 0.27492445707321167, + "step": 4531 + }, + { + "epoch": 1.2034258398619042, + "grad_norm": 1.1229280499713745, + "learning_rate": 7.4956685995801144e-06, + "loss": 0.2321021854877472, + "step": 4532 + }, + { + "epoch": 1.2036914088434472, + "grad_norm": 1.1735641467762012, + "learning_rate": 7.491417992288927e-06, + "loss": 0.25410759449005127, + "step": 4533 + }, + { + "epoch": 1.2039569778249901, + "grad_norm": 1.0638924164212193, + "learning_rate": 7.487167868622765e-06, + "loss": 0.2080576866865158, + "step": 4534 + }, + { + "epoch": 1.204222546806533, + "grad_norm": 
1.115815492341061, + "learning_rate": 7.482918229401001e-06, + "loss": 0.2333327978849411, + "step": 4535 + }, + { + "epoch": 1.204488115788076, + "grad_norm": 1.1999209092526242, + "learning_rate": 7.478669075442917e-06, + "loss": 0.23160479962825775, + "step": 4536 + }, + { + "epoch": 1.204753684769619, + "grad_norm": 1.2136747509439494, + "learning_rate": 7.474420407567699e-06, + "loss": 0.2627696394920349, + "step": 4537 + }, + { + "epoch": 1.205019253751162, + "grad_norm": 1.0694648198090266, + "learning_rate": 7.470172226594441e-06, + "loss": 0.18656940758228302, + "step": 4538 + }, + { + "epoch": 1.2052848227327049, + "grad_norm": 1.2245138263513848, + "learning_rate": 7.465924533342139e-06, + "loss": 0.2749083340167999, + "step": 4539 + }, + { + "epoch": 1.2055503917142478, + "grad_norm": 1.3944907322006155, + "learning_rate": 7.461677328629696e-06, + "loss": 0.27484387159347534, + "step": 4540 + }, + { + "epoch": 1.2058159606957908, + "grad_norm": 1.254197138569937, + "learning_rate": 7.457430613275934e-06, + "loss": 0.26357588171958923, + "step": 4541 + }, + { + "epoch": 1.2060815296773337, + "grad_norm": 1.2004336778554112, + "learning_rate": 7.453184388099559e-06, + "loss": 0.23495343327522278, + "step": 4542 + }, + { + "epoch": 1.2063470986588767, + "grad_norm": 1.2123259782755003, + "learning_rate": 7.4489386539192e-06, + "loss": 0.253970205783844, + "step": 4543 + }, + { + "epoch": 1.2066126676404196, + "grad_norm": 1.1523820852778563, + "learning_rate": 7.444693411553383e-06, + "loss": 0.24919062852859497, + "step": 4544 + }, + { + "epoch": 1.2068782366219626, + "grad_norm": 1.2181666045865969, + "learning_rate": 7.440448661820536e-06, + "loss": 0.24373450875282288, + "step": 4545 + }, + { + "epoch": 1.2071438056035055, + "grad_norm": 1.3762501451890354, + "learning_rate": 7.436204405539002e-06, + "loss": 0.24739482998847961, + "step": 4546 + }, + { + "epoch": 1.2074093745850485, + "grad_norm": 1.2982074074943253, + "learning_rate": 
7.4319606435270195e-06, + "loss": 0.27041494846343994, + "step": 4547 + }, + { + "epoch": 1.2076749435665914, + "grad_norm": 1.1359942984852744, + "learning_rate": 7.427717376602739e-06, + "loss": 0.23243938386440277, + "step": 4548 + }, + { + "epoch": 1.2079405125481344, + "grad_norm": 1.3118758722508392, + "learning_rate": 7.423474605584206e-06, + "loss": 0.2346343696117401, + "step": 4549 + }, + { + "epoch": 1.2082060815296773, + "grad_norm": 1.1819354183035133, + "learning_rate": 7.419232331289385e-06, + "loss": 0.2587367296218872, + "step": 4550 + }, + { + "epoch": 1.2084716505112203, + "grad_norm": 1.195922174249915, + "learning_rate": 7.414990554536134e-06, + "loss": 0.2552938461303711, + "step": 4551 + }, + { + "epoch": 1.2087372194927632, + "grad_norm": 1.2688216449772127, + "learning_rate": 7.410749276142221e-06, + "loss": 0.2693648040294647, + "step": 4552 + }, + { + "epoch": 1.2090027884743062, + "grad_norm": 1.1997939452425357, + "learning_rate": 7.406508496925307e-06, + "loss": 0.21543294191360474, + "step": 4553 + }, + { + "epoch": 1.2092683574558492, + "grad_norm": 1.2385892147047024, + "learning_rate": 7.402268217702966e-06, + "loss": 0.2913009524345398, + "step": 4554 + }, + { + "epoch": 1.209533926437392, + "grad_norm": 1.0671356100150298, + "learning_rate": 7.398028439292675e-06, + "loss": 0.23279520869255066, + "step": 4555 + }, + { + "epoch": 1.209799495418935, + "grad_norm": 1.0946575444558022, + "learning_rate": 7.393789162511815e-06, + "loss": 0.25086939334869385, + "step": 4556 + }, + { + "epoch": 1.210065064400478, + "grad_norm": 1.0964890001200192, + "learning_rate": 7.389550388177662e-06, + "loss": 0.21704714000225067, + "step": 4557 + }, + { + "epoch": 1.210330633382021, + "grad_norm": 1.126699331966135, + "learning_rate": 7.3853121171074115e-06, + "loss": 0.230219304561615, + "step": 4558 + }, + { + "epoch": 1.210596202363564, + "grad_norm": 1.1809668678269754, + "learning_rate": 7.381074350118149e-06, + "loss": 0.26073017716407776, + 
"step": 4559 + }, + { + "epoch": 1.2108617713451069, + "grad_norm": 1.2065072762311946, + "learning_rate": 7.376837088026863e-06, + "loss": 0.25186216831207275, + "step": 4560 + }, + { + "epoch": 1.2111273403266498, + "grad_norm": 1.3978877577958326, + "learning_rate": 7.372600331650449e-06, + "loss": 0.28719040751457214, + "step": 4561 + }, + { + "epoch": 1.2113929093081928, + "grad_norm": 1.16073083909203, + "learning_rate": 7.368364081805704e-06, + "loss": 0.23972755670547485, + "step": 4562 + }, + { + "epoch": 1.2116584782897357, + "grad_norm": 1.096919114864748, + "learning_rate": 7.364128339309326e-06, + "loss": 0.23053769767284393, + "step": 4563 + }, + { + "epoch": 1.2119240472712787, + "grad_norm": 1.2910615683085556, + "learning_rate": 7.359893104977917e-06, + "loss": 0.25124189257621765, + "step": 4564 + }, + { + "epoch": 1.2121896162528216, + "grad_norm": 1.1863697592423188, + "learning_rate": 7.355658379627981e-06, + "loss": 0.2243686318397522, + "step": 4565 + }, + { + "epoch": 1.2124551852343646, + "grad_norm": 1.244591161752608, + "learning_rate": 7.3514241640759175e-06, + "loss": 0.26047343015670776, + "step": 4566 + }, + { + "epoch": 1.2127207542159075, + "grad_norm": 1.1775978450301259, + "learning_rate": 7.3471904591380434e-06, + "loss": 0.23603469133377075, + "step": 4567 + }, + { + "epoch": 1.2129863231974505, + "grad_norm": 1.2261707581126196, + "learning_rate": 7.342957265630561e-06, + "loss": 0.31320711970329285, + "step": 4568 + }, + { + "epoch": 1.2132518921789934, + "grad_norm": 1.22464158648852, + "learning_rate": 7.338724584369581e-06, + "loss": 0.22159788012504578, + "step": 4569 + }, + { + "epoch": 1.2135174611605364, + "grad_norm": 1.1206153371836056, + "learning_rate": 7.334492416171114e-06, + "loss": 0.21992239356040955, + "step": 4570 + }, + { + "epoch": 1.2137830301420793, + "grad_norm": 1.3229661253734524, + "learning_rate": 7.330260761851071e-06, + "loss": 0.20708827674388885, + "step": 4571 + }, + { + "epoch": 
1.2140485991236223, + "grad_norm": 1.1899658624900848, + "learning_rate": 7.326029622225269e-06, + "loss": 0.2846507132053375, + "step": 4572 + }, + { + "epoch": 1.2143141681051652, + "grad_norm": 1.2218224134688922, + "learning_rate": 7.321798998109417e-06, + "loss": 0.24903801083564758, + "step": 4573 + }, + { + "epoch": 1.2145797370867082, + "grad_norm": 1.1817295734811926, + "learning_rate": 7.317568890319134e-06, + "loss": 0.23426681756973267, + "step": 4574 + }, + { + "epoch": 1.2148453060682511, + "grad_norm": 1.1685993771040228, + "learning_rate": 7.31333929966993e-06, + "loss": 0.2374490350484848, + "step": 4575 + }, + { + "epoch": 1.215110875049794, + "grad_norm": 1.13335327598736, + "learning_rate": 7.309110226977223e-06, + "loss": 0.24035832285881042, + "step": 4576 + }, + { + "epoch": 1.215376444031337, + "grad_norm": 1.2837405582571324, + "learning_rate": 7.30488167305633e-06, + "loss": 0.21872258186340332, + "step": 4577 + }, + { + "epoch": 1.21564201301288, + "grad_norm": 1.3425258296129825, + "learning_rate": 7.300653638722463e-06, + "loss": 0.2940255403518677, + "step": 4578 + }, + { + "epoch": 1.215907581994423, + "grad_norm": 1.1158795437619367, + "learning_rate": 7.29642612479074e-06, + "loss": 0.20970892906188965, + "step": 4579 + }, + { + "epoch": 1.216173150975966, + "grad_norm": 1.1571301789790744, + "learning_rate": 7.292199132076175e-06, + "loss": 0.21217449009418488, + "step": 4580 + }, + { + "epoch": 1.2164387199575089, + "grad_norm": 1.2448503896532135, + "learning_rate": 7.28797266139368e-06, + "loss": 0.2463359832763672, + "step": 4581 + }, + { + "epoch": 1.2167042889390518, + "grad_norm": 1.132320428820701, + "learning_rate": 7.283746713558071e-06, + "loss": 0.21921415627002716, + "step": 4582 + }, + { + "epoch": 1.2169698579205948, + "grad_norm": 1.2437376760058587, + "learning_rate": 7.279521289384059e-06, + "loss": 0.2412380576133728, + "step": 4583 + }, + { + "epoch": 1.2172354269021377, + "grad_norm": 1.180878934188553, + 
"learning_rate": 7.275296389686258e-06, + "loss": 0.2558564245700836, + "step": 4584 + }, + { + "epoch": 1.2175009958836809, + "grad_norm": 1.2566060880081307, + "learning_rate": 7.271072015279179e-06, + "loss": 0.2548869848251343, + "step": 4585 + }, + { + "epoch": 1.2177665648652238, + "grad_norm": 1.4407566508510072, + "learning_rate": 7.2668481669772304e-06, + "loss": 0.22183407843112946, + "step": 4586 + }, + { + "epoch": 1.2180321338467668, + "grad_norm": 1.20165829214997, + "learning_rate": 7.262624845594721e-06, + "loss": 0.24722473323345184, + "step": 4587 + }, + { + "epoch": 1.2182977028283097, + "grad_norm": 1.190564524584547, + "learning_rate": 7.258402051945858e-06, + "loss": 0.2678988575935364, + "step": 4588 + }, + { + "epoch": 1.2185632718098527, + "grad_norm": 1.187777405395345, + "learning_rate": 7.2541797868447435e-06, + "loss": 0.2116469144821167, + "step": 4589 + }, + { + "epoch": 1.2188288407913956, + "grad_norm": 1.2500071795758152, + "learning_rate": 7.249958051105383e-06, + "loss": 0.23897933959960938, + "step": 4590 + }, + { + "epoch": 1.2190944097729386, + "grad_norm": 1.2473885744661077, + "learning_rate": 7.245736845541676e-06, + "loss": 0.25434061884880066, + "step": 4591 + }, + { + "epoch": 1.2193599787544815, + "grad_norm": 1.2108382272450464, + "learning_rate": 7.2415161709674235e-06, + "loss": 0.2602628469467163, + "step": 4592 + }, + { + "epoch": 1.2196255477360245, + "grad_norm": 3.1633443202169764, + "learning_rate": 7.2372960281963165e-06, + "loss": 0.2519065737724304, + "step": 4593 + }, + { + "epoch": 1.2198911167175674, + "grad_norm": 1.550903602515833, + "learning_rate": 7.233076418041954e-06, + "loss": 0.24404102563858032, + "step": 4594 + }, + { + "epoch": 1.2201566856991104, + "grad_norm": 1.1561711817096534, + "learning_rate": 7.228857341317825e-06, + "loss": 0.23633979260921478, + "step": 4595 + }, + { + "epoch": 1.2204222546806534, + "grad_norm": 1.2128002082313463, + "learning_rate": 7.224638798837319e-06, + "loss": 
0.2513781189918518, + "step": 4596 + }, + { + "epoch": 1.2206878236621963, + "grad_norm": 1.2409533600026899, + "learning_rate": 7.220420791413721e-06, + "loss": 0.23270189762115479, + "step": 4597 + }, + { + "epoch": 1.2209533926437393, + "grad_norm": 1.2503409564498669, + "learning_rate": 7.21620331986021e-06, + "loss": 0.2770010530948639, + "step": 4598 + }, + { + "epoch": 1.2212189616252822, + "grad_norm": 1.1284522462719728, + "learning_rate": 7.2119863849898684e-06, + "loss": 0.2312745451927185, + "step": 4599 + }, + { + "epoch": 1.2214845306068252, + "grad_norm": 1.2725314186948387, + "learning_rate": 7.20776998761567e-06, + "loss": 0.231276735663414, + "step": 4600 + }, + { + "epoch": 1.221750099588368, + "grad_norm": 1.1715742737590393, + "learning_rate": 7.203554128550486e-06, + "loss": 0.24927708506584167, + "step": 4601 + }, + { + "epoch": 1.222015668569911, + "grad_norm": 1.1138441718661785, + "learning_rate": 7.199338808607084e-06, + "loss": 0.23033373057842255, + "step": 4602 + }, + { + "epoch": 1.222281237551454, + "grad_norm": 1.2545098885673684, + "learning_rate": 7.195124028598131e-06, + "loss": 0.24003425240516663, + "step": 4603 + }, + { + "epoch": 1.222546806532997, + "grad_norm": 1.1872708193619057, + "learning_rate": 7.190909789336185e-06, + "loss": 0.22648809850215912, + "step": 4604 + }, + { + "epoch": 1.22281237551454, + "grad_norm": 1.2511860493227276, + "learning_rate": 7.1866960916337006e-06, + "loss": 0.2605816125869751, + "step": 4605 + }, + { + "epoch": 1.2230779444960829, + "grad_norm": 1.1424629632361756, + "learning_rate": 7.1824829363030305e-06, + "loss": 0.21549202501773834, + "step": 4606 + }, + { + "epoch": 1.2233435134776258, + "grad_norm": 1.1532084986944064, + "learning_rate": 7.17827032415642e-06, + "loss": 0.23113220930099487, + "step": 4607 + }, + { + "epoch": 1.2236090824591688, + "grad_norm": 1.1649312720163907, + "learning_rate": 7.174058256006012e-06, + "loss": 0.22736643254756927, + "step": 4608 + }, + { + "epoch": 
1.2238746514407117, + "grad_norm": 1.172011833362534, + "learning_rate": 7.169846732663845e-06, + "loss": 0.2686663866043091, + "step": 4609 + }, + { + "epoch": 1.2241402204222547, + "grad_norm": 1.1555217624379808, + "learning_rate": 7.1656357549418485e-06, + "loss": 0.1980462670326233, + "step": 4610 + }, + { + "epoch": 1.2244057894037976, + "grad_norm": 1.2401629806715768, + "learning_rate": 7.161425323651846e-06, + "loss": 0.22997641563415527, + "step": 4611 + }, + { + "epoch": 1.2246713583853406, + "grad_norm": 1.3367939845671126, + "learning_rate": 7.157215439605567e-06, + "loss": 0.28781357407569885, + "step": 4612 + }, + { + "epoch": 1.2249369273668835, + "grad_norm": 1.2895382897388425, + "learning_rate": 7.153006103614624e-06, + "loss": 0.22558270394802094, + "step": 4613 + }, + { + "epoch": 1.2252024963484265, + "grad_norm": 1.1860196927831441, + "learning_rate": 7.148797316490527e-06, + "loss": 0.2435922622680664, + "step": 4614 + }, + { + "epoch": 1.2254680653299694, + "grad_norm": 1.2828543438888096, + "learning_rate": 7.14458907904468e-06, + "loss": 0.27840936183929443, + "step": 4615 + }, + { + "epoch": 1.2257336343115124, + "grad_norm": 1.2350405670943831, + "learning_rate": 7.1403813920883825e-06, + "loss": 0.2775651812553406, + "step": 4616 + }, + { + "epoch": 1.2259992032930553, + "grad_norm": 1.2738452228129284, + "learning_rate": 7.136174256432828e-06, + "loss": 0.2430988848209381, + "step": 4617 + }, + { + "epoch": 1.2262647722745983, + "grad_norm": 1.0618083363199646, + "learning_rate": 7.131967672889101e-06, + "loss": 0.2018759697675705, + "step": 4618 + }, + { + "epoch": 1.2265303412561412, + "grad_norm": 1.2320094058432127, + "learning_rate": 7.127761642268179e-06, + "loss": 0.25314825773239136, + "step": 4619 + }, + { + "epoch": 1.2267959102376842, + "grad_norm": 1.409693024729639, + "learning_rate": 7.123556165380935e-06, + "loss": 0.2542746365070343, + "step": 4620 + }, + { + "epoch": 1.2270614792192271, + "grad_norm": 
1.2571649384815597, + "learning_rate": 7.119351243038142e-06, + "loss": 0.2912300229072571, + "step": 4621 + }, + { + "epoch": 1.22732704820077, + "grad_norm": 1.3877507856901592, + "learning_rate": 7.115146876050454e-06, + "loss": 0.26893284916877747, + "step": 4622 + }, + { + "epoch": 1.227592617182313, + "grad_norm": 1.3833428208823224, + "learning_rate": 7.110943065228425e-06, + "loss": 0.2711215317249298, + "step": 4623 + }, + { + "epoch": 1.227858186163856, + "grad_norm": 1.346165350849743, + "learning_rate": 7.106739811382501e-06, + "loss": 0.25530266761779785, + "step": 4624 + }, + { + "epoch": 1.228123755145399, + "grad_norm": 1.268299981159743, + "learning_rate": 7.102537115323018e-06, + "loss": 0.2547178864479065, + "step": 4625 + }, + { + "epoch": 1.228389324126942, + "grad_norm": 1.5802606545447795, + "learning_rate": 7.0983349778602064e-06, + "loss": 0.27973634004592896, + "step": 4626 + }, + { + "epoch": 1.2286548931084849, + "grad_norm": 1.205257873334912, + "learning_rate": 7.0941333998041884e-06, + "loss": 0.24066339433193207, + "step": 4627 + }, + { + "epoch": 1.2289204620900278, + "grad_norm": 1.1798307734371165, + "learning_rate": 7.0899323819649816e-06, + "loss": 0.24305742979049683, + "step": 4628 + }, + { + "epoch": 1.2291860310715708, + "grad_norm": 1.163221794708842, + "learning_rate": 7.085731925152484e-06, + "loss": 0.22478783130645752, + "step": 4629 + }, + { + "epoch": 1.2294516000531137, + "grad_norm": 1.1812808698189172, + "learning_rate": 7.081532030176506e-06, + "loss": 0.24995659291744232, + "step": 4630 + }, + { + "epoch": 1.2297171690346567, + "grad_norm": 1.1575900439946216, + "learning_rate": 7.077332697846733e-06, + "loss": 0.2579454183578491, + "step": 4631 + }, + { + "epoch": 1.2299827380161996, + "grad_norm": 1.2378373931288529, + "learning_rate": 7.073133928972745e-06, + "loss": 0.2513299286365509, + "step": 4632 + }, + { + "epoch": 1.2302483069977426, + "grad_norm": 1.0751310135047412, + "learning_rate": 
7.068935724364016e-06, + "loss": 0.23344315588474274, + "step": 4633 + }, + { + "epoch": 1.2305138759792857, + "grad_norm": 1.1882346043976466, + "learning_rate": 7.064738084829912e-06, + "loss": 0.26750341057777405, + "step": 4634 + }, + { + "epoch": 1.2307794449608287, + "grad_norm": 1.1622882344241228, + "learning_rate": 7.0605410111796855e-06, + "loss": 0.22424373030662537, + "step": 4635 + }, + { + "epoch": 1.2310450139423716, + "grad_norm": 1.0711348851881108, + "learning_rate": 7.056344504222485e-06, + "loss": 0.24261844158172607, + "step": 4636 + }, + { + "epoch": 1.2313105829239146, + "grad_norm": 1.1382788327638453, + "learning_rate": 7.052148564767347e-06, + "loss": 0.22273704409599304, + "step": 4637 + }, + { + "epoch": 1.2315761519054576, + "grad_norm": 1.217398110209698, + "learning_rate": 7.047953193623195e-06, + "loss": 0.23726603388786316, + "step": 4638 + }, + { + "epoch": 1.2318417208870005, + "grad_norm": 1.1961933626954258, + "learning_rate": 7.043758391598856e-06, + "loss": 0.2612340748310089, + "step": 4639 + }, + { + "epoch": 1.2321072898685435, + "grad_norm": 1.3828917417203295, + "learning_rate": 7.039564159503034e-06, + "loss": 0.25722867250442505, + "step": 4640 + }, + { + "epoch": 1.2323728588500864, + "grad_norm": 1.2106898963951274, + "learning_rate": 7.035370498144325e-06, + "loss": 0.25940731167793274, + "step": 4641 + }, + { + "epoch": 1.2326384278316294, + "grad_norm": 1.1431229158704634, + "learning_rate": 7.03117740833122e-06, + "loss": 0.2328685224056244, + "step": 4642 + }, + { + "epoch": 1.2329039968131723, + "grad_norm": 1.360549509974518, + "learning_rate": 7.0269848908720965e-06, + "loss": 0.3019352853298187, + "step": 4643 + }, + { + "epoch": 1.2331695657947153, + "grad_norm": 1.370123584713732, + "learning_rate": 7.022792946575222e-06, + "loss": 0.2665002942085266, + "step": 4644 + }, + { + "epoch": 1.2334351347762582, + "grad_norm": 1.2172549009924116, + "learning_rate": 7.018601576248755e-06, + "loss": 
0.2425101399421692, + "step": 4645 + }, + { + "epoch": 1.2337007037578012, + "grad_norm": 1.2088470091841177, + "learning_rate": 7.014410780700743e-06, + "loss": 0.23319771885871887, + "step": 4646 + }, + { + "epoch": 1.2339662727393441, + "grad_norm": 1.1714631765087196, + "learning_rate": 7.010220560739116e-06, + "loss": 0.23033195734024048, + "step": 4647 + }, + { + "epoch": 1.234231841720887, + "grad_norm": 1.211199620492339, + "learning_rate": 7.006030917171707e-06, + "loss": 0.24682006239891052, + "step": 4648 + }, + { + "epoch": 1.23449741070243, + "grad_norm": 1.2881207045369418, + "learning_rate": 7.001841850806228e-06, + "loss": 0.25566285848617554, + "step": 4649 + }, + { + "epoch": 1.234762979683973, + "grad_norm": 1.32329780476303, + "learning_rate": 6.9976533624502784e-06, + "loss": 0.2791779339313507, + "step": 4650 + }, + { + "epoch": 1.235028548665516, + "grad_norm": 1.3093366388831746, + "learning_rate": 6.993465452911352e-06, + "loss": 0.25597846508026123, + "step": 4651 + }, + { + "epoch": 1.2352941176470589, + "grad_norm": 1.197170425293823, + "learning_rate": 6.9892781229968275e-06, + "loss": 0.24034728109836578, + "step": 4652 + }, + { + "epoch": 1.2355596866286018, + "grad_norm": 1.2583607623295634, + "learning_rate": 6.985091373513972e-06, + "loss": 0.2209509015083313, + "step": 4653 + }, + { + "epoch": 1.2358252556101448, + "grad_norm": 1.298261075070858, + "learning_rate": 6.980905205269942e-06, + "loss": 0.29106947779655457, + "step": 4654 + }, + { + "epoch": 1.2360908245916877, + "grad_norm": 1.226505577270481, + "learning_rate": 6.976719619071782e-06, + "loss": 0.24014753103256226, + "step": 4655 + }, + { + "epoch": 1.2363563935732307, + "grad_norm": 1.2297022971330018, + "learning_rate": 6.972534615726422e-06, + "loss": 0.27135470509529114, + "step": 4656 + }, + { + "epoch": 1.2366219625547736, + "grad_norm": 1.2219120714336154, + "learning_rate": 6.968350196040683e-06, + "loss": 0.23386257886886597, + "step": 4657 + }, + { + "epoch": 
1.2368875315363166, + "grad_norm": 1.1452987159774544, + "learning_rate": 6.964166360821271e-06, + "loss": 0.23119661211967468, + "step": 4658 + }, + { + "epoch": 1.2371531005178595, + "grad_norm": 1.1767967288021879, + "learning_rate": 6.959983110874782e-06, + "loss": 0.2399922013282776, + "step": 4659 + }, + { + "epoch": 1.2374186694994025, + "grad_norm": 1.0521231856668218, + "learning_rate": 6.9558004470076944e-06, + "loss": 0.18323534727096558, + "step": 4660 + }, + { + "epoch": 1.2376842384809454, + "grad_norm": 1.1985431375912965, + "learning_rate": 6.951618370026378e-06, + "loss": 0.25683268904685974, + "step": 4661 + }, + { + "epoch": 1.2379498074624884, + "grad_norm": 1.307367140627743, + "learning_rate": 6.947436880737089e-06, + "loss": 0.2861499786376953, + "step": 4662 + }, + { + "epoch": 1.2382153764440313, + "grad_norm": 1.3831407282476516, + "learning_rate": 6.943255979945965e-06, + "loss": 0.28021398186683655, + "step": 4663 + }, + { + "epoch": 1.2384809454255743, + "grad_norm": 1.2940713851528283, + "learning_rate": 6.939075668459039e-06, + "loss": 0.2739776074886322, + "step": 4664 + }, + { + "epoch": 1.2387465144071172, + "grad_norm": 1.3433235944815516, + "learning_rate": 6.934895947082221e-06, + "loss": 0.26015231013298035, + "step": 4665 + }, + { + "epoch": 1.2390120833886602, + "grad_norm": 1.3230400884249285, + "learning_rate": 6.930716816621317e-06, + "loss": 0.2572113871574402, + "step": 4666 + }, + { + "epoch": 1.2392776523702032, + "grad_norm": 1.266134559335497, + "learning_rate": 6.926538277882012e-06, + "loss": 0.24094708263874054, + "step": 4667 + }, + { + "epoch": 1.239543221351746, + "grad_norm": 1.1175335748548278, + "learning_rate": 6.92236033166988e-06, + "loss": 0.22803835570812225, + "step": 4668 + }, + { + "epoch": 1.239808790333289, + "grad_norm": 1.1198379137737728, + "learning_rate": 6.9181829787903774e-06, + "loss": 0.23672322928905487, + "step": 4669 + }, + { + "epoch": 1.240074359314832, + "grad_norm": 
1.3356297624894082, + "learning_rate": 6.91400622004885e-06, + "loss": 0.2568579912185669, + "step": 4670 + }, + { + "epoch": 1.240339928296375, + "grad_norm": 1.1768710116388783, + "learning_rate": 6.909830056250527e-06, + "loss": 0.25267845392227173, + "step": 4671 + }, + { + "epoch": 1.240605497277918, + "grad_norm": 1.2702969549109802, + "learning_rate": 6.905654488200524e-06, + "loss": 0.30336999893188477, + "step": 4672 + }, + { + "epoch": 1.2408710662594609, + "grad_norm": 1.17710991443045, + "learning_rate": 6.901479516703842e-06, + "loss": 0.2741299867630005, + "step": 4673 + }, + { + "epoch": 1.2411366352410038, + "grad_norm": 1.276658372251755, + "learning_rate": 6.897305142565363e-06, + "loss": 0.2896823585033417, + "step": 4674 + }, + { + "epoch": 1.2414022042225468, + "grad_norm": 1.2718591233587666, + "learning_rate": 6.8931313665898625e-06, + "loss": 0.23102329671382904, + "step": 4675 + }, + { + "epoch": 1.2416677732040897, + "grad_norm": 1.3209479857777737, + "learning_rate": 6.8889581895819915e-06, + "loss": 0.2600775361061096, + "step": 4676 + }, + { + "epoch": 1.2419333421856327, + "grad_norm": 1.1932453661715805, + "learning_rate": 6.884785612346291e-06, + "loss": 0.23589132726192474, + "step": 4677 + }, + { + "epoch": 1.2421989111671756, + "grad_norm": 1.155454248544126, + "learning_rate": 6.880613635687184e-06, + "loss": 0.24419361352920532, + "step": 4678 + }, + { + "epoch": 1.2424644801487186, + "grad_norm": 1.1323309321599895, + "learning_rate": 6.876442260408977e-06, + "loss": 0.23267227411270142, + "step": 4679 + }, + { + "epoch": 1.2427300491302615, + "grad_norm": 1.2244929254620942, + "learning_rate": 6.8722714873158635e-06, + "loss": 0.2507064938545227, + "step": 4680 + }, + { + "epoch": 1.2429956181118045, + "grad_norm": 1.2079227486812785, + "learning_rate": 6.868101317211922e-06, + "loss": 0.2529929280281067, + "step": 4681 + }, + { + "epoch": 1.2432611870933474, + "grad_norm": 1.1627205371245832, + "learning_rate": 
6.863931750901107e-06, + "loss": 0.23255379498004913, + "step": 4682 + }, + { + "epoch": 1.2435267560748904, + "grad_norm": 1.1997195000446994, + "learning_rate": 6.859762789187259e-06, + "loss": 0.22757332026958466, + "step": 4683 + }, + { + "epoch": 1.2437923250564333, + "grad_norm": 1.2115398233652928, + "learning_rate": 6.8555944328741145e-06, + "loss": 0.2578364312648773, + "step": 4684 + }, + { + "epoch": 1.2440578940379763, + "grad_norm": 1.1854445431935166, + "learning_rate": 6.851426682765278e-06, + "loss": 0.27568408846855164, + "step": 4685 + }, + { + "epoch": 1.2443234630195192, + "grad_norm": 1.19754548578965, + "learning_rate": 6.847259539664244e-06, + "loss": 0.25595831871032715, + "step": 4686 + }, + { + "epoch": 1.2445890320010622, + "grad_norm": 1.1807617266458326, + "learning_rate": 6.843093004374386e-06, + "loss": 0.2195426970720291, + "step": 4687 + }, + { + "epoch": 1.2448546009826051, + "grad_norm": 1.1623631531241645, + "learning_rate": 6.838927077698967e-06, + "loss": 0.23247741162776947, + "step": 4688 + }, + { + "epoch": 1.245120169964148, + "grad_norm": 1.2953467781322094, + "learning_rate": 6.834761760441127e-06, + "loss": 0.26149916648864746, + "step": 4689 + }, + { + "epoch": 1.245385738945691, + "grad_norm": 1.1310243964126157, + "learning_rate": 6.830597053403885e-06, + "loss": 0.2521447241306305, + "step": 4690 + }, + { + "epoch": 1.245651307927234, + "grad_norm": 1.1803812700297758, + "learning_rate": 6.826432957390155e-06, + "loss": 0.23401981592178345, + "step": 4691 + }, + { + "epoch": 1.245916876908777, + "grad_norm": 1.3114713754211442, + "learning_rate": 6.822269473202714e-06, + "loss": 0.25341230630874634, + "step": 4692 + }, + { + "epoch": 1.24618244589032, + "grad_norm": 1.2025537581570156, + "learning_rate": 6.818106601644248e-06, + "loss": 0.2513907551765442, + "step": 4693 + }, + { + "epoch": 1.2464480148718629, + "grad_norm": 1.2263403478965602, + "learning_rate": 6.8139443435173005e-06, + "loss": 0.2682073414325714, 
+ "step": 4694 + }, + { + "epoch": 1.2467135838534058, + "grad_norm": 1.1801313342439474, + "learning_rate": 6.809782699624308e-06, + "loss": 0.22726872563362122, + "step": 4695 + }, + { + "epoch": 1.2469791528349488, + "grad_norm": 1.3004812874511507, + "learning_rate": 6.805621670767588e-06, + "loss": 0.24184030294418335, + "step": 4696 + }, + { + "epoch": 1.247244721816492, + "grad_norm": 1.0395051535883466, + "learning_rate": 6.801461257749334e-06, + "loss": 0.203639417886734, + "step": 4697 + }, + { + "epoch": 1.2475102907980349, + "grad_norm": 1.1786557175840897, + "learning_rate": 6.797301461371626e-06, + "loss": 0.2170606106519699, + "step": 4698 + }, + { + "epoch": 1.2477758597795778, + "grad_norm": 1.1231113548110434, + "learning_rate": 6.7931422824364245e-06, + "loss": 0.2225056290626526, + "step": 4699 + }, + { + "epoch": 1.2480414287611208, + "grad_norm": 1.1702414518259399, + "learning_rate": 6.788983721745569e-06, + "loss": 0.2388974130153656, + "step": 4700 + }, + { + "epoch": 1.2483069977426637, + "grad_norm": 1.14649445863332, + "learning_rate": 6.784825780100776e-06, + "loss": 0.2291644811630249, + "step": 4701 + }, + { + "epoch": 1.2485725667242067, + "grad_norm": 1.3474164807852358, + "learning_rate": 6.7806684583036595e-06, + "loss": 0.23793739080429077, + "step": 4702 + }, + { + "epoch": 1.2488381357057496, + "grad_norm": 1.2839354787463726, + "learning_rate": 6.776511757155695e-06, + "loss": 0.2756902277469635, + "step": 4703 + }, + { + "epoch": 1.2491037046872926, + "grad_norm": 1.3039866822855, + "learning_rate": 6.772355677458249e-06, + "loss": 0.25046268105506897, + "step": 4704 + }, + { + "epoch": 1.2493692736688355, + "grad_norm": 1.3053078100109528, + "learning_rate": 6.7682002200125575e-06, + "loss": 0.238486647605896, + "step": 4705 + }, + { + "epoch": 1.2496348426503785, + "grad_norm": 1.1855651210182463, + "learning_rate": 6.764045385619751e-06, + "loss": 0.2366628348827362, + "step": 4706 + }, + { + "epoch": 1.2499004116319214, + 
"grad_norm": 1.21176387977239, + "learning_rate": 6.759891175080827e-06, + "loss": 0.24825221300125122, + "step": 4707 + }, + { + "epoch": 1.2501659806134644, + "grad_norm": 1.2922207381934139, + "learning_rate": 6.755737589196673e-06, + "loss": 0.2304186224937439, + "step": 4708 + }, + { + "epoch": 1.2504315495950074, + "grad_norm": 1.200468035859197, + "learning_rate": 6.7515846287680476e-06, + "loss": 0.2824471592903137, + "step": 4709 + }, + { + "epoch": 1.2506971185765503, + "grad_norm": 1.1994302764371214, + "learning_rate": 6.747432294595591e-06, + "loss": 0.23130697011947632, + "step": 4710 + }, + { + "epoch": 1.2509626875580933, + "grad_norm": 1.3183641444794993, + "learning_rate": 6.7432805874798334e-06, + "loss": 0.28371602296829224, + "step": 4711 + }, + { + "epoch": 1.2512282565396362, + "grad_norm": 1.1529924861272876, + "learning_rate": 6.739129508221167e-06, + "loss": 0.23452092707157135, + "step": 4712 + }, + { + "epoch": 1.2514938255211792, + "grad_norm": 1.245806995398341, + "learning_rate": 6.734979057619873e-06, + "loss": 0.22486859560012817, + "step": 4713 + }, + { + "epoch": 1.2517593945027221, + "grad_norm": 1.3481589110906722, + "learning_rate": 6.730829236476111e-06, + "loss": 0.2818532884120941, + "step": 4714 + }, + { + "epoch": 1.252024963484265, + "grad_norm": 1.172531442878329, + "learning_rate": 6.7266800455899125e-06, + "loss": 0.2060810923576355, + "step": 4715 + }, + { + "epoch": 1.252290532465808, + "grad_norm": 1.2183128764116598, + "learning_rate": 6.722531485761199e-06, + "loss": 0.2183244377374649, + "step": 4716 + }, + { + "epoch": 1.252556101447351, + "grad_norm": 1.2596677279915016, + "learning_rate": 6.71838355778976e-06, + "loss": 0.24757327139377594, + "step": 4717 + }, + { + "epoch": 1.252821670428894, + "grad_norm": 1.3267776765958388, + "learning_rate": 6.714236262475268e-06, + "loss": 0.3058333396911621, + "step": 4718 + }, + { + "epoch": 1.2530872394104369, + "grad_norm": 1.1893155452841293, + "learning_rate": 
6.71008960061727e-06, + "loss": 0.24095620214939117, + "step": 4719 + }, + { + "epoch": 1.2533528083919798, + "grad_norm": 1.3050165159615794, + "learning_rate": 6.705943573015199e-06, + "loss": 0.25614839792251587, + "step": 4720 + }, + { + "epoch": 1.2536183773735228, + "grad_norm": 1.2537185610498753, + "learning_rate": 6.701798180468356e-06, + "loss": 0.22295254468917847, + "step": 4721 + }, + { + "epoch": 1.2538839463550657, + "grad_norm": 1.1724661677534984, + "learning_rate": 6.697653423775926e-06, + "loss": 0.24783796072006226, + "step": 4722 + }, + { + "epoch": 1.2541495153366087, + "grad_norm": 1.5676339911360846, + "learning_rate": 6.693509303736969e-06, + "loss": 0.19702200591564178, + "step": 4723 + }, + { + "epoch": 1.2544150843181516, + "grad_norm": 1.2713976115459882, + "learning_rate": 6.689365821150421e-06, + "loss": 0.2539074122905731, + "step": 4724 + }, + { + "epoch": 1.2546806532996946, + "grad_norm": 1.2015875463338734, + "learning_rate": 6.6852229768150976e-06, + "loss": 0.2480372041463852, + "step": 4725 + }, + { + "epoch": 1.2549462222812375, + "grad_norm": 1.1742876462412417, + "learning_rate": 6.68108077152969e-06, + "loss": 0.2231048047542572, + "step": 4726 + }, + { + "epoch": 1.2552117912627805, + "grad_norm": 1.1571308721577904, + "learning_rate": 6.676939206092766e-06, + "loss": 0.260783851146698, + "step": 4727 + }, + { + "epoch": 1.2554773602443234, + "grad_norm": 1.2569537102203152, + "learning_rate": 6.67279828130277e-06, + "loss": 0.24069254100322723, + "step": 4728 + }, + { + "epoch": 1.2557429292258664, + "grad_norm": 1.1732343490674524, + "learning_rate": 6.668657997958027e-06, + "loss": 0.2578867971897125, + "step": 4729 + }, + { + "epoch": 1.2560084982074093, + "grad_norm": 1.102080552368197, + "learning_rate": 6.664518356856732e-06, + "loss": 0.20724457502365112, + "step": 4730 + }, + { + "epoch": 1.2562740671889523, + "grad_norm": 1.1527224778451435, + "learning_rate": 6.6603793587969586e-06, + "loss": 
0.23107580840587616, + "step": 4731 + }, + { + "epoch": 1.2565396361704952, + "grad_norm": 1.123633807819834, + "learning_rate": 6.656241004576659e-06, + "loss": 0.2481832504272461, + "step": 4732 + }, + { + "epoch": 1.2568052051520382, + "grad_norm": 1.1353422900728998, + "learning_rate": 6.652103294993657e-06, + "loss": 0.2219698578119278, + "step": 4733 + }, + { + "epoch": 1.2570707741335811, + "grad_norm": 1.1538807443087884, + "learning_rate": 6.647966230845655e-06, + "loss": 0.2245863974094391, + "step": 4734 + }, + { + "epoch": 1.257336343115124, + "grad_norm": 1.1991392114731283, + "learning_rate": 6.643829812930231e-06, + "loss": 0.2086387574672699, + "step": 4735 + }, + { + "epoch": 1.257601912096667, + "grad_norm": 1.1702949625685939, + "learning_rate": 6.6396940420448355e-06, + "loss": 0.23484499752521515, + "step": 4736 + }, + { + "epoch": 1.25786748107821, + "grad_norm": 1.1449620939429583, + "learning_rate": 6.635558918986797e-06, + "loss": 0.22011062502861023, + "step": 4737 + }, + { + "epoch": 1.258133050059753, + "grad_norm": 1.240312422577115, + "learning_rate": 6.631424444553319e-06, + "loss": 0.2426830381155014, + "step": 4738 + }, + { + "epoch": 1.258398619041296, + "grad_norm": 1.2472398676845469, + "learning_rate": 6.627290619541481e-06, + "loss": 0.2702174484729767, + "step": 4739 + }, + { + "epoch": 1.2586641880228389, + "grad_norm": 1.4005529994015682, + "learning_rate": 6.623157444748234e-06, + "loss": 0.26594820618629456, + "step": 4740 + }, + { + "epoch": 1.2589297570043818, + "grad_norm": 1.2550785934224764, + "learning_rate": 6.619024920970405e-06, + "loss": 0.2546013593673706, + "step": 4741 + }, + { + "epoch": 1.2591953259859248, + "grad_norm": 1.425429985784882, + "learning_rate": 6.614893049004696e-06, + "loss": 0.27207985520362854, + "step": 4742 + }, + { + "epoch": 1.259460894967468, + "grad_norm": 1.4445692953489113, + "learning_rate": 6.610761829647685e-06, + "loss": 0.2640937566757202, + "step": 4743 + }, + { + "epoch": 
1.2597264639490109, + "grad_norm": 1.4095791296432063, + "learning_rate": 6.60663126369582e-06, + "loss": 0.2890278697013855, + "step": 4744 + }, + { + "epoch": 1.2599920329305538, + "grad_norm": 1.1225606468440805, + "learning_rate": 6.602501351945425e-06, + "loss": 0.24610492587089539, + "step": 4745 + }, + { + "epoch": 1.2602576019120968, + "grad_norm": 1.5273064552741338, + "learning_rate": 6.598372095192699e-06, + "loss": 0.24946746230125427, + "step": 4746 + }, + { + "epoch": 1.2605231708936397, + "grad_norm": 1.0546449518544165, + "learning_rate": 6.594243494233717e-06, + "loss": 0.2369944453239441, + "step": 4747 + }, + { + "epoch": 1.2607887398751827, + "grad_norm": 1.180556169492091, + "learning_rate": 6.590115549864421e-06, + "loss": 0.20980143547058105, + "step": 4748 + }, + { + "epoch": 1.2610543088567256, + "grad_norm": 1.1524244978042124, + "learning_rate": 6.5859882628806315e-06, + "loss": 0.22930344939231873, + "step": 4749 + }, + { + "epoch": 1.2613198778382686, + "grad_norm": 1.1353386909454481, + "learning_rate": 6.5818616340780405e-06, + "loss": 0.22352416813373566, + "step": 4750 + }, + { + "epoch": 1.2615854468198116, + "grad_norm": 1.0615225488277533, + "learning_rate": 6.577735664252214e-06, + "loss": 0.2049327939748764, + "step": 4751 + }, + { + "epoch": 1.2618510158013545, + "grad_norm": 1.3420243952278277, + "learning_rate": 6.573610354198587e-06, + "loss": 0.21858355402946472, + "step": 4752 + }, + { + "epoch": 1.2621165847828975, + "grad_norm": 1.1248247337478985, + "learning_rate": 6.5694857047124786e-06, + "loss": 0.225118950009346, + "step": 4753 + }, + { + "epoch": 1.2623821537644404, + "grad_norm": 1.1623337764465298, + "learning_rate": 6.565361716589063e-06, + "loss": 0.25780409574508667, + "step": 4754 + }, + { + "epoch": 1.2626477227459834, + "grad_norm": 1.1580907073042885, + "learning_rate": 6.5612383906233964e-06, + "loss": 0.23507939279079437, + "step": 4755 + }, + { + "epoch": 1.2629132917275263, + "grad_norm": 
1.1733914893757196, + "learning_rate": 6.557115727610417e-06, + "loss": 0.27884477376937866, + "step": 4756 + }, + { + "epoch": 1.2631788607090693, + "grad_norm": 1.145599873702901, + "learning_rate": 6.552993728344921e-06, + "loss": 0.2564120888710022, + "step": 4757 + }, + { + "epoch": 1.2634444296906122, + "grad_norm": 1.3139857622357067, + "learning_rate": 6.548872393621578e-06, + "loss": 0.259651243686676, + "step": 4758 + }, + { + "epoch": 1.2637099986721552, + "grad_norm": 1.2930462493551071, + "learning_rate": 6.544751724234937e-06, + "loss": 0.23473814129829407, + "step": 4759 + }, + { + "epoch": 1.2639755676536981, + "grad_norm": 1.4411652435541018, + "learning_rate": 6.540631720979411e-06, + "loss": 0.2447129189968109, + "step": 4760 + }, + { + "epoch": 1.264241136635241, + "grad_norm": 1.1968236723875711, + "learning_rate": 6.536512384649294e-06, + "loss": 0.22695237398147583, + "step": 4761 + }, + { + "epoch": 1.264506705616784, + "grad_norm": 1.117214929215876, + "learning_rate": 6.532393716038738e-06, + "loss": 0.24303656816482544, + "step": 4762 + }, + { + "epoch": 1.264772274598327, + "grad_norm": 1.2106972269991043, + "learning_rate": 6.528275715941776e-06, + "loss": 0.23911908268928528, + "step": 4763 + }, + { + "epoch": 1.26503784357987, + "grad_norm": 1.0480584899589354, + "learning_rate": 6.524158385152309e-06, + "loss": 0.19766747951507568, + "step": 4764 + }, + { + "epoch": 1.2653034125614129, + "grad_norm": 1.390914844473808, + "learning_rate": 6.520041724464114e-06, + "loss": 0.24074134230613708, + "step": 4765 + }, + { + "epoch": 1.2655689815429558, + "grad_norm": 1.3379815630375766, + "learning_rate": 6.515925734670834e-06, + "loss": 0.27557867765426636, + "step": 4766 + }, + { + "epoch": 1.2658345505244988, + "grad_norm": 1.3286252957995823, + "learning_rate": 6.511810416565979e-06, + "loss": 0.24387787282466888, + "step": 4767 + }, + { + "epoch": 1.2661001195060417, + "grad_norm": 1.4234035593814256, + "learning_rate": 
6.507695770942939e-06, + "loss": 0.27863091230392456, + "step": 4768 + }, + { + "epoch": 1.2663656884875847, + "grad_norm": 1.1364646133588507, + "learning_rate": 6.503581798594965e-06, + "loss": 0.23589591681957245, + "step": 4769 + }, + { + "epoch": 1.2666312574691276, + "grad_norm": 1.1932509985997282, + "learning_rate": 6.499468500315185e-06, + "loss": 0.22869807481765747, + "step": 4770 + }, + { + "epoch": 1.2668968264506706, + "grad_norm": 1.2498634762148577, + "learning_rate": 6.495355876896592e-06, + "loss": 0.2351568192243576, + "step": 4771 + }, + { + "epoch": 1.2671623954322135, + "grad_norm": 1.1271253337210285, + "learning_rate": 6.491243929132052e-06, + "loss": 0.2291228175163269, + "step": 4772 + }, + { + "epoch": 1.2674279644137565, + "grad_norm": 1.2013953219342957, + "learning_rate": 6.487132657814297e-06, + "loss": 0.23203743994235992, + "step": 4773 + }, + { + "epoch": 1.2676935333952994, + "grad_norm": 1.0887907712326863, + "learning_rate": 6.483022063735938e-06, + "loss": 0.22035656869411469, + "step": 4774 + }, + { + "epoch": 1.2679591023768424, + "grad_norm": 1.1270651148723736, + "learning_rate": 6.478912147689448e-06, + "loss": 0.21576716005802155, + "step": 4775 + }, + { + "epoch": 1.2682246713583853, + "grad_norm": 1.3174966546949713, + "learning_rate": 6.474802910467171e-06, + "loss": 0.27764660120010376, + "step": 4776 + }, + { + "epoch": 1.2684902403399283, + "grad_norm": 1.2418434137314485, + "learning_rate": 6.4706943528613135e-06, + "loss": 0.23715822398662567, + "step": 4777 + }, + { + "epoch": 1.2687558093214713, + "grad_norm": 1.1794293567561218, + "learning_rate": 6.4665864756639606e-06, + "loss": 0.27764302492141724, + "step": 4778 + }, + { + "epoch": 1.2690213783030142, + "grad_norm": 1.2157630211554828, + "learning_rate": 6.4624792796670624e-06, + "loss": 0.21634885668754578, + "step": 4779 + }, + { + "epoch": 1.2692869472845572, + "grad_norm": 1.2217447541656432, + "learning_rate": 6.458372765662438e-06, + "loss": 
0.27262234687805176, + "step": 4780 + }, + { + "epoch": 1.2695525162661, + "grad_norm": 1.1716437260315133, + "learning_rate": 6.454266934441775e-06, + "loss": 0.2219458371400833, + "step": 4781 + }, + { + "epoch": 1.269818085247643, + "grad_norm": 1.2515340549821425, + "learning_rate": 6.450161786796625e-06, + "loss": 0.22181497514247894, + "step": 4782 + }, + { + "epoch": 1.270083654229186, + "grad_norm": 1.1858127036353512, + "learning_rate": 6.446057323518422e-06, + "loss": 0.22642338275909424, + "step": 4783 + }, + { + "epoch": 1.270349223210729, + "grad_norm": 1.2243357553110101, + "learning_rate": 6.441953545398451e-06, + "loss": 0.239711195230484, + "step": 4784 + }, + { + "epoch": 1.270614792192272, + "grad_norm": 1.29507599792429, + "learning_rate": 6.437850453227872e-06, + "loss": 0.2422255128622055, + "step": 4785 + }, + { + "epoch": 1.2708803611738149, + "grad_norm": 1.3013507424737665, + "learning_rate": 6.433748047797715e-06, + "loss": 0.23184439539909363, + "step": 4786 + }, + { + "epoch": 1.2711459301553578, + "grad_norm": 1.3032581886502261, + "learning_rate": 6.429646329898873e-06, + "loss": 0.2737428843975067, + "step": 4787 + }, + { + "epoch": 1.2714114991369008, + "grad_norm": 1.2565288812855064, + "learning_rate": 6.4255453003221115e-06, + "loss": 0.23565897345542908, + "step": 4788 + }, + { + "epoch": 1.2716770681184437, + "grad_norm": 1.3665497750328797, + "learning_rate": 6.421444959858059e-06, + "loss": 0.24349254369735718, + "step": 4789 + }, + { + "epoch": 1.2719426370999867, + "grad_norm": 1.2050219186384792, + "learning_rate": 6.4173453092972115e-06, + "loss": 0.2637769281864166, + "step": 4790 + }, + { + "epoch": 1.2722082060815296, + "grad_norm": 1.0381858832581394, + "learning_rate": 6.413246349429934e-06, + "loss": 0.21420228481292725, + "step": 4791 + }, + { + "epoch": 1.2724737750630726, + "grad_norm": 1.1333618917642097, + "learning_rate": 6.409148081046461e-06, + "loss": 0.25270405411720276, + "step": 4792 + }, + { + "epoch": 
1.2727393440446155, + "grad_norm": 1.270676964933882, + "learning_rate": 6.405050504936887e-06, + "loss": 0.2710546851158142, + "step": 4793 + }, + { + "epoch": 1.2730049130261585, + "grad_norm": 1.1608891040490155, + "learning_rate": 6.400953621891178e-06, + "loss": 0.2388489842414856, + "step": 4794 + }, + { + "epoch": 1.2732704820077014, + "grad_norm": 1.1600463634666516, + "learning_rate": 6.396857432699164e-06, + "loss": 0.24581485986709595, + "step": 4795 + }, + { + "epoch": 1.2735360509892444, + "grad_norm": 1.18464881130754, + "learning_rate": 6.3927619381505404e-06, + "loss": 0.24219104647636414, + "step": 4796 + }, + { + "epoch": 1.2738016199707873, + "grad_norm": 1.0878857914267965, + "learning_rate": 6.388667139034873e-06, + "loss": 0.22722014784812927, + "step": 4797 + }, + { + "epoch": 1.2740671889523303, + "grad_norm": 1.275017638940232, + "learning_rate": 6.384573036141589e-06, + "loss": 0.25177234411239624, + "step": 4798 + }, + { + "epoch": 1.2743327579338732, + "grad_norm": 1.2824350948041237, + "learning_rate": 6.380479630259983e-06, + "loss": 0.2291412651538849, + "step": 4799 + }, + { + "epoch": 1.2745983269154162, + "grad_norm": 1.3215047708165757, + "learning_rate": 6.376386922179216e-06, + "loss": 0.2528606951236725, + "step": 4800 + }, + { + "epoch": 1.2748638958969591, + "grad_norm": 1.11001311385955, + "learning_rate": 6.372294912688315e-06, + "loss": 0.21383032202720642, + "step": 4801 + }, + { + "epoch": 1.275129464878502, + "grad_norm": 1.2162134010863295, + "learning_rate": 6.368203602576168e-06, + "loss": 0.2538087069988251, + "step": 4802 + }, + { + "epoch": 1.275395033860045, + "grad_norm": 1.2127822206191197, + "learning_rate": 6.364112992631537e-06, + "loss": 0.24437417089939117, + "step": 4803 + }, + { + "epoch": 1.275660602841588, + "grad_norm": 1.1678428848154245, + "learning_rate": 6.360023083643036e-06, + "loss": 0.2347753942012787, + "step": 4804 + }, + { + "epoch": 1.275926171823131, + "grad_norm": 1.226812886332051, + 
"learning_rate": 6.3559338763991576e-06, + "loss": 0.271645188331604, + "step": 4805 + }, + { + "epoch": 1.276191740804674, + "grad_norm": 1.2088165730060163, + "learning_rate": 6.35184537168825e-06, + "loss": 0.2465275228023529, + "step": 4806 + }, + { + "epoch": 1.2764573097862169, + "grad_norm": 1.216147524532817, + "learning_rate": 6.347757570298527e-06, + "loss": 0.26494044065475464, + "step": 4807 + }, + { + "epoch": 1.2767228787677598, + "grad_norm": 3.360286997098956, + "learning_rate": 6.343670473018071e-06, + "loss": 0.28292080760002136, + "step": 4808 + }, + { + "epoch": 1.2769884477493028, + "grad_norm": 1.2160142828428218, + "learning_rate": 6.339584080634824e-06, + "loss": 0.2525850534439087, + "step": 4809 + }, + { + "epoch": 1.2772540167308457, + "grad_norm": 1.224576908350391, + "learning_rate": 6.335498393936597e-06, + "loss": 0.22056345641613007, + "step": 4810 + }, + { + "epoch": 1.2775195857123887, + "grad_norm": 1.1603347806824698, + "learning_rate": 6.331413413711061e-06, + "loss": 0.23081058263778687, + "step": 4811 + }, + { + "epoch": 1.2777851546939316, + "grad_norm": 1.2309265633693007, + "learning_rate": 6.327329140745751e-06, + "loss": 0.2722470760345459, + "step": 4812 + }, + { + "epoch": 1.2780507236754748, + "grad_norm": 1.2598117885787161, + "learning_rate": 6.32324557582807e-06, + "loss": 0.24454641342163086, + "step": 4813 + }, + { + "epoch": 1.2783162926570177, + "grad_norm": 1.2713820573097572, + "learning_rate": 6.319162719745277e-06, + "loss": 0.21884413063526154, + "step": 4814 + }, + { + "epoch": 1.2785818616385607, + "grad_norm": 1.276590514388197, + "learning_rate": 6.3150805732845e-06, + "loss": 0.2737545669078827, + "step": 4815 + }, + { + "epoch": 1.2788474306201036, + "grad_norm": 1.1747258996206047, + "learning_rate": 6.31099913723273e-06, + "loss": 0.2478230595588684, + "step": 4816 + }, + { + "epoch": 1.2791129996016466, + "grad_norm": 1.2461752717378811, + "learning_rate": 6.306918412376817e-06, + "loss": 
0.2508094310760498, + "step": 4817 + }, + { + "epoch": 1.2793785685831895, + "grad_norm": 1.267840547546021, + "learning_rate": 6.302838399503477e-06, + "loss": 0.24666383862495422, + "step": 4818 + }, + { + "epoch": 1.2796441375647325, + "grad_norm": 1.176059099377582, + "learning_rate": 6.298759099399292e-06, + "loss": 0.27833491563796997, + "step": 4819 + }, + { + "epoch": 1.2799097065462754, + "grad_norm": 1.1948595147219725, + "learning_rate": 6.294680512850699e-06, + "loss": 0.23092475533485413, + "step": 4820 + }, + { + "epoch": 1.2801752755278184, + "grad_norm": 1.1935160504644853, + "learning_rate": 6.290602640644005e-06, + "loss": 0.2714667022228241, + "step": 4821 + }, + { + "epoch": 1.2804408445093614, + "grad_norm": 1.1769422055863235, + "learning_rate": 6.286525483565373e-06, + "loss": 0.23292411863803864, + "step": 4822 + }, + { + "epoch": 1.2807064134909043, + "grad_norm": 1.1322856806053188, + "learning_rate": 6.282449042400831e-06, + "loss": 0.23809143900871277, + "step": 4823 + }, + { + "epoch": 1.2809719824724473, + "grad_norm": 1.0235534573008647, + "learning_rate": 6.278373317936269e-06, + "loss": 0.22593267261981964, + "step": 4824 + }, + { + "epoch": 1.2812375514539902, + "grad_norm": 1.2491300300411192, + "learning_rate": 6.274298310957439e-06, + "loss": 0.26024624705314636, + "step": 4825 + }, + { + "epoch": 1.2815031204355332, + "grad_norm": 1.138185007529017, + "learning_rate": 6.270224022249957e-06, + "loss": 0.22418126463890076, + "step": 4826 + }, + { + "epoch": 1.2817686894170761, + "grad_norm": 1.2374650134400174, + "learning_rate": 6.266150452599288e-06, + "loss": 0.26452577114105225, + "step": 4827 + }, + { + "epoch": 1.282034258398619, + "grad_norm": 1.2453587043668277, + "learning_rate": 6.262077602790779e-06, + "loss": 0.24412381649017334, + "step": 4828 + }, + { + "epoch": 1.282299827380162, + "grad_norm": 1.1670875672055734, + "learning_rate": 6.258005473609623e-06, + "loss": 0.22476118803024292, + "step": 4829 + }, + { + 
"epoch": 1.282565396361705, + "grad_norm": 1.1744502576491334, + "learning_rate": 6.25393406584088e-06, + "loss": 0.2208547294139862, + "step": 4830 + }, + { + "epoch": 1.282830965343248, + "grad_norm": 1.340282271944368, + "learning_rate": 6.249863380269467e-06, + "loss": 0.2903650999069214, + "step": 4831 + }, + { + "epoch": 1.2830965343247909, + "grad_norm": 1.2018727401561922, + "learning_rate": 6.245793417680168e-06, + "loss": 0.24413639307022095, + "step": 4832 + }, + { + "epoch": 1.2833621033063338, + "grad_norm": 1.162422850806728, + "learning_rate": 6.241724178857621e-06, + "loss": 0.2193944752216339, + "step": 4833 + }, + { + "epoch": 1.2836276722878768, + "grad_norm": 1.2159517583191957, + "learning_rate": 6.237655664586326e-06, + "loss": 0.22847513854503632, + "step": 4834 + }, + { + "epoch": 1.2838932412694197, + "grad_norm": 1.4211501406512423, + "learning_rate": 6.233587875650648e-06, + "loss": 0.269639253616333, + "step": 4835 + }, + { + "epoch": 1.2841588102509627, + "grad_norm": 1.3153478129856002, + "learning_rate": 6.229520812834801e-06, + "loss": 0.26329392194747925, + "step": 4836 + }, + { + "epoch": 1.2844243792325056, + "grad_norm": 1.0811891602166492, + "learning_rate": 6.225454476922877e-06, + "loss": 0.18800514936447144, + "step": 4837 + }, + { + "epoch": 1.2846899482140486, + "grad_norm": 1.2987987933289529, + "learning_rate": 6.2213888686988125e-06, + "loss": 0.2617965340614319, + "step": 4838 + }, + { + "epoch": 1.2849555171955915, + "grad_norm": 1.2029687476094635, + "learning_rate": 6.217323988946411e-06, + "loss": 0.22468717396259308, + "step": 4839 + }, + { + "epoch": 1.2852210861771345, + "grad_norm": 1.2126923104659393, + "learning_rate": 6.213259838449333e-06, + "loss": 0.22465646266937256, + "step": 4840 + }, + { + "epoch": 1.2854866551586774, + "grad_norm": 1.243457795287806, + "learning_rate": 6.209196417991096e-06, + "loss": 0.2655075490474701, + "step": 4841 + }, + { + "epoch": 1.2857522241402204, + "grad_norm": 
1.2818071805394324, + "learning_rate": 6.205133728355081e-06, + "loss": 0.25313282012939453, + "step": 4842 + }, + { + "epoch": 1.2860177931217633, + "grad_norm": 1.2136879668034726, + "learning_rate": 6.201071770324527e-06, + "loss": 0.23176322877407074, + "step": 4843 + }, + { + "epoch": 1.2862833621033063, + "grad_norm": 1.3628911983979357, + "learning_rate": 6.197010544682531e-06, + "loss": 0.27396953105926514, + "step": 4844 + }, + { + "epoch": 1.2865489310848492, + "grad_norm": 1.2333432651370633, + "learning_rate": 6.192950052212046e-06, + "loss": 0.24966171383857727, + "step": 4845 + }, + { + "epoch": 1.2868145000663922, + "grad_norm": 1.184789059228899, + "learning_rate": 6.188890293695895e-06, + "loss": 0.23290866613388062, + "step": 4846 + }, + { + "epoch": 1.2870800690479351, + "grad_norm": 1.2080105834836115, + "learning_rate": 6.184831269916749e-06, + "loss": 0.2368975132703781, + "step": 4847 + }, + { + "epoch": 1.287345638029478, + "grad_norm": 1.35199057217418, + "learning_rate": 6.180772981657139e-06, + "loss": 0.25305312871932983, + "step": 4848 + }, + { + "epoch": 1.287611207011021, + "grad_norm": 1.1825950927599171, + "learning_rate": 6.176715429699452e-06, + "loss": 0.22752982378005981, + "step": 4849 + }, + { + "epoch": 1.287876775992564, + "grad_norm": 1.152582857494987, + "learning_rate": 6.1726586148259395e-06, + "loss": 0.22426503896713257, + "step": 4850 + }, + { + "epoch": 1.288142344974107, + "grad_norm": 1.2203273234703247, + "learning_rate": 6.168602537818706e-06, + "loss": 0.21261993050575256, + "step": 4851 + }, + { + "epoch": 1.28840791395565, + "grad_norm": 1.1907151660933317, + "learning_rate": 6.1645471994597185e-06, + "loss": 0.237461656332016, + "step": 4852 + }, + { + "epoch": 1.2886734829371929, + "grad_norm": 1.113120156932308, + "learning_rate": 6.160492600530794e-06, + "loss": 0.1926390826702118, + "step": 4853 + }, + { + "epoch": 1.2889390519187358, + "grad_norm": 1.6824005161064397, + "learning_rate": 
6.156438741813608e-06, + "loss": 0.22673740983009338, + "step": 4854 + }, + { + "epoch": 1.289204620900279, + "grad_norm": 1.1453361708789405, + "learning_rate": 6.15238562408971e-06, + "loss": 0.22148582339286804, + "step": 4855 + }, + { + "epoch": 1.289470189881822, + "grad_norm": 1.3581323367394031, + "learning_rate": 6.148333248140483e-06, + "loss": 0.28319716453552246, + "step": 4856 + }, + { + "epoch": 1.289735758863365, + "grad_norm": 1.4367360633574449, + "learning_rate": 6.14428161474718e-06, + "loss": 0.23505647480487823, + "step": 4857 + }, + { + "epoch": 1.2900013278449078, + "grad_norm": 1.2052965186154045, + "learning_rate": 6.140230724690908e-06, + "loss": 0.24323523044586182, + "step": 4858 + }, + { + "epoch": 1.2902668968264508, + "grad_norm": 1.2357784405363281, + "learning_rate": 6.136180578752629e-06, + "loss": 0.22818386554718018, + "step": 4859 + }, + { + "epoch": 1.2905324658079937, + "grad_norm": 1.2670464740614045, + "learning_rate": 6.132131177713165e-06, + "loss": 0.24285198748111725, + "step": 4860 + }, + { + "epoch": 1.2907980347895367, + "grad_norm": 1.1369753370104339, + "learning_rate": 6.128082522353194e-06, + "loss": 0.24115213751792908, + "step": 4861 + }, + { + "epoch": 1.2910636037710796, + "grad_norm": 1.2213111344560537, + "learning_rate": 6.124034613453247e-06, + "loss": 0.21564510464668274, + "step": 4862 + }, + { + "epoch": 1.2913291727526226, + "grad_norm": 1.299973209896211, + "learning_rate": 6.119987451793711e-06, + "loss": 0.2329743504524231, + "step": 4863 + }, + { + "epoch": 1.2915947417341656, + "grad_norm": 1.2218786239106318, + "learning_rate": 6.115941038154835e-06, + "loss": 0.2161208689212799, + "step": 4864 + }, + { + "epoch": 1.2918603107157085, + "grad_norm": 1.2078035628631776, + "learning_rate": 6.111895373316721e-06, + "loss": 0.22765520215034485, + "step": 4865 + }, + { + "epoch": 1.2921258796972515, + "grad_norm": 1.2199257873933993, + "learning_rate": 6.107850458059322e-06, + "loss": 
0.25506818294525146, + "step": 4866 + }, + { + "epoch": 1.2923914486787944, + "grad_norm": 1.2014544077782259, + "learning_rate": 6.1038062931624505e-06, + "loss": 0.22543852031230927, + "step": 4867 + }, + { + "epoch": 1.2926570176603374, + "grad_norm": 1.282222410309602, + "learning_rate": 6.099762879405776e-06, + "loss": 0.24295030534267426, + "step": 4868 + }, + { + "epoch": 1.2929225866418803, + "grad_norm": 1.2221545432256802, + "learning_rate": 6.095720217568819e-06, + "loss": 0.2385009229183197, + "step": 4869 + }, + { + "epoch": 1.2931881556234233, + "grad_norm": 1.119514297375773, + "learning_rate": 6.091678308430956e-06, + "loss": 0.21410472691059113, + "step": 4870 + }, + { + "epoch": 1.2934537246049662, + "grad_norm": 1.299309717988783, + "learning_rate": 6.087637152771422e-06, + "loss": 0.25934773683547974, + "step": 4871 + }, + { + "epoch": 1.2937192935865092, + "grad_norm": 1.1783576597419445, + "learning_rate": 6.0835967513693e-06, + "loss": 0.24584373831748962, + "step": 4872 + }, + { + "epoch": 1.2939848625680521, + "grad_norm": 1.3413866916188153, + "learning_rate": 6.079557105003537e-06, + "loss": 0.2403055876493454, + "step": 4873 + }, + { + "epoch": 1.294250431549595, + "grad_norm": 1.2348806886655737, + "learning_rate": 6.075518214452927e-06, + "loss": 0.23861736059188843, + "step": 4874 + }, + { + "epoch": 1.294516000531138, + "grad_norm": 1.2099712971645404, + "learning_rate": 6.071480080496119e-06, + "loss": 0.21356427669525146, + "step": 4875 + }, + { + "epoch": 1.294781569512681, + "grad_norm": 1.314183683224707, + "learning_rate": 6.067442703911621e-06, + "loss": 0.2835869789123535, + "step": 4876 + }, + { + "epoch": 1.295047138494224, + "grad_norm": 1.1868362719294436, + "learning_rate": 6.063406085477788e-06, + "loss": 0.24233242869377136, + "step": 4877 + }, + { + "epoch": 1.2953127074757669, + "grad_norm": 1.2596980829406919, + "learning_rate": 6.059370225972834e-06, + "loss": 0.24986369907855988, + "step": 4878 + }, + { + "epoch": 
1.2955782764573098, + "grad_norm": 1.2583930460503605, + "learning_rate": 6.055335126174826e-06, + "loss": 0.2445756494998932, + "step": 4879 + }, + { + "epoch": 1.2958438454388528, + "grad_norm": 1.0635663336037695, + "learning_rate": 6.0513007868616825e-06, + "loss": 0.21331898868083954, + "step": 4880 + }, + { + "epoch": 1.2961094144203957, + "grad_norm": 1.1578193819974294, + "learning_rate": 6.047267208811174e-06, + "loss": 0.2782329320907593, + "step": 4881 + }, + { + "epoch": 1.2963749834019387, + "grad_norm": 2.326385436360766, + "learning_rate": 6.043234392800932e-06, + "loss": 0.20866765081882477, + "step": 4882 + }, + { + "epoch": 1.2966405523834816, + "grad_norm": 1.3211750202424803, + "learning_rate": 6.039202339608432e-06, + "loss": 0.2517815828323364, + "step": 4883 + }, + { + "epoch": 1.2969061213650246, + "grad_norm": 1.283845753322191, + "learning_rate": 6.03517105001101e-06, + "loss": 0.2617926597595215, + "step": 4884 + }, + { + "epoch": 1.2971716903465675, + "grad_norm": 1.3255504140080887, + "learning_rate": 6.0311405247858465e-06, + "loss": 0.24753305315971375, + "step": 4885 + }, + { + "epoch": 1.2974372593281105, + "grad_norm": 1.1805849927447047, + "learning_rate": 6.027110764709982e-06, + "loss": 0.19791719317436218, + "step": 4886 + }, + { + "epoch": 1.2977028283096534, + "grad_norm": 1.236398594932959, + "learning_rate": 6.023081770560307e-06, + "loss": 0.243608757853508, + "step": 4887 + }, + { + "epoch": 1.2979683972911964, + "grad_norm": 1.3652744342035896, + "learning_rate": 6.019053543113564e-06, + "loss": 0.20469853281974792, + "step": 4888 + }, + { + "epoch": 1.2982339662727393, + "grad_norm": 1.4682720215540639, + "learning_rate": 6.015026083146345e-06, + "loss": 0.25613903999328613, + "step": 4889 + }, + { + "epoch": 1.2984995352542823, + "grad_norm": 1.236223607561111, + "learning_rate": 6.010999391435097e-06, + "loss": 0.23349006474018097, + "step": 4890 + }, + { + "epoch": 1.2987651042358253, + "grad_norm": 
1.1137410591057113, + "learning_rate": 6.006973468756124e-06, + "loss": 0.23646268248558044, + "step": 4891 + }, + { + "epoch": 1.2990306732173682, + "grad_norm": 1.2845979720118916, + "learning_rate": 6.002948315885572e-06, + "loss": 0.2371794581413269, + "step": 4892 + }, + { + "epoch": 1.2992962421989112, + "grad_norm": 1.1150236044260142, + "learning_rate": 5.998923933599443e-06, + "loss": 0.23791949450969696, + "step": 4893 + }, + { + "epoch": 1.299561811180454, + "grad_norm": 1.2865838186648229, + "learning_rate": 5.994900322673593e-06, + "loss": 0.26923009753227234, + "step": 4894 + }, + { + "epoch": 1.299827380161997, + "grad_norm": 1.2724647699376699, + "learning_rate": 5.990877483883723e-06, + "loss": 0.20164884626865387, + "step": 4895 + }, + { + "epoch": 1.30009294914354, + "grad_norm": 1.1263986142938482, + "learning_rate": 5.986855418005393e-06, + "loss": 0.22345462441444397, + "step": 4896 + }, + { + "epoch": 1.300358518125083, + "grad_norm": 1.2936789930425872, + "learning_rate": 5.982834125814007e-06, + "loss": 0.26678675413131714, + "step": 4897 + }, + { + "epoch": 1.300624087106626, + "grad_norm": 1.3112472329084983, + "learning_rate": 5.978813608084825e-06, + "loss": 0.24674496054649353, + "step": 4898 + }, + { + "epoch": 1.3008896560881689, + "grad_norm": 1.3746634467420622, + "learning_rate": 5.974793865592947e-06, + "loss": 0.2804900109767914, + "step": 4899 + }, + { + "epoch": 1.3011552250697118, + "grad_norm": 1.3113866221822363, + "learning_rate": 5.970774899113345e-06, + "loss": 0.2413155734539032, + "step": 4900 + }, + { + "epoch": 1.3014207940512548, + "grad_norm": 1.139036608300987, + "learning_rate": 5.96675670942082e-06, + "loss": 0.21217301487922668, + "step": 4901 + }, + { + "epoch": 1.3016863630327977, + "grad_norm": 1.2012277530250777, + "learning_rate": 5.962739297290035e-06, + "loss": 0.23362940549850464, + "step": 4902 + }, + { + "epoch": 1.3019519320143407, + "grad_norm": 1.251148135143295, + "learning_rate": 
5.958722663495499e-06, + "loss": 0.2669242322444916, + "step": 4903 + }, + { + "epoch": 1.3022175009958836, + "grad_norm": 1.2365395348631665, + "learning_rate": 5.95470680881157e-06, + "loss": 0.2234608232975006, + "step": 4904 + }, + { + "epoch": 1.3024830699774266, + "grad_norm": 1.2441781101215288, + "learning_rate": 5.95069173401246e-06, + "loss": 0.25150394439697266, + "step": 4905 + }, + { + "epoch": 1.3027486389589695, + "grad_norm": 1.127228294882686, + "learning_rate": 5.9466774398722264e-06, + "loss": 0.2408430427312851, + "step": 4906 + }, + { + "epoch": 1.3030142079405125, + "grad_norm": 1.1200862415380408, + "learning_rate": 5.942663927164776e-06, + "loss": 0.2197013795375824, + "step": 4907 + }, + { + "epoch": 1.3032797769220554, + "grad_norm": 1.1474317141184802, + "learning_rate": 5.938651196663865e-06, + "loss": 0.2224964201450348, + "step": 4908 + }, + { + "epoch": 1.3035453459035984, + "grad_norm": 1.313380369558454, + "learning_rate": 5.934639249143108e-06, + "loss": 0.26466232538223267, + "step": 4909 + }, + { + "epoch": 1.3038109148851413, + "grad_norm": 1.2910852400248352, + "learning_rate": 5.930628085375958e-06, + "loss": 0.257996141910553, + "step": 4910 + }, + { + "epoch": 1.3040764838666843, + "grad_norm": 1.2056479933898356, + "learning_rate": 5.92661770613572e-06, + "loss": 0.21995162963867188, + "step": 4911 + }, + { + "epoch": 1.3043420528482272, + "grad_norm": 1.3003100511120855, + "learning_rate": 5.922608112195546e-06, + "loss": 0.26007258892059326, + "step": 4912 + }, + { + "epoch": 1.3046076218297702, + "grad_norm": 1.2951583817832037, + "learning_rate": 5.918599304328442e-06, + "loss": 0.25168827176094055, + "step": 4913 + }, + { + "epoch": 1.3048731908113131, + "grad_norm": 1.1932184000685677, + "learning_rate": 5.9145912833072535e-06, + "loss": 0.24686852097511292, + "step": 4914 + }, + { + "epoch": 1.305138759792856, + "grad_norm": 1.1951264683753895, + "learning_rate": 5.910584049904684e-06, + "loss": 0.247032031416893, + 
"step": 4915 + }, + { + "epoch": 1.305404328774399, + "grad_norm": 1.1517786776797445, + "learning_rate": 5.906577604893278e-06, + "loss": 0.21644674241542816, + "step": 4916 + }, + { + "epoch": 1.305669897755942, + "grad_norm": 1.3685662184124912, + "learning_rate": 5.9025719490454304e-06, + "loss": 0.28093478083610535, + "step": 4917 + }, + { + "epoch": 1.305935466737485, + "grad_norm": 1.2246452754262638, + "learning_rate": 5.898567083133389e-06, + "loss": 0.23731757700443268, + "step": 4918 + }, + { + "epoch": 1.306201035719028, + "grad_norm": 1.1125400405938466, + "learning_rate": 5.894563007929243e-06, + "loss": 0.20725491642951965, + "step": 4919 + }, + { + "epoch": 1.3064666047005709, + "grad_norm": 1.3186749566879576, + "learning_rate": 5.89055972420493e-06, + "loss": 0.2509433329105377, + "step": 4920 + }, + { + "epoch": 1.3067321736821138, + "grad_norm": 1.2793911736037649, + "learning_rate": 5.886557232732235e-06, + "loss": 0.2611580491065979, + "step": 4921 + }, + { + "epoch": 1.3069977426636568, + "grad_norm": 1.1754660821918204, + "learning_rate": 5.882555534282792e-06, + "loss": 0.20567595958709717, + "step": 4922 + }, + { + "epoch": 1.3072633116451997, + "grad_norm": 1.2179299933591687, + "learning_rate": 5.878554629628081e-06, + "loss": 0.22851137816905975, + "step": 4923 + }, + { + "epoch": 1.3075288806267427, + "grad_norm": 1.2283350051517878, + "learning_rate": 5.874554519539431e-06, + "loss": 0.24295902252197266, + "step": 4924 + }, + { + "epoch": 1.3077944496082856, + "grad_norm": 1.4565590371796837, + "learning_rate": 5.870555204788013e-06, + "loss": 0.29564642906188965, + "step": 4925 + }, + { + "epoch": 1.3080600185898288, + "grad_norm": 1.1906652754397118, + "learning_rate": 5.8665566861448465e-06, + "loss": 0.2399739921092987, + "step": 4926 + }, + { + "epoch": 1.3083255875713717, + "grad_norm": 1.2056826487968673, + "learning_rate": 5.862558964380806e-06, + "loss": 0.23882555961608887, + "step": 4927 + }, + { + "epoch": 
1.3085911565529147, + "grad_norm": 1.2167231777259742, + "learning_rate": 5.858562040266599e-06, + "loss": 0.2510842978954315, + "step": 4928 + }, + { + "epoch": 1.3088567255344576, + "grad_norm": 1.3760419048772665, + "learning_rate": 5.854565914572787e-06, + "loss": 0.257358193397522, + "step": 4929 + }, + { + "epoch": 1.3091222945160006, + "grad_norm": 1.1144476904886809, + "learning_rate": 5.850570588069775e-06, + "loss": 0.23228219151496887, + "step": 4930 + }, + { + "epoch": 1.3093878634975435, + "grad_norm": 1.2711888334314898, + "learning_rate": 5.846576061527818e-06, + "loss": 0.2234456092119217, + "step": 4931 + }, + { + "epoch": 1.3096534324790865, + "grad_norm": 1.1978737759145446, + "learning_rate": 5.842582335717009e-06, + "loss": 0.2273438423871994, + "step": 4932 + }, + { + "epoch": 1.3099190014606295, + "grad_norm": 1.2382395020505186, + "learning_rate": 5.838589411407294e-06, + "loss": 0.2423306405544281, + "step": 4933 + }, + { + "epoch": 1.3101845704421724, + "grad_norm": 1.2388376015521172, + "learning_rate": 5.834597289368463e-06, + "loss": 0.266438364982605, + "step": 4934 + }, + { + "epoch": 1.3104501394237154, + "grad_norm": 1.2553012161793193, + "learning_rate": 5.830605970370142e-06, + "loss": 0.2469342052936554, + "step": 4935 + }, + { + "epoch": 1.3107157084052583, + "grad_norm": 1.2077087937137967, + "learning_rate": 5.8266154551818225e-06, + "loss": 0.2834509611129761, + "step": 4936 + }, + { + "epoch": 1.3109812773868013, + "grad_norm": 1.3037377411135151, + "learning_rate": 5.822625744572821e-06, + "loss": 0.2615162134170532, + "step": 4937 + }, + { + "epoch": 1.3112468463683442, + "grad_norm": 1.1529903033018742, + "learning_rate": 5.818636839312309e-06, + "loss": 0.2247931957244873, + "step": 4938 + }, + { + "epoch": 1.3115124153498872, + "grad_norm": 1.162136486746663, + "learning_rate": 5.814648740169299e-06, + "loss": 0.23759335279464722, + "step": 4939 + }, + { + "epoch": 1.3117779843314301, + "grad_norm": 1.2647326324758852, 
+ "learning_rate": 5.8106614479126515e-06, + "loss": 0.23381784558296204, + "step": 4940 + }, + { + "epoch": 1.312043553312973, + "grad_norm": 1.2132087226777075, + "learning_rate": 5.8066749633110675e-06, + "loss": 0.2671264410018921, + "step": 4941 + }, + { + "epoch": 1.312309122294516, + "grad_norm": 1.09997395594631, + "learning_rate": 5.8026892871330944e-06, + "loss": 0.226065531373024, + "step": 4942 + }, + { + "epoch": 1.312574691276059, + "grad_norm": 1.3057172624305828, + "learning_rate": 5.798704420147124e-06, + "loss": 0.2654735743999481, + "step": 4943 + }, + { + "epoch": 1.312840260257602, + "grad_norm": 1.2538641402604982, + "learning_rate": 5.794720363121389e-06, + "loss": 0.23757833242416382, + "step": 4944 + }, + { + "epoch": 1.3131058292391449, + "grad_norm": 1.2131030914710175, + "learning_rate": 5.790737116823975e-06, + "loss": 0.2561591565608978, + "step": 4945 + }, + { + "epoch": 1.3133713982206878, + "grad_norm": 1.1698592689009908, + "learning_rate": 5.7867546820227995e-06, + "loss": 0.22105304896831512, + "step": 4946 + }, + { + "epoch": 1.3136369672022308, + "grad_norm": 1.190016500907537, + "learning_rate": 5.7827730594856325e-06, + "loss": 0.2485857605934143, + "step": 4947 + }, + { + "epoch": 1.3139025361837737, + "grad_norm": 1.2087719424455774, + "learning_rate": 5.7787922499800804e-06, + "loss": 0.21256676316261292, + "step": 4948 + }, + { + "epoch": 1.3141681051653167, + "grad_norm": 1.2561271472593831, + "learning_rate": 5.774812254273604e-06, + "loss": 0.2700715661048889, + "step": 4949 + }, + { + "epoch": 1.3144336741468596, + "grad_norm": 1.072264118800501, + "learning_rate": 5.770833073133488e-06, + "loss": 0.22239381074905396, + "step": 4950 + }, + { + "epoch": 1.3146992431284026, + "grad_norm": 1.2811464089131772, + "learning_rate": 5.766854707326878e-06, + "loss": 0.22973249852657318, + "step": 4951 + }, + { + "epoch": 1.3149648121099455, + "grad_norm": 1.3904264621036453, + "learning_rate": 5.762877157620751e-06, + "loss": 
0.27923673391342163, + "step": 4952 + }, + { + "epoch": 1.3152303810914885, + "grad_norm": 1.1321859486950596, + "learning_rate": 5.758900424781939e-06, + "loss": 0.23142218589782715, + "step": 4953 + }, + { + "epoch": 1.3154959500730314, + "grad_norm": 1.2732500147617782, + "learning_rate": 5.754924509577107e-06, + "loss": 0.23697996139526367, + "step": 4954 + }, + { + "epoch": 1.3157615190545744, + "grad_norm": 1.2838523265227373, + "learning_rate": 5.750949412772764e-06, + "loss": 0.27600961923599243, + "step": 4955 + }, + { + "epoch": 1.3160270880361173, + "grad_norm": 1.1644607269636458, + "learning_rate": 5.74697513513526e-06, + "loss": 0.2300705760717392, + "step": 4956 + }, + { + "epoch": 1.3162926570176603, + "grad_norm": 1.2927833273456342, + "learning_rate": 5.743001677430791e-06, + "loss": 0.2771111726760864, + "step": 4957 + }, + { + "epoch": 1.3165582259992032, + "grad_norm": 1.2582954956741819, + "learning_rate": 5.739029040425391e-06, + "loss": 0.2195657342672348, + "step": 4958 + }, + { + "epoch": 1.3168237949807462, + "grad_norm": 1.3450534906440017, + "learning_rate": 5.735057224884939e-06, + "loss": 0.2877159118652344, + "step": 4959 + }, + { + "epoch": 1.3170893639622892, + "grad_norm": 1.2211564124942835, + "learning_rate": 5.731086231575154e-06, + "loss": 0.264115571975708, + "step": 4960 + }, + { + "epoch": 1.317354932943832, + "grad_norm": 1.1286607753384608, + "learning_rate": 5.727116061261593e-06, + "loss": 0.22574637830257416, + "step": 4961 + }, + { + "epoch": 1.317620501925375, + "grad_norm": 1.3177978069758023, + "learning_rate": 5.723146714709664e-06, + "loss": 0.26063698530197144, + "step": 4962 + }, + { + "epoch": 1.317886070906918, + "grad_norm": 1.2211473527893268, + "learning_rate": 5.719178192684611e-06, + "loss": 0.26272428035736084, + "step": 4963 + }, + { + "epoch": 1.318151639888461, + "grad_norm": 1.257373941755789, + "learning_rate": 5.715210495951513e-06, + "loss": 0.27188578248023987, + "step": 4964 + }, + { + "epoch": 
1.318417208870004, + "grad_norm": 1.2786927551317604, + "learning_rate": 5.711243625275296e-06, + "loss": 0.26374363899230957, + "step": 4965 + }, + { + "epoch": 1.3186827778515469, + "grad_norm": 1.2469422291735242, + "learning_rate": 5.7072775814207275e-06, + "loss": 0.24819093942642212, + "step": 4966 + }, + { + "epoch": 1.3189483468330898, + "grad_norm": 1.3834225319345155, + "learning_rate": 5.703312365152412e-06, + "loss": 0.24387019872665405, + "step": 4967 + }, + { + "epoch": 1.319213915814633, + "grad_norm": 1.2919715806670669, + "learning_rate": 5.699347977234799e-06, + "loss": 0.2198091745376587, + "step": 4968 + }, + { + "epoch": 1.319479484796176, + "grad_norm": 1.3500197578827224, + "learning_rate": 5.695384418432174e-06, + "loss": 0.24349649250507355, + "step": 4969 + }, + { + "epoch": 1.319745053777719, + "grad_norm": 1.238323956307032, + "learning_rate": 5.691421689508661e-06, + "loss": 0.2330506294965744, + "step": 4970 + }, + { + "epoch": 1.3200106227592618, + "grad_norm": 1.2015417123740977, + "learning_rate": 5.687459791228234e-06, + "loss": 0.22821848094463348, + "step": 4971 + }, + { + "epoch": 1.3202761917408048, + "grad_norm": 1.1813366864368284, + "learning_rate": 5.683498724354699e-06, + "loss": 0.2342798113822937, + "step": 4972 + }, + { + "epoch": 1.3205417607223477, + "grad_norm": 1.0659168750954966, + "learning_rate": 5.679538489651702e-06, + "loss": 0.19689922034740448, + "step": 4973 + }, + { + "epoch": 1.3208073297038907, + "grad_norm": 1.1808385090527131, + "learning_rate": 5.675579087882727e-06, + "loss": 0.23910056054592133, + "step": 4974 + }, + { + "epoch": 1.3210728986854336, + "grad_norm": 1.381638431012013, + "learning_rate": 5.671620519811105e-06, + "loss": 0.25725993514060974, + "step": 4975 + }, + { + "epoch": 1.3213384676669766, + "grad_norm": 1.3528699347449313, + "learning_rate": 5.667662786199997e-06, + "loss": 0.3030434250831604, + "step": 4976 + }, + { + "epoch": 1.3216040366485196, + "grad_norm": 
1.1182092617897728, + "learning_rate": 5.6637058878124075e-06, + "loss": 0.223737433552742, + "step": 4977 + }, + { + "epoch": 1.3218696056300625, + "grad_norm": 1.07766141822832, + "learning_rate": 5.659749825411183e-06, + "loss": 0.21480265259742737, + "step": 4978 + }, + { + "epoch": 1.3221351746116055, + "grad_norm": 1.2398269968997129, + "learning_rate": 5.655794599759001e-06, + "loss": 0.23288744688034058, + "step": 4979 + }, + { + "epoch": 1.3224007435931484, + "grad_norm": 1.3344080514533678, + "learning_rate": 5.651840211618387e-06, + "loss": 0.23701068758964539, + "step": 4980 + }, + { + "epoch": 1.3226663125746914, + "grad_norm": 1.2102834630940547, + "learning_rate": 5.647886661751698e-06, + "loss": 0.22164157032966614, + "step": 4981 + }, + { + "epoch": 1.3229318815562343, + "grad_norm": 1.2096538262244674, + "learning_rate": 5.643933950921132e-06, + "loss": 0.23426607251167297, + "step": 4982 + }, + { + "epoch": 1.3231974505377773, + "grad_norm": 1.1880047089826309, + "learning_rate": 5.6399820798887266e-06, + "loss": 0.2567834258079529, + "step": 4983 + }, + { + "epoch": 1.3234630195193202, + "grad_norm": 1.3013809826248692, + "learning_rate": 5.6360310494163525e-06, + "loss": 0.2713038921356201, + "step": 4984 + }, + { + "epoch": 1.3237285885008632, + "grad_norm": 1.2908080991459006, + "learning_rate": 5.632080860265725e-06, + "loss": 0.2548249661922455, + "step": 4985 + }, + { + "epoch": 1.3239941574824061, + "grad_norm": 1.3471244082770852, + "learning_rate": 5.628131513198392e-06, + "loss": 0.2442832589149475, + "step": 4986 + }, + { + "epoch": 1.324259726463949, + "grad_norm": 1.3063670062134878, + "learning_rate": 5.6241830089757435e-06, + "loss": 0.24654853343963623, + "step": 4987 + }, + { + "epoch": 1.324525295445492, + "grad_norm": 1.2792033582455469, + "learning_rate": 5.620235348358997e-06, + "loss": 0.2802797853946686, + "step": 4988 + }, + { + "epoch": 1.324790864427035, + "grad_norm": 1.0588655062771883, + "learning_rate": 
5.616288532109225e-06, + "loss": 0.18801404535770416, + "step": 4989 + }, + { + "epoch": 1.325056433408578, + "grad_norm": 1.2235746865490262, + "learning_rate": 5.6123425609873235e-06, + "loss": 0.2685382068157196, + "step": 4990 + }, + { + "epoch": 1.3253220023901209, + "grad_norm": 1.1873888072876837, + "learning_rate": 5.608397435754029e-06, + "loss": 0.23479774594306946, + "step": 4991 + }, + { + "epoch": 1.3255875713716638, + "grad_norm": 1.2164455244711625, + "learning_rate": 5.604453157169914e-06, + "loss": 0.24198031425476074, + "step": 4992 + }, + { + "epoch": 1.3258531403532068, + "grad_norm": 1.3448749532595476, + "learning_rate": 5.60050972599539e-06, + "loss": 0.25523462891578674, + "step": 4993 + }, + { + "epoch": 1.3261187093347497, + "grad_norm": 1.1695382845281797, + "learning_rate": 5.596567142990703e-06, + "loss": 0.23196743428707123, + "step": 4994 + }, + { + "epoch": 1.3263842783162927, + "grad_norm": 1.3145586744837223, + "learning_rate": 5.592625408915939e-06, + "loss": 0.29365748167037964, + "step": 4995 + }, + { + "epoch": 1.3266498472978356, + "grad_norm": 1.1946134760289593, + "learning_rate": 5.588684524531014e-06, + "loss": 0.24509185552597046, + "step": 4996 + }, + { + "epoch": 1.3269154162793786, + "grad_norm": 1.3358300509723116, + "learning_rate": 5.584744490595687e-06, + "loss": 0.27032390236854553, + "step": 4997 + }, + { + "epoch": 1.3271809852609215, + "grad_norm": 1.1645416268641489, + "learning_rate": 5.580805307869549e-06, + "loss": 0.24401508271694183, + "step": 4998 + }, + { + "epoch": 1.3274465542424645, + "grad_norm": 1.1506901325018217, + "learning_rate": 5.576866977112028e-06, + "loss": 0.2216658741235733, + "step": 4999 + }, + { + "epoch": 1.3277121232240074, + "grad_norm": 1.1830944265124126, + "learning_rate": 5.5729294990823875e-06, + "loss": 0.24545373022556305, + "step": 5000 + }, + { + "epoch": 1.3279776922055504, + "grad_norm": 1.377548009409137, + "learning_rate": 5.568992874539728e-06, + "loss": 
0.260816752910614, + "step": 5001 + }, + { + "epoch": 1.3282432611870933, + "grad_norm": 1.1392730403811622, + "learning_rate": 5.565057104242984e-06, + "loss": 0.1850551962852478, + "step": 5002 + }, + { + "epoch": 1.3285088301686363, + "grad_norm": 2.1232949408605624, + "learning_rate": 5.561122188950923e-06, + "loss": 0.26854407787323, + "step": 5003 + }, + { + "epoch": 1.3287743991501793, + "grad_norm": 1.1591208934359583, + "learning_rate": 5.557188129422153e-06, + "loss": 0.24294906854629517, + "step": 5004 + }, + { + "epoch": 1.3290399681317222, + "grad_norm": 1.1880501452095942, + "learning_rate": 5.553254926415114e-06, + "loss": 0.2533603310585022, + "step": 5005 + }, + { + "epoch": 1.3293055371132652, + "grad_norm": 1.1756183262516449, + "learning_rate": 5.549322580688077e-06, + "loss": 0.2082313448190689, + "step": 5006 + }, + { + "epoch": 1.329571106094808, + "grad_norm": 1.1602290025540025, + "learning_rate": 5.545391092999158e-06, + "loss": 0.24265842139720917, + "step": 5007 + }, + { + "epoch": 1.329836675076351, + "grad_norm": 1.2321490774961563, + "learning_rate": 5.541460464106301e-06, + "loss": 0.2483578324317932, + "step": 5008 + }, + { + "epoch": 1.330102244057894, + "grad_norm": 1.2798509363454456, + "learning_rate": 5.537530694767281e-06, + "loss": 0.2769540548324585, + "step": 5009 + }, + { + "epoch": 1.330367813039437, + "grad_norm": 1.1781048091325885, + "learning_rate": 5.533601785739714e-06, + "loss": 0.2132025957107544, + "step": 5010 + }, + { + "epoch": 1.33063338202098, + "grad_norm": 1.2726887496075767, + "learning_rate": 5.529673737781047e-06, + "loss": 0.25223806500434875, + "step": 5011 + }, + { + "epoch": 1.3308989510025229, + "grad_norm": 1.13329365262538, + "learning_rate": 5.52574655164856e-06, + "loss": 0.22631296515464783, + "step": 5012 + }, + { + "epoch": 1.3311645199840658, + "grad_norm": 1.1821255064699665, + "learning_rate": 5.5218202280993725e-06, + "loss": 0.23756693303585052, + "step": 5013 + }, + { + "epoch": 
1.3314300889656088, + "grad_norm": 1.2775335630974591, + "learning_rate": 5.517894767890427e-06, + "loss": 0.24746376276016235, + "step": 5014 + }, + { + "epoch": 1.3316956579471517, + "grad_norm": 1.105165815318004, + "learning_rate": 5.513970171778504e-06, + "loss": 0.21463070809841156, + "step": 5015 + }, + { + "epoch": 1.3319612269286947, + "grad_norm": 1.2090979668871258, + "learning_rate": 5.510046440520228e-06, + "loss": 0.21256107091903687, + "step": 5016 + }, + { + "epoch": 1.3322267959102376, + "grad_norm": 1.1963664670778913, + "learning_rate": 5.506123574872044e-06, + "loss": 0.25800254940986633, + "step": 5017 + }, + { + "epoch": 1.3324923648917806, + "grad_norm": 1.2726257558813519, + "learning_rate": 5.502201575590236e-06, + "loss": 0.2421891689300537, + "step": 5018 + }, + { + "epoch": 1.3327579338733235, + "grad_norm": 1.3181283061442692, + "learning_rate": 5.498280443430917e-06, + "loss": 0.24375903606414795, + "step": 5019 + }, + { + "epoch": 1.3330235028548665, + "grad_norm": 1.2419078132332353, + "learning_rate": 5.494360179150033e-06, + "loss": 0.22173303365707397, + "step": 5020 + }, + { + "epoch": 1.3332890718364094, + "grad_norm": 1.1754676882141941, + "learning_rate": 5.49044078350337e-06, + "loss": 0.24005022644996643, + "step": 5021 + }, + { + "epoch": 1.3335546408179524, + "grad_norm": 1.194558748352182, + "learning_rate": 5.486522257246538e-06, + "loss": 0.2600201964378357, + "step": 5022 + }, + { + "epoch": 1.3338202097994953, + "grad_norm": 1.2112657273591712, + "learning_rate": 5.482604601134984e-06, + "loss": 0.22889836132526398, + "step": 5023 + }, + { + "epoch": 1.3340857787810383, + "grad_norm": 1.151722502872684, + "learning_rate": 5.478687815923981e-06, + "loss": 0.25045812129974365, + "step": 5024 + }, + { + "epoch": 1.3343513477625812, + "grad_norm": 1.2499612320902753, + "learning_rate": 5.474771902368646e-06, + "loss": 0.24649837613105774, + "step": 5025 + }, + { + "epoch": 1.3346169167441242, + "grad_norm": 
1.1975824340507155, + "learning_rate": 5.470856861223919e-06, + "loss": 0.23994389176368713, + "step": 5026 + }, + { + "epoch": 1.3348824857256671, + "grad_norm": 1.2488470912807048, + "learning_rate": 5.466942693244572e-06, + "loss": 0.24381600320339203, + "step": 5027 + }, + { + "epoch": 1.33514805470721, + "grad_norm": 1.1770895947351019, + "learning_rate": 5.463029399185217e-06, + "loss": 0.22110486030578613, + "step": 5028 + }, + { + "epoch": 1.335413623688753, + "grad_norm": 1.2878634690011452, + "learning_rate": 5.459116979800281e-06, + "loss": 0.25733259320259094, + "step": 5029 + }, + { + "epoch": 1.335679192670296, + "grad_norm": 1.2598918710105835, + "learning_rate": 5.4552054358440355e-06, + "loss": 0.22853803634643555, + "step": 5030 + }, + { + "epoch": 1.335944761651839, + "grad_norm": 1.3118793520277159, + "learning_rate": 5.451294768070581e-06, + "loss": 0.27503639459609985, + "step": 5031 + }, + { + "epoch": 1.336210330633382, + "grad_norm": 1.2721314541046291, + "learning_rate": 5.447384977233849e-06, + "loss": 0.27931997179985046, + "step": 5032 + }, + { + "epoch": 1.3364758996149249, + "grad_norm": 1.2287817779118972, + "learning_rate": 5.443476064087596e-06, + "loss": 0.2477954626083374, + "step": 5033 + }, + { + "epoch": 1.3367414685964678, + "grad_norm": 1.2204002745504476, + "learning_rate": 5.439568029385422e-06, + "loss": 0.2195623219013214, + "step": 5034 + }, + { + "epoch": 1.3370070375780108, + "grad_norm": 1.230653492520276, + "learning_rate": 5.435660873880747e-06, + "loss": 0.22160238027572632, + "step": 5035 + }, + { + "epoch": 1.3372726065595537, + "grad_norm": 1.6764380815480615, + "learning_rate": 5.4317545983268235e-06, + "loss": 0.24107405543327332, + "step": 5036 + }, + { + "epoch": 1.3375381755410967, + "grad_norm": 1.2985203082435115, + "learning_rate": 5.427849203476738e-06, + "loss": 0.2480086386203766, + "step": 5037 + }, + { + "epoch": 1.3378037445226398, + "grad_norm": 1.2654518356324462, + "learning_rate": 
5.4239446900834005e-06, + "loss": 0.22476691007614136, + "step": 5038 + }, + { + "epoch": 1.3380693135041828, + "grad_norm": 1.217906592075979, + "learning_rate": 5.420041058899559e-06, + "loss": 0.23685473203659058, + "step": 5039 + }, + { + "epoch": 1.3383348824857257, + "grad_norm": 1.215790635675812, + "learning_rate": 5.416138310677784e-06, + "loss": 0.27753746509552, + "step": 5040 + }, + { + "epoch": 1.3386004514672687, + "grad_norm": 1.2682075315501737, + "learning_rate": 5.412236446170482e-06, + "loss": 0.22446027398109436, + "step": 5041 + }, + { + "epoch": 1.3388660204488116, + "grad_norm": 1.2214424011593596, + "learning_rate": 5.4083354661298816e-06, + "loss": 0.2535285949707031, + "step": 5042 + }, + { + "epoch": 1.3391315894303546, + "grad_norm": 1.2982364680013232, + "learning_rate": 5.4044353713080565e-06, + "loss": 0.2412964254617691, + "step": 5043 + }, + { + "epoch": 1.3393971584118975, + "grad_norm": 1.3092797704576777, + "learning_rate": 5.4005361624568895e-06, + "loss": 0.23863038420677185, + "step": 5044 + }, + { + "epoch": 1.3396627273934405, + "grad_norm": 1.159506578977356, + "learning_rate": 5.396637840328105e-06, + "loss": 0.22741727530956268, + "step": 5045 + }, + { + "epoch": 1.3399282963749835, + "grad_norm": 1.285452356277395, + "learning_rate": 5.392740405673251e-06, + "loss": 0.2497379630804062, + "step": 5046 + }, + { + "epoch": 1.3401938653565264, + "grad_norm": 1.2401289485061215, + "learning_rate": 5.388843859243712e-06, + "loss": 0.19558298587799072, + "step": 5047 + }, + { + "epoch": 1.3404594343380694, + "grad_norm": 1.2074615239750155, + "learning_rate": 5.3849482017906914e-06, + "loss": 0.2266748994588852, + "step": 5048 + }, + { + "epoch": 1.3407250033196123, + "grad_norm": 1.2657162316868396, + "learning_rate": 5.381053434065229e-06, + "loss": 0.2410028576850891, + "step": 5049 + }, + { + "epoch": 1.3409905723011553, + "grad_norm": 1.301692886719208, + "learning_rate": 5.37715955681819e-06, + "loss": 
0.23965512216091156, + "step": 5050 + }, + { + "epoch": 1.3412561412826982, + "grad_norm": 1.1756365557449155, + "learning_rate": 5.373266570800262e-06, + "loss": 0.22440138459205627, + "step": 5051 + }, + { + "epoch": 1.3415217102642412, + "grad_norm": 1.2562473271519534, + "learning_rate": 5.369374476761975e-06, + "loss": 0.2509710192680359, + "step": 5052 + }, + { + "epoch": 1.3417872792457841, + "grad_norm": 1.3381440207626536, + "learning_rate": 5.365483275453677e-06, + "loss": 0.26555800437927246, + "step": 5053 + }, + { + "epoch": 1.342052848227327, + "grad_norm": 1.2240809600669689, + "learning_rate": 5.361592967625544e-06, + "loss": 0.23089733719825745, + "step": 5054 + }, + { + "epoch": 1.34231841720887, + "grad_norm": 1.1178692263054482, + "learning_rate": 5.357703554027582e-06, + "loss": 0.2040700763463974, + "step": 5055 + }, + { + "epoch": 1.342583986190413, + "grad_norm": 1.309704975193781, + "learning_rate": 5.353815035409624e-06, + "loss": 0.23539039492607117, + "step": 5056 + }, + { + "epoch": 1.342849555171956, + "grad_norm": 1.7065922202358847, + "learning_rate": 5.3499274125213294e-06, + "loss": 0.2190464437007904, + "step": 5057 + }, + { + "epoch": 1.3431151241534989, + "grad_norm": 1.1478595499251703, + "learning_rate": 5.346040686112189e-06, + "loss": 0.21557429432868958, + "step": 5058 + }, + { + "epoch": 1.3433806931350418, + "grad_norm": 1.1934269644730748, + "learning_rate": 5.342154856931515e-06, + "loss": 0.24398267269134521, + "step": 5059 + }, + { + "epoch": 1.3436462621165848, + "grad_norm": 1.1089059625649784, + "learning_rate": 5.338269925728451e-06, + "loss": 0.21652038395404816, + "step": 5060 + }, + { + "epoch": 1.3439118310981277, + "grad_norm": 1.1937531358219302, + "learning_rate": 5.334385893251966e-06, + "loss": 0.2031325101852417, + "step": 5061 + }, + { + "epoch": 1.3441774000796707, + "grad_norm": 1.1621991357090053, + "learning_rate": 5.330502760250853e-06, + "loss": 0.2484835982322693, + "step": 5062 + }, + { + 
"epoch": 1.3444429690612136, + "grad_norm": 1.2657742595884374, + "learning_rate": 5.326620527473737e-06, + "loss": 0.23698699474334717, + "step": 5063 + }, + { + "epoch": 1.3447085380427566, + "grad_norm": 1.2000433743668328, + "learning_rate": 5.322739195669065e-06, + "loss": 0.23928484320640564, + "step": 5064 + }, + { + "epoch": 1.3449741070242995, + "grad_norm": 1.1828146199314795, + "learning_rate": 5.318858765585115e-06, + "loss": 0.22679512202739716, + "step": 5065 + }, + { + "epoch": 1.3452396760058425, + "grad_norm": 1.2334385564497414, + "learning_rate": 5.314979237969984e-06, + "loss": 0.2115025818347931, + "step": 5066 + }, + { + "epoch": 1.3455052449873854, + "grad_norm": 1.261129899382787, + "learning_rate": 5.311100613571603e-06, + "loss": 0.2441834807395935, + "step": 5067 + }, + { + "epoch": 1.3457708139689284, + "grad_norm": 1.2722125718860966, + "learning_rate": 5.307222893137722e-06, + "loss": 0.2549205720424652, + "step": 5068 + }, + { + "epoch": 1.3460363829504713, + "grad_norm": 1.179054242584843, + "learning_rate": 5.3033460774159185e-06, + "loss": 0.24652990698814392, + "step": 5069 + }, + { + "epoch": 1.3463019519320143, + "grad_norm": 1.2062419936470874, + "learning_rate": 5.299470167153602e-06, + "loss": 0.2403775006532669, + "step": 5070 + }, + { + "epoch": 1.3465675209135572, + "grad_norm": 1.1208895570259512, + "learning_rate": 5.295595163097999e-06, + "loss": 0.2215663194656372, + "step": 5071 + }, + { + "epoch": 1.3468330898951002, + "grad_norm": 1.2914937229567889, + "learning_rate": 5.291721065996167e-06, + "loss": 0.2567424774169922, + "step": 5072 + }, + { + "epoch": 1.3470986588766432, + "grad_norm": 1.0608079556396839, + "learning_rate": 5.287847876594984e-06, + "loss": 0.21162359416484833, + "step": 5073 + }, + { + "epoch": 1.347364227858186, + "grad_norm": 1.221049341797181, + "learning_rate": 5.283975595641155e-06, + "loss": 0.21851085126399994, + "step": 5074 + }, + { + "epoch": 1.347629796839729, + "grad_norm": 
1.2935501467753354, + "learning_rate": 5.280104223881212e-06, + "loss": 0.2491171509027481, + "step": 5075 + }, + { + "epoch": 1.347895365821272, + "grad_norm": 1.2921255335421646, + "learning_rate": 5.276233762061507e-06, + "loss": 0.22467780113220215, + "step": 5076 + }, + { + "epoch": 1.348160934802815, + "grad_norm": 1.159790816626821, + "learning_rate": 5.272364210928223e-06, + "loss": 0.24531611800193787, + "step": 5077 + }, + { + "epoch": 1.348426503784358, + "grad_norm": 1.2178282841242851, + "learning_rate": 5.268495571227361e-06, + "loss": 0.2582520544528961, + "step": 5078 + }, + { + "epoch": 1.3486920727659009, + "grad_norm": 1.2175282778251775, + "learning_rate": 5.264627843704749e-06, + "loss": 0.21180811524391174, + "step": 5079 + }, + { + "epoch": 1.348957641747444, + "grad_norm": 1.2942378328530906, + "learning_rate": 5.2607610291060406e-06, + "loss": 0.27026671171188354, + "step": 5080 + }, + { + "epoch": 1.349223210728987, + "grad_norm": 1.1721525183169563, + "learning_rate": 5.256895128176712e-06, + "loss": 0.22954419255256653, + "step": 5081 + }, + { + "epoch": 1.34948877971053, + "grad_norm": 1.3561853541918854, + "learning_rate": 5.253030141662063e-06, + "loss": 0.24064484238624573, + "step": 5082 + }, + { + "epoch": 1.349754348692073, + "grad_norm": 1.1245550279116328, + "learning_rate": 5.249166070307218e-06, + "loss": 0.1981196105480194, + "step": 5083 + }, + { + "epoch": 1.3500199176736158, + "grad_norm": 1.0881909699390468, + "learning_rate": 5.2453029148571226e-06, + "loss": 0.19882233440876007, + "step": 5084 + }, + { + "epoch": 1.3502854866551588, + "grad_norm": 1.2123536275051694, + "learning_rate": 5.24144067605655e-06, + "loss": 0.2409907579421997, + "step": 5085 + }, + { + "epoch": 1.3505510556367017, + "grad_norm": 1.2197874501412473, + "learning_rate": 5.237579354650092e-06, + "loss": 0.2205093652009964, + "step": 5086 + }, + { + "epoch": 1.3508166246182447, + "grad_norm": 1.4716074796051495, + "learning_rate": 
5.233718951382163e-06, + "loss": 0.2283058911561966, + "step": 5087 + }, + { + "epoch": 1.3510821935997877, + "grad_norm": 1.2561007307780203, + "learning_rate": 5.229859466997012e-06, + "loss": 0.25584474205970764, + "step": 5088 + }, + { + "epoch": 1.3513477625813306, + "grad_norm": 1.1491167817661179, + "learning_rate": 5.226000902238696e-06, + "loss": 0.22516845166683197, + "step": 5089 + }, + { + "epoch": 1.3516133315628736, + "grad_norm": 1.2604818786719383, + "learning_rate": 5.222143257851102e-06, + "loss": 0.23440764844417572, + "step": 5090 + }, + { + "epoch": 1.3518789005444165, + "grad_norm": 1.2156754572685655, + "learning_rate": 5.218286534577938e-06, + "loss": 0.25858962535858154, + "step": 5091 + }, + { + "epoch": 1.3521444695259595, + "grad_norm": 1.1425154357949754, + "learning_rate": 5.214430733162736e-06, + "loss": 0.20676326751708984, + "step": 5092 + }, + { + "epoch": 1.3524100385075024, + "grad_norm": 1.1266241214136956, + "learning_rate": 5.210575854348853e-06, + "loss": 0.21892425417900085, + "step": 5093 + }, + { + "epoch": 1.3526756074890454, + "grad_norm": 1.2379350388596377, + "learning_rate": 5.206721898879454e-06, + "loss": 0.2538335919380188, + "step": 5094 + }, + { + "epoch": 1.3529411764705883, + "grad_norm": 1.2059035716196298, + "learning_rate": 5.202868867497542e-06, + "loss": 0.24750448763370514, + "step": 5095 + }, + { + "epoch": 1.3532067454521313, + "grad_norm": 1.2602608504342458, + "learning_rate": 5.199016760945931e-06, + "loss": 0.2569364011287689, + "step": 5096 + }, + { + "epoch": 1.3534723144336742, + "grad_norm": 0.9860855220263709, + "learning_rate": 5.19516557996727e-06, + "loss": 0.16788914799690247, + "step": 5097 + }, + { + "epoch": 1.3537378834152172, + "grad_norm": 1.0020852845957948, + "learning_rate": 5.191315325304018e-06, + "loss": 0.19006651639938354, + "step": 5098 + }, + { + "epoch": 1.3540034523967601, + "grad_norm": 1.187896658740898, + "learning_rate": 5.1874659976984575e-06, + "loss": 
0.23474551737308502, + "step": 5099 + }, + { + "epoch": 1.354269021378303, + "grad_norm": 1.2829971661643687, + "learning_rate": 5.183617597892694e-06, + "loss": 0.26601099967956543, + "step": 5100 + }, + { + "epoch": 1.354534590359846, + "grad_norm": 1.1758855450162613, + "learning_rate": 5.179770126628654e-06, + "loss": 0.24207550287246704, + "step": 5101 + }, + { + "epoch": 1.354800159341389, + "grad_norm": 1.2535446057143411, + "learning_rate": 5.175923584648083e-06, + "loss": 0.2538307309150696, + "step": 5102 + }, + { + "epoch": 1.355065728322932, + "grad_norm": 1.1865818667829109, + "learning_rate": 5.172077972692553e-06, + "loss": 0.23073242604732513, + "step": 5103 + }, + { + "epoch": 1.3553312973044749, + "grad_norm": 1.348848385270533, + "learning_rate": 5.168233291503448e-06, + "loss": 0.2634595036506653, + "step": 5104 + }, + { + "epoch": 1.3555968662860178, + "grad_norm": 1.225057907199874, + "learning_rate": 5.1643895418219744e-06, + "loss": 0.23282350599765778, + "step": 5105 + }, + { + "epoch": 1.3558624352675608, + "grad_norm": 1.333152685269679, + "learning_rate": 5.160546724389172e-06, + "loss": 0.2543700933456421, + "step": 5106 + }, + { + "epoch": 1.3561280042491037, + "grad_norm": 1.1449256417555271, + "learning_rate": 5.1567048399458855e-06, + "loss": 0.2005772739648819, + "step": 5107 + }, + { + "epoch": 1.3563935732306467, + "grad_norm": 1.2429630346358373, + "learning_rate": 5.152863889232787e-06, + "loss": 0.2367073893547058, + "step": 5108 + }, + { + "epoch": 1.3566591422121896, + "grad_norm": 1.2839253544945022, + "learning_rate": 5.14902387299036e-06, + "loss": 0.25600770115852356, + "step": 5109 + }, + { + "epoch": 1.3569247111937326, + "grad_norm": 1.198566513294344, + "learning_rate": 5.145184791958918e-06, + "loss": 0.21678754687309265, + "step": 5110 + }, + { + "epoch": 1.3571902801752755, + "grad_norm": 1.3894724787206996, + "learning_rate": 5.141346646878591e-06, + "loss": 0.265438973903656, + "step": 5111 + }, + { + "epoch": 
1.3574558491568185, + "grad_norm": 1.1239736089383028, + "learning_rate": 5.13750943848933e-06, + "loss": 0.24246999621391296, + "step": 5112 + }, + { + "epoch": 1.3577214181383614, + "grad_norm": 1.299396280421792, + "learning_rate": 5.133673167530899e-06, + "loss": 0.25401771068573, + "step": 5113 + }, + { + "epoch": 1.3579869871199044, + "grad_norm": 1.2329813534125698, + "learning_rate": 5.129837834742885e-06, + "loss": 0.2698017656803131, + "step": 5114 + }, + { + "epoch": 1.3582525561014474, + "grad_norm": 1.2787210937788358, + "learning_rate": 5.126003440864703e-06, + "loss": 0.27006995677948, + "step": 5115 + }, + { + "epoch": 1.3585181250829903, + "grad_norm": 1.2695682196385796, + "learning_rate": 5.122169986635575e-06, + "loss": 0.2370866984128952, + "step": 5116 + }, + { + "epoch": 1.3587836940645333, + "grad_norm": 1.3031561376922138, + "learning_rate": 5.1183374727945425e-06, + "loss": 0.24017807841300964, + "step": 5117 + }, + { + "epoch": 1.3590492630460762, + "grad_norm": 1.1487956614446662, + "learning_rate": 5.114505900080473e-06, + "loss": 0.21664533019065857, + "step": 5118 + }, + { + "epoch": 1.3593148320276192, + "grad_norm": 4.246209132455192, + "learning_rate": 5.110675269232046e-06, + "loss": 0.24561598896980286, + "step": 5119 + }, + { + "epoch": 1.359580401009162, + "grad_norm": 1.3902415348604562, + "learning_rate": 5.106845580987763e-06, + "loss": 0.26678937673568726, + "step": 5120 + }, + { + "epoch": 1.359845969990705, + "grad_norm": 1.354168350096278, + "learning_rate": 5.103016836085943e-06, + "loss": 0.21919070184230804, + "step": 5121 + }, + { + "epoch": 1.360111538972248, + "grad_norm": 1.3057665036353723, + "learning_rate": 5.099189035264722e-06, + "loss": 0.24887943267822266, + "step": 5122 + }, + { + "epoch": 1.360377107953791, + "grad_norm": 1.2017875007060346, + "learning_rate": 5.0953621792620556e-06, + "loss": 0.23597784340381622, + "step": 5123 + }, + { + "epoch": 1.360642676935334, + "grad_norm": 1.2098630506546966, + 
"learning_rate": 5.091536268815717e-06, + "loss": 0.21265193819999695, + "step": 5124 + }, + { + "epoch": 1.3609082459168769, + "grad_norm": 1.3606980074054404, + "learning_rate": 5.0877113046632945e-06, + "loss": 0.29837465286254883, + "step": 5125 + }, + { + "epoch": 1.3611738148984198, + "grad_norm": 1.1915793844006848, + "learning_rate": 5.0838872875421975e-06, + "loss": 0.2324269413948059, + "step": 5126 + }, + { + "epoch": 1.3614393838799628, + "grad_norm": 1.0970197687294143, + "learning_rate": 5.080064218189652e-06, + "loss": 0.19149541854858398, + "step": 5127 + }, + { + "epoch": 1.3617049528615057, + "grad_norm": 1.1710303609542994, + "learning_rate": 5.0762420973427e-06, + "loss": 0.247644305229187, + "step": 5128 + }, + { + "epoch": 1.3619705218430487, + "grad_norm": 1.1403838601028529, + "learning_rate": 5.0724209257382006e-06, + "loss": 0.2272202968597412, + "step": 5129 + }, + { + "epoch": 1.3622360908245916, + "grad_norm": 1.2012952880900256, + "learning_rate": 5.068600704112832e-06, + "loss": 0.25735989212989807, + "step": 5130 + }, + { + "epoch": 1.3625016598061346, + "grad_norm": 1.1771555574179005, + "learning_rate": 5.064781433203086e-06, + "loss": 0.19970473647117615, + "step": 5131 + }, + { + "epoch": 1.3627672287876775, + "grad_norm": 1.2156620394191346, + "learning_rate": 5.060963113745272e-06, + "loss": 0.24289372563362122, + "step": 5132 + }, + { + "epoch": 1.3630327977692205, + "grad_norm": 1.2352988713677027, + "learning_rate": 5.0571457464755226e-06, + "loss": 0.2757350504398346, + "step": 5133 + }, + { + "epoch": 1.3632983667507634, + "grad_norm": 1.2115447809386193, + "learning_rate": 5.053329332129777e-06, + "loss": 0.24552851915359497, + "step": 5134 + }, + { + "epoch": 1.3635639357323064, + "grad_norm": 1.1546263092618338, + "learning_rate": 5.049513871443797e-06, + "loss": 0.22152797877788544, + "step": 5135 + }, + { + "epoch": 1.3638295047138493, + "grad_norm": 1.2567398712194906, + "learning_rate": 5.045699365153155e-06, + 
"loss": 0.27098602056503296, + "step": 5136 + }, + { + "epoch": 1.3640950736953923, + "grad_norm": 1.201852433475055, + "learning_rate": 5.041885813993246e-06, + "loss": 0.21275216341018677, + "step": 5137 + }, + { + "epoch": 1.3643606426769352, + "grad_norm": 1.3326670101473788, + "learning_rate": 5.038073218699275e-06, + "loss": 0.2510162591934204, + "step": 5138 + }, + { + "epoch": 1.3646262116584782, + "grad_norm": 1.2702563681918038, + "learning_rate": 5.034261580006269e-06, + "loss": 0.23203429579734802, + "step": 5139 + }, + { + "epoch": 1.3648917806400211, + "grad_norm": 1.137285489869793, + "learning_rate": 5.030450898649064e-06, + "loss": 0.22178995609283447, + "step": 5140 + }, + { + "epoch": 1.365157349621564, + "grad_norm": 1.2415754400243457, + "learning_rate": 5.026641175362316e-06, + "loss": 0.2567412257194519, + "step": 5141 + }, + { + "epoch": 1.365422918603107, + "grad_norm": 1.232487080143156, + "learning_rate": 5.022832410880494e-06, + "loss": 0.21939827501773834, + "step": 5142 + }, + { + "epoch": 1.36568848758465, + "grad_norm": 1.4733425270104286, + "learning_rate": 5.019024605937882e-06, + "loss": 0.2325637936592102, + "step": 5143 + }, + { + "epoch": 1.365954056566193, + "grad_norm": 1.266575596941496, + "learning_rate": 5.015217761268582e-06, + "loss": 0.2416393756866455, + "step": 5144 + }, + { + "epoch": 1.366219625547736, + "grad_norm": 1.289260413423763, + "learning_rate": 5.011411877606507e-06, + "loss": 0.2439568042755127, + "step": 5145 + }, + { + "epoch": 1.3664851945292789, + "grad_norm": 1.1439689034996021, + "learning_rate": 5.007606955685387e-06, + "loss": 0.2495957612991333, + "step": 5146 + }, + { + "epoch": 1.3667507635108218, + "grad_norm": 1.1937127912858143, + "learning_rate": 5.003802996238766e-06, + "loss": 0.23415328562259674, + "step": 5147 + }, + { + "epoch": 1.3670163324923648, + "grad_norm": 1.26410321081345, + "learning_rate": 5.000000000000003e-06, + "loss": 0.2637922465801239, + "step": 5148 + }, + { + "epoch": 
1.3672819014739077, + "grad_norm": 1.243307173830296, + "learning_rate": 4.9961979677022696e-06, + "loss": 0.2319526970386505, + "step": 5149 + }, + { + "epoch": 1.3675474704554509, + "grad_norm": 1.2115383829826751, + "learning_rate": 4.992396900078551e-06, + "loss": 0.2338445484638214, + "step": 5150 + }, + { + "epoch": 1.3678130394369938, + "grad_norm": 1.1683439299091893, + "learning_rate": 4.988596797861654e-06, + "loss": 0.19041961431503296, + "step": 5151 + }, + { + "epoch": 1.3680786084185368, + "grad_norm": 1.233073404450011, + "learning_rate": 4.984797661784191e-06, + "loss": 0.2698138952255249, + "step": 5152 + }, + { + "epoch": 1.3683441774000797, + "grad_norm": 1.2592426315358647, + "learning_rate": 4.980999492578588e-06, + "loss": 0.2208167165517807, + "step": 5153 + }, + { + "epoch": 1.3686097463816227, + "grad_norm": 1.1935159953807641, + "learning_rate": 4.9772022909770915e-06, + "loss": 0.2515152096748352, + "step": 5154 + }, + { + "epoch": 1.3688753153631656, + "grad_norm": 1.3110804278343313, + "learning_rate": 4.973406057711755e-06, + "loss": 0.2393365204334259, + "step": 5155 + }, + { + "epoch": 1.3691408843447086, + "grad_norm": 1.302037077529998, + "learning_rate": 4.969610793514446e-06, + "loss": 0.24546492099761963, + "step": 5156 + }, + { + "epoch": 1.3694064533262515, + "grad_norm": 1.5300417364025873, + "learning_rate": 4.965816499116849e-06, + "loss": 0.252412348985672, + "step": 5157 + }, + { + "epoch": 1.3696720223077945, + "grad_norm": 1.1552882128683561, + "learning_rate": 4.962023175250461e-06, + "loss": 0.22654281556606293, + "step": 5158 + }, + { + "epoch": 1.3699375912893375, + "grad_norm": 1.2873880265204376, + "learning_rate": 4.958230822646581e-06, + "loss": 0.2542813718318939, + "step": 5159 + }, + { + "epoch": 1.3702031602708804, + "grad_norm": 1.2851879635778218, + "learning_rate": 4.9544394420363395e-06, + "loss": 0.25376224517822266, + "step": 5160 + }, + { + "epoch": 1.3704687292524234, + "grad_norm": 
1.252574665809313, + "learning_rate": 4.950649034150666e-06, + "loss": 0.21911674737930298, + "step": 5161 + }, + { + "epoch": 1.3707342982339663, + "grad_norm": 1.3527776455922371, + "learning_rate": 4.946859599720308e-06, + "loss": 0.2805126905441284, + "step": 5162 + }, + { + "epoch": 1.3709998672155093, + "grad_norm": 1.1716388954292443, + "learning_rate": 4.943071139475824e-06, + "loss": 0.2189590483903885, + "step": 5163 + }, + { + "epoch": 1.3712654361970522, + "grad_norm": 1.2218109142926636, + "learning_rate": 4.939283654147582e-06, + "loss": 0.21837599575519562, + "step": 5164 + }, + { + "epoch": 1.3715310051785952, + "grad_norm": 1.2779646624690562, + "learning_rate": 4.935497144465766e-06, + "loss": 0.25090983510017395, + "step": 5165 + }, + { + "epoch": 1.3717965741601381, + "grad_norm": 1.1988734011828608, + "learning_rate": 4.93171161116037e-06, + "loss": 0.22028754651546478, + "step": 5166 + }, + { + "epoch": 1.372062143141681, + "grad_norm": 1.1554753760684375, + "learning_rate": 4.927927054961201e-06, + "loss": 0.20097196102142334, + "step": 5167 + }, + { + "epoch": 1.372327712123224, + "grad_norm": 1.209557738779129, + "learning_rate": 4.924143476597872e-06, + "loss": 0.230082705616951, + "step": 5168 + }, + { + "epoch": 1.372593281104767, + "grad_norm": 1.1549715219295726, + "learning_rate": 4.920360876799821e-06, + "loss": 0.23701804876327515, + "step": 5169 + }, + { + "epoch": 1.37285885008631, + "grad_norm": 1.2740998730652584, + "learning_rate": 4.9165792562962834e-06, + "loss": 0.22357231378555298, + "step": 5170 + }, + { + "epoch": 1.3731244190678529, + "grad_norm": 1.2042473616661704, + "learning_rate": 4.912798615816312e-06, + "loss": 0.2533026337623596, + "step": 5171 + }, + { + "epoch": 1.3733899880493958, + "grad_norm": 1.3342025781776312, + "learning_rate": 4.90901895608877e-06, + "loss": 0.24878138303756714, + "step": 5172 + }, + { + "epoch": 1.3736555570309388, + "grad_norm": 1.5415419516618216, + "learning_rate": 
4.905240277842335e-06, + "loss": 0.22641420364379883, + "step": 5173 + }, + { + "epoch": 1.3739211260124817, + "grad_norm": 1.2916997982097302, + "learning_rate": 4.901462581805483e-06, + "loss": 0.24495793879032135, + "step": 5174 + }, + { + "epoch": 1.3741866949940247, + "grad_norm": 1.3531795848957913, + "learning_rate": 4.897685868706512e-06, + "loss": 0.2688868045806885, + "step": 5175 + }, + { + "epoch": 1.3744522639755676, + "grad_norm": 1.2828126418821555, + "learning_rate": 4.893910139273531e-06, + "loss": 0.25796642899513245, + "step": 5176 + }, + { + "epoch": 1.3747178329571106, + "grad_norm": 1.4091718050104127, + "learning_rate": 4.890135394234451e-06, + "loss": 0.27557405829429626, + "step": 5177 + }, + { + "epoch": 1.3749834019386535, + "grad_norm": 1.620605499986823, + "learning_rate": 4.886361634317004e-06, + "loss": 0.23553809523582458, + "step": 5178 + }, + { + "epoch": 1.3752489709201965, + "grad_norm": 1.2608742989736732, + "learning_rate": 4.882588860248725e-06, + "loss": 0.2454400360584259, + "step": 5179 + }, + { + "epoch": 1.3755145399017394, + "grad_norm": 1.1743865548501493, + "learning_rate": 4.878817072756959e-06, + "loss": 0.19460657238960266, + "step": 5180 + }, + { + "epoch": 1.3757801088832824, + "grad_norm": 1.2528300475452, + "learning_rate": 4.875046272568863e-06, + "loss": 0.24833449721336365, + "step": 5181 + }, + { + "epoch": 1.3760456778648253, + "grad_norm": 1.3263672125712147, + "learning_rate": 4.871276460411403e-06, + "loss": 0.2774161994457245, + "step": 5182 + }, + { + "epoch": 1.3763112468463683, + "grad_norm": 2.6268834337513667, + "learning_rate": 4.867507637011353e-06, + "loss": 0.2277964949607849, + "step": 5183 + }, + { + "epoch": 1.3765768158279112, + "grad_norm": 1.8924198767245841, + "learning_rate": 4.863739803095299e-06, + "loss": 0.2176733911037445, + "step": 5184 + }, + { + "epoch": 1.3768423848094542, + "grad_norm": 1.3153810073025014, + "learning_rate": 4.859972959389634e-06, + "loss": 
0.23529113829135895, + "step": 5185 + }, + { + "epoch": 1.3771079537909972, + "grad_norm": 1.3909544444662505, + "learning_rate": 4.856207106620557e-06, + "loss": 0.2646695077419281, + "step": 5186 + }, + { + "epoch": 1.37737352277254, + "grad_norm": 1.2095108180861869, + "learning_rate": 4.852442245514093e-06, + "loss": 0.23179873824119568, + "step": 5187 + }, + { + "epoch": 1.377639091754083, + "grad_norm": 1.1084014698771758, + "learning_rate": 4.84867837679605e-06, + "loss": 0.2127494066953659, + "step": 5188 + }, + { + "epoch": 1.377904660735626, + "grad_norm": 1.2275201950569183, + "learning_rate": 4.844915501192062e-06, + "loss": 0.2204679548740387, + "step": 5189 + }, + { + "epoch": 1.378170229717169, + "grad_norm": 1.2078653060668294, + "learning_rate": 4.841153619427567e-06, + "loss": 0.20271794497966766, + "step": 5190 + }, + { + "epoch": 1.378435798698712, + "grad_norm": 1.4269963155687142, + "learning_rate": 4.837392732227811e-06, + "loss": 0.2785792052745819, + "step": 5191 + }, + { + "epoch": 1.3787013676802549, + "grad_norm": 1.2501319487764966, + "learning_rate": 4.8336328403178486e-06, + "loss": 0.24904468655586243, + "step": 5192 + }, + { + "epoch": 1.378966936661798, + "grad_norm": 1.1230965332904321, + "learning_rate": 4.829873944422544e-06, + "loss": 0.20045346021652222, + "step": 5193 + }, + { + "epoch": 1.379232505643341, + "grad_norm": 1.1339816903135191, + "learning_rate": 4.826116045266565e-06, + "loss": 0.21814313530921936, + "step": 5194 + }, + { + "epoch": 1.379498074624884, + "grad_norm": 1.236126479276255, + "learning_rate": 4.82235914357439e-06, + "loss": 0.2408592253923416, + "step": 5195 + }, + { + "epoch": 1.379763643606427, + "grad_norm": 1.1229995433845732, + "learning_rate": 4.818603240070311e-06, + "loss": 0.21453416347503662, + "step": 5196 + }, + { + "epoch": 1.3800292125879698, + "grad_norm": 1.2915687788203387, + "learning_rate": 4.814848335478418e-06, + "loss": 0.2578599154949188, + "step": 5197 + }, + { + "epoch": 
1.3802947815695128, + "grad_norm": 1.0696662022967476, + "learning_rate": 4.811094430522613e-06, + "loss": 0.1980094015598297, + "step": 5198 + }, + { + "epoch": 1.3805603505510557, + "grad_norm": 1.202740960535961, + "learning_rate": 4.807341525926604e-06, + "loss": 0.24620960652828217, + "step": 5199 + }, + { + "epoch": 1.3808259195325987, + "grad_norm": 1.2486655803425535, + "learning_rate": 4.803589622413908e-06, + "loss": 0.23525282740592957, + "step": 5200 + }, + { + "epoch": 1.3810914885141417, + "grad_norm": 1.1657735912575689, + "learning_rate": 4.799838720707847e-06, + "loss": 0.2277744859457016, + "step": 5201 + }, + { + "epoch": 1.3813570574956846, + "grad_norm": 1.2927728942283212, + "learning_rate": 4.796088821531549e-06, + "loss": 0.2727074921131134, + "step": 5202 + }, + { + "epoch": 1.3816226264772276, + "grad_norm": 1.2370931993726209, + "learning_rate": 4.7923399256079525e-06, + "loss": 0.21686753630638123, + "step": 5203 + }, + { + "epoch": 1.3818881954587705, + "grad_norm": 1.2572583885252075, + "learning_rate": 4.788592033659799e-06, + "loss": 0.2841380834579468, + "step": 5204 + }, + { + "epoch": 1.3821537644403135, + "grad_norm": 1.1157272204593003, + "learning_rate": 4.78484514640964e-06, + "loss": 0.24577853083610535, + "step": 5205 + }, + { + "epoch": 1.3824193334218564, + "grad_norm": 1.2077705032221964, + "learning_rate": 4.7810992645798285e-06, + "loss": 0.22289782762527466, + "step": 5206 + }, + { + "epoch": 1.3826849024033994, + "grad_norm": 1.1476107334002954, + "learning_rate": 4.7773543888925274e-06, + "loss": 0.2223999947309494, + "step": 5207 + }, + { + "epoch": 1.3829504713849423, + "grad_norm": 1.2183085137487102, + "learning_rate": 4.773610520069706e-06, + "loss": 0.23938870429992676, + "step": 5208 + }, + { + "epoch": 1.3832160403664853, + "grad_norm": 1.219370193725879, + "learning_rate": 4.769867658833136e-06, + "loss": 0.260856568813324, + "step": 5209 + }, + { + "epoch": 1.3834816093480282, + "grad_norm": 
1.2333269697463725, + "learning_rate": 4.766125805904398e-06, + "loss": 0.23602089285850525, + "step": 5210 + }, + { + "epoch": 1.3837471783295712, + "grad_norm": 1.156747833138865, + "learning_rate": 4.762384962004877e-06, + "loss": 0.22543978691101074, + "step": 5211 + }, + { + "epoch": 1.3840127473111141, + "grad_norm": 1.3639051201807257, + "learning_rate": 4.758645127855763e-06, + "loss": 0.2432224452495575, + "step": 5212 + }, + { + "epoch": 1.384278316292657, + "grad_norm": 1.3947016936895973, + "learning_rate": 4.754906304178049e-06, + "loss": 0.22764597833156586, + "step": 5213 + }, + { + "epoch": 1.3845438852742, + "grad_norm": 1.2064067504011344, + "learning_rate": 4.751168491692541e-06, + "loss": 0.22503387928009033, + "step": 5214 + }, + { + "epoch": 1.384809454255743, + "grad_norm": 1.1066861130484609, + "learning_rate": 4.747431691119846e-06, + "loss": 0.21889932453632355, + "step": 5215 + }, + { + "epoch": 1.385075023237286, + "grad_norm": 1.3903278318809302, + "learning_rate": 4.743695903180372e-06, + "loss": 0.2695825695991516, + "step": 5216 + }, + { + "epoch": 1.3853405922188289, + "grad_norm": 1.2921759622470506, + "learning_rate": 4.739961128594336e-06, + "loss": 0.265118271112442, + "step": 5217 + }, + { + "epoch": 1.3856061612003718, + "grad_norm": 1.1349207398090602, + "learning_rate": 4.736227368081757e-06, + "loss": 0.2050788253545761, + "step": 5218 + }, + { + "epoch": 1.3858717301819148, + "grad_norm": 1.23951121142384, + "learning_rate": 4.7324946223624625e-06, + "loss": 0.274588406085968, + "step": 5219 + }, + { + "epoch": 1.3861372991634577, + "grad_norm": 1.209560473571303, + "learning_rate": 4.728762892156079e-06, + "loss": 0.2242514044046402, + "step": 5220 + }, + { + "epoch": 1.3864028681450007, + "grad_norm": 1.1337174836883812, + "learning_rate": 4.725032178182042e-06, + "loss": 0.19989261031150818, + "step": 5221 + }, + { + "epoch": 1.3866684371265436, + "grad_norm": 1.1989339880554155, + "learning_rate": 
4.721302481159588e-06, + "loss": 0.24409207701683044, + "step": 5222 + }, + { + "epoch": 1.3869340061080866, + "grad_norm": 1.2425140627800753, + "learning_rate": 4.71757380180776e-06, + "loss": 0.25146353244781494, + "step": 5223 + }, + { + "epoch": 1.3871995750896295, + "grad_norm": 1.245669068902739, + "learning_rate": 4.713846140845401e-06, + "loss": 0.23076622188091278, + "step": 5224 + }, + { + "epoch": 1.3874651440711725, + "grad_norm": 1.1122357580396618, + "learning_rate": 4.7101194989911635e-06, + "loss": 0.2159188687801361, + "step": 5225 + }, + { + "epoch": 1.3877307130527154, + "grad_norm": 1.433039209205417, + "learning_rate": 4.706393876963497e-06, + "loss": 0.24891307950019836, + "step": 5226 + }, + { + "epoch": 1.3879962820342584, + "grad_norm": 1.2167285098476437, + "learning_rate": 4.702669275480659e-06, + "loss": 0.26254773139953613, + "step": 5227 + }, + { + "epoch": 1.3882618510158014, + "grad_norm": 1.0872799599118763, + "learning_rate": 4.698945695260709e-06, + "loss": 0.19589121639728546, + "step": 5228 + }, + { + "epoch": 1.3885274199973443, + "grad_norm": 1.273899860234835, + "learning_rate": 4.695223137021509e-06, + "loss": 0.23796147108078003, + "step": 5229 + }, + { + "epoch": 1.3887929889788873, + "grad_norm": 1.1566738109261303, + "learning_rate": 4.6915016014807235e-06, + "loss": 0.21211156249046326, + "step": 5230 + }, + { + "epoch": 1.3890585579604302, + "grad_norm": 1.1477189909918881, + "learning_rate": 4.687781089355817e-06, + "loss": 0.22418555617332458, + "step": 5231 + }, + { + "epoch": 1.3893241269419732, + "grad_norm": 1.1999712861158167, + "learning_rate": 4.68406160136407e-06, + "loss": 0.24140511453151703, + "step": 5232 + }, + { + "epoch": 1.389589695923516, + "grad_norm": 1.3515422291949701, + "learning_rate": 4.68034313822255e-06, + "loss": 0.2863473892211914, + "step": 5233 + }, + { + "epoch": 1.389855264905059, + "grad_norm": 1.1002404477789451, + "learning_rate": 4.676625700648133e-06, + "loss": 
0.21283546090126038, + "step": 5234 + }, + { + "epoch": 1.390120833886602, + "grad_norm": 1.311958297113244, + "learning_rate": 4.672909289357498e-06, + "loss": 0.2701990008354187, + "step": 5235 + }, + { + "epoch": 1.390386402868145, + "grad_norm": 1.1672674472381515, + "learning_rate": 4.669193905067124e-06, + "loss": 0.23807264864444733, + "step": 5236 + }, + { + "epoch": 1.390651971849688, + "grad_norm": 1.3282268361230456, + "learning_rate": 4.665479548493298e-06, + "loss": 0.22204206883907318, + "step": 5237 + }, + { + "epoch": 1.3909175408312309, + "grad_norm": 1.2590492281878678, + "learning_rate": 4.661766220352098e-06, + "loss": 0.22389569878578186, + "step": 5238 + }, + { + "epoch": 1.3911831098127738, + "grad_norm": 1.2844920522393721, + "learning_rate": 4.65805392135941e-06, + "loss": 0.23752997815608978, + "step": 5239 + }, + { + "epoch": 1.3914486787943168, + "grad_norm": 1.8677910056359206, + "learning_rate": 4.654342652230921e-06, + "loss": 0.24055880308151245, + "step": 5240 + }, + { + "epoch": 1.3917142477758597, + "grad_norm": 1.2030621240735913, + "learning_rate": 4.6506324136821255e-06, + "loss": 0.22136151790618896, + "step": 5241 + }, + { + "epoch": 1.3919798167574027, + "grad_norm": 1.299031121789001, + "learning_rate": 4.646923206428311e-06, + "loss": 0.2616429924964905, + "step": 5242 + }, + { + "epoch": 1.3922453857389456, + "grad_norm": 1.218734267375269, + "learning_rate": 4.643215031184569e-06, + "loss": 0.24827662110328674, + "step": 5243 + }, + { + "epoch": 1.3925109547204886, + "grad_norm": 1.3223478407487963, + "learning_rate": 4.639507888665792e-06, + "loss": 0.21999669075012207, + "step": 5244 + }, + { + "epoch": 1.3927765237020315, + "grad_norm": 1.3241857590600639, + "learning_rate": 4.6358017795866715e-06, + "loss": 0.24511300027370453, + "step": 5245 + }, + { + "epoch": 1.3930420926835745, + "grad_norm": 1.2459535025826622, + "learning_rate": 4.632096704661704e-06, + "loss": 0.2410753220319748, + "step": 5246 + }, + { + 
"epoch": 1.3933076616651174, + "grad_norm": 1.157173292152249, + "learning_rate": 4.628392664605184e-06, + "loss": 0.2160021960735321, + "step": 5247 + }, + { + "epoch": 1.3935732306466604, + "grad_norm": 1.2204303717623475, + "learning_rate": 4.624689660131204e-06, + "loss": 0.22672782838344574, + "step": 5248 + }, + { + "epoch": 1.3938387996282033, + "grad_norm": 1.3056904555347544, + "learning_rate": 4.620987691953659e-06, + "loss": 0.25474926829338074, + "step": 5249 + }, + { + "epoch": 1.3941043686097463, + "grad_norm": 1.3078938706976893, + "learning_rate": 4.617286760786252e-06, + "loss": 0.2449323832988739, + "step": 5250 + }, + { + "epoch": 1.3943699375912892, + "grad_norm": 1.4350253205296164, + "learning_rate": 4.613586867342473e-06, + "loss": 0.23727643489837646, + "step": 5251 + }, + { + "epoch": 1.3946355065728322, + "grad_norm": 1.492440797106639, + "learning_rate": 4.609888012335624e-06, + "loss": 0.23727962374687195, + "step": 5252 + }, + { + "epoch": 1.3949010755543751, + "grad_norm": 1.1595482332609377, + "learning_rate": 4.60619019647879e-06, + "loss": 0.21957805752754211, + "step": 5253 + }, + { + "epoch": 1.395166644535918, + "grad_norm": 1.1972608851584254, + "learning_rate": 4.6024934204848745e-06, + "loss": 0.24184471368789673, + "step": 5254 + }, + { + "epoch": 1.395432213517461, + "grad_norm": 1.2654091836286674, + "learning_rate": 4.598797685066568e-06, + "loss": 0.239216148853302, + "step": 5255 + }, + { + "epoch": 1.395697782499004, + "grad_norm": 1.1503034311319646, + "learning_rate": 4.595102990936367e-06, + "loss": 0.17741018533706665, + "step": 5256 + }, + { + "epoch": 1.395963351480547, + "grad_norm": 1.2669115039567294, + "learning_rate": 4.591409338806566e-06, + "loss": 0.26139867305755615, + "step": 5257 + }, + { + "epoch": 1.39622892046209, + "grad_norm": 1.1295627244433792, + "learning_rate": 4.587716729389251e-06, + "loss": 0.23689255118370056, + "step": 5258 + }, + { + "epoch": 1.3964944894436329, + "grad_norm": 
1.3449494333614898, + "learning_rate": 4.584025163396323e-06, + "loss": 0.22679267823696136, + "step": 5259 + }, + { + "epoch": 1.3967600584251758, + "grad_norm": 1.4665032620533849, + "learning_rate": 4.580334641539467e-06, + "loss": 0.2743435204029083, + "step": 5260 + }, + { + "epoch": 1.3970256274067188, + "grad_norm": 1.166091966014122, + "learning_rate": 4.5766451645301735e-06, + "loss": 0.22738990187644958, + "step": 5261 + }, + { + "epoch": 1.3972911963882617, + "grad_norm": 1.2398512539901747, + "learning_rate": 4.57295673307973e-06, + "loss": 0.24826082587242126, + "step": 5262 + }, + { + "epoch": 1.3975567653698049, + "grad_norm": 1.2172880570038314, + "learning_rate": 4.569269347899222e-06, + "loss": 0.23121042549610138, + "step": 5263 + }, + { + "epoch": 1.3978223343513478, + "grad_norm": 2.1881918032824443, + "learning_rate": 4.5655830096995345e-06, + "loss": 0.21382957696914673, + "step": 5264 + }, + { + "epoch": 1.3980879033328908, + "grad_norm": 1.6700623666107715, + "learning_rate": 4.561897719191349e-06, + "loss": 0.24439184367656708, + "step": 5265 + }, + { + "epoch": 1.3983534723144337, + "grad_norm": 1.1734120938371422, + "learning_rate": 4.558213477085148e-06, + "loss": 0.2106003314256668, + "step": 5266 + }, + { + "epoch": 1.3986190412959767, + "grad_norm": 1.568387486793487, + "learning_rate": 4.554530284091209e-06, + "loss": 0.3073291480541229, + "step": 5267 + }, + { + "epoch": 1.3988846102775196, + "grad_norm": 1.226744359266016, + "learning_rate": 4.550848140919606e-06, + "loss": 0.2448226660490036, + "step": 5268 + }, + { + "epoch": 1.3991501792590626, + "grad_norm": 1.4434974870419186, + "learning_rate": 4.5471670482802165e-06, + "loss": 0.25378671288490295, + "step": 5269 + }, + { + "epoch": 1.3994157482406056, + "grad_norm": 1.243366792714921, + "learning_rate": 4.5434870068827086e-06, + "loss": 0.2735089659690857, + "step": 5270 + }, + { + "epoch": 1.3996813172221485, + "grad_norm": 1.3983115308066707, + "learning_rate": 
4.539808017436552e-06, + "loss": 0.2530548870563507, + "step": 5271 + }, + { + "epoch": 1.3999468862036915, + "grad_norm": 1.2566722493021396, + "learning_rate": 4.536130080651015e-06, + "loss": 0.23692254722118378, + "step": 5272 + }, + { + "epoch": 1.4002124551852344, + "grad_norm": 1.257120121799197, + "learning_rate": 4.532453197235155e-06, + "loss": 0.24554882943630219, + "step": 5273 + }, + { + "epoch": 1.4004780241667774, + "grad_norm": 1.2106096425654094, + "learning_rate": 4.528777367897837e-06, + "loss": 0.20152084529399872, + "step": 5274 + }, + { + "epoch": 1.4007435931483203, + "grad_norm": 1.207683737630722, + "learning_rate": 4.525102593347714e-06, + "loss": 0.20908965170383453, + "step": 5275 + }, + { + "epoch": 1.4010091621298633, + "grad_norm": 1.2398706056963738, + "learning_rate": 4.521428874293238e-06, + "loss": 0.23158209025859833, + "step": 5276 + }, + { + "epoch": 1.4012747311114062, + "grad_norm": 1.2494835342931663, + "learning_rate": 4.517756211442664e-06, + "loss": 0.2483675330877304, + "step": 5277 + }, + { + "epoch": 1.4015403000929492, + "grad_norm": 1.1662936164598174, + "learning_rate": 4.514084605504035e-06, + "loss": 0.23435397446155548, + "step": 5278 + }, + { + "epoch": 1.4018058690744921, + "grad_norm": 1.242534131664269, + "learning_rate": 4.510414057185195e-06, + "loss": 0.2605316936969757, + "step": 5279 + }, + { + "epoch": 1.402071438056035, + "grad_norm": 1.148911142729499, + "learning_rate": 4.506744567193782e-06, + "loss": 0.2279929518699646, + "step": 5280 + }, + { + "epoch": 1.402337007037578, + "grad_norm": 1.1849060379752767, + "learning_rate": 4.503076136237228e-06, + "loss": 0.23011639714241028, + "step": 5281 + }, + { + "epoch": 1.402602576019121, + "grad_norm": 1.1735153050753564, + "learning_rate": 4.499408765022765e-06, + "loss": 0.213611900806427, + "step": 5282 + }, + { + "epoch": 1.402868145000664, + "grad_norm": 1.3225078215525052, + "learning_rate": 4.495742454257418e-06, + "loss": 0.25555503368377686, + 
"step": 5283 + }, + { + "epoch": 1.4031337139822069, + "grad_norm": 1.331030123703595, + "learning_rate": 4.4920772046480095e-06, + "loss": 0.2694614827632904, + "step": 5284 + }, + { + "epoch": 1.4033992829637498, + "grad_norm": 1.3958578164403037, + "learning_rate": 4.4884130169011565e-06, + "loss": 0.2160607874393463, + "step": 5285 + }, + { + "epoch": 1.4036648519452928, + "grad_norm": 1.4996515147203022, + "learning_rate": 4.48474989172327e-06, + "loss": 0.2556128203868866, + "step": 5286 + }, + { + "epoch": 1.4039304209268357, + "grad_norm": 1.2506403611380352, + "learning_rate": 4.481087829820558e-06, + "loss": 0.2251313328742981, + "step": 5287 + }, + { + "epoch": 1.4041959899083787, + "grad_norm": 1.380992563161254, + "learning_rate": 4.477426831899024e-06, + "loss": 0.26856666803359985, + "step": 5288 + }, + { + "epoch": 1.4044615588899216, + "grad_norm": 1.2429158128712894, + "learning_rate": 4.473766898664464e-06, + "loss": 0.25573840737342834, + "step": 5289 + }, + { + "epoch": 1.4047271278714646, + "grad_norm": 1.2559748496125192, + "learning_rate": 4.4701080308224685e-06, + "loss": 0.26519301533699036, + "step": 5290 + }, + { + "epoch": 1.4049926968530075, + "grad_norm": 1.5959863642176566, + "learning_rate": 4.466450229078427e-06, + "loss": 0.2329619824886322, + "step": 5291 + }, + { + "epoch": 1.4052582658345505, + "grad_norm": 1.208485124140325, + "learning_rate": 4.4627934941375185e-06, + "loss": 0.2243901491165161, + "step": 5292 + }, + { + "epoch": 1.4055238348160934, + "grad_norm": 1.2042065274178317, + "learning_rate": 4.45913782670472e-06, + "loss": 0.22516998648643494, + "step": 5293 + }, + { + "epoch": 1.4057894037976364, + "grad_norm": 1.2427926273641645, + "learning_rate": 4.455483227484796e-06, + "loss": 0.25573113560676575, + "step": 5294 + }, + { + "epoch": 1.4060549727791793, + "grad_norm": 1.3935629686917204, + "learning_rate": 4.451829697182317e-06, + "loss": 0.2568536698818207, + "step": 5295 + }, + { + "epoch": 
1.4063205417607223, + "grad_norm": 1.293797792298673, + "learning_rate": 4.448177236501638e-06, + "loss": 0.24510663747787476, + "step": 5296 + }, + { + "epoch": 1.4065861107422652, + "grad_norm": 1.3445763390180965, + "learning_rate": 4.444525846146911e-06, + "loss": 0.24890470504760742, + "step": 5297 + }, + { + "epoch": 1.4068516797238082, + "grad_norm": 1.3096169257052843, + "learning_rate": 4.440875526822081e-06, + "loss": 0.21442994475364685, + "step": 5298 + }, + { + "epoch": 1.4071172487053512, + "grad_norm": 1.2628911672392604, + "learning_rate": 4.437226279230884e-06, + "loss": 0.24281370639801025, + "step": 5299 + }, + { + "epoch": 1.407382817686894, + "grad_norm": 1.2336479145010515, + "learning_rate": 4.433578104076853e-06, + "loss": 0.19542500376701355, + "step": 5300 + }, + { + "epoch": 1.407648386668437, + "grad_norm": 1.256359230599367, + "learning_rate": 4.429931002063315e-06, + "loss": 0.22688990831375122, + "step": 5301 + }, + { + "epoch": 1.40791395564998, + "grad_norm": 1.3692436485711592, + "learning_rate": 4.42628497389339e-06, + "loss": 0.2520858347415924, + "step": 5302 + }, + { + "epoch": 1.408179524631523, + "grad_norm": 1.1723697651028326, + "learning_rate": 4.42264002026998e-06, + "loss": 0.237991064786911, + "step": 5303 + }, + { + "epoch": 1.408445093613066, + "grad_norm": 1.1277997255078087, + "learning_rate": 4.418996141895797e-06, + "loss": 0.20164436101913452, + "step": 5304 + }, + { + "epoch": 1.408710662594609, + "grad_norm": 1.2657361694815492, + "learning_rate": 4.415353339473338e-06, + "loss": 0.24009189009666443, + "step": 5305 + }, + { + "epoch": 1.408976231576152, + "grad_norm": 1.138145945953283, + "learning_rate": 4.411711613704889e-06, + "loss": 0.23170322179794312, + "step": 5306 + }, + { + "epoch": 1.409241800557695, + "grad_norm": 1.2244077415708243, + "learning_rate": 4.408070965292534e-06, + "loss": 0.2280617356300354, + "step": 5307 + }, + { + "epoch": 1.409507369539238, + "grad_norm": 1.2724409466040383, + 
"learning_rate": 4.404431394938145e-06, + "loss": 0.21982887387275696, + "step": 5308 + }, + { + "epoch": 1.409772938520781, + "grad_norm": 1.265647410959733, + "learning_rate": 4.40079290334339e-06, + "loss": 0.25295430421829224, + "step": 5309 + }, + { + "epoch": 1.4100385075023238, + "grad_norm": 1.1099961782761754, + "learning_rate": 4.397155491209727e-06, + "loss": 0.20109041035175323, + "step": 5310 + }, + { + "epoch": 1.4103040764838668, + "grad_norm": 1.3436616824827443, + "learning_rate": 4.393519159238405e-06, + "loss": 0.2487715482711792, + "step": 5311 + }, + { + "epoch": 1.4105696454654097, + "grad_norm": 1.1475311486694626, + "learning_rate": 4.389883908130465e-06, + "loss": 0.2031790167093277, + "step": 5312 + }, + { + "epoch": 1.4108352144469527, + "grad_norm": 1.277969729475343, + "learning_rate": 4.386249738586744e-06, + "loss": 0.23029211163520813, + "step": 5313 + }, + { + "epoch": 1.4111007834284957, + "grad_norm": 1.2100830863469687, + "learning_rate": 4.382616651307866e-06, + "loss": 0.23080995678901672, + "step": 5314 + }, + { + "epoch": 1.4113663524100386, + "grad_norm": 1.2376227742095711, + "learning_rate": 4.378984646994248e-06, + "loss": 0.2450534999370575, + "step": 5315 + }, + { + "epoch": 1.4116319213915816, + "grad_norm": 1.266655148641824, + "learning_rate": 4.375353726346094e-06, + "loss": 0.24349799752235413, + "step": 5316 + }, + { + "epoch": 1.4118974903731245, + "grad_norm": 1.2696628766548714, + "learning_rate": 4.371723890063411e-06, + "loss": 0.2431599199771881, + "step": 5317 + }, + { + "epoch": 1.4121630593546675, + "grad_norm": 1.3688178233929764, + "learning_rate": 4.368095138845978e-06, + "loss": 0.2051251232624054, + "step": 5318 + }, + { + "epoch": 1.4124286283362104, + "grad_norm": 1.1726447102511934, + "learning_rate": 4.36446747339338e-06, + "loss": 0.21346575021743774, + "step": 5319 + }, + { + "epoch": 1.4126941973177534, + "grad_norm": 1.2726406383058895, + "learning_rate": 4.360840894404989e-06, + "loss": 
0.22193217277526855, + "step": 5320 + }, + { + "epoch": 1.4129597662992963, + "grad_norm": 1.2762131056761095, + "learning_rate": 4.357215402579961e-06, + "loss": 0.2112501859664917, + "step": 5321 + }, + { + "epoch": 1.4132253352808393, + "grad_norm": 1.1864412536946314, + "learning_rate": 4.3535909986172565e-06, + "loss": 0.2648766040802002, + "step": 5322 + }, + { + "epoch": 1.4134909042623822, + "grad_norm": 1.1533413783243194, + "learning_rate": 4.349967683215614e-06, + "loss": 0.22139690816402435, + "step": 5323 + }, + { + "epoch": 1.4137564732439252, + "grad_norm": 1.0259028802936685, + "learning_rate": 4.346345457073568e-06, + "loss": 0.21558481454849243, + "step": 5324 + }, + { + "epoch": 1.4140220422254681, + "grad_norm": 1.2763949378052617, + "learning_rate": 4.342724320889438e-06, + "loss": 0.2013886272907257, + "step": 5325 + }, + { + "epoch": 1.414287611207011, + "grad_norm": 1.2216640015824227, + "learning_rate": 4.3391042753613375e-06, + "loss": 0.2428729385137558, + "step": 5326 + }, + { + "epoch": 1.414553180188554, + "grad_norm": 1.2385329501903242, + "learning_rate": 4.3354853211871696e-06, + "loss": 0.20930354297161102, + "step": 5327 + }, + { + "epoch": 1.414818749170097, + "grad_norm": 1.1373474530618315, + "learning_rate": 4.331867459064623e-06, + "loss": 0.18988853693008423, + "step": 5328 + }, + { + "epoch": 1.41508431815164, + "grad_norm": 1.2833653393491664, + "learning_rate": 4.328250689691182e-06, + "loss": 0.24618801474571228, + "step": 5329 + }, + { + "epoch": 1.4153498871331829, + "grad_norm": 1.2635824567099267, + "learning_rate": 4.324635013764113e-06, + "loss": 0.23857265710830688, + "step": 5330 + }, + { + "epoch": 1.4156154561147258, + "grad_norm": 1.3200622076177175, + "learning_rate": 4.321020431980483e-06, + "loss": 0.21869014203548431, + "step": 5331 + }, + { + "epoch": 1.4158810250962688, + "grad_norm": 1.2317649692424293, + "learning_rate": 4.317406945037138e-06, + "loss": 0.2508969008922577, + "step": 5332 + }, + { + 
"epoch": 1.4161465940778117, + "grad_norm": 1.2114692744130235, + "learning_rate": 4.313794553630711e-06, + "loss": 0.2406233549118042, + "step": 5333 + }, + { + "epoch": 1.4164121630593547, + "grad_norm": 1.3314396378070763, + "learning_rate": 4.310183258457632e-06, + "loss": 0.2376224398612976, + "step": 5334 + }, + { + "epoch": 1.4166777320408976, + "grad_norm": 1.4802475566731417, + "learning_rate": 4.306573060214115e-06, + "loss": 0.2818688750267029, + "step": 5335 + }, + { + "epoch": 1.4169433010224406, + "grad_norm": 1.2248721858463099, + "learning_rate": 4.302963959596165e-06, + "loss": 0.2279777228832245, + "step": 5336 + }, + { + "epoch": 1.4172088700039835, + "grad_norm": 1.3681495314955672, + "learning_rate": 4.299355957299573e-06, + "loss": 0.2652052640914917, + "step": 5337 + }, + { + "epoch": 1.4174744389855265, + "grad_norm": 1.2814638931564002, + "learning_rate": 4.2957490540199185e-06, + "loss": 0.24415750801563263, + "step": 5338 + }, + { + "epoch": 1.4177400079670694, + "grad_norm": 1.2028147011593575, + "learning_rate": 4.292143250452569e-06, + "loss": 0.2318287044763565, + "step": 5339 + }, + { + "epoch": 1.4180055769486124, + "grad_norm": 1.1621443407054215, + "learning_rate": 4.288538547292685e-06, + "loss": 0.19914361834526062, + "step": 5340 + }, + { + "epoch": 1.4182711459301554, + "grad_norm": 1.2533818722517012, + "learning_rate": 4.2849349452352095e-06, + "loss": 0.22550678253173828, + "step": 5341 + }, + { + "epoch": 1.4185367149116983, + "grad_norm": 1.3481328868952585, + "learning_rate": 4.281332444974874e-06, + "loss": 0.25001436471939087, + "step": 5342 + }, + { + "epoch": 1.4188022838932413, + "grad_norm": 1.2557895781680242, + "learning_rate": 4.277731047206197e-06, + "loss": 0.24873407185077667, + "step": 5343 + }, + { + "epoch": 1.4190678528747842, + "grad_norm": 1.2532145662207181, + "learning_rate": 4.274130752623487e-06, + "loss": 0.25732600688934326, + "step": 5344 + }, + { + "epoch": 1.4193334218563272, + "grad_norm": 
1.1956499236331526, + "learning_rate": 4.270531561920836e-06, + "loss": 0.1894054263830185, + "step": 5345 + }, + { + "epoch": 1.4195989908378701, + "grad_norm": 1.2861805940078326, + "learning_rate": 4.2669334757921284e-06, + "loss": 0.2632025480270386, + "step": 5346 + }, + { + "epoch": 1.419864559819413, + "grad_norm": 1.1223708980675566, + "learning_rate": 4.2633364949310315e-06, + "loss": 0.22106415033340454, + "step": 5347 + }, + { + "epoch": 1.420130128800956, + "grad_norm": 1.2191554963858982, + "learning_rate": 4.259740620031e-06, + "loss": 0.2246699184179306, + "step": 5348 + }, + { + "epoch": 1.420395697782499, + "grad_norm": 1.2377251567235985, + "learning_rate": 4.256145851785277e-06, + "loss": 0.2335890382528305, + "step": 5349 + }, + { + "epoch": 1.420661266764042, + "grad_norm": 1.3200881727026734, + "learning_rate": 4.252552190886892e-06, + "loss": 0.25485220551490784, + "step": 5350 + }, + { + "epoch": 1.4209268357455849, + "grad_norm": 1.406483107573335, + "learning_rate": 4.248959638028659e-06, + "loss": 0.26234719157218933, + "step": 5351 + }, + { + "epoch": 1.4211924047271278, + "grad_norm": 1.1946878328095272, + "learning_rate": 4.245368193903181e-06, + "loss": 0.22083795070648193, + "step": 5352 + }, + { + "epoch": 1.4214579737086708, + "grad_norm": 1.288602079194267, + "learning_rate": 4.241777859202846e-06, + "loss": 0.1886332929134369, + "step": 5353 + }, + { + "epoch": 1.4217235426902137, + "grad_norm": 1.506700165302322, + "learning_rate": 4.238188634619826e-06, + "loss": 0.26154160499572754, + "step": 5354 + }, + { + "epoch": 1.4219891116717567, + "grad_norm": 1.1472960297751262, + "learning_rate": 4.234600520846085e-06, + "loss": 0.24761158227920532, + "step": 5355 + }, + { + "epoch": 1.4222546806532996, + "grad_norm": 1.154393443673505, + "learning_rate": 4.2310135185733625e-06, + "loss": 0.20936736464500427, + "step": 5356 + }, + { + "epoch": 1.4225202496348426, + "grad_norm": 1.15600424022186, + "learning_rate": 
4.227427628493198e-06, + "loss": 0.2173127979040146, + "step": 5357 + }, + { + "epoch": 1.4227858186163855, + "grad_norm": 1.217414245555098, + "learning_rate": 4.223842851296907e-06, + "loss": 0.2598559260368347, + "step": 5358 + }, + { + "epoch": 1.4230513875979285, + "grad_norm": 1.224021391863692, + "learning_rate": 4.22025918767559e-06, + "loss": 0.23701196908950806, + "step": 5359 + }, + { + "epoch": 1.4233169565794714, + "grad_norm": 1.2134140712383175, + "learning_rate": 4.216676638320135e-06, + "loss": 0.26052403450012207, + "step": 5360 + }, + { + "epoch": 1.4235825255610144, + "grad_norm": 1.2465682642545985, + "learning_rate": 4.213095203921217e-06, + "loss": 0.2464584857225418, + "step": 5361 + }, + { + "epoch": 1.4238480945425573, + "grad_norm": 1.2646547527576821, + "learning_rate": 4.209514885169294e-06, + "loss": 0.25889426469802856, + "step": 5362 + }, + { + "epoch": 1.4241136635241003, + "grad_norm": 1.2990812156107416, + "learning_rate": 4.2059356827546076e-06, + "loss": 0.26529380679130554, + "step": 5363 + }, + { + "epoch": 1.4243792325056432, + "grad_norm": 1.1509506747022789, + "learning_rate": 4.202357597367187e-06, + "loss": 0.2284630388021469, + "step": 5364 + }, + { + "epoch": 1.4246448014871862, + "grad_norm": 1.1509689814009059, + "learning_rate": 4.198780629696845e-06, + "loss": 0.2361873984336853, + "step": 5365 + }, + { + "epoch": 1.4249103704687291, + "grad_norm": 1.2489364054166838, + "learning_rate": 4.195204780433179e-06, + "loss": 0.2473624348640442, + "step": 5366 + }, + { + "epoch": 1.425175939450272, + "grad_norm": 1.2584581044476912, + "learning_rate": 4.19163005026557e-06, + "loss": 0.24852773547172546, + "step": 5367 + }, + { + "epoch": 1.425441508431815, + "grad_norm": 1.413523972125062, + "learning_rate": 4.188056439883183e-06, + "loss": 0.28409647941589355, + "step": 5368 + }, + { + "epoch": 1.425707077413358, + "grad_norm": 1.2672381227374172, + "learning_rate": 4.18448394997497e-06, + "loss": 0.2500985562801361, + 
"step": 5369 + }, + { + "epoch": 1.425972646394901, + "grad_norm": 1.2421534737421158, + "learning_rate": 4.1809125812296635e-06, + "loss": 0.23475977778434753, + "step": 5370 + }, + { + "epoch": 1.426238215376444, + "grad_norm": 1.3107626948919207, + "learning_rate": 4.177342334335782e-06, + "loss": 0.22925345599651337, + "step": 5371 + }, + { + "epoch": 1.4265037843579869, + "grad_norm": 1.1701714137905739, + "learning_rate": 4.173773209981627e-06, + "loss": 0.24463894963264465, + "step": 5372 + }, + { + "epoch": 1.4267693533395298, + "grad_norm": 1.2600839330793319, + "learning_rate": 4.170205208855281e-06, + "loss": 0.2451590746641159, + "step": 5373 + }, + { + "epoch": 1.4270349223210728, + "grad_norm": 1.192456234510782, + "learning_rate": 4.166638331644613e-06, + "loss": 0.21078437566757202, + "step": 5374 + }, + { + "epoch": 1.427300491302616, + "grad_norm": 1.1548728286132999, + "learning_rate": 4.163072579037279e-06, + "loss": 0.21466529369354248, + "step": 5375 + }, + { + "epoch": 1.4275660602841589, + "grad_norm": 1.3327200015078104, + "learning_rate": 4.159507951720713e-06, + "loss": 0.20103147625923157, + "step": 5376 + }, + { + "epoch": 1.4278316292657018, + "grad_norm": 1.2634022835060015, + "learning_rate": 4.15594445038213e-06, + "loss": 0.2618871331214905, + "step": 5377 + }, + { + "epoch": 1.4280971982472448, + "grad_norm": 1.314150540124243, + "learning_rate": 4.152382075708534e-06, + "loss": 0.2496388852596283, + "step": 5378 + }, + { + "epoch": 1.4283627672287877, + "grad_norm": 1.2776066314767451, + "learning_rate": 4.148820828386707e-06, + "loss": 0.2663899064064026, + "step": 5379 + }, + { + "epoch": 1.4286283362103307, + "grad_norm": 1.223751737565641, + "learning_rate": 4.145260709103216e-06, + "loss": 0.23617541790008545, + "step": 5380 + }, + { + "epoch": 1.4288939051918736, + "grad_norm": 1.2184450229688006, + "learning_rate": 4.141701718544411e-06, + "loss": 0.200006365776062, + "step": 5381 + }, + { + "epoch": 1.4291594741734166, + 
"grad_norm": 1.2899877428495155, + "learning_rate": 4.138143857396425e-06, + "loss": 0.22707203030586243, + "step": 5382 + }, + { + "epoch": 1.4294250431549596, + "grad_norm": 1.210998695531734, + "learning_rate": 4.134587126345162e-06, + "loss": 0.23903624713420868, + "step": 5383 + }, + { + "epoch": 1.4296906121365025, + "grad_norm": 1.56990305006701, + "learning_rate": 4.131031526076329e-06, + "loss": 0.2308908998966217, + "step": 5384 + }, + { + "epoch": 1.4299561811180455, + "grad_norm": 1.2125776866133393, + "learning_rate": 4.127477057275398e-06, + "loss": 0.18762601912021637, + "step": 5385 + }, + { + "epoch": 1.4302217500995884, + "grad_norm": 1.3670823879917342, + "learning_rate": 4.123923720627633e-06, + "loss": 0.281406044960022, + "step": 5386 + }, + { + "epoch": 1.4304873190811314, + "grad_norm": 1.24677960623226, + "learning_rate": 4.120371516818071e-06, + "loss": 0.24858589470386505, + "step": 5387 + }, + { + "epoch": 1.4307528880626743, + "grad_norm": 1.2017896897650255, + "learning_rate": 4.116820446531538e-06, + "loss": 0.22179371118545532, + "step": 5388 + }, + { + "epoch": 1.4310184570442173, + "grad_norm": 1.1523445225939053, + "learning_rate": 4.113270510452636e-06, + "loss": 0.22086869180202484, + "step": 5389 + }, + { + "epoch": 1.4312840260257602, + "grad_norm": 1.295626323300653, + "learning_rate": 4.109721709265753e-06, + "loss": 0.231503427028656, + "step": 5390 + }, + { + "epoch": 1.4315495950073032, + "grad_norm": 1.31237620612278, + "learning_rate": 4.106174043655054e-06, + "loss": 0.255252867937088, + "step": 5391 + }, + { + "epoch": 1.4318151639888461, + "grad_norm": 1.2773394357808008, + "learning_rate": 4.1026275143044854e-06, + "loss": 0.23336587846279144, + "step": 5392 + }, + { + "epoch": 1.432080732970389, + "grad_norm": 1.3267952754600625, + "learning_rate": 4.099082121897783e-06, + "loss": 0.2468583881855011, + "step": 5393 + }, + { + "epoch": 1.432346301951932, + "grad_norm": 1.2137255679394872, + "learning_rate": 
4.095537867118452e-06, + "loss": 0.21211153268814087, + "step": 5394 + }, + { + "epoch": 1.432611870933475, + "grad_norm": 1.2552061461264346, + "learning_rate": 4.091994750649783e-06, + "loss": 0.23173204064369202, + "step": 5395 + }, + { + "epoch": 1.432877439915018, + "grad_norm": 1.2420339991667666, + "learning_rate": 4.088452773174853e-06, + "loss": 0.2606658935546875, + "step": 5396 + }, + { + "epoch": 1.4331430088965609, + "grad_norm": 1.2141954954044303, + "learning_rate": 4.084911935376502e-06, + "loss": 0.21198314428329468, + "step": 5397 + }, + { + "epoch": 1.4334085778781038, + "grad_norm": 1.273859413406427, + "learning_rate": 4.08137223793737e-06, + "loss": 0.216193288564682, + "step": 5398 + }, + { + "epoch": 1.4336741468596468, + "grad_norm": 1.3862686522767422, + "learning_rate": 4.077833681539866e-06, + "loss": 0.27767330408096313, + "step": 5399 + }, + { + "epoch": 1.4339397158411897, + "grad_norm": 1.193043888736233, + "learning_rate": 4.0742962668661826e-06, + "loss": 0.21584349870681763, + "step": 5400 + }, + { + "epoch": 1.4342052848227327, + "grad_norm": 1.2801175216615184, + "learning_rate": 4.070759994598288e-06, + "loss": 0.220070481300354, + "step": 5401 + }, + { + "epoch": 1.4344708538042756, + "grad_norm": 1.4276288870785, + "learning_rate": 4.067224865417941e-06, + "loss": 0.26035353541374207, + "step": 5402 + }, + { + "epoch": 1.4347364227858186, + "grad_norm": 1.1784144309393945, + "learning_rate": 4.063690880006671e-06, + "loss": 0.23704876005649567, + "step": 5403 + }, + { + "epoch": 1.4350019917673615, + "grad_norm": 1.2793709287846655, + "learning_rate": 4.060158039045785e-06, + "loss": 0.2345760464668274, + "step": 5404 + }, + { + "epoch": 1.4352675607489045, + "grad_norm": 1.2583985201804126, + "learning_rate": 4.056626343216377e-06, + "loss": 0.21307331323623657, + "step": 5405 + }, + { + "epoch": 1.4355331297304474, + "grad_norm": 1.2401804894465362, + "learning_rate": 4.053095793199313e-06, + "loss": 0.22029465436935425, + 
"step": 5406 + }, + { + "epoch": 1.4357986987119904, + "grad_norm": 1.3865770800537958, + "learning_rate": 4.049566389675244e-06, + "loss": 0.23419252038002014, + "step": 5407 + }, + { + "epoch": 1.4360642676935333, + "grad_norm": 1.2114754283066453, + "learning_rate": 4.046038133324595e-06, + "loss": 0.21648669242858887, + "step": 5408 + }, + { + "epoch": 1.4363298366750763, + "grad_norm": 1.3682353450989566, + "learning_rate": 4.042511024827573e-06, + "loss": 0.2343464195728302, + "step": 5409 + }, + { + "epoch": 1.4365954056566193, + "grad_norm": 1.28417678054491, + "learning_rate": 4.0389850648641615e-06, + "loss": 0.20108605921268463, + "step": 5410 + }, + { + "epoch": 1.4368609746381622, + "grad_norm": 1.2806759093192033, + "learning_rate": 4.0354602541141315e-06, + "loss": 0.21885806322097778, + "step": 5411 + }, + { + "epoch": 1.4371265436197052, + "grad_norm": 1.276580988371958, + "learning_rate": 4.031936593257017e-06, + "loss": 0.2382376492023468, + "step": 5412 + }, + { + "epoch": 1.437392112601248, + "grad_norm": 1.1333519329501958, + "learning_rate": 4.028414082972141e-06, + "loss": 0.21434128284454346, + "step": 5413 + }, + { + "epoch": 1.437657681582791, + "grad_norm": 1.2161992893188567, + "learning_rate": 4.024892723938601e-06, + "loss": 0.2345191240310669, + "step": 5414 + }, + { + "epoch": 1.437923250564334, + "grad_norm": 1.309666461481554, + "learning_rate": 4.021372516835273e-06, + "loss": 0.2478899210691452, + "step": 5415 + }, + { + "epoch": 1.438188819545877, + "grad_norm": 1.2593045594203824, + "learning_rate": 4.017853462340813e-06, + "loss": 0.21356827020645142, + "step": 5416 + }, + { + "epoch": 1.4384543885274201, + "grad_norm": 1.3891493537034765, + "learning_rate": 4.014335561133652e-06, + "loss": 0.26329827308654785, + "step": 5417 + }, + { + "epoch": 1.438719957508963, + "grad_norm": 1.3689872343615141, + "learning_rate": 4.010818813892e-06, + "loss": 0.25880998373031616, + "step": 5418 + }, + { + "epoch": 1.438985526490506, + 
"grad_norm": 1.2738388972586026, + "learning_rate": 4.007303221293844e-06, + "loss": 0.22749441862106323, + "step": 5419 + }, + { + "epoch": 1.439251095472049, + "grad_norm": 1.2267331489472144, + "learning_rate": 4.00378878401695e-06, + "loss": 0.2242615520954132, + "step": 5420 + }, + { + "epoch": 1.439516664453592, + "grad_norm": 1.168704950265394, + "learning_rate": 4.000275502738862e-06, + "loss": 0.19751839339733124, + "step": 5421 + }, + { + "epoch": 1.439782233435135, + "grad_norm": 1.4000090999513362, + "learning_rate": 3.996763378136895e-06, + "loss": 0.27319905161857605, + "step": 5422 + }, + { + "epoch": 1.4400478024166778, + "grad_norm": 1.1483039760635705, + "learning_rate": 3.993252410888149e-06, + "loss": 0.21676769852638245, + "step": 5423 + }, + { + "epoch": 1.4403133713982208, + "grad_norm": 1.222649759682682, + "learning_rate": 3.989742601669494e-06, + "loss": 0.22788718342781067, + "step": 5424 + }, + { + "epoch": 1.4405789403797638, + "grad_norm": 1.1800102666876688, + "learning_rate": 3.986233951157581e-06, + "loss": 0.23224875330924988, + "step": 5425 + }, + { + "epoch": 1.4408445093613067, + "grad_norm": 1.3242271211713557, + "learning_rate": 3.982726460028836e-06, + "loss": 0.23625247180461884, + "step": 5426 + }, + { + "epoch": 1.4411100783428497, + "grad_norm": 1.237043381628487, + "learning_rate": 3.979220128959463e-06, + "loss": 0.2092093527317047, + "step": 5427 + }, + { + "epoch": 1.4413756473243926, + "grad_norm": 1.164989095324882, + "learning_rate": 3.975714958625442e-06, + "loss": 0.22196070849895477, + "step": 5428 + }, + { + "epoch": 1.4416412163059356, + "grad_norm": 1.248575755705502, + "learning_rate": 3.972210949702525e-06, + "loss": 0.21276375651359558, + "step": 5429 + }, + { + "epoch": 1.4419067852874785, + "grad_norm": 1.2714203744447936, + "learning_rate": 3.968708102866247e-06, + "loss": 0.22150103747844696, + "step": 5430 + }, + { + "epoch": 1.4421723542690215, + "grad_norm": 1.2519929176778726, + "learning_rate": 
3.965206418791914e-06, + "loss": 0.24529573321342468, + "step": 5431 + }, + { + "epoch": 1.4424379232505644, + "grad_norm": 1.3331662749929607, + "learning_rate": 3.961705898154609e-06, + "loss": 0.24349135160446167, + "step": 5432 + }, + { + "epoch": 1.4427034922321074, + "grad_norm": 1.3094668545917496, + "learning_rate": 3.9582065416291926e-06, + "loss": 0.23481428623199463, + "step": 5433 + }, + { + "epoch": 1.4429690612136503, + "grad_norm": 1.2664431166747565, + "learning_rate": 3.954708349890299e-06, + "loss": 0.2366936057806015, + "step": 5434 + }, + { + "epoch": 1.4432346301951933, + "grad_norm": 1.2699903819491114, + "learning_rate": 3.951211323612336e-06, + "loss": 0.24792322516441345, + "step": 5435 + }, + { + "epoch": 1.4435001991767362, + "grad_norm": 1.1943208090894295, + "learning_rate": 3.947715463469493e-06, + "loss": 0.22601652145385742, + "step": 5436 + }, + { + "epoch": 1.4437657681582792, + "grad_norm": 1.1333130191791405, + "learning_rate": 3.9442207701357235e-06, + "loss": 0.19603165984153748, + "step": 5437 + }, + { + "epoch": 1.4440313371398221, + "grad_norm": 1.26512939224431, + "learning_rate": 3.940727244284772e-06, + "loss": 0.22619353234767914, + "step": 5438 + }, + { + "epoch": 1.444296906121365, + "grad_norm": 1.3207139711857465, + "learning_rate": 3.937234886590146e-06, + "loss": 0.24836638569831848, + "step": 5439 + }, + { + "epoch": 1.444562475102908, + "grad_norm": 1.2114237797025103, + "learning_rate": 3.933743697725129e-06, + "loss": 0.21585768461227417, + "step": 5440 + }, + { + "epoch": 1.444828044084451, + "grad_norm": 1.2037953387653635, + "learning_rate": 3.930253678362784e-06, + "loss": 0.20876167714595795, + "step": 5441 + }, + { + "epoch": 1.445093613065994, + "grad_norm": 1.2825218153573943, + "learning_rate": 3.926764829175943e-06, + "loss": 0.24337999522686005, + "step": 5442 + }, + { + "epoch": 1.4453591820475369, + "grad_norm": 1.2238662957767994, + "learning_rate": 3.9232771508372155e-06, + "loss": 
0.2511219084262848, + "step": 5443 + }, + { + "epoch": 1.4456247510290798, + "grad_norm": 1.2796769482653771, + "learning_rate": 3.919790644018986e-06, + "loss": 0.26257213950157166, + "step": 5444 + }, + { + "epoch": 1.4458903200106228, + "grad_norm": 1.3570371082898334, + "learning_rate": 3.91630530939341e-06, + "loss": 0.2720959782600403, + "step": 5445 + }, + { + "epoch": 1.4461558889921657, + "grad_norm": 1.2897968589877258, + "learning_rate": 3.912821147632421e-06, + "loss": 0.23849177360534668, + "step": 5446 + }, + { + "epoch": 1.4464214579737087, + "grad_norm": 1.2539273982781811, + "learning_rate": 3.909338159407722e-06, + "loss": 0.2366214245557785, + "step": 5447 + }, + { + "epoch": 1.4466870269552516, + "grad_norm": 1.21348130376658, + "learning_rate": 3.905856345390793e-06, + "loss": 0.21905584633350372, + "step": 5448 + }, + { + "epoch": 1.4469525959367946, + "grad_norm": 1.3001423574977207, + "learning_rate": 3.902375706252887e-06, + "loss": 0.23964065313339233, + "step": 5449 + }, + { + "epoch": 1.4472181649183375, + "grad_norm": 1.2161208716702177, + "learning_rate": 3.89889624266503e-06, + "loss": 0.22246500849723816, + "step": 5450 + }, + { + "epoch": 1.4474837338998805, + "grad_norm": 1.2845367508241097, + "learning_rate": 3.895417955298022e-06, + "loss": 0.22980710864067078, + "step": 5451 + }, + { + "epoch": 1.4477493028814234, + "grad_norm": 1.4690832477509688, + "learning_rate": 3.8919408448224346e-06, + "loss": 0.21276253461837769, + "step": 5452 + }, + { + "epoch": 1.4480148718629664, + "grad_norm": 1.3515036942552143, + "learning_rate": 3.888464911908616e-06, + "loss": 0.23925542831420898, + "step": 5453 + }, + { + "epoch": 1.4482804408445094, + "grad_norm": 1.1871457723177183, + "learning_rate": 3.884990157226683e-06, + "loss": 0.21528369188308716, + "step": 5454 + }, + { + "epoch": 1.4485460098260523, + "grad_norm": 1.2673056278722348, + "learning_rate": 3.8815165814465235e-06, + "loss": 0.24563542008399963, + "step": 5455 + }, + { + 
"epoch": 1.4488115788075953, + "grad_norm": 1.2561210989748839, + "learning_rate": 3.87804418523781e-06, + "loss": 0.2721150517463684, + "step": 5456 + }, + { + "epoch": 1.4490771477891382, + "grad_norm": 1.3721328159682122, + "learning_rate": 3.874572969269976e-06, + "loss": 0.23716527223587036, + "step": 5457 + }, + { + "epoch": 1.4493427167706812, + "grad_norm": 1.5185790933002854, + "learning_rate": 3.871102934212231e-06, + "loss": 0.2182254046201706, + "step": 5458 + }, + { + "epoch": 1.4496082857522241, + "grad_norm": 1.233204842662738, + "learning_rate": 3.867634080733557e-06, + "loss": 0.2179020643234253, + "step": 5459 + }, + { + "epoch": 1.449873854733767, + "grad_norm": 1.2633976965193632, + "learning_rate": 3.864166409502706e-06, + "loss": 0.22901684045791626, + "step": 5460 + }, + { + "epoch": 1.45013942371531, + "grad_norm": 1.209132482684757, + "learning_rate": 3.860699921188211e-06, + "loss": 0.2287352979183197, + "step": 5461 + }, + { + "epoch": 1.450404992696853, + "grad_norm": 1.214494370780124, + "learning_rate": 3.85723461645836e-06, + "loss": 0.2448873668909073, + "step": 5462 + }, + { + "epoch": 1.450670561678396, + "grad_norm": 1.323933009108344, + "learning_rate": 3.85377049598123e-06, + "loss": 0.2693510055541992, + "step": 5463 + }, + { + "epoch": 1.4509361306599389, + "grad_norm": 1.1826355120377283, + "learning_rate": 3.8503075604246554e-06, + "loss": 0.25414884090423584, + "step": 5464 + }, + { + "epoch": 1.4512016996414818, + "grad_norm": 1.3400776704302024, + "learning_rate": 3.846845810456258e-06, + "loss": 0.27798837423324585, + "step": 5465 + }, + { + "epoch": 1.4514672686230248, + "grad_norm": 1.3109571985733361, + "learning_rate": 3.8433852467434175e-06, + "loss": 0.23348593711853027, + "step": 5466 + }, + { + "epoch": 1.4517328376045677, + "grad_norm": 1.148921292979252, + "learning_rate": 3.839925869953292e-06, + "loss": 0.20993635058403015, + "step": 5467 + }, + { + "epoch": 1.4519984065861107, + "grad_norm": 
1.1967150813107374, + "learning_rate": 3.836467680752808e-06, + "loss": 0.225263774394989, + "step": 5468 + }, + { + "epoch": 1.4522639755676536, + "grad_norm": 4.549069881323283, + "learning_rate": 3.833010679808662e-06, + "loss": 0.2481595277786255, + "step": 5469 + }, + { + "epoch": 1.4525295445491966, + "grad_norm": 1.098861894900169, + "learning_rate": 3.829554867787324e-06, + "loss": 0.20755310356616974, + "step": 5470 + }, + { + "epoch": 1.4527951135307395, + "grad_norm": 1.3031978879220207, + "learning_rate": 3.826100245355034e-06, + "loss": 0.22124455869197845, + "step": 5471 + }, + { + "epoch": 1.4530606825122825, + "grad_norm": 1.1779333046553406, + "learning_rate": 3.822646813177803e-06, + "loss": 0.23461398482322693, + "step": 5472 + }, + { + "epoch": 1.4533262514938254, + "grad_norm": 1.123494857736561, + "learning_rate": 3.819194571921407e-06, + "loss": 0.22890526056289673, + "step": 5473 + }, + { + "epoch": 1.4535918204753684, + "grad_norm": 1.1163449125196687, + "learning_rate": 3.815743522251406e-06, + "loss": 0.23236533999443054, + "step": 5474 + }, + { + "epoch": 1.4538573894569113, + "grad_norm": 1.204733497516731, + "learning_rate": 3.8122936648331164e-06, + "loss": 0.2192365825176239, + "step": 5475 + }, + { + "epoch": 1.4541229584384543, + "grad_norm": 1.3061324350348682, + "learning_rate": 3.8088450003316346e-06, + "loss": 0.23970162868499756, + "step": 5476 + }, + { + "epoch": 1.4543885274199972, + "grad_norm": 1.256131451943752, + "learning_rate": 3.8053975294118163e-06, + "loss": 0.24270984530448914, + "step": 5477 + }, + { + "epoch": 1.4546540964015402, + "grad_norm": 1.1616491435133687, + "learning_rate": 3.801951252738295e-06, + "loss": 0.22228944301605225, + "step": 5478 + }, + { + "epoch": 1.4549196653830831, + "grad_norm": 1.2998939083384287, + "learning_rate": 3.7985061709754735e-06, + "loss": 0.25029584765434265, + "step": 5479 + }, + { + "epoch": 1.455185234364626, + "grad_norm": 1.1546196330858232, + "learning_rate": 
3.795062284787522e-06, + "loss": 0.23831725120544434, + "step": 5480 + }, + { + "epoch": 1.455450803346169, + "grad_norm": 1.2698177511587796, + "learning_rate": 3.7916195948383817e-06, + "loss": 0.2571605145931244, + "step": 5481 + }, + { + "epoch": 1.455716372327712, + "grad_norm": 1.4321109332673951, + "learning_rate": 3.7881781017917586e-06, + "loss": 0.2660857141017914, + "step": 5482 + }, + { + "epoch": 1.455981941309255, + "grad_norm": 1.3406733437493707, + "learning_rate": 3.7847378063111394e-06, + "loss": 0.2468302845954895, + "step": 5483 + }, + { + "epoch": 1.456247510290798, + "grad_norm": 1.363296358111954, + "learning_rate": 3.7812987090597696e-06, + "loss": 0.2559482753276825, + "step": 5484 + }, + { + "epoch": 1.4565130792723409, + "grad_norm": 1.2144737578388247, + "learning_rate": 3.7778608107006654e-06, + "loss": 0.24484393000602722, + "step": 5485 + }, + { + "epoch": 1.4567786482538838, + "grad_norm": 1.1782087302857855, + "learning_rate": 3.774424111896614e-06, + "loss": 0.2376541644334793, + "step": 5486 + }, + { + "epoch": 1.4570442172354268, + "grad_norm": 1.1748479481028287, + "learning_rate": 3.770988613310169e-06, + "loss": 0.22265875339508057, + "step": 5487 + }, + { + "epoch": 1.45730978621697, + "grad_norm": 1.2316185421612622, + "learning_rate": 3.7675543156036555e-06, + "loss": 0.2511552572250366, + "step": 5488 + }, + { + "epoch": 1.457575355198513, + "grad_norm": 1.2601957381413438, + "learning_rate": 3.764121219439165e-06, + "loss": 0.2412843108177185, + "step": 5489 + }, + { + "epoch": 1.4578409241800558, + "grad_norm": 1.2622123015546969, + "learning_rate": 3.760689325478559e-06, + "loss": 0.26342809200286865, + "step": 5490 + }, + { + "epoch": 1.4581064931615988, + "grad_norm": 1.2994089172948287, + "learning_rate": 3.7572586343834638e-06, + "loss": 0.23315641283988953, + "step": 5491 + }, + { + "epoch": 1.4583720621431417, + "grad_norm": 1.0927170518216454, + "learning_rate": 3.753829146815279e-06, + "loss": 
0.24148929119110107, + "step": 5492 + }, + { + "epoch": 1.4586376311246847, + "grad_norm": 1.363697618202234, + "learning_rate": 3.750400863435166e-06, + "loss": 0.22838115692138672, + "step": 5493 + }, + { + "epoch": 1.4589032001062276, + "grad_norm": 1.2083898158968958, + "learning_rate": 3.746973784904061e-06, + "loss": 0.21669608354568481, + "step": 5494 + }, + { + "epoch": 1.4591687690877706, + "grad_norm": 1.4819576271076944, + "learning_rate": 3.743547911882662e-06, + "loss": 0.25619322061538696, + "step": 5495 + }, + { + "epoch": 1.4594343380693136, + "grad_norm": 1.2058542987095502, + "learning_rate": 3.7401232450314384e-06, + "loss": 0.23629480600357056, + "step": 5496 + }, + { + "epoch": 1.4596999070508565, + "grad_norm": 1.189438722154431, + "learning_rate": 3.7366997850106245e-06, + "loss": 0.21799582242965698, + "step": 5497 + }, + { + "epoch": 1.4599654760323995, + "grad_norm": 1.372571579127378, + "learning_rate": 3.733277532480223e-06, + "loss": 0.2582590579986572, + "step": 5498 + }, + { + "epoch": 1.4602310450139424, + "grad_norm": 1.1675281771435806, + "learning_rate": 3.729856488100003e-06, + "loss": 0.23641736805438995, + "step": 5499 + }, + { + "epoch": 1.4604966139954854, + "grad_norm": 1.3024331747300109, + "learning_rate": 3.7264366525295e-06, + "loss": 0.24150417745113373, + "step": 5500 + }, + { + "epoch": 1.4607621829770283, + "grad_norm": 1.2012687985267718, + "learning_rate": 3.7230180264280245e-06, + "loss": 0.2474009394645691, + "step": 5501 + }, + { + "epoch": 1.4610277519585713, + "grad_norm": 1.3411668359609863, + "learning_rate": 3.7196006104546435e-06, + "loss": 0.269604355096817, + "step": 5502 + }, + { + "epoch": 1.4612933209401142, + "grad_norm": 1.3014753471077654, + "learning_rate": 3.716184405268194e-06, + "loss": 0.24324679374694824, + "step": 5503 + }, + { + "epoch": 1.4615588899216572, + "grad_norm": 1.1306865007600708, + "learning_rate": 3.7127694115272805e-06, + "loss": 0.2249709963798523, + "step": 5504 + }, + { + 
"epoch": 1.4618244589032001, + "grad_norm": 1.2915165646779034, + "learning_rate": 3.7093556298902734e-06, + "loss": 0.2560918629169464, + "step": 5505 + }, + { + "epoch": 1.462090027884743, + "grad_norm": 1.154084739271703, + "learning_rate": 3.705943061015309e-06, + "loss": 0.22693020105361938, + "step": 5506 + }, + { + "epoch": 1.462355596866286, + "grad_norm": 1.2640727525169442, + "learning_rate": 3.702531705560292e-06, + "loss": 0.2617371678352356, + "step": 5507 + }, + { + "epoch": 1.462621165847829, + "grad_norm": 1.2561844307954502, + "learning_rate": 3.6991215641828903e-06, + "loss": 0.2314397394657135, + "step": 5508 + }, + { + "epoch": 1.462886734829372, + "grad_norm": 1.1063207547372251, + "learning_rate": 3.6957126375405383e-06, + "loss": 0.23186162114143372, + "step": 5509 + }, + { + "epoch": 1.4631523038109149, + "grad_norm": 1.2602306615156422, + "learning_rate": 3.6923049262904375e-06, + "loss": 0.21775083243846893, + "step": 5510 + }, + { + "epoch": 1.4634178727924578, + "grad_norm": 1.2619669881473867, + "learning_rate": 3.688898431089556e-06, + "loss": 0.24707889556884766, + "step": 5511 + }, + { + "epoch": 1.4636834417740008, + "grad_norm": 1.0923805026421214, + "learning_rate": 3.6854931525946237e-06, + "loss": 0.1941150575876236, + "step": 5512 + }, + { + "epoch": 1.4639490107555437, + "grad_norm": 1.0123090946182933, + "learning_rate": 3.6820890914621376e-06, + "loss": 0.17808857560157776, + "step": 5513 + }, + { + "epoch": 1.4642145797370867, + "grad_norm": 1.2139965705715394, + "learning_rate": 3.678686248348363e-06, + "loss": 0.2150077074766159, + "step": 5514 + }, + { + "epoch": 1.4644801487186296, + "grad_norm": 1.4267562521267494, + "learning_rate": 3.6752846239093276e-06, + "loss": 0.2605292797088623, + "step": 5515 + }, + { + "epoch": 1.4647457177001726, + "grad_norm": 1.202920213288267, + "learning_rate": 3.671884218800822e-06, + "loss": 0.22481867671012878, + "step": 5516 + }, + { + "epoch": 1.4650112866817155, + "grad_norm": 
5.588780783186036, + "learning_rate": 3.668485033678406e-06, + "loss": 0.24453294277191162, + "step": 5517 + }, + { + "epoch": 1.4652768556632585, + "grad_norm": 1.379432138271627, + "learning_rate": 3.6650870691973996e-06, + "loss": 0.2672286033630371, + "step": 5518 + }, + { + "epoch": 1.4655424246448014, + "grad_norm": 1.2625747265975353, + "learning_rate": 3.661690326012897e-06, + "loss": 0.2514987587928772, + "step": 5519 + }, + { + "epoch": 1.4658079936263444, + "grad_norm": 1.3337549906693908, + "learning_rate": 3.6582948047797438e-06, + "loss": 0.25671514868736267, + "step": 5520 + }, + { + "epoch": 1.4660735626078873, + "grad_norm": 1.3535247420304835, + "learning_rate": 3.654900506152561e-06, + "loss": 0.25485602021217346, + "step": 5521 + }, + { + "epoch": 1.4663391315894303, + "grad_norm": 1.1813027271086827, + "learning_rate": 3.6515074307857257e-06, + "loss": 0.23556292057037354, + "step": 5522 + }, + { + "epoch": 1.4666047005709733, + "grad_norm": 1.15604598759747, + "learning_rate": 3.6481155793333855e-06, + "loss": 0.23347696661949158, + "step": 5523 + }, + { + "epoch": 1.4668702695525162, + "grad_norm": 1.218328581124676, + "learning_rate": 3.6447249524494466e-06, + "loss": 0.2405884712934494, + "step": 5524 + }, + { + "epoch": 1.4671358385340592, + "grad_norm": 1.2423110513745568, + "learning_rate": 3.6413355507875845e-06, + "loss": 0.23668336868286133, + "step": 5525 + }, + { + "epoch": 1.467401407515602, + "grad_norm": 1.207526661238473, + "learning_rate": 3.6379473750012375e-06, + "loss": 0.25534945726394653, + "step": 5526 + }, + { + "epoch": 1.467666976497145, + "grad_norm": 1.267472887202726, + "learning_rate": 3.634560425743596e-06, + "loss": 0.22227410972118378, + "step": 5527 + }, + { + "epoch": 1.467932545478688, + "grad_norm": 1.4853214348875312, + "learning_rate": 3.631174703667636e-06, + "loss": 0.23395927250385284, + "step": 5528 + }, + { + "epoch": 1.468198114460231, + "grad_norm": 1.2396534638298151, + "learning_rate": 
3.6277902094260785e-06, + "loss": 0.23419208824634552, + "step": 5529 + }, + { + "epoch": 1.4684636834417741, + "grad_norm": 1.3441597355302621, + "learning_rate": 3.6244069436714158e-06, + "loss": 0.22185654938220978, + "step": 5530 + }, + { + "epoch": 1.468729252423317, + "grad_norm": 1.2489989202798994, + "learning_rate": 3.621024907055901e-06, + "loss": 0.2705134153366089, + "step": 5531 + }, + { + "epoch": 1.46899482140486, + "grad_norm": 1.23195362246657, + "learning_rate": 3.617644100231551e-06, + "loss": 0.23426109552383423, + "step": 5532 + }, + { + "epoch": 1.469260390386403, + "grad_norm": 1.2477206941188708, + "learning_rate": 3.6142645238501462e-06, + "loss": 0.25527146458625793, + "step": 5533 + }, + { + "epoch": 1.469525959367946, + "grad_norm": 1.1030456616341389, + "learning_rate": 3.610886178563228e-06, + "loss": 0.1882668435573578, + "step": 5534 + }, + { + "epoch": 1.469791528349489, + "grad_norm": 1.2622509171219458, + "learning_rate": 3.607509065022101e-06, + "loss": 0.24060532450675964, + "step": 5535 + }, + { + "epoch": 1.4700570973310318, + "grad_norm": 1.2245038712856335, + "learning_rate": 3.6041331838778325e-06, + "loss": 0.23555803298950195, + "step": 5536 + }, + { + "epoch": 1.4703226663125748, + "grad_norm": 1.2192798079575136, + "learning_rate": 3.6007585357812557e-06, + "loss": 0.23126551508903503, + "step": 5537 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 1.139497037450913, + "learning_rate": 3.597385121382961e-06, + "loss": 0.24203836917877197, + "step": 5538 + }, + { + "epoch": 1.4708538042756607, + "grad_norm": 1.2467383616518404, + "learning_rate": 3.5940129413333046e-06, + "loss": 0.239767923951149, + "step": 5539 + }, + { + "epoch": 1.4711193732572037, + "grad_norm": 1.158137574546163, + "learning_rate": 3.5906419962824002e-06, + "loss": 0.24732957780361176, + "step": 5540 + }, + { + "epoch": 1.4713849422387466, + "grad_norm": 1.2722296085836442, + "learning_rate": 3.587272286880131e-06, + "loss": 
0.2296421229839325, + "step": 5541 + }, + { + "epoch": 1.4716505112202896, + "grad_norm": 1.2453973567418024, + "learning_rate": 3.583903813776132e-06, + "loss": 0.2339775711297989, + "step": 5542 + }, + { + "epoch": 1.4719160802018325, + "grad_norm": 1.194940832073201, + "learning_rate": 3.5805365776198052e-06, + "loss": 0.230351984500885, + "step": 5543 + }, + { + "epoch": 1.4721816491833755, + "grad_norm": 1.2792126719917591, + "learning_rate": 3.5771705790603163e-06, + "loss": 0.2501414716243744, + "step": 5544 + }, + { + "epoch": 1.4724472181649184, + "grad_norm": 1.2327284472179139, + "learning_rate": 3.5738058187465864e-06, + "loss": 0.23387153446674347, + "step": 5545 + }, + { + "epoch": 1.4727127871464614, + "grad_norm": 1.2921618045206031, + "learning_rate": 3.570442297327307e-06, + "loss": 0.23874594271183014, + "step": 5546 + }, + { + "epoch": 1.4729783561280043, + "grad_norm": 1.2841826918754735, + "learning_rate": 3.5670800154509245e-06, + "loss": 0.21867451071739197, + "step": 5547 + }, + { + "epoch": 1.4732439251095473, + "grad_norm": 1.2937830650411482, + "learning_rate": 3.563718973765644e-06, + "loss": 0.24124100804328918, + "step": 5548 + }, + { + "epoch": 1.4735094940910902, + "grad_norm": 1.2156419794246578, + "learning_rate": 3.5603591729194377e-06, + "loss": 0.22185327112674713, + "step": 5549 + }, + { + "epoch": 1.4737750630726332, + "grad_norm": 1.1571779294098303, + "learning_rate": 3.5570006135600345e-06, + "loss": 0.21193793416023254, + "step": 5550 + }, + { + "epoch": 1.4740406320541761, + "grad_norm": 1.3939617841899903, + "learning_rate": 3.553643296334924e-06, + "loss": 0.2615143656730652, + "step": 5551 + }, + { + "epoch": 1.474306201035719, + "grad_norm": 1.1936451275051074, + "learning_rate": 3.5502872218913597e-06, + "loss": 0.24937541782855988, + "step": 5552 + }, + { + "epoch": 1.474571770017262, + "grad_norm": 1.0736225386439564, + "learning_rate": 3.5469323908763507e-06, + "loss": 0.22849224507808685, + "step": 5553 + }, + { 
+ "epoch": 1.474837338998805, + "grad_norm": 1.6488166459783042, + "learning_rate": 3.5435788039366657e-06, + "loss": 0.2209717333316803, + "step": 5554 + }, + { + "epoch": 1.475102907980348, + "grad_norm": 1.2992665215674652, + "learning_rate": 3.5402264617188453e-06, + "loss": 0.2529235780239105, + "step": 5555 + }, + { + "epoch": 1.4753684769618909, + "grad_norm": 1.2133685762997675, + "learning_rate": 3.536875364869181e-06, + "loss": 0.2045450657606125, + "step": 5556 + }, + { + "epoch": 1.4756340459434338, + "grad_norm": 1.0591536248970717, + "learning_rate": 3.5335255140337167e-06, + "loss": 0.1973644196987152, + "step": 5557 + }, + { + "epoch": 1.4758996149249768, + "grad_norm": 1.3059187006673687, + "learning_rate": 3.5301769098582685e-06, + "loss": 0.27417299151420593, + "step": 5558 + }, + { + "epoch": 1.4761651839065197, + "grad_norm": 1.2500382678843112, + "learning_rate": 3.5268295529884077e-06, + "loss": 0.24541756510734558, + "step": 5559 + }, + { + "epoch": 1.4764307528880627, + "grad_norm": 1.4461383875060436, + "learning_rate": 3.5234834440694655e-06, + "loss": 0.25785958766937256, + "step": 5560 + }, + { + "epoch": 1.4766963218696056, + "grad_norm": 1.1676448271023605, + "learning_rate": 3.5201385837465307e-06, + "loss": 0.21099212765693665, + "step": 5561 + }, + { + "epoch": 1.4769618908511486, + "grad_norm": 1.1787333048605453, + "learning_rate": 3.5167949726644545e-06, + "loss": 0.26023173332214355, + "step": 5562 + }, + { + "epoch": 1.4772274598326915, + "grad_norm": 1.6670162101301063, + "learning_rate": 3.5134526114678426e-06, + "loss": 0.22882963716983795, + "step": 5563 + }, + { + "epoch": 1.4774930288142345, + "grad_norm": 1.312450944331431, + "learning_rate": 3.5101115008010677e-06, + "loss": 0.21987251937389374, + "step": 5564 + }, + { + "epoch": 1.4777585977957775, + "grad_norm": 1.163985983495263, + "learning_rate": 3.506771641308255e-06, + "loss": 0.2169610857963562, + "step": 5565 + }, + { + "epoch": 1.4780241667773204, + 
"grad_norm": 4.440133890295746, + "learning_rate": 3.50343303363329e-06, + "loss": 0.22723034024238586, + "step": 5566 + }, + { + "epoch": 1.4782897357588634, + "grad_norm": 1.2392064660120468, + "learning_rate": 3.5000956784198157e-06, + "loss": 0.23738276958465576, + "step": 5567 + }, + { + "epoch": 1.4785553047404063, + "grad_norm": 1.1818266174210303, + "learning_rate": 3.496759576311235e-06, + "loss": 0.19922251999378204, + "step": 5568 + }, + { + "epoch": 1.4788208737219493, + "grad_norm": 1.294067668946831, + "learning_rate": 3.4934247279507092e-06, + "loss": 0.22529268264770508, + "step": 5569 + }, + { + "epoch": 1.4790864427034922, + "grad_norm": 1.3551359298814187, + "learning_rate": 3.4900911339811583e-06, + "loss": 0.26758015155792236, + "step": 5570 + }, + { + "epoch": 1.4793520116850352, + "grad_norm": 1.2627897957153122, + "learning_rate": 3.48675879504526e-06, + "loss": 0.24752648174762726, + "step": 5571 + }, + { + "epoch": 1.4796175806665781, + "grad_norm": 1.3085621441307098, + "learning_rate": 3.483427711785449e-06, + "loss": 0.25337618589401245, + "step": 5572 + }, + { + "epoch": 1.479883149648121, + "grad_norm": 1.3543288061594618, + "learning_rate": 3.480097884843919e-06, + "loss": 0.24504786729812622, + "step": 5573 + }, + { + "epoch": 1.480148718629664, + "grad_norm": 1.1750849317955903, + "learning_rate": 3.4767693148626223e-06, + "loss": 0.21255145967006683, + "step": 5574 + }, + { + "epoch": 1.480414287611207, + "grad_norm": 1.2853041773936769, + "learning_rate": 3.473442002483267e-06, + "loss": 0.2501891553401947, + "step": 5575 + }, + { + "epoch": 1.48067985659275, + "grad_norm": 1.195974425335747, + "learning_rate": 3.4701159483473202e-06, + "loss": 0.25276634097099304, + "step": 5576 + }, + { + "epoch": 1.4809454255742929, + "grad_norm": 1.427206116406706, + "learning_rate": 3.4667911530960052e-06, + "loss": 0.2760567367076874, + "step": 5577 + }, + { + "epoch": 1.4812109945558358, + "grad_norm": 1.2442739080424003, + 
"learning_rate": 3.463467617370305e-06, + "loss": 0.22686481475830078, + "step": 5578 + }, + { + "epoch": 1.4814765635373788, + "grad_norm": 1.2374194002920247, + "learning_rate": 3.4601453418109554e-06, + "loss": 0.23262599110603333, + "step": 5579 + }, + { + "epoch": 1.4817421325189217, + "grad_norm": 1.2263890428702933, + "learning_rate": 3.4568243270584545e-06, + "loss": 0.22231365740299225, + "step": 5580 + }, + { + "epoch": 1.4820077015004647, + "grad_norm": 1.2193067799394695, + "learning_rate": 3.4535045737530504e-06, + "loss": 0.22237855195999146, + "step": 5581 + }, + { + "epoch": 1.4822732704820076, + "grad_norm": 1.208437884817879, + "learning_rate": 3.4501860825347587e-06, + "loss": 0.2260412871837616, + "step": 5582 + }, + { + "epoch": 1.4825388394635506, + "grad_norm": 1.3488909026023506, + "learning_rate": 3.4468688540433425e-06, + "loss": 0.2133496105670929, + "step": 5583 + }, + { + "epoch": 1.4828044084450935, + "grad_norm": 1.231358912436915, + "learning_rate": 3.4435528889183245e-06, + "loss": 0.24750375747680664, + "step": 5584 + }, + { + "epoch": 1.4830699774266365, + "grad_norm": 1.2053641188090713, + "learning_rate": 3.440238187798983e-06, + "loss": 0.23673412203788757, + "step": 5585 + }, + { + "epoch": 1.4833355464081794, + "grad_norm": 1.312048381493266, + "learning_rate": 3.436924751324354e-06, + "loss": 0.2505243420600891, + "step": 5586 + }, + { + "epoch": 1.4836011153897224, + "grad_norm": 1.2769153596955758, + "learning_rate": 3.433612580133229e-06, + "loss": 0.276151180267334, + "step": 5587 + }, + { + "epoch": 1.4838666843712653, + "grad_norm": 1.0245497892529305, + "learning_rate": 3.430301674864154e-06, + "loss": 0.1756816953420639, + "step": 5588 + }, + { + "epoch": 1.4841322533528083, + "grad_norm": 1.2667973514811224, + "learning_rate": 3.4269920361554342e-06, + "loss": 0.25901898741722107, + "step": 5589 + }, + { + "epoch": 1.4843978223343512, + "grad_norm": 1.2034260428652863, + "learning_rate": 3.4236836646451286e-06, + 
"loss": 0.21196085214614868, + "step": 5590 + }, + { + "epoch": 1.4846633913158942, + "grad_norm": 1.2887221468811698, + "learning_rate": 3.4203765609710525e-06, + "loss": 0.24153128266334534, + "step": 5591 + }, + { + "epoch": 1.4849289602974372, + "grad_norm": 1.2285562462634616, + "learning_rate": 3.4170707257707757e-06, + "loss": 0.25715887546539307, + "step": 5592 + }, + { + "epoch": 1.48519452927898, + "grad_norm": 1.430212837200284, + "learning_rate": 3.413766159681624e-06, + "loss": 0.2920379042625427, + "step": 5593 + }, + { + "epoch": 1.485460098260523, + "grad_norm": 1.2173970332611068, + "learning_rate": 3.41046286334068e-06, + "loss": 0.22127456963062286, + "step": 5594 + }, + { + "epoch": 1.485725667242066, + "grad_norm": 1.2534339617557788, + "learning_rate": 3.4071608373847786e-06, + "loss": 0.23103584349155426, + "step": 5595 + }, + { + "epoch": 1.485991236223609, + "grad_norm": 1.2999427041349472, + "learning_rate": 3.403860082450513e-06, + "loss": 0.29068222641944885, + "step": 5596 + }, + { + "epoch": 1.486256805205152, + "grad_norm": 1.2532608064541852, + "learning_rate": 3.4005605991742296e-06, + "loss": 0.23703888058662415, + "step": 5597 + }, + { + "epoch": 1.4865223741866949, + "grad_norm": 1.4039489349034764, + "learning_rate": 3.3972623881920296e-06, + "loss": 0.23348261415958405, + "step": 5598 + }, + { + "epoch": 1.4867879431682378, + "grad_norm": 1.1603139615742908, + "learning_rate": 3.3939654501397645e-06, + "loss": 0.24733223021030426, + "step": 5599 + }, + { + "epoch": 1.487053512149781, + "grad_norm": 1.1220204153088178, + "learning_rate": 3.3906697856530548e-06, + "loss": 0.22576835751533508, + "step": 5600 + }, + { + "epoch": 1.487319081131324, + "grad_norm": 1.1809335952834177, + "learning_rate": 3.3873753953672593e-06, + "loss": 0.20863527059555054, + "step": 5601 + }, + { + "epoch": 1.487584650112867, + "grad_norm": 1.1823379745083873, + "learning_rate": 3.384082279917499e-06, + "loss": 0.2299712598323822, + "step": 5602 + }, 
+ { + "epoch": 1.4878502190944098, + "grad_norm": 1.1858521746021262, + "learning_rate": 3.380790439938648e-06, + "loss": 0.23058944940567017, + "step": 5603 + }, + { + "epoch": 1.4881157880759528, + "grad_norm": 1.1304663814123712, + "learning_rate": 3.3774998760653344e-06, + "loss": 0.20307201147079468, + "step": 5604 + }, + { + "epoch": 1.4883813570574957, + "grad_norm": 1.112411027996001, + "learning_rate": 3.3742105889319388e-06, + "loss": 0.2296266108751297, + "step": 5605 + }, + { + "epoch": 1.4886469260390387, + "grad_norm": 1.3206442060716181, + "learning_rate": 3.370922579172601e-06, + "loss": 0.22702309489250183, + "step": 5606 + }, + { + "epoch": 1.4889124950205816, + "grad_norm": 1.4590848907033545, + "learning_rate": 3.3676358474212035e-06, + "loss": 0.30432331562042236, + "step": 5607 + }, + { + "epoch": 1.4891780640021246, + "grad_norm": 1.201356120373459, + "learning_rate": 3.3643503943113907e-06, + "loss": 0.2488052248954773, + "step": 5608 + }, + { + "epoch": 1.4894436329836676, + "grad_norm": 1.2096846483257637, + "learning_rate": 3.361066220476564e-06, + "loss": 0.2221754938364029, + "step": 5609 + }, + { + "epoch": 1.4897092019652105, + "grad_norm": 1.289556223007011, + "learning_rate": 3.3577833265498728e-06, + "loss": 0.2547761797904968, + "step": 5610 + }, + { + "epoch": 1.4899747709467535, + "grad_norm": 1.3306628367975963, + "learning_rate": 3.3545017131642164e-06, + "loss": 0.21811938285827637, + "step": 5611 + }, + { + "epoch": 1.4902403399282964, + "grad_norm": 1.4022029015386877, + "learning_rate": 3.3512213809522554e-06, + "loss": 0.30436158180236816, + "step": 5612 + }, + { + "epoch": 1.4905059089098394, + "grad_norm": 1.2224150283856856, + "learning_rate": 3.3479423305463953e-06, + "loss": 0.2053622156381607, + "step": 5613 + }, + { + "epoch": 1.4907714778913823, + "grad_norm": 1.3026832238379669, + "learning_rate": 3.344664562578801e-06, + "loss": 0.2017601728439331, + "step": 5614 + }, + { + "epoch": 1.4910370468729253, + 
"grad_norm": 1.2856046275416113, + "learning_rate": 3.341388077681387e-06, + "loss": 0.23668046295642853, + "step": 5615 + }, + { + "epoch": 1.4913026158544682, + "grad_norm": 1.1460002150937032, + "learning_rate": 3.338112876485821e-06, + "loss": 0.20016951858997345, + "step": 5616 + }, + { + "epoch": 1.4915681848360112, + "grad_norm": 1.3606548245166536, + "learning_rate": 3.3348389596235177e-06, + "loss": 0.25477850437164307, + "step": 5617 + }, + { + "epoch": 1.4918337538175541, + "grad_norm": 1.2758175160721472, + "learning_rate": 3.3315663277256594e-06, + "loss": 0.24063366651535034, + "step": 5618 + }, + { + "epoch": 1.492099322799097, + "grad_norm": 1.2737128535751616, + "learning_rate": 3.328294981423165e-06, + "loss": 0.23443251848220825, + "step": 5619 + }, + { + "epoch": 1.49236489178064, + "grad_norm": 1.1580169148577781, + "learning_rate": 3.325024921346717e-06, + "loss": 0.21191264688968658, + "step": 5620 + }, + { + "epoch": 1.492630460762183, + "grad_norm": 1.213323558189925, + "learning_rate": 3.3217561481267367e-06, + "loss": 0.22062326967716217, + "step": 5621 + }, + { + "epoch": 1.492896029743726, + "grad_norm": 1.1757529457487401, + "learning_rate": 3.318488662393409e-06, + "loss": 0.2235480695962906, + "step": 5622 + }, + { + "epoch": 1.4931615987252689, + "grad_norm": 1.2611472240425432, + "learning_rate": 3.315222464776665e-06, + "loss": 0.26665517687797546, + "step": 5623 + }, + { + "epoch": 1.4934271677068118, + "grad_norm": 1.270220596773442, + "learning_rate": 3.3119575559061902e-06, + "loss": 0.24300602078437805, + "step": 5624 + }, + { + "epoch": 1.4936927366883548, + "grad_norm": 1.2622444254847978, + "learning_rate": 3.308693936411421e-06, + "loss": 0.25441884994506836, + "step": 5625 + }, + { + "epoch": 1.4939583056698977, + "grad_norm": 1.2781695234171213, + "learning_rate": 3.3054316069215407e-06, + "loss": 0.23236152529716492, + "step": 5626 + }, + { + "epoch": 1.4942238746514407, + "grad_norm": 1.2299113342509724, + 
"learning_rate": 3.3021705680654946e-06, + "loss": 0.24535568058490753, + "step": 5627 + }, + { + "epoch": 1.4944894436329836, + "grad_norm": 1.3635919919461823, + "learning_rate": 3.29891082047197e-06, + "loss": 0.2542986273765564, + "step": 5628 + }, + { + "epoch": 1.4947550126145266, + "grad_norm": 1.3442816383357798, + "learning_rate": 3.295652364769407e-06, + "loss": 0.26490268111228943, + "step": 5629 + }, + { + "epoch": 1.4950205815960695, + "grad_norm": 1.2455944135633985, + "learning_rate": 3.292395201585997e-06, + "loss": 0.25576913356781006, + "step": 5630 + }, + { + "epoch": 1.4952861505776125, + "grad_norm": 1.321982811797117, + "learning_rate": 3.2891393315496846e-06, + "loss": 0.2930823266506195, + "step": 5631 + }, + { + "epoch": 1.4955517195591554, + "grad_norm": 1.3029577245101889, + "learning_rate": 3.285884755288161e-06, + "loss": 0.2426074892282486, + "step": 5632 + }, + { + "epoch": 1.4958172885406984, + "grad_norm": 1.1912484566122454, + "learning_rate": 3.2826314734288713e-06, + "loss": 0.24090878665447235, + "step": 5633 + }, + { + "epoch": 1.4960828575222413, + "grad_norm": 1.291391881665867, + "learning_rate": 3.2793794865990092e-06, + "loss": 0.26155173778533936, + "step": 5634 + }, + { + "epoch": 1.4963484265037843, + "grad_norm": 1.2581171617638447, + "learning_rate": 3.2761287954255195e-06, + "loss": 0.2594009041786194, + "step": 5635 + }, + { + "epoch": 1.4966139954853273, + "grad_norm": 1.248912763921314, + "learning_rate": 3.2728794005350972e-06, + "loss": 0.24434763193130493, + "step": 5636 + }, + { + "epoch": 1.4968795644668702, + "grad_norm": 1.3459414061970596, + "learning_rate": 3.269631302554188e-06, + "loss": 0.2622208297252655, + "step": 5637 + }, + { + "epoch": 1.4971451334484132, + "grad_norm": 1.2222057610309294, + "learning_rate": 3.266384502108987e-06, + "loss": 0.18913154304027557, + "step": 5638 + }, + { + "epoch": 1.497410702429956, + "grad_norm": 1.260519406868159, + "learning_rate": 3.263138999825437e-06, + 
"loss": 0.2610907554626465, + "step": 5639 + }, + { + "epoch": 1.497676271411499, + "grad_norm": 1.2585537664404678, + "learning_rate": 3.2598947963292337e-06, + "loss": 0.25841569900512695, + "step": 5640 + }, + { + "epoch": 1.497941840393042, + "grad_norm": 1.1680179490188496, + "learning_rate": 3.256651892245822e-06, + "loss": 0.2066381573677063, + "step": 5641 + }, + { + "epoch": 1.4982074093745852, + "grad_norm": 1.1877407935219242, + "learning_rate": 3.253410288200396e-06, + "loss": 0.23956719040870667, + "step": 5642 + }, + { + "epoch": 1.4984729783561281, + "grad_norm": 1.1996406642135662, + "learning_rate": 3.250169984817897e-06, + "loss": 0.23999394476413727, + "step": 5643 + }, + { + "epoch": 1.498738547337671, + "grad_norm": 1.4056134439986134, + "learning_rate": 3.2469309827230156e-06, + "loss": 0.24273940920829773, + "step": 5644 + }, + { + "epoch": 1.499004116319214, + "grad_norm": 1.193555704549332, + "learning_rate": 3.2436932825401977e-06, + "loss": 0.2212621569633484, + "step": 5645 + }, + { + "epoch": 1.499269685300757, + "grad_norm": 1.293874995027958, + "learning_rate": 3.2404568848936325e-06, + "loss": 0.2487148940563202, + "step": 5646 + }, + { + "epoch": 1.4995352542823, + "grad_norm": 1.2610121684030642, + "learning_rate": 3.237221790407259e-06, + "loss": 0.29314422607421875, + "step": 5647 + }, + { + "epoch": 1.499800823263843, + "grad_norm": 1.1765702458871505, + "learning_rate": 3.233987999704763e-06, + "loss": 0.22727417945861816, + "step": 5648 + }, + { + "epoch": 1.5000663922453858, + "grad_norm": 1.1578089091098656, + "learning_rate": 3.230755513409585e-06, + "loss": 0.18877442181110382, + "step": 5649 + }, + { + "epoch": 1.5003319612269288, + "grad_norm": 1.2855274132536632, + "learning_rate": 3.2275243321449068e-06, + "loss": 0.2504552900791168, + "step": 5650 + }, + { + "epoch": 1.5005975302084718, + "grad_norm": 1.1905373910388852, + "learning_rate": 3.224294456533663e-06, + "loss": 0.23579174280166626, + "step": 5651 + }, + { + 
"epoch": 1.5008630991900147, + "grad_norm": 1.3692203179408873, + "learning_rate": 3.221065887198537e-06, + "loss": 0.29236793518066406, + "step": 5652 + }, + { + "epoch": 1.5011286681715577, + "grad_norm": 1.3245217175369617, + "learning_rate": 3.2178386247619577e-06, + "loss": 0.2735568881034851, + "step": 5653 + }, + { + "epoch": 1.5013942371531006, + "grad_norm": 1.240462888838021, + "learning_rate": 3.214612669846103e-06, + "loss": 0.2391616702079773, + "step": 5654 + }, + { + "epoch": 1.5016598061346436, + "grad_norm": 1.3766117264936455, + "learning_rate": 3.2113880230729e-06, + "loss": 0.24532485008239746, + "step": 5655 + }, + { + "epoch": 1.5019253751161865, + "grad_norm": 1.3310069624279295, + "learning_rate": 3.2081646850640215e-06, + "loss": 0.2605767250061035, + "step": 5656 + }, + { + "epoch": 1.5021909440977295, + "grad_norm": 1.2109489933208193, + "learning_rate": 3.2049426564408893e-06, + "loss": 0.2651350200176239, + "step": 5657 + }, + { + "epoch": 1.5024565130792724, + "grad_norm": 1.3305800775425032, + "learning_rate": 3.2017219378246734e-06, + "loss": 0.2719389498233795, + "step": 5658 + }, + { + "epoch": 1.5027220820608154, + "grad_norm": 1.2359239723239188, + "learning_rate": 3.198502529836288e-06, + "loss": 0.23077815771102905, + "step": 5659 + }, + { + "epoch": 1.5029876510423583, + "grad_norm": 1.0838054114896152, + "learning_rate": 3.1952844330964007e-06, + "loss": 0.21954959630966187, + "step": 5660 + }, + { + "epoch": 1.5032532200239013, + "grad_norm": 1.3480229773492907, + "learning_rate": 3.1920676482254186e-06, + "loss": 0.28229185938835144, + "step": 5661 + }, + { + "epoch": 1.5035187890054442, + "grad_norm": 1.2587796771658648, + "learning_rate": 3.1888521758435e-06, + "loss": 0.24612295627593994, + "step": 5662 + }, + { + "epoch": 1.5037843579869872, + "grad_norm": 1.2649379995915024, + "learning_rate": 3.185638016570555e-06, + "loss": 0.24191413819789886, + "step": 5663 + }, + { + "epoch": 1.5040499269685301, + "grad_norm": 
1.225446339219085, + "learning_rate": 3.1824251710262323e-06, + "loss": 0.2427935004234314, + "step": 5664 + }, + { + "epoch": 1.504315495950073, + "grad_norm": 1.2595635392757376, + "learning_rate": 3.17921363982993e-06, + "loss": 0.2600318193435669, + "step": 5665 + }, + { + "epoch": 1.504581064931616, + "grad_norm": 1.2817020254494476, + "learning_rate": 3.1760034236007954e-06, + "loss": 0.25215205550193787, + "step": 5666 + }, + { + "epoch": 1.504846633913159, + "grad_norm": 1.2568573714231897, + "learning_rate": 3.1727945229577183e-06, + "loss": 0.24460548162460327, + "step": 5667 + }, + { + "epoch": 1.505112202894702, + "grad_norm": 1.2881955251422392, + "learning_rate": 3.169586938519338e-06, + "loss": 0.2812577486038208, + "step": 5668 + }, + { + "epoch": 1.5053777718762449, + "grad_norm": 1.1272225605105841, + "learning_rate": 3.166380670904039e-06, + "loss": 0.23297616839408875, + "step": 5669 + }, + { + "epoch": 1.5056433408577878, + "grad_norm": 1.1954331932042688, + "learning_rate": 3.163175720729954e-06, + "loss": 0.21659572422504425, + "step": 5670 + }, + { + "epoch": 1.5059089098393308, + "grad_norm": 1.2142230208725098, + "learning_rate": 3.1599720886149508e-06, + "loss": 0.22246181964874268, + "step": 5671 + }, + { + "epoch": 1.5061744788208737, + "grad_norm": 1.132636194795227, + "learning_rate": 3.1567697751766624e-06, + "loss": 0.20020918548107147, + "step": 5672 + }, + { + "epoch": 1.5064400478024167, + "grad_norm": 1.363041735701654, + "learning_rate": 3.1535687810324523e-06, + "loss": 0.25693628191947937, + "step": 5673 + }, + { + "epoch": 1.5067056167839596, + "grad_norm": 1.5250673507385644, + "learning_rate": 3.150369106799436e-06, + "loss": 0.21841923892498016, + "step": 5674 + }, + { + "epoch": 1.5069711857655026, + "grad_norm": 1.1710254495806258, + "learning_rate": 3.1471707530944707e-06, + "loss": 0.18131780624389648, + "step": 5675 + }, + { + "epoch": 1.5072367547470455, + "grad_norm": 1.180596749481675, + "learning_rate": 
3.143973720534164e-06, + "loss": 0.22510449588298798, + "step": 5676 + }, + { + "epoch": 1.5075023237285885, + "grad_norm": 1.3952546557365002, + "learning_rate": 3.1407780097348627e-06, + "loss": 0.23721462488174438, + "step": 5677 + }, + { + "epoch": 1.5077678927101315, + "grad_norm": 1.2200574848273704, + "learning_rate": 3.1375836213126653e-06, + "loss": 0.24281899631023407, + "step": 5678 + }, + { + "epoch": 1.5080334616916744, + "grad_norm": 1.3211068465604292, + "learning_rate": 3.134390555883412e-06, + "loss": 0.23910081386566162, + "step": 5679 + }, + { + "epoch": 1.5082990306732174, + "grad_norm": 1.357027881520108, + "learning_rate": 3.1311988140626825e-06, + "loss": 0.2635132670402527, + "step": 5680 + }, + { + "epoch": 1.5085645996547603, + "grad_norm": 1.239638674575543, + "learning_rate": 3.1280083964658147e-06, + "loss": 0.24802634119987488, + "step": 5681 + }, + { + "epoch": 1.5088301686363033, + "grad_norm": 1.3861680174510138, + "learning_rate": 3.1248193037078823e-06, + "loss": 0.24081437289714813, + "step": 5682 + }, + { + "epoch": 1.5090957376178462, + "grad_norm": 1.2124748227090532, + "learning_rate": 3.121631536403701e-06, + "loss": 0.19550001621246338, + "step": 5683 + }, + { + "epoch": 1.5093613065993892, + "grad_norm": 1.309177755877421, + "learning_rate": 3.118445095167837e-06, + "loss": 0.2397807538509369, + "step": 5684 + }, + { + "epoch": 1.5096268755809321, + "grad_norm": 1.2243819490197418, + "learning_rate": 3.115259980614602e-06, + "loss": 0.2185651659965515, + "step": 5685 + }, + { + "epoch": 1.509892444562475, + "grad_norm": 1.2555724014592389, + "learning_rate": 3.1120761933580414e-06, + "loss": 0.22214055061340332, + "step": 5686 + }, + { + "epoch": 1.510158013544018, + "grad_norm": 1.4127254863789025, + "learning_rate": 3.108893734011955e-06, + "loss": 0.23971091210842133, + "step": 5687 + }, + { + "epoch": 1.510423582525561, + "grad_norm": 1.3331222718828735, + "learning_rate": 3.1057126031898843e-06, + "loss": 
0.26458197832107544, + "step": 5688 + }, + { + "epoch": 1.510689151507104, + "grad_norm": 1.3487790050882777, + "learning_rate": 3.1025328015051093e-06, + "loss": 0.23730339109897614, + "step": 5689 + }, + { + "epoch": 1.5109547204886469, + "grad_norm": 1.2964784198979393, + "learning_rate": 3.0993543295706653e-06, + "loss": 0.21981677412986755, + "step": 5690 + }, + { + "epoch": 1.5112202894701898, + "grad_norm": 1.1812817656913812, + "learning_rate": 3.0961771879993206e-06, + "loss": 0.21984878182411194, + "step": 5691 + }, + { + "epoch": 1.5114858584517328, + "grad_norm": 1.2732802047873515, + "learning_rate": 3.093001377403592e-06, + "loss": 0.23086440563201904, + "step": 5692 + }, + { + "epoch": 1.5117514274332757, + "grad_norm": 2.3681680891314953, + "learning_rate": 3.0898268983957368e-06, + "loss": 0.2355024814605713, + "step": 5693 + }, + { + "epoch": 1.5120169964148187, + "grad_norm": 1.3061363772251866, + "learning_rate": 3.0866537515877584e-06, + "loss": 0.21210229396820068, + "step": 5694 + }, + { + "epoch": 1.5122825653963616, + "grad_norm": 1.3436771657394675, + "learning_rate": 3.0834819375914003e-06, + "loss": 0.2387622594833374, + "step": 5695 + }, + { + "epoch": 1.5125481343779046, + "grad_norm": 1.3482258979232278, + "learning_rate": 3.0803114570181527e-06, + "loss": 0.23822402954101562, + "step": 5696 + }, + { + "epoch": 1.5128137033594475, + "grad_norm": 1.3248058910768958, + "learning_rate": 3.0771423104792454e-06, + "loss": 0.26844173669815063, + "step": 5697 + }, + { + "epoch": 1.5130792723409905, + "grad_norm": 1.2131778927640824, + "learning_rate": 3.07397449858565e-06, + "loss": 0.23288767039775848, + "step": 5698 + }, + { + "epoch": 1.5133448413225334, + "grad_norm": 1.2716046597052009, + "learning_rate": 3.0708080219480896e-06, + "loss": 0.23273086547851562, + "step": 5699 + }, + { + "epoch": 1.5136104103040764, + "grad_norm": 1.4240236624695346, + "learning_rate": 3.067642881177023e-06, + "loss": 0.2505509555339813, + "step": 5700 + 
}, + { + "epoch": 1.5138759792856193, + "grad_norm": 1.1441752919653974, + "learning_rate": 3.0644790768826473e-06, + "loss": 0.22801508009433746, + "step": 5701 + }, + { + "epoch": 1.5141415482671623, + "grad_norm": 1.1462347465841034, + "learning_rate": 3.061316609674908e-06, + "loss": 0.2110593169927597, + "step": 5702 + }, + { + "epoch": 1.5144071172487052, + "grad_norm": 1.2145033288630525, + "learning_rate": 3.0581554801634927e-06, + "loss": 0.22201795876026154, + "step": 5703 + }, + { + "epoch": 1.5146726862302482, + "grad_norm": 1.2993896506173446, + "learning_rate": 3.054995688957829e-06, + "loss": 0.23104460537433624, + "step": 5704 + }, + { + "epoch": 1.5149382552117912, + "grad_norm": 1.5590161841107484, + "learning_rate": 3.0518372366670877e-06, + "loss": 0.23373261094093323, + "step": 5705 + }, + { + "epoch": 1.515203824193334, + "grad_norm": 1.368121139637646, + "learning_rate": 3.0486801239001806e-06, + "loss": 0.2404957264661789, + "step": 5706 + }, + { + "epoch": 1.515469393174877, + "grad_norm": 1.2346548477581518, + "learning_rate": 3.0455243512657606e-06, + "loss": 0.23209382593631744, + "step": 5707 + }, + { + "epoch": 1.51573496215642, + "grad_norm": 1.156984368318911, + "learning_rate": 3.042369919372228e-06, + "loss": 0.218237042427063, + "step": 5708 + }, + { + "epoch": 1.516000531137963, + "grad_norm": 12.380411974697722, + "learning_rate": 3.039216828827717e-06, + "loss": 0.25025027990341187, + "step": 5709 + }, + { + "epoch": 1.516266100119506, + "grad_norm": 1.3454644235463973, + "learning_rate": 3.036065080240106e-06, + "loss": 0.24729448556900024, + "step": 5710 + }, + { + "epoch": 1.5165316691010489, + "grad_norm": 1.246980236713752, + "learning_rate": 3.032914674217017e-06, + "loss": 0.23614796996116638, + "step": 5711 + }, + { + "epoch": 1.5167972380825918, + "grad_norm": 1.1947534591327391, + "learning_rate": 3.029765611365808e-06, + "loss": 0.2313452661037445, + "step": 5712 + }, + { + "epoch": 1.5170628070641348, + "grad_norm": 
1.2169352172923076, + "learning_rate": 3.0266178922935842e-06, + "loss": 0.22152003645896912, + "step": 5713 + }, + { + "epoch": 1.5173283760456777, + "grad_norm": 1.3132034423317465, + "learning_rate": 3.0234715176071874e-06, + "loss": 0.25942179560661316, + "step": 5714 + }, + { + "epoch": 1.5175939450272207, + "grad_norm": 1.213532583392701, + "learning_rate": 3.0203264879132e-06, + "loss": 0.25030237436294556, + "step": 5715 + }, + { + "epoch": 1.5178595140087636, + "grad_norm": 1.212709044397772, + "learning_rate": 3.0171828038179497e-06, + "loss": 0.2025807797908783, + "step": 5716 + }, + { + "epoch": 1.5181250829903066, + "grad_norm": 1.3035190960753136, + "learning_rate": 3.014040465927499e-06, + "loss": 0.20455190539360046, + "step": 5717 + }, + { + "epoch": 1.5183906519718495, + "grad_norm": 1.2171025232725439, + "learning_rate": 3.010899474847655e-06, + "loss": 0.24197113513946533, + "step": 5718 + }, + { + "epoch": 1.5186562209533925, + "grad_norm": 1.243656057613246, + "learning_rate": 3.007759831183964e-06, + "loss": 0.22290384769439697, + "step": 5719 + }, + { + "epoch": 1.5189217899349357, + "grad_norm": 1.133911078511842, + "learning_rate": 3.0046215355417117e-06, + "loss": 0.23087520897388458, + "step": 5720 + }, + { + "epoch": 1.5191873589164786, + "grad_norm": 1.3329430419316783, + "learning_rate": 3.0014845885259236e-06, + "loss": 0.24425405263900757, + "step": 5721 + }, + { + "epoch": 1.5194529278980216, + "grad_norm": 1.310265396817766, + "learning_rate": 2.9983489907413675e-06, + "loss": 0.24888862669467926, + "step": 5722 + }, + { + "epoch": 1.5197184968795645, + "grad_norm": 1.3023172954247402, + "learning_rate": 2.9952147427925493e-06, + "loss": 0.23556756973266602, + "step": 5723 + }, + { + "epoch": 1.5199840658611075, + "grad_norm": 1.3924872169111115, + "learning_rate": 2.992081845283715e-06, + "loss": 0.2532619833946228, + "step": 5724 + }, + { + "epoch": 1.5202496348426504, + "grad_norm": 1.3351422936737996, + "learning_rate": 
2.988950298818848e-06, + "loss": 0.2574974000453949, + "step": 5725 + }, + { + "epoch": 1.5205152038241934, + "grad_norm": 1.1244851887087242, + "learning_rate": 2.9858201040016775e-06, + "loss": 0.21997734904289246, + "step": 5726 + }, + { + "epoch": 1.5207807728057363, + "grad_norm": 1.3952335702566243, + "learning_rate": 2.982691261435666e-06, + "loss": 0.2174127697944641, + "step": 5727 + }, + { + "epoch": 1.5210463417872793, + "grad_norm": 1.4277294646697747, + "learning_rate": 2.979563771724019e-06, + "loss": 0.22455093264579773, + "step": 5728 + }, + { + "epoch": 1.5213119107688222, + "grad_norm": 1.2606427849530746, + "learning_rate": 2.976437635469678e-06, + "loss": 0.270727276802063, + "step": 5729 + }, + { + "epoch": 1.5215774797503652, + "grad_norm": 1.1901052998095392, + "learning_rate": 2.9733128532753254e-06, + "loss": 0.2233714610338211, + "step": 5730 + }, + { + "epoch": 1.5218430487319081, + "grad_norm": 1.364720864117707, + "learning_rate": 2.970189425743383e-06, + "loss": 0.23599566519260406, + "step": 5731 + }, + { + "epoch": 1.522108617713451, + "grad_norm": 1.2707197493270106, + "learning_rate": 2.967067353476011e-06, + "loss": 0.23598654568195343, + "step": 5732 + }, + { + "epoch": 1.522374186694994, + "grad_norm": 1.1793549120144597, + "learning_rate": 2.963946637075107e-06, + "loss": 0.205197274684906, + "step": 5733 + }, + { + "epoch": 1.522639755676537, + "grad_norm": 1.1887492971446227, + "learning_rate": 2.9608272771423073e-06, + "loss": 0.23581506311893463, + "step": 5734 + }, + { + "epoch": 1.52290532465808, + "grad_norm": 1.2937911951812968, + "learning_rate": 2.9577092742789915e-06, + "loss": 0.2088197022676468, + "step": 5735 + }, + { + "epoch": 1.5231708936396229, + "grad_norm": 1.2943182118738674, + "learning_rate": 2.95459262908627e-06, + "loss": 0.22607067227363586, + "step": 5736 + }, + { + "epoch": 1.5234364626211658, + "grad_norm": 1.1748118237242067, + "learning_rate": 2.951477342164998e-06, + "loss": 0.22242344915866852, 
+ "step": 5737 + }, + { + "epoch": 1.5237020316027088, + "grad_norm": 1.3280405020263697, + "learning_rate": 2.9483634141157636e-06, + "loss": 0.25626271963119507, + "step": 5738 + }, + { + "epoch": 1.5239676005842517, + "grad_norm": 1.2212084732536523, + "learning_rate": 2.9452508455388975e-06, + "loss": 0.2241421341896057, + "step": 5739 + }, + { + "epoch": 1.5242331695657947, + "grad_norm": 1.5088982481303157, + "learning_rate": 2.9421396370344648e-06, + "loss": 0.2191103994846344, + "step": 5740 + }, + { + "epoch": 1.5244987385473376, + "grad_norm": 1.2411878451658047, + "learning_rate": 2.9390297892022703e-06, + "loss": 0.26252660155296326, + "step": 5741 + }, + { + "epoch": 1.5247643075288806, + "grad_norm": 1.3964551352557335, + "learning_rate": 2.9359213026418567e-06, + "loss": 0.21522507071495056, + "step": 5742 + }, + { + "epoch": 1.5250298765104235, + "grad_norm": 1.0905013771622027, + "learning_rate": 2.932814177952499e-06, + "loss": 0.20159044861793518, + "step": 5743 + }, + { + "epoch": 1.5252954454919665, + "grad_norm": 1.138416177249403, + "learning_rate": 2.929708415733221e-06, + "loss": 0.22679558396339417, + "step": 5744 + }, + { + "epoch": 1.5255610144735094, + "grad_norm": 1.199157018703913, + "learning_rate": 2.926604016582776e-06, + "loss": 0.2315664291381836, + "step": 5745 + }, + { + "epoch": 1.5258265834550524, + "grad_norm": 1.2568252329386058, + "learning_rate": 2.923500981099652e-06, + "loss": 0.229634091258049, + "step": 5746 + }, + { + "epoch": 1.5260921524365954, + "grad_norm": 1.2179751735416722, + "learning_rate": 2.9203993098820793e-06, + "loss": 0.20657674968242645, + "step": 5747 + }, + { + "epoch": 1.5263577214181385, + "grad_norm": 1.2447733239425043, + "learning_rate": 2.9172990035280237e-06, + "loss": 0.2306358814239502, + "step": 5748 + }, + { + "epoch": 1.5266232903996815, + "grad_norm": 1.2950411042959078, + "learning_rate": 2.9142000626351875e-06, + "loss": 0.2608031928539276, + "step": 5749 + }, + { + "epoch": 
1.5268888593812244, + "grad_norm": 1.337100599856471, + "learning_rate": 2.911102487801013e-06, + "loss": 0.24675670266151428, + "step": 5750 + }, + { + "epoch": 1.5271544283627674, + "grad_norm": 1.3568337572597398, + "learning_rate": 2.908006279622667e-06, + "loss": 0.22544966638088226, + "step": 5751 + }, + { + "epoch": 1.5274199973443103, + "grad_norm": 1.3214418017258782, + "learning_rate": 2.904911438697071e-06, + "loss": 0.2328556478023529, + "step": 5752 + }, + { + "epoch": 1.5276855663258533, + "grad_norm": 1.25396823790717, + "learning_rate": 2.901817965620871e-06, + "loss": 0.2316005825996399, + "step": 5753 + }, + { + "epoch": 1.5279511353073962, + "grad_norm": 1.2976508240318196, + "learning_rate": 2.8987258609904522e-06, + "loss": 0.2332756370306015, + "step": 5754 + }, + { + "epoch": 1.5282167042889392, + "grad_norm": 1.3432276903845415, + "learning_rate": 2.8956351254019355e-06, + "loss": 0.24855142831802368, + "step": 5755 + }, + { + "epoch": 1.5284822732704821, + "grad_norm": 1.2138875439685706, + "learning_rate": 2.8925457594511775e-06, + "loss": 0.18745368719100952, + "step": 5756 + }, + { + "epoch": 1.528747842252025, + "grad_norm": 1.877743895818308, + "learning_rate": 2.889457763733774e-06, + "loss": 0.22402942180633545, + "step": 5757 + }, + { + "epoch": 1.529013411233568, + "grad_norm": 1.292567134146249, + "learning_rate": 2.886371138845051e-06, + "loss": 0.2156108319759369, + "step": 5758 + }, + { + "epoch": 1.529278980215111, + "grad_norm": 1.2848231417758293, + "learning_rate": 2.883285885380076e-06, + "loss": 0.22866520285606384, + "step": 5759 + }, + { + "epoch": 1.529544549196654, + "grad_norm": 1.2907471990668473, + "learning_rate": 2.880202003933645e-06, + "loss": 0.2486938238143921, + "step": 5760 + }, + { + "epoch": 1.529810118178197, + "grad_norm": 1.34098643692872, + "learning_rate": 2.877119495100301e-06, + "loss": 0.2565295696258545, + "step": 5761 + }, + { + "epoch": 1.5300756871597399, + "grad_norm": 1.1480290388256142, + 
"learning_rate": 2.8740383594743116e-06, + "loss": 0.21510455012321472, + "step": 5762 + }, + { + "epoch": 1.5303412561412828, + "grad_norm": 1.266250058472157, + "learning_rate": 2.8709585976496825e-06, + "loss": 0.2122025489807129, + "step": 5763 + }, + { + "epoch": 1.5306068251228258, + "grad_norm": 1.3017513152107745, + "learning_rate": 2.8678802102201575e-06, + "loss": 0.24274399876594543, + "step": 5764 + }, + { + "epoch": 1.5308723941043687, + "grad_norm": 1.4573413266326471, + "learning_rate": 2.864803197779216e-06, + "loss": 0.22325341403484344, + "step": 5765 + }, + { + "epoch": 1.5311379630859117, + "grad_norm": 1.3303976558080437, + "learning_rate": 2.8617275609200625e-06, + "loss": 0.25205284357070923, + "step": 5766 + }, + { + "epoch": 1.5314035320674546, + "grad_norm": 1.2638986714524767, + "learning_rate": 2.8586533002356465e-06, + "loss": 0.2047557830810547, + "step": 5767 + }, + { + "epoch": 1.5316691010489976, + "grad_norm": 1.2195584514594966, + "learning_rate": 2.8555804163186508e-06, + "loss": 0.2166992425918579, + "step": 5768 + }, + { + "epoch": 1.5319346700305405, + "grad_norm": 1.2333416807696795, + "learning_rate": 2.8525089097614867e-06, + "loss": 0.26253193616867065, + "step": 5769 + }, + { + "epoch": 1.5322002390120835, + "grad_norm": 1.2030637435961495, + "learning_rate": 2.8494387811563108e-06, + "loss": 0.23307687044143677, + "step": 5770 + }, + { + "epoch": 1.5324658079936264, + "grad_norm": 1.2191481171426857, + "learning_rate": 2.8463700310950047e-06, + "loss": 0.22128549218177795, + "step": 5771 + }, + { + "epoch": 1.5327313769751694, + "grad_norm": 1.272136705974986, + "learning_rate": 2.8433026601691883e-06, + "loss": 0.21966281533241272, + "step": 5772 + }, + { + "epoch": 1.5329969459567123, + "grad_norm": 1.341088625881783, + "learning_rate": 2.840236668970213e-06, + "loss": 0.22869305312633514, + "step": 5773 + }, + { + "epoch": 1.5332625149382553, + "grad_norm": 1.2257027323986465, + "learning_rate": 2.837172058089167e-06, 
+ "loss": 0.21431279182434082, + "step": 5774 + }, + { + "epoch": 1.5335280839197982, + "grad_norm": 1.3512853622822856, + "learning_rate": 2.8341088281168693e-06, + "loss": 0.24610282480716705, + "step": 5775 + }, + { + "epoch": 1.5337936529013412, + "grad_norm": 1.3400303957635655, + "learning_rate": 2.8310469796438767e-06, + "loss": 0.24414925277233124, + "step": 5776 + }, + { + "epoch": 1.5340592218828841, + "grad_norm": 1.3597459613858938, + "learning_rate": 2.8279865132604766e-06, + "loss": 0.2330513596534729, + "step": 5777 + }, + { + "epoch": 1.534324790864427, + "grad_norm": 1.2551411616890042, + "learning_rate": 2.8249274295566863e-06, + "loss": 0.23048308491706848, + "step": 5778 + }, + { + "epoch": 1.53459035984597, + "grad_norm": 1.2566974883874766, + "learning_rate": 2.821869729122273e-06, + "loss": 0.2411375492811203, + "step": 5779 + }, + { + "epoch": 1.534855928827513, + "grad_norm": 1.384873838300398, + "learning_rate": 2.818813412546715e-06, + "loss": 0.22985543310642242, + "step": 5780 + }, + { + "epoch": 1.535121497809056, + "grad_norm": 1.320574666083159, + "learning_rate": 2.815758480419235e-06, + "loss": 0.20867247879505157, + "step": 5781 + }, + { + "epoch": 1.5353870667905989, + "grad_norm": 2.0414068761810182, + "learning_rate": 2.8127049333287913e-06, + "loss": 0.26378586888313293, + "step": 5782 + }, + { + "epoch": 1.5356526357721418, + "grad_norm": 1.552041032509997, + "learning_rate": 2.8096527718640687e-06, + "loss": 0.2690306305885315, + "step": 5783 + }, + { + "epoch": 1.5359182047536848, + "grad_norm": 1.1602606034579108, + "learning_rate": 2.8066019966134907e-06, + "loss": 0.22226165235042572, + "step": 5784 + }, + { + "epoch": 1.5361837737352277, + "grad_norm": 1.2201060637055436, + "learning_rate": 2.803552608165209e-06, + "loss": 0.23370322585105896, + "step": 5785 + }, + { + "epoch": 1.5364493427167707, + "grad_norm": 1.3067141176486328, + "learning_rate": 2.8005046071071107e-06, + "loss": 0.26137909293174744, + "step": 5786 
+ }, + { + "epoch": 1.5367149116983136, + "grad_norm": 1.3588127622676833, + "learning_rate": 2.7974579940268096e-06, + "loss": 0.22630617022514343, + "step": 5787 + }, + { + "epoch": 1.5369804806798566, + "grad_norm": 1.2356618590652273, + "learning_rate": 2.7944127695116663e-06, + "loss": 0.22641140222549438, + "step": 5788 + }, + { + "epoch": 1.5372460496613995, + "grad_norm": 1.266648551925957, + "learning_rate": 2.791368934148757e-06, + "loss": 0.19647541642189026, + "step": 5789 + }, + { + "epoch": 1.5375116186429425, + "grad_norm": 1.212906210017999, + "learning_rate": 2.788326488524901e-06, + "loss": 0.22399532794952393, + "step": 5790 + }, + { + "epoch": 1.5377771876244855, + "grad_norm": 1.2862970389756843, + "learning_rate": 2.7852854332266434e-06, + "loss": 0.22549685835838318, + "step": 5791 + }, + { + "epoch": 1.5380427566060284, + "grad_norm": 1.168406987557996, + "learning_rate": 2.7822457688402637e-06, + "loss": 0.2129821628332138, + "step": 5792 + }, + { + "epoch": 1.5383083255875714, + "grad_norm": 1.2301298306170827, + "learning_rate": 2.7792074959517755e-06, + "loss": 0.25330638885498047, + "step": 5793 + }, + { + "epoch": 1.5385738945691143, + "grad_norm": 1.3148661968254225, + "learning_rate": 2.7761706151469204e-06, + "loss": 0.2413945198059082, + "step": 5794 + }, + { + "epoch": 1.5388394635506573, + "grad_norm": 1.2551515744231165, + "learning_rate": 2.773135127011174e-06, + "loss": 0.21930523216724396, + "step": 5795 + }, + { + "epoch": 1.5391050325322002, + "grad_norm": 1.2506577052831476, + "learning_rate": 2.7701010321297416e-06, + "loss": 0.25499141216278076, + "step": 5796 + }, + { + "epoch": 1.5393706015137432, + "grad_norm": 1.1567311669751301, + "learning_rate": 2.7670683310875613e-06, + "loss": 0.19475680589675903, + "step": 5797 + }, + { + "epoch": 1.5396361704952861, + "grad_norm": 1.3159422945276043, + "learning_rate": 2.7640370244693026e-06, + "loss": 0.22155825793743134, + "step": 5798 + }, + { + "epoch": 1.539901739476829, 
+ "grad_norm": 1.1818601031709017, + "learning_rate": 2.761007112859365e-06, + "loss": 0.2146138846874237, + "step": 5799 + }, + { + "epoch": 1.540167308458372, + "grad_norm": 1.146035478957987, + "learning_rate": 2.7579785968418804e-06, + "loss": 0.22698411345481873, + "step": 5800 + }, + { + "epoch": 1.540432877439915, + "grad_norm": 1.2904710642906891, + "learning_rate": 2.75495147700071e-06, + "loss": 0.23889532685279846, + "step": 5801 + }, + { + "epoch": 1.540698446421458, + "grad_norm": 1.2353012354195356, + "learning_rate": 2.7519257539194488e-06, + "loss": 0.2514609694480896, + "step": 5802 + }, + { + "epoch": 1.5409640154030009, + "grad_norm": 1.2405153867334813, + "learning_rate": 2.7489014281814185e-06, + "loss": 0.22332100570201874, + "step": 5803 + }, + { + "epoch": 1.5412295843845438, + "grad_norm": 1.1768236369414826, + "learning_rate": 2.745878500369673e-06, + "loss": 0.21316683292388916, + "step": 5804 + }, + { + "epoch": 1.5414951533660868, + "grad_norm": 1.2446325297163028, + "learning_rate": 2.742856971066996e-06, + "loss": 0.2228018194437027, + "step": 5805 + }, + { + "epoch": 1.5417607223476297, + "grad_norm": 1.3243067869686356, + "learning_rate": 2.7398368408559084e-06, + "loss": 0.22217239439487457, + "step": 5806 + }, + { + "epoch": 1.5420262913291727, + "grad_norm": 1.331116794742511, + "learning_rate": 2.736818110318652e-06, + "loss": 0.21147233247756958, + "step": 5807 + }, + { + "epoch": 1.5422918603107156, + "grad_norm": 1.2851526092309566, + "learning_rate": 2.7338007800372024e-06, + "loss": 0.23844698071479797, + "step": 5808 + }, + { + "epoch": 1.5425574292922586, + "grad_norm": 1.3238454632326748, + "learning_rate": 2.7307848505932653e-06, + "loss": 0.2361423820257187, + "step": 5809 + }, + { + "epoch": 1.5428229982738015, + "grad_norm": 1.1977956377916248, + "learning_rate": 2.727770322568277e-06, + "loss": 0.21585656702518463, + "step": 5810 + }, + { + "epoch": 1.5430885672553445, + "grad_norm": 1.172295737533699, + 
"learning_rate": 2.724757196543403e-06, + "loss": 0.233969584107399, + "step": 5811 + }, + { + "epoch": 1.5433541362368874, + "grad_norm": 1.3309852612756656, + "learning_rate": 2.7217454730995363e-06, + "loss": 0.25040164589881897, + "step": 5812 + }, + { + "epoch": 1.5436197052184304, + "grad_norm": 1.5198455877328005, + "learning_rate": 2.7187351528173046e-06, + "loss": 0.25848713517189026, + "step": 5813 + }, + { + "epoch": 1.5438852741999733, + "grad_norm": 1.409976572144199, + "learning_rate": 2.715726236277061e-06, + "loss": 0.22255051136016846, + "step": 5814 + }, + { + "epoch": 1.5441508431815163, + "grad_norm": 1.1799889920310853, + "learning_rate": 2.7127187240588883e-06, + "loss": 0.1882694661617279, + "step": 5815 + }, + { + "epoch": 1.5444164121630592, + "grad_norm": 1.178741445510241, + "learning_rate": 2.7097126167426002e-06, + "loss": 0.20070400834083557, + "step": 5816 + }, + { + "epoch": 1.5446819811446022, + "grad_norm": 1.2959554460073714, + "learning_rate": 2.706707914907739e-06, + "loss": 0.25316092371940613, + "step": 5817 + }, + { + "epoch": 1.5449475501261452, + "grad_norm": 1.334925654094324, + "learning_rate": 2.703704619133576e-06, + "loss": 0.24665585160255432, + "step": 5818 + }, + { + "epoch": 1.545213119107688, + "grad_norm": 1.290703779819622, + "learning_rate": 2.7007027299991095e-06, + "loss": 0.24172846972942352, + "step": 5819 + }, + { + "epoch": 1.545478688089231, + "grad_norm": 1.2781945872260183, + "learning_rate": 2.6977022480830708e-06, + "loss": 0.2405129075050354, + "step": 5820 + }, + { + "epoch": 1.545744257070774, + "grad_norm": 1.075296946307477, + "learning_rate": 2.694703173963914e-06, + "loss": 0.19716276228427887, + "step": 5821 + }, + { + "epoch": 1.546009826052317, + "grad_norm": 1.1434881656258093, + "learning_rate": 2.6917055082198284e-06, + "loss": 0.20343703031539917, + "step": 5822 + }, + { + "epoch": 1.54627539503386, + "grad_norm": 1.5985849963050902, + "learning_rate": 2.688709251428725e-06, + "loss": 
0.24382619559764862, + "step": 5823 + }, + { + "epoch": 1.5465409640154029, + "grad_norm": 1.7314575476063523, + "learning_rate": 2.6857144041682514e-06, + "loss": 0.2962399423122406, + "step": 5824 + }, + { + "epoch": 1.5468065329969458, + "grad_norm": 1.2699118659079873, + "learning_rate": 2.6827209670157774e-06, + "loss": 0.24034687876701355, + "step": 5825 + }, + { + "epoch": 1.5470721019784888, + "grad_norm": 1.3757632125147359, + "learning_rate": 2.6797289405484016e-06, + "loss": 0.2575085163116455, + "step": 5826 + }, + { + "epoch": 1.5473376709600317, + "grad_norm": 1.556424910652697, + "learning_rate": 2.6767383253429515e-06, + "loss": 0.2586629092693329, + "step": 5827 + }, + { + "epoch": 1.5476032399415747, + "grad_norm": 1.096117045688234, + "learning_rate": 2.6737491219759815e-06, + "loss": 0.18447624146938324, + "step": 5828 + }, + { + "epoch": 1.5478688089231176, + "grad_norm": 1.3930188378643134, + "learning_rate": 2.670761331023779e-06, + "loss": 0.244853213429451, + "step": 5829 + }, + { + "epoch": 1.5481343779046606, + "grad_norm": 1.3163693020327074, + "learning_rate": 2.66777495306235e-06, + "loss": 0.24641919136047363, + "step": 5830 + }, + { + "epoch": 1.5483999468862035, + "grad_norm": 1.4086337954424433, + "learning_rate": 2.6647899886674323e-06, + "loss": 0.2364550232887268, + "step": 5831 + }, + { + "epoch": 1.5486655158677467, + "grad_norm": 1.1695450852938096, + "learning_rate": 2.6618064384144925e-06, + "loss": 0.17760278284549713, + "step": 5832 + }, + { + "epoch": 1.5489310848492897, + "grad_norm": 1.1988872335295608, + "learning_rate": 2.6588243028787274e-06, + "loss": 0.18571510910987854, + "step": 5833 + }, + { + "epoch": 1.5491966538308326, + "grad_norm": 1.2537289047953852, + "learning_rate": 2.655843582635057e-06, + "loss": 0.23693162202835083, + "step": 5834 + }, + { + "epoch": 1.5494622228123756, + "grad_norm": 1.3552352092705502, + "learning_rate": 2.652864278258126e-06, + "loss": 0.26481011509895325, + "step": 5835 + }, + { 
+ "epoch": 1.5497277917939185, + "grad_norm": 1.4182429828127188, + "learning_rate": 2.6498863903223115e-06, + "loss": 0.23405003547668457, + "step": 5836 + }, + { + "epoch": 1.5499933607754615, + "grad_norm": 2.5576796684815686, + "learning_rate": 2.6469099194017144e-06, + "loss": 0.20662814378738403, + "step": 5837 + }, + { + "epoch": 1.5502589297570044, + "grad_norm": 1.3124069479853646, + "learning_rate": 2.6439348660701634e-06, + "loss": 0.2722313404083252, + "step": 5838 + }, + { + "epoch": 1.5505244987385474, + "grad_norm": 1.3906100112719377, + "learning_rate": 2.6409612309012134e-06, + "loss": 0.2288864552974701, + "step": 5839 + }, + { + "epoch": 1.5507900677200903, + "grad_norm": 1.322570753297788, + "learning_rate": 2.6379890144681464e-06, + "loss": 0.2286190539598465, + "step": 5840 + }, + { + "epoch": 1.5510556367016333, + "grad_norm": 1.2231420705695173, + "learning_rate": 2.6350182173439666e-06, + "loss": 0.22478938102722168, + "step": 5841 + }, + { + "epoch": 1.5513212056831762, + "grad_norm": 1.415848841276022, + "learning_rate": 2.6320488401014166e-06, + "loss": 0.2520615756511688, + "step": 5842 + }, + { + "epoch": 1.5515867746647192, + "grad_norm": 1.3741284890856262, + "learning_rate": 2.629080883312952e-06, + "loss": 0.2121289074420929, + "step": 5843 + }, + { + "epoch": 1.5518523436462621, + "grad_norm": 1.3092311759839703, + "learning_rate": 2.6261143475507656e-06, + "loss": 0.2252352237701416, + "step": 5844 + }, + { + "epoch": 1.552117912627805, + "grad_norm": 1.191285245143269, + "learning_rate": 2.6231492333867626e-06, + "loss": 0.21188892424106598, + "step": 5845 + }, + { + "epoch": 1.552383481609348, + "grad_norm": 1.1276138403597054, + "learning_rate": 2.6201855413925857e-06, + "loss": 0.21534699201583862, + "step": 5846 + }, + { + "epoch": 1.552649050590891, + "grad_norm": 1.2849885490704696, + "learning_rate": 2.6172232721395998e-06, + "loss": 0.21781614422798157, + "step": 5847 + }, + { + "epoch": 1.552914619572434, + "grad_norm": 
1.3317886914724781, + "learning_rate": 2.6142624261988947e-06, + "loss": 0.2476508915424347, + "step": 5848 + }, + { + "epoch": 1.5531801885539769, + "grad_norm": 1.3439658215829489, + "learning_rate": 2.611303004141287e-06, + "loss": 0.2692151665687561, + "step": 5849 + }, + { + "epoch": 1.5534457575355198, + "grad_norm": 1.2839746536411722, + "learning_rate": 2.6083450065373163e-06, + "loss": 0.24868687987327576, + "step": 5850 + }, + { + "epoch": 1.5537113265170628, + "grad_norm": 1.2704813852574235, + "learning_rate": 2.6053884339572543e-06, + "loss": 0.24215853214263916, + "step": 5851 + }, + { + "epoch": 1.5539768954986057, + "grad_norm": 1.2100819665594098, + "learning_rate": 2.602433286971091e-06, + "loss": 0.2157444804906845, + "step": 5852 + }, + { + "epoch": 1.5542424644801487, + "grad_norm": 1.369237575424674, + "learning_rate": 2.599479566148544e-06, + "loss": 0.22152379155158997, + "step": 5853 + }, + { + "epoch": 1.5545080334616916, + "grad_norm": 1.1930490692336162, + "learning_rate": 2.596527272059055e-06, + "loss": 0.2278299182653427, + "step": 5854 + }, + { + "epoch": 1.5547736024432346, + "grad_norm": 1.406485645097326, + "learning_rate": 2.593576405271793e-06, + "loss": 0.23183950781822205, + "step": 5855 + }, + { + "epoch": 1.5550391714247775, + "grad_norm": 1.209726796816396, + "learning_rate": 2.5906269663556484e-06, + "loss": 0.22167566418647766, + "step": 5856 + }, + { + "epoch": 1.5553047404063205, + "grad_norm": 1.1790986825354977, + "learning_rate": 2.5876789558792403e-06, + "loss": 0.24111366271972656, + "step": 5857 + }, + { + "epoch": 1.5555703093878634, + "grad_norm": 1.1706391072024214, + "learning_rate": 2.5847323744109087e-06, + "loss": 0.2090388983488083, + "step": 5858 + }, + { + "epoch": 1.5558358783694064, + "grad_norm": 1.2588154614837785, + "learning_rate": 2.58178722251872e-06, + "loss": 0.2087189108133316, + "step": 5859 + }, + { + "epoch": 1.5561014473509496, + "grad_norm": 1.300626487965864, + "learning_rate": 
2.578843500770465e-06, + "loss": 0.2277342677116394, + "step": 5860 + }, + { + "epoch": 1.5563670163324925, + "grad_norm": 1.3517116904487896, + "learning_rate": 2.57590120973366e-06, + "loss": 0.2204241305589676, + "step": 5861 + }, + { + "epoch": 1.5566325853140355, + "grad_norm": 1.213807933631201, + "learning_rate": 2.5729603499755416e-06, + "loss": 0.2138606607913971, + "step": 5862 + }, + { + "epoch": 1.5568981542955784, + "grad_norm": 1.4669648743657906, + "learning_rate": 2.5700209220630733e-06, + "loss": 0.21257862448692322, + "step": 5863 + }, + { + "epoch": 1.5571637232771214, + "grad_norm": 1.2314998246120414, + "learning_rate": 2.5670829265629437e-06, + "loss": 0.20991909503936768, + "step": 5864 + }, + { + "epoch": 1.5574292922586643, + "grad_norm": 1.294980658460416, + "learning_rate": 2.5641463640415633e-06, + "loss": 0.23745422065258026, + "step": 5865 + }, + { + "epoch": 1.5576948612402073, + "grad_norm": 1.2425796180120088, + "learning_rate": 2.561211235065065e-06, + "loss": 0.21482989192008972, + "step": 5866 + }, + { + "epoch": 1.5579604302217502, + "grad_norm": 1.008120888370748, + "learning_rate": 2.558277540199309e-06, + "loss": 0.17866572737693787, + "step": 5867 + }, + { + "epoch": 1.5582259992032932, + "grad_norm": 1.2966262005019353, + "learning_rate": 2.555345280009872e-06, + "loss": 0.223822683095932, + "step": 5868 + }, + { + "epoch": 1.5584915681848361, + "grad_norm": 1.339606961190666, + "learning_rate": 2.552414455062068e-06, + "loss": 0.2293519228696823, + "step": 5869 + }, + { + "epoch": 1.558757137166379, + "grad_norm": 1.3023504432012787, + "learning_rate": 2.5494850659209203e-06, + "loss": 0.2556726038455963, + "step": 5870 + }, + { + "epoch": 1.559022706147922, + "grad_norm": 1.255574464472328, + "learning_rate": 2.546557113151181e-06, + "loss": 0.26891303062438965, + "step": 5871 + }, + { + "epoch": 1.559288275129465, + "grad_norm": 1.1754509839553133, + "learning_rate": 2.5436305973173257e-06, + "loss": 0.19510813057422638, 
+ "step": 5872 + }, + { + "epoch": 1.559553844111008, + "grad_norm": 1.2819966401856495, + "learning_rate": 2.5407055189835518e-06, + "loss": 0.22906547784805298, + "step": 5873 + }, + { + "epoch": 1.559819413092551, + "grad_norm": 1.3121165067922245, + "learning_rate": 2.5377818787137788e-06, + "loss": 0.25452786684036255, + "step": 5874 + }, + { + "epoch": 1.5600849820740939, + "grad_norm": 1.2743199898597464, + "learning_rate": 2.5348596770716503e-06, + "loss": 0.205597922205925, + "step": 5875 + }, + { + "epoch": 1.5603505510556368, + "grad_norm": 1.3020148941868286, + "learning_rate": 2.5319389146205344e-06, + "loss": 0.24009352922439575, + "step": 5876 + }, + { + "epoch": 1.5606161200371798, + "grad_norm": 1.433983972963341, + "learning_rate": 2.5290195919235173e-06, + "loss": 0.23381268978118896, + "step": 5877 + }, + { + "epoch": 1.5608816890187227, + "grad_norm": 1.1554092234943296, + "learning_rate": 2.52610170954341e-06, + "loss": 0.2267276644706726, + "step": 5878 + }, + { + "epoch": 1.5611472580002657, + "grad_norm": 1.2742422977156036, + "learning_rate": 2.5231852680427482e-06, + "loss": 0.24330289661884308, + "step": 5879 + }, + { + "epoch": 1.5614128269818086, + "grad_norm": 1.2802855767249914, + "learning_rate": 2.5202702679837852e-06, + "loss": 0.24877145886421204, + "step": 5880 + }, + { + "epoch": 1.5616783959633516, + "grad_norm": 1.1377670913842177, + "learning_rate": 2.5173567099285e-06, + "loss": 0.20410388708114624, + "step": 5881 + }, + { + "epoch": 1.5619439649448945, + "grad_norm": 1.2268765869469427, + "learning_rate": 2.514444594438591e-06, + "loss": 0.21524877846240997, + "step": 5882 + }, + { + "epoch": 1.5622095339264375, + "grad_norm": 1.1986269244208958, + "learning_rate": 2.5115339220754796e-06, + "loss": 0.18785043060779572, + "step": 5883 + }, + { + "epoch": 1.5624751029079804, + "grad_norm": 1.3539528047627718, + "learning_rate": 2.5086246934003113e-06, + "loss": 0.21200208365917206, + "step": 5884 + }, + { + "epoch": 
1.5627406718895234, + "grad_norm": 1.6373531833898813, + "learning_rate": 2.5057169089739485e-06, + "loss": 0.20752021670341492, + "step": 5885 + }, + { + "epoch": 1.5630062408710663, + "grad_norm": 1.1717071963534185, + "learning_rate": 2.502810569356976e-06, + "loss": 0.21395736932754517, + "step": 5886 + }, + { + "epoch": 1.5632718098526093, + "grad_norm": 1.2664848714228343, + "learning_rate": 2.499905675109707e-06, + "loss": 0.26949262619018555, + "step": 5887 + }, + { + "epoch": 1.5635373788341522, + "grad_norm": 1.5283985889023297, + "learning_rate": 2.497002226792169e-06, + "loss": 0.2309839278459549, + "step": 5888 + }, + { + "epoch": 1.5638029478156952, + "grad_norm": 1.2596143819163301, + "learning_rate": 2.4941002249641123e-06, + "loss": 0.24415400624275208, + "step": 5889 + }, + { + "epoch": 1.5640685167972381, + "grad_norm": 1.3074402223027564, + "learning_rate": 2.4911996701850083e-06, + "loss": 0.23493322730064392, + "step": 5890 + }, + { + "epoch": 1.564334085778781, + "grad_norm": 1.260748243658743, + "learning_rate": 2.488300563014049e-06, + "loss": 0.23824438452720642, + "step": 5891 + }, + { + "epoch": 1.564599654760324, + "grad_norm": 1.2534870916273309, + "learning_rate": 2.4854029040101503e-06, + "loss": 0.2523414194583893, + "step": 5892 + }, + { + "epoch": 1.564865223741867, + "grad_norm": 1.2879106186872462, + "learning_rate": 2.482506693731944e-06, + "loss": 0.21360887587070465, + "step": 5893 + }, + { + "epoch": 1.56513079272341, + "grad_norm": 1.1951820042572139, + "learning_rate": 2.47961193273779e-06, + "loss": 0.21182934939861298, + "step": 5894 + }, + { + "epoch": 1.5653963617049529, + "grad_norm": 1.4293886797193323, + "learning_rate": 2.4767186215857542e-06, + "loss": 0.23104771971702576, + "step": 5895 + }, + { + "epoch": 1.5656619306864958, + "grad_norm": 1.2606491547398977, + "learning_rate": 2.473826760833643e-06, + "loss": 0.22297397255897522, + "step": 5896 + }, + { + "epoch": 1.5659274996680388, + "grad_norm": 
1.176802218612286, + "learning_rate": 2.4709363510389684e-06, + "loss": 0.21597865223884583, + "step": 5897 + }, + { + "epoch": 1.5661930686495817, + "grad_norm": 1.4303555951561693, + "learning_rate": 2.468047392758969e-06, + "loss": 0.27620527148246765, + "step": 5898 + }, + { + "epoch": 1.5664586376311247, + "grad_norm": 1.373809252877093, + "learning_rate": 2.465159886550601e-06, + "loss": 0.25262463092803955, + "step": 5899 + }, + { + "epoch": 1.5667242066126676, + "grad_norm": 1.376719462816966, + "learning_rate": 2.462273832970542e-06, + "loss": 0.2729034125804901, + "step": 5900 + }, + { + "epoch": 1.5669897755942106, + "grad_norm": 1.3637563490895455, + "learning_rate": 2.459389232575188e-06, + "loss": 0.2313854992389679, + "step": 5901 + }, + { + "epoch": 1.5672553445757536, + "grad_norm": 1.3202318144066494, + "learning_rate": 2.456506085920658e-06, + "loss": 0.22513791918754578, + "step": 5902 + }, + { + "epoch": 1.5675209135572965, + "grad_norm": 1.3152362934287614, + "learning_rate": 2.4536243935627856e-06, + "loss": 0.2658824026584625, + "step": 5903 + }, + { + "epoch": 1.5677864825388395, + "grad_norm": 1.1721087348112986, + "learning_rate": 2.4507441560571275e-06, + "loss": 0.21781010925769806, + "step": 5904 + }, + { + "epoch": 1.5680520515203824, + "grad_norm": 1.3393030222309363, + "learning_rate": 2.4478653739589632e-06, + "loss": 0.21047937870025635, + "step": 5905 + }, + { + "epoch": 1.5683176205019254, + "grad_norm": 1.2196979825563006, + "learning_rate": 2.4449880478232858e-06, + "loss": 0.21674057841300964, + "step": 5906 + }, + { + "epoch": 1.5685831894834683, + "grad_norm": 1.200112520021674, + "learning_rate": 2.44211217820481e-06, + "loss": 0.22062627971172333, + "step": 5907 + }, + { + "epoch": 1.5688487584650113, + "grad_norm": 1.3158234051142574, + "learning_rate": 2.439237765657968e-06, + "loss": 0.22440886497497559, + "step": 5908 + }, + { + "epoch": 1.5691143274465542, + "grad_norm": 1.129873307165861, + "learning_rate": 
2.4363648107369175e-06, + "loss": 0.21888123452663422, + "step": 5909 + }, + { + "epoch": 1.5693798964280972, + "grad_norm": 1.2586007199788052, + "learning_rate": 2.433493313995524e-06, + "loss": 0.23104462027549744, + "step": 5910 + }, + { + "epoch": 1.5696454654096401, + "grad_norm": 1.427902558182486, + "learning_rate": 2.4306232759873803e-06, + "loss": 0.23032237589359283, + "step": 5911 + }, + { + "epoch": 1.569911034391183, + "grad_norm": 1.3780752776280365, + "learning_rate": 2.4277546972657974e-06, + "loss": 0.2588527202606201, + "step": 5912 + }, + { + "epoch": 1.570176603372726, + "grad_norm": 1.4647042397629928, + "learning_rate": 2.424887578383799e-06, + "loss": 0.2845698893070221, + "step": 5913 + }, + { + "epoch": 1.570442172354269, + "grad_norm": 1.338246310760916, + "learning_rate": 2.4220219198941384e-06, + "loss": 0.23010894656181335, + "step": 5914 + }, + { + "epoch": 1.570707741335812, + "grad_norm": 1.3783426416349442, + "learning_rate": 2.419157722349278e-06, + "loss": 0.2623594403266907, + "step": 5915 + }, + { + "epoch": 1.5709733103173549, + "grad_norm": 1.2349976574308903, + "learning_rate": 2.416294986301401e-06, + "loss": 0.2107153981924057, + "step": 5916 + }, + { + "epoch": 1.5712388792988978, + "grad_norm": 1.3633626366853218, + "learning_rate": 2.413433712302409e-06, + "loss": 0.2115003615617752, + "step": 5917 + }, + { + "epoch": 1.5715044482804408, + "grad_norm": 1.3738602333573011, + "learning_rate": 2.410573900903921e-06, + "loss": 0.22406762838363647, + "step": 5918 + }, + { + "epoch": 1.5717700172619837, + "grad_norm": 1.3017270649216575, + "learning_rate": 2.407715552657277e-06, + "loss": 0.24878525733947754, + "step": 5919 + }, + { + "epoch": 1.5720355862435267, + "grad_norm": 1.5003273963811, + "learning_rate": 2.404858668113532e-06, + "loss": 0.24546805024147034, + "step": 5920 + }, + { + "epoch": 1.5723011552250696, + "grad_norm": 1.5650848412040055, + "learning_rate": 2.402003247823459e-06, + "loss": 0.23430263996124268, 
+ "step": 5921 + }, + { + "epoch": 1.5725667242066126, + "grad_norm": 1.3939131226044492, + "learning_rate": 2.399149292337547e-06, + "loss": 0.26935267448425293, + "step": 5922 + }, + { + "epoch": 1.5728322931881555, + "grad_norm": 1.1554138984093538, + "learning_rate": 2.3962968022060097e-06, + "loss": 0.21104472875595093, + "step": 5923 + }, + { + "epoch": 1.5730978621696985, + "grad_norm": 1.147816084956367, + "learning_rate": 2.3934457779787755e-06, + "loss": 0.17162750661373138, + "step": 5924 + }, + { + "epoch": 1.5733634311512414, + "grad_norm": 1.2036391990293953, + "learning_rate": 2.390596220205481e-06, + "loss": 0.22233474254608154, + "step": 5925 + }, + { + "epoch": 1.5736290001327844, + "grad_norm": 1.456348691360017, + "learning_rate": 2.387748129435491e-06, + "loss": 0.2326992005109787, + "step": 5926 + }, + { + "epoch": 1.5738945691143273, + "grad_norm": 1.2656294085970974, + "learning_rate": 2.3849015062178835e-06, + "loss": 0.245779350399971, + "step": 5927 + }, + { + "epoch": 1.5741601380958703, + "grad_norm": 1.2198185109849795, + "learning_rate": 2.382056351101454e-06, + "loss": 0.24269379675388336, + "step": 5928 + }, + { + "epoch": 1.5744257070774133, + "grad_norm": 1.2241918308854736, + "learning_rate": 2.3792126646347138e-06, + "loss": 0.23644019663333893, + "step": 5929 + }, + { + "epoch": 1.5746912760589562, + "grad_norm": 1.2680435600362268, + "learning_rate": 2.376370447365893e-06, + "loss": 0.254330575466156, + "step": 5930 + }, + { + "epoch": 1.5749568450404992, + "grad_norm": 1.4146409212378834, + "learning_rate": 2.373529699842936e-06, + "loss": 0.2728506922721863, + "step": 5931 + }, + { + "epoch": 1.575222414022042, + "grad_norm": 1.3627178065769006, + "learning_rate": 2.3706904226135087e-06, + "loss": 0.23671439290046692, + "step": 5932 + }, + { + "epoch": 1.575487983003585, + "grad_norm": 1.409873356618632, + "learning_rate": 2.367852616224989e-06, + "loss": 0.24205748736858368, + "step": 5933 + }, + { + "epoch": 
1.575753551985128, + "grad_norm": 1.2728197754861583, + "learning_rate": 2.3650162812244725e-06, + "loss": 0.1915436089038849, + "step": 5934 + }, + { + "epoch": 1.576019120966671, + "grad_norm": 1.2091326643578577, + "learning_rate": 2.3621814181587697e-06, + "loss": 0.23453299701213837, + "step": 5935 + }, + { + "epoch": 1.576284689948214, + "grad_norm": 1.3060415308267561, + "learning_rate": 2.3593480275744106e-06, + "loss": 0.24066327512264252, + "step": 5936 + }, + { + "epoch": 1.5765502589297569, + "grad_norm": 1.246429396187596, + "learning_rate": 2.356516110017639e-06, + "loss": 0.22510530054569244, + "step": 5937 + }, + { + "epoch": 1.5768158279112998, + "grad_norm": 1.2889494549478113, + "learning_rate": 2.3536856660344144e-06, + "loss": 0.22967353463172913, + "step": 5938 + }, + { + "epoch": 1.5770813968928428, + "grad_norm": 1.2404139099674472, + "learning_rate": 2.3508566961704127e-06, + "loss": 0.2299107313156128, + "step": 5939 + }, + { + "epoch": 1.5773469658743857, + "grad_norm": 1.2560783974284127, + "learning_rate": 2.3480292009710282e-06, + "loss": 0.23418918251991272, + "step": 5940 + }, + { + "epoch": 1.5776125348559287, + "grad_norm": 1.2857056044544095, + "learning_rate": 2.3452031809813657e-06, + "loss": 0.26528510451316833, + "step": 5941 + }, + { + "epoch": 1.5778781038374716, + "grad_norm": 1.1247059842406957, + "learning_rate": 2.342378636746251e-06, + "loss": 0.21878717839717865, + "step": 5942 + }, + { + "epoch": 1.5781436728190146, + "grad_norm": 1.1637472196421235, + "learning_rate": 2.339555568810221e-06, + "loss": 0.19697530567646027, + "step": 5943 + }, + { + "epoch": 1.5784092418005577, + "grad_norm": 1.3422665805434115, + "learning_rate": 2.3367339777175313e-06, + "loss": 0.24812257289886475, + "step": 5944 + }, + { + "epoch": 1.5786748107821007, + "grad_norm": 1.3285793357341238, + "learning_rate": 2.3339138640121504e-06, + "loss": 0.27651745080947876, + "step": 5945 + }, + { + "epoch": 1.5789403797636437, + "grad_norm": 
1.308131821171991, + "learning_rate": 2.3310952282377643e-06, + "loss": 0.2651634216308594, + "step": 5946 + }, + { + "epoch": 1.5792059487451866, + "grad_norm": 1.3163549633798883, + "learning_rate": 2.328278070937772e-06, + "loss": 0.23799028992652893, + "step": 5947 + }, + { + "epoch": 1.5794715177267296, + "grad_norm": 1.4229706240812914, + "learning_rate": 2.3254623926552867e-06, + "loss": 0.2528802752494812, + "step": 5948 + }, + { + "epoch": 1.5797370867082725, + "grad_norm": 1.2071666314804592, + "learning_rate": 2.322648193933137e-06, + "loss": 0.23819346725940704, + "step": 5949 + }, + { + "epoch": 1.5800026556898155, + "grad_norm": 1.2694222057013376, + "learning_rate": 2.319835475313873e-06, + "loss": 0.2510845959186554, + "step": 5950 + }, + { + "epoch": 1.5802682246713584, + "grad_norm": 1.0731141255180743, + "learning_rate": 2.31702423733975e-06, + "loss": 0.20156612992286682, + "step": 5951 + }, + { + "epoch": 1.5805337936529014, + "grad_norm": 1.320010192923148, + "learning_rate": 2.3142144805527413e-06, + "loss": 0.23375174403190613, + "step": 5952 + }, + { + "epoch": 1.5807993626344443, + "grad_norm": 1.187058092026163, + "learning_rate": 2.311406205494535e-06, + "loss": 0.2378280758857727, + "step": 5953 + }, + { + "epoch": 1.5810649316159873, + "grad_norm": 1.4550533599389408, + "learning_rate": 2.308599412706535e-06, + "loss": 0.2087683081626892, + "step": 5954 + }, + { + "epoch": 1.5813305005975302, + "grad_norm": 1.2856302099767283, + "learning_rate": 2.3057941027298557e-06, + "loss": 0.2228693962097168, + "step": 5955 + }, + { + "epoch": 1.5815960695790732, + "grad_norm": 1.4738789364963756, + "learning_rate": 2.302990276105329e-06, + "loss": 0.22694727778434753, + "step": 5956 + }, + { + "epoch": 1.5818616385606161, + "grad_norm": 1.2486840544551192, + "learning_rate": 2.300187933373499e-06, + "loss": 0.22996942698955536, + "step": 5957 + }, + { + "epoch": 1.582127207542159, + "grad_norm": 1.331719034245123, + "learning_rate": 
2.2973870750746253e-06, + "loss": 0.2440253496170044, + "step": 5958 + }, + { + "epoch": 1.582392776523702, + "grad_norm": 1.3266637203740035, + "learning_rate": 2.2945877017486782e-06, + "loss": 0.2507309019565582, + "step": 5959 + }, + { + "epoch": 1.582658345505245, + "grad_norm": 2.8683041985739677, + "learning_rate": 2.2917898139353467e-06, + "loss": 0.24790918827056885, + "step": 5960 + }, + { + "epoch": 1.582923914486788, + "grad_norm": 1.4168604850261965, + "learning_rate": 2.2889934121740287e-06, + "loss": 0.22106975317001343, + "step": 5961 + }, + { + "epoch": 1.5831894834683309, + "grad_norm": 1.5726662217531726, + "learning_rate": 2.2861984970038385e-06, + "loss": 0.2410939633846283, + "step": 5962 + }, + { + "epoch": 1.5834550524498738, + "grad_norm": 1.1559016560001114, + "learning_rate": 2.283405068963601e-06, + "loss": 0.22821484506130219, + "step": 5963 + }, + { + "epoch": 1.5837206214314168, + "grad_norm": 1.2324685594628142, + "learning_rate": 2.2806131285918588e-06, + "loss": 0.21425281465053558, + "step": 5964 + }, + { + "epoch": 1.5839861904129597, + "grad_norm": 1.2434376170807215, + "learning_rate": 2.277822676426863e-06, + "loss": 0.22428902983665466, + "step": 5965 + }, + { + "epoch": 1.5842517593945027, + "grad_norm": 1.4592375031786005, + "learning_rate": 2.27503371300658e-06, + "loss": 0.2986769676208496, + "step": 5966 + }, + { + "epoch": 1.5845173283760456, + "grad_norm": 1.4384957681975041, + "learning_rate": 2.272246238868687e-06, + "loss": 0.24697065353393555, + "step": 5967 + }, + { + "epoch": 1.5847828973575886, + "grad_norm": 1.3175254870878064, + "learning_rate": 2.269460254550583e-06, + "loss": 0.23725461959838867, + "step": 5968 + }, + { + "epoch": 1.5850484663391315, + "grad_norm": 1.5010497616053564, + "learning_rate": 2.2666757605893664e-06, + "loss": 0.2661248445510864, + "step": 5969 + }, + { + "epoch": 1.5853140353206745, + "grad_norm": 1.2390278830143426, + "learning_rate": 2.263892757521858e-06, + "loss": 
0.23328733444213867, + "step": 5970 + }, + { + "epoch": 1.5855796043022174, + "grad_norm": 1.2547818797647754, + "learning_rate": 2.2611112458845873e-06, + "loss": 0.22886580228805542, + "step": 5971 + }, + { + "epoch": 1.5858451732837606, + "grad_norm": 1.1882681583888588, + "learning_rate": 2.2583312262137966e-06, + "loss": 0.25051698088645935, + "step": 5972 + }, + { + "epoch": 1.5861107422653036, + "grad_norm": 1.2988472953319592, + "learning_rate": 2.2555526990454413e-06, + "loss": 0.2400815784931183, + "step": 5973 + }, + { + "epoch": 1.5863763112468465, + "grad_norm": 1.1598677166947555, + "learning_rate": 2.2527756649151912e-06, + "loss": 0.2212347537279129, + "step": 5974 + }, + { + "epoch": 1.5866418802283895, + "grad_norm": 1.355013417523964, + "learning_rate": 2.2500001243584204e-06, + "loss": 0.3002026379108429, + "step": 5975 + }, + { + "epoch": 1.5869074492099324, + "grad_norm": 1.1899701199057289, + "learning_rate": 2.2472260779102185e-06, + "loss": 0.19813531637191772, + "step": 5976 + }, + { + "epoch": 1.5871730181914754, + "grad_norm": 1.2404972223723234, + "learning_rate": 2.2444535261053968e-06, + "loss": 0.2233983874320984, + "step": 5977 + }, + { + "epoch": 1.5874385871730183, + "grad_norm": 1.417840431772693, + "learning_rate": 2.2416824694784676e-06, + "loss": 0.26059988141059875, + "step": 5978 + }, + { + "epoch": 1.5877041561545613, + "grad_norm": 1.2961846276739968, + "learning_rate": 2.2389129085636573e-06, + "loss": 0.23058606684207916, + "step": 5979 + }, + { + "epoch": 1.5879697251361042, + "grad_norm": 1.3397298592095879, + "learning_rate": 2.236144843894904e-06, + "loss": 0.2414383739233017, + "step": 5980 + }, + { + "epoch": 1.5882352941176472, + "grad_norm": 1.2013757541083616, + "learning_rate": 2.23337827600586e-06, + "loss": 0.21688291430473328, + "step": 5981 + }, + { + "epoch": 1.5885008630991901, + "grad_norm": 1.2977536190104755, + "learning_rate": 2.2306132054298847e-06, + "loss": 0.24297408759593964, + "step": 5982 + }, 
+ { + "epoch": 1.588766432080733, + "grad_norm": 1.449081017944755, + "learning_rate": 2.227849632700052e-06, + "loss": 0.2655821442604065, + "step": 5983 + }, + { + "epoch": 1.589032001062276, + "grad_norm": 1.2305338711146763, + "learning_rate": 2.225087558349146e-06, + "loss": 0.20545080304145813, + "step": 5984 + }, + { + "epoch": 1.589297570043819, + "grad_norm": 1.470607418959754, + "learning_rate": 2.2223269829096593e-06, + "loss": 0.24151475727558136, + "step": 5985 + }, + { + "epoch": 1.589563139025362, + "grad_norm": 1.2194062039730535, + "learning_rate": 2.2195679069138043e-06, + "loss": 0.2294519543647766, + "step": 5986 + }, + { + "epoch": 1.589828708006905, + "grad_norm": 1.3319096935394759, + "learning_rate": 2.2168103308934953e-06, + "loss": 0.2041824758052826, + "step": 5987 + }, + { + "epoch": 1.5900942769884479, + "grad_norm": 1.181577384258167, + "learning_rate": 2.21405425538036e-06, + "loss": 0.1856188029050827, + "step": 5988 + }, + { + "epoch": 1.5903598459699908, + "grad_norm": 1.2644853901124522, + "learning_rate": 2.2112996809057395e-06, + "loss": 0.24337685108184814, + "step": 5989 + }, + { + "epoch": 1.5906254149515338, + "grad_norm": 1.1714048449744126, + "learning_rate": 2.20854660800068e-06, + "loss": 0.2201787382364273, + "step": 5990 + }, + { + "epoch": 1.5908909839330767, + "grad_norm": 1.322531300676563, + "learning_rate": 2.2057950371959427e-06, + "loss": 0.23505619168281555, + "step": 5991 + }, + { + "epoch": 1.5911565529146197, + "grad_norm": 1.4085526679551708, + "learning_rate": 2.203044969021997e-06, + "loss": 0.19528049230575562, + "step": 5992 + }, + { + "epoch": 1.5914221218961626, + "grad_norm": 1.2299879902160842, + "learning_rate": 2.2002964040090256e-06, + "loss": 0.22281290590763092, + "step": 5993 + }, + { + "epoch": 1.5916876908777056, + "grad_norm": 1.310771483519368, + "learning_rate": 2.1975493426869155e-06, + "loss": 0.19606761634349823, + "step": 5994 + }, + { + "epoch": 1.5919532598592485, + "grad_norm": 
1.2570005315725017, + "learning_rate": 2.1948037855852733e-06, + "loss": 0.22559323906898499, + "step": 5995 + }, + { + "epoch": 1.5922188288407915, + "grad_norm": 1.2326545276620708, + "learning_rate": 2.192059733233408e-06, + "loss": 0.20417393743991852, + "step": 5996 + }, + { + "epoch": 1.5924843978223344, + "grad_norm": 1.351064737074131, + "learning_rate": 2.18931718616034e-06, + "loss": 0.2579960525035858, + "step": 5997 + }, + { + "epoch": 1.5927499668038774, + "grad_norm": 1.2980140620122547, + "learning_rate": 2.1865761448948e-06, + "loss": 0.23339781165122986, + "step": 5998 + }, + { + "epoch": 1.5930155357854203, + "grad_norm": 1.2588476812522966, + "learning_rate": 2.1838366099652274e-06, + "loss": 0.2368197739124298, + "step": 5999 + }, + { + "epoch": 1.5932811047669633, + "grad_norm": 1.2980274155826699, + "learning_rate": 2.1810985818997743e-06, + "loss": 0.2225847840309143, + "step": 6000 + }, + { + "epoch": 1.5935466737485062, + "grad_norm": 1.3094945647641514, + "learning_rate": 2.1783620612263e-06, + "loss": 0.2426701784133911, + "step": 6001 + }, + { + "epoch": 1.5938122427300492, + "grad_norm": 1.284834767608695, + "learning_rate": 2.175627048472372e-06, + "loss": 0.23647268116474152, + "step": 6002 + }, + { + "epoch": 1.5940778117115921, + "grad_norm": 1.2525920428706867, + "learning_rate": 2.1728935441652687e-06, + "loss": 0.22843337059020996, + "step": 6003 + }, + { + "epoch": 1.594343380693135, + "grad_norm": 1.1786632019087344, + "learning_rate": 2.1701615488319785e-06, + "loss": 0.21524465084075928, + "step": 6004 + }, + { + "epoch": 1.594608949674678, + "grad_norm": 1.225831889373155, + "learning_rate": 2.167431062999197e-06, + "loss": 0.2160830795764923, + "step": 6005 + }, + { + "epoch": 1.594874518656221, + "grad_norm": 1.238709201727011, + "learning_rate": 2.1647020871933288e-06, + "loss": 0.2321595996618271, + "step": 6006 + }, + { + "epoch": 1.595140087637764, + "grad_norm": 1.164283210992047, + "learning_rate": 
2.1619746219404916e-06, + "loss": 0.21255026757717133, + "step": 6007 + }, + { + "epoch": 1.5954056566193069, + "grad_norm": 1.3822319128280973, + "learning_rate": 2.1592486677665047e-06, + "loss": 0.22851255536079407, + "step": 6008 + }, + { + "epoch": 1.5956712256008498, + "grad_norm": 1.3982384304626327, + "learning_rate": 2.1565242251969022e-06, + "loss": 0.23844364285469055, + "step": 6009 + }, + { + "epoch": 1.5959367945823928, + "grad_norm": 1.3184134341650149, + "learning_rate": 2.153801294756924e-06, + "loss": 0.2592385411262512, + "step": 6010 + }, + { + "epoch": 1.5962023635639357, + "grad_norm": 1.221300094567036, + "learning_rate": 2.151079876971519e-06, + "loss": 0.22163718938827515, + "step": 6011 + }, + { + "epoch": 1.5964679325454787, + "grad_norm": 1.1840952132259899, + "learning_rate": 2.1483599723653415e-06, + "loss": 0.1960998773574829, + "step": 6012 + }, + { + "epoch": 1.5967335015270216, + "grad_norm": 1.1732770789502442, + "learning_rate": 2.145641581462762e-06, + "loss": 0.20811150968074799, + "step": 6013 + }, + { + "epoch": 1.5969990705085646, + "grad_norm": 1.2065470685478314, + "learning_rate": 2.1429247047878534e-06, + "loss": 0.23184621334075928, + "step": 6014 + }, + { + "epoch": 1.5972646394901076, + "grad_norm": 1.3338850940720004, + "learning_rate": 2.1402093428643942e-06, + "loss": 0.22043758630752563, + "step": 6015 + }, + { + "epoch": 1.5975302084716505, + "grad_norm": 1.1736165993383876, + "learning_rate": 2.137495496215878e-06, + "loss": 0.18621152639389038, + "step": 6016 + }, + { + "epoch": 1.5977957774531935, + "grad_norm": 1.332636421894691, + "learning_rate": 2.1347831653654995e-06, + "loss": 0.2422473132610321, + "step": 6017 + }, + { + "epoch": 1.5980613464347364, + "grad_norm": 1.5933227500597664, + "learning_rate": 2.132072350836164e-06, + "loss": 0.2147202491760254, + "step": 6018 + }, + { + "epoch": 1.5983269154162794, + "grad_norm": 1.5455916288717333, + "learning_rate": 2.1293630531504873e-06, + "loss": 
0.23091933131217957, + "step": 6019 + }, + { + "epoch": 1.5985924843978223, + "grad_norm": 1.290869089573798, + "learning_rate": 2.1266552728307876e-06, + "loss": 0.220037579536438, + "step": 6020 + }, + { + "epoch": 1.5988580533793653, + "grad_norm": 1.3343924424387823, + "learning_rate": 2.1239490103990946e-06, + "loss": 0.25520551204681396, + "step": 6021 + }, + { + "epoch": 1.5991236223609082, + "grad_norm": 1.412222062207012, + "learning_rate": 2.1212442663771427e-06, + "loss": 0.23216915130615234, + "step": 6022 + }, + { + "epoch": 1.5993891913424512, + "grad_norm": 1.381515312381825, + "learning_rate": 2.118541041286374e-06, + "loss": 0.22098806500434875, + "step": 6023 + }, + { + "epoch": 1.5996547603239941, + "grad_norm": 1.4609594644715316, + "learning_rate": 2.11583933564794e-06, + "loss": 0.261300265789032, + "step": 6024 + }, + { + "epoch": 1.599920329305537, + "grad_norm": 1.2095539498781858, + "learning_rate": 2.113139149982698e-06, + "loss": 0.20427154004573822, + "step": 6025 + }, + { + "epoch": 1.60018589828708, + "grad_norm": 1.2158101663646808, + "learning_rate": 2.110440484811209e-06, + "loss": 0.20700547099113464, + "step": 6026 + }, + { + "epoch": 1.600451467268623, + "grad_norm": 1.4331467444820847, + "learning_rate": 2.1077433406537475e-06, + "loss": 0.2789752185344696, + "step": 6027 + }, + { + "epoch": 1.600717036250166, + "grad_norm": 1.2991321976135584, + "learning_rate": 2.1050477180302885e-06, + "loss": 0.2205841988325119, + "step": 6028 + }, + { + "epoch": 1.6009826052317089, + "grad_norm": 1.3197920849647402, + "learning_rate": 2.1023536174605184e-06, + "loss": 0.24921822547912598, + "step": 6029 + }, + { + "epoch": 1.6012481742132518, + "grad_norm": 2.014197229906981, + "learning_rate": 2.0996610394638228e-06, + "loss": 0.2516329288482666, + "step": 6030 + }, + { + "epoch": 1.6015137431947948, + "grad_norm": 1.2656936665142342, + "learning_rate": 2.096969984559306e-06, + "loss": 0.21832503378391266, + "step": 6031 + }, + { + 
"epoch": 1.6017793121763377, + "grad_norm": 1.530808592055088, + "learning_rate": 2.094280453265769e-06, + "loss": 0.2499273419380188, + "step": 6032 + }, + { + "epoch": 1.6020448811578807, + "grad_norm": 1.167125195859278, + "learning_rate": 2.09159244610172e-06, + "loss": 0.21701282262802124, + "step": 6033 + }, + { + "epoch": 1.6023104501394236, + "grad_norm": 1.2536801575307182, + "learning_rate": 2.0889059635853783e-06, + "loss": 0.24446213245391846, + "step": 6034 + }, + { + "epoch": 1.6025760191209666, + "grad_norm": 1.412317581200794, + "learning_rate": 2.0862210062346622e-06, + "loss": 0.27299973368644714, + "step": 6035 + }, + { + "epoch": 1.6028415881025095, + "grad_norm": 1.320945278338079, + "learning_rate": 2.0835375745672027e-06, + "loss": 0.2384832501411438, + "step": 6036 + }, + { + "epoch": 1.6031071570840525, + "grad_norm": 1.340788170535406, + "learning_rate": 2.0808556691003335e-06, + "loss": 0.2563338875770569, + "step": 6037 + }, + { + "epoch": 1.6033727260655954, + "grad_norm": 1.5240284764155023, + "learning_rate": 2.0781752903510954e-06, + "loss": 0.29148975014686584, + "step": 6038 + }, + { + "epoch": 1.6036382950471384, + "grad_norm": 1.1673304070468655, + "learning_rate": 2.0754964388362264e-06, + "loss": 0.24276503920555115, + "step": 6039 + }, + { + "epoch": 1.6039038640286813, + "grad_norm": 1.2629655044665746, + "learning_rate": 2.0728191150721866e-06, + "loss": 0.1863931119441986, + "step": 6040 + }, + { + "epoch": 1.6041694330102243, + "grad_norm": 1.1731073698012655, + "learning_rate": 2.0701433195751286e-06, + "loss": 0.21270868182182312, + "step": 6041 + }, + { + "epoch": 1.6044350019917673, + "grad_norm": 1.2780583308550695, + "learning_rate": 2.0674690528609155e-06, + "loss": 0.21542516350746155, + "step": 6042 + }, + { + "epoch": 1.6047005709733102, + "grad_norm": 1.256432235067539, + "learning_rate": 2.0647963154451124e-06, + "loss": 0.23099860548973083, + "step": 6043 + }, + { + "epoch": 1.6049661399548532, + "grad_norm": 
1.1769565332020941, + "learning_rate": 2.062125107842993e-06, + "loss": 0.22757291793823242, + "step": 6044 + }, + { + "epoch": 1.605231708936396, + "grad_norm": 1.317404807729369, + "learning_rate": 2.0594554305695346e-06, + "loss": 0.2370409518480301, + "step": 6045 + }, + { + "epoch": 1.605497277917939, + "grad_norm": 1.1803781252235817, + "learning_rate": 2.0567872841394186e-06, + "loss": 0.21620309352874756, + "step": 6046 + }, + { + "epoch": 1.605762846899482, + "grad_norm": 1.2191738819977833, + "learning_rate": 2.0541206690670324e-06, + "loss": 0.22821158170700073, + "step": 6047 + }, + { + "epoch": 1.606028415881025, + "grad_norm": 1.385940331470305, + "learning_rate": 2.0514555858664663e-06, + "loss": 0.24930253624916077, + "step": 6048 + }, + { + "epoch": 1.606293984862568, + "grad_norm": 1.3966922562239508, + "learning_rate": 2.048792035051521e-06, + "loss": 0.2491561770439148, + "step": 6049 + }, + { + "epoch": 1.6065595538441109, + "grad_norm": 1.3037697337655914, + "learning_rate": 2.046130017135697e-06, + "loss": 0.20652002096176147, + "step": 6050 + }, + { + "epoch": 1.6068251228256538, + "grad_norm": 1.1970911046995705, + "learning_rate": 2.0434695326321975e-06, + "loss": 0.25670793652534485, + "step": 6051 + }, + { + "epoch": 1.6070906918071968, + "grad_norm": 1.2469219040368793, + "learning_rate": 2.0408105820539328e-06, + "loss": 0.2328418493270874, + "step": 6052 + }, + { + "epoch": 1.6073562607887397, + "grad_norm": 1.2657559287734064, + "learning_rate": 2.0381531659135213e-06, + "loss": 0.20811162889003754, + "step": 6053 + }, + { + "epoch": 1.6076218297702827, + "grad_norm": 1.2637409014709644, + "learning_rate": 2.0354972847232756e-06, + "loss": 0.24068522453308105, + "step": 6054 + }, + { + "epoch": 1.6078873987518256, + "grad_norm": 1.3537388998191249, + "learning_rate": 2.032842938995221e-06, + "loss": 0.2519197463989258, + "step": 6055 + }, + { + "epoch": 1.6081529677333686, + "grad_norm": 1.349413355425799, + "learning_rate": 
2.030190129241083e-06, + "loss": 0.2293267697095871, + "step": 6056 + }, + { + "epoch": 1.6084185367149118, + "grad_norm": 1.8474927483406436, + "learning_rate": 2.027538855972291e-06, + "loss": 0.22398510575294495, + "step": 6057 + }, + { + "epoch": 1.6086841056964547, + "grad_norm": 1.4186878733418118, + "learning_rate": 2.0248891196999833e-06, + "loss": 0.23074102401733398, + "step": 6058 + }, + { + "epoch": 1.6089496746779977, + "grad_norm": 1.352152679115686, + "learning_rate": 2.0222409209349957e-06, + "loss": 0.2618173658847809, + "step": 6059 + }, + { + "epoch": 1.6092152436595406, + "grad_norm": 1.2898742263880296, + "learning_rate": 2.0195942601878703e-06, + "loss": 0.25361114740371704, + "step": 6060 + }, + { + "epoch": 1.6094808126410836, + "grad_norm": 1.2270527625039152, + "learning_rate": 2.016949137968851e-06, + "loss": 0.2276519238948822, + "step": 6061 + }, + { + "epoch": 1.6097463816226265, + "grad_norm": 1.3155356069823825, + "learning_rate": 2.0143055547878863e-06, + "loss": 0.20834363996982574, + "step": 6062 + }, + { + "epoch": 1.6100119506041695, + "grad_norm": 1.348708703656222, + "learning_rate": 2.011663511154628e-06, + "loss": 0.2579394578933716, + "step": 6063 + }, + { + "epoch": 1.6102775195857124, + "grad_norm": 1.2574503425710122, + "learning_rate": 2.009023007578431e-06, + "loss": 0.22118912637233734, + "step": 6064 + }, + { + "epoch": 1.6105430885672554, + "grad_norm": 1.1631210187007555, + "learning_rate": 2.0063840445683537e-06, + "loss": 0.1881515383720398, + "step": 6065 + }, + { + "epoch": 1.6108086575487983, + "grad_norm": 1.2884662240297928, + "learning_rate": 2.003746622633155e-06, + "loss": 0.2270805984735489, + "step": 6066 + }, + { + "epoch": 1.6110742265303413, + "grad_norm": 1.4261065534360056, + "learning_rate": 2.0011107422813013e-06, + "loss": 0.26356351375579834, + "step": 6067 + }, + { + "epoch": 1.6113397955118842, + "grad_norm": 1.2506363457624738, + "learning_rate": 1.9984764040209615e-06, + "loss": 
0.22937676310539246, + "step": 6068 + }, + { + "epoch": 1.6116053644934272, + "grad_norm": 1.329188800311282, + "learning_rate": 1.99584360836e-06, + "loss": 0.25062739849090576, + "step": 6069 + }, + { + "epoch": 1.6118709334749701, + "grad_norm": 1.1593663351806502, + "learning_rate": 1.993212355805989e-06, + "loss": 0.2031324952840805, + "step": 6070 + }, + { + "epoch": 1.612136502456513, + "grad_norm": 1.3722085699931008, + "learning_rate": 1.990582646866206e-06, + "loss": 0.25769656896591187, + "step": 6071 + }, + { + "epoch": 1.612402071438056, + "grad_norm": 1.3184109520906713, + "learning_rate": 1.987954482047626e-06, + "loss": 0.23856252431869507, + "step": 6072 + }, + { + "epoch": 1.612667640419599, + "grad_norm": 1.3452730145342116, + "learning_rate": 1.9853278618569284e-06, + "loss": 0.2336723804473877, + "step": 6073 + }, + { + "epoch": 1.612933209401142, + "grad_norm": 1.3427497614935235, + "learning_rate": 1.9827027868004942e-06, + "loss": 0.22327622771263123, + "step": 6074 + }, + { + "epoch": 1.6131987783826849, + "grad_norm": 1.302817235652594, + "learning_rate": 1.980079257384405e-06, + "loss": 0.26695019006729126, + "step": 6075 + }, + { + "epoch": 1.6134643473642278, + "grad_norm": 1.174792834468628, + "learning_rate": 1.9774572741144514e-06, + "loss": 0.2467387616634369, + "step": 6076 + }, + { + "epoch": 1.6137299163457708, + "grad_norm": 1.3974546997540778, + "learning_rate": 1.9748368374961193e-06, + "loss": 0.25473737716674805, + "step": 6077 + }, + { + "epoch": 1.6139954853273137, + "grad_norm": 1.295354894556923, + "learning_rate": 1.972217948034596e-06, + "loss": 0.25508594512939453, + "step": 6078 + }, + { + "epoch": 1.6142610543088567, + "grad_norm": 1.2627621502033493, + "learning_rate": 1.969600606234774e-06, + "loss": 0.23020131886005402, + "step": 6079 + }, + { + "epoch": 1.6145266232903996, + "grad_norm": 1.2036992831321345, + "learning_rate": 1.9669848126012447e-06, + "loss": 0.249805748462677, + "step": 6080 + }, + { + "epoch": 
1.6147921922719426, + "grad_norm": 1.2304217597704168, + "learning_rate": 1.964370567638303e-06, + "loss": 0.2377707064151764, + "step": 6081 + }, + { + "epoch": 1.6150577612534855, + "grad_norm": 1.3812388616949685, + "learning_rate": 1.9617578718499452e-06, + "loss": 0.28656789660453796, + "step": 6082 + }, + { + "epoch": 1.6153233302350285, + "grad_norm": 1.3083477730508752, + "learning_rate": 1.9591467257398668e-06, + "loss": 0.22079989314079285, + "step": 6083 + }, + { + "epoch": 1.6155888992165715, + "grad_norm": 1.048982897357468, + "learning_rate": 1.9565371298114666e-06, + "loss": 0.1993042230606079, + "step": 6084 + }, + { + "epoch": 1.6158544681981146, + "grad_norm": 1.1837758778278344, + "learning_rate": 1.9539290845678438e-06, + "loss": 0.20818357169628143, + "step": 6085 + }, + { + "epoch": 1.6161200371796576, + "grad_norm": 1.2192677831294998, + "learning_rate": 1.9513225905117996e-06, + "loss": 0.20531761646270752, + "step": 6086 + }, + { + "epoch": 1.6163856061612005, + "grad_norm": 1.2499003349392819, + "learning_rate": 1.948717648145834e-06, + "loss": 0.23414376378059387, + "step": 6087 + }, + { + "epoch": 1.6166511751427435, + "grad_norm": 1.2073482694002922, + "learning_rate": 1.9461142579721493e-06, + "loss": 0.2025471031665802, + "step": 6088 + }, + { + "epoch": 1.6169167441242864, + "grad_norm": 1.4729414889087271, + "learning_rate": 1.943512420492649e-06, + "loss": 0.19130446016788483, + "step": 6089 + }, + { + "epoch": 1.6171823131058294, + "grad_norm": 1.1947055473554775, + "learning_rate": 1.940912136208938e-06, + "loss": 0.21637848019599915, + "step": 6090 + }, + { + "epoch": 1.6174478820873723, + "grad_norm": 1.301401884532825, + "learning_rate": 1.9383134056223176e-06, + "loss": 0.26844075322151184, + "step": 6091 + }, + { + "epoch": 1.6177134510689153, + "grad_norm": 1.1755891449306313, + "learning_rate": 1.935716229233794e-06, + "loss": 0.19573305547237396, + "step": 6092 + }, + { + "epoch": 1.6179790200504582, + "grad_norm": 
1.2705214543802177, + "learning_rate": 1.93312060754407e-06, + "loss": 0.22705954313278198, + "step": 6093 + }, + { + "epoch": 1.6182445890320012, + "grad_norm": 1.279170245457384, + "learning_rate": 1.9305265410535545e-06, + "loss": 0.2505400478839874, + "step": 6094 + }, + { + "epoch": 1.6185101580135441, + "grad_norm": 1.2108711177458409, + "learning_rate": 1.927934030262353e-06, + "loss": 0.2328193187713623, + "step": 6095 + }, + { + "epoch": 1.618775726995087, + "grad_norm": 1.2588974628750198, + "learning_rate": 1.9253430756702674e-06, + "loss": 0.23876577615737915, + "step": 6096 + }, + { + "epoch": 1.61904129597663, + "grad_norm": 1.3685755624123837, + "learning_rate": 1.9227536777768063e-06, + "loss": 0.2390732318162918, + "step": 6097 + }, + { + "epoch": 1.619306864958173, + "grad_norm": 1.3858306009370809, + "learning_rate": 1.9201658370811736e-06, + "loss": 0.25231993198394775, + "step": 6098 + }, + { + "epoch": 1.619572433939716, + "grad_norm": 1.2520374949609627, + "learning_rate": 1.917579554082274e-06, + "loss": 0.21527352929115295, + "step": 6099 + }, + { + "epoch": 1.619838002921259, + "grad_norm": 1.2236250632687489, + "learning_rate": 1.9149948292787133e-06, + "loss": 0.21394580602645874, + "step": 6100 + }, + { + "epoch": 1.6201035719028019, + "grad_norm": 1.3465338603905943, + "learning_rate": 1.912411663168796e-06, + "loss": 0.26093196868896484, + "step": 6101 + }, + { + "epoch": 1.6203691408843448, + "grad_norm": 1.3518497357465815, + "learning_rate": 1.9098300562505266e-06, + "loss": 0.2631412744522095, + "step": 6102 + }, + { + "epoch": 1.6206347098658878, + "grad_norm": 1.3007944720423297, + "learning_rate": 1.9072500090216073e-06, + "loss": 0.270250141620636, + "step": 6103 + }, + { + "epoch": 1.6209002788474307, + "grad_norm": 1.3385737712068424, + "learning_rate": 1.9046715219794397e-06, + "loss": 0.22944031655788422, + "step": 6104 + }, + { + "epoch": 1.6211658478289737, + "grad_norm": 1.2125488505372424, + "learning_rate": 
1.902094595621129e-06, + "loss": 0.24429070949554443, + "step": 6105 + }, + { + "epoch": 1.6214314168105166, + "grad_norm": 1.2581532570405378, + "learning_rate": 1.8995192304434729e-06, + "loss": 0.25656238198280334, + "step": 6106 + }, + { + "epoch": 1.6216969857920596, + "grad_norm": 1.3466122688772229, + "learning_rate": 1.8969454269429743e-06, + "loss": 0.2575233280658722, + "step": 6107 + }, + { + "epoch": 1.6219625547736025, + "grad_norm": 1.245984919504028, + "learning_rate": 1.8943731856158299e-06, + "loss": 0.24881063401699066, + "step": 6108 + }, + { + "epoch": 1.6222281237551455, + "grad_norm": 1.2845731125917577, + "learning_rate": 1.8918025069579382e-06, + "loss": 0.23353847861289978, + "step": 6109 + }, + { + "epoch": 1.6224936927366884, + "grad_norm": 1.2505489106727152, + "learning_rate": 1.8892333914648953e-06, + "loss": 0.21085457503795624, + "step": 6110 + }, + { + "epoch": 1.6227592617182314, + "grad_norm": 1.4134001131082032, + "learning_rate": 1.8866658396319947e-06, + "loss": 0.28600943088531494, + "step": 6111 + }, + { + "epoch": 1.6230248306997743, + "grad_norm": 1.1689838110439057, + "learning_rate": 1.8840998519542352e-06, + "loss": 0.22580507397651672, + "step": 6112 + }, + { + "epoch": 1.6232903996813173, + "grad_norm": 1.212526750953587, + "learning_rate": 1.8815354289263066e-06, + "loss": 0.19310800731182098, + "step": 6113 + }, + { + "epoch": 1.6235559686628602, + "grad_norm": 1.3020905454433194, + "learning_rate": 1.8789725710425988e-06, + "loss": 0.21633204817771912, + "step": 6114 + }, + { + "epoch": 1.6238215376444032, + "grad_norm": 1.4315370828946672, + "learning_rate": 1.8764112787972e-06, + "loss": 0.21346023678779602, + "step": 6115 + }, + { + "epoch": 1.6240871066259461, + "grad_norm": 1.21392020481053, + "learning_rate": 1.8738515526838986e-06, + "loss": 0.21206694841384888, + "step": 6116 + }, + { + "epoch": 1.624352675607489, + "grad_norm": 1.3197096686410696, + "learning_rate": 1.8712933931961773e-06, + "loss": 
0.2135339230298996, + "step": 6117 + }, + { + "epoch": 1.624618244589032, + "grad_norm": 1.2484635869956482, + "learning_rate": 1.8687368008272243e-06, + "loss": 0.2168758660554886, + "step": 6118 + }, + { + "epoch": 1.624883813570575, + "grad_norm": 1.1804251189525716, + "learning_rate": 1.866181776069914e-06, + "loss": 0.20825617015361786, + "step": 6119 + }, + { + "epoch": 1.625149382552118, + "grad_norm": 1.291082575518304, + "learning_rate": 1.863628319416826e-06, + "loss": 0.25367867946624756, + "step": 6120 + }, + { + "epoch": 1.625414951533661, + "grad_norm": 1.3053498393136334, + "learning_rate": 1.8610764313602404e-06, + "loss": 0.21604284644126892, + "step": 6121 + }, + { + "epoch": 1.6256805205152038, + "grad_norm": 1.2871138327885168, + "learning_rate": 1.8585261123921283e-06, + "loss": 0.2324865758419037, + "step": 6122 + }, + { + "epoch": 1.6259460894967468, + "grad_norm": 1.2467444217539543, + "learning_rate": 1.8559773630041632e-06, + "loss": 0.2077629417181015, + "step": 6123 + }, + { + "epoch": 1.6262116584782897, + "grad_norm": 1.1704936500874914, + "learning_rate": 1.8534301836877122e-06, + "loss": 0.19919469952583313, + "step": 6124 + }, + { + "epoch": 1.6264772274598327, + "grad_norm": 1.1998850682672693, + "learning_rate": 1.8508845749338412e-06, + "loss": 0.21069160103797913, + "step": 6125 + }, + { + "epoch": 1.6267427964413756, + "grad_norm": 1.218804714337499, + "learning_rate": 1.8483405372333152e-06, + "loss": 0.2286640703678131, + "step": 6126 + }, + { + "epoch": 1.6270083654229186, + "grad_norm": 1.33630910648056, + "learning_rate": 1.8457980710765932e-06, + "loss": 0.2430541068315506, + "step": 6127 + }, + { + "epoch": 1.6272739344044616, + "grad_norm": 1.3713498598627625, + "learning_rate": 1.8432571769538344e-06, + "loss": 0.21875709295272827, + "step": 6128 + }, + { + "epoch": 1.6275395033860045, + "grad_norm": 1.4416966555618131, + "learning_rate": 1.8407178553548876e-06, + "loss": 0.22591018676757812, + "step": 6129 + }, + { + 
"epoch": 1.6278050723675475, + "grad_norm": 1.362917465597037, + "learning_rate": 1.8381801067693129e-06, + "loss": 0.25429075956344604, + "step": 6130 + }, + { + "epoch": 1.6280706413490904, + "grad_norm": 1.31452454626215, + "learning_rate": 1.8356439316863528e-06, + "loss": 0.2437858283519745, + "step": 6131 + }, + { + "epoch": 1.6283362103306334, + "grad_norm": 1.2489983792436092, + "learning_rate": 1.8331093305949532e-06, + "loss": 0.24196262657642365, + "step": 6132 + }, + { + "epoch": 1.6286017793121763, + "grad_norm": 1.3756170241894088, + "learning_rate": 1.8305763039837576e-06, + "loss": 0.25779271125793457, + "step": 6133 + }, + { + "epoch": 1.6288673482937193, + "grad_norm": 1.223955710903011, + "learning_rate": 1.8280448523410987e-06, + "loss": 0.23418015241622925, + "step": 6134 + }, + { + "epoch": 1.6291329172752622, + "grad_norm": 1.3748973147827792, + "learning_rate": 1.8255149761550128e-06, + "loss": 0.2670775353908539, + "step": 6135 + }, + { + "epoch": 1.6293984862568052, + "grad_norm": 1.423176544673552, + "learning_rate": 1.822986675913231e-06, + "loss": 0.29342639446258545, + "step": 6136 + }, + { + "epoch": 1.6296640552383481, + "grad_norm": 1.244422511511833, + "learning_rate": 1.8204599521031785e-06, + "loss": 0.22768062353134155, + "step": 6137 + }, + { + "epoch": 1.629929624219891, + "grad_norm": 1.6355607569945512, + "learning_rate": 1.817934805211976e-06, + "loss": 0.23938167095184326, + "step": 6138 + }, + { + "epoch": 1.630195193201434, + "grad_norm": 1.311916117620117, + "learning_rate": 1.8154112357264474e-06, + "loss": 0.1982264518737793, + "step": 6139 + }, + { + "epoch": 1.630460762182977, + "grad_norm": 1.3026965235969699, + "learning_rate": 1.8128892441331047e-06, + "loss": 0.23591312766075134, + "step": 6140 + }, + { + "epoch": 1.63072633116452, + "grad_norm": 1.259123916156089, + "learning_rate": 1.8103688309181567e-06, + "loss": 0.20317673683166504, + "step": 6141 + }, + { + "epoch": 1.6309919001460629, + "grad_norm": 
1.2846300858550195, + "learning_rate": 1.8078499965675112e-06, + "loss": 0.233676478266716, + "step": 6142 + }, + { + "epoch": 1.6312574691276058, + "grad_norm": 1.3296785293607047, + "learning_rate": 1.8053327415667688e-06, + "loss": 0.22850775718688965, + "step": 6143 + }, + { + "epoch": 1.6315230381091488, + "grad_norm": 1.2850656633806874, + "learning_rate": 1.8028170664012268e-06, + "loss": 0.2603572607040405, + "step": 6144 + }, + { + "epoch": 1.6317886070906917, + "grad_norm": 1.3208849168125785, + "learning_rate": 1.8003029715558773e-06, + "loss": 0.27881523966789246, + "step": 6145 + }, + { + "epoch": 1.6320541760722347, + "grad_norm": 1.225668329292659, + "learning_rate": 1.797790457515406e-06, + "loss": 0.21744176745414734, + "step": 6146 + }, + { + "epoch": 1.6323197450537776, + "grad_norm": 1.2220588910103882, + "learning_rate": 1.7952795247642008e-06, + "loss": 0.20449542999267578, + "step": 6147 + }, + { + "epoch": 1.6325853140353206, + "grad_norm": 1.3015735321136237, + "learning_rate": 1.7927701737863402e-06, + "loss": 0.25641053915023804, + "step": 6148 + }, + { + "epoch": 1.6328508830168635, + "grad_norm": 1.294201240106412, + "learning_rate": 1.7902624050655914e-06, + "loss": 0.23583751916885376, + "step": 6149 + }, + { + "epoch": 1.6331164519984065, + "grad_norm": 1.4310897316272893, + "learning_rate": 1.787756219085427e-06, + "loss": 0.2709866762161255, + "step": 6150 + }, + { + "epoch": 1.6333820209799494, + "grad_norm": 1.2536554341378991, + "learning_rate": 1.785251616329009e-06, + "loss": 0.233103945851326, + "step": 6151 + }, + { + "epoch": 1.6336475899614924, + "grad_norm": 1.2660813048243769, + "learning_rate": 1.7827485972791957e-06, + "loss": 0.2665184438228607, + "step": 6152 + }, + { + "epoch": 1.6339131589430353, + "grad_norm": 1.2551185732946457, + "learning_rate": 1.7802471624185392e-06, + "loss": 0.20934605598449707, + "step": 6153 + }, + { + "epoch": 1.6341787279245783, + "grad_norm": 1.2179362426676639, + "learning_rate": 
1.7777473122292866e-06, + "loss": 0.2102464735507965, + "step": 6154 + }, + { + "epoch": 1.6344442969061213, + "grad_norm": 1.2289784110367914, + "learning_rate": 1.7752490471933769e-06, + "loss": 0.22889986634254456, + "step": 6155 + }, + { + "epoch": 1.6347098658876642, + "grad_norm": 1.3627659705359922, + "learning_rate": 1.772752367792452e-06, + "loss": 0.2261584997177124, + "step": 6156 + }, + { + "epoch": 1.6349754348692072, + "grad_norm": 1.2186249427048736, + "learning_rate": 1.7702572745078395e-06, + "loss": 0.21456710994243622, + "step": 6157 + }, + { + "epoch": 1.63524100385075, + "grad_norm": 1.1535452073956258, + "learning_rate": 1.7677637678205627e-06, + "loss": 0.22762097418308258, + "step": 6158 + }, + { + "epoch": 1.635506572832293, + "grad_norm": 1.306484526102534, + "learning_rate": 1.7652718482113417e-06, + "loss": 0.24772633612155914, + "step": 6159 + }, + { + "epoch": 1.635772141813836, + "grad_norm": 1.3290630048425123, + "learning_rate": 1.7627815161605887e-06, + "loss": 0.22980757057666779, + "step": 6160 + }, + { + "epoch": 1.636037710795379, + "grad_norm": 1.1593602123779645, + "learning_rate": 1.760292772148411e-06, + "loss": 0.19560125470161438, + "step": 6161 + }, + { + "epoch": 1.636303279776922, + "grad_norm": 1.388673809129743, + "learning_rate": 1.7578056166546086e-06, + "loss": 0.23733064532279968, + "step": 6162 + }, + { + "epoch": 1.6365688487584649, + "grad_norm": 1.2026681813349183, + "learning_rate": 1.7553200501586743e-06, + "loss": 0.21064560115337372, + "step": 6163 + }, + { + "epoch": 1.6368344177400078, + "grad_norm": 1.3444341606502546, + "learning_rate": 1.7528360731397986e-06, + "loss": 0.26709994673728943, + "step": 6164 + }, + { + "epoch": 1.6370999867215508, + "grad_norm": 1.2755110888757868, + "learning_rate": 1.750353686076861e-06, + "loss": 0.26555943489074707, + "step": 6165 + }, + { + "epoch": 1.6373655557030937, + "grad_norm": 1.3299250322981557, + "learning_rate": 1.7478728894484375e-06, + "loss": 
0.24480760097503662, + "step": 6166 + }, + { + "epoch": 1.6376311246846367, + "grad_norm": 1.2560095314061934, + "learning_rate": 1.7453936837327967e-06, + "loss": 0.2170884907245636, + "step": 6167 + }, + { + "epoch": 1.6378966936661796, + "grad_norm": 1.340756013397369, + "learning_rate": 1.7429160694078983e-06, + "loss": 0.24728982150554657, + "step": 6168 + }, + { + "epoch": 1.6381622626477228, + "grad_norm": 1.1911402182063675, + "learning_rate": 1.7404400469513994e-06, + "loss": 0.20886945724487305, + "step": 6169 + }, + { + "epoch": 1.6384278316292658, + "grad_norm": 1.2150445755778985, + "learning_rate": 1.7379656168406467e-06, + "loss": 0.1892474740743637, + "step": 6170 + }, + { + "epoch": 1.6386934006108087, + "grad_norm": 1.3004801024505461, + "learning_rate": 1.7354927795526821e-06, + "loss": 0.24953782558441162, + "step": 6171 + }, + { + "epoch": 1.6389589695923517, + "grad_norm": 1.2292705802712374, + "learning_rate": 1.7330215355642377e-06, + "loss": 0.2311600148677826, + "step": 6172 + }, + { + "epoch": 1.6392245385738946, + "grad_norm": 1.2596864005467026, + "learning_rate": 1.73055188535174e-06, + "loss": 0.24018675088882446, + "step": 6173 + }, + { + "epoch": 1.6394901075554376, + "grad_norm": 1.3394449685829455, + "learning_rate": 1.7280838293913116e-06, + "loss": 0.22607022523880005, + "step": 6174 + }, + { + "epoch": 1.6397556765369805, + "grad_norm": 1.2860534255043978, + "learning_rate": 1.7256173681587619e-06, + "loss": 0.23725482821464539, + "step": 6175 + }, + { + "epoch": 1.6400212455185235, + "grad_norm": 1.2500709715234832, + "learning_rate": 1.723152502129597e-06, + "loss": 0.241235613822937, + "step": 6176 + }, + { + "epoch": 1.6402868145000664, + "grad_norm": 1.2070755501863832, + "learning_rate": 1.7206892317790136e-06, + "loss": 0.2150690108537674, + "step": 6177 + }, + { + "epoch": 1.6405523834816094, + "grad_norm": 1.2557873581014805, + "learning_rate": 1.7182275575819007e-06, + "loss": 0.22133421897888184, + "step": 6178 + }, 
+ { + "epoch": 1.6408179524631523, + "grad_norm": 1.1297884729403, + "learning_rate": 1.7157674800128399e-06, + "loss": 0.1937463879585266, + "step": 6179 + }, + { + "epoch": 1.6410835214446953, + "grad_norm": 1.0851305240668396, + "learning_rate": 1.7133089995461062e-06, + "loss": 0.18938027322292328, + "step": 6180 + }, + { + "epoch": 1.6413490904262382, + "grad_norm": 1.2621430482402598, + "learning_rate": 1.7108521166556646e-06, + "loss": 0.23577997088432312, + "step": 6181 + }, + { + "epoch": 1.6416146594077812, + "grad_norm": 1.2915526813468403, + "learning_rate": 1.7083968318151734e-06, + "loss": 0.2712448537349701, + "step": 6182 + }, + { + "epoch": 1.6418802283893241, + "grad_norm": 1.276409938985324, + "learning_rate": 1.7059431454979825e-06, + "loss": 0.24242255091667175, + "step": 6183 + }, + { + "epoch": 1.642145797370867, + "grad_norm": 1.3152058895449834, + "learning_rate": 1.7034910581771347e-06, + "loss": 0.22521010041236877, + "step": 6184 + }, + { + "epoch": 1.64241136635241, + "grad_norm": 1.3840145244958133, + "learning_rate": 1.7010405703253618e-06, + "loss": 0.22026273608207703, + "step": 6185 + }, + { + "epoch": 1.642676935333953, + "grad_norm": 1.458737402535225, + "learning_rate": 1.6985916824150894e-06, + "loss": 0.22726528346538544, + "step": 6186 + }, + { + "epoch": 1.642942504315496, + "grad_norm": 1.3396783040947258, + "learning_rate": 1.6961443949184353e-06, + "loss": 0.25172409415245056, + "step": 6187 + }, + { + "epoch": 1.6432080732970389, + "grad_norm": 1.1393591185728944, + "learning_rate": 1.6936987083072065e-06, + "loss": 0.21173113584518433, + "step": 6188 + }, + { + "epoch": 1.6434736422785818, + "grad_norm": 1.3589729407555038, + "learning_rate": 1.6912546230529036e-06, + "loss": 0.22596749663352966, + "step": 6189 + }, + { + "epoch": 1.6437392112601248, + "grad_norm": 1.3604263454917045, + "learning_rate": 1.6888121396267166e-06, + "loss": 0.2749077081680298, + "step": 6190 + }, + { + "epoch": 1.6440047802416677, + 
"grad_norm": 2.5555069132462283, + "learning_rate": 1.6863712584995252e-06, + "loss": 0.22150780260562897, + "step": 6191 + }, + { + "epoch": 1.6442703492232107, + "grad_norm": 1.2838243253096144, + "learning_rate": 1.6839319801419073e-06, + "loss": 0.23437368869781494, + "step": 6192 + }, + { + "epoch": 1.6445359182047536, + "grad_norm": 1.3069256977628543, + "learning_rate": 1.681494305024125e-06, + "loss": 0.22949008643627167, + "step": 6193 + }, + { + "epoch": 1.6448014871862966, + "grad_norm": 1.2956112975441718, + "learning_rate": 1.6790582336161332e-06, + "loss": 0.24147525429725647, + "step": 6194 + }, + { + "epoch": 1.6450670561678395, + "grad_norm": 1.180082798545332, + "learning_rate": 1.6766237663875773e-06, + "loss": 0.2001456618309021, + "step": 6195 + }, + { + "epoch": 1.6453326251493825, + "grad_norm": 1.2710753216206616, + "learning_rate": 1.674190903807794e-06, + "loss": 0.17668186128139496, + "step": 6196 + }, + { + "epoch": 1.6455981941309257, + "grad_norm": 1.369840319031622, + "learning_rate": 1.6717596463458107e-06, + "loss": 0.24585255980491638, + "step": 6197 + }, + { + "epoch": 1.6458637631124686, + "grad_norm": 1.2328642285488454, + "learning_rate": 1.6693299944703479e-06, + "loss": 0.2234572172164917, + "step": 6198 + }, + { + "epoch": 1.6461293320940116, + "grad_norm": 1.2369910191993496, + "learning_rate": 1.6669019486498083e-06, + "loss": 0.2007240653038025, + "step": 6199 + }, + { + "epoch": 1.6463949010755545, + "grad_norm": 1.317383450933259, + "learning_rate": 1.6644755093522913e-06, + "loss": 0.21926215291023254, + "step": 6200 + }, + { + "epoch": 1.6466604700570975, + "grad_norm": 1.3404302006039666, + "learning_rate": 1.662050677045589e-06, + "loss": 0.24797898530960083, + "step": 6201 + }, + { + "epoch": 1.6469260390386404, + "grad_norm": 1.285343354391859, + "learning_rate": 1.65962745219718e-06, + "loss": 0.22087037563323975, + "step": 6202 + }, + { + "epoch": 1.6471916080201834, + "grad_norm": 1.2765781805195457, + 
"learning_rate": 1.6572058352742327e-06, + "loss": 0.23073960840702057, + "step": 6203 + }, + { + "epoch": 1.6474571770017263, + "grad_norm": 1.3644493807061109, + "learning_rate": 1.6547858267436056e-06, + "loss": 0.2430298924446106, + "step": 6204 + }, + { + "epoch": 1.6477227459832693, + "grad_norm": 1.286198443262182, + "learning_rate": 1.6523674270718493e-06, + "loss": 0.23337247967720032, + "step": 6205 + }, + { + "epoch": 1.6479883149648122, + "grad_norm": 1.2144238817830517, + "learning_rate": 1.6499506367252016e-06, + "loss": 0.22141093015670776, + "step": 6206 + }, + { + "epoch": 1.6482538839463552, + "grad_norm": 1.280282959866893, + "learning_rate": 1.647535456169591e-06, + "loss": 0.23247988522052765, + "step": 6207 + }, + { + "epoch": 1.6485194529278981, + "grad_norm": 1.3728921390628253, + "learning_rate": 1.6451218858706374e-06, + "loss": 0.2659391760826111, + "step": 6208 + }, + { + "epoch": 1.648785021909441, + "grad_norm": 1.2534645715863684, + "learning_rate": 1.642709926293644e-06, + "loss": 0.2154998630285263, + "step": 6209 + }, + { + "epoch": 1.649050590890984, + "grad_norm": 1.322825591754104, + "learning_rate": 1.6402995779036146e-06, + "loss": 0.20363599061965942, + "step": 6210 + }, + { + "epoch": 1.649316159872527, + "grad_norm": 1.3775669953664806, + "learning_rate": 1.6378908411652328e-06, + "loss": 0.23388779163360596, + "step": 6211 + }, + { + "epoch": 1.64958172885407, + "grad_norm": 1.205059730534318, + "learning_rate": 1.6354837165428772e-06, + "loss": 0.20465341210365295, + "step": 6212 + }, + { + "epoch": 1.649847297835613, + "grad_norm": 1.2409004364034002, + "learning_rate": 1.6330782045006088e-06, + "loss": 0.2233584225177765, + "step": 6213 + }, + { + "epoch": 1.6501128668171559, + "grad_norm": 1.313264623251788, + "learning_rate": 1.6306743055021834e-06, + "loss": 0.2880077064037323, + "step": 6214 + }, + { + "epoch": 1.6503784357986988, + "grad_norm": 1.2769524753658168, + "learning_rate": 1.6282720200110458e-06, + 
"loss": 0.23332230746746063, + "step": 6215 + }, + { + "epoch": 1.6506440047802418, + "grad_norm": 1.2682336609825682, + "learning_rate": 1.6258713484903266e-06, + "loss": 0.22191204130649567, + "step": 6216 + }, + { + "epoch": 1.6509095737617847, + "grad_norm": 1.2899982671052521, + "learning_rate": 1.6234722914028478e-06, + "loss": 0.2403659224510193, + "step": 6217 + }, + { + "epoch": 1.6511751427433277, + "grad_norm": 1.2823746538865957, + "learning_rate": 1.6210748492111161e-06, + "loss": 0.2230256348848343, + "step": 6218 + }, + { + "epoch": 1.6514407117248706, + "grad_norm": 1.233703409456991, + "learning_rate": 1.6186790223773375e-06, + "loss": 0.2086302787065506, + "step": 6219 + }, + { + "epoch": 1.6517062807064136, + "grad_norm": 1.2696219439991872, + "learning_rate": 1.6162848113633934e-06, + "loss": 0.22336703538894653, + "step": 6220 + }, + { + "epoch": 1.6519718496879565, + "grad_norm": 1.2026474951561137, + "learning_rate": 1.6138922166308613e-06, + "loss": 0.2354746013879776, + "step": 6221 + }, + { + "epoch": 1.6522374186694995, + "grad_norm": 1.212799588563382, + "learning_rate": 1.6115012386410045e-06, + "loss": 0.23983564972877502, + "step": 6222 + }, + { + "epoch": 1.6525029876510424, + "grad_norm": 1.3394195242071623, + "learning_rate": 1.6091118778547765e-06, + "loss": 0.25468897819519043, + "step": 6223 + }, + { + "epoch": 1.6527685566325854, + "grad_norm": 1.2085737685975797, + "learning_rate": 1.6067241347328166e-06, + "loss": 0.2225346863269806, + "step": 6224 + }, + { + "epoch": 1.6530341256141283, + "grad_norm": 1.4474708027397767, + "learning_rate": 1.6043380097354543e-06, + "loss": 0.28801992535591125, + "step": 6225 + }, + { + "epoch": 1.6532996945956713, + "grad_norm": 1.1308003259460488, + "learning_rate": 1.6019535033227063e-06, + "loss": 0.1869816929101944, + "step": 6226 + }, + { + "epoch": 1.6535652635772142, + "grad_norm": 1.3022141110443597, + "learning_rate": 1.5995706159542768e-06, + "loss": 0.2569049894809723, + "step": 
6227 + }, + { + "epoch": 1.6538308325587572, + "grad_norm": 1.2689496619282572, + "learning_rate": 1.5971893480895583e-06, + "loss": 0.19138488173484802, + "step": 6228 + }, + { + "epoch": 1.6540964015403001, + "grad_norm": 1.2583553251304942, + "learning_rate": 1.5948097001876318e-06, + "loss": 0.23107777535915375, + "step": 6229 + }, + { + "epoch": 1.654361970521843, + "grad_norm": 1.4140324563807463, + "learning_rate": 1.5924316727072652e-06, + "loss": 0.21682313084602356, + "step": 6230 + }, + { + "epoch": 1.654627539503386, + "grad_norm": 1.6445896965406597, + "learning_rate": 1.5900552661069135e-06, + "loss": 0.27629974484443665, + "step": 6231 + }, + { + "epoch": 1.654893108484929, + "grad_norm": 1.2060133562172235, + "learning_rate": 1.587680480844721e-06, + "loss": 0.21919876337051392, + "step": 6232 + }, + { + "epoch": 1.655158677466472, + "grad_norm": 1.4827934801999716, + "learning_rate": 1.5853073173785183e-06, + "loss": 0.2556184232234955, + "step": 6233 + }, + { + "epoch": 1.655424246448015, + "grad_norm": 1.1362954303327644, + "learning_rate": 1.5829357761658214e-06, + "loss": 0.1904449462890625, + "step": 6234 + }, + { + "epoch": 1.6556898154295578, + "grad_norm": 1.2410374365127181, + "learning_rate": 1.5805658576638372e-06, + "loss": 0.1991434246301651, + "step": 6235 + }, + { + "epoch": 1.6559553844111008, + "grad_norm": 1.4428347821081515, + "learning_rate": 1.5781975623294554e-06, + "loss": 0.2609177231788635, + "step": 6236 + }, + { + "epoch": 1.6562209533926437, + "grad_norm": 1.276051044481299, + "learning_rate": 1.575830890619261e-06, + "loss": 0.2481592893600464, + "step": 6237 + }, + { + "epoch": 1.6564865223741867, + "grad_norm": 1.2930470444266673, + "learning_rate": 1.5734658429895156e-06, + "loss": 0.23855090141296387, + "step": 6238 + }, + { + "epoch": 1.6567520913557297, + "grad_norm": 1.326739898505445, + "learning_rate": 1.5711024198961745e-06, + "loss": 0.2480623573064804, + "step": 6239 + }, + { + "epoch": 1.6570176603372726, + 
"grad_norm": 1.4145385747738486, + "learning_rate": 1.5687406217948775e-06, + "loss": 0.2504739463329315, + "step": 6240 + }, + { + "epoch": 1.6572832293188156, + "grad_norm": 1.1843269954841462, + "learning_rate": 1.5663804491409506e-06, + "loss": 0.2068580538034439, + "step": 6241 + }, + { + "epoch": 1.6575487983003585, + "grad_norm": 1.45151426190796, + "learning_rate": 1.5640219023894077e-06, + "loss": 0.2448163628578186, + "step": 6242 + }, + { + "epoch": 1.6578143672819015, + "grad_norm": 1.3391765527579818, + "learning_rate": 1.5616649819949492e-06, + "loss": 0.2514716386795044, + "step": 6243 + }, + { + "epoch": 1.6580799362634444, + "grad_norm": 1.1884099966156902, + "learning_rate": 1.559309688411962e-06, + "loss": 0.2067629098892212, + "step": 6244 + }, + { + "epoch": 1.6583455052449874, + "grad_norm": 1.2042735442206352, + "learning_rate": 1.5569560220945168e-06, + "loss": 0.22909750044345856, + "step": 6245 + }, + { + "epoch": 1.6586110742265303, + "grad_norm": 1.4646403481954997, + "learning_rate": 1.5546039834963745e-06, + "loss": 0.203629732131958, + "step": 6246 + }, + { + "epoch": 1.6588766432080733, + "grad_norm": 1.2050936311763847, + "learning_rate": 1.552253573070981e-06, + "loss": 0.21919086575508118, + "step": 6247 + }, + { + "epoch": 1.6591422121896162, + "grad_norm": 1.4379501702554756, + "learning_rate": 1.549904791271466e-06, + "loss": 0.2535661458969116, + "step": 6248 + }, + { + "epoch": 1.6594077811711592, + "grad_norm": 1.2609582047884877, + "learning_rate": 1.5475576385506475e-06, + "loss": 0.224460631608963, + "step": 6249 + }, + { + "epoch": 1.6596733501527021, + "grad_norm": 1.2625738742925756, + "learning_rate": 1.5452121153610288e-06, + "loss": 0.21925818920135498, + "step": 6250 + }, + { + "epoch": 1.659938919134245, + "grad_norm": 1.2787763694898493, + "learning_rate": 1.5428682221547997e-06, + "loss": 0.2100696563720703, + "step": 6251 + }, + { + "epoch": 1.660204488115788, + "grad_norm": 1.3484219674096825, + 
"learning_rate": 1.540525959383834e-06, + "loss": 0.25982293486595154, + "step": 6252 + }, + { + "epoch": 1.660470057097331, + "grad_norm": 1.2527966644905648, + "learning_rate": 1.538185327499694e-06, + "loss": 0.23615162074565887, + "step": 6253 + }, + { + "epoch": 1.660735626078874, + "grad_norm": 1.2738910414784854, + "learning_rate": 1.5358463269536218e-06, + "loss": 0.2454022467136383, + "step": 6254 + }, + { + "epoch": 1.6610011950604169, + "grad_norm": 1.3825181535789863, + "learning_rate": 1.5335089581965556e-06, + "loss": 0.2330605536699295, + "step": 6255 + }, + { + "epoch": 1.6612667640419598, + "grad_norm": 1.2169082012465264, + "learning_rate": 1.5311732216791087e-06, + "loss": 0.23193006217479706, + "step": 6256 + }, + { + "epoch": 1.6615323330235028, + "grad_norm": 1.2690481284418431, + "learning_rate": 1.5288391178515838e-06, + "loss": 0.23254770040512085, + "step": 6257 + }, + { + "epoch": 1.6617979020050457, + "grad_norm": 1.2246821396199268, + "learning_rate": 1.5265066471639701e-06, + "loss": 0.23240572214126587, + "step": 6258 + }, + { + "epoch": 1.6620634709865887, + "grad_norm": 1.3414134094293932, + "learning_rate": 1.5241758100659386e-06, + "loss": 0.2765730619430542, + "step": 6259 + }, + { + "epoch": 1.6623290399681316, + "grad_norm": 1.2956291225041994, + "learning_rate": 1.5218466070068472e-06, + "loss": 0.26366496086120605, + "step": 6260 + }, + { + "epoch": 1.6625946089496746, + "grad_norm": 1.240730160583952, + "learning_rate": 1.5195190384357405e-06, + "loss": 0.22322653234004974, + "step": 6261 + }, + { + "epoch": 1.6628601779312175, + "grad_norm": 1.2433877123660553, + "learning_rate": 1.5171931048013466e-06, + "loss": 0.24144116044044495, + "step": 6262 + }, + { + "epoch": 1.6631257469127605, + "grad_norm": 1.3783130308299147, + "learning_rate": 1.5148688065520734e-06, + "loss": 0.24559618532657623, + "step": 6263 + }, + { + "epoch": 1.6633913158943034, + "grad_norm": 1.3258590224160887, + "learning_rate": 
1.5125461441360223e-06, + "loss": 0.24337056279182434, + "step": 6264 + }, + { + "epoch": 1.6636568848758464, + "grad_norm": 1.3292875380649603, + "learning_rate": 1.5102251180009752e-06, + "loss": 0.2733612358570099, + "step": 6265 + }, + { + "epoch": 1.6639224538573893, + "grad_norm": 1.2329811544038785, + "learning_rate": 1.5079057285943976e-06, + "loss": 0.2116459757089615, + "step": 6266 + }, + { + "epoch": 1.6641880228389323, + "grad_norm": 1.2335642813115397, + "learning_rate": 1.5055879763634407e-06, + "loss": 0.21221664547920227, + "step": 6267 + }, + { + "epoch": 1.6644535918204753, + "grad_norm": 1.2500150658336624, + "learning_rate": 1.503271861754939e-06, + "loss": 0.21166589856147766, + "step": 6268 + }, + { + "epoch": 1.6647191608020182, + "grad_norm": 1.5113123418333367, + "learning_rate": 1.5009573852154136e-06, + "loss": 0.2652161121368408, + "step": 6269 + }, + { + "epoch": 1.6649847297835612, + "grad_norm": 1.262834880378694, + "learning_rate": 1.4986445471910672e-06, + "loss": 0.22142267227172852, + "step": 6270 + }, + { + "epoch": 1.665250298765104, + "grad_norm": 1.4442965183949772, + "learning_rate": 1.4963333481277874e-06, + "loss": 0.2307332456111908, + "step": 6271 + }, + { + "epoch": 1.665515867746647, + "grad_norm": 1.411326986781179, + "learning_rate": 1.494023788471144e-06, + "loss": 0.2669411897659302, + "step": 6272 + }, + { + "epoch": 1.66578143672819, + "grad_norm": 1.2823998109594834, + "learning_rate": 1.4917158686663992e-06, + "loss": 0.2468804121017456, + "step": 6273 + }, + { + "epoch": 1.666047005709733, + "grad_norm": 1.2639666166307362, + "learning_rate": 1.4894095891584882e-06, + "loss": 0.24152463674545288, + "step": 6274 + }, + { + "epoch": 1.666312574691276, + "grad_norm": 1.098201760932299, + "learning_rate": 1.4871049503920353e-06, + "loss": 0.1966545283794403, + "step": 6275 + }, + { + "epoch": 1.6665781436728189, + "grad_norm": 1.2773845282560163, + "learning_rate": 1.4848019528113477e-06, + "loss": 
0.24772626161575317, + "step": 6276 + }, + { + "epoch": 1.6668437126543618, + "grad_norm": 1.3731672204722256, + "learning_rate": 1.4825005968604189e-06, + "loss": 0.22138851881027222, + "step": 6277 + }, + { + "epoch": 1.6671092816359048, + "grad_norm": 1.2245583238686863, + "learning_rate": 1.4802008829829172e-06, + "loss": 0.24345465004444122, + "step": 6278 + }, + { + "epoch": 1.6673748506174477, + "grad_norm": 1.3209828849983516, + "learning_rate": 1.477902811622205e-06, + "loss": 0.22862716019153595, + "step": 6279 + }, + { + "epoch": 1.6676404195989907, + "grad_norm": 1.2914770883474422, + "learning_rate": 1.4756063832213207e-06, + "loss": 0.2763083577156067, + "step": 6280 + }, + { + "epoch": 1.6679059885805336, + "grad_norm": 1.3142139937070516, + "learning_rate": 1.4733115982229885e-06, + "loss": 0.24631357192993164, + "step": 6281 + }, + { + "epoch": 1.6681715575620768, + "grad_norm": 1.322429969576976, + "learning_rate": 1.4710184570696184e-06, + "loss": 0.22650030255317688, + "step": 6282 + }, + { + "epoch": 1.6684371265436198, + "grad_norm": 1.3243342318873437, + "learning_rate": 1.4687269602033006e-06, + "loss": 0.2455909103155136, + "step": 6283 + }, + { + "epoch": 1.6687026955251627, + "grad_norm": 1.3711517369784783, + "learning_rate": 1.4664371080658079e-06, + "loss": 0.25625506043434143, + "step": 6284 + }, + { + "epoch": 1.6689682645067057, + "grad_norm": 1.1450036681372322, + "learning_rate": 1.4641489010985954e-06, + "loss": 0.22178369760513306, + "step": 6285 + }, + { + "epoch": 1.6692338334882486, + "grad_norm": 1.2644620602089436, + "learning_rate": 1.4618623397428055e-06, + "loss": 0.23936234414577484, + "step": 6286 + }, + { + "epoch": 1.6694994024697916, + "grad_norm": 1.2667144776178243, + "learning_rate": 1.459577424439258e-06, + "loss": 0.21629829704761505, + "step": 6287 + }, + { + "epoch": 1.6697649714513345, + "grad_norm": 1.3486786043134158, + "learning_rate": 1.457294155628457e-06, + "loss": 0.238427072763443, + "step": 6288 + 
}, + { + "epoch": 1.6700305404328775, + "grad_norm": 1.412674472973442, + "learning_rate": 1.4550125337505926e-06, + "loss": 0.23168250918388367, + "step": 6289 + }, + { + "epoch": 1.6702961094144204, + "grad_norm": 1.3185872633193214, + "learning_rate": 1.45273255924553e-06, + "loss": 0.25518402457237244, + "step": 6290 + }, + { + "epoch": 1.6705616783959634, + "grad_norm": 1.2092220747685465, + "learning_rate": 1.450454232552826e-06, + "loss": 0.2488553822040558, + "step": 6291 + }, + { + "epoch": 1.6708272473775063, + "grad_norm": 1.4309048190710245, + "learning_rate": 1.448177554111716e-06, + "loss": 0.2684085965156555, + "step": 6292 + }, + { + "epoch": 1.6710928163590493, + "grad_norm": 1.3645105519242562, + "learning_rate": 1.4459025243611124e-06, + "loss": 0.24627447128295898, + "step": 6293 + }, + { + "epoch": 1.6713583853405922, + "grad_norm": 1.2960987120962004, + "learning_rate": 1.4436291437396156e-06, + "loss": 0.24725376069545746, + "step": 6294 + }, + { + "epoch": 1.6716239543221352, + "grad_norm": 1.2752333210419433, + "learning_rate": 1.4413574126855067e-06, + "loss": 0.23488914966583252, + "step": 6295 + }, + { + "epoch": 1.6718895233036781, + "grad_norm": 1.2385365684534737, + "learning_rate": 1.4390873316367492e-06, + "loss": 0.2031177133321762, + "step": 6296 + }, + { + "epoch": 1.672155092285221, + "grad_norm": 1.265889760948498, + "learning_rate": 1.4368189010309874e-06, + "loss": 0.25378018617630005, + "step": 6297 + }, + { + "epoch": 1.672420661266764, + "grad_norm": 1.2443137764428682, + "learning_rate": 1.434552121305548e-06, + "loss": 0.21305282413959503, + "step": 6298 + }, + { + "epoch": 1.672686230248307, + "grad_norm": 1.1925787762252436, + "learning_rate": 1.432286992897437e-06, + "loss": 0.20908987522125244, + "step": 6299 + }, + { + "epoch": 1.67295179922985, + "grad_norm": 1.2228377563088515, + "learning_rate": 1.4300235162433496e-06, + "loss": 0.21945340931415558, + "step": 6300 + }, + { + "epoch": 1.6732173682113929, + 
"grad_norm": 1.3659267409445854, + "learning_rate": 1.4277616917796544e-06, + "loss": 0.22096669673919678, + "step": 6301 + }, + { + "epoch": 1.6734829371929358, + "grad_norm": 1.2773291306452106, + "learning_rate": 1.425501519942406e-06, + "loss": 0.2233850657939911, + "step": 6302 + }, + { + "epoch": 1.6737485061744788, + "grad_norm": 1.2672720076411363, + "learning_rate": 1.423243001167337e-06, + "loss": 0.21432995796203613, + "step": 6303 + }, + { + "epoch": 1.6740140751560217, + "grad_norm": 1.3864014459258447, + "learning_rate": 1.4209861358898636e-06, + "loss": 0.2649557590484619, + "step": 6304 + }, + { + "epoch": 1.6742796441375647, + "grad_norm": 1.2642836811067808, + "learning_rate": 1.418730924545083e-06, + "loss": 0.24918347597122192, + "step": 6305 + }, + { + "epoch": 1.6745452131191076, + "grad_norm": 1.3089175693989048, + "learning_rate": 1.4164773675677745e-06, + "loss": 0.24121029675006866, + "step": 6306 + }, + { + "epoch": 1.6748107821006506, + "grad_norm": 1.2569762960026158, + "learning_rate": 1.4142254653923949e-06, + "loss": 0.24401789903640747, + "step": 6307 + }, + { + "epoch": 1.6750763510821935, + "grad_norm": 1.3272546708188746, + "learning_rate": 1.4119752184530867e-06, + "loss": 0.2374853938817978, + "step": 6308 + }, + { + "epoch": 1.6753419200637365, + "grad_norm": 1.2973848864698938, + "learning_rate": 1.4097266271836695e-06, + "loss": 0.2351088970899582, + "step": 6309 + }, + { + "epoch": 1.6756074890452797, + "grad_norm": 1.301417674196528, + "learning_rate": 1.407479692017647e-06, + "loss": 0.19560754299163818, + "step": 6310 + }, + { + "epoch": 1.6758730580268226, + "grad_norm": 1.390250023674765, + "learning_rate": 1.405234413388199e-06, + "loss": 0.24124252796173096, + "step": 6311 + }, + { + "epoch": 1.6761386270083656, + "grad_norm": 1.3742469305206364, + "learning_rate": 1.4029907917281903e-06, + "loss": 0.2208215445280075, + "step": 6312 + }, + { + "epoch": 1.6764041959899085, + "grad_norm": 1.2125662977366807, + 
"learning_rate": 1.4007488274701653e-06, + "loss": 0.23888292908668518, + "step": 6313 + }, + { + "epoch": 1.6766697649714515, + "grad_norm": 1.2936432356109655, + "learning_rate": 1.3985085210463479e-06, + "loss": 0.24079063534736633, + "step": 6314 + }, + { + "epoch": 1.6769353339529944, + "grad_norm": 1.2011852751375642, + "learning_rate": 1.3962698728886414e-06, + "loss": 0.18975606560707092, + "step": 6315 + }, + { + "epoch": 1.6772009029345374, + "grad_norm": 1.322599968285396, + "learning_rate": 1.3940328834286333e-06, + "loss": 0.201214998960495, + "step": 6316 + }, + { + "epoch": 1.6774664719160803, + "grad_norm": 1.2090909210103018, + "learning_rate": 1.3917975530975836e-06, + "loss": 0.20079322159290314, + "step": 6317 + }, + { + "epoch": 1.6777320408976233, + "grad_norm": 1.2732868066143843, + "learning_rate": 1.3895638823264447e-06, + "loss": 0.23593586683273315, + "step": 6318 + }, + { + "epoch": 1.6779976098791662, + "grad_norm": 1.3931846809533017, + "learning_rate": 1.3873318715458383e-06, + "loss": 0.26574259996414185, + "step": 6319 + }, + { + "epoch": 1.6782631788607092, + "grad_norm": 1.252943610173436, + "learning_rate": 1.3851015211860696e-06, + "loss": 0.20573323965072632, + "step": 6320 + }, + { + "epoch": 1.6785287478422521, + "grad_norm": 1.4484920974875073, + "learning_rate": 1.3828728316771244e-06, + "loss": 0.25610506534576416, + "step": 6321 + }, + { + "epoch": 1.678794316823795, + "grad_norm": 1.330338299337135, + "learning_rate": 1.380645803448668e-06, + "loss": 0.2138693630695343, + "step": 6322 + }, + { + "epoch": 1.679059885805338, + "grad_norm": 1.1479105398064924, + "learning_rate": 1.3784204369300447e-06, + "loss": 0.21522866189479828, + "step": 6323 + }, + { + "epoch": 1.679325454786881, + "grad_norm": 1.441538971613898, + "learning_rate": 1.376196732550279e-06, + "loss": 0.25622743368148804, + "step": 6324 + }, + { + "epoch": 1.679591023768424, + "grad_norm": 1.354050705773023, + "learning_rate": 1.3739746907380757e-06, + 
"loss": 0.18025386333465576, + "step": 6325 + }, + { + "epoch": 1.679856592749967, + "grad_norm": 1.1665775097977176, + "learning_rate": 1.3717543119218168e-06, + "loss": 0.18785078823566437, + "step": 6326 + }, + { + "epoch": 1.6801221617315099, + "grad_norm": 1.3771154706722653, + "learning_rate": 1.3695355965295653e-06, + "loss": 0.24682481586933136, + "step": 6327 + }, + { + "epoch": 1.6803877307130528, + "grad_norm": 1.2994385931646761, + "learning_rate": 1.3673185449890647e-06, + "loss": 0.2193487137556076, + "step": 6328 + }, + { + "epoch": 1.6806532996945958, + "grad_norm": 1.2960131024456552, + "learning_rate": 1.3651031577277351e-06, + "loss": 0.24963265657424927, + "step": 6329 + }, + { + "epoch": 1.6809188686761387, + "grad_norm": 1.2714587333981215, + "learning_rate": 1.3628894351726785e-06, + "loss": 0.21473057568073273, + "step": 6330 + }, + { + "epoch": 1.6811844376576817, + "grad_norm": 1.4508064568072063, + "learning_rate": 1.3606773777506731e-06, + "loss": 0.2539534866809845, + "step": 6331 + }, + { + "epoch": 1.6814500066392246, + "grad_norm": 1.5049767699399101, + "learning_rate": 1.3584669858881771e-06, + "loss": 0.2671799659729004, + "step": 6332 + }, + { + "epoch": 1.6817155756207676, + "grad_norm": 1.211295376852026, + "learning_rate": 1.3562582600113295e-06, + "loss": 0.24291013181209564, + "step": 6333 + }, + { + "epoch": 1.6819811446023105, + "grad_norm": 1.3672105989135315, + "learning_rate": 1.354051200545946e-06, + "loss": 0.24249233305454254, + "step": 6334 + }, + { + "epoch": 1.6822467135838535, + "grad_norm": 1.2855842039831968, + "learning_rate": 1.351845807917519e-06, + "loss": 0.21647261083126068, + "step": 6335 + }, + { + "epoch": 1.6825122825653964, + "grad_norm": 1.2764605035604815, + "learning_rate": 1.349642082551227e-06, + "loss": 0.2348332703113556, + "step": 6336 + }, + { + "epoch": 1.6827778515469394, + "grad_norm": 1.3049495455341118, + "learning_rate": 1.34744002487192e-06, + "loss": 0.22503259778022766, + "step": 
6337 + }, + { + "epoch": 1.6830434205284823, + "grad_norm": 1.3236190891705721, + "learning_rate": 1.3452396353041286e-06, + "loss": 0.2397763580083847, + "step": 6338 + }, + { + "epoch": 1.6833089895100253, + "grad_norm": 1.156426557066381, + "learning_rate": 1.3430409142720624e-06, + "loss": 0.23345956206321716, + "step": 6339 + }, + { + "epoch": 1.6835745584915682, + "grad_norm": 1.1932341696009043, + "learning_rate": 1.3408438621996088e-06, + "loss": 0.19660598039627075, + "step": 6340 + }, + { + "epoch": 1.6838401274731112, + "grad_norm": 1.262928020262074, + "learning_rate": 1.3386484795103327e-06, + "loss": 0.19148695468902588, + "step": 6341 + }, + { + "epoch": 1.6841056964546541, + "grad_norm": 1.2112774084067142, + "learning_rate": 1.3364547666274819e-06, + "loss": 0.2078169733285904, + "step": 6342 + }, + { + "epoch": 1.684371265436197, + "grad_norm": 1.3703852622718744, + "learning_rate": 1.3342627239739715e-06, + "loss": 0.23122575879096985, + "step": 6343 + }, + { + "epoch": 1.68463683441774, + "grad_norm": 1.350523705417422, + "learning_rate": 1.3320723519724032e-06, + "loss": 0.2744083106517792, + "step": 6344 + }, + { + "epoch": 1.684902403399283, + "grad_norm": 1.3462449472678248, + "learning_rate": 1.3298836510450597e-06, + "loss": 0.26361098885536194, + "step": 6345 + }, + { + "epoch": 1.685167972380826, + "grad_norm": 1.2550654654863131, + "learning_rate": 1.3276966216138932e-06, + "loss": 0.21833205223083496, + "step": 6346 + }, + { + "epoch": 1.685433541362369, + "grad_norm": 1.306325021058624, + "learning_rate": 1.3255112641005374e-06, + "loss": 0.22075100243091583, + "step": 6347 + }, + { + "epoch": 1.6856991103439118, + "grad_norm": 1.4286786068270776, + "learning_rate": 1.3233275789263034e-06, + "loss": 0.24352343380451202, + "step": 6348 + }, + { + "epoch": 1.6859646793254548, + "grad_norm": 1.5476580340833483, + "learning_rate": 1.3211455665121808e-06, + "loss": 0.2331303060054779, + "step": 6349 + }, + { + "epoch": 1.6862302483069977, 
+ "grad_norm": 1.398559395598541, + "learning_rate": 1.3189652272788356e-06, + "loss": 0.2511689066886902, + "step": 6350 + }, + { + "epoch": 1.6864958172885407, + "grad_norm": 1.1704691076383393, + "learning_rate": 1.3167865616466113e-06, + "loss": 0.18535873293876648, + "step": 6351 + }, + { + "epoch": 1.6867613862700837, + "grad_norm": 1.3097469055952822, + "learning_rate": 1.3146095700355289e-06, + "loss": 0.23924914002418518, + "step": 6352 + }, + { + "epoch": 1.6870269552516266, + "grad_norm": 1.1591649275755667, + "learning_rate": 1.3124342528652845e-06, + "loss": 0.19710025191307068, + "step": 6353 + }, + { + "epoch": 1.6872925242331696, + "grad_norm": 1.393629731020981, + "learning_rate": 1.3102606105552585e-06, + "loss": 0.21439281105995178, + "step": 6354 + }, + { + "epoch": 1.6875580932147125, + "grad_norm": 1.3051512833867451, + "learning_rate": 1.3080886435245e-06, + "loss": 0.2647722363471985, + "step": 6355 + }, + { + "epoch": 1.6878236621962555, + "grad_norm": 2.6038516980586355, + "learning_rate": 1.3059183521917396e-06, + "loss": 0.2202019840478897, + "step": 6356 + }, + { + "epoch": 1.6880892311777984, + "grad_norm": 1.3022104210295473, + "learning_rate": 1.3037497369753871e-06, + "loss": 0.25833001732826233, + "step": 6357 + }, + { + "epoch": 1.6883548001593414, + "grad_norm": 1.1906464618269579, + "learning_rate": 1.3015827982935192e-06, + "loss": 0.19984321296215057, + "step": 6358 + }, + { + "epoch": 1.6886203691408843, + "grad_norm": 1.3347301103088016, + "learning_rate": 1.2994175365638996e-06, + "loss": 0.2190552055835724, + "step": 6359 + }, + { + "epoch": 1.6888859381224273, + "grad_norm": 1.265894337049371, + "learning_rate": 1.2972539522039652e-06, + "loss": 0.26262593269348145, + "step": 6360 + }, + { + "epoch": 1.6891515071039702, + "grad_norm": 1.285416913994909, + "learning_rate": 1.2950920456308292e-06, + "loss": 0.2665651738643646, + "step": 6361 + }, + { + "epoch": 1.6894170760855132, + "grad_norm": 1.213162722605336, + 
"learning_rate": 1.2929318172612803e-06, + "loss": 0.22369208931922913, + "step": 6362 + }, + { + "epoch": 1.6896826450670561, + "grad_norm": 1.2234073567984471, + "learning_rate": 1.2907732675117878e-06, + "loss": 0.21063543856143951, + "step": 6363 + }, + { + "epoch": 1.689948214048599, + "grad_norm": 1.3608426715056905, + "learning_rate": 1.2886163967984944e-06, + "loss": 0.2303045690059662, + "step": 6364 + }, + { + "epoch": 1.690213783030142, + "grad_norm": 1.1473656525455074, + "learning_rate": 1.2864612055372182e-06, + "loss": 0.20185884833335876, + "step": 6365 + }, + { + "epoch": 1.690479352011685, + "grad_norm": 1.2673026097919315, + "learning_rate": 1.284307694143455e-06, + "loss": 0.22900527715682983, + "step": 6366 + }, + { + "epoch": 1.690744920993228, + "grad_norm": 1.2373147270640896, + "learning_rate": 1.282155863032377e-06, + "loss": 0.21405862271785736, + "step": 6367 + }, + { + "epoch": 1.6910104899747709, + "grad_norm": 1.3139606008654157, + "learning_rate": 1.2800057126188304e-06, + "loss": 0.26143258810043335, + "step": 6368 + }, + { + "epoch": 1.6912760589563138, + "grad_norm": 1.319330305112879, + "learning_rate": 1.2778572433173397e-06, + "loss": 0.24437926709651947, + "step": 6369 + }, + { + "epoch": 1.6915416279378568, + "grad_norm": 1.1954155676954614, + "learning_rate": 1.275710455542104e-06, + "loss": 0.24862337112426758, + "step": 6370 + }, + { + "epoch": 1.6918071969193997, + "grad_norm": 1.2264107157331223, + "learning_rate": 1.2735653497069978e-06, + "loss": 0.2146604359149933, + "step": 6371 + }, + { + "epoch": 1.6920727659009427, + "grad_norm": 1.3217815480091177, + "learning_rate": 1.2714219262255777e-06, + "loss": 0.2525256872177124, + "step": 6372 + }, + { + "epoch": 1.6923383348824856, + "grad_norm": 1.289957068010404, + "learning_rate": 1.2692801855110638e-06, + "loss": 0.23462912440299988, + "step": 6373 + }, + { + "epoch": 1.6926039038640286, + "grad_norm": 1.3468375801476438, + "learning_rate": 1.2671401279763595e-06, + 
"loss": 0.21551170945167542, + "step": 6374 + }, + { + "epoch": 1.6928694728455715, + "grad_norm": 1.4457180200872415, + "learning_rate": 1.2650017540340454e-06, + "loss": 0.24094407260417938, + "step": 6375 + }, + { + "epoch": 1.6931350418271145, + "grad_norm": 1.2168123169553724, + "learning_rate": 1.2628650640963736e-06, + "loss": 0.23101133108139038, + "step": 6376 + }, + { + "epoch": 1.6934006108086574, + "grad_norm": 1.4830646801660192, + "learning_rate": 1.2607300585752724e-06, + "loss": 0.2513899803161621, + "step": 6377 + }, + { + "epoch": 1.6936661797902004, + "grad_norm": 1.417144859782869, + "learning_rate": 1.258596737882345e-06, + "loss": 0.2490600198507309, + "step": 6378 + }, + { + "epoch": 1.6939317487717434, + "grad_norm": 1.3403225341914131, + "learning_rate": 1.256465102428872e-06, + "loss": 0.25767675042152405, + "step": 6379 + }, + { + "epoch": 1.6941973177532863, + "grad_norm": 1.2775246675329248, + "learning_rate": 1.254335152625804e-06, + "loss": 0.2231348305940628, + "step": 6380 + }, + { + "epoch": 1.6944628867348293, + "grad_norm": 1.4410136520558763, + "learning_rate": 1.2522068888837758e-06, + "loss": 0.25873979926109314, + "step": 6381 + }, + { + "epoch": 1.6947284557163722, + "grad_norm": 1.4111151195923193, + "learning_rate": 1.2500803116130887e-06, + "loss": 0.2848423421382904, + "step": 6382 + }, + { + "epoch": 1.6949940246979152, + "grad_norm": 1.1110125207312456, + "learning_rate": 1.247955421223721e-06, + "loss": 0.21343804895877838, + "step": 6383 + }, + { + "epoch": 1.695259593679458, + "grad_norm": 1.3025436504976033, + "learning_rate": 1.245832218125328e-06, + "loss": 0.23080062866210938, + "step": 6384 + }, + { + "epoch": 1.695525162661001, + "grad_norm": 1.3020267493975237, + "learning_rate": 1.2437107027272376e-06, + "loss": 0.2397225797176361, + "step": 6385 + }, + { + "epoch": 1.695790731642544, + "grad_norm": 1.3120966348534624, + "learning_rate": 1.2415908754384532e-06, + "loss": 0.22798654437065125, + "step": 6386 + 
}, + { + "epoch": 1.696056300624087, + "grad_norm": 1.3399304326822938, + "learning_rate": 1.2394727366676518e-06, + "loss": 0.2534061074256897, + "step": 6387 + }, + { + "epoch": 1.69632186960563, + "grad_norm": 1.2269756633197797, + "learning_rate": 1.2373562868231858e-06, + "loss": 0.2127036452293396, + "step": 6388 + }, + { + "epoch": 1.6965874385871729, + "grad_norm": 1.341525895521795, + "learning_rate": 1.2352415263130813e-06, + "loss": 0.22341205179691315, + "step": 6389 + }, + { + "epoch": 1.6968530075687158, + "grad_norm": 1.316572711467383, + "learning_rate": 1.2331284555450406e-06, + "loss": 0.2435426563024521, + "step": 6390 + }, + { + "epoch": 1.6971185765502588, + "grad_norm": 1.3203864338710647, + "learning_rate": 1.2310170749264383e-06, + "loss": 0.24652531743049622, + "step": 6391 + }, + { + "epoch": 1.6973841455318017, + "grad_norm": 1.251250109623578, + "learning_rate": 1.228907384864323e-06, + "loss": 0.24172671139240265, + "step": 6392 + }, + { + "epoch": 1.6976497145133447, + "grad_norm": 1.293405881850453, + "learning_rate": 1.2267993857654182e-06, + "loss": 0.21534420549869537, + "step": 6393 + }, + { + "epoch": 1.6979152834948879, + "grad_norm": 2.1259133697182575, + "learning_rate": 1.2246930780361221e-06, + "loss": 0.2617778182029724, + "step": 6394 + }, + { + "epoch": 1.6981808524764308, + "grad_norm": 1.1793022391098469, + "learning_rate": 1.2225884620825046e-06, + "loss": 0.20388583838939667, + "step": 6395 + }, + { + "epoch": 1.6984464214579738, + "grad_norm": 1.289033320527503, + "learning_rate": 1.220485538310312e-06, + "loss": 0.23714327812194824, + "step": 6396 + }, + { + "epoch": 1.6987119904395167, + "grad_norm": 1.3592785135687544, + "learning_rate": 1.2183843071249634e-06, + "loss": 0.2495463341474533, + "step": 6397 + }, + { + "epoch": 1.6989775594210597, + "grad_norm": 1.2730498991215184, + "learning_rate": 1.2162847689315483e-06, + "loss": 0.2419012188911438, + "step": 6398 + }, + { + "epoch": 1.6992431284026026, + 
"grad_norm": 1.2226640861076554, + "learning_rate": 1.214186924134838e-06, + "loss": 0.23392438888549805, + "step": 6399 + }, + { + "epoch": 1.6995086973841456, + "grad_norm": 1.3210458214149883, + "learning_rate": 1.2120907731392695e-06, + "loss": 0.22855526208877563, + "step": 6400 + }, + { + "epoch": 1.6997742663656885, + "grad_norm": 1.2152782326664608, + "learning_rate": 1.2099963163489558e-06, + "loss": 0.22393949329853058, + "step": 6401 + }, + { + "epoch": 1.7000398353472315, + "grad_norm": 1.3855673404796554, + "learning_rate": 1.2079035541676832e-06, + "loss": 0.2539960741996765, + "step": 6402 + }, + { + "epoch": 1.7003054043287744, + "grad_norm": 1.3330270743987416, + "learning_rate": 1.2058124869989129e-06, + "loss": 0.23716852068901062, + "step": 6403 + }, + { + "epoch": 1.7005709733103174, + "grad_norm": 1.347782549245642, + "learning_rate": 1.2037231152457773e-06, + "loss": 0.24658545851707458, + "step": 6404 + }, + { + "epoch": 1.7008365422918603, + "grad_norm": 1.2494300647338343, + "learning_rate": 1.201635439311083e-06, + "loss": 0.2316630333662033, + "step": 6405 + }, + { + "epoch": 1.7011021112734033, + "grad_norm": 1.0834142572483991, + "learning_rate": 1.1995494595973089e-06, + "loss": 0.20434345304965973, + "step": 6406 + }, + { + "epoch": 1.7013676802549462, + "grad_norm": 1.3445140884275912, + "learning_rate": 1.197465176506607e-06, + "loss": 0.2585931420326233, + "step": 6407 + }, + { + "epoch": 1.7016332492364892, + "grad_norm": 1.2567668360829787, + "learning_rate": 1.1953825904408033e-06, + "loss": 0.23007069528102875, + "step": 6408 + }, + { + "epoch": 1.7018988182180321, + "grad_norm": 1.2770978609777501, + "learning_rate": 1.1933017018013948e-06, + "loss": 0.21822810173034668, + "step": 6409 + }, + { + "epoch": 1.702164387199575, + "grad_norm": 1.2875752799081717, + "learning_rate": 1.1912225109895526e-06, + "loss": 0.241228848695755, + "step": 6410 + }, + { + "epoch": 1.702429956181118, + "grad_norm": 1.3509759956774154, + 
"learning_rate": 1.1891450184061203e-06, + "loss": 0.28803908824920654, + "step": 6411 + }, + { + "epoch": 1.702695525162661, + "grad_norm": 1.3018941028318989, + "learning_rate": 1.1870692244516147e-06, + "loss": 0.2387516349554062, + "step": 6412 + }, + { + "epoch": 1.702961094144204, + "grad_norm": 1.2538051398244094, + "learning_rate": 1.1849951295262242e-06, + "loss": 0.19774140417575836, + "step": 6413 + }, + { + "epoch": 1.7032266631257469, + "grad_norm": 1.269953409174644, + "learning_rate": 1.1829227340298088e-06, + "loss": 0.22842247784137726, + "step": 6414 + }, + { + "epoch": 1.7034922321072898, + "grad_norm": 1.1987695898844528, + "learning_rate": 1.1808520383619015e-06, + "loss": 0.21994739770889282, + "step": 6415 + }, + { + "epoch": 1.7037578010888328, + "grad_norm": 1.2719096074486522, + "learning_rate": 1.1787830429217084e-06, + "loss": 0.22328051924705505, + "step": 6416 + }, + { + "epoch": 1.7040233700703757, + "grad_norm": 1.3583279531737376, + "learning_rate": 1.1767157481081092e-06, + "loss": 0.26704326272010803, + "step": 6417 + }, + { + "epoch": 1.7042889390519187, + "grad_norm": 1.2796404749500392, + "learning_rate": 1.174650154319653e-06, + "loss": 0.2148481160402298, + "step": 6418 + }, + { + "epoch": 1.7045545080334616, + "grad_norm": 1.1912742761204351, + "learning_rate": 1.1725862619545625e-06, + "loss": 0.21731218695640564, + "step": 6419 + }, + { + "epoch": 1.7048200770150046, + "grad_norm": 1.3502505047017879, + "learning_rate": 1.1705240714107301e-06, + "loss": 0.20832043886184692, + "step": 6420 + }, + { + "epoch": 1.7050856459965475, + "grad_norm": 1.2922565511595965, + "learning_rate": 1.1684635830857249e-06, + "loss": 0.21739046275615692, + "step": 6421 + }, + { + "epoch": 1.7053512149780907, + "grad_norm": 1.3041232291639149, + "learning_rate": 1.1664047973767811e-06, + "loss": 0.23972246050834656, + "step": 6422 + }, + { + "epoch": 1.7056167839596337, + "grad_norm": 1.2420174603299015, + "learning_rate": 
1.1643477146808092e-06, + "loss": 0.2471289187669754, + "step": 6423 + }, + { + "epoch": 1.7058823529411766, + "grad_norm": 1.2148999014811244, + "learning_rate": 1.1622923353943916e-06, + "loss": 0.2014283537864685, + "step": 6424 + }, + { + "epoch": 1.7061479219227196, + "grad_norm": 1.1799937956162947, + "learning_rate": 1.1602386599137782e-06, + "loss": 0.21680915355682373, + "step": 6425 + }, + { + "epoch": 1.7064134909042625, + "grad_norm": 1.2221660563202492, + "learning_rate": 1.158186688634898e-06, + "loss": 0.2101205736398697, + "step": 6426 + }, + { + "epoch": 1.7066790598858055, + "grad_norm": 1.2879683442276364, + "learning_rate": 1.1561364219533444e-06, + "loss": 0.22114071249961853, + "step": 6427 + }, + { + "epoch": 1.7069446288673484, + "grad_norm": 1.2910925736026095, + "learning_rate": 1.1540878602643858e-06, + "loss": 0.20608706772327423, + "step": 6428 + }, + { + "epoch": 1.7072101978488914, + "grad_norm": 1.2486066037383718, + "learning_rate": 1.1520410039629593e-06, + "loss": 0.2247905433177948, + "step": 6429 + }, + { + "epoch": 1.7074757668304343, + "grad_norm": 1.1718742986299986, + "learning_rate": 1.1499958534436751e-06, + "loss": 0.22623226046562195, + "step": 6430 + }, + { + "epoch": 1.7077413358119773, + "grad_norm": 1.2776253558863635, + "learning_rate": 1.1479524091008142e-06, + "loss": 0.2063906192779541, + "step": 6431 + }, + { + "epoch": 1.7080069047935202, + "grad_norm": 1.4035125322254989, + "learning_rate": 1.1459106713283286e-06, + "loss": 0.2787795960903168, + "step": 6432 + }, + { + "epoch": 1.7082724737750632, + "grad_norm": 1.2096674582385407, + "learning_rate": 1.1438706405198419e-06, + "loss": 0.23090440034866333, + "step": 6433 + }, + { + "epoch": 1.7085380427566061, + "grad_norm": 1.288319877687408, + "learning_rate": 1.141832317068645e-06, + "loss": 0.23690670728683472, + "step": 6434 + }, + { + "epoch": 1.708803611738149, + "grad_norm": 1.2499926164056985, + "learning_rate": 1.1397957013677064e-06, + "loss": 
0.209202378988266, + "step": 6435 + }, + { + "epoch": 1.709069180719692, + "grad_norm": 1.2311768368116, + "learning_rate": 1.1377607938096635e-06, + "loss": 0.22541575133800507, + "step": 6436 + }, + { + "epoch": 1.709334749701235, + "grad_norm": 1.3505125458173146, + "learning_rate": 1.1357275947868162e-06, + "loss": 0.2460884153842926, + "step": 6437 + }, + { + "epoch": 1.709600318682778, + "grad_norm": 1.195327574575731, + "learning_rate": 1.1336961046911443e-06, + "loss": 0.21967202425003052, + "step": 6438 + }, + { + "epoch": 1.709865887664321, + "grad_norm": 1.346022527152768, + "learning_rate": 1.1316663239142954e-06, + "loss": 0.23619329929351807, + "step": 6439 + }, + { + "epoch": 1.7101314566458639, + "grad_norm": 1.3033234842407981, + "learning_rate": 1.129638252847587e-06, + "loss": 0.24563436210155487, + "step": 6440 + }, + { + "epoch": 1.7103970256274068, + "grad_norm": 1.3840933006905622, + "learning_rate": 1.1276118918820068e-06, + "loss": 0.25508859753608704, + "step": 6441 + }, + { + "epoch": 1.7106625946089498, + "grad_norm": 1.3406379279103604, + "learning_rate": 1.1255872414082136e-06, + "loss": 0.24761545658111572, + "step": 6442 + }, + { + "epoch": 1.7109281635904927, + "grad_norm": 4.632018568484065, + "learning_rate": 1.1235643018165344e-06, + "loss": 0.2355962097644806, + "step": 6443 + }, + { + "epoch": 1.7111937325720357, + "grad_norm": 1.3274457548497118, + "learning_rate": 1.1215430734969723e-06, + "loss": 0.2534273862838745, + "step": 6444 + }, + { + "epoch": 1.7114593015535786, + "grad_norm": 1.2846712625276346, + "learning_rate": 1.1195235568391938e-06, + "loss": 0.2756424844264984, + "step": 6445 + }, + { + "epoch": 1.7117248705351216, + "grad_norm": 1.2126020570228762, + "learning_rate": 1.1175057522325383e-06, + "loss": 0.2198309451341629, + "step": 6446 + }, + { + "epoch": 1.7119904395166645, + "grad_norm": 1.2343738377988847, + "learning_rate": 1.1154896600660136e-06, + "loss": 0.21767666935920715, + "step": 6447 + }, + { + 
"epoch": 1.7122560084982075, + "grad_norm": 1.4965895030859304, + "learning_rate": 1.1134752807283e-06, + "loss": 0.2679128348827362, + "step": 6448 + }, + { + "epoch": 1.7125215774797504, + "grad_norm": 1.292131622576057, + "learning_rate": 1.1114626146077457e-06, + "loss": 0.2268792986869812, + "step": 6449 + }, + { + "epoch": 1.7127871464612934, + "grad_norm": 1.224637524783582, + "learning_rate": 1.109451662092369e-06, + "loss": 0.21585378050804138, + "step": 6450 + }, + { + "epoch": 1.7130527154428363, + "grad_norm": 1.3157463227820392, + "learning_rate": 1.1074424235698567e-06, + "loss": 0.2258647382259369, + "step": 6451 + }, + { + "epoch": 1.7133182844243793, + "grad_norm": 1.3742268123946286, + "learning_rate": 1.1054348994275677e-06, + "loss": 0.2456682175397873, + "step": 6452 + }, + { + "epoch": 1.7135838534059222, + "grad_norm": 1.4853732102975625, + "learning_rate": 1.1034290900525279e-06, + "loss": 0.22897745668888092, + "step": 6453 + }, + { + "epoch": 1.7138494223874652, + "grad_norm": 1.133114987282755, + "learning_rate": 1.101424995831435e-06, + "loss": 0.1910650134086609, + "step": 6454 + }, + { + "epoch": 1.7141149913690081, + "grad_norm": 1.2728981818199352, + "learning_rate": 1.0994226171506529e-06, + "loss": 0.2519158720970154, + "step": 6455 + }, + { + "epoch": 1.714380560350551, + "grad_norm": 1.259309948081026, + "learning_rate": 1.0974219543962184e-06, + "loss": 0.24191951751708984, + "step": 6456 + }, + { + "epoch": 1.714646129332094, + "grad_norm": 1.3159238719963862, + "learning_rate": 1.0954230079538352e-06, + "loss": 0.2560814619064331, + "step": 6457 + }, + { + "epoch": 1.714911698313637, + "grad_norm": 1.2640782659289207, + "learning_rate": 1.0934257782088763e-06, + "loss": 0.22969035804271698, + "step": 6458 + }, + { + "epoch": 1.71517726729518, + "grad_norm": 1.3584917562872394, + "learning_rate": 1.0914302655463837e-06, + "loss": 0.26114046573638916, + "step": 6459 + }, + { + "epoch": 1.715442836276723, + "grad_norm": 
1.2235177756044688, + "learning_rate": 1.0894364703510685e-06, + "loss": 0.21457752585411072, + "step": 6460 + }, + { + "epoch": 1.7157084052582658, + "grad_norm": 1.164559577491723, + "learning_rate": 1.0874443930073098e-06, + "loss": 0.19998760521411896, + "step": 6461 + }, + { + "epoch": 1.7159739742398088, + "grad_norm": 1.2278101157674874, + "learning_rate": 1.0854540338991615e-06, + "loss": 0.2379671037197113, + "step": 6462 + }, + { + "epoch": 1.7162395432213517, + "grad_norm": 1.3827652808641404, + "learning_rate": 1.0834653934103367e-06, + "loss": 0.2236609309911728, + "step": 6463 + }, + { + "epoch": 1.7165051122028947, + "grad_norm": 1.2673726734268553, + "learning_rate": 1.0814784719242234e-06, + "loss": 0.22507379949092865, + "step": 6464 + }, + { + "epoch": 1.7167706811844377, + "grad_norm": 1.3174434539455087, + "learning_rate": 1.079493269823877e-06, + "loss": 0.22138816118240356, + "step": 6465 + }, + { + "epoch": 1.7170362501659806, + "grad_norm": 1.3880746036316538, + "learning_rate": 1.0775097874920204e-06, + "loss": 0.227338969707489, + "step": 6466 + }, + { + "epoch": 1.7173018191475236, + "grad_norm": 1.2588670866885754, + "learning_rate": 1.0755280253110466e-06, + "loss": 0.23694375157356262, + "step": 6467 + }, + { + "epoch": 1.7175673881290665, + "grad_norm": 1.365387614603678, + "learning_rate": 1.0735479836630136e-06, + "loss": 0.26219409704208374, + "step": 6468 + }, + { + "epoch": 1.7178329571106095, + "grad_norm": 1.20539748496599, + "learning_rate": 1.0715696629296524e-06, + "loss": 0.22215887904167175, + "step": 6469 + }, + { + "epoch": 1.7180985260921524, + "grad_norm": 1.3543481839639284, + "learning_rate": 1.0695930634923602e-06, + "loss": 0.25434768199920654, + "step": 6470 + }, + { + "epoch": 1.7183640950736954, + "grad_norm": 1.1809119822759757, + "learning_rate": 1.0676181857321998e-06, + "loss": 0.2092076987028122, + "step": 6471 + }, + { + "epoch": 1.7186296640552383, + "grad_norm": 1.330663320526799, + "learning_rate": 
1.0656450300299048e-06, + "loss": 0.2710237503051758, + "step": 6472 + }, + { + "epoch": 1.7188952330367813, + "grad_norm": 1.2715188060789504, + "learning_rate": 1.0636735967658785e-06, + "loss": 0.2533886432647705, + "step": 6473 + }, + { + "epoch": 1.7191608020183242, + "grad_norm": 1.2174102707049457, + "learning_rate": 1.0617038863201878e-06, + "loss": 0.2545754909515381, + "step": 6474 + }, + { + "epoch": 1.7194263709998672, + "grad_norm": 1.2560655592374788, + "learning_rate": 1.0597358990725703e-06, + "loss": 0.26010993123054504, + "step": 6475 + }, + { + "epoch": 1.7196919399814101, + "grad_norm": 1.2632076366916114, + "learning_rate": 1.0577696354024314e-06, + "loss": 0.22529907524585724, + "step": 6476 + }, + { + "epoch": 1.719957508962953, + "grad_norm": 1.157260113755536, + "learning_rate": 1.0558050956888433e-06, + "loss": 0.1897469311952591, + "step": 6477 + }, + { + "epoch": 1.720223077944496, + "grad_norm": 1.31651804495616, + "learning_rate": 1.0538422803105441e-06, + "loss": 0.24663670361042023, + "step": 6478 + }, + { + "epoch": 1.720488646926039, + "grad_norm": 1.343902959790046, + "learning_rate": 1.0518811896459423e-06, + "loss": 0.2462892383337021, + "step": 6479 + }, + { + "epoch": 1.720754215907582, + "grad_norm": 1.117431347891292, + "learning_rate": 1.0499218240731157e-06, + "loss": 0.18652144074440002, + "step": 6480 + }, + { + "epoch": 1.7210197848891249, + "grad_norm": 1.2234103731079693, + "learning_rate": 1.0479641839698052e-06, + "loss": 0.24614468216896057, + "step": 6481 + }, + { + "epoch": 1.7212853538706678, + "grad_norm": 1.2632894895468527, + "learning_rate": 1.046008269713421e-06, + "loss": 0.27925312519073486, + "step": 6482 + }, + { + "epoch": 1.7215509228522108, + "grad_norm": 1.3426272887839532, + "learning_rate": 1.0440540816810395e-06, + "loss": 0.2626710832118988, + "step": 6483 + }, + { + "epoch": 1.7218164918337537, + "grad_norm": 1.2982212521269376, + "learning_rate": 1.042101620249405e-06, + "loss": 
0.23039895296096802, + "step": 6484 + }, + { + "epoch": 1.7220820608152967, + "grad_norm": 1.2564768074123291, + "learning_rate": 1.0401508857949295e-06, + "loss": 0.19559775292873383, + "step": 6485 + }, + { + "epoch": 1.7223476297968396, + "grad_norm": 1.222035384596064, + "learning_rate": 1.0382018786936943e-06, + "loss": 0.24982990324497223, + "step": 6486 + }, + { + "epoch": 1.7226131987783826, + "grad_norm": 1.356827120814655, + "learning_rate": 1.0362545993214402e-06, + "loss": 0.26212313771247864, + "step": 6487 + }, + { + "epoch": 1.7228787677599255, + "grad_norm": 1.2583181328160484, + "learning_rate": 1.0343090480535788e-06, + "loss": 0.22827446460723877, + "step": 6488 + }, + { + "epoch": 1.7231443367414685, + "grad_norm": 1.3650470156220376, + "learning_rate": 1.032365225265196e-06, + "loss": 0.2710435390472412, + "step": 6489 + }, + { + "epoch": 1.7234099057230114, + "grad_norm": 1.560435811081079, + "learning_rate": 1.030423131331033e-06, + "loss": 0.25116702914237976, + "step": 6490 + }, + { + "epoch": 1.7236754747045544, + "grad_norm": 1.2598369270207033, + "learning_rate": 1.0284827666255048e-06, + "loss": 0.1980481743812561, + "step": 6491 + }, + { + "epoch": 1.7239410436860974, + "grad_norm": 1.3159445178277585, + "learning_rate": 1.0265441315226898e-06, + "loss": 0.2777971625328064, + "step": 6492 + }, + { + "epoch": 1.7242066126676403, + "grad_norm": 1.3290253215924488, + "learning_rate": 1.0246072263963336e-06, + "loss": 0.23041702806949615, + "step": 6493 + }, + { + "epoch": 1.7244721816491833, + "grad_norm": 1.2761862568921072, + "learning_rate": 1.0226720516198495e-06, + "loss": 0.21428728103637695, + "step": 6494 + }, + { + "epoch": 1.7247377506307262, + "grad_norm": 1.2965072992275601, + "learning_rate": 1.020738607566316e-06, + "loss": 0.22577518224716187, + "step": 6495 + }, + { + "epoch": 1.7250033196122692, + "grad_norm": 1.2489154030372867, + "learning_rate": 1.0188068946084783e-06, + "loss": 0.21080979704856873, + "step": 6496 + }, 
+ { + "epoch": 1.7252688885938121, + "grad_norm": 1.1941107816051266, + "learning_rate": 1.0168769131187472e-06, + "loss": 0.21232858300209045, + "step": 6497 + }, + { + "epoch": 1.725534457575355, + "grad_norm": 1.3035016990745079, + "learning_rate": 1.0149486634692019e-06, + "loss": 0.25525614619255066, + "step": 6498 + }, + { + "epoch": 1.725800026556898, + "grad_norm": 1.2742578592858531, + "learning_rate": 1.0130221460315858e-06, + "loss": 0.26291778683662415, + "step": 6499 + }, + { + "epoch": 1.726065595538441, + "grad_norm": 1.1747703502148148, + "learning_rate": 1.011097361177308e-06, + "loss": 0.21314382553100586, + "step": 6500 + }, + { + "epoch": 1.726331164519984, + "grad_norm": 1.3027182735878766, + "learning_rate": 1.0091743092774474e-06, + "loss": 0.2106419950723648, + "step": 6501 + }, + { + "epoch": 1.7265967335015269, + "grad_norm": 1.2753206037657139, + "learning_rate": 1.0072529907027407e-06, + "loss": 0.22456032037734985, + "step": 6502 + }, + { + "epoch": 1.7268623024830698, + "grad_norm": 2.1059170179774807, + "learning_rate": 1.0053334058235975e-06, + "loss": 0.2301097959280014, + "step": 6503 + }, + { + "epoch": 1.7271278714646128, + "grad_norm": 1.4062353485935484, + "learning_rate": 1.0034155550100922e-06, + "loss": 0.21207617223262787, + "step": 6504 + }, + { + "epoch": 1.7273934404461557, + "grad_norm": 1.3379977808716934, + "learning_rate": 1.0014994386319621e-06, + "loss": 0.24378664791584015, + "step": 6505 + }, + { + "epoch": 1.727659009427699, + "grad_norm": 1.402146752515372, + "learning_rate": 9.995850570586107e-07, + "loss": 0.24914023280143738, + "step": 6506 + }, + { + "epoch": 1.7279245784092419, + "grad_norm": 1.2949159811476645, + "learning_rate": 9.976724106591128e-07, + "loss": 0.23235921561717987, + "step": 6507 + }, + { + "epoch": 1.7281901473907848, + "grad_norm": 1.295455173430887, + "learning_rate": 9.957614998022015e-07, + "loss": 0.22441455721855164, + "step": 6508 + }, + { + "epoch": 1.7284557163723278, + 
"grad_norm": 1.4195770964317103, + "learning_rate": 9.93852324856278e-07, + "loss": 0.2559920847415924, + "step": 6509 + }, + { + "epoch": 1.7287212853538707, + "grad_norm": 1.2106097617539484, + "learning_rate": 9.919448861894088e-07, + "loss": 0.21378321945667267, + "step": 6510 + }, + { + "epoch": 1.7289868543354137, + "grad_norm": 1.223247289196822, + "learning_rate": 9.900391841693247e-07, + "loss": 0.23622627556324005, + "step": 6511 + }, + { + "epoch": 1.7292524233169566, + "grad_norm": 1.2354266119490807, + "learning_rate": 9.88135219163424e-07, + "loss": 0.217013418674469, + "step": 6512 + }, + { + "epoch": 1.7295179922984996, + "grad_norm": 1.342902376475473, + "learning_rate": 9.862329915387669e-07, + "loss": 0.2221517264842987, + "step": 6513 + }, + { + "epoch": 1.7297835612800425, + "grad_norm": 1.3136496001371853, + "learning_rate": 9.84332501662083e-07, + "loss": 0.24377144873142242, + "step": 6514 + }, + { + "epoch": 1.7300491302615855, + "grad_norm": 1.2574348774674273, + "learning_rate": 9.824337498997593e-07, + "loss": 0.23368799686431885, + "step": 6515 + }, + { + "epoch": 1.7303146992431284, + "grad_norm": 1.1949944292188206, + "learning_rate": 9.805367366178608e-07, + "loss": 0.23061680793762207, + "step": 6516 + }, + { + "epoch": 1.7305802682246714, + "grad_norm": 1.2715048223769598, + "learning_rate": 9.78641462182104e-07, + "loss": 0.24157950282096863, + "step": 6517 + }, + { + "epoch": 1.7308458372062143, + "grad_norm": 1.3248165077712177, + "learning_rate": 9.76747926957875e-07, + "loss": 0.2122395783662796, + "step": 6518 + }, + { + "epoch": 1.7311114061877573, + "grad_norm": 1.320024810941134, + "learning_rate": 9.748561313102266e-07, + "loss": 0.2351134717464447, + "step": 6519 + }, + { + "epoch": 1.7313769751693002, + "grad_norm": 1.2421546716744003, + "learning_rate": 9.729660756038738e-07, + "loss": 0.22462692856788635, + "step": 6520 + }, + { + "epoch": 1.7316425441508432, + "grad_norm": 1.191887437920794, + "learning_rate": 
9.710777602031985e-07, + "loss": 0.2140806019306183, + "step": 6521 + }, + { + "epoch": 1.7319081131323861, + "grad_norm": 1.1138928252794336, + "learning_rate": 9.691911854722447e-07, + "loss": 0.22256694734096527, + "step": 6522 + }, + { + "epoch": 1.732173682113929, + "grad_norm": 1.3703383963226383, + "learning_rate": 9.673063517747216e-07, + "loss": 0.26044604182243347, + "step": 6523 + }, + { + "epoch": 1.732439251095472, + "grad_norm": 1.2598416492801234, + "learning_rate": 9.65423259474001e-07, + "loss": 0.22553196549415588, + "step": 6524 + }, + { + "epoch": 1.732704820077015, + "grad_norm": 1.351471142700479, + "learning_rate": 9.635419089331255e-07, + "loss": 0.2240113914012909, + "step": 6525 + }, + { + "epoch": 1.732970389058558, + "grad_norm": 1.1814437793767476, + "learning_rate": 9.616623005147952e-07, + "loss": 0.2239987701177597, + "step": 6526 + }, + { + "epoch": 1.7332359580401009, + "grad_norm": 1.3385972692968178, + "learning_rate": 9.597844345813746e-07, + "loss": 0.2779507040977478, + "step": 6527 + }, + { + "epoch": 1.7335015270216438, + "grad_norm": 1.24243402144453, + "learning_rate": 9.57908311494896e-07, + "loss": 0.20211297273635864, + "step": 6528 + }, + { + "epoch": 1.7337670960031868, + "grad_norm": 1.3764658259437736, + "learning_rate": 9.560339316170542e-07, + "loss": 0.2552817165851593, + "step": 6529 + }, + { + "epoch": 1.7340326649847297, + "grad_norm": 1.2797541334315956, + "learning_rate": 9.54161295309206e-07, + "loss": 0.248790442943573, + "step": 6530 + }, + { + "epoch": 1.7342982339662727, + "grad_norm": 1.2952054804389268, + "learning_rate": 9.522904029323754e-07, + "loss": 0.22865381836891174, + "step": 6531 + }, + { + "epoch": 1.7345638029478156, + "grad_norm": 1.2248102039230788, + "learning_rate": 9.504212548472458e-07, + "loss": 0.212583988904953, + "step": 6532 + }, + { + "epoch": 1.7348293719293586, + "grad_norm": 1.3834113478738954, + "learning_rate": 9.48553851414169e-07, + "loss": 0.24632221460342407, + "step": 
6533 + }, + { + "epoch": 1.7350949409109018, + "grad_norm": 1.2843254083507383, + "learning_rate": 9.466881929931582e-07, + "loss": 0.2264299988746643, + "step": 6534 + }, + { + "epoch": 1.7353605098924447, + "grad_norm": 1.1969400150248917, + "learning_rate": 9.4482427994389e-07, + "loss": 0.21560585498809814, + "step": 6535 + }, + { + "epoch": 1.7356260788739877, + "grad_norm": 1.2133784097522973, + "learning_rate": 9.429621126257038e-07, + "loss": 0.24358224868774414, + "step": 6536 + }, + { + "epoch": 1.7358916478555306, + "grad_norm": 1.2714225965713206, + "learning_rate": 9.411016913976045e-07, + "loss": 0.23307816684246063, + "step": 6537 + }, + { + "epoch": 1.7361572168370736, + "grad_norm": 1.3040669928143356, + "learning_rate": 9.392430166182597e-07, + "loss": 0.28001490235328674, + "step": 6538 + }, + { + "epoch": 1.7364227858186165, + "grad_norm": 1.271471324412232, + "learning_rate": 9.373860886459996e-07, + "loss": 0.22544093430042267, + "step": 6539 + }, + { + "epoch": 1.7366883548001595, + "grad_norm": 1.196472605989987, + "learning_rate": 9.355309078388186e-07, + "loss": 0.2066478282213211, + "step": 6540 + }, + { + "epoch": 1.7369539237817024, + "grad_norm": 1.3162468805281542, + "learning_rate": 9.336774745543697e-07, + "loss": 0.21185964345932007, + "step": 6541 + }, + { + "epoch": 1.7372194927632454, + "grad_norm": 1.2806137892507987, + "learning_rate": 9.318257891499793e-07, + "loss": 0.2337890863418579, + "step": 6542 + }, + { + "epoch": 1.7374850617447883, + "grad_norm": 1.3468215205180822, + "learning_rate": 9.299758519826274e-07, + "loss": 0.2430594563484192, + "step": 6543 + }, + { + "epoch": 1.7377506307263313, + "grad_norm": 1.4072339591675835, + "learning_rate": 9.281276634089609e-07, + "loss": 0.24799269437789917, + "step": 6544 + }, + { + "epoch": 1.7380161997078742, + "grad_norm": 1.3533264573117185, + "learning_rate": 9.26281223785287e-07, + "loss": 0.24756166338920593, + "step": 6545 + }, + { + "epoch": 1.7382817686894172, + 
"grad_norm": 1.281195516970091, + "learning_rate": 9.244365334675787e-07, + "loss": 0.23465190827846527, + "step": 6546 + }, + { + "epoch": 1.7385473376709601, + "grad_norm": 1.22953964144765, + "learning_rate": 9.225935928114716e-07, + "loss": 0.2039640098810196, + "step": 6547 + }, + { + "epoch": 1.738812906652503, + "grad_norm": 1.3426382286400422, + "learning_rate": 9.207524021722602e-07, + "loss": 0.22304412722587585, + "step": 6548 + }, + { + "epoch": 1.739078475634046, + "grad_norm": 1.2253196898929546, + "learning_rate": 9.189129619049064e-07, + "loss": 0.19985908269882202, + "step": 6549 + }, + { + "epoch": 1.739344044615589, + "grad_norm": 1.3354963919439176, + "learning_rate": 9.17075272364032e-07, + "loss": 0.2335432469844818, + "step": 6550 + }, + { + "epoch": 1.739609613597132, + "grad_norm": 1.6822196536181961, + "learning_rate": 9.152393339039223e-07, + "loss": 0.2313593327999115, + "step": 6551 + }, + { + "epoch": 1.739875182578675, + "grad_norm": 1.310977344619443, + "learning_rate": 9.134051468785243e-07, + "loss": 0.2320600152015686, + "step": 6552 + }, + { + "epoch": 1.7401407515602179, + "grad_norm": 1.0942022372096942, + "learning_rate": 9.115727116414475e-07, + "loss": 0.1870848387479782, + "step": 6553 + }, + { + "epoch": 1.7404063205417608, + "grad_norm": 1.340037469005655, + "learning_rate": 9.097420285459635e-07, + "loss": 0.22922812402248383, + "step": 6554 + }, + { + "epoch": 1.7406718895233038, + "grad_norm": 1.3705243227438364, + "learning_rate": 9.079130979450068e-07, + "loss": 0.2505050301551819, + "step": 6555 + }, + { + "epoch": 1.7409374585048467, + "grad_norm": 1.3187608464438627, + "learning_rate": 9.060859201911732e-07, + "loss": 0.20445439219474792, + "step": 6556 + }, + { + "epoch": 1.7412030274863897, + "grad_norm": 1.1489822386745985, + "learning_rate": 9.042604956367218e-07, + "loss": 0.22338441014289856, + "step": 6557 + }, + { + "epoch": 1.7414685964679326, + "grad_norm": 1.2900464387857213, + "learning_rate": 
9.024368246335735e-07, + "loss": 0.24923941493034363, + "step": 6558 + }, + { + "epoch": 1.7417341654494756, + "grad_norm": 1.3383952744906746, + "learning_rate": 9.006149075333071e-07, + "loss": 0.22842931747436523, + "step": 6559 + }, + { + "epoch": 1.7419997344310185, + "grad_norm": 1.391145524863548, + "learning_rate": 8.987947446871703e-07, + "loss": 0.22451579570770264, + "step": 6560 + }, + { + "epoch": 1.7422653034125615, + "grad_norm": 1.3218089225892669, + "learning_rate": 8.969763364460682e-07, + "loss": 0.2521047592163086, + "step": 6561 + }, + { + "epoch": 1.7425308723941044, + "grad_norm": 1.1675892500249985, + "learning_rate": 8.951596831605691e-07, + "loss": 0.25001099705696106, + "step": 6562 + }, + { + "epoch": 1.7427964413756474, + "grad_norm": 1.175521207104519, + "learning_rate": 8.933447851809007e-07, + "loss": 0.19592508673667908, + "step": 6563 + }, + { + "epoch": 1.7430620103571903, + "grad_norm": 1.399887131584603, + "learning_rate": 8.915316428569554e-07, + "loss": 0.2785179018974304, + "step": 6564 + }, + { + "epoch": 1.7433275793387333, + "grad_norm": 1.1688351316361159, + "learning_rate": 8.897202565382845e-07, + "loss": 0.20700594782829285, + "step": 6565 + }, + { + "epoch": 1.7435931483202762, + "grad_norm": 1.2225569857896341, + "learning_rate": 8.879106265741044e-07, + "loss": 0.253167062997818, + "step": 6566 + }, + { + "epoch": 1.7438587173018192, + "grad_norm": 1.4278912909015264, + "learning_rate": 8.861027533132859e-07, + "loss": 0.27672937512397766, + "step": 6567 + }, + { + "epoch": 1.7441242862833621, + "grad_norm": 1.3136368448280313, + "learning_rate": 8.842966371043671e-07, + "loss": 0.23050950467586517, + "step": 6568 + }, + { + "epoch": 1.744389855264905, + "grad_norm": 1.2790658189865058, + "learning_rate": 8.824922782955481e-07, + "loss": 0.23529425263404846, + "step": 6569 + }, + { + "epoch": 1.744655424246448, + "grad_norm": 1.2887213562899031, + "learning_rate": 8.806896772346873e-07, + "loss": 
0.21803250908851624, + "step": 6570 + }, + { + "epoch": 1.744920993227991, + "grad_norm": 1.3669961004756481, + "learning_rate": 8.788888342693047e-07, + "loss": 0.24237293004989624, + "step": 6571 + }, + { + "epoch": 1.745186562209534, + "grad_norm": 1.1957319745445254, + "learning_rate": 8.770897497465803e-07, + "loss": 0.2008107602596283, + "step": 6572 + }, + { + "epoch": 1.745452131191077, + "grad_norm": 1.2693790937709173, + "learning_rate": 8.752924240133587e-07, + "loss": 0.23106279969215393, + "step": 6573 + }, + { + "epoch": 1.7457177001726198, + "grad_norm": 1.377716829660982, + "learning_rate": 8.734968574161406e-07, + "loss": 0.23726215958595276, + "step": 6574 + }, + { + "epoch": 1.7459832691541628, + "grad_norm": 1.211024095215965, + "learning_rate": 8.717030503010915e-07, + "loss": 0.26349812746047974, + "step": 6575 + }, + { + "epoch": 1.7462488381357057, + "grad_norm": 1.2871963140003055, + "learning_rate": 8.699110030140367e-07, + "loss": 0.23226451873779297, + "step": 6576 + }, + { + "epoch": 1.7465144071172487, + "grad_norm": 1.3173524718115384, + "learning_rate": 8.68120715900459e-07, + "loss": 0.22188402712345123, + "step": 6577 + }, + { + "epoch": 1.7467799760987917, + "grad_norm": 1.2367242455559135, + "learning_rate": 8.663321893055087e-07, + "loss": 0.21238234639167786, + "step": 6578 + }, + { + "epoch": 1.7470455450803346, + "grad_norm": 1.3423960800972676, + "learning_rate": 8.645454235739903e-07, + "loss": 0.2700675427913666, + "step": 6579 + }, + { + "epoch": 1.7473111140618776, + "grad_norm": 1.2737029023524005, + "learning_rate": 8.627604190503714e-07, + "loss": 0.24463894963264465, + "step": 6580 + }, + { + "epoch": 1.7475766830434205, + "grad_norm": 1.2537801110870739, + "learning_rate": 8.609771760787822e-07, + "loss": 0.23429079353809357, + "step": 6581 + }, + { + "epoch": 1.7478422520249635, + "grad_norm": 1.342775712878445, + "learning_rate": 8.591956950030067e-07, + "loss": 0.21767663955688477, + "step": 6582 + }, + { + 
"epoch": 1.7481078210065064, + "grad_norm": 1.3390334282971272, + "learning_rate": 8.574159761664957e-07, + "loss": 0.2499813735485077, + "step": 6583 + }, + { + "epoch": 1.7483733899880494, + "grad_norm": 1.471955255689367, + "learning_rate": 8.556380199123582e-07, + "loss": 0.28065958619117737, + "step": 6584 + }, + { + "epoch": 1.7486389589695923, + "grad_norm": 1.3012440070718, + "learning_rate": 8.538618265833621e-07, + "loss": 0.2166985273361206, + "step": 6585 + }, + { + "epoch": 1.7489045279511353, + "grad_norm": 1.2228700023368582, + "learning_rate": 8.520873965219356e-07, + "loss": 0.22835782170295715, + "step": 6586 + }, + { + "epoch": 1.7491700969326782, + "grad_norm": 1.2209097376008975, + "learning_rate": 8.503147300701709e-07, + "loss": 0.23575961589813232, + "step": 6587 + }, + { + "epoch": 1.7494356659142212, + "grad_norm": 1.1275514661567778, + "learning_rate": 8.485438275698154e-07, + "loss": 0.183369442820549, + "step": 6588 + }, + { + "epoch": 1.7497012348957641, + "grad_norm": 1.519810508178025, + "learning_rate": 8.467746893622786e-07, + "loss": 0.2731352746486664, + "step": 6589 + }, + { + "epoch": 1.749966803877307, + "grad_norm": 1.2913957246056922, + "learning_rate": 8.450073157886296e-07, + "loss": 0.20177578926086426, + "step": 6590 + }, + { + "epoch": 1.75023237285885, + "grad_norm": 1.2742798574628598, + "learning_rate": 8.432417071895982e-07, + "loss": 0.21672385931015015, + "step": 6591 + }, + { + "epoch": 1.750497941840393, + "grad_norm": 1.370933216008306, + "learning_rate": 8.414778639055699e-07, + "loss": 0.2503831386566162, + "step": 6592 + }, + { + "epoch": 1.750763510821936, + "grad_norm": 1.2884133202144494, + "learning_rate": 8.397157862765959e-07, + "loss": 0.2427521049976349, + "step": 6593 + }, + { + "epoch": 1.7510290798034789, + "grad_norm": 1.3424141731181953, + "learning_rate": 8.379554746423824e-07, + "loss": 0.23128533363342285, + "step": 6594 + }, + { + "epoch": 1.7512946487850218, + "grad_norm": 
1.2353999110478557, + "learning_rate": 8.361969293422967e-07, + "loss": 0.2470957189798355, + "step": 6595 + }, + { + "epoch": 1.7515602177665648, + "grad_norm": 1.3335789710762707, + "learning_rate": 8.344401507153665e-07, + "loss": 0.29447510838508606, + "step": 6596 + }, + { + "epoch": 1.7518257867481077, + "grad_norm": 1.197223419032368, + "learning_rate": 8.326851391002777e-07, + "loss": 0.21585828065872192, + "step": 6597 + }, + { + "epoch": 1.7520913557296507, + "grad_norm": 1.2653558688292899, + "learning_rate": 8.30931894835375e-07, + "loss": 0.24081121385097504, + "step": 6598 + }, + { + "epoch": 1.7523569247111936, + "grad_norm": 1.3408805119391818, + "learning_rate": 8.291804182586638e-07, + "loss": 0.23052063584327698, + "step": 6599 + }, + { + "epoch": 1.7526224936927366, + "grad_norm": 1.2126901970374089, + "learning_rate": 8.274307097078093e-07, + "loss": 0.19008183479309082, + "step": 6600 + }, + { + "epoch": 1.7528880626742795, + "grad_norm": 1.3285441470167585, + "learning_rate": 8.25682769520132e-07, + "loss": 0.2632960379123688, + "step": 6601 + }, + { + "epoch": 1.7531536316558225, + "grad_norm": 1.4350439941988302, + "learning_rate": 8.239365980326175e-07, + "loss": 0.25958624482154846, + "step": 6602 + }, + { + "epoch": 1.7534192006373654, + "grad_norm": 1.304275360361708, + "learning_rate": 8.221921955819035e-07, + "loss": 0.22370605170726776, + "step": 6603 + }, + { + "epoch": 1.7536847696189084, + "grad_norm": 1.2385957043075924, + "learning_rate": 8.204495625042919e-07, + "loss": 0.22018703818321228, + "step": 6604 + }, + { + "epoch": 1.7539503386004514, + "grad_norm": 1.3626754196729718, + "learning_rate": 8.187086991357418e-07, + "loss": 0.26802191138267517, + "step": 6605 + }, + { + "epoch": 1.7542159075819943, + "grad_norm": 1.5313825040978437, + "learning_rate": 8.169696058118725e-07, + "loss": 0.21560518443584442, + "step": 6606 + }, + { + "epoch": 1.7544814765635373, + "grad_norm": 1.270508998157205, + "learning_rate": 
8.152322828679593e-07, + "loss": 0.23222430050373077, + "step": 6607 + }, + { + "epoch": 1.7547470455450802, + "grad_norm": 1.1542994886817455, + "learning_rate": 8.134967306389374e-07, + "loss": 0.17638427019119263, + "step": 6608 + }, + { + "epoch": 1.7550126145266232, + "grad_norm": 1.3257823658984844, + "learning_rate": 8.117629494594015e-07, + "loss": 0.21539513766765594, + "step": 6609 + }, + { + "epoch": 1.7552781835081661, + "grad_norm": 1.3431199934216977, + "learning_rate": 8.100309396636031e-07, + "loss": 0.2265736162662506, + "step": 6610 + }, + { + "epoch": 1.755543752489709, + "grad_norm": 1.3478032961337874, + "learning_rate": 8.083007015854549e-07, + "loss": 0.2688787281513214, + "step": 6611 + }, + { + "epoch": 1.755809321471252, + "grad_norm": 1.3027271078273857, + "learning_rate": 8.065722355585249e-07, + "loss": 0.19756367802619934, + "step": 6612 + }, + { + "epoch": 1.756074890452795, + "grad_norm": 1.3749986253881121, + "learning_rate": 8.048455419160405e-07, + "loss": 0.19934290647506714, + "step": 6613 + }, + { + "epoch": 1.756340459434338, + "grad_norm": 1.5756000064179743, + "learning_rate": 8.031206209908904e-07, + "loss": 0.2523588538169861, + "step": 6614 + }, + { + "epoch": 1.7566060284158809, + "grad_norm": 1.2988900493114706, + "learning_rate": 8.01397473115616e-07, + "loss": 0.22825747728347778, + "step": 6615 + }, + { + "epoch": 1.7568715973974238, + "grad_norm": 1.3238944187902402, + "learning_rate": 7.996760986224228e-07, + "loss": 0.24525251984596252, + "step": 6616 + }, + { + "epoch": 1.7571371663789668, + "grad_norm": 1.366323962207031, + "learning_rate": 7.979564978431687e-07, + "loss": 0.21883559226989746, + "step": 6617 + }, + { + "epoch": 1.7574027353605097, + "grad_norm": 1.5827948860142422, + "learning_rate": 7.96238671109374e-07, + "loss": 0.2642098069190979, + "step": 6618 + }, + { + "epoch": 1.757668304342053, + "grad_norm": 1.3345016667633411, + "learning_rate": 7.945226187522159e-07, + "loss": 0.24094998836517334, + 
"step": 6619 + }, + { + "epoch": 1.7579338733235959, + "grad_norm": 1.2243450261876818, + "learning_rate": 7.928083411025278e-07, + "loss": 0.2225762903690338, + "step": 6620 + }, + { + "epoch": 1.7581994423051388, + "grad_norm": 1.2991544127435968, + "learning_rate": 7.910958384908041e-07, + "loss": 0.26722851395606995, + "step": 6621 + }, + { + "epoch": 1.7584650112866818, + "grad_norm": 1.3206157533666447, + "learning_rate": 7.893851112471907e-07, + "loss": 0.2176910787820816, + "step": 6622 + }, + { + "epoch": 1.7587305802682247, + "grad_norm": 1.3618122023344794, + "learning_rate": 7.876761597015003e-07, + "loss": 0.20261354744434357, + "step": 6623 + }, + { + "epoch": 1.7589961492497677, + "grad_norm": 1.1728416456458601, + "learning_rate": 7.859689841831975e-07, + "loss": 0.23314467072486877, + "step": 6624 + }, + { + "epoch": 1.7592617182313106, + "grad_norm": 1.3115277523344588, + "learning_rate": 7.842635850214054e-07, + "loss": 0.19854989647865295, + "step": 6625 + }, + { + "epoch": 1.7595272872128536, + "grad_norm": 1.2614486006783794, + "learning_rate": 7.825599625449043e-07, + "loss": 0.2422565519809723, + "step": 6626 + }, + { + "epoch": 1.7597928561943965, + "grad_norm": 1.342773057026848, + "learning_rate": 7.808581170821328e-07, + "loss": 0.27029529213905334, + "step": 6627 + }, + { + "epoch": 1.7600584251759395, + "grad_norm": 1.1918292148332001, + "learning_rate": 7.791580489611872e-07, + "loss": 0.23596832156181335, + "step": 6628 + }, + { + "epoch": 1.7603239941574824, + "grad_norm": 1.2062344481848934, + "learning_rate": 7.774597585098198e-07, + "loss": 0.218271404504776, + "step": 6629 + }, + { + "epoch": 1.7605895631390254, + "grad_norm": 1.3762692469809215, + "learning_rate": 7.75763246055441e-07, + "loss": 0.2551255226135254, + "step": 6630 + }, + { + "epoch": 1.7608551321205683, + "grad_norm": 1.3049962391533094, + "learning_rate": 7.740685119251179e-07, + "loss": 0.24410653114318848, + "step": 6631 + }, + { + "epoch": 
1.7611207011021113, + "grad_norm": 1.2577276419448338, + "learning_rate": 7.723755564455771e-07, + "loss": 0.23044872283935547, + "step": 6632 + }, + { + "epoch": 1.7613862700836542, + "grad_norm": 1.334208934461724, + "learning_rate": 7.706843799431985e-07, + "loss": 0.24569427967071533, + "step": 6633 + }, + { + "epoch": 1.7616518390651972, + "grad_norm": 1.1605227177029394, + "learning_rate": 7.689949827440224e-07, + "loss": 0.200277179479599, + "step": 6634 + }, + { + "epoch": 1.7619174080467401, + "grad_norm": 1.1742759165978003, + "learning_rate": 7.673073651737428e-07, + "loss": 0.19217821955680847, + "step": 6635 + }, + { + "epoch": 1.762182977028283, + "grad_norm": 1.281151649074766, + "learning_rate": 7.656215275577151e-07, + "loss": 0.227005273103714, + "step": 6636 + }, + { + "epoch": 1.762448546009826, + "grad_norm": 1.2211778988331632, + "learning_rate": 7.639374702209468e-07, + "loss": 0.21359863877296448, + "step": 6637 + }, + { + "epoch": 1.762714114991369, + "grad_norm": 1.267969218396632, + "learning_rate": 7.62255193488105e-07, + "loss": 0.24056711792945862, + "step": 6638 + }, + { + "epoch": 1.762979683972912, + "grad_norm": 1.28035138481303, + "learning_rate": 7.605746976835127e-07, + "loss": 0.20897413790225983, + "step": 6639 + }, + { + "epoch": 1.763245252954455, + "grad_norm": 1.2567764889990254, + "learning_rate": 7.588959831311493e-07, + "loss": 0.20395967364311218, + "step": 6640 + }, + { + "epoch": 1.7635108219359978, + "grad_norm": 1.4827108993688454, + "learning_rate": 7.572190501546517e-07, + "loss": 0.2334095984697342, + "step": 6641 + }, + { + "epoch": 1.7637763909175408, + "grad_norm": 1.3358734576215814, + "learning_rate": 7.555438990773134e-07, + "loss": 0.23892858624458313, + "step": 6642 + }, + { + "epoch": 1.7640419598990837, + "grad_norm": 1.3063666339869877, + "learning_rate": 7.538705302220839e-07, + "loss": 0.23515449464321136, + "step": 6643 + }, + { + "epoch": 1.7643075288806267, + "grad_norm": 1.1919354046726482, + 
"learning_rate": 7.521989439115674e-07, + "loss": 0.19728611409664154, + "step": 6644 + }, + { + "epoch": 1.7645730978621696, + "grad_norm": 1.2609989060636697, + "learning_rate": 7.505291404680281e-07, + "loss": 0.22277355194091797, + "step": 6645 + }, + { + "epoch": 1.7648386668437126, + "grad_norm": 1.2129119488866849, + "learning_rate": 7.488611202133822e-07, + "loss": 0.24117602407932281, + "step": 6646 + }, + { + "epoch": 1.7651042358252558, + "grad_norm": 1.3643314179100876, + "learning_rate": 7.471948834692045e-07, + "loss": 0.24675750732421875, + "step": 6647 + }, + { + "epoch": 1.7653698048067987, + "grad_norm": 1.3261352525807495, + "learning_rate": 7.455304305567279e-07, + "loss": 0.2413899004459381, + "step": 6648 + }, + { + "epoch": 1.7656353737883417, + "grad_norm": 1.3357210816225529, + "learning_rate": 7.438677617968348e-07, + "loss": 0.22125428915023804, + "step": 6649 + }, + { + "epoch": 1.7659009427698846, + "grad_norm": 1.2099689083776513, + "learning_rate": 7.422068775100732e-07, + "loss": 0.205051988363266, + "step": 6650 + }, + { + "epoch": 1.7661665117514276, + "grad_norm": 1.2734255069971199, + "learning_rate": 7.405477780166415e-07, + "loss": 0.23711715638637543, + "step": 6651 + }, + { + "epoch": 1.7664320807329705, + "grad_norm": 1.4063590395204508, + "learning_rate": 7.388904636363914e-07, + "loss": 0.2591046988964081, + "step": 6652 + }, + { + "epoch": 1.7666976497145135, + "grad_norm": 1.4323150626725398, + "learning_rate": 7.372349346888363e-07, + "loss": 0.24837243556976318, + "step": 6653 + }, + { + "epoch": 1.7669632186960564, + "grad_norm": 1.1492996795155954, + "learning_rate": 7.35581191493141e-07, + "loss": 0.20910412073135376, + "step": 6654 + }, + { + "epoch": 1.7672287876775994, + "grad_norm": 1.113119722429438, + "learning_rate": 7.339292343681282e-07, + "loss": 0.2056204229593277, + "step": 6655 + }, + { + "epoch": 1.7674943566591423, + "grad_norm": 1.2927092177897141, + "learning_rate": 7.322790636322764e-07, + "loss": 
0.2496742308139801, + "step": 6656 + }, + { + "epoch": 1.7677599256406853, + "grad_norm": 1.3571185149739835, + "learning_rate": 7.306306796037188e-07, + "loss": 0.24432921409606934, + "step": 6657 + }, + { + "epoch": 1.7680254946222282, + "grad_norm": 1.3006085174415165, + "learning_rate": 7.289840826002414e-07, + "loss": 0.2492775321006775, + "step": 6658 + }, + { + "epoch": 1.7682910636037712, + "grad_norm": 1.3256617876861967, + "learning_rate": 7.273392729392936e-07, + "loss": 0.22673827409744263, + "step": 6659 + }, + { + "epoch": 1.7685566325853141, + "grad_norm": 1.3730978211523115, + "learning_rate": 7.25696250937975e-07, + "loss": 0.2225622981786728, + "step": 6660 + }, + { + "epoch": 1.768822201566857, + "grad_norm": 1.2296766172450786, + "learning_rate": 7.240550169130378e-07, + "loss": 0.24896883964538574, + "step": 6661 + }, + { + "epoch": 1.7690877705484, + "grad_norm": 1.2103035123370711, + "learning_rate": 7.224155711808923e-07, + "loss": 0.2395302951335907, + "step": 6662 + }, + { + "epoch": 1.769353339529943, + "grad_norm": 1.2658162555194572, + "learning_rate": 7.207779140576066e-07, + "loss": 0.2255886197090149, + "step": 6663 + }, + { + "epoch": 1.769618908511486, + "grad_norm": 1.2518907529925698, + "learning_rate": 7.191420458589005e-07, + "loss": 0.24029678106307983, + "step": 6664 + }, + { + "epoch": 1.769884477493029, + "grad_norm": 1.1016484922093457, + "learning_rate": 7.175079669001506e-07, + "loss": 0.19399142265319824, + "step": 6665 + }, + { + "epoch": 1.7701500464745719, + "grad_norm": 1.2291425924678119, + "learning_rate": 7.158756774963882e-07, + "loss": 0.24569162726402283, + "step": 6666 + }, + { + "epoch": 1.7704156154561148, + "grad_norm": 1.2180012837263907, + "learning_rate": 7.142451779622971e-07, + "loss": 0.2484329342842102, + "step": 6667 + }, + { + "epoch": 1.7706811844376578, + "grad_norm": 1.2505833357389051, + "learning_rate": 7.126164686122216e-07, + "loss": 0.24423512816429138, + "step": 6668 + }, + { + "epoch": 
1.7709467534192007, + "grad_norm": 1.1277554918017485, + "learning_rate": 7.109895497601571e-07, + "loss": 0.20146678388118744, + "step": 6669 + }, + { + "epoch": 1.7712123224007437, + "grad_norm": 1.2945002187740315, + "learning_rate": 7.093644217197526e-07, + "loss": 0.23329001665115356, + "step": 6670 + }, + { + "epoch": 1.7714778913822866, + "grad_norm": 1.1689758736288713, + "learning_rate": 7.077410848043165e-07, + "loss": 0.2290019690990448, + "step": 6671 + }, + { + "epoch": 1.7717434603638296, + "grad_norm": 1.2744441159542537, + "learning_rate": 7.061195393268061e-07, + "loss": 0.2329377382993698, + "step": 6672 + }, + { + "epoch": 1.7720090293453725, + "grad_norm": 1.1430677052322078, + "learning_rate": 7.04499785599837e-07, + "loss": 0.21513575315475464, + "step": 6673 + }, + { + "epoch": 1.7722745983269155, + "grad_norm": 1.1659646021132744, + "learning_rate": 7.028818239356794e-07, + "loss": 0.19022463262081146, + "step": 6674 + }, + { + "epoch": 1.7725401673084584, + "grad_norm": 1.2837523861206293, + "learning_rate": 7.012656546462571e-07, + "loss": 0.2097887396812439, + "step": 6675 + }, + { + "epoch": 1.7728057362900014, + "grad_norm": 1.3991640357566577, + "learning_rate": 6.996512780431486e-07, + "loss": 0.2559792101383209, + "step": 6676 + }, + { + "epoch": 1.7730713052715443, + "grad_norm": 1.3219531410357084, + "learning_rate": 6.980386944375849e-07, + "loss": 0.24624274671077728, + "step": 6677 + }, + { + "epoch": 1.7733368742530873, + "grad_norm": 1.2405076465604956, + "learning_rate": 6.964279041404553e-07, + "loss": 0.22904372215270996, + "step": 6678 + }, + { + "epoch": 1.7736024432346302, + "grad_norm": 1.216707646052236, + "learning_rate": 6.948189074623002e-07, + "loss": 0.20808623731136322, + "step": 6679 + }, + { + "epoch": 1.7738680122161732, + "grad_norm": 1.229477200185015, + "learning_rate": 6.932117047133158e-07, + "loss": 0.1931435763835907, + "step": 6680 + }, + { + "epoch": 1.7741335811977161, + "grad_norm": 
1.2962984681963328, + "learning_rate": 6.91606296203351e-07, + "loss": 0.22938531637191772, + "step": 6681 + }, + { + "epoch": 1.774399150179259, + "grad_norm": 1.2921857742770726, + "learning_rate": 6.900026822419103e-07, + "loss": 0.240365132689476, + "step": 6682 + }, + { + "epoch": 1.774664719160802, + "grad_norm": 1.3560359754116593, + "learning_rate": 6.8840086313815e-07, + "loss": 0.26665499806404114, + "step": 6683 + }, + { + "epoch": 1.774930288142345, + "grad_norm": 1.1827095382370005, + "learning_rate": 6.86800839200884e-07, + "loss": 0.19775834679603577, + "step": 6684 + }, + { + "epoch": 1.775195857123888, + "grad_norm": 1.2698613362606737, + "learning_rate": 6.852026107385756e-07, + "loss": 0.20334021747112274, + "step": 6685 + }, + { + "epoch": 1.775461426105431, + "grad_norm": 1.1845529296493982, + "learning_rate": 6.836061780593484e-07, + "loss": 0.20670340955257416, + "step": 6686 + }, + { + "epoch": 1.7757269950869738, + "grad_norm": 1.2940248868651125, + "learning_rate": 6.820115414709727e-07, + "loss": 0.2033209353685379, + "step": 6687 + }, + { + "epoch": 1.7759925640685168, + "grad_norm": 1.101442360403221, + "learning_rate": 6.804187012808761e-07, + "loss": 0.23827815055847168, + "step": 6688 + }, + { + "epoch": 1.7762581330500598, + "grad_norm": 1.200357834005043, + "learning_rate": 6.788276577961394e-07, + "loss": 0.2054731547832489, + "step": 6689 + }, + { + "epoch": 1.7765237020316027, + "grad_norm": 1.3006753644657554, + "learning_rate": 6.772384113234987e-07, + "loss": 0.25553691387176514, + "step": 6690 + }, + { + "epoch": 1.7767892710131457, + "grad_norm": 1.2800516387465457, + "learning_rate": 6.756509621693385e-07, + "loss": 0.23650874197483063, + "step": 6691 + }, + { + "epoch": 1.7770548399946886, + "grad_norm": 1.2987358367196533, + "learning_rate": 6.740653106397033e-07, + "loss": 0.2353624701499939, + "step": 6692 + }, + { + "epoch": 1.7773204089762316, + "grad_norm": 1.3578478166739052, + "learning_rate": 
6.724814570402871e-07, + "loss": 0.26034629344940186, + "step": 6693 + }, + { + "epoch": 1.7775859779577745, + "grad_norm": 1.2070636800070726, + "learning_rate": 6.70899401676438e-07, + "loss": 0.2272130399942398, + "step": 6694 + }, + { + "epoch": 1.7778515469393175, + "grad_norm": 1.353295285146214, + "learning_rate": 6.693191448531589e-07, + "loss": 0.27940404415130615, + "step": 6695 + }, + { + "epoch": 1.7781171159208604, + "grad_norm": 1.2726244327901954, + "learning_rate": 6.677406868751013e-07, + "loss": 0.22997702658176422, + "step": 6696 + }, + { + "epoch": 1.7783826849024034, + "grad_norm": 1.2569026906720413, + "learning_rate": 6.661640280465775e-07, + "loss": 0.22918452322483063, + "step": 6697 + }, + { + "epoch": 1.7786482538839463, + "grad_norm": 1.2456580683228033, + "learning_rate": 6.645891686715456e-07, + "loss": 0.18456090986728668, + "step": 6698 + }, + { + "epoch": 1.7789138228654893, + "grad_norm": 1.3290472252808803, + "learning_rate": 6.630161090536214e-07, + "loss": 0.23256534337997437, + "step": 6699 + }, + { + "epoch": 1.7791793918470322, + "grad_norm": 1.2224316750050632, + "learning_rate": 6.614448494960713e-07, + "loss": 0.21171879768371582, + "step": 6700 + }, + { + "epoch": 1.7794449608285752, + "grad_norm": 1.201224789246079, + "learning_rate": 6.598753903018163e-07, + "loss": 0.21382400393486023, + "step": 6701 + }, + { + "epoch": 1.7797105298101181, + "grad_norm": 1.2240177347792593, + "learning_rate": 6.583077317734299e-07, + "loss": 0.22954748570919037, + "step": 6702 + }, + { + "epoch": 1.779976098791661, + "grad_norm": 1.519530195710278, + "learning_rate": 6.56741874213136e-07, + "loss": 0.25691086053848267, + "step": 6703 + }, + { + "epoch": 1.780241667773204, + "grad_norm": 1.4662002194098382, + "learning_rate": 6.551778179228174e-07, + "loss": 0.23413901031017303, + "step": 6704 + }, + { + "epoch": 1.780507236754747, + "grad_norm": 1.2775019242293946, + "learning_rate": 6.536155632040031e-07, + "loss": 0.2493733912706375, 
+ "step": 6705 + }, + { + "epoch": 1.78077280573629, + "grad_norm": 1.2512747936457356, + "learning_rate": 6.520551103578776e-07, + "loss": 0.26094138622283936, + "step": 6706 + }, + { + "epoch": 1.7810383747178329, + "grad_norm": 1.3016608765448805, + "learning_rate": 6.504964596852781e-07, + "loss": 0.23509518802165985, + "step": 6707 + }, + { + "epoch": 1.7813039436993758, + "grad_norm": 1.4726929969063267, + "learning_rate": 6.489396114866942e-07, + "loss": 0.2471122294664383, + "step": 6708 + }, + { + "epoch": 1.7815695126809188, + "grad_norm": 1.3034668854019054, + "learning_rate": 6.47384566062268e-07, + "loss": 0.2363303005695343, + "step": 6709 + }, + { + "epoch": 1.7818350816624617, + "grad_norm": 1.1801501968168786, + "learning_rate": 6.458313237117953e-07, + "loss": 0.18868233263492584, + "step": 6710 + }, + { + "epoch": 1.7821006506440047, + "grad_norm": 1.3437880175802723, + "learning_rate": 6.442798847347187e-07, + "loss": 0.23380546271800995, + "step": 6711 + }, + { + "epoch": 1.7823662196255476, + "grad_norm": 1.471740030592424, + "learning_rate": 6.42730249430139e-07, + "loss": 0.24112167954444885, + "step": 6712 + }, + { + "epoch": 1.7826317886070906, + "grad_norm": 1.2664184946697812, + "learning_rate": 6.411824180968096e-07, + "loss": 0.2397521436214447, + "step": 6713 + }, + { + "epoch": 1.7828973575886335, + "grad_norm": 1.309174308390434, + "learning_rate": 6.396363910331338e-07, + "loss": 0.23775406181812286, + "step": 6714 + }, + { + "epoch": 1.7831629265701765, + "grad_norm": 1.4327166340451307, + "learning_rate": 6.380921685371655e-07, + "loss": 0.23278602957725525, + "step": 6715 + }, + { + "epoch": 1.7834284955517195, + "grad_norm": 1.1135605228940266, + "learning_rate": 6.365497509066143e-07, + "loss": 0.20028996467590332, + "step": 6716 + }, + { + "epoch": 1.7836940645332624, + "grad_norm": 1.146963533940078, + "learning_rate": 6.35009138438839e-07, + "loss": 0.20862875878810883, + "step": 6717 + }, + { + "epoch": 1.7839596335148054, 
+ "grad_norm": 1.3257848293601993, + "learning_rate": 6.334703314308521e-07, + "loss": 0.23522542417049408, + "step": 6718 + }, + { + "epoch": 1.7842252024963483, + "grad_norm": 1.2172150430538355, + "learning_rate": 6.319333301793173e-07, + "loss": 0.24633824825286865, + "step": 6719 + }, + { + "epoch": 1.7844907714778913, + "grad_norm": 1.3131451310460658, + "learning_rate": 6.30398134980551e-07, + "loss": 0.22141410410404205, + "step": 6720 + }, + { + "epoch": 1.7847563404594342, + "grad_norm": 1.3593079444355614, + "learning_rate": 6.288647461305186e-07, + "loss": 0.23313754796981812, + "step": 6721 + }, + { + "epoch": 1.7850219094409772, + "grad_norm": 1.2751593889081192, + "learning_rate": 6.273331639248414e-07, + "loss": 0.22015389800071716, + "step": 6722 + }, + { + "epoch": 1.7852874784225201, + "grad_norm": 1.2716859790694561, + "learning_rate": 6.258033886587911e-07, + "loss": 0.21154522895812988, + "step": 6723 + }, + { + "epoch": 1.785553047404063, + "grad_norm": 1.3319130935282857, + "learning_rate": 6.242754206272883e-07, + "loss": 0.2320503294467926, + "step": 6724 + }, + { + "epoch": 1.785818616385606, + "grad_norm": 1.2016740259413836, + "learning_rate": 6.227492601249097e-07, + "loss": 0.21778921782970428, + "step": 6725 + }, + { + "epoch": 1.786084185367149, + "grad_norm": 1.2321504813505204, + "learning_rate": 6.212249074458776e-07, + "loss": 0.2368871569633484, + "step": 6726 + }, + { + "epoch": 1.786349754348692, + "grad_norm": 1.5195368545073897, + "learning_rate": 6.197023628840704e-07, + "loss": 0.27269479632377625, + "step": 6727 + }, + { + "epoch": 1.7866153233302349, + "grad_norm": 1.2744130185555103, + "learning_rate": 6.181816267330177e-07, + "loss": 0.2414151132106781, + "step": 6728 + }, + { + "epoch": 1.7868808923117778, + "grad_norm": 1.1197825562175172, + "learning_rate": 6.166626992858993e-07, + "loss": 0.2156972736120224, + "step": 6729 + }, + { + "epoch": 1.7871464612933208, + "grad_norm": 1.2748992996552195, + 
"learning_rate": 6.151455808355455e-07, + "loss": 0.2510441541671753, + "step": 6730 + }, + { + "epoch": 1.787412030274864, + "grad_norm": 1.2924509412618195, + "learning_rate": 6.136302716744402e-07, + "loss": 0.20290088653564453, + "step": 6731 + }, + { + "epoch": 1.787677599256407, + "grad_norm": 1.3705736121123597, + "learning_rate": 6.121167720947174e-07, + "loss": 0.25088101625442505, + "step": 6732 + }, + { + "epoch": 1.7879431682379499, + "grad_norm": 1.3723338572382136, + "learning_rate": 6.106050823881604e-07, + "loss": 0.2566376328468323, + "step": 6733 + }, + { + "epoch": 1.7882087372194928, + "grad_norm": 1.1043772478174716, + "learning_rate": 6.09095202846206e-07, + "loss": 0.1882714033126831, + "step": 6734 + }, + { + "epoch": 1.7884743062010358, + "grad_norm": 1.2323780172305254, + "learning_rate": 6.075871337599404e-07, + "loss": 0.18705856800079346, + "step": 6735 + }, + { + "epoch": 1.7887398751825787, + "grad_norm": 1.1976910574931858, + "learning_rate": 6.060808754201031e-07, + "loss": 0.24756133556365967, + "step": 6736 + }, + { + "epoch": 1.7890054441641217, + "grad_norm": 1.3197777974144425, + "learning_rate": 6.045764281170818e-07, + "loss": 0.2537599205970764, + "step": 6737 + }, + { + "epoch": 1.7892710131456646, + "grad_norm": 1.330362234255321, + "learning_rate": 6.030737921409169e-07, + "loss": 0.22049202024936676, + "step": 6738 + }, + { + "epoch": 1.7895365821272076, + "grad_norm": 1.1222347914068396, + "learning_rate": 6.015729677812965e-07, + "loss": 0.20820394158363342, + "step": 6739 + }, + { + "epoch": 1.7898021511087505, + "grad_norm": 1.3153590716408405, + "learning_rate": 6.00073955327567e-07, + "loss": 0.2339879721403122, + "step": 6740 + }, + { + "epoch": 1.7900677200902935, + "grad_norm": 1.2483259153993207, + "learning_rate": 5.98576755068715e-07, + "loss": 0.22082161903381348, + "step": 6741 + }, + { + "epoch": 1.7903332890718364, + "grad_norm": 1.28162605766883, + "learning_rate": 5.97081367293385e-07, + "loss": 
0.21883058547973633, + "step": 6742 + }, + { + "epoch": 1.7905988580533794, + "grad_norm": 1.1591166092235485, + "learning_rate": 5.955877922898712e-07, + "loss": 0.214680016040802, + "step": 6743 + }, + { + "epoch": 1.7908644270349223, + "grad_norm": 1.37628370977899, + "learning_rate": 5.940960303461152e-07, + "loss": 0.24533744156360626, + "step": 6744 + }, + { + "epoch": 1.7911299960164653, + "grad_norm": 1.3046535737377691, + "learning_rate": 5.926060817497137e-07, + "loss": 0.19857585430145264, + "step": 6745 + }, + { + "epoch": 1.7913955649980082, + "grad_norm": 1.4468975368000232, + "learning_rate": 5.911179467879081e-07, + "loss": 0.27493876218795776, + "step": 6746 + }, + { + "epoch": 1.7916611339795512, + "grad_norm": 1.1490145590407708, + "learning_rate": 5.896316257475954e-07, + "loss": 0.20560544729232788, + "step": 6747 + }, + { + "epoch": 1.7919267029610941, + "grad_norm": 1.2213631424870741, + "learning_rate": 5.881471189153199e-07, + "loss": 0.23559418320655823, + "step": 6748 + }, + { + "epoch": 1.792192271942637, + "grad_norm": 1.3144055462601232, + "learning_rate": 5.866644265772769e-07, + "loss": 0.23055103421211243, + "step": 6749 + }, + { + "epoch": 1.79245784092418, + "grad_norm": 1.4747052812755685, + "learning_rate": 5.851835490193136e-07, + "loss": 0.2780724763870239, + "step": 6750 + }, + { + "epoch": 1.792723409905723, + "grad_norm": 1.2354333862915858, + "learning_rate": 5.837044865269248e-07, + "loss": 0.20216618478298187, + "step": 6751 + }, + { + "epoch": 1.792988978887266, + "grad_norm": 1.308066661539038, + "learning_rate": 5.822272393852557e-07, + "loss": 0.2289930284023285, + "step": 6752 + }, + { + "epoch": 1.793254547868809, + "grad_norm": 1.2952454297764495, + "learning_rate": 5.80751807879103e-07, + "loss": 0.2028929740190506, + "step": 6753 + }, + { + "epoch": 1.7935201168503518, + "grad_norm": 1.2960791997009702, + "learning_rate": 5.792781922929114e-07, + "loss": 0.1964842826128006, + "step": 6754 + }, + { + "epoch": 
1.7937856858318948, + "grad_norm": 1.4512315838061285, + "learning_rate": 5.77806392910778e-07, + "loss": 0.2617039084434509, + "step": 6755 + }, + { + "epoch": 1.7940512548134377, + "grad_norm": 1.325466585449178, + "learning_rate": 5.76336410016447e-07, + "loss": 0.2582395374774933, + "step": 6756 + }, + { + "epoch": 1.7943168237949807, + "grad_norm": 1.2587701407069858, + "learning_rate": 5.74868243893314e-07, + "loss": 0.23379334807395935, + "step": 6757 + }, + { + "epoch": 1.7945823927765236, + "grad_norm": 1.2979435124807637, + "learning_rate": 5.734018948244247e-07, + "loss": 0.2376977801322937, + "step": 6758 + }, + { + "epoch": 1.7948479617580668, + "grad_norm": 1.414785341098569, + "learning_rate": 5.719373630924741e-07, + "loss": 0.21816037595272064, + "step": 6759 + }, + { + "epoch": 1.7951135307396098, + "grad_norm": 1.1404163081963787, + "learning_rate": 5.704746489798063e-07, + "loss": 0.22156387567520142, + "step": 6760 + }, + { + "epoch": 1.7953790997211527, + "grad_norm": 1.195358056085369, + "learning_rate": 5.690137527684147e-07, + "loss": 0.20818129181861877, + "step": 6761 + }, + { + "epoch": 1.7956446687026957, + "grad_norm": 1.1501993150491747, + "learning_rate": 5.67554674739944e-07, + "loss": 0.18672943115234375, + "step": 6762 + }, + { + "epoch": 1.7959102376842386, + "grad_norm": 1.2143392515173568, + "learning_rate": 5.66097415175686e-07, + "loss": 0.2023036777973175, + "step": 6763 + }, + { + "epoch": 1.7961758066657816, + "grad_norm": 1.3551091626165586, + "learning_rate": 5.646419743565845e-07, + "loss": 0.24798424541950226, + "step": 6764 + }, + { + "epoch": 1.7964413756473245, + "grad_norm": 1.2034553304236573, + "learning_rate": 5.631883525632297e-07, + "loss": 0.1885790377855301, + "step": 6765 + }, + { + "epoch": 1.7967069446288675, + "grad_norm": 1.3693229184747842, + "learning_rate": 5.617365500758631e-07, + "loss": 0.24120381474494934, + "step": 6766 + }, + { + "epoch": 1.7969725136104104, + "grad_norm": 1.2063823939207, + 
"learning_rate": 5.602865671743763e-07, + "loss": 0.24238690733909607, + "step": 6767 + }, + { + "epoch": 1.7972380825919534, + "grad_norm": 1.2611645650605894, + "learning_rate": 5.588384041383089e-07, + "loss": 0.22928190231323242, + "step": 6768 + }, + { + "epoch": 1.7975036515734963, + "grad_norm": 1.3148280979127052, + "learning_rate": 5.573920612468486e-07, + "loss": 0.2464730143547058, + "step": 6769 + }, + { + "epoch": 1.7977692205550393, + "grad_norm": 1.149985298163883, + "learning_rate": 5.559475387788348e-07, + "loss": 0.2167670875787735, + "step": 6770 + }, + { + "epoch": 1.7980347895365822, + "grad_norm": 1.3365719233561757, + "learning_rate": 5.545048370127526e-07, + "loss": 0.24080663919448853, + "step": 6771 + }, + { + "epoch": 1.7983003585181252, + "grad_norm": 1.3571891328346308, + "learning_rate": 5.530639562267382e-07, + "loss": 0.25481417775154114, + "step": 6772 + }, + { + "epoch": 1.7985659274996681, + "grad_norm": 1.3525822075957274, + "learning_rate": 5.51624896698576e-07, + "loss": 0.23328909277915955, + "step": 6773 + }, + { + "epoch": 1.798831496481211, + "grad_norm": 1.136424514008492, + "learning_rate": 5.50187658705702e-07, + "loss": 0.18779747188091278, + "step": 6774 + }, + { + "epoch": 1.799097065462754, + "grad_norm": 1.3089016035676113, + "learning_rate": 5.487522425251968e-07, + "loss": 0.24840545654296875, + "step": 6775 + }, + { + "epoch": 1.799362634444297, + "grad_norm": 1.4658187281761286, + "learning_rate": 5.473186484337911e-07, + "loss": 0.2559642791748047, + "step": 6776 + }, + { + "epoch": 1.79962820342584, + "grad_norm": 1.3714243263968933, + "learning_rate": 5.458868767078673e-07, + "loss": 0.2005981206893921, + "step": 6777 + }, + { + "epoch": 1.799893772407383, + "grad_norm": 1.4085177100377464, + "learning_rate": 5.444569276234523e-07, + "loss": 0.2480883002281189, + "step": 6778 + }, + { + "epoch": 1.8001593413889259, + "grad_norm": 1.2203856732153913, + "learning_rate": 5.430288014562235e-07, + "loss": 
0.23043295741081238, + "step": 6779 + }, + { + "epoch": 1.8004249103704688, + "grad_norm": 1.4245462518797845, + "learning_rate": 5.416024984815072e-07, + "loss": 0.22702521085739136, + "step": 6780 + }, + { + "epoch": 1.8006904793520118, + "grad_norm": 1.153610007644359, + "learning_rate": 5.401780189742789e-07, + "loss": 0.19955751299858093, + "step": 6781 + }, + { + "epoch": 1.8009560483335547, + "grad_norm": 1.2560139759300732, + "learning_rate": 5.387553632091591e-07, + "loss": 0.19743162393569946, + "step": 6782 + }, + { + "epoch": 1.8012216173150977, + "grad_norm": 1.3072968250539403, + "learning_rate": 5.373345314604206e-07, + "loss": 0.2262525111436844, + "step": 6783 + }, + { + "epoch": 1.8014871862966406, + "grad_norm": 1.2987858405959638, + "learning_rate": 5.359155240019809e-07, + "loss": 0.249632328748703, + "step": 6784 + }, + { + "epoch": 1.8017527552781836, + "grad_norm": 1.1804135507002813, + "learning_rate": 5.344983411074111e-07, + "loss": 0.19300231337547302, + "step": 6785 + }, + { + "epoch": 1.8020183242597265, + "grad_norm": 1.293291337799575, + "learning_rate": 5.330829830499263e-07, + "loss": 0.22256134450435638, + "step": 6786 + }, + { + "epoch": 1.8022838932412695, + "grad_norm": 1.283065855572867, + "learning_rate": 5.316694501023911e-07, + "loss": 0.2666356563568115, + "step": 6787 + }, + { + "epoch": 1.8025494622228124, + "grad_norm": 1.239663996945653, + "learning_rate": 5.302577425373156e-07, + "loss": 0.223050057888031, + "step": 6788 + }, + { + "epoch": 1.8028150312043554, + "grad_norm": 1.3011452698852823, + "learning_rate": 5.288478606268632e-07, + "loss": 0.2298094481229782, + "step": 6789 + }, + { + "epoch": 1.8030806001858983, + "grad_norm": 1.4761708863150307, + "learning_rate": 5.27439804642843e-07, + "loss": 0.23596417903900146, + "step": 6790 + }, + { + "epoch": 1.8033461691674413, + "grad_norm": 1.226229776793909, + "learning_rate": 5.26033574856708e-07, + "loss": 0.19501623511314392, + "step": 6791 + }, + { + "epoch": 
1.8036117381489842, + "grad_norm": 1.2825838070785722, + "learning_rate": 5.246291715395657e-07, + "loss": 0.23518472909927368, + "step": 6792 + }, + { + "epoch": 1.8038773071305272, + "grad_norm": 1.1820374841237484, + "learning_rate": 5.232265949621651e-07, + "loss": 0.2251899093389511, + "step": 6793 + }, + { + "epoch": 1.8041428761120701, + "grad_norm": 1.1527654541489951, + "learning_rate": 5.218258453949099e-07, + "loss": 0.1764119267463684, + "step": 6794 + }, + { + "epoch": 1.804408445093613, + "grad_norm": 1.2895741356204065, + "learning_rate": 5.204269231078484e-07, + "loss": 0.20768773555755615, + "step": 6795 + }, + { + "epoch": 1.804674014075156, + "grad_norm": 1.3841780370828203, + "learning_rate": 5.19029828370674e-07, + "loss": 0.2115546613931656, + "step": 6796 + }, + { + "epoch": 1.804939583056699, + "grad_norm": 1.315680847185169, + "learning_rate": 5.176345614527312e-07, + "loss": 0.2465972602367401, + "step": 6797 + }, + { + "epoch": 1.805205152038242, + "grad_norm": 1.379203464130328, + "learning_rate": 5.162411226230102e-07, + "loss": 0.2359803020954132, + "step": 6798 + }, + { + "epoch": 1.805470721019785, + "grad_norm": 1.4106819634653143, + "learning_rate": 5.148495121501506e-07, + "loss": 0.27518990635871887, + "step": 6799 + }, + { + "epoch": 1.8057362900013278, + "grad_norm": 1.3653410113402416, + "learning_rate": 5.134597303024391e-07, + "loss": 0.23914849758148193, + "step": 6800 + }, + { + "epoch": 1.8060018589828708, + "grad_norm": 1.256847668479307, + "learning_rate": 5.120717773478068e-07, + "loss": 0.21771098673343658, + "step": 6801 + }, + { + "epoch": 1.8062674279644138, + "grad_norm": 1.2716100664289411, + "learning_rate": 5.106856535538363e-07, + "loss": 0.235421285033226, + "step": 6802 + }, + { + "epoch": 1.8065329969459567, + "grad_norm": 1.4167241401735549, + "learning_rate": 5.093013591877561e-07, + "loss": 0.23973548412322998, + "step": 6803 + }, + { + "epoch": 1.8067985659274997, + "grad_norm": 1.484886222602596, + 
"learning_rate": 5.079188945164426e-07, + "loss": 0.24059349298477173, + "step": 6804 + }, + { + "epoch": 1.8070641349090426, + "grad_norm": 1.3840991454067133, + "learning_rate": 5.065382598064161e-07, + "loss": 0.25188207626342773, + "step": 6805 + }, + { + "epoch": 1.8073297038905856, + "grad_norm": 1.1866308474402574, + "learning_rate": 5.051594553238482e-07, + "loss": 0.20124536752700806, + "step": 6806 + }, + { + "epoch": 1.8075952728721285, + "grad_norm": 1.2234769875088154, + "learning_rate": 5.037824813345571e-07, + "loss": 0.2059330940246582, + "step": 6807 + }, + { + "epoch": 1.8078608418536715, + "grad_norm": 1.2468279665046458, + "learning_rate": 5.024073381040052e-07, + "loss": 0.2122621238231659, + "step": 6808 + }, + { + "epoch": 1.8081264108352144, + "grad_norm": 1.2203093249465347, + "learning_rate": 5.010340258973046e-07, + "loss": 0.20064303278923035, + "step": 6809 + }, + { + "epoch": 1.8083919798167574, + "grad_norm": 1.3685187895509534, + "learning_rate": 4.996625449792147e-07, + "loss": 0.24773281812667847, + "step": 6810 + }, + { + "epoch": 1.8086575487983003, + "grad_norm": 1.149837064877599, + "learning_rate": 4.982928956141375e-07, + "loss": 0.2111661732196808, + "step": 6811 + }, + { + "epoch": 1.8089231177798433, + "grad_norm": 1.2721912706796665, + "learning_rate": 4.969250780661306e-07, + "loss": 0.24823394417762756, + "step": 6812 + }, + { + "epoch": 1.8091886867613862, + "grad_norm": 1.410632443971984, + "learning_rate": 4.955590925988896e-07, + "loss": 0.24726605415344238, + "step": 6813 + }, + { + "epoch": 1.8094542557429292, + "grad_norm": 1.3112520269484638, + "learning_rate": 4.941949394757605e-07, + "loss": 0.2269962728023529, + "step": 6814 + }, + { + "epoch": 1.8097198247244721, + "grad_norm": 1.311172380903373, + "learning_rate": 4.928326189597377e-07, + "loss": 0.2336469292640686, + "step": 6815 + }, + { + "epoch": 1.809985393706015, + "grad_norm": 1.3372206959113173, + "learning_rate": 4.914721313134585e-07, + "loss": 
0.24872124195098877, + "step": 6816 + }, + { + "epoch": 1.810250962687558, + "grad_norm": 1.3116570930981006, + "learning_rate": 4.901134767992099e-07, + "loss": 0.2484157383441925, + "step": 6817 + }, + { + "epoch": 1.810516531669101, + "grad_norm": 1.5234901533359522, + "learning_rate": 4.887566556789247e-07, + "loss": 0.24683158099651337, + "step": 6818 + }, + { + "epoch": 1.810782100650644, + "grad_norm": 1.1959899225802055, + "learning_rate": 4.874016682141802e-07, + "loss": 0.18717995285987854, + "step": 6819 + }, + { + "epoch": 1.8110476696321869, + "grad_norm": 1.2862771000886628, + "learning_rate": 4.860485146662053e-07, + "loss": 0.2220807671546936, + "step": 6820 + }, + { + "epoch": 1.8113132386137298, + "grad_norm": 1.196369102162481, + "learning_rate": 4.84697195295869e-07, + "loss": 0.2178400307893753, + "step": 6821 + }, + { + "epoch": 1.8115788075952728, + "grad_norm": 1.2250082051849178, + "learning_rate": 4.833477103636908e-07, + "loss": 0.2056645154953003, + "step": 6822 + }, + { + "epoch": 1.8118443765768157, + "grad_norm": 1.1729075702986809, + "learning_rate": 4.820000601298358e-07, + "loss": 0.21441905200481415, + "step": 6823 + }, + { + "epoch": 1.8121099455583587, + "grad_norm": 1.4445497728186703, + "learning_rate": 4.806542448541151e-07, + "loss": 0.17688237130641937, + "step": 6824 + }, + { + "epoch": 1.8123755145399016, + "grad_norm": 1.3216659704658935, + "learning_rate": 4.793102647959847e-07, + "loss": 0.22405505180358887, + "step": 6825 + }, + { + "epoch": 1.8126410835214446, + "grad_norm": 1.4226735460298432, + "learning_rate": 4.779681202145503e-07, + "loss": 0.21617908775806427, + "step": 6826 + }, + { + "epoch": 1.8129066525029875, + "grad_norm": 1.3284639992790963, + "learning_rate": 4.766278113685596e-07, + "loss": 0.23570871353149414, + "step": 6827 + }, + { + "epoch": 1.8131722214845305, + "grad_norm": 1.222373726415007, + "learning_rate": 4.7528933851641036e-07, + "loss": 0.23806743323802948, + "step": 6828 + }, + { + 
"epoch": 1.8134377904660735, + "grad_norm": 1.3312930220149763, + "learning_rate": 4.739527019161405e-07, + "loss": 0.24859179556369781, + "step": 6829 + }, + { + "epoch": 1.8137033594476164, + "grad_norm": 1.2143252342774762, + "learning_rate": 4.726179018254418e-07, + "loss": 0.21314260363578796, + "step": 6830 + }, + { + "epoch": 1.8139689284291594, + "grad_norm": 1.272910058647325, + "learning_rate": 4.7128493850164715e-07, + "loss": 0.25290659070014954, + "step": 6831 + }, + { + "epoch": 1.8142344974107023, + "grad_norm": 1.1800117497978073, + "learning_rate": 4.699538122017355e-07, + "loss": 0.22606703639030457, + "step": 6832 + }, + { + "epoch": 1.8145000663922453, + "grad_norm": 1.3037958158309495, + "learning_rate": 4.6862452318233275e-07, + "loss": 0.23973071575164795, + "step": 6833 + }, + { + "epoch": 1.8147656353737882, + "grad_norm": 1.2341358358957555, + "learning_rate": 4.672970716997094e-07, + "loss": 0.2225341498851776, + "step": 6834 + }, + { + "epoch": 1.8150312043553312, + "grad_norm": 1.441833447404081, + "learning_rate": 4.6597145800978183e-07, + "loss": 0.19153356552124023, + "step": 6835 + }, + { + "epoch": 1.8152967733368741, + "grad_norm": 1.2010339801105188, + "learning_rate": 4.646476823681145e-07, + "loss": 0.19694843888282776, + "step": 6836 + }, + { + "epoch": 1.815562342318417, + "grad_norm": 1.2719437537675773, + "learning_rate": 4.6332574502991554e-07, + "loss": 0.2353869527578354, + "step": 6837 + }, + { + "epoch": 1.81582791129996, + "grad_norm": 1.3504470280928214, + "learning_rate": 4.6200564625003775e-07, + "loss": 0.20919787883758545, + "step": 6838 + }, + { + "epoch": 1.816093480281503, + "grad_norm": 1.1775336742921327, + "learning_rate": 4.6068738628298193e-07, + "loss": 0.18352919816970825, + "step": 6839 + }, + { + "epoch": 1.816359049263046, + "grad_norm": 1.3571378213568392, + "learning_rate": 4.5937096538289147e-07, + "loss": 0.24711212515830994, + "step": 6840 + }, + { + "epoch": 1.8166246182445889, + "grad_norm": 
1.2216287617055834, + "learning_rate": 4.580563838035579e-07, + "loss": 0.2350531816482544, + "step": 6841 + }, + { + "epoch": 1.8168901872261318, + "grad_norm": 1.3731447849726235, + "learning_rate": 4.5674364179841614e-07, + "loss": 0.26124465465545654, + "step": 6842 + }, + { + "epoch": 1.8171557562076748, + "grad_norm": 1.3819435677197398, + "learning_rate": 4.5543273962054934e-07, + "loss": 0.2110440880060196, + "step": 6843 + }, + { + "epoch": 1.817421325189218, + "grad_norm": 1.425540844923539, + "learning_rate": 4.5412367752268094e-07, + "loss": 0.2409415990114212, + "step": 6844 + }, + { + "epoch": 1.817686894170761, + "grad_norm": 1.2827549712815094, + "learning_rate": 4.528164557571857e-07, + "loss": 0.2280777543783188, + "step": 6845 + }, + { + "epoch": 1.8179524631523039, + "grad_norm": 1.111661347066374, + "learning_rate": 4.515110745760787e-07, + "loss": 0.201339989900589, + "step": 6846 + }, + { + "epoch": 1.8182180321338468, + "grad_norm": 1.2576623337538495, + "learning_rate": 4.5020753423102083e-07, + "loss": 0.22910752892494202, + "step": 6847 + }, + { + "epoch": 1.8184836011153898, + "grad_norm": 1.2835742527474332, + "learning_rate": 4.4890583497332327e-07, + "loss": 0.21736779808998108, + "step": 6848 + }, + { + "epoch": 1.8187491700969327, + "grad_norm": 1.282796826855034, + "learning_rate": 4.476059770539354e-07, + "loss": 0.20898449420928955, + "step": 6849 + }, + { + "epoch": 1.8190147390784757, + "grad_norm": 1.2514312774528749, + "learning_rate": 4.463079607234555e-07, + "loss": 0.22159051895141602, + "step": 6850 + }, + { + "epoch": 1.8192803080600186, + "grad_norm": 1.290667660986327, + "learning_rate": 4.450117862321246e-07, + "loss": 0.24081172049045563, + "step": 6851 + }, + { + "epoch": 1.8195458770415616, + "grad_norm": 1.2092663587603776, + "learning_rate": 4.4371745382983164e-07, + "loss": 0.17856758832931519, + "step": 6852 + }, + { + "epoch": 1.8198114460231045, + "grad_norm": 1.2002967167521004, + "learning_rate": 
4.424249637661071e-07, + "loss": 0.20796868205070496, + "step": 6853 + }, + { + "epoch": 1.8200770150046475, + "grad_norm": 1.5683273026632796, + "learning_rate": 4.4113431629013046e-07, + "loss": 0.24277149140834808, + "step": 6854 + }, + { + "epoch": 1.8203425839861904, + "grad_norm": 1.1767967505464594, + "learning_rate": 4.3984551165071944e-07, + "loss": 0.19315838813781738, + "step": 6855 + }, + { + "epoch": 1.8206081529677334, + "grad_norm": 1.2457379727303777, + "learning_rate": 4.3855855009634075e-07, + "loss": 0.20789340138435364, + "step": 6856 + }, + { + "epoch": 1.8208737219492763, + "grad_norm": 1.4246348317049922, + "learning_rate": 4.372734318751082e-07, + "loss": 0.2871186137199402, + "step": 6857 + }, + { + "epoch": 1.8211392909308193, + "grad_norm": 1.3878283876849893, + "learning_rate": 4.359901572347758e-07, + "loss": 0.2419736236333847, + "step": 6858 + }, + { + "epoch": 1.8214048599123622, + "grad_norm": 1.3237602075469659, + "learning_rate": 4.3470872642274455e-07, + "loss": 0.2190292328596115, + "step": 6859 + }, + { + "epoch": 1.8216704288939052, + "grad_norm": 1.3879953178475168, + "learning_rate": 4.3342913968605903e-07, + "loss": 0.2654367685317993, + "step": 6860 + }, + { + "epoch": 1.8219359978754481, + "grad_norm": 1.3362249609314758, + "learning_rate": 4.321513972714075e-07, + "loss": 0.2536984086036682, + "step": 6861 + }, + { + "epoch": 1.822201566856991, + "grad_norm": 1.3804156416489965, + "learning_rate": 4.308754994251252e-07, + "loss": 0.260431170463562, + "step": 6862 + }, + { + "epoch": 1.822467135838534, + "grad_norm": 1.1376782237723586, + "learning_rate": 4.2960144639318855e-07, + "loss": 0.19348303973674774, + "step": 6863 + }, + { + "epoch": 1.822732704820077, + "grad_norm": 1.3505211109720399, + "learning_rate": 4.283292384212201e-07, + "loss": 0.2284386157989502, + "step": 6864 + }, + { + "epoch": 1.82299827380162, + "grad_norm": 1.2449697035186624, + "learning_rate": 4.270588757544869e-07, + "loss": 
0.23439526557922363, + "step": 6865 + }, + { + "epoch": 1.823263842783163, + "grad_norm": 1.247098399621602, + "learning_rate": 4.2579035863790086e-07, + "loss": 0.2123441994190216, + "step": 6866 + }, + { + "epoch": 1.8235294117647058, + "grad_norm": 1.251423525262008, + "learning_rate": 4.245236873160163e-07, + "loss": 0.24568180739879608, + "step": 6867 + }, + { + "epoch": 1.8237949807462488, + "grad_norm": 1.4504253184377665, + "learning_rate": 4.232588620330325e-07, + "loss": 0.24078285694122314, + "step": 6868 + }, + { + "epoch": 1.8240605497277917, + "grad_norm": 1.157509101798501, + "learning_rate": 4.2199588303279414e-07, + "loss": 0.2003621608018875, + "step": 6869 + }, + { + "epoch": 1.8243261187093347, + "grad_norm": 1.3049050095763572, + "learning_rate": 4.2073475055878664e-07, + "loss": 0.21201889216899872, + "step": 6870 + }, + { + "epoch": 1.8245916876908777, + "grad_norm": 1.429124542908126, + "learning_rate": 4.1947546485414215e-07, + "loss": 0.23175427317619324, + "step": 6871 + }, + { + "epoch": 1.8248572566724208, + "grad_norm": 1.3101487536079581, + "learning_rate": 4.182180261616364e-07, + "loss": 0.2391383945941925, + "step": 6872 + }, + { + "epoch": 1.8251228256539638, + "grad_norm": 1.341869026992186, + "learning_rate": 4.169624347236878e-07, + "loss": 0.23120146989822388, + "step": 6873 + }, + { + "epoch": 1.8253883946355067, + "grad_norm": 1.1699948636498165, + "learning_rate": 4.157086907823604e-07, + "loss": 0.22541432082653046, + "step": 6874 + }, + { + "epoch": 1.8256539636170497, + "grad_norm": 1.3354293669412138, + "learning_rate": 4.1445679457936094e-07, + "loss": 0.25613510608673096, + "step": 6875 + }, + { + "epoch": 1.8259195325985926, + "grad_norm": 1.191861909098097, + "learning_rate": 4.1320674635604186e-07, + "loss": 0.21002547442913055, + "step": 6876 + }, + { + "epoch": 1.8261851015801356, + "grad_norm": 1.230870532242656, + "learning_rate": 4.119585463533959e-07, + "loss": 0.2593066692352295, + "step": 6877 + }, + { + 
"epoch": 1.8264506705616785, + "grad_norm": 1.4772106156087776, + "learning_rate": 4.1071219481206184e-07, + "loss": 0.23771531879901886, + "step": 6878 + }, + { + "epoch": 1.8267162395432215, + "grad_norm": 1.3106459571340912, + "learning_rate": 4.094676919723206e-07, + "loss": 0.2069541960954666, + "step": 6879 + }, + { + "epoch": 1.8269818085247644, + "grad_norm": 1.2065450512433227, + "learning_rate": 4.082250380740993e-07, + "loss": 0.21314311027526855, + "step": 6880 + }, + { + "epoch": 1.8272473775063074, + "grad_norm": 1.2723957233809677, + "learning_rate": 4.069842333569662e-07, + "loss": 0.198696106672287, + "step": 6881 + }, + { + "epoch": 1.8275129464878503, + "grad_norm": 1.2365636263350124, + "learning_rate": 4.057452780601334e-07, + "loss": 0.22771228849887848, + "step": 6882 + }, + { + "epoch": 1.8277785154693933, + "grad_norm": 1.3935711018120034, + "learning_rate": 4.045081724224564e-07, + "loss": 0.24176150560379028, + "step": 6883 + }, + { + "epoch": 1.8280440844509362, + "grad_norm": 1.1711714123320747, + "learning_rate": 4.0327291668243785e-07, + "loss": 0.18257084488868713, + "step": 6884 + }, + { + "epoch": 1.8283096534324792, + "grad_norm": 1.7740145369201021, + "learning_rate": 4.02039511078216e-07, + "loss": 0.2317531704902649, + "step": 6885 + }, + { + "epoch": 1.8285752224140222, + "grad_norm": 1.237685133468282, + "learning_rate": 4.008079558475797e-07, + "loss": 0.22523516416549683, + "step": 6886 + }, + { + "epoch": 1.828840791395565, + "grad_norm": 1.338469580607285, + "learning_rate": 3.995782512279578e-07, + "loss": 0.22351330518722534, + "step": 6887 + }, + { + "epoch": 1.829106360377108, + "grad_norm": 1.3272231861758204, + "learning_rate": 3.983503974564229e-07, + "loss": 0.22151902318000793, + "step": 6888 + }, + { + "epoch": 1.829371929358651, + "grad_norm": 1.2483501881623744, + "learning_rate": 3.971243947696901e-07, + "loss": 0.20800583064556122, + "step": 6889 + }, + { + "epoch": 1.829637498340194, + "grad_norm": 
1.189419989304772, + "learning_rate": 3.959002434041181e-07, + "loss": 0.21332690119743347, + "step": 6890 + }, + { + "epoch": 1.829903067321737, + "grad_norm": 1.3040750377284556, + "learning_rate": 3.946779435957093e-07, + "loss": 0.2561502456665039, + "step": 6891 + }, + { + "epoch": 1.8301686363032799, + "grad_norm": 1.2150229659643972, + "learning_rate": 3.934574955801074e-07, + "loss": 0.23636910319328308, + "step": 6892 + }, + { + "epoch": 1.8304342052848228, + "grad_norm": 1.303931878967275, + "learning_rate": 3.922388995926041e-07, + "loss": 0.26683998107910156, + "step": 6893 + }, + { + "epoch": 1.8306997742663658, + "grad_norm": 1.319570373744726, + "learning_rate": 3.910221558681271e-07, + "loss": 0.2779492735862732, + "step": 6894 + }, + { + "epoch": 1.8309653432479087, + "grad_norm": 1.473106593059021, + "learning_rate": 3.8980726464125095e-07, + "loss": 0.20174488425254822, + "step": 6895 + }, + { + "epoch": 1.8312309122294517, + "grad_norm": 1.3128034885814306, + "learning_rate": 3.885942261461928e-07, + "loss": 0.21486055850982666, + "step": 6896 + }, + { + "epoch": 1.8314964812109946, + "grad_norm": 1.2201269476427121, + "learning_rate": 3.8738304061681107e-07, + "loss": 0.25637733936309814, + "step": 6897 + }, + { + "epoch": 1.8317620501925376, + "grad_norm": 1.3661274524986262, + "learning_rate": 3.8617370828661014e-07, + "loss": 0.2518364489078522, + "step": 6898 + }, + { + "epoch": 1.8320276191740805, + "grad_norm": 1.2902396654446358, + "learning_rate": 3.849662293887324e-07, + "loss": 0.25752246379852295, + "step": 6899 + }, + { + "epoch": 1.8322931881556235, + "grad_norm": 1.1514833439027936, + "learning_rate": 3.8376060415596826e-07, + "loss": 0.20891718566417694, + "step": 6900 + }, + { + "epoch": 1.8325587571371664, + "grad_norm": 1.378720679176223, + "learning_rate": 3.825568328207452e-07, + "loss": 0.20491960644721985, + "step": 6901 + }, + { + "epoch": 1.8328243261187094, + "grad_norm": 1.2540067790590503, + "learning_rate": 
3.813549156151386e-07, + "loss": 0.22183339297771454, + "step": 6902 + }, + { + "epoch": 1.8330898951002523, + "grad_norm": 1.3321077338345055, + "learning_rate": 3.801548527708621e-07, + "loss": 0.2476987987756729, + "step": 6903 + }, + { + "epoch": 1.8333554640817953, + "grad_norm": 1.470629998110282, + "learning_rate": 3.7895664451927493e-07, + "loss": 0.26486238837242126, + "step": 6904 + }, + { + "epoch": 1.8336210330633382, + "grad_norm": 1.2524745099106778, + "learning_rate": 3.777602910913769e-07, + "loss": 0.25922873616218567, + "step": 6905 + }, + { + "epoch": 1.8338866020448812, + "grad_norm": 1.317563058388092, + "learning_rate": 3.7656579271781127e-07, + "loss": 0.22682476043701172, + "step": 6906 + }, + { + "epoch": 1.8341521710264241, + "grad_norm": 1.2391277284536568, + "learning_rate": 3.753731496288626e-07, + "loss": 0.20371592044830322, + "step": 6907 + }, + { + "epoch": 1.834417740007967, + "grad_norm": 1.2444383452097851, + "learning_rate": 3.7418236205445826e-07, + "loss": 0.23857446014881134, + "step": 6908 + }, + { + "epoch": 1.83468330898951, + "grad_norm": 2.6487436557467645, + "learning_rate": 3.729934302241689e-07, + "loss": 0.27119290828704834, + "step": 6909 + }, + { + "epoch": 1.834948877971053, + "grad_norm": 1.254159773595776, + "learning_rate": 3.7180635436720567e-07, + "loss": 0.2354927361011505, + "step": 6910 + }, + { + "epoch": 1.835214446952596, + "grad_norm": 1.301136184663389, + "learning_rate": 3.706211347124233e-07, + "loss": 0.26378512382507324, + "step": 6911 + }, + { + "epoch": 1.835480015934139, + "grad_norm": 1.3296098934003593, + "learning_rate": 3.6943777148831907e-07, + "loss": 0.20725026726722717, + "step": 6912 + }, + { + "epoch": 1.8357455849156818, + "grad_norm": 1.2212362377090786, + "learning_rate": 3.682562649230304e-07, + "loss": 0.2049856185913086, + "step": 6913 + }, + { + "epoch": 1.8360111538972248, + "grad_norm": 1.2555620791922353, + "learning_rate": 3.6707661524433833e-07, + "loss": 
0.19303423166275024, + "step": 6914 + }, + { + "epoch": 1.8362767228787678, + "grad_norm": 1.2395332139010746, + "learning_rate": 3.6589882267966445e-07, + "loss": 0.21510104835033417, + "step": 6915 + }, + { + "epoch": 1.8365422918603107, + "grad_norm": 1.1669418633603965, + "learning_rate": 3.6472288745607376e-07, + "loss": 0.1933138072490692, + "step": 6916 + }, + { + "epoch": 1.8368078608418537, + "grad_norm": 1.112367559966563, + "learning_rate": 3.6354880980027373e-07, + "loss": 0.2015206664800644, + "step": 6917 + }, + { + "epoch": 1.8370734298233966, + "grad_norm": 1.2823070307410491, + "learning_rate": 3.6237658993861114e-07, + "loss": 0.20550866425037384, + "step": 6918 + }, + { + "epoch": 1.8373389988049396, + "grad_norm": 1.3067689335737758, + "learning_rate": 3.612062280970763e-07, + "loss": 0.221620112657547, + "step": 6919 + }, + { + "epoch": 1.8376045677864825, + "grad_norm": 1.3556317520839982, + "learning_rate": 3.6003772450130315e-07, + "loss": 0.23098941147327423, + "step": 6920 + }, + { + "epoch": 1.8378701367680255, + "grad_norm": 1.147765516964157, + "learning_rate": 3.588710793765626e-07, + "loss": 0.2119837999343872, + "step": 6921 + }, + { + "epoch": 1.8381357057495684, + "grad_norm": 1.3802709807389941, + "learning_rate": 3.5770629294777146e-07, + "loss": 0.24879229068756104, + "step": 6922 + }, + { + "epoch": 1.8384012747311114, + "grad_norm": 1.3060365647669372, + "learning_rate": 3.565433654394879e-07, + "loss": 0.18895789980888367, + "step": 6923 + }, + { + "epoch": 1.8386668437126543, + "grad_norm": 1.2553378569117732, + "learning_rate": 3.55382297075908e-07, + "loss": 0.23148275911808014, + "step": 6924 + }, + { + "epoch": 1.8389324126941973, + "grad_norm": 1.212120061404488, + "learning_rate": 3.542230880808739e-07, + "loss": 0.20919913053512573, + "step": 6925 + }, + { + "epoch": 1.8391979816757402, + "grad_norm": 1.4703495422250146, + "learning_rate": 3.53065738677868e-07, + "loss": 0.22832845151424408, + "step": 6926 + }, + { + 
"epoch": 1.8394635506572832, + "grad_norm": 1.2792392305491092, + "learning_rate": 3.519102490900117e-07, + "loss": 0.25866004824638367, + "step": 6927 + }, + { + "epoch": 1.8397291196388261, + "grad_norm": 1.4425441758777668, + "learning_rate": 3.507566195400691e-07, + "loss": 0.23372048139572144, + "step": 6928 + }, + { + "epoch": 1.839994688620369, + "grad_norm": 1.3100572186568338, + "learning_rate": 3.496048502504501e-07, + "loss": 0.2516997158527374, + "step": 6929 + }, + { + "epoch": 1.840260257601912, + "grad_norm": 1.3352189279547024, + "learning_rate": 3.4845494144320036e-07, + "loss": 0.21170508861541748, + "step": 6930 + }, + { + "epoch": 1.840525826583455, + "grad_norm": 1.3970465930645521, + "learning_rate": 3.473068933400081e-07, + "loss": 0.2642953395843506, + "step": 6931 + }, + { + "epoch": 1.840791395564998, + "grad_norm": 1.2429277065520816, + "learning_rate": 3.461607061622041e-07, + "loss": 0.2294994294643402, + "step": 6932 + }, + { + "epoch": 1.8410569645465409, + "grad_norm": 1.3898674163561502, + "learning_rate": 3.450163801307582e-07, + "loss": 0.2554621696472168, + "step": 6933 + }, + { + "epoch": 1.8413225335280838, + "grad_norm": 1.5251200097904765, + "learning_rate": 3.4387391546628733e-07, + "loss": 0.2291295826435089, + "step": 6934 + }, + { + "epoch": 1.8415881025096268, + "grad_norm": 1.2253918775229307, + "learning_rate": 3.4273331238903974e-07, + "loss": 0.1996842920780182, + "step": 6935 + }, + { + "epoch": 1.8418536714911697, + "grad_norm": 1.3974356568527164, + "learning_rate": 3.415945711189128e-07, + "loss": 0.248038187623024, + "step": 6936 + }, + { + "epoch": 1.8421192404727127, + "grad_norm": 1.4224083213114915, + "learning_rate": 3.4045769187544096e-07, + "loss": 0.232235848903656, + "step": 6937 + }, + { + "epoch": 1.8423848094542556, + "grad_norm": 1.2811247103872994, + "learning_rate": 3.3932267487780333e-07, + "loss": 0.2526085376739502, + "step": 6938 + }, + { + "epoch": 1.8426503784357986, + "grad_norm": 
1.324059920588895, + "learning_rate": 3.381895203448182e-07, + "loss": 0.22401389479637146, + "step": 6939 + }, + { + "epoch": 1.8429159474173415, + "grad_norm": 1.2904044842651823, + "learning_rate": 3.3705822849494195e-07, + "loss": 0.2509264647960663, + "step": 6940 + }, + { + "epoch": 1.8431815163988845, + "grad_norm": 1.2502849304352568, + "learning_rate": 3.3592879954627564e-07, + "loss": 0.2451169192790985, + "step": 6941 + }, + { + "epoch": 1.8434470853804275, + "grad_norm": 1.2774613485778883, + "learning_rate": 3.3480123371655957e-07, + "loss": 0.2361738532781601, + "step": 6942 + }, + { + "epoch": 1.8437126543619704, + "grad_norm": 1.1823675774441849, + "learning_rate": 3.3367553122317544e-07, + "loss": 0.22336295247077942, + "step": 6943 + }, + { + "epoch": 1.8439782233435134, + "grad_norm": 1.4218109729535482, + "learning_rate": 3.325516922831451e-07, + "loss": 0.22287659347057343, + "step": 6944 + }, + { + "epoch": 1.8442437923250563, + "grad_norm": 1.2819242467045069, + "learning_rate": 3.3142971711312975e-07, + "loss": 0.21845945715904236, + "step": 6945 + }, + { + "epoch": 1.8445093613065993, + "grad_norm": 1.2822597279006254, + "learning_rate": 3.303096059294364e-07, + "loss": 0.2650350332260132, + "step": 6946 + }, + { + "epoch": 1.8447749302881422, + "grad_norm": 1.346661503925149, + "learning_rate": 3.291913589480078e-07, + "loss": 0.21282124519348145, + "step": 6947 + }, + { + "epoch": 1.8450404992696852, + "grad_norm": 1.1254422779054267, + "learning_rate": 3.280749763844293e-07, + "loss": 0.17899346351623535, + "step": 6948 + }, + { + "epoch": 1.8453060682512281, + "grad_norm": 1.3295675928838626, + "learning_rate": 3.269604584539254e-07, + "loss": 0.23462103307247162, + "step": 6949 + }, + { + "epoch": 1.845571637232771, + "grad_norm": 1.2573990354862534, + "learning_rate": 3.2584780537136206e-07, + "loss": 0.20188388228416443, + "step": 6950 + }, + { + "epoch": 1.845837206214314, + "grad_norm": 1.3823133322277716, + "learning_rate": 
3.247370173512443e-07, + "loss": 0.2760109305381775, + "step": 6951 + }, + { + "epoch": 1.846102775195857, + "grad_norm": 1.1542508493730164, + "learning_rate": 3.236280946077219e-07, + "loss": 0.20977352559566498, + "step": 6952 + }, + { + "epoch": 1.8463683441774, + "grad_norm": 1.299549634983184, + "learning_rate": 3.225210373545806e-07, + "loss": 0.26468873023986816, + "step": 6953 + }, + { + "epoch": 1.8466339131589429, + "grad_norm": 1.287524526318513, + "learning_rate": 3.214158458052463e-07, + "loss": 0.2362184375524521, + "step": 6954 + }, + { + "epoch": 1.8468994821404858, + "grad_norm": 1.29131597308928, + "learning_rate": 3.2031252017278966e-07, + "loss": 0.21406327188014984, + "step": 6955 + }, + { + "epoch": 1.847165051122029, + "grad_norm": 1.4794600314925854, + "learning_rate": 3.1921106066991835e-07, + "loss": 0.2698758840560913, + "step": 6956 + }, + { + "epoch": 1.847430620103572, + "grad_norm": 1.3029413719135112, + "learning_rate": 3.1811146750898025e-07, + "loss": 0.22954389452934265, + "step": 6957 + }, + { + "epoch": 1.847696189085115, + "grad_norm": 1.149631756175727, + "learning_rate": 3.170137409019636e-07, + "loss": 0.23005755245685577, + "step": 6958 + }, + { + "epoch": 1.8479617580666579, + "grad_norm": 1.270561680049171, + "learning_rate": 3.159178810604968e-07, + "loss": 0.22408893704414368, + "step": 6959 + }, + { + "epoch": 1.8482273270482008, + "grad_norm": 1.1761716687553918, + "learning_rate": 3.14823888195851e-07, + "loss": 0.1983698308467865, + "step": 6960 + }, + { + "epoch": 1.8484928960297438, + "grad_norm": 1.387251984339494, + "learning_rate": 3.137317625189329e-07, + "loss": 0.24643054604530334, + "step": 6961 + }, + { + "epoch": 1.8487584650112867, + "grad_norm": 1.3612119090250128, + "learning_rate": 3.1264150424029083e-07, + "loss": 0.274917870759964, + "step": 6962 + }, + { + "epoch": 1.8490240339928297, + "grad_norm": 1.2836957141365997, + "learning_rate": 3.115531135701155e-07, + "loss": 0.2129468023777008, + 
"step": 6963 + }, + { + "epoch": 1.8492896029743726, + "grad_norm": 1.3421884287788837, + "learning_rate": 3.1046659071823695e-07, + "loss": 0.24127928912639618, + "step": 6964 + }, + { + "epoch": 1.8495551719559156, + "grad_norm": 1.2737231627436634, + "learning_rate": 3.093819358941208e-07, + "loss": 0.2528054416179657, + "step": 6965 + }, + { + "epoch": 1.8498207409374585, + "grad_norm": 1.253824703575336, + "learning_rate": 3.0829914930687767e-07, + "loss": 0.23623798787593842, + "step": 6966 + }, + { + "epoch": 1.8500863099190015, + "grad_norm": 1.231408637511902, + "learning_rate": 3.0721823116525497e-07, + "loss": 0.20241659879684448, + "step": 6967 + }, + { + "epoch": 1.8503518789005444, + "grad_norm": 1.264350645442844, + "learning_rate": 3.0613918167764156e-07, + "loss": 0.24365916848182678, + "step": 6968 + }, + { + "epoch": 1.8506174478820874, + "grad_norm": 1.311846273217192, + "learning_rate": 3.0506200105206554e-07, + "loss": 0.2550637722015381, + "step": 6969 + }, + { + "epoch": 1.8508830168636303, + "grad_norm": 1.1438212130974086, + "learning_rate": 3.0398668949619515e-07, + "loss": 0.21531938016414642, + "step": 6970 + }, + { + "epoch": 1.8511485858451733, + "grad_norm": 1.3468646282560623, + "learning_rate": 3.029132472173368e-07, + "loss": 0.22749900817871094, + "step": 6971 + }, + { + "epoch": 1.8514141548267162, + "grad_norm": 1.186404759445675, + "learning_rate": 3.018416744224373e-07, + "loss": 0.1826775223016739, + "step": 6972 + }, + { + "epoch": 1.8516797238082592, + "grad_norm": 1.1782373460713542, + "learning_rate": 3.0077197131808344e-07, + "loss": 0.21982814371585846, + "step": 6973 + }, + { + "epoch": 1.8519452927898021, + "grad_norm": 1.2874557997839566, + "learning_rate": 2.997041381105026e-07, + "loss": 0.23515473306179047, + "step": 6974 + }, + { + "epoch": 1.852210861771345, + "grad_norm": 1.2184369208885015, + "learning_rate": 2.9863817500556e-07, + "loss": 0.19620616734027863, + "step": 6975 + }, + { + "epoch": 
1.852476430752888, + "grad_norm": 1.208715706835639, + "learning_rate": 2.975740822087603e-07, + "loss": 0.22158116102218628, + "step": 6976 + }, + { + "epoch": 1.852741999734431, + "grad_norm": 1.5176127203291871, + "learning_rate": 2.96511859925247e-07, + "loss": 0.23082244396209717, + "step": 6977 + }, + { + "epoch": 1.853007568715974, + "grad_norm": 1.286088700644728, + "learning_rate": 2.954515083598064e-07, + "loss": 0.22743141651153564, + "step": 6978 + }, + { + "epoch": 1.853273137697517, + "grad_norm": 1.3437900472909596, + "learning_rate": 2.943930277168594e-07, + "loss": 0.2329188883304596, + "step": 6979 + }, + { + "epoch": 1.8535387066790598, + "grad_norm": 1.1892741095151198, + "learning_rate": 2.9333641820047055e-07, + "loss": 0.20360302925109863, + "step": 6980 + }, + { + "epoch": 1.8538042756606028, + "grad_norm": 1.1771915113483071, + "learning_rate": 2.922816800143402e-07, + "loss": 0.1903664767742157, + "step": 6981 + }, + { + "epoch": 1.8540698446421457, + "grad_norm": 1.2252145672801615, + "learning_rate": 2.912288133618102e-07, + "loss": 0.2247854322195053, + "step": 6982 + }, + { + "epoch": 1.8543354136236887, + "grad_norm": 1.305215823982529, + "learning_rate": 2.9017781844586035e-07, + "loss": 0.22693192958831787, + "step": 6983 + }, + { + "epoch": 1.8546009826052319, + "grad_norm": 1.3213552294005186, + "learning_rate": 2.891286954691108e-07, + "loss": 0.23769894242286682, + "step": 6984 + }, + { + "epoch": 1.8548665515867748, + "grad_norm": 1.267542763443237, + "learning_rate": 2.880814446338198e-07, + "loss": 0.23251450061798096, + "step": 6985 + }, + { + "epoch": 1.8551321205683178, + "grad_norm": 1.3253334264213772, + "learning_rate": 2.870360661418847e-07, + "loss": 0.20828741788864136, + "step": 6986 + }, + { + "epoch": 1.8553976895498607, + "grad_norm": 1.2448815733296377, + "learning_rate": 2.859925601948421e-07, + "loss": 0.2324519008398056, + "step": 6987 + }, + { + "epoch": 1.8556632585314037, + "grad_norm": 1.2799176737952995, 
+ "learning_rate": 2.8495092699386774e-07, + "loss": 0.2166297733783722, + "step": 6988 + }, + { + "epoch": 1.8559288275129466, + "grad_norm": 1.416567928880924, + "learning_rate": 2.839111667397765e-07, + "loss": 0.2760158181190491, + "step": 6989 + }, + { + "epoch": 1.8561943964944896, + "grad_norm": 1.1117414218952344, + "learning_rate": 2.8287327963302025e-07, + "loss": 0.2263752520084381, + "step": 6990 + }, + { + "epoch": 1.8564599654760325, + "grad_norm": 1.328135206527719, + "learning_rate": 2.8183726587369455e-07, + "loss": 0.2490656077861786, + "step": 6991 + }, + { + "epoch": 1.8567255344575755, + "grad_norm": 1.4860885268210424, + "learning_rate": 2.808031256615285e-07, + "loss": 0.22495508193969727, + "step": 6992 + }, + { + "epoch": 1.8569911034391184, + "grad_norm": 1.297235121122649, + "learning_rate": 2.7977085919589253e-07, + "loss": 0.2671046853065491, + "step": 6993 + }, + { + "epoch": 1.8572566724206614, + "grad_norm": 1.2050300397617886, + "learning_rate": 2.7874046667579535e-07, + "loss": 0.19782954454421997, + "step": 6994 + }, + { + "epoch": 1.8575222414022043, + "grad_norm": 1.3009259795352104, + "learning_rate": 2.777119482998847e-07, + "loss": 0.24458879232406616, + "step": 6995 + }, + { + "epoch": 1.8577878103837473, + "grad_norm": 1.203325902936209, + "learning_rate": 2.7668530426644637e-07, + "loss": 0.23476794362068176, + "step": 6996 + }, + { + "epoch": 1.8580533793652902, + "grad_norm": 1.3828799415147273, + "learning_rate": 2.7566053477340535e-07, + "loss": 0.2318287342786789, + "step": 6997 + }, + { + "epoch": 1.8583189483468332, + "grad_norm": 1.1075382213650395, + "learning_rate": 2.746376400183259e-07, + "loss": 0.21341973543167114, + "step": 6998 + }, + { + "epoch": 1.8585845173283762, + "grad_norm": 1.3634634009375282, + "learning_rate": 2.7361662019840916e-07, + "loss": 0.25269803404808044, + "step": 6999 + }, + { + "epoch": 1.858850086309919, + "grad_norm": 1.2242004376785176, + "learning_rate": 2.7259747551049653e-07, + 
"loss": 0.24590039253234863, + "step": 7000 + }, + { + "epoch": 1.859115655291462, + "grad_norm": 1.2116643717780577, + "learning_rate": 2.715802061510664e-07, + "loss": 0.19907096028327942, + "step": 7001 + }, + { + "epoch": 1.859381224273005, + "grad_norm": 1.319285786592131, + "learning_rate": 2.705648123162363e-07, + "loss": 0.24304917454719543, + "step": 7002 + }, + { + "epoch": 1.859646793254548, + "grad_norm": 1.3884525546157216, + "learning_rate": 2.6955129420176193e-07, + "loss": 0.24846915900707245, + "step": 7003 + }, + { + "epoch": 1.859912362236091, + "grad_norm": 1.365283429552511, + "learning_rate": 2.685396520030381e-07, + "loss": 0.21709200739860535, + "step": 7004 + }, + { + "epoch": 1.8601779312176339, + "grad_norm": 1.3687506828870908, + "learning_rate": 2.675298859150977e-07, + "loss": 0.28031325340270996, + "step": 7005 + }, + { + "epoch": 1.8604435001991768, + "grad_norm": 1.1527129171653896, + "learning_rate": 2.6652199613261155e-07, + "loss": 0.20367707312107086, + "step": 7006 + }, + { + "epoch": 1.8607090691807198, + "grad_norm": 1.1875101722790007, + "learning_rate": 2.6551598284988877e-07, + "loss": 0.20737403631210327, + "step": 7007 + }, + { + "epoch": 1.8609746381622627, + "grad_norm": 1.3375926225189751, + "learning_rate": 2.6451184626087646e-07, + "loss": 0.2504046559333801, + "step": 7008 + }, + { + "epoch": 1.8612402071438057, + "grad_norm": 1.3403751507501938, + "learning_rate": 2.635095865591608e-07, + "loss": 0.26347339153289795, + "step": 7009 + }, + { + "epoch": 1.8615057761253486, + "grad_norm": 1.1832867553985462, + "learning_rate": 2.625092039379662e-07, + "loss": 0.2347220480442047, + "step": 7010 + }, + { + "epoch": 1.8617713451068916, + "grad_norm": 1.2487098903864389, + "learning_rate": 2.6151069859015386e-07, + "loss": 0.23565630614757538, + "step": 7011 + }, + { + "epoch": 1.8620369140884345, + "grad_norm": 1.2377624004623402, + "learning_rate": 2.605140707082243e-07, + "loss": 0.21462437510490417, + "step": 7012 + 
}, + { + "epoch": 1.8623024830699775, + "grad_norm": 1.2992774401284823, + "learning_rate": 2.595193204843149e-07, + "loss": 0.24224728345870972, + "step": 7013 + }, + { + "epoch": 1.8625680520515204, + "grad_norm": 1.3531530893390702, + "learning_rate": 2.5852644811020344e-07, + "loss": 0.24200880527496338, + "step": 7014 + }, + { + "epoch": 1.8628336210330634, + "grad_norm": 1.2331149203562455, + "learning_rate": 2.5753545377730227e-07, + "loss": 0.23315191268920898, + "step": 7015 + }, + { + "epoch": 1.8630991900146063, + "grad_norm": 1.4360061023192454, + "learning_rate": 2.56546337676663e-07, + "loss": 0.31112274527549744, + "step": 7016 + }, + { + "epoch": 1.8633647589961493, + "grad_norm": 1.1775380155652753, + "learning_rate": 2.555590999989754e-07, + "loss": 0.2291945070028305, + "step": 7017 + }, + { + "epoch": 1.8636303279776922, + "grad_norm": 1.3248749602779475, + "learning_rate": 2.5457374093457057e-07, + "loss": 0.2324746549129486, + "step": 7018 + }, + { + "epoch": 1.8638958969592352, + "grad_norm": 1.3333311590100283, + "learning_rate": 2.5359026067341086e-07, + "loss": 0.2585206627845764, + "step": 7019 + }, + { + "epoch": 1.8641614659407781, + "grad_norm": 1.254813387894953, + "learning_rate": 2.5260865940510027e-07, + "loss": 0.22986871004104614, + "step": 7020 + }, + { + "epoch": 1.864427034922321, + "grad_norm": 1.3302473304174876, + "learning_rate": 2.5162893731888074e-07, + "loss": 0.22615428268909454, + "step": 7021 + }, + { + "epoch": 1.864692603903864, + "grad_norm": 1.2311139475810073, + "learning_rate": 2.5065109460363113e-07, + "loss": 0.21324753761291504, + "step": 7022 + }, + { + "epoch": 1.864958172885407, + "grad_norm": 1.2499721276179248, + "learning_rate": 2.4967513144786736e-07, + "loss": 0.2247733324766159, + "step": 7023 + }, + { + "epoch": 1.86522374186695, + "grad_norm": 1.198842298043478, + "learning_rate": 2.4870104803974336e-07, + "loss": 0.22080597281455994, + "step": 7024 + }, + { + "epoch": 1.865489310848493, + 
"grad_norm": 1.3721040923851937, + "learning_rate": 2.4772884456705224e-07, + "loss": 0.23669888079166412, + "step": 7025 + }, + { + "epoch": 1.8657548798300359, + "grad_norm": 1.2946969495879501, + "learning_rate": 2.4675852121722075e-07, + "loss": 0.2320847064256668, + "step": 7026 + }, + { + "epoch": 1.8660204488115788, + "grad_norm": 1.374404266409337, + "learning_rate": 2.4579007817731925e-07, + "loss": 0.2595662474632263, + "step": 7027 + }, + { + "epoch": 1.8662860177931218, + "grad_norm": 1.2351512812852723, + "learning_rate": 2.4482351563405174e-07, + "loss": 0.22152045369148254, + "step": 7028 + }, + { + "epoch": 1.8665515867746647, + "grad_norm": 1.270416082371449, + "learning_rate": 2.4385883377375683e-07, + "loss": 0.2391948401927948, + "step": 7029 + }, + { + "epoch": 1.8668171557562077, + "grad_norm": 1.3234796115140017, + "learning_rate": 2.428960327824159e-07, + "loss": 0.23117749392986298, + "step": 7030 + }, + { + "epoch": 1.8670827247377506, + "grad_norm": 1.313106749776766, + "learning_rate": 2.41935112845646e-07, + "loss": 0.24019500613212585, + "step": 7031 + }, + { + "epoch": 1.8673482937192936, + "grad_norm": 1.253088890729472, + "learning_rate": 2.4097607414869995e-07, + "loss": 0.19560202956199646, + "step": 7032 + }, + { + "epoch": 1.8676138627008365, + "grad_norm": 1.3625686769003584, + "learning_rate": 2.4001891687647103e-07, + "loss": 0.23110055923461914, + "step": 7033 + }, + { + "epoch": 1.8678794316823795, + "grad_norm": 1.3388200482229684, + "learning_rate": 2.39063641213485e-07, + "loss": 0.2214709371328354, + "step": 7034 + }, + { + "epoch": 1.8681450006639224, + "grad_norm": 1.2700799842548796, + "learning_rate": 2.381102473439101e-07, + "loss": 0.22123369574546814, + "step": 7035 + }, + { + "epoch": 1.8684105696454654, + "grad_norm": 1.4629863869289934, + "learning_rate": 2.371587354515481e-07, + "loss": 0.23984813690185547, + "step": 7036 + }, + { + "epoch": 1.8686761386270083, + "grad_norm": 1.4496870886295976, + 
"learning_rate": 2.3620910571984124e-07, + "loss": 0.26089030504226685, + "step": 7037 + }, + { + "epoch": 1.8689417076085513, + "grad_norm": 1.2076380290124689, + "learning_rate": 2.3526135833186527e-07, + "loss": 0.2344229370355606, + "step": 7038 + }, + { + "epoch": 1.8692072765900942, + "grad_norm": 1.290620691312973, + "learning_rate": 2.34315493470334e-07, + "loss": 0.24499498307704926, + "step": 7039 + }, + { + "epoch": 1.8694728455716372, + "grad_norm": 1.2975050166282813, + "learning_rate": 2.333715113176005e-07, + "loss": 0.21971477568149567, + "step": 7040 + }, + { + "epoch": 1.8697384145531801, + "grad_norm": 1.2659856510175163, + "learning_rate": 2.3242941205565362e-07, + "loss": 0.2594453990459442, + "step": 7041 + }, + { + "epoch": 1.870003983534723, + "grad_norm": 1.3125676617059407, + "learning_rate": 2.3148919586611806e-07, + "loss": 0.24689960479736328, + "step": 7042 + }, + { + "epoch": 1.870269552516266, + "grad_norm": 1.2165345453138858, + "learning_rate": 2.3055086293025665e-07, + "loss": 0.19972509145736694, + "step": 7043 + }, + { + "epoch": 1.870535121497809, + "grad_norm": 1.2460782677559714, + "learning_rate": 2.2961441342896795e-07, + "loss": 0.2139236032962799, + "step": 7044 + }, + { + "epoch": 1.870800690479352, + "grad_norm": 1.196552292185578, + "learning_rate": 2.286798475427898e-07, + "loss": 0.2251984179019928, + "step": 7045 + }, + { + "epoch": 1.8710662594608949, + "grad_norm": 1.2395291577625112, + "learning_rate": 2.277471654518959e-07, + "loss": 0.24517378211021423, + "step": 7046 + }, + { + "epoch": 1.8713318284424378, + "grad_norm": 1.3048847468612028, + "learning_rate": 2.2681636733609457e-07, + "loss": 0.19115275144577026, + "step": 7047 + }, + { + "epoch": 1.8715973974239808, + "grad_norm": 1.2997607659373802, + "learning_rate": 2.2588745337483454e-07, + "loss": 0.26092633605003357, + "step": 7048 + }, + { + "epoch": 1.8718629664055237, + "grad_norm": 1.2646212726473884, + "learning_rate": 2.2496042374719807e-07, + 
"loss": 0.18862302601337433, + "step": 7049 + }, + { + "epoch": 1.8721285353870667, + "grad_norm": 1.1602330038245767, + "learning_rate": 2.2403527863190554e-07, + "loss": 0.20728996396064758, + "step": 7050 + }, + { + "epoch": 1.8723941043686096, + "grad_norm": 1.236025812615254, + "learning_rate": 2.231120182073143e-07, + "loss": 0.24244122207164764, + "step": 7051 + }, + { + "epoch": 1.8726596733501526, + "grad_norm": 1.205655043915546, + "learning_rate": 2.2219064265141866e-07, + "loss": 0.18956953287124634, + "step": 7052 + }, + { + "epoch": 1.8729252423316956, + "grad_norm": 1.1159089015267554, + "learning_rate": 2.2127115214184868e-07, + "loss": 0.19873176515102386, + "step": 7053 + }, + { + "epoch": 1.8731908113132385, + "grad_norm": 1.2896839736015335, + "learning_rate": 2.203535468558704e-07, + "loss": 0.23717360198497772, + "step": 7054 + }, + { + "epoch": 1.8734563802947815, + "grad_norm": 1.3203924338573048, + "learning_rate": 2.1943782697038896e-07, + "loss": 0.24051904678344727, + "step": 7055 + }, + { + "epoch": 1.8737219492763244, + "grad_norm": 1.3193670550613668, + "learning_rate": 2.1852399266194312e-07, + "loss": 0.23541691899299622, + "step": 7056 + }, + { + "epoch": 1.8739875182578674, + "grad_norm": 1.3395958296451687, + "learning_rate": 2.1761204410671088e-07, + "loss": 0.22566163539886475, + "step": 7057 + }, + { + "epoch": 1.8742530872394103, + "grad_norm": 1.297432294479727, + "learning_rate": 2.167019814805027e-07, + "loss": 0.25771743059158325, + "step": 7058 + }, + { + "epoch": 1.8745186562209533, + "grad_norm": 1.1482951648622821, + "learning_rate": 2.1579380495876934e-07, + "loss": 0.22624637186527252, + "step": 7059 + }, + { + "epoch": 1.8747842252024962, + "grad_norm": 1.3036126318267591, + "learning_rate": 2.148875147165963e-07, + "loss": 0.24671627581119537, + "step": 7060 + }, + { + "epoch": 1.8750497941840392, + "grad_norm": 1.1983704285109544, + "learning_rate": 2.1398311092870605e-07, + "loss": 0.21607278287410736, + "step": 
7061 + }, + { + "epoch": 1.8753153631655821, + "grad_norm": 1.1102939736369823, + "learning_rate": 2.1308059376945689e-07, + "loss": 0.1960655301809311, + "step": 7062 + }, + { + "epoch": 1.875580932147125, + "grad_norm": 1.2816228458436618, + "learning_rate": 2.1217996341284297e-07, + "loss": 0.22005721926689148, + "step": 7063 + }, + { + "epoch": 1.875846501128668, + "grad_norm": 1.2746284533707484, + "learning_rate": 2.1128122003249541e-07, + "loss": 0.21442776918411255, + "step": 7064 + }, + { + "epoch": 1.876112070110211, + "grad_norm": 1.1849768238897622, + "learning_rate": 2.1038436380168114e-07, + "loss": 0.23126785457134247, + "step": 7065 + }, + { + "epoch": 1.876377639091754, + "grad_norm": 1.4246070766583077, + "learning_rate": 2.094893948933041e-07, + "loss": 0.24286629259586334, + "step": 7066 + }, + { + "epoch": 1.8766432080732969, + "grad_norm": 1.3706445020134141, + "learning_rate": 2.0859631347990406e-07, + "loss": 0.25771957635879517, + "step": 7067 + }, + { + "epoch": 1.87690877705484, + "grad_norm": 1.1754559873110961, + "learning_rate": 2.0770511973365436e-07, + "loss": 0.19837790727615356, + "step": 7068 + }, + { + "epoch": 1.877174346036383, + "grad_norm": 1.2372359407501599, + "learning_rate": 2.0681581382636984e-07, + "loss": 0.21209359169006348, + "step": 7069 + }, + { + "epoch": 1.877439915017926, + "grad_norm": 1.9178204608286211, + "learning_rate": 2.0592839592949554e-07, + "loss": 0.26641422510147095, + "step": 7070 + }, + { + "epoch": 1.877705483999469, + "grad_norm": 1.3604176831947503, + "learning_rate": 2.050428662141146e-07, + "loss": 0.21609601378440857, + "step": 7071 + }, + { + "epoch": 1.8779710529810119, + "grad_norm": 1.2861845280896875, + "learning_rate": 2.0415922485095051e-07, + "loss": 0.23642000555992126, + "step": 7072 + }, + { + "epoch": 1.8782366219625548, + "grad_norm": 1.3854568667341272, + "learning_rate": 2.0327747201035587e-07, + "loss": 0.24564675986766815, + "step": 7073 + }, + { + "epoch": 
1.8785021909440978, + "grad_norm": 1.229212126818568, + "learning_rate": 2.0239760786232355e-07, + "loss": 0.20001479983329773, + "step": 7074 + }, + { + "epoch": 1.8787677599256407, + "grad_norm": 1.2817747323253132, + "learning_rate": 2.015196325764801e-07, + "loss": 0.2590208649635315, + "step": 7075 + }, + { + "epoch": 1.8790333289071837, + "grad_norm": 1.2462050168824985, + "learning_rate": 2.0064354632208904e-07, + "loss": 0.23298504948616028, + "step": 7076 + }, + { + "epoch": 1.8792988978887266, + "grad_norm": 1.2573573484068483, + "learning_rate": 1.997693492680497e-07, + "loss": 0.22409996390342712, + "step": 7077 + }, + { + "epoch": 1.8795644668702696, + "grad_norm": 1.410723892029772, + "learning_rate": 1.9889704158289724e-07, + "loss": 0.27316784858703613, + "step": 7078 + }, + { + "epoch": 1.8798300358518125, + "grad_norm": 1.2924796650338854, + "learning_rate": 1.980266234348016e-07, + "loss": 0.2271946519613266, + "step": 7079 + }, + { + "epoch": 1.8800956048333555, + "grad_norm": 1.2438429761767338, + "learning_rate": 1.9715809499156858e-07, + "loss": 0.20887964963912964, + "step": 7080 + }, + { + "epoch": 1.8803611738148984, + "grad_norm": 1.2112268618082698, + "learning_rate": 1.9629145642064197e-07, + "loss": 0.23468685150146484, + "step": 7081 + }, + { + "epoch": 1.8806267427964414, + "grad_norm": 1.308865144497765, + "learning_rate": 1.9542670788909813e-07, + "loss": 0.21624556183815002, + "step": 7082 + }, + { + "epoch": 1.8808923117779843, + "grad_norm": 1.1751415989571612, + "learning_rate": 1.9456384956365149e-07, + "loss": 0.22328166663646698, + "step": 7083 + }, + { + "epoch": 1.8811578807595273, + "grad_norm": 1.3508603820961609, + "learning_rate": 1.93702881610649e-07, + "loss": 0.2526431381702423, + "step": 7084 + }, + { + "epoch": 1.8814234497410702, + "grad_norm": 1.3562256445660688, + "learning_rate": 1.9284380419607784e-07, + "loss": 0.23668771982192993, + "step": 7085 + }, + { + "epoch": 1.8816890187226132, + "grad_norm": 
1.2668189225170288, + "learning_rate": 1.9198661748555557e-07, + "loss": 0.24710845947265625, + "step": 7086 + }, + { + "epoch": 1.8819545877041561, + "grad_norm": 1.4047256701053605, + "learning_rate": 1.911313216443389e-07, + "loss": 0.22696900367736816, + "step": 7087 + }, + { + "epoch": 1.882220156685699, + "grad_norm": 1.3717447863189725, + "learning_rate": 1.9027791683731922e-07, + "loss": 0.21652163565158844, + "step": 7088 + }, + { + "epoch": 1.882485725667242, + "grad_norm": 1.3189608691767827, + "learning_rate": 1.894264032290205e-07, + "loss": 0.2166716307401657, + "step": 7089 + }, + { + "epoch": 1.882751294648785, + "grad_norm": 1.3746931913110367, + "learning_rate": 1.8857678098360698e-07, + "loss": 0.26200050115585327, + "step": 7090 + }, + { + "epoch": 1.883016863630328, + "grad_norm": 1.2945644704190118, + "learning_rate": 1.8772905026487654e-07, + "loss": 0.2292764037847519, + "step": 7091 + }, + { + "epoch": 1.883282432611871, + "grad_norm": 1.3106590918741248, + "learning_rate": 1.8688321123625842e-07, + "loss": 0.23893016576766968, + "step": 7092 + }, + { + "epoch": 1.8835480015934138, + "grad_norm": 1.2241030970764724, + "learning_rate": 1.860392640608244e-07, + "loss": 0.2509230673313141, + "step": 7093 + }, + { + "epoch": 1.8838135705749568, + "grad_norm": 1.2218686374923997, + "learning_rate": 1.8519720890127434e-07, + "loss": 0.24156486988067627, + "step": 7094 + }, + { + "epoch": 1.8840791395564997, + "grad_norm": 1.2859122561460798, + "learning_rate": 1.843570459199462e-07, + "loss": 0.2120019942522049, + "step": 7095 + }, + { + "epoch": 1.884344708538043, + "grad_norm": 1.6579646138710773, + "learning_rate": 1.835187752788159e-07, + "loss": 0.23400259017944336, + "step": 7096 + }, + { + "epoch": 1.8846102775195859, + "grad_norm": 1.281132346942695, + "learning_rate": 1.8268239713949087e-07, + "loss": 0.20913103222846985, + "step": 7097 + }, + { + "epoch": 1.8848758465011288, + "grad_norm": 1.3381319381686223, + "learning_rate": 
1.8184791166321546e-07, + "loss": 0.24468877911567688, + "step": 7098 + }, + { + "epoch": 1.8851414154826718, + "grad_norm": 1.236616212709848, + "learning_rate": 1.8101531901086767e-07, + "loss": 0.2038918137550354, + "step": 7099 + }, + { + "epoch": 1.8854069844642147, + "grad_norm": 1.3201086548941574, + "learning_rate": 1.8018461934296239e-07, + "loss": 0.24191413819789886, + "step": 7100 + }, + { + "epoch": 1.8856725534457577, + "grad_norm": 1.277539269643606, + "learning_rate": 1.793558128196493e-07, + "loss": 0.24394474923610687, + "step": 7101 + }, + { + "epoch": 1.8859381224273006, + "grad_norm": 1.1561225023553612, + "learning_rate": 1.7852889960071063e-07, + "loss": 0.22630709409713745, + "step": 7102 + }, + { + "epoch": 1.8862036914088436, + "grad_norm": 1.5472360212555962, + "learning_rate": 1.7770387984556768e-07, + "loss": 0.23936980962753296, + "step": 7103 + }, + { + "epoch": 1.8864692603903865, + "grad_norm": 1.275471897769737, + "learning_rate": 1.768807537132733e-07, + "loss": 0.24808618426322937, + "step": 7104 + }, + { + "epoch": 1.8867348293719295, + "grad_norm": 1.273035999339445, + "learning_rate": 1.7605952136251603e-07, + "loss": 0.23934635519981384, + "step": 7105 + }, + { + "epoch": 1.8870003983534724, + "grad_norm": 1.189686791776393, + "learning_rate": 1.7524018295162148e-07, + "loss": 0.22107656300067902, + "step": 7106 + }, + { + "epoch": 1.8872659673350154, + "grad_norm": 1.3496800848037154, + "learning_rate": 1.7442273863854553e-07, + "loss": 0.23253028094768524, + "step": 7107 + }, + { + "epoch": 1.8875315363165583, + "grad_norm": 1.3028365552765204, + "learning_rate": 1.7360718858088542e-07, + "loss": 0.2501102387905121, + "step": 7108 + }, + { + "epoch": 1.8877971052981013, + "grad_norm": 1.4057988238229884, + "learning_rate": 1.7279353293586765e-07, + "loss": 0.25537967681884766, + "step": 7109 + }, + { + "epoch": 1.8880626742796442, + "grad_norm": 2.7876746143917033, + "learning_rate": 1.7198177186035447e-07, + "loss": 
0.25701045989990234, + "step": 7110 + }, + { + "epoch": 1.8883282432611872, + "grad_norm": 1.1447271563365653, + "learning_rate": 1.7117190551084628e-07, + "loss": 0.2109440565109253, + "step": 7111 + }, + { + "epoch": 1.8885938122427302, + "grad_norm": 1.2454061070152636, + "learning_rate": 1.7036393404347373e-07, + "loss": 0.22767721116542816, + "step": 7112 + }, + { + "epoch": 1.888859381224273, + "grad_norm": 1.1572937395529788, + "learning_rate": 1.6955785761400444e-07, + "loss": 0.1976814568042755, + "step": 7113 + }, + { + "epoch": 1.889124950205816, + "grad_norm": 1.1727224852039306, + "learning_rate": 1.687536763778419e-07, + "loss": 0.21109873056411743, + "step": 7114 + }, + { + "epoch": 1.889390519187359, + "grad_norm": 1.1916227822459606, + "learning_rate": 1.6795139049002095e-07, + "loss": 0.2165786623954773, + "step": 7115 + }, + { + "epoch": 1.889656088168902, + "grad_norm": 1.2917556149315792, + "learning_rate": 1.6715100010521347e-07, + "loss": 0.23962441086769104, + "step": 7116 + }, + { + "epoch": 1.889921657150445, + "grad_norm": 1.2423009900583697, + "learning_rate": 1.6635250537772596e-07, + "loss": 0.23351140320301056, + "step": 7117 + }, + { + "epoch": 1.8901872261319879, + "grad_norm": 1.3034348272306633, + "learning_rate": 1.6555590646149866e-07, + "loss": 0.19999945163726807, + "step": 7118 + }, + { + "epoch": 1.8904527951135308, + "grad_norm": 1.432201467842623, + "learning_rate": 1.647612035101054e-07, + "loss": 0.27142196893692017, + "step": 7119 + }, + { + "epoch": 1.8907183640950738, + "grad_norm": 1.2861780172834696, + "learning_rate": 1.6396839667675691e-07, + "loss": 0.21525685489177704, + "step": 7120 + }, + { + "epoch": 1.8909839330766167, + "grad_norm": 3.2062699859400396, + "learning_rate": 1.631774861142965e-07, + "loss": 0.24305005371570587, + "step": 7121 + }, + { + "epoch": 1.8912495020581597, + "grad_norm": 1.2019998279555377, + "learning_rate": 1.6238847197520113e-07, + "loss": 0.23202842473983765, + "step": 7122 + }, + 
{ + "epoch": 1.8915150710397026, + "grad_norm": 1.4409003412080332, + "learning_rate": 1.6160135441158576e-07, + "loss": 0.24373790621757507, + "step": 7123 + }, + { + "epoch": 1.8917806400212456, + "grad_norm": 1.2360359431057044, + "learning_rate": 1.6081613357519565e-07, + "loss": 0.22774222493171692, + "step": 7124 + }, + { + "epoch": 1.8920462090027885, + "grad_norm": 1.2064368847282083, + "learning_rate": 1.6003280961741196e-07, + "loss": 0.20660057663917542, + "step": 7125 + }, + { + "epoch": 1.8923117779843315, + "grad_norm": 1.3070998228758686, + "learning_rate": 1.5925138268925166e-07, + "loss": 0.23578912019729614, + "step": 7126 + }, + { + "epoch": 1.8925773469658744, + "grad_norm": 1.2737250152668298, + "learning_rate": 1.5847185294136313e-07, + "loss": 0.20852091908454895, + "step": 7127 + }, + { + "epoch": 1.8928429159474174, + "grad_norm": 1.1465883719364975, + "learning_rate": 1.5769422052403172e-07, + "loss": 0.17455898225307465, + "step": 7128 + }, + { + "epoch": 1.8931084849289603, + "grad_norm": 1.5036497092390075, + "learning_rate": 1.5691848558717638e-07, + "loss": 0.29552748799324036, + "step": 7129 + }, + { + "epoch": 1.8933740539105033, + "grad_norm": 1.3009458238394367, + "learning_rate": 1.5614464828034746e-07, + "loss": 0.22972649335861206, + "step": 7130 + }, + { + "epoch": 1.8936396228920462, + "grad_norm": 1.2296689152648304, + "learning_rate": 1.5537270875273348e-07, + "loss": 0.2134108692407608, + "step": 7131 + }, + { + "epoch": 1.8939051918735892, + "grad_norm": 1.4119584533896288, + "learning_rate": 1.546026671531542e-07, + "loss": 0.24145451188087463, + "step": 7132 + }, + { + "epoch": 1.8941707608551321, + "grad_norm": 1.355860353407812, + "learning_rate": 1.5383452363006534e-07, + "loss": 0.2323920726776123, + "step": 7133 + }, + { + "epoch": 1.894436329836675, + "grad_norm": 1.197617700552455, + "learning_rate": 1.5306827833155403e-07, + "loss": 0.20091015100479126, + "step": 7134 + }, + { + "epoch": 1.894701898818218, + 
"grad_norm": 1.370489911603159, + "learning_rate": 1.523039314053465e-07, + "loss": 0.2451317310333252, + "step": 7135 + }, + { + "epoch": 1.894967467799761, + "grad_norm": 1.2946538259097045, + "learning_rate": 1.5154148299879822e-07, + "loss": 0.22744594514369965, + "step": 7136 + }, + { + "epoch": 1.895233036781304, + "grad_norm": 1.2046527835430252, + "learning_rate": 1.5078093325889943e-07, + "loss": 0.2460673749446869, + "step": 7137 + }, + { + "epoch": 1.895498605762847, + "grad_norm": 1.4172423595206858, + "learning_rate": 1.5002228233227722e-07, + "loss": 0.2524537444114685, + "step": 7138 + }, + { + "epoch": 1.8957641747443899, + "grad_norm": 1.1840127480017744, + "learning_rate": 1.4926553036518798e-07, + "loss": 0.2056279480457306, + "step": 7139 + }, + { + "epoch": 1.8960297437259328, + "grad_norm": 1.2144930845419581, + "learning_rate": 1.485106775035261e-07, + "loss": 0.2656184732913971, + "step": 7140 + }, + { + "epoch": 1.8962953127074758, + "grad_norm": 1.1903286988332102, + "learning_rate": 1.477577238928185e-07, + "loss": 0.2190116047859192, + "step": 7141 + }, + { + "epoch": 1.8965608816890187, + "grad_norm": 1.206151177902952, + "learning_rate": 1.4700666967822574e-07, + "loss": 0.22984017431735992, + "step": 7142 + }, + { + "epoch": 1.8968264506705617, + "grad_norm": 1.1949819121682481, + "learning_rate": 1.462575150045409e-07, + "loss": 0.17947378754615784, + "step": 7143 + }, + { + "epoch": 1.8970920196521046, + "grad_norm": 1.2649423314993642, + "learning_rate": 1.4551026001619395e-07, + "loss": 0.24965715408325195, + "step": 7144 + }, + { + "epoch": 1.8973575886336476, + "grad_norm": 1.236302993447548, + "learning_rate": 1.4476490485724526e-07, + "loss": 0.2337307333946228, + "step": 7145 + }, + { + "epoch": 1.8976231576151905, + "grad_norm": 1.2205039464348546, + "learning_rate": 1.4402144967139098e-07, + "loss": 0.22668538987636566, + "step": 7146 + }, + { + "epoch": 1.8978887265967335, + "grad_norm": 1.350785859399433, + 
"learning_rate": 1.4327989460196091e-07, + "loss": 0.21934781968593597, + "step": 7147 + }, + { + "epoch": 1.8981542955782764, + "grad_norm": 1.2212959594670445, + "learning_rate": 1.4254023979191844e-07, + "loss": 0.1957930624485016, + "step": 7148 + }, + { + "epoch": 1.8984198645598194, + "grad_norm": 1.1724780894008597, + "learning_rate": 1.4180248538385956e-07, + "loss": 0.22351369261741638, + "step": 7149 + }, + { + "epoch": 1.8986854335413623, + "grad_norm": 1.3930947329130605, + "learning_rate": 1.4106663152001487e-07, + "loss": 0.2603265047073364, + "step": 7150 + }, + { + "epoch": 1.8989510025229053, + "grad_norm": 1.260479860356455, + "learning_rate": 1.4033267834224873e-07, + "loss": 0.2566663324832916, + "step": 7151 + }, + { + "epoch": 1.8992165715044482, + "grad_norm": 1.2799319314175146, + "learning_rate": 1.3960062599205682e-07, + "loss": 0.23130206763744354, + "step": 7152 + }, + { + "epoch": 1.8994821404859912, + "grad_norm": 1.1757231252562024, + "learning_rate": 1.3887047461057179e-07, + "loss": 0.17946425080299377, + "step": 7153 + }, + { + "epoch": 1.8997477094675341, + "grad_norm": 1.2434099546308155, + "learning_rate": 1.3814222433855884e-07, + "loss": 0.23946328461170197, + "step": 7154 + }, + { + "epoch": 1.900013278449077, + "grad_norm": 1.2249367291717066, + "learning_rate": 1.3741587531641566e-07, + "loss": 0.21002715826034546, + "step": 7155 + }, + { + "epoch": 1.90027884743062, + "grad_norm": 1.3062374823275615, + "learning_rate": 1.3669142768417242e-07, + "loss": 0.2121986746788025, + "step": 7156 + }, + { + "epoch": 1.900544416412163, + "grad_norm": 1.373871289837254, + "learning_rate": 1.3596888158149525e-07, + "loss": 0.26400670409202576, + "step": 7157 + }, + { + "epoch": 1.900809985393706, + "grad_norm": 1.1813353744292436, + "learning_rate": 1.3524823714768375e-07, + "loss": 0.18764406442642212, + "step": 7158 + }, + { + "epoch": 1.9010755543752489, + "grad_norm": 1.415975931925435, + "learning_rate": 1.3452949452166686e-07, + 
"loss": 0.2550342381000519, + "step": 7159 + }, + { + "epoch": 1.9013411233567918, + "grad_norm": 1.304366194966887, + "learning_rate": 1.3381265384201035e-07, + "loss": 0.23188576102256775, + "step": 7160 + }, + { + "epoch": 1.9016066923383348, + "grad_norm": 1.2473914592639561, + "learning_rate": 1.3309771524691372e-07, + "loss": 0.23124513030052185, + "step": 7161 + }, + { + "epoch": 1.9018722613198777, + "grad_norm": 1.2056745011797427, + "learning_rate": 1.323846788742078e-07, + "loss": 0.19941067695617676, + "step": 7162 + }, + { + "epoch": 1.9021378303014207, + "grad_norm": 1.4624998875104938, + "learning_rate": 1.316735448613593e-07, + "loss": 0.22510412335395813, + "step": 7163 + }, + { + "epoch": 1.9024033992829636, + "grad_norm": 1.2448961229015743, + "learning_rate": 1.309643133454641e-07, + "loss": 0.19102326035499573, + "step": 7164 + }, + { + "epoch": 1.9026689682645066, + "grad_norm": 1.2307397875458914, + "learning_rate": 1.3025698446325618e-07, + "loss": 0.20826731622219086, + "step": 7165 + }, + { + "epoch": 1.9029345372460496, + "grad_norm": 1.3483240422328144, + "learning_rate": 1.2955155835109757e-07, + "loss": 0.23238909244537354, + "step": 7166 + }, + { + "epoch": 1.9032001062275925, + "grad_norm": 1.4338552298496805, + "learning_rate": 1.2884803514498833e-07, + "loss": 0.2635011374950409, + "step": 7167 + }, + { + "epoch": 1.9034656752091355, + "grad_norm": 1.1745725675637841, + "learning_rate": 1.281464149805578e-07, + "loss": 0.2073322981595993, + "step": 7168 + }, + { + "epoch": 1.9037312441906784, + "grad_norm": 1.2344038568124596, + "learning_rate": 1.274466979930711e-07, + "loss": 0.22091326117515564, + "step": 7169 + }, + { + "epoch": 1.9039968131722214, + "grad_norm": 1.114689842836081, + "learning_rate": 1.2674888431742472e-07, + "loss": 0.18613001704216003, + "step": 7170 + }, + { + "epoch": 1.9042623821537643, + "grad_norm": 1.2788383965135535, + "learning_rate": 1.2605297408814887e-07, + "loss": 0.2165849655866623, + "step": 
7171 + }, + { + "epoch": 1.9045279511353073, + "grad_norm": 1.294203512401496, + "learning_rate": 1.2535896743940844e-07, + "loss": 0.21317794919013977, + "step": 7172 + }, + { + "epoch": 1.9047935201168502, + "grad_norm": 1.47127212987638, + "learning_rate": 1.2466686450499866e-07, + "loss": 0.25221073627471924, + "step": 7173 + }, + { + "epoch": 1.9050590890983932, + "grad_norm": 1.2647474973058104, + "learning_rate": 1.239766654183472e-07, + "loss": 0.21598559617996216, + "step": 7174 + }, + { + "epoch": 1.9053246580799361, + "grad_norm": 1.2635227030316536, + "learning_rate": 1.232883703125187e-07, + "loss": 0.2284495085477829, + "step": 7175 + }, + { + "epoch": 1.905590227061479, + "grad_norm": 1.1825527167306378, + "learning_rate": 1.2260197932020713e-07, + "loss": 0.21899332106113434, + "step": 7176 + }, + { + "epoch": 1.905855796043022, + "grad_norm": 1.3588902485974734, + "learning_rate": 1.2191749257374097e-07, + "loss": 0.2633277177810669, + "step": 7177 + }, + { + "epoch": 1.906121365024565, + "grad_norm": 1.2643904365611611, + "learning_rate": 1.2123491020508137e-07, + "loss": 0.2330140471458435, + "step": 7178 + }, + { + "epoch": 1.906386934006108, + "grad_norm": 1.2757939155257039, + "learning_rate": 1.2055423234582087e-07, + "loss": 0.21859750151634216, + "step": 7179 + }, + { + "epoch": 1.9066525029876509, + "grad_norm": 1.3985563606047093, + "learning_rate": 1.198754591271878e-07, + "loss": 0.252164363861084, + "step": 7180 + }, + { + "epoch": 1.906918071969194, + "grad_norm": 1.4365501399575176, + "learning_rate": 1.191985906800408e-07, + "loss": 0.24968160688877106, + "step": 7181 + }, + { + "epoch": 1.907183640950737, + "grad_norm": 1.199067091736319, + "learning_rate": 1.185236271348722e-07, + "loss": 0.2083423137664795, + "step": 7182 + }, + { + "epoch": 1.90744920993228, + "grad_norm": 1.258208503364781, + "learning_rate": 1.1785056862180789e-07, + "loss": 0.2468394935131073, + "step": 7183 + }, + { + "epoch": 1.907714778913823, + 
"grad_norm": 1.2908738922715033, + "learning_rate": 1.1717941527060405e-07, + "loss": 0.22417521476745605, + "step": 7184 + }, + { + "epoch": 1.9079803478953659, + "grad_norm": 1.2789853859840312, + "learning_rate": 1.1651016721065167e-07, + "loss": 0.2411842793226242, + "step": 7185 + }, + { + "epoch": 1.9082459168769088, + "grad_norm": 1.311967953603668, + "learning_rate": 1.1584282457097417e-07, + "loss": 0.24650761485099792, + "step": 7186 + }, + { + "epoch": 1.9085114858584518, + "grad_norm": 1.3305923315328496, + "learning_rate": 1.1517738748022755e-07, + "loss": 0.22433717548847198, + "step": 7187 + }, + { + "epoch": 1.9087770548399947, + "grad_norm": 1.2666444248015347, + "learning_rate": 1.145138560667003e-07, + "loss": 0.20867910981178284, + "step": 7188 + }, + { + "epoch": 1.9090426238215377, + "grad_norm": 1.2511449541105855, + "learning_rate": 1.138522304583134e-07, + "loss": 0.21889618039131165, + "step": 7189 + }, + { + "epoch": 1.9093081928030806, + "grad_norm": 1.113107479716362, + "learning_rate": 1.1319251078261928e-07, + "loss": 0.19350749254226685, + "step": 7190 + }, + { + "epoch": 1.9095737617846236, + "grad_norm": 1.183265546980091, + "learning_rate": 1.125346971668051e-07, + "loss": 0.19123657047748566, + "step": 7191 + }, + { + "epoch": 1.9098393307661665, + "grad_norm": 1.2653223306994201, + "learning_rate": 1.118787897376905e-07, + "loss": 0.21433782577514648, + "step": 7192 + }, + { + "epoch": 1.9101048997477095, + "grad_norm": 1.474925382041675, + "learning_rate": 1.1122478862172437e-07, + "loss": 0.2521187663078308, + "step": 7193 + }, + { + "epoch": 1.9103704687292524, + "grad_norm": 1.2835872924926361, + "learning_rate": 1.1057269394499248e-07, + "loss": 0.2141486555337906, + "step": 7194 + }, + { + "epoch": 1.9106360377107954, + "grad_norm": 1.271472683987379, + "learning_rate": 1.0992250583320985e-07, + "loss": 0.22960343956947327, + "step": 7195 + }, + { + "epoch": 1.9109016066923383, + "grad_norm": 1.3433609684783299, + 
"learning_rate": 1.092742244117262e-07, + "loss": 0.21809744834899902, + "step": 7196 + }, + { + "epoch": 1.9111671756738813, + "grad_norm": 1.248347973820862, + "learning_rate": 1.0862784980552044e-07, + "loss": 0.22418212890625, + "step": 7197 + }, + { + "epoch": 1.9114327446554242, + "grad_norm": 1.2504701200893746, + "learning_rate": 1.0798338213920845e-07, + "loss": 0.22050701081752777, + "step": 7198 + }, + { + "epoch": 1.9116983136369672, + "grad_norm": 1.206849931438756, + "learning_rate": 1.0734082153703418e-07, + "loss": 0.23200345039367676, + "step": 7199 + }, + { + "epoch": 1.9119638826185101, + "grad_norm": 1.1102825382626649, + "learning_rate": 1.0670016812287631e-07, + "loss": 0.18366631865501404, + "step": 7200 + }, + { + "epoch": 1.912229451600053, + "grad_norm": 1.2844567521026582, + "learning_rate": 1.0606142202024605e-07, + "loss": 0.24362193048000336, + "step": 7201 + }, + { + "epoch": 1.912495020581596, + "grad_norm": 1.2822631921528913, + "learning_rate": 1.0542458335228601e-07, + "loss": 0.2216200977563858, + "step": 7202 + }, + { + "epoch": 1.912760589563139, + "grad_norm": 1.0921875359661608, + "learning_rate": 1.0478965224176907e-07, + "loss": 0.20216065645217896, + "step": 7203 + }, + { + "epoch": 1.913026158544682, + "grad_norm": 1.254966671592246, + "learning_rate": 1.041566288111051e-07, + "loss": 0.22054359316825867, + "step": 7204 + }, + { + "epoch": 1.913291727526225, + "grad_norm": 1.3532366246655447, + "learning_rate": 1.0352551318233206e-07, + "loss": 0.21569015085697174, + "step": 7205 + }, + { + "epoch": 1.9135572965077678, + "grad_norm": 1.2826756039782425, + "learning_rate": 1.028963054771226e-07, + "loss": 0.22967267036437988, + "step": 7206 + }, + { + "epoch": 1.9138228654893108, + "grad_norm": 1.3494789006319945, + "learning_rate": 1.0226900581677968e-07, + "loss": 0.2422460913658142, + "step": 7207 + }, + { + "epoch": 1.9140884344708538, + "grad_norm": 1.3606228589652338, + "learning_rate": 1.0164361432223879e-07, + 
"loss": 0.25891292095184326, + "step": 7208 + }, + { + "epoch": 1.914354003452397, + "grad_norm": 1.3570561855059022, + "learning_rate": 1.0102013111406905e-07, + "loss": 0.26915764808654785, + "step": 7209 + }, + { + "epoch": 1.9146195724339399, + "grad_norm": 1.3889996377213247, + "learning_rate": 1.0039855631247097e-07, + "loss": 0.2268485426902771, + "step": 7210 + }, + { + "epoch": 1.9148851414154828, + "grad_norm": 1.254622691077732, + "learning_rate": 9.977889003727647e-08, + "loss": 0.22551512718200684, + "step": 7211 + }, + { + "epoch": 1.9151507103970258, + "grad_norm": 1.233084698895248, + "learning_rate": 9.91611324079489e-08, + "loss": 0.24224743247032166, + "step": 7212 + }, + { + "epoch": 1.9154162793785687, + "grad_norm": 1.2426176239380708, + "learning_rate": 9.854528354358517e-08, + "loss": 0.19550879299640656, + "step": 7213 + }, + { + "epoch": 1.9156818483601117, + "grad_norm": 1.3449782320604147, + "learning_rate": 9.793134356291478e-08, + "loss": 0.24986523389816284, + "step": 7214 + }, + { + "epoch": 1.9159474173416546, + "grad_norm": 1.3340583070384961, + "learning_rate": 9.731931258429638e-08, + "loss": 0.2565170228481293, + "step": 7215 + }, + { + "epoch": 1.9162129863231976, + "grad_norm": 1.185156912642083, + "learning_rate": 9.670919072572449e-08, + "loss": 0.2166958749294281, + "step": 7216 + }, + { + "epoch": 1.9164785553047405, + "grad_norm": 1.2903999319183896, + "learning_rate": 9.610097810482166e-08, + "loss": 0.2002115249633789, + "step": 7217 + }, + { + "epoch": 1.9167441242862835, + "grad_norm": 1.1589813054229285, + "learning_rate": 9.549467483884412e-08, + "loss": 0.209486186504364, + "step": 7218 + }, + { + "epoch": 1.9170096932678264, + "grad_norm": 1.2748483155423624, + "learning_rate": 9.489028104468056e-08, + "loss": 0.22061321139335632, + "step": 7219 + }, + { + "epoch": 1.9172752622493694, + "grad_norm": 1.3916500275624957, + "learning_rate": 9.428779683885114e-08, + "loss": 0.21880047023296356, + "step": 7220 + }, + { 
+ "epoch": 1.9175408312309123, + "grad_norm": 1.174801358834737, + "learning_rate": 9.368722233750849e-08, + "loss": 0.22674325108528137, + "step": 7221 + }, + { + "epoch": 1.9178064002124553, + "grad_norm": 1.2877078963500264, + "learning_rate": 9.308855765643332e-08, + "loss": 0.22100718319416046, + "step": 7222 + }, + { + "epoch": 1.9180719691939982, + "grad_norm": 1.3291196619762962, + "learning_rate": 9.249180291104553e-08, + "loss": 0.23105254769325256, + "step": 7223 + }, + { + "epoch": 1.9183375381755412, + "grad_norm": 1.2897395451200044, + "learning_rate": 9.189695821638755e-08, + "loss": 0.22483405470848083, + "step": 7224 + }, + { + "epoch": 1.9186031071570842, + "grad_norm": 1.0701399001286365, + "learning_rate": 9.130402368714208e-08, + "loss": 0.1939004510641098, + "step": 7225 + }, + { + "epoch": 1.918868676138627, + "grad_norm": 1.2349263677236755, + "learning_rate": 9.071299943761769e-08, + "loss": 0.21722440421581268, + "step": 7226 + }, + { + "epoch": 1.91913424512017, + "grad_norm": 1.2911544131515666, + "learning_rate": 9.012388558175877e-08, + "loss": 0.24213966727256775, + "step": 7227 + }, + { + "epoch": 1.919399814101713, + "grad_norm": 1.2266941536480729, + "learning_rate": 8.953668223313783e-08, + "loss": 0.2305546998977661, + "step": 7228 + }, + { + "epoch": 1.919665383083256, + "grad_norm": 1.3932840646040938, + "learning_rate": 8.895138950496207e-08, + "loss": 0.2678033709526062, + "step": 7229 + }, + { + "epoch": 1.919930952064799, + "grad_norm": 1.2449965535251106, + "learning_rate": 8.836800751006791e-08, + "loss": 0.2491014301776886, + "step": 7230 + }, + { + "epoch": 1.9201965210463419, + "grad_norm": 1.2551836576043742, + "learning_rate": 8.778653636092537e-08, + "loss": 0.21837326884269714, + "step": 7231 + }, + { + "epoch": 1.9204620900278848, + "grad_norm": 1.2745391136427304, + "learning_rate": 8.72069761696348e-08, + "loss": 0.24149999022483826, + "step": 7232 + }, + { + "epoch": 1.9207276590094278, + "grad_norm": 
1.3444140835580012, + "learning_rate": 8.662932704792793e-08, + "loss": 0.2124684453010559, + "step": 7233 + }, + { + "epoch": 1.9209932279909707, + "grad_norm": 1.3660213009765734, + "learning_rate": 8.60535891071712e-08, + "loss": 0.2452150285243988, + "step": 7234 + }, + { + "epoch": 1.9212587969725137, + "grad_norm": 1.2005299446152509, + "learning_rate": 8.547976245835698e-08, + "loss": 0.23598846793174744, + "step": 7235 + }, + { + "epoch": 1.9215243659540566, + "grad_norm": 1.3152974069295431, + "learning_rate": 8.490784721211454e-08, + "loss": 0.2105225920677185, + "step": 7236 + }, + { + "epoch": 1.9217899349355996, + "grad_norm": 1.4424977304862223, + "learning_rate": 8.433784347870122e-08, + "loss": 0.2585388720035553, + "step": 7237 + }, + { + "epoch": 1.9220555039171425, + "grad_norm": 1.2300698994172445, + "learning_rate": 8.376975136800691e-08, + "loss": 0.21703900396823883, + "step": 7238 + }, + { + "epoch": 1.9223210728986855, + "grad_norm": 1.2580366958382383, + "learning_rate": 8.3203570989554e-08, + "loss": 0.22771210968494415, + "step": 7239 + }, + { + "epoch": 1.9225866418802284, + "grad_norm": 1.1645003525207898, + "learning_rate": 8.263930245249408e-08, + "loss": 0.22535575926303864, + "step": 7240 + }, + { + "epoch": 1.9228522108617714, + "grad_norm": 1.1822452042500315, + "learning_rate": 8.207694586561344e-08, + "loss": 0.2052595466375351, + "step": 7241 + }, + { + "epoch": 1.9231177798433143, + "grad_norm": 1.2683012213528768, + "learning_rate": 8.151650133732536e-08, + "loss": 0.19611456990242004, + "step": 7242 + }, + { + "epoch": 1.9233833488248573, + "grad_norm": 1.2762939262923303, + "learning_rate": 8.095796897567787e-08, + "loss": 0.20256826281547546, + "step": 7243 + }, + { + "epoch": 1.9236489178064002, + "grad_norm": 1.5444723931343434, + "learning_rate": 8.040134888835038e-08, + "loss": 0.25462138652801514, + "step": 7244 + }, + { + "epoch": 1.9239144867879432, + "grad_norm": 1.2813246309729553, + "learning_rate": 
7.984664118265262e-08, + "loss": 0.27362316846847534, + "step": 7245 + }, + { + "epoch": 1.9241800557694861, + "grad_norm": 1.3526739723939418, + "learning_rate": 7.929384596552459e-08, + "loss": 0.23749098181724548, + "step": 7246 + }, + { + "epoch": 1.924445624751029, + "grad_norm": 1.3016147885306604, + "learning_rate": 7.874296334353882e-08, + "loss": 0.2472018599510193, + "step": 7247 + }, + { + "epoch": 1.924711193732572, + "grad_norm": 1.3451463766339227, + "learning_rate": 7.819399342290034e-08, + "loss": 0.23181989789009094, + "step": 7248 + }, + { + "epoch": 1.924976762714115, + "grad_norm": 1.2415200588572097, + "learning_rate": 7.764693630944231e-08, + "loss": 0.21363665163516998, + "step": 7249 + }, + { + "epoch": 1.925242331695658, + "grad_norm": 1.1849821155034532, + "learning_rate": 7.710179210863144e-08, + "loss": 0.21239221096038818, + "step": 7250 + }, + { + "epoch": 1.925507900677201, + "grad_norm": 1.4494720585200522, + "learning_rate": 7.655856092556591e-08, + "loss": 0.2643742263317108, + "step": 7251 + }, + { + "epoch": 1.9257734696587439, + "grad_norm": 1.251877664981762, + "learning_rate": 7.601724286497414e-08, + "loss": 0.2232428789138794, + "step": 7252 + }, + { + "epoch": 1.9260390386402868, + "grad_norm": 1.313277386530887, + "learning_rate": 7.547783803121489e-08, + "loss": 0.2052377462387085, + "step": 7253 + }, + { + "epoch": 1.9263046076218298, + "grad_norm": 1.2540878413614547, + "learning_rate": 7.494034652827942e-08, + "loss": 0.22194740176200867, + "step": 7254 + }, + { + "epoch": 1.9265701766033727, + "grad_norm": 1.2500554609811554, + "learning_rate": 7.440476845979038e-08, + "loss": 0.22004084289073944, + "step": 7255 + }, + { + "epoch": 1.9268357455849157, + "grad_norm": 1.5480704193409933, + "learning_rate": 7.387110392899965e-08, + "loss": 0.2218078374862671, + "step": 7256 + }, + { + "epoch": 1.9271013145664586, + "grad_norm": 1.3006193889830067, + "learning_rate": 7.33393530387927e-08, + "loss": 0.23272839188575745, + 
"step": 7257 + }, + { + "epoch": 1.9273668835480016, + "grad_norm": 1.3119971487868216, + "learning_rate": 7.280951589168417e-08, + "loss": 0.23666653037071228, + "step": 7258 + }, + { + "epoch": 1.9276324525295445, + "grad_norm": 1.235294099691234, + "learning_rate": 7.228159258982126e-08, + "loss": 0.21946533024311066, + "step": 7259 + }, + { + "epoch": 1.9278980215110875, + "grad_norm": 1.252328485116134, + "learning_rate": 7.175558323498033e-08, + "loss": 0.22158634662628174, + "step": 7260 + }, + { + "epoch": 1.9281635904926304, + "grad_norm": 1.1330771135999202, + "learning_rate": 7.123148792857026e-08, + "loss": 0.19978654384613037, + "step": 7261 + }, + { + "epoch": 1.9284291594741734, + "grad_norm": 1.2859436875650823, + "learning_rate": 7.070930677163023e-08, + "loss": 0.21197813749313354, + "step": 7262 + }, + { + "epoch": 1.9286947284557163, + "grad_norm": 1.2611518825786316, + "learning_rate": 7.018903986483083e-08, + "loss": 0.22650468349456787, + "step": 7263 + }, + { + "epoch": 1.9289602974372593, + "grad_norm": 1.2701948406662635, + "learning_rate": 6.967068730847293e-08, + "loss": 0.22257481515407562, + "step": 7264 + }, + { + "epoch": 1.9292258664188022, + "grad_norm": 1.3219742856760701, + "learning_rate": 6.915424920248992e-08, + "loss": 0.24899804592132568, + "step": 7265 + }, + { + "epoch": 1.9294914354003452, + "grad_norm": 1.2996576951077934, + "learning_rate": 6.863972564644328e-08, + "loss": 0.250610888004303, + "step": 7266 + }, + { + "epoch": 1.9297570043818881, + "grad_norm": 1.251137163804366, + "learning_rate": 6.81271167395292e-08, + "loss": 0.22786292433738708, + "step": 7267 + }, + { + "epoch": 1.930022573363431, + "grad_norm": 1.2890465128808872, + "learning_rate": 6.761642258056977e-08, + "loss": 0.22816789150238037, + "step": 7268 + }, + { + "epoch": 1.930288142344974, + "grad_norm": 1.3522601458627446, + "learning_rate": 6.7107643268024e-08, + "loss": 0.2589687407016754, + "step": 7269 + }, + { + "epoch": 1.930553711326517, + 
"grad_norm": 1.1963236616697677, + "learning_rate": 6.660077889997673e-08, + "loss": 0.2281583547592163, + "step": 7270 + }, + { + "epoch": 1.93081928030806, + "grad_norm": 1.3347065729182181, + "learning_rate": 6.60958295741454e-08, + "loss": 0.22833740711212158, + "step": 7271 + }, + { + "epoch": 1.931084849289603, + "grad_norm": 1.1611313283452582, + "learning_rate": 6.559279538787877e-08, + "loss": 0.20720313489437103, + "step": 7272 + }, + { + "epoch": 1.9313504182711458, + "grad_norm": 1.1884544288263172, + "learning_rate": 6.509167643815594e-08, + "loss": 0.17191773653030396, + "step": 7273 + }, + { + "epoch": 1.9316159872526888, + "grad_norm": 1.1354230474675757, + "learning_rate": 6.459247282158632e-08, + "loss": 0.23586943745613098, + "step": 7274 + }, + { + "epoch": 1.9318815562342317, + "grad_norm": 1.3318856895013969, + "learning_rate": 6.409518463441067e-08, + "loss": 0.21353168785572052, + "step": 7275 + }, + { + "epoch": 1.9321471252157747, + "grad_norm": 1.404937308132313, + "learning_rate": 6.359981197250009e-08, + "loss": 0.23148195445537567, + "step": 7276 + }, + { + "epoch": 1.9324126941973176, + "grad_norm": 1.3040478141172254, + "learning_rate": 6.310635493135709e-08, + "loss": 0.2113666534423828, + "step": 7277 + }, + { + "epoch": 1.9326782631788606, + "grad_norm": 1.3399999009479682, + "learning_rate": 6.261481360611332e-08, + "loss": 0.27689510583877563, + "step": 7278 + }, + { + "epoch": 1.9329438321604036, + "grad_norm": 1.2809237898551964, + "learning_rate": 6.2125188091533e-08, + "loss": 0.23746277391910553, + "step": 7279 + }, + { + "epoch": 1.9332094011419465, + "grad_norm": 1.4215326252349767, + "learning_rate": 6.163747848201062e-08, + "loss": 0.23123708367347717, + "step": 7280 + }, + { + "epoch": 1.9334749701234895, + "grad_norm": 1.3095914464878196, + "learning_rate": 6.115168487157097e-08, + "loss": 0.23640167713165283, + "step": 7281 + }, + { + "epoch": 1.9337405391050324, + "grad_norm": 1.3278235730632808, + "learning_rate": 
6.066780735386801e-08, + "loss": 0.2259385585784912, + "step": 7282 + }, + { + "epoch": 1.9340061080865754, + "grad_norm": 1.230137664492021, + "learning_rate": 6.018584602218824e-08, + "loss": 0.219761461019516, + "step": 7283 + }, + { + "epoch": 1.9342716770681183, + "grad_norm": 1.43054331413576, + "learning_rate": 5.970580096944733e-08, + "loss": 0.24411989748477936, + "step": 7284 + }, + { + "epoch": 1.9345372460496613, + "grad_norm": 1.196712051616964, + "learning_rate": 5.922767228819459e-08, + "loss": 0.232415571808815, + "step": 7285 + }, + { + "epoch": 1.9348028150312042, + "grad_norm": 1.341424963494065, + "learning_rate": 5.875146007060517e-08, + "loss": 0.25938165187835693, + "step": 7286 + }, + { + "epoch": 1.9350683840127472, + "grad_norm": 1.253589726996753, + "learning_rate": 5.827716440848785e-08, + "loss": 0.22138425707817078, + "step": 7287 + }, + { + "epoch": 1.9353339529942901, + "grad_norm": 1.12038038288381, + "learning_rate": 5.7804785393282825e-08, + "loss": 0.19724398851394653, + "step": 7288 + }, + { + "epoch": 1.935599521975833, + "grad_norm": 1.4840167690508577, + "learning_rate": 5.7334323116056136e-08, + "loss": 0.25307583808898926, + "step": 7289 + }, + { + "epoch": 1.935865090957376, + "grad_norm": 1.2525903433235852, + "learning_rate": 5.686577766751078e-08, + "loss": 0.2436421811580658, + "step": 7290 + }, + { + "epoch": 1.936130659938919, + "grad_norm": 1.2518328182394873, + "learning_rate": 5.6399149137973394e-08, + "loss": 0.2164984941482544, + "step": 7291 + }, + { + "epoch": 1.936396228920462, + "grad_norm": 1.2277499731042363, + "learning_rate": 5.5934437617407576e-08, + "loss": 0.22526800632476807, + "step": 7292 + }, + { + "epoch": 1.936661797902005, + "grad_norm": 2.195756796154145, + "learning_rate": 5.547164319540277e-08, + "loss": 0.27787747979164124, + "step": 7293 + }, + { + "epoch": 1.936927366883548, + "grad_norm": 1.2647979578451993, + "learning_rate": 5.5010765961179825e-08, + "loss": 0.2188001275062561, + 
"step": 7294 + }, + { + "epoch": 1.937192935865091, + "grad_norm": 1.2454775538056309, + "learning_rate": 5.4551806003591e-08, + "loss": 0.22620335221290588, + "step": 7295 + }, + { + "epoch": 1.937458504846634, + "grad_norm": 1.186081247005514, + "learning_rate": 5.409476341111775e-08, + "loss": 0.20357783138751984, + "step": 7296 + }, + { + "epoch": 1.937724073828177, + "grad_norm": 1.2316030990526627, + "learning_rate": 5.3639638271872906e-08, + "loss": 0.22717830538749695, + "step": 7297 + }, + { + "epoch": 1.9379896428097199, + "grad_norm": 1.1600371116406252, + "learning_rate": 5.318643067360074e-08, + "loss": 0.20139163732528687, + "step": 7298 + }, + { + "epoch": 1.9382552117912628, + "grad_norm": 1.3377291184643103, + "learning_rate": 5.273514070367247e-08, + "loss": 0.2620807886123657, + "step": 7299 + }, + { + "epoch": 1.9385207807728058, + "grad_norm": 1.2240680803779018, + "learning_rate": 5.2285768449091834e-08, + "loss": 0.2102596014738083, + "step": 7300 + }, + { + "epoch": 1.9387863497543487, + "grad_norm": 1.3057613284367482, + "learning_rate": 5.183831399649175e-08, + "loss": 0.2105238288640976, + "step": 7301 + }, + { + "epoch": 1.9390519187358917, + "grad_norm": 1.2241670740951547, + "learning_rate": 5.1392777432138773e-08, + "loss": 0.22178848087787628, + "step": 7302 + }, + { + "epoch": 1.9393174877174346, + "grad_norm": 1.3648564311332518, + "learning_rate": 5.094915884192419e-08, + "loss": 0.23375345766544342, + "step": 7303 + }, + { + "epoch": 1.9395830566989776, + "grad_norm": 1.3411332724549108, + "learning_rate": 5.050745831137405e-08, + "loss": 0.22709332406520844, + "step": 7304 + }, + { + "epoch": 1.9398486256805205, + "grad_norm": 1.270429998105922, + "learning_rate": 5.0067675925642437e-08, + "loss": 0.2312362790107727, + "step": 7305 + }, + { + "epoch": 1.9401141946620635, + "grad_norm": 1.159162680689607, + "learning_rate": 4.962981176951376e-08, + "loss": 0.2014419138431549, + "step": 7306 + }, + { + "epoch": 1.9403797636436064, 
+ "grad_norm": 1.4294147842238243, + "learning_rate": 4.9193865927404936e-08, + "loss": 0.23700466752052307, + "step": 7307 + }, + { + "epoch": 1.9406453326251494, + "grad_norm": 1.3814639969092575, + "learning_rate": 4.8759838483358745e-08, + "loss": 0.23362770676612854, + "step": 7308 + }, + { + "epoch": 1.9409109016066923, + "grad_norm": 1.4217349736822034, + "learning_rate": 4.832772952105269e-08, + "loss": 0.26057323813438416, + "step": 7309 + }, + { + "epoch": 1.9411764705882353, + "grad_norm": 1.1693504727058668, + "learning_rate": 4.789753912379014e-08, + "loss": 0.20954950153827667, + "step": 7310 + }, + { + "epoch": 1.9414420395697782, + "grad_norm": 1.1532528532836688, + "learning_rate": 4.746926737450919e-08, + "loss": 0.2100827842950821, + "step": 7311 + }, + { + "epoch": 1.9417076085513212, + "grad_norm": 1.2509560196931713, + "learning_rate": 4.7042914355773795e-08, + "loss": 0.216691792011261, + "step": 7312 + }, + { + "epoch": 1.9419731775328641, + "grad_norm": 1.2086430330598397, + "learning_rate": 4.6618480149780434e-08, + "loss": 0.22815749049186707, + "step": 7313 + }, + { + "epoch": 1.942238746514407, + "grad_norm": 1.3440658280324072, + "learning_rate": 4.6195964838353646e-08, + "loss": 0.23365731537342072, + "step": 7314 + }, + { + "epoch": 1.94250431549595, + "grad_norm": 1.5301363693806977, + "learning_rate": 4.577536850295161e-08, + "loss": 0.2112172693014145, + "step": 7315 + }, + { + "epoch": 1.942769884477493, + "grad_norm": 1.1945701714854287, + "learning_rate": 4.5356691224659466e-08, + "loss": 0.21821950376033783, + "step": 7316 + }, + { + "epoch": 1.943035453459036, + "grad_norm": 1.1491339078592526, + "learning_rate": 4.4939933084192646e-08, + "loss": 0.2374412566423416, + "step": 7317 + }, + { + "epoch": 1.943301022440579, + "grad_norm": 1.3549046355713708, + "learning_rate": 4.4525094161897987e-08, + "loss": 0.2483779489994049, + "step": 7318 + }, + { + "epoch": 1.9435665914221218, + "grad_norm": 1.327945477663327, + 
"learning_rate": 4.411217453775152e-08, + "loss": 0.23641882836818695, + "step": 7319 + }, + { + "epoch": 1.9438321604036648, + "grad_norm": 1.3586245026219714, + "learning_rate": 4.370117429135956e-08, + "loss": 0.24779492616653442, + "step": 7320 + }, + { + "epoch": 1.944097729385208, + "grad_norm": 1.1641395539357577, + "learning_rate": 4.329209350195651e-08, + "loss": 0.20288071036338806, + "step": 7321 + }, + { + "epoch": 1.944363298366751, + "grad_norm": 1.2676649817410126, + "learning_rate": 4.288493224840928e-08, + "loss": 0.24286144971847534, + "step": 7322 + }, + { + "epoch": 1.9446288673482939, + "grad_norm": 1.3164985028745375, + "learning_rate": 4.2479690609213976e-08, + "loss": 0.22825902700424194, + "step": 7323 + }, + { + "epoch": 1.9448944363298368, + "grad_norm": 1.255280762331411, + "learning_rate": 4.207636866249587e-08, + "loss": 0.22563335299491882, + "step": 7324 + }, + { + "epoch": 1.9451600053113798, + "grad_norm": 1.2990544857906836, + "learning_rate": 4.167496648601166e-08, + "loss": 0.22853273153305054, + "step": 7325 + }, + { + "epoch": 1.9454255742929227, + "grad_norm": 1.1281442356079434, + "learning_rate": 4.1275484157147216e-08, + "loss": 0.20790672302246094, + "step": 7326 + }, + { + "epoch": 1.9456911432744657, + "grad_norm": 1.1980029703513235, + "learning_rate": 4.087792175291649e-08, + "loss": 0.2165423035621643, + "step": 7327 + }, + { + "epoch": 1.9459567122560086, + "grad_norm": 1.3858946395294593, + "learning_rate": 4.048227934996485e-08, + "loss": 0.2605394721031189, + "step": 7328 + }, + { + "epoch": 1.9462222812375516, + "grad_norm": 1.280554987273632, + "learning_rate": 4.008855702456904e-08, + "loss": 0.22624900937080383, + "step": 7329 + }, + { + "epoch": 1.9464878502190945, + "grad_norm": 1.1967949808184344, + "learning_rate": 3.9696754852632804e-08, + "loss": 0.23086196184158325, + "step": 7330 + }, + { + "epoch": 1.9467534192006375, + "grad_norm": 1.4330145211347993, + "learning_rate": 3.9306872909691265e-08, + 
"loss": 0.24633410573005676, + "step": 7331 + }, + { + "epoch": 1.9470189881821804, + "grad_norm": 2.2568432653955894, + "learning_rate": 3.8918911270908745e-08, + "loss": 0.2535535395145416, + "step": 7332 + }, + { + "epoch": 1.9472845571637234, + "grad_norm": 1.3555855555438505, + "learning_rate": 3.853287001108097e-08, + "loss": 0.23904260993003845, + "step": 7333 + }, + { + "epoch": 1.9475501261452663, + "grad_norm": 1.3963340527453718, + "learning_rate": 3.814874920463063e-08, + "loss": 0.22525179386138916, + "step": 7334 + }, + { + "epoch": 1.9478156951268093, + "grad_norm": 1.415360473918547, + "learning_rate": 3.776654892561293e-08, + "loss": 0.21139883995056152, + "step": 7335 + }, + { + "epoch": 1.9480812641083523, + "grad_norm": 1.2272269269066283, + "learning_rate": 3.738626924771005e-08, + "loss": 0.21939310431480408, + "step": 7336 + }, + { + "epoch": 1.9483468330898952, + "grad_norm": 1.1845473795192814, + "learning_rate": 3.7007910244236664e-08, + "loss": 0.22852283716201782, + "step": 7337 + }, + { + "epoch": 1.9486124020714382, + "grad_norm": 1.2529721413425112, + "learning_rate": 3.663147198813666e-08, + "loss": 0.20769211649894714, + "step": 7338 + }, + { + "epoch": 1.948877971052981, + "grad_norm": 1.216093250313145, + "learning_rate": 3.625695455198086e-08, + "loss": 0.21721890568733215, + "step": 7339 + }, + { + "epoch": 1.949143540034524, + "grad_norm": 1.261493312403511, + "learning_rate": 3.588435800797263e-08, + "loss": 0.24236848950386047, + "step": 7340 + }, + { + "epoch": 1.949409109016067, + "grad_norm": 1.21142050375974, + "learning_rate": 3.5513682427944505e-08, + "loss": 0.2300192266702652, + "step": 7341 + }, + { + "epoch": 1.94967467799761, + "grad_norm": 1.1850825722481098, + "learning_rate": 3.5144927883358215e-08, + "loss": 0.21636728942394257, + "step": 7342 + }, + { + "epoch": 1.949940246979153, + "grad_norm": 1.3000939007920165, + "learning_rate": 3.477809444530578e-08, + "loss": 0.25367966294288635, + "step": 7343 + }, + { 
+ "epoch": 1.9502058159606959, + "grad_norm": 1.4245768388392126, + "learning_rate": 3.4413182184507285e-08, + "loss": 0.24514247477054596, + "step": 7344 + }, + { + "epoch": 1.9504713849422388, + "grad_norm": 1.1048557155163508, + "learning_rate": 3.405019117131425e-08, + "loss": 0.18460404872894287, + "step": 7345 + }, + { + "epoch": 1.9507369539237818, + "grad_norm": 1.275062396510646, + "learning_rate": 3.3689121475706244e-08, + "loss": 0.2096845805644989, + "step": 7346 + }, + { + "epoch": 1.9510025229053247, + "grad_norm": 1.2314050158221594, + "learning_rate": 3.332997316729536e-08, + "loss": 0.22435057163238525, + "step": 7347 + }, + { + "epoch": 1.9512680918868677, + "grad_norm": 1.208912476805739, + "learning_rate": 3.2972746315318436e-08, + "loss": 0.20798128843307495, + "step": 7348 + }, + { + "epoch": 1.9515336608684106, + "grad_norm": 1.2922181556866412, + "learning_rate": 3.2617440988645945e-08, + "loss": 0.23958316445350647, + "step": 7349 + }, + { + "epoch": 1.9517992298499536, + "grad_norm": 1.3799363972113297, + "learning_rate": 3.2264057255777525e-08, + "loss": 0.21934574842453003, + "step": 7350 + }, + { + "epoch": 1.9520647988314965, + "grad_norm": 1.2014453671941887, + "learning_rate": 3.1912595184839804e-08, + "loss": 0.24321375787258148, + "step": 7351 + }, + { + "epoch": 1.9523303678130395, + "grad_norm": 1.1661737247347086, + "learning_rate": 3.156305484359079e-08, + "loss": 0.20932736992835999, + "step": 7352 + }, + { + "epoch": 1.9525959367945824, + "grad_norm": 1.2983329607047998, + "learning_rate": 3.12154362994177e-08, + "loss": 0.19824840128421783, + "step": 7353 + }, + { + "epoch": 1.9528615057761254, + "grad_norm": 1.3128795915591134, + "learning_rate": 3.0869739619338034e-08, + "loss": 0.212745800614357, + "step": 7354 + }, + { + "epoch": 1.9531270747576683, + "grad_norm": 1.247129470001585, + "learning_rate": 3.0525964869997374e-08, + "loss": 0.23044779896736145, + "step": 7355 + }, + { + "epoch": 1.9533926437392113, + 
"grad_norm": 1.2323689907378315, + "learning_rate": 3.018411211767158e-08, + "loss": 0.2237459123134613, + "step": 7356 + }, + { + "epoch": 1.9536582127207542, + "grad_norm": 1.3228713238231502, + "learning_rate": 2.984418142826684e-08, + "loss": 0.2592429518699646, + "step": 7357 + }, + { + "epoch": 1.9539237817022972, + "grad_norm": 1.1444806738907807, + "learning_rate": 2.9506172867315163e-08, + "loss": 0.17559123039245605, + "step": 7358 + }, + { + "epoch": 1.9541893506838401, + "grad_norm": 1.287127142439038, + "learning_rate": 2.917008649998332e-08, + "loss": 0.24143017828464508, + "step": 7359 + }, + { + "epoch": 1.954454919665383, + "grad_norm": 1.310526275865734, + "learning_rate": 2.883592239106392e-08, + "loss": 0.23560799658298492, + "step": 7360 + }, + { + "epoch": 1.954720488646926, + "grad_norm": 1.357586181070064, + "learning_rate": 2.8503680604979878e-08, + "loss": 0.2456119805574417, + "step": 7361 + }, + { + "epoch": 1.954986057628469, + "grad_norm": 1.2143945666113656, + "learning_rate": 2.817336120578329e-08, + "loss": 0.21878069639205933, + "step": 7362 + }, + { + "epoch": 1.955251626610012, + "grad_norm": 1.2288786099560105, + "learning_rate": 2.7844964257155438e-08, + "loss": 0.20496608316898346, + "step": 7363 + }, + { + "epoch": 1.955517195591555, + "grad_norm": 1.2067776880816419, + "learning_rate": 2.7518489822407902e-08, + "loss": 0.23219498991966248, + "step": 7364 + }, + { + "epoch": 1.9557827645730979, + "grad_norm": 1.3499865013336032, + "learning_rate": 2.7193937964481442e-08, + "loss": 0.2284272015094757, + "step": 7365 + }, + { + "epoch": 1.9560483335546408, + "grad_norm": 1.3177047034961433, + "learning_rate": 2.68713087459449e-08, + "loss": 0.22303974628448486, + "step": 7366 + }, + { + "epoch": 1.9563139025361838, + "grad_norm": 1.337791009624748, + "learning_rate": 2.655060222899741e-08, + "loss": 0.22489243745803833, + "step": 7367 + }, + { + "epoch": 1.9565794715177267, + "grad_norm": 1.2719472133739602, + "learning_rate": 
2.6231818475468407e-08, + "loss": 0.27986854314804077, + "step": 7368 + }, + { + "epoch": 1.9568450404992697, + "grad_norm": 1.3884495118427658, + "learning_rate": 2.591495754681539e-08, + "loss": 0.29321208596229553, + "step": 7369 + }, + { + "epoch": 1.9571106094808126, + "grad_norm": 1.3942541242432065, + "learning_rate": 2.5600019504125053e-08, + "loss": 0.2560982406139374, + "step": 7370 + }, + { + "epoch": 1.9573761784623556, + "grad_norm": 1.4283472016053, + "learning_rate": 2.528700440811438e-08, + "loss": 0.264164537191391, + "step": 7371 + }, + { + "epoch": 1.9576417474438985, + "grad_norm": 1.1832183058517125, + "learning_rate": 2.4975912319127326e-08, + "loss": 0.2135474979877472, + "step": 7372 + }, + { + "epoch": 1.9579073164254415, + "grad_norm": 1.265205421311282, + "learning_rate": 2.466674329714036e-08, + "loss": 0.2100939154624939, + "step": 7373 + }, + { + "epoch": 1.9581728854069844, + "grad_norm": 1.395586955333931, + "learning_rate": 2.4359497401758026e-08, + "loss": 0.23327934741973877, + "step": 7374 + }, + { + "epoch": 1.9584384543885274, + "grad_norm": 1.0722904974981595, + "learning_rate": 2.405417469221183e-08, + "loss": 0.18830639123916626, + "step": 7375 + }, + { + "epoch": 1.9587040233700703, + "grad_norm": 1.284092871282835, + "learning_rate": 2.3750775227364686e-08, + "loss": 0.2558823227882385, + "step": 7376 + }, + { + "epoch": 1.9589695923516133, + "grad_norm": 1.2598399224501151, + "learning_rate": 2.3449299065710917e-08, + "loss": 0.24241580069065094, + "step": 7377 + }, + { + "epoch": 1.9592351613331562, + "grad_norm": 1.1684337819721369, + "learning_rate": 2.3149746265368478e-08, + "loss": 0.21678534150123596, + "step": 7378 + }, + { + "epoch": 1.9595007303146992, + "grad_norm": 1.2804084693654512, + "learning_rate": 2.2852116884088947e-08, + "loss": 0.20956794917583466, + "step": 7379 + }, + { + "epoch": 1.9597662992962421, + "grad_norm": 1.2682321373225172, + "learning_rate": 2.2556410979253095e-08, + "loss": 
0.2185555249452591, + "step": 7380 + }, + { + "epoch": 1.960031868277785, + "grad_norm": 1.3369178147645102, + "learning_rate": 2.226262860786643e-08, + "loss": 0.21802933514118195, + "step": 7381 + }, + { + "epoch": 1.960297437259328, + "grad_norm": 1.4565773631347612, + "learning_rate": 2.1970769826570317e-08, + "loss": 0.22842684388160706, + "step": 7382 + }, + { + "epoch": 1.960563006240871, + "grad_norm": 1.2737807469252465, + "learning_rate": 2.1680834691628627e-08, + "loss": 0.23380814492702484, + "step": 7383 + }, + { + "epoch": 1.960828575222414, + "grad_norm": 1.311531421948895, + "learning_rate": 2.1392823258938877e-08, + "loss": 0.23476335406303406, + "step": 7384 + }, + { + "epoch": 1.961094144203957, + "grad_norm": 1.2100451325455786, + "learning_rate": 2.110673558402554e-08, + "loss": 0.19657662510871887, + "step": 7385 + }, + { + "epoch": 1.9613597131854998, + "grad_norm": 1.191542044024077, + "learning_rate": 2.0822571722044494e-08, + "loss": 0.1724000722169876, + "step": 7386 + }, + { + "epoch": 1.9616252821670428, + "grad_norm": 1.3535695538712786, + "learning_rate": 2.0540331727777475e-08, + "loss": 0.22960031032562256, + "step": 7387 + }, + { + "epoch": 1.9618908511485857, + "grad_norm": 1.4028518726902017, + "learning_rate": 2.0260015655637623e-08, + "loss": 0.2601638436317444, + "step": 7388 + }, + { + "epoch": 1.9621564201301287, + "grad_norm": 1.3907771240802078, + "learning_rate": 1.998162355966726e-08, + "loss": 0.2562445402145386, + "step": 7389 + }, + { + "epoch": 1.9624219891116716, + "grad_norm": 1.1881922077977833, + "learning_rate": 1.9705155493535688e-08, + "loss": 0.20073221623897552, + "step": 7390 + }, + { + "epoch": 1.9626875580932146, + "grad_norm": 1.2076860773847395, + "learning_rate": 1.9430611510544707e-08, + "loss": 0.18454071879386902, + "step": 7391 + }, + { + "epoch": 1.9629531270747576, + "grad_norm": 1.1878203901407238, + "learning_rate": 1.915799166362087e-08, + "loss": 0.18515023589134216, + "step": 7392 + }, + { + 
"epoch": 1.9632186960563005, + "grad_norm": 1.3323308983960227, + "learning_rate": 1.8887296005323242e-08, + "loss": 0.25658512115478516, + "step": 7393 + }, + { + "epoch": 1.9634842650378435, + "grad_norm": 1.4122913637661163, + "learning_rate": 1.861852458783897e-08, + "loss": 0.2219933569431305, + "step": 7394 + }, + { + "epoch": 1.9637498340193864, + "grad_norm": 1.3005286775146463, + "learning_rate": 1.8351677462983276e-08, + "loss": 0.24949616193771362, + "step": 7395 + }, + { + "epoch": 1.9640154030009294, + "grad_norm": 1.4026906711741571, + "learning_rate": 1.808675468220167e-08, + "loss": 0.24348726868629456, + "step": 7396 + }, + { + "epoch": 1.9642809719824723, + "grad_norm": 1.3848607909391346, + "learning_rate": 1.782375629656885e-08, + "loss": 0.2329033762216568, + "step": 7397 + }, + { + "epoch": 1.9645465409640153, + "grad_norm": 1.2075544796662319, + "learning_rate": 1.7562682356786488e-08, + "loss": 0.22265426814556122, + "step": 7398 + }, + { + "epoch": 1.9648121099455582, + "grad_norm": 1.2895787739524316, + "learning_rate": 1.730353291318654e-08, + "loss": 0.24438990652561188, + "step": 7399 + }, + { + "epoch": 1.9650776789271012, + "grad_norm": 1.3518107746112518, + "learning_rate": 1.704630801573015e-08, + "loss": 0.2632136642932892, + "step": 7400 + }, + { + "epoch": 1.9653432479086441, + "grad_norm": 1.3377019916165274, + "learning_rate": 1.6791007714008766e-08, + "loss": 0.22230927646160126, + "step": 7401 + }, + { + "epoch": 1.965608816890187, + "grad_norm": 1.3577982430958546, + "learning_rate": 1.653763205723968e-08, + "loss": 0.26317098736763, + "step": 7402 + }, + { + "epoch": 1.96587438587173, + "grad_norm": 1.3261620865973216, + "learning_rate": 1.628618109427049e-08, + "loss": 0.23205846548080444, + "step": 7403 + }, + { + "epoch": 1.966139954853273, + "grad_norm": 1.1507090645553337, + "learning_rate": 1.6036654873579084e-08, + "loss": 0.202583909034729, + "step": 7404 + }, + { + "epoch": 1.966405523834816, + "grad_norm": 
1.3959078486467311, + "learning_rate": 1.5789053443270308e-08, + "loss": 0.2579672038555145, + "step": 7405 + }, + { + "epoch": 1.966671092816359, + "grad_norm": 1.4293268160842907, + "learning_rate": 1.5543376851080428e-08, + "loss": 0.27483606338500977, + "step": 7406 + }, + { + "epoch": 1.966936661797902, + "grad_norm": 1.6466914863601023, + "learning_rate": 1.5299625144370444e-08, + "loss": 0.22510311007499695, + "step": 7407 + }, + { + "epoch": 1.967202230779445, + "grad_norm": 1.3926470224592478, + "learning_rate": 1.505779837013499e-08, + "loss": 0.24941131472587585, + "step": 7408 + }, + { + "epoch": 1.967467799760988, + "grad_norm": 1.316826202799614, + "learning_rate": 1.481789657499344e-08, + "loss": 0.22301170229911804, + "step": 7409 + }, + { + "epoch": 1.967733368742531, + "grad_norm": 1.4513024231529628, + "learning_rate": 1.4579919805198795e-08, + "loss": 0.23045194149017334, + "step": 7410 + }, + { + "epoch": 1.9679989377240739, + "grad_norm": 1.2632313332378347, + "learning_rate": 1.4343868106627689e-08, + "loss": 0.25892990827560425, + "step": 7411 + }, + { + "epoch": 1.9682645067056168, + "grad_norm": 1.316940344896203, + "learning_rate": 1.4109741524788167e-08, + "loss": 0.23086567223072052, + "step": 7412 + }, + { + "epoch": 1.9685300756871598, + "grad_norm": 1.2838593122102535, + "learning_rate": 1.3877540104818566e-08, + "loss": 0.2514735460281372, + "step": 7413 + }, + { + "epoch": 1.9687956446687027, + "grad_norm": 1.2787980812943278, + "learning_rate": 1.3647263891484187e-08, + "loss": 0.21824213862419128, + "step": 7414 + }, + { + "epoch": 1.9690612136502457, + "grad_norm": 1.3351479110439386, + "learning_rate": 1.3418912929178407e-08, + "loss": 0.2262609452009201, + "step": 7415 + }, + { + "epoch": 1.9693267826317886, + "grad_norm": 1.2373165426791106, + "learning_rate": 1.3192487261926013e-08, + "loss": 0.23119492828845978, + "step": 7416 + }, + { + "epoch": 1.9695923516133316, + "grad_norm": 1.2213219567044962, + "learning_rate": 
1.2967986933378751e-08, + "loss": 0.20173534750938416, + "step": 7417 + }, + { + "epoch": 1.9698579205948745, + "grad_norm": 1.3102471335629409, + "learning_rate": 1.2745411986816447e-08, + "loss": 0.2212662547826767, + "step": 7418 + }, + { + "epoch": 1.9701234895764175, + "grad_norm": 1.2461352597734543, + "learning_rate": 1.2524762465151442e-08, + "loss": 0.21990706026554108, + "step": 7419 + }, + { + "epoch": 1.9703890585579604, + "grad_norm": 1.2130065240866306, + "learning_rate": 1.2306038410919707e-08, + "loss": 0.18648189306259155, + "step": 7420 + }, + { + "epoch": 1.9706546275395034, + "grad_norm": 1.334350070832243, + "learning_rate": 1.2089239866289737e-08, + "loss": 0.23273484408855438, + "step": 7421 + }, + { + "epoch": 1.9709201965210463, + "grad_norm": 1.3083344252475524, + "learning_rate": 1.1874366873059206e-08, + "loss": 0.21514324843883514, + "step": 7422 + }, + { + "epoch": 1.9711857655025893, + "grad_norm": 1.2628839077455776, + "learning_rate": 1.1661419472650538e-08, + "loss": 0.2544926106929779, + "step": 7423 + }, + { + "epoch": 1.9714513344841322, + "grad_norm": 1.1881271398224822, + "learning_rate": 1.1450397706119776e-08, + "loss": 0.235082745552063, + "step": 7424 + }, + { + "epoch": 1.9717169034656752, + "grad_norm": 1.3712056139426412, + "learning_rate": 1.1241301614147715e-08, + "loss": 0.24777358770370483, + "step": 7425 + }, + { + "epoch": 1.9719824724472181, + "grad_norm": 1.5271853101134352, + "learning_rate": 1.1034131237045443e-08, + "loss": 0.23714174330234528, + "step": 7426 + }, + { + "epoch": 1.972248041428761, + "grad_norm": 1.3430700979817631, + "learning_rate": 1.0828886614754342e-08, + "loss": 0.24665668606758118, + "step": 7427 + }, + { + "epoch": 1.972513610410304, + "grad_norm": 1.3931055934155485, + "learning_rate": 1.062556778684276e-08, + "loss": 0.23421131074428558, + "step": 7428 + }, + { + "epoch": 1.972779179391847, + "grad_norm": 1.274566697934482, + "learning_rate": 1.0424174792508234e-08, + "loss": 
0.23443526029586792, + "step": 7429 + }, + { + "epoch": 1.97304474837339, + "grad_norm": 1.3315316306417777, + "learning_rate": 1.0224707670576373e-08, + "loss": 0.24177192151546478, + "step": 7430 + }, + { + "epoch": 1.973310317354933, + "grad_norm": 1.4439736433803494, + "learning_rate": 1.002716645950197e-08, + "loss": 0.20957472920417786, + "step": 7431 + }, + { + "epoch": 1.9735758863364758, + "grad_norm": 1.2252184749081894, + "learning_rate": 9.831551197370116e-09, + "loss": 0.21594710648059845, + "step": 7432 + }, + { + "epoch": 1.9738414553180188, + "grad_norm": 1.4445839220306718, + "learning_rate": 9.637861921891756e-09, + "loss": 0.2372155487537384, + "step": 7433 + }, + { + "epoch": 1.974107024299562, + "grad_norm": 1.295551996082086, + "learning_rate": 9.446098670408132e-09, + "loss": 0.211237370967865, + "step": 7434 + }, + { + "epoch": 1.974372593281105, + "grad_norm": 1.3006326416512255, + "learning_rate": 9.256261479888562e-09, + "loss": 0.25123757123947144, + "step": 7435 + }, + { + "epoch": 1.9746381622626479, + "grad_norm": 1.2670719422156809, + "learning_rate": 9.068350386932655e-09, + "loss": 0.23048831522464752, + "step": 7436 + }, + { + "epoch": 1.9749037312441908, + "grad_norm": 1.2157385411321804, + "learning_rate": 8.882365427765883e-09, + "loss": 0.22923544049263, + "step": 7437 + }, + { + "epoch": 1.9751693002257338, + "grad_norm": 1.1040485462060259, + "learning_rate": 8.698306638245114e-09, + "loss": 0.199529767036438, + "step": 7438 + }, + { + "epoch": 1.9754348692072767, + "grad_norm": 1.314383264088006, + "learning_rate": 8.516174053854187e-09, + "loss": 0.22778059542179108, + "step": 7439 + }, + { + "epoch": 1.9757004381888197, + "grad_norm": 1.3428968973890816, + "learning_rate": 8.335967709706128e-09, + "loss": 0.22807848453521729, + "step": 7440 + }, + { + "epoch": 1.9759660071703626, + "grad_norm": 1.3347725648799278, + "learning_rate": 8.157687640543143e-09, + "loss": 0.24764932692050934, + "step": 7441 + }, + { + "epoch": 
1.9762315761519056, + "grad_norm": 1.376463462320243, + "learning_rate": 7.98133388073552e-09, + "loss": 0.22213312983512878, + "step": 7442 + }, + { + "epoch": 1.9764971451334485, + "grad_norm": 1.2799794398059858, + "learning_rate": 7.806906464281617e-09, + "loss": 0.22822709381580353, + "step": 7443 + }, + { + "epoch": 1.9767627141149915, + "grad_norm": 1.2148981447749936, + "learning_rate": 7.634405424808977e-09, + "loss": 0.2236599326133728, + "step": 7444 + }, + { + "epoch": 1.9770282830965344, + "grad_norm": 1.263255403192069, + "learning_rate": 7.463830795574334e-09, + "loss": 0.20294487476348877, + "step": 7445 + }, + { + "epoch": 1.9772938520780774, + "grad_norm": 1.3034015114742201, + "learning_rate": 7.295182609461382e-09, + "loss": 0.2187870740890503, + "step": 7446 + }, + { + "epoch": 1.9775594210596203, + "grad_norm": 1.362800468373944, + "learning_rate": 7.128460898984113e-09, + "loss": 0.2629002630710602, + "step": 7447 + }, + { + "epoch": 1.9778249900411633, + "grad_norm": 1.3155096560899557, + "learning_rate": 6.963665696285704e-09, + "loss": 0.24024136364459991, + "step": 7448 + }, + { + "epoch": 1.9780905590227063, + "grad_norm": 1.240780926418524, + "learning_rate": 6.800797033134077e-09, + "loss": 0.22334401309490204, + "step": 7449 + }, + { + "epoch": 1.9783561280042492, + "grad_norm": 1.2853076050759633, + "learning_rate": 6.639854940930779e-09, + "loss": 0.21535055339336395, + "step": 7450 + }, + { + "epoch": 1.9786216969857922, + "grad_norm": 1.3182931470109147, + "learning_rate": 6.480839450703214e-09, + "loss": 0.26096785068511963, + "step": 7451 + }, + { + "epoch": 1.978887265967335, + "grad_norm": 1.2393293544951642, + "learning_rate": 6.323750593106859e-09, + "loss": 0.22461384534835815, + "step": 7452 + }, + { + "epoch": 1.979152834948878, + "grad_norm": 1.2999818118404687, + "learning_rate": 6.168588398426378e-09, + "loss": 0.24372713267803192, + "step": 7453 + }, + { + "epoch": 1.979418403930421, + "grad_norm": 1.2743158428703243, 
+ "learning_rate": 6.015352896576732e-09, + "loss": 0.19544872641563416, + "step": 7454 + }, + { + "epoch": 1.979683972911964, + "grad_norm": 1.1957228310016947, + "learning_rate": 5.864044117097623e-09, + "loss": 0.22004768252372742, + "step": 7455 + }, + { + "epoch": 1.979949541893507, + "grad_norm": 1.3624679399119848, + "learning_rate": 5.714662089162381e-09, + "loss": 0.2509492337703705, + "step": 7456 + }, + { + "epoch": 1.9802151108750499, + "grad_norm": 1.1563599654889156, + "learning_rate": 5.567206841567974e-09, + "loss": 0.19315078854560852, + "step": 7457 + }, + { + "epoch": 1.9804806798565928, + "grad_norm": 1.1652222675857882, + "learning_rate": 5.421678402741659e-09, + "loss": 0.20722024142742157, + "step": 7458 + }, + { + "epoch": 1.9807462488381358, + "grad_norm": 1.2430974429352135, + "learning_rate": 5.278076800742105e-09, + "loss": 0.2041238397359848, + "step": 7459 + }, + { + "epoch": 1.9810118178196787, + "grad_norm": 1.226308526828602, + "learning_rate": 5.136402063251611e-09, + "loss": 0.21889238059520721, + "step": 7460 + }, + { + "epoch": 1.9812773868012217, + "grad_norm": 1.2925316754685727, + "learning_rate": 4.996654217584995e-09, + "loss": 0.23580557107925415, + "step": 7461 + }, + { + "epoch": 1.9815429557827646, + "grad_norm": 1.5912986799887796, + "learning_rate": 4.858833290684039e-09, + "loss": 0.24967315793037415, + "step": 7462 + }, + { + "epoch": 1.9818085247643076, + "grad_norm": 1.3642305983011473, + "learning_rate": 4.722939309116381e-09, + "loss": 0.21802274882793427, + "step": 7463 + }, + { + "epoch": 1.9820740937458505, + "grad_norm": 1.2778589071361273, + "learning_rate": 4.588972299084393e-09, + "loss": 0.2641376554965973, + "step": 7464 + }, + { + "epoch": 1.9823396627273935, + "grad_norm": 1.181293128126433, + "learning_rate": 4.456932286412974e-09, + "loss": 0.20166629552841187, + "step": 7465 + }, + { + "epoch": 1.9826052317089364, + "grad_norm": 1.3531318882305197, + "learning_rate": 4.3268192965573164e-09, + 
"loss": 0.22796592116355896, + "step": 7466 + }, + { + "epoch": 1.9828708006904794, + "grad_norm": 1.1849961491022751, + "learning_rate": 4.19863335460402e-09, + "loss": 0.19833455979824066, + "step": 7467 + }, + { + "epoch": 1.9831363696720223, + "grad_norm": 1.273561592311718, + "learning_rate": 4.07237448526554e-09, + "loss": 0.23009257018566132, + "step": 7468 + }, + { + "epoch": 1.9834019386535653, + "grad_norm": 1.2188380225442625, + "learning_rate": 3.9480427128812945e-09, + "loss": 0.22418440878391266, + "step": 7469 + }, + { + "epoch": 1.9836675076351082, + "grad_norm": 1.2878640211544259, + "learning_rate": 3.825638061421e-09, + "loss": 0.2015800178050995, + "step": 7470 + }, + { + "epoch": 1.9839330766166512, + "grad_norm": 1.2488639013131106, + "learning_rate": 3.705160554485776e-09, + "loss": 0.22166767716407776, + "step": 7471 + }, + { + "epoch": 1.9841986455981941, + "grad_norm": 1.476152466944419, + "learning_rate": 3.5866102152981586e-09, + "loss": 0.3154509961605072, + "step": 7472 + }, + { + "epoch": 1.984464214579737, + "grad_norm": 1.3338840715084874, + "learning_rate": 3.4699870667165292e-09, + "loss": 0.25891417264938354, + "step": 7473 + }, + { + "epoch": 1.98472978356128, + "grad_norm": 1.2984805204003045, + "learning_rate": 3.355291131222904e-09, + "loss": 0.24837851524353027, + "step": 7474 + }, + { + "epoch": 1.984995352542823, + "grad_norm": 1.2923319105031845, + "learning_rate": 3.2425224309307055e-09, + "loss": 0.24254213273525238, + "step": 7475 + }, + { + "epoch": 1.985260921524366, + "grad_norm": 1.3479980629574153, + "learning_rate": 3.1316809875781005e-09, + "loss": 0.24822884798049927, + "step": 7476 + }, + { + "epoch": 1.985526490505909, + "grad_norm": 1.2515754926310612, + "learning_rate": 3.022766822535772e-09, + "loss": 0.19553488492965698, + "step": 7477 + }, + { + "epoch": 1.9857920594874519, + "grad_norm": 1.289139949226706, + "learning_rate": 2.9157799568002576e-09, + "loss": 0.24758943915367126, + "step": 7478 + }, + { 
+ "epoch": 1.9860576284689948, + "grad_norm": 1.3254058481790592, + "learning_rate": 2.810720410998391e-09, + "loss": 0.22947746515274048, + "step": 7479 + }, + { + "epoch": 1.9863231974505378, + "grad_norm": 1.1718425441422213, + "learning_rate": 2.7075882053828605e-09, + "loss": 0.20573696494102478, + "step": 7480 + }, + { + "epoch": 1.9865887664320807, + "grad_norm": 1.3248019948595686, + "learning_rate": 2.606383359837761e-09, + "loss": 0.2547800838947296, + "step": 7481 + }, + { + "epoch": 1.9868543354136237, + "grad_norm": 1.3239089800396548, + "learning_rate": 2.507105893874151e-09, + "loss": 0.22227191925048828, + "step": 7482 + }, + { + "epoch": 1.9871199043951666, + "grad_norm": 1.379027057566697, + "learning_rate": 2.409755826630056e-09, + "loss": 0.24687603116035461, + "step": 7483 + }, + { + "epoch": 1.9873854733767096, + "grad_norm": 1.3626347731044859, + "learning_rate": 2.3143331768749053e-09, + "loss": 0.23577818274497986, + "step": 7484 + }, + { + "epoch": 1.9876510423582525, + "grad_norm": 1.2429616783261994, + "learning_rate": 2.2208379630039858e-09, + "loss": 0.23012465238571167, + "step": 7485 + }, + { + "epoch": 1.9879166113397955, + "grad_norm": 1.2667278392117014, + "learning_rate": 2.129270203043987e-09, + "loss": 0.21479251980781555, + "step": 7486 + }, + { + "epoch": 1.9881821803213384, + "grad_norm": 1.2419157692275362, + "learning_rate": 2.039629914645236e-09, + "loss": 0.24436548352241516, + "step": 7487 + }, + { + "epoch": 1.9884477493028814, + "grad_norm": 1.3198752588445606, + "learning_rate": 1.951917115091684e-09, + "loss": 0.22225134074687958, + "step": 7488 + }, + { + "epoch": 1.9887133182844243, + "grad_norm": 1.4243538533938824, + "learning_rate": 1.8661318212920275e-09, + "loss": 0.22320827841758728, + "step": 7489 + }, + { + "epoch": 1.9889788872659673, + "grad_norm": 1.3025984911365984, + "learning_rate": 1.7822740497852597e-09, + "loss": 0.2317924201488495, + "step": 7490 + }, + { + "epoch": 1.9892444562475102, + 
"grad_norm": 1.370204940685918, + "learning_rate": 1.700343816738448e-09, + "loss": 0.2275170385837555, + "step": 7491 + }, + { + "epoch": 1.9895100252290532, + "grad_norm": 1.652167024814656, + "learning_rate": 1.6203411379456247e-09, + "loss": 0.24541540443897247, + "step": 7492 + }, + { + "epoch": 1.9897755942105961, + "grad_norm": 1.311164124852614, + "learning_rate": 1.5422660288322288e-09, + "loss": 0.23041896522045135, + "step": 7493 + }, + { + "epoch": 1.990041163192139, + "grad_norm": 1.301476042648128, + "learning_rate": 1.4661185044484438e-09, + "loss": 0.22362437844276428, + "step": 7494 + }, + { + "epoch": 1.990306732173682, + "grad_norm": 1.1872303288026824, + "learning_rate": 1.3918985794747486e-09, + "loss": 0.22082944214344025, + "step": 7495 + }, + { + "epoch": 1.990572301155225, + "grad_norm": 1.2985516009859217, + "learning_rate": 1.3196062682208078e-09, + "loss": 0.2210516780614853, + "step": 7496 + }, + { + "epoch": 1.990837870136768, + "grad_norm": 1.2609254238659025, + "learning_rate": 1.249241584623251e-09, + "loss": 0.21891455352306366, + "step": 7497 + }, + { + "epoch": 1.991103439118311, + "grad_norm": 1.2687100133579783, + "learning_rate": 1.1808045422478932e-09, + "loss": 0.23363247513771057, + "step": 7498 + }, + { + "epoch": 1.9913690080998538, + "grad_norm": 1.188481032582791, + "learning_rate": 1.1142951542875146e-09, + "loss": 0.20676104724407196, + "step": 7499 + }, + { + "epoch": 1.9916345770813968, + "grad_norm": 1.2983095103442552, + "learning_rate": 1.0497134335663018e-09, + "loss": 0.23037788271903992, + "step": 7500 + }, + { + "epoch": 1.9919001460629397, + "grad_norm": 1.1706822471326355, + "learning_rate": 9.870593925320748e-10, + "loss": 0.21958573162555695, + "step": 7501 + }, + { + "epoch": 1.9921657150444827, + "grad_norm": 1.3574206120623875, + "learning_rate": 9.263330432662809e-10, + "loss": 0.23280993103981018, + "step": 7502 + }, + { + "epoch": 1.9924312840260257, + "grad_norm": 1.2662411212973668, + 
"learning_rate": 8.675343974762219e-10, + "loss": 0.2254818230867386, + "step": 7503 + }, + { + "epoch": 1.9926968530075686, + "grad_norm": 1.255709874874282, + "learning_rate": 8.106634664950541e-10, + "loss": 0.1850586235523224, + "step": 7504 + }, + { + "epoch": 1.9929624219891116, + "grad_norm": 1.1965362861662039, + "learning_rate": 7.557202612895609e-10, + "loss": 0.21080443263053894, + "step": 7505 + }, + { + "epoch": 1.9932279909706545, + "grad_norm": 1.2788710791805473, + "learning_rate": 7.027047924512698e-10, + "loss": 0.21604907512664795, + "step": 7506 + }, + { + "epoch": 1.9934935599521975, + "grad_norm": 1.287068201404914, + "learning_rate": 6.516170701997837e-10, + "loss": 0.24684564769268036, + "step": 7507 + }, + { + "epoch": 1.9937591289337404, + "grad_norm": 1.2013851004960618, + "learning_rate": 6.024571043861116e-10, + "loss": 0.21735510230064392, + "step": 7508 + }, + { + "epoch": 1.9940246979152834, + "grad_norm": 1.2853945699676002, + "learning_rate": 5.552249044860069e-10, + "loss": 0.23616179823875427, + "step": 7509 + }, + { + "epoch": 1.9942902668968263, + "grad_norm": 1.280261468721699, + "learning_rate": 5.099204796066293e-10, + "loss": 0.23930129408836365, + "step": 7510 + }, + { + "epoch": 1.9945558358783693, + "grad_norm": 1.30216307212454, + "learning_rate": 4.665438384809928e-10, + "loss": 0.2354714274406433, + "step": 7511 + }, + { + "epoch": 1.9948214048599122, + "grad_norm": 1.4489462806357751, + "learning_rate": 4.250949894724077e-10, + "loss": 0.28315576910972595, + "step": 7512 + }, + { + "epoch": 1.9950869738414552, + "grad_norm": 1.1749720994980957, + "learning_rate": 3.8557394057114895e-10, + "loss": 0.19599778950214386, + "step": 7513 + }, + { + "epoch": 1.9953525428229981, + "grad_norm": 1.5080290285974376, + "learning_rate": 3.4798069939667725e-10, + "loss": 0.2295808494091034, + "step": 7514 + }, + { + "epoch": 1.995618111804541, + "grad_norm": 1.2840127096725462, + "learning_rate": 3.1231527319763864e-10, + "loss": 
0.23212578892707825, + "step": 7515 + }, + { + "epoch": 1.995883680786084, + "grad_norm": 1.2763709143213344, + "learning_rate": 2.78577668847424e-10, + "loss": 0.2408447265625, + "step": 7516 + }, + { + "epoch": 1.996149249767627, + "grad_norm": 1.325995428985527, + "learning_rate": 2.4676789285305034e-10, + "loss": 0.25482073426246643, + "step": 7517 + }, + { + "epoch": 1.9964148187491702, + "grad_norm": 1.2453043840474796, + "learning_rate": 2.1688595134516932e-10, + "loss": 0.21228459477424622, + "step": 7518 + }, + { + "epoch": 1.996680387730713, + "grad_norm": 1.3949495270151018, + "learning_rate": 1.8893185008472814e-10, + "loss": 0.2467353343963623, + "step": 7519 + }, + { + "epoch": 1.996945956712256, + "grad_norm": 1.3819791453502894, + "learning_rate": 1.6290559446185962e-10, + "loss": 0.24475792050361633, + "step": 7520 + }, + { + "epoch": 1.997211525693799, + "grad_norm": 1.3766398068169023, + "learning_rate": 1.3880718949366155e-10, + "loss": 0.24821621179580688, + "step": 7521 + }, + { + "epoch": 1.997477094675342, + "grad_norm": 1.2860965423885737, + "learning_rate": 1.1663663982530715e-10, + "loss": 0.24725303053855896, + "step": 7522 + }, + { + "epoch": 1.997742663656885, + "grad_norm": 1.2302869290522314, + "learning_rate": 9.639394973226523e-11, + "loss": 0.2319290041923523, + "step": 7523 + }, + { + "epoch": 1.9980082326384279, + "grad_norm": 1.3169058540691405, + "learning_rate": 7.807912311696974e-11, + "loss": 0.22183239459991455, + "step": 7524 + }, + { + "epoch": 1.9982738016199708, + "grad_norm": 1.3038532813647647, + "learning_rate": 6.169216350881968e-11, + "loss": 0.2154427468776703, + "step": 7525 + }, + { + "epoch": 1.9985393706015138, + "grad_norm": 1.3153427866812037, + "learning_rate": 4.723307406973021e-11, + "loss": 0.22269389033317566, + "step": 7526 + }, + { + "epoch": 1.9988049395830567, + "grad_norm": 1.1809886655167368, + "learning_rate": 3.4701857584140686e-11, + "loss": 0.20317527651786804, + "step": 7527 + }, + { + 
"epoch": 1.9990705085645997, + "grad_norm": 1.2813479125348537, + "learning_rate": 2.409851647011685e-11, + "loss": 0.20792551338672638, + "step": 7528 + }, + { + "epoch": 1.9993360775461426, + "grad_norm": 1.1774217019209885, + "learning_rate": 1.5423052770469072e-11, + "loss": 0.2128266990184784, + "step": 7529 + }, + { + "epoch": 1.9996016465276856, + "grad_norm": 1.2535950646579268, + "learning_rate": 8.67546815941367e-12, + "loss": 0.23220527172088623, + "step": 7530 + }, + { + "epoch": 1.9998672155092285, + "grad_norm": 1.234107937433565, + "learning_rate": 3.8557639359115826e-12, + "loss": 0.22269386053085327, + "step": 7531 + }, + { + "epoch": 2.0, + "grad_norm": 2.3086652843747557, + "learning_rate": 9.63941030329707e-13, + "loss": 0.2053365409374237, + "step": 7532 + } + ], + "logging_steps": 1, + "max_steps": 7532, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 5704003196682240.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-7532/training_args.bin b/checkpoint-7532/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9c0c92feb0b44b3362d1d98054f06b20cb57a4b7 --- /dev/null +++ b/checkpoint-7532/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89857e5ce3d813c9a03825c43337cd93b1e4a595acca4834e9e4f1a47312d609 +size 6968 diff --git a/checkpoint-7532/zero_to_fp32.py b/checkpoint-7532/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..5995d6e6f04e43b989587aa9022a3aef0c66d694 --- /dev/null +++ b/checkpoint-7532/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. 
+# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = 
os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = 
state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. + Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/processor_config.json b/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + "data_format": "channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": 
"Qwen2VLVideoProcessor" + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a574a917c05d1c53d5210b2a3351a5df4b586147 --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 2.0, + "total_flos": 5704003196682240.0, + "train_loss": 0.29768029879729163, + "train_runtime": 98000.2149, + "train_samples_per_second": 1.229, + "train_steps_per_second": 0.077 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3d2cd7a4c985ebffc724f89cf6f59ec2e0793311 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,7533 @@ +{"current_steps": 1, 
"total_steps": 7532, "loss": 1.1502833366394043, "lr": 0.0, "epoch": 0.0002655689815429558, "percentage": 0.01, "elapsed_time": "0:00:17", "remaining_time": "1 day, 13:07:05"} +{"current_steps": 2, "total_steps": 7532, "loss": 1.1698756217956543, "lr": 5.3050397877984086e-08, "epoch": 0.0005311379630859116, "percentage": 0.03, "elapsed_time": "0:00:31", "remaining_time": "1 day, 8:38:26"} +{"current_steps": 3, "total_steps": 7532, "loss": 1.1060130596160889, "lr": 1.0610079575596817e-07, "epoch": 0.0007967069446288673, "percentage": 0.04, "elapsed_time": "0:00:44", "remaining_time": "1 day, 6:49:16"} +{"current_steps": 4, "total_steps": 7532, "loss": 1.1075276136398315, "lr": 1.5915119363395226e-07, "epoch": 0.0010622759261718232, "percentage": 0.05, "elapsed_time": "0:00:57", "remaining_time": "1 day, 6:09:35"} +{"current_steps": 5, "total_steps": 7532, "loss": 1.2153511047363281, "lr": 2.1220159151193635e-07, "epoch": 0.001327844907714779, "percentage": 0.07, "elapsed_time": "0:01:10", "remaining_time": "1 day, 5:35:16"} +{"current_steps": 6, "total_steps": 7532, "loss": 1.1400426626205444, "lr": 2.6525198938992043e-07, "epoch": 0.0015934138892577346, "percentage": 0.08, "elapsed_time": "0:01:24", "remaining_time": "1 day, 5:23:14"} +{"current_steps": 7, "total_steps": 7532, "loss": 1.2070660591125488, "lr": 3.183023872679045e-07, "epoch": 0.0018589828708006906, "percentage": 0.09, "elapsed_time": "0:01:37", "remaining_time": "1 day, 5:01:32"} +{"current_steps": 8, "total_steps": 7532, "loss": 1.1614588499069214, "lr": 3.713527851458886e-07, "epoch": 0.0021245518523436463, "percentage": 0.11, "elapsed_time": "0:01:50", "remaining_time": "1 day, 4:46:07"} +{"current_steps": 9, "total_steps": 7532, "loss": 1.1739476919174194, "lr": 4.244031830238727e-07, "epoch": 0.002390120833886602, "percentage": 0.12, "elapsed_time": "0:02:03", "remaining_time": "1 day, 4:44:57"} +{"current_steps": 10, "total_steps": 7532, "loss": 1.1559171676635742, "lr": 4.774535809018568e-07, 
"epoch": 0.002655689815429558, "percentage": 0.13, "elapsed_time": "0:02:17", "remaining_time": "1 day, 4:38:45"} +{"current_steps": 11, "total_steps": 7532, "loss": 1.2086225748062134, "lr": 5.305039787798409e-07, "epoch": 0.0029212587969725135, "percentage": 0.15, "elapsed_time": "0:02:30", "remaining_time": "1 day, 4:39:13"} +{"current_steps": 12, "total_steps": 7532, "loss": 1.227709174156189, "lr": 5.83554376657825e-07, "epoch": 0.0031868277785154693, "percentage": 0.16, "elapsed_time": "0:02:43", "remaining_time": "1 day, 4:32:39"} +{"current_steps": 13, "total_steps": 7532, "loss": 1.255577564239502, "lr": 6.36604774535809e-07, "epoch": 0.003452396760058425, "percentage": 0.17, "elapsed_time": "0:02:57", "remaining_time": "1 day, 4:33:16"} +{"current_steps": 14, "total_steps": 7532, "loss": 1.1953760385513306, "lr": 6.896551724137931e-07, "epoch": 0.003717965741601381, "percentage": 0.19, "elapsed_time": "0:03:11", "remaining_time": "1 day, 4:29:37"} +{"current_steps": 15, "total_steps": 7532, "loss": 1.1325336694717407, "lr": 7.427055702917772e-07, "epoch": 0.0039835347231443365, "percentage": 0.2, "elapsed_time": "0:03:24", "remaining_time": "1 day, 4:31:27"} +{"current_steps": 16, "total_steps": 7532, "loss": 1.232974648475647, "lr": 7.957559681697613e-07, "epoch": 0.004249103704687293, "percentage": 0.21, "elapsed_time": "0:03:38", "remaining_time": "1 day, 4:29:12"} +{"current_steps": 17, "total_steps": 7532, "loss": 1.1527395248413086, "lr": 8.488063660477454e-07, "epoch": 0.004514672686230248, "percentage": 0.23, "elapsed_time": "0:03:52", "remaining_time": "1 day, 4:31:20"} +{"current_steps": 18, "total_steps": 7532, "loss": 1.151026964187622, "lr": 9.018567639257295e-07, "epoch": 0.004780241667773204, "percentage": 0.24, "elapsed_time": "0:04:05", "remaining_time": "1 day, 4:28:38"} +{"current_steps": 19, "total_steps": 7532, "loss": 1.155288815498352, "lr": 9.549071618037136e-07, "epoch": 0.00504581064931616, "percentage": 0.25, "elapsed_time": 
"0:04:19", "remaining_time": "1 day, 4:28:33"} +{"current_steps": 20, "total_steps": 7532, "loss": 1.183434009552002, "lr": 1.0079575596816979e-06, "epoch": 0.005311379630859116, "percentage": 0.27, "elapsed_time": "0:04:32", "remaining_time": "1 day, 4:23:33"} +{"current_steps": 21, "total_steps": 7532, "loss": 1.161030650138855, "lr": 1.0610079575596817e-06, "epoch": 0.005576948612402072, "percentage": 0.28, "elapsed_time": "0:04:45", "remaining_time": "1 day, 4:22:11"} +{"current_steps": 22, "total_steps": 7532, "loss": 1.123382806777954, "lr": 1.1140583554376658e-06, "epoch": 0.005842517593945027, "percentage": 0.29, "elapsed_time": "0:04:58", "remaining_time": "1 day, 4:18:00"} +{"current_steps": 23, "total_steps": 7532, "loss": 1.238707423210144, "lr": 1.16710875331565e-06, "epoch": 0.006108086575487983, "percentage": 0.31, "elapsed_time": "0:05:12", "remaining_time": "1 day, 4:17:52"} +{"current_steps": 24, "total_steps": 7532, "loss": 1.2058464288711548, "lr": 1.220159151193634e-06, "epoch": 0.0063736555570309385, "percentage": 0.32, "elapsed_time": "0:05:24", "remaining_time": "1 day, 4:14:25"} +{"current_steps": 25, "total_steps": 7532, "loss": 1.2351092100143433, "lr": 1.273209549071618e-06, "epoch": 0.006639224538573895, "percentage": 0.33, "elapsed_time": "0:05:37", "remaining_time": "1 day, 4:11:00"} +{"current_steps": 26, "total_steps": 7532, "loss": 1.1739860773086548, "lr": 1.3262599469496024e-06, "epoch": 0.00690479352011685, "percentage": 0.35, "elapsed_time": "0:05:51", "remaining_time": "1 day, 4:12:43"} +{"current_steps": 27, "total_steps": 7532, "loss": 1.1407617330551147, "lr": 1.3793103448275862e-06, "epoch": 0.007170362501659806, "percentage": 0.36, "elapsed_time": "0:06:04", "remaining_time": "1 day, 4:10:25"} +{"current_steps": 28, "total_steps": 7532, "loss": 1.1844531297683716, "lr": 1.4323607427055705e-06, "epoch": 0.007435931483202762, "percentage": 0.37, "elapsed_time": "0:06:18", "remaining_time": "1 day, 4:08:32"} 
+{"current_steps": 29, "total_steps": 7532, "loss": 1.1293678283691406, "lr": 1.4854111405835544e-06, "epoch": 0.007701500464745718, "percentage": 0.39, "elapsed_time": "0:06:30", "remaining_time": "1 day, 4:04:52"} +{"current_steps": 30, "total_steps": 7532, "loss": 1.1310899257659912, "lr": 1.5384615384615387e-06, "epoch": 0.007967069446288673, "percentage": 0.4, "elapsed_time": "0:06:43", "remaining_time": "1 day, 4:02:16"} +{"current_steps": 31, "total_steps": 7532, "loss": 1.1015795469284058, "lr": 1.5915119363395226e-06, "epoch": 0.00823263842783163, "percentage": 0.41, "elapsed_time": "0:06:56", "remaining_time": "1 day, 3:58:12"} +{"current_steps": 32, "total_steps": 7532, "loss": 1.0756056308746338, "lr": 1.6445623342175069e-06, "epoch": 0.008498207409374585, "percentage": 0.42, "elapsed_time": "0:07:08", "remaining_time": "1 day, 3:55:39"} +{"current_steps": 33, "total_steps": 7532, "loss": 1.1496126651763916, "lr": 1.6976127320954908e-06, "epoch": 0.00876377639091754, "percentage": 0.44, "elapsed_time": "0:07:21", "remaining_time": "1 day, 3:51:43"} +{"current_steps": 34, "total_steps": 7532, "loss": 1.203465461730957, "lr": 1.750663129973475e-06, "epoch": 0.009029345372460496, "percentage": 0.45, "elapsed_time": "0:07:34", "remaining_time": "1 day, 3:49:16"} +{"current_steps": 35, "total_steps": 7532, "loss": 1.0613923072814941, "lr": 1.803713527851459e-06, "epoch": 0.009294914354003453, "percentage": 0.46, "elapsed_time": "0:07:46", "remaining_time": "1 day, 3:46:36"} +{"current_steps": 36, "total_steps": 7532, "loss": 1.06027090549469, "lr": 1.8567639257294432e-06, "epoch": 0.009560483335546408, "percentage": 0.48, "elapsed_time": "0:08:00", "remaining_time": "1 day, 3:47:59"} +{"current_steps": 37, "total_steps": 7532, "loss": 1.0508522987365723, "lr": 1.909814323607427e-06, "epoch": 0.009826052317089363, "percentage": 0.49, "elapsed_time": "0:08:13", "remaining_time": "1 day, 3:46:36"} +{"current_steps": 38, "total_steps": 7532, "loss": 
1.0353929996490479, "lr": 1.9628647214854114e-06, "epoch": 0.01009162129863232, "percentage": 0.5, "elapsed_time": "0:08:27", "remaining_time": "1 day, 3:46:27"} +{"current_steps": 39, "total_steps": 7532, "loss": 0.9974027276039124, "lr": 2.0159151193633957e-06, "epoch": 0.010357190280175276, "percentage": 0.52, "elapsed_time": "0:08:39", "remaining_time": "1 day, 3:44:30"} +{"current_steps": 40, "total_steps": 7532, "loss": 1.0849467515945435, "lr": 2.0689655172413796e-06, "epoch": 0.010622759261718231, "percentage": 0.53, "elapsed_time": "0:08:53", "remaining_time": "1 day, 3:44:33"} +{"current_steps": 41, "total_steps": 7532, "loss": 1.005434274673462, "lr": 2.1220159151193635e-06, "epoch": 0.010888328243261186, "percentage": 0.54, "elapsed_time": "0:09:06", "remaining_time": "1 day, 3:42:39"} +{"current_steps": 42, "total_steps": 7532, "loss": 1.052631139755249, "lr": 2.1750663129973478e-06, "epoch": 0.011153897224804143, "percentage": 0.56, "elapsed_time": "0:09:18", "remaining_time": "1 day, 3:41:12"} +{"current_steps": 43, "total_steps": 7532, "loss": 0.9470957517623901, "lr": 2.2281167108753316e-06, "epoch": 0.011419466206347099, "percentage": 0.57, "elapsed_time": "0:09:32", "remaining_time": "1 day, 3:41:57"} +{"current_steps": 44, "total_steps": 7532, "loss": 0.9865130186080933, "lr": 2.281167108753316e-06, "epoch": 0.011685035187890054, "percentage": 0.58, "elapsed_time": "0:09:45", "remaining_time": "1 day, 3:41:18"} +{"current_steps": 45, "total_steps": 7532, "loss": 0.9405577778816223, "lr": 2.3342175066313e-06, "epoch": 0.01195060416943301, "percentage": 0.6, "elapsed_time": "0:09:59", "remaining_time": "1 day, 3:42:29"} +{"current_steps": 46, "total_steps": 7532, "loss": 0.9418795108795166, "lr": 2.387267904509284e-06, "epoch": 0.012216173150975966, "percentage": 0.61, "elapsed_time": "0:10:12", "remaining_time": "1 day, 3:41:30"} +{"current_steps": 47, "total_steps": 7532, "loss": 0.9841142892837524, "lr": 2.440318302387268e-06, "epoch": 
0.012481742132518922, "percentage": 0.62, "elapsed_time": "0:10:26", "remaining_time": "1 day, 3:42:25"} +{"current_steps": 48, "total_steps": 7532, "loss": 0.9412609338760376, "lr": 2.4933687002652523e-06, "epoch": 0.012747311114061877, "percentage": 0.64, "elapsed_time": "0:10:39", "remaining_time": "1 day, 3:42:08"} +{"current_steps": 49, "total_steps": 7532, "loss": 0.9239889979362488, "lr": 2.546419098143236e-06, "epoch": 0.013012880095604834, "percentage": 0.65, "elapsed_time": "0:10:53", "remaining_time": "1 day, 3:42:54"} +{"current_steps": 50, "total_steps": 7532, "loss": 0.9212941527366638, "lr": 2.59946949602122e-06, "epoch": 0.01327844907714779, "percentage": 0.66, "elapsed_time": "0:11:06", "remaining_time": "1 day, 3:42:02"} +{"current_steps": 51, "total_steps": 7532, "loss": 0.8863773345947266, "lr": 2.6525198938992047e-06, "epoch": 0.013544018058690745, "percentage": 0.68, "elapsed_time": "0:11:20", "remaining_time": "1 day, 3:43:09"} +{"current_steps": 52, "total_steps": 7532, "loss": 0.8990404009819031, "lr": 2.7055702917771886e-06, "epoch": 0.0138095870402337, "percentage": 0.69, "elapsed_time": "0:11:33", "remaining_time": "1 day, 3:43:01"} +{"current_steps": 53, "total_steps": 7532, "loss": 0.9257171154022217, "lr": 2.7586206896551725e-06, "epoch": 0.014075156021776657, "percentage": 0.7, "elapsed_time": "0:11:47", "remaining_time": "1 day, 3:44:14"} +{"current_steps": 54, "total_steps": 7532, "loss": 0.8239601254463196, "lr": 2.8116710875331564e-06, "epoch": 0.014340725003319612, "percentage": 0.72, "elapsed_time": "0:12:01", "remaining_time": "1 day, 3:44:30"} +{"current_steps": 55, "total_steps": 7532, "loss": 0.8656830787658691, "lr": 2.864721485411141e-06, "epoch": 0.014606293984862568, "percentage": 0.73, "elapsed_time": "0:12:14", "remaining_time": "1 day, 3:44:36"} +{"current_steps": 56, "total_steps": 7532, "loss": 0.9470342397689819, "lr": 2.917771883289125e-06, "epoch": 0.014871862966405525, "percentage": 0.74, "elapsed_time": 
"0:12:28", "remaining_time": "1 day, 3:46:12"} +{"current_steps": 57, "total_steps": 7532, "loss": 0.8699859976768494, "lr": 2.970822281167109e-06, "epoch": 0.01513743194794848, "percentage": 0.76, "elapsed_time": "0:12:42", "remaining_time": "1 day, 3:45:56"} +{"current_steps": 58, "total_steps": 7532, "loss": 0.8668704628944397, "lr": 3.0238726790450927e-06, "epoch": 0.015403000929491435, "percentage": 0.77, "elapsed_time": "0:12:55", "remaining_time": "1 day, 3:46:21"} +{"current_steps": 59, "total_steps": 7532, "loss": 0.841624915599823, "lr": 3.0769230769230774e-06, "epoch": 0.01566856991103439, "percentage": 0.78, "elapsed_time": "0:13:09", "remaining_time": "1 day, 3:45:51"} +{"current_steps": 60, "total_steps": 7532, "loss": 0.8412661552429199, "lr": 3.1299734748010613e-06, "epoch": 0.015934138892577346, "percentage": 0.8, "elapsed_time": "0:13:23", "remaining_time": "1 day, 3:46:57"} +{"current_steps": 61, "total_steps": 7532, "loss": 0.818957507610321, "lr": 3.183023872679045e-06, "epoch": 0.0161997078741203, "percentage": 0.81, "elapsed_time": "0:13:36", "remaining_time": "1 day, 3:46:55"} +{"current_steps": 62, "total_steps": 7532, "loss": 0.8030763268470764, "lr": 3.23607427055703e-06, "epoch": 0.01646527685566326, "percentage": 0.82, "elapsed_time": "0:13:50", "remaining_time": "1 day, 3:47:51"} +{"current_steps": 63, "total_steps": 7532, "loss": 0.7869359850883484, "lr": 3.2891246684350138e-06, "epoch": 0.016730845837206215, "percentage": 0.84, "elapsed_time": "0:14:03", "remaining_time": "1 day, 3:47:02"} +{"current_steps": 64, "total_steps": 7532, "loss": 0.7912170886993408, "lr": 3.3421750663129977e-06, "epoch": 0.01699641481874917, "percentage": 0.85, "elapsed_time": "0:14:17", "remaining_time": "1 day, 3:47:34"} +{"current_steps": 65, "total_steps": 7532, "loss": 0.7736695408821106, "lr": 3.3952254641909815e-06, "epoch": 0.017261983800292126, "percentage": 0.86, "elapsed_time": "0:14:30", "remaining_time": "1 day, 3:47:07"} +{"current_steps": 
66, "total_steps": 7532, "loss": 0.768275260925293, "lr": 3.448275862068966e-06, "epoch": 0.01752755278183508, "percentage": 0.88, "elapsed_time": "0:14:44", "remaining_time": "1 day, 3:47:41"} +{"current_steps": 67, "total_steps": 7532, "loss": 0.7210639119148254, "lr": 3.50132625994695e-06, "epoch": 0.017793121763378036, "percentage": 0.89, "elapsed_time": "0:14:57", "remaining_time": "1 day, 3:46:50"} +{"current_steps": 68, "total_steps": 7532, "loss": 0.7488028407096863, "lr": 3.554376657824934e-06, "epoch": 0.01805869074492099, "percentage": 0.9, "elapsed_time": "0:15:11", "remaining_time": "1 day, 3:46:59"} +{"current_steps": 69, "total_steps": 7532, "loss": 0.7329621911048889, "lr": 3.607427055702918e-06, "epoch": 0.01832425972646395, "percentage": 0.92, "elapsed_time": "0:15:24", "remaining_time": "1 day, 3:46:56"} +{"current_steps": 70, "total_steps": 7532, "loss": 0.7270619869232178, "lr": 3.660477453580902e-06, "epoch": 0.018589828708006906, "percentage": 0.93, "elapsed_time": "0:15:38", "remaining_time": "1 day, 3:47:37"} +{"current_steps": 71, "total_steps": 7532, "loss": 0.7271254658699036, "lr": 3.7135278514588865e-06, "epoch": 0.01885539768954986, "percentage": 0.94, "elapsed_time": "0:15:51", "remaining_time": "1 day, 3:47:06"} +{"current_steps": 72, "total_steps": 7532, "loss": 0.787033200263977, "lr": 3.7665782493368703e-06, "epoch": 0.019120966671092816, "percentage": 0.96, "elapsed_time": "0:16:05", "remaining_time": "1 day, 3:47:08"} +{"current_steps": 73, "total_steps": 7532, "loss": 0.7049479484558105, "lr": 3.819628647214854e-06, "epoch": 0.01938653565263577, "percentage": 0.97, "elapsed_time": "0:16:18", "remaining_time": "1 day, 3:45:51"} +{"current_steps": 74, "total_steps": 7532, "loss": 0.7146892547607422, "lr": 3.8726790450928385e-06, "epoch": 0.019652104634178727, "percentage": 0.98, "elapsed_time": "0:16:31", "remaining_time": "1 day, 3:44:59"} +{"current_steps": 75, "total_steps": 7532, "loss": 0.7212516069412231, "lr": 
3.925729442970823e-06, "epoch": 0.019917673615721682, "percentage": 1.0, "elapsed_time": "0:16:44", "remaining_time": "1 day, 3:44:45"} +{"current_steps": 76, "total_steps": 7532, "loss": 0.6612375378608704, "lr": 3.978779840848806e-06, "epoch": 0.02018324259726464, "percentage": 1.01, "elapsed_time": "0:16:57", "remaining_time": "1 day, 3:43:29"} +{"current_steps": 77, "total_steps": 7532, "loss": 0.7038244605064392, "lr": 4.031830238726791e-06, "epoch": 0.020448811578807596, "percentage": 1.02, "elapsed_time": "0:17:10", "remaining_time": "1 day, 3:43:32"} +{"current_steps": 78, "total_steps": 7532, "loss": 0.7081903219223022, "lr": 4.084880636604775e-06, "epoch": 0.02071438056035055, "percentage": 1.04, "elapsed_time": "0:17:23", "remaining_time": "1 day, 3:42:17"} +{"current_steps": 79, "total_steps": 7532, "loss": 0.7079841494560242, "lr": 4.137931034482759e-06, "epoch": 0.020979949541893507, "percentage": 1.05, "elapsed_time": "0:17:37", "remaining_time": "1 day, 3:42:23"} +{"current_steps": 80, "total_steps": 7532, "loss": 0.7090641260147095, "lr": 4.190981432360743e-06, "epoch": 0.021245518523436462, "percentage": 1.06, "elapsed_time": "0:17:50", "remaining_time": "1 day, 3:41:40"} +{"current_steps": 81, "total_steps": 7532, "loss": 0.6632575988769531, "lr": 4.244031830238727e-06, "epoch": 0.021511087504979418, "percentage": 1.08, "elapsed_time": "0:18:03", "remaining_time": "1 day, 3:41:49"} +{"current_steps": 82, "total_steps": 7532, "loss": 0.7231097221374512, "lr": 4.297082228116711e-06, "epoch": 0.021776656486522373, "percentage": 1.09, "elapsed_time": "0:18:17", "remaining_time": "1 day, 3:41:21"} +{"current_steps": 83, "total_steps": 7532, "loss": 0.6696034669876099, "lr": 4.3501326259946955e-06, "epoch": 0.02204222546806533, "percentage": 1.1, "elapsed_time": "0:18:30", "remaining_time": "1 day, 3:41:43"} +{"current_steps": 84, "total_steps": 7532, "loss": 0.7550696134567261, "lr": 4.403183023872679e-06, "epoch": 0.022307794449608287, "percentage": 
1.12, "elapsed_time": "0:18:43", "remaining_time": "1 day, 3:40:58"} +{"current_steps": 85, "total_steps": 7532, "loss": 0.671328067779541, "lr": 4.456233421750663e-06, "epoch": 0.022573363431151242, "percentage": 1.13, "elapsed_time": "0:18:57", "remaining_time": "1 day, 3:40:22"} +{"current_steps": 86, "total_steps": 7532, "loss": 0.6864410638809204, "lr": 4.5092838196286476e-06, "epoch": 0.022838932412694198, "percentage": 1.14, "elapsed_time": "0:19:10", "remaining_time": "1 day, 3:40:36"} +{"current_steps": 87, "total_steps": 7532, "loss": 0.6870769262313843, "lr": 4.562334217506632e-06, "epoch": 0.023104501394237153, "percentage": 1.16, "elapsed_time": "0:19:24", "remaining_time": "1 day, 3:40:19"} +{"current_steps": 88, "total_steps": 7532, "loss": 0.6539690494537354, "lr": 4.615384615384616e-06, "epoch": 0.023370070375780108, "percentage": 1.17, "elapsed_time": "0:19:37", "remaining_time": "1 day, 3:40:36"} +{"current_steps": 89, "total_steps": 7532, "loss": 0.6946991086006165, "lr": 4.6684350132626e-06, "epoch": 0.023635639357323063, "percentage": 1.18, "elapsed_time": "0:19:51", "remaining_time": "1 day, 3:40:15"} +{"current_steps": 90, "total_steps": 7532, "loss": 0.6177583932876587, "lr": 4.721485411140584e-06, "epoch": 0.02390120833886602, "percentage": 1.19, "elapsed_time": "0:20:04", "remaining_time": "1 day, 3:40:36"} +{"current_steps": 91, "total_steps": 7532, "loss": 0.6890037059783936, "lr": 4.774535809018568e-06, "epoch": 0.024166777320408978, "percentage": 1.21, "elapsed_time": "0:20:18", "remaining_time": "1 day, 3:40:16"} +{"current_steps": 92, "total_steps": 7532, "loss": 0.6563063263893127, "lr": 4.8275862068965525e-06, "epoch": 0.024432346301951933, "percentage": 1.22, "elapsed_time": "0:20:32", "remaining_time": "1 day, 3:40:32"} +{"current_steps": 93, "total_steps": 7532, "loss": 0.714318573474884, "lr": 4.880636604774536e-06, "epoch": 0.024697915283494888, "percentage": 1.23, "elapsed_time": "0:20:45", "remaining_time": "1 day, 
3:39:59"} +{"current_steps": 94, "total_steps": 7532, "loss": 0.6720882654190063, "lr": 4.93368700265252e-06, "epoch": 0.024963484265037843, "percentage": 1.25, "elapsed_time": "0:20:59", "remaining_time": "1 day, 3:40:23"} +{"current_steps": 95, "total_steps": 7532, "loss": 0.602899968624115, "lr": 4.9867374005305045e-06, "epoch": 0.0252290532465808, "percentage": 1.26, "elapsed_time": "0:21:12", "remaining_time": "1 day, 3:39:51"} +{"current_steps": 96, "total_steps": 7532, "loss": 0.6628841161727905, "lr": 5.039787798408489e-06, "epoch": 0.025494622228123754, "percentage": 1.27, "elapsed_time": "0:21:25", "remaining_time": "1 day, 3:39:28"} +{"current_steps": 97, "total_steps": 7532, "loss": 0.6486932635307312, "lr": 5.092838196286472e-06, "epoch": 0.02576019120966671, "percentage": 1.29, "elapsed_time": "0:21:39", "remaining_time": "1 day, 3:40:01"} +{"current_steps": 98, "total_steps": 7532, "loss": 0.6719033122062683, "lr": 5.145888594164457e-06, "epoch": 0.026025760191209668, "percentage": 1.3, "elapsed_time": "0:21:52", "remaining_time": "1 day, 3:39:32"} +{"current_steps": 99, "total_steps": 7532, "loss": 0.6818530559539795, "lr": 5.19893899204244e-06, "epoch": 0.026291329172752623, "percentage": 1.31, "elapsed_time": "0:22:06", "remaining_time": "1 day, 3:39:23"} +{"current_steps": 100, "total_steps": 7532, "loss": 0.6306912899017334, "lr": 5.251989389920424e-06, "epoch": 0.02655689815429558, "percentage": 1.33, "elapsed_time": "0:22:19", "remaining_time": "1 day, 3:38:48"} +{"current_steps": 101, "total_steps": 7532, "loss": 0.5952945351600647, "lr": 5.3050397877984095e-06, "epoch": 0.026822467135838534, "percentage": 1.34, "elapsed_time": "0:22:37", "remaining_time": "1 day, 3:45:12"} +{"current_steps": 102, "total_steps": 7532, "loss": 0.6566107273101807, "lr": 5.358090185676394e-06, "epoch": 0.02708803611738149, "percentage": 1.35, "elapsed_time": "0:22:50", "remaining_time": "1 day, 3:44:13"} +{"current_steps": 103, "total_steps": 7532, "loss": 
0.6981694102287292, "lr": 5.411140583554377e-06, "epoch": 0.027353605098924445, "percentage": 1.37, "elapsed_time": "0:23:03", "remaining_time": "1 day, 3:42:58"} +{"current_steps": 104, "total_steps": 7532, "loss": 0.6231328248977661, "lr": 5.4641909814323615e-06, "epoch": 0.0276191740804674, "percentage": 1.38, "elapsed_time": "0:23:15", "remaining_time": "1 day, 3:41:22"} +{"current_steps": 105, "total_steps": 7532, "loss": 0.6414977312088013, "lr": 5.517241379310345e-06, "epoch": 0.02788474306201036, "percentage": 1.39, "elapsed_time": "0:23:28", "remaining_time": "1 day, 3:40:03"} +{"current_steps": 106, "total_steps": 7532, "loss": 0.6335234642028809, "lr": 5.570291777188329e-06, "epoch": 0.028150312043553314, "percentage": 1.41, "elapsed_time": "0:23:40", "remaining_time": "1 day, 3:38:28"} +{"current_steps": 107, "total_steps": 7532, "loss": 0.6040852665901184, "lr": 5.623342175066313e-06, "epoch": 0.02841588102509627, "percentage": 1.42, "elapsed_time": "0:23:53", "remaining_time": "1 day, 3:37:27"} +{"current_steps": 108, "total_steps": 7532, "loss": 0.6011114716529846, "lr": 5.676392572944297e-06, "epoch": 0.028681450006639225, "percentage": 1.43, "elapsed_time": "0:24:05", "remaining_time": "1 day, 3:36:00"} +{"current_steps": 109, "total_steps": 7532, "loss": 0.6042627096176147, "lr": 5.729442970822282e-06, "epoch": 0.02894701898818218, "percentage": 1.45, "elapsed_time": "0:24:18", "remaining_time": "1 day, 3:35:11"} +{"current_steps": 110, "total_steps": 7532, "loss": 0.6183412671089172, "lr": 5.782493368700266e-06, "epoch": 0.029212587969725135, "percentage": 1.46, "elapsed_time": "0:24:30", "remaining_time": "1 day, 3:34:06"} +{"current_steps": 111, "total_steps": 7532, "loss": 0.6150818467140198, "lr": 5.83554376657825e-06, "epoch": 0.02947815695126809, "percentage": 1.47, "elapsed_time": "0:24:43", "remaining_time": "1 day, 3:32:36"} +{"current_steps": 112, "total_steps": 7532, "loss": 0.6202039122581482, "lr": 5.888594164456234e-06, "epoch": 
0.02974372593281105, "percentage": 1.49, "elapsed_time": "0:24:55", "remaining_time": "1 day, 3:31:05"} +{"current_steps": 113, "total_steps": 7532, "loss": 0.6334809064865112, "lr": 5.941644562334218e-06, "epoch": 0.030009294914354005, "percentage": 1.5, "elapsed_time": "0:25:08", "remaining_time": "1 day, 3:30:19"} +{"current_steps": 114, "total_steps": 7532, "loss": 0.5728089809417725, "lr": 5.994694960212202e-06, "epoch": 0.03027486389589696, "percentage": 1.51, "elapsed_time": "0:25:20", "remaining_time": "1 day, 3:29:30"} +{"current_steps": 115, "total_steps": 7532, "loss": 0.6884603500366211, "lr": 6.0477453580901854e-06, "epoch": 0.030540432877439915, "percentage": 1.53, "elapsed_time": "0:25:32", "remaining_time": "1 day, 3:27:40"} +{"current_steps": 116, "total_steps": 7532, "loss": 0.5619829893112183, "lr": 6.1007957559681706e-06, "epoch": 0.03080600185898287, "percentage": 1.54, "elapsed_time": "0:25:45", "remaining_time": "1 day, 3:26:31"} +{"current_steps": 117, "total_steps": 7532, "loss": 0.6224710941314697, "lr": 6.153846153846155e-06, "epoch": 0.031071570840525826, "percentage": 1.55, "elapsed_time": "0:25:57", "remaining_time": "1 day, 3:24:59"} +{"current_steps": 118, "total_steps": 7532, "loss": 0.6119496822357178, "lr": 6.206896551724138e-06, "epoch": 0.03133713982206878, "percentage": 1.57, "elapsed_time": "0:26:09", "remaining_time": "1 day, 3:23:51"} +{"current_steps": 119, "total_steps": 7532, "loss": 0.6190857887268066, "lr": 6.259946949602123e-06, "epoch": 0.03160270880361174, "percentage": 1.58, "elapsed_time": "0:26:22", "remaining_time": "1 day, 3:22:44"} +{"current_steps": 120, "total_steps": 7532, "loss": 0.6460769176483154, "lr": 6.312997347480107e-06, "epoch": 0.03186827778515469, "percentage": 1.59, "elapsed_time": "0:26:35", "remaining_time": "1 day, 3:22:26"} +{"current_steps": 121, "total_steps": 7532, "loss": 0.6148796677589417, "lr": 6.36604774535809e-06, "epoch": 0.03213384676669765, "percentage": 1.61, "elapsed_time": 
"0:26:47", "remaining_time": "1 day, 3:21:26"} +{"current_steps": 122, "total_steps": 7532, "loss": 0.558960497379303, "lr": 6.419098143236075e-06, "epoch": 0.0323994157482406, "percentage": 1.62, "elapsed_time": "0:27:01", "remaining_time": "1 day, 3:21:12"} +{"current_steps": 123, "total_steps": 7532, "loss": 0.5844984650611877, "lr": 6.47214854111406e-06, "epoch": 0.03266498472978356, "percentage": 1.63, "elapsed_time": "0:27:13", "remaining_time": "1 day, 3:20:22"} +{"current_steps": 124, "total_steps": 7532, "loss": 0.5343623161315918, "lr": 6.525198938992043e-06, "epoch": 0.03293055371132652, "percentage": 1.65, "elapsed_time": "0:27:26", "remaining_time": "1 day, 3:19:33"} +{"current_steps": 125, "total_steps": 7532, "loss": 0.5834348797798157, "lr": 6.5782493368700276e-06, "epoch": 0.03319612269286947, "percentage": 1.66, "elapsed_time": "0:27:39", "remaining_time": "1 day, 3:19:16"} +{"current_steps": 126, "total_steps": 7532, "loss": 0.5997360944747925, "lr": 6.631299734748011e-06, "epoch": 0.03346169167441243, "percentage": 1.67, "elapsed_time": "0:27:52", "remaining_time": "1 day, 3:18:25"} +{"current_steps": 127, "total_steps": 7532, "loss": 0.5991666316986084, "lr": 6.684350132625995e-06, "epoch": 0.03372726065595538, "percentage": 1.69, "elapsed_time": "0:28:05", "remaining_time": "1 day, 3:18:22"} +{"current_steps": 128, "total_steps": 7532, "loss": 0.581120491027832, "lr": 6.737400530503979e-06, "epoch": 0.03399282963749834, "percentage": 1.7, "elapsed_time": "0:28:18", "remaining_time": "1 day, 3:17:38"} +{"current_steps": 129, "total_steps": 7532, "loss": 0.6219569444656372, "lr": 6.790450928381963e-06, "epoch": 0.03425839861904129, "percentage": 1.71, "elapsed_time": "0:28:31", "remaining_time": "1 day, 3:17:16"} +{"current_steps": 130, "total_steps": 7532, "loss": 0.5950608253479004, "lr": 6.843501326259947e-06, "epoch": 0.03452396760058425, "percentage": 1.73, "elapsed_time": "0:28:44", "remaining_time": "1 day, 3:16:17"} +{"current_steps": 
131, "total_steps": 7532, "loss": 0.5762747526168823, "lr": 6.896551724137932e-06, "epoch": 0.03478953658212721, "percentage": 1.74, "elapsed_time": "0:28:57", "remaining_time": "1 day, 3:15:50"} +{"current_steps": 132, "total_steps": 7532, "loss": 0.6003131866455078, "lr": 6.949602122015916e-06, "epoch": 0.03505510556367016, "percentage": 1.75, "elapsed_time": "0:29:09", "remaining_time": "1 day, 3:14:50"} +{"current_steps": 133, "total_steps": 7532, "loss": 0.5866605043411255, "lr": 7.0026525198939e-06, "epoch": 0.03532067454521312, "percentage": 1.77, "elapsed_time": "0:29:22", "remaining_time": "1 day, 3:14:20"} +{"current_steps": 134, "total_steps": 7532, "loss": 0.6668443083763123, "lr": 7.055702917771884e-06, "epoch": 0.03558624352675607, "percentage": 1.78, "elapsed_time": "0:29:35", "remaining_time": "1 day, 3:13:25"} +{"current_steps": 135, "total_steps": 7532, "loss": 0.5738306045532227, "lr": 7.108753315649868e-06, "epoch": 0.03585181250829903, "percentage": 1.79, "elapsed_time": "0:29:47", "remaining_time": "1 day, 3:12:42"} +{"current_steps": 136, "total_steps": 7532, "loss": 0.5774663686752319, "lr": 7.1618037135278515e-06, "epoch": 0.03611738148984198, "percentage": 1.81, "elapsed_time": "0:30:00", "remaining_time": "1 day, 3:12:12"} +{"current_steps": 137, "total_steps": 7532, "loss": 0.5721150636672974, "lr": 7.214854111405836e-06, "epoch": 0.03638295047138494, "percentage": 1.82, "elapsed_time": "0:30:13", "remaining_time": "1 day, 3:11:16"} +{"current_steps": 138, "total_steps": 7532, "loss": 0.6350122690200806, "lr": 7.267904509283821e-06, "epoch": 0.0366485194529279, "percentage": 1.83, "elapsed_time": "0:30:25", "remaining_time": "1 day, 3:10:26"} +{"current_steps": 139, "total_steps": 7532, "loss": 0.5435039401054382, "lr": 7.320954907161804e-06, "epoch": 0.03691408843447085, "percentage": 1.85, "elapsed_time": "0:30:37", "remaining_time": "1 day, 3:09:15"} +{"current_steps": 140, "total_steps": 7532, "loss": 0.5869162678718567, "lr": 
7.374005305039789e-06, "epoch": 0.03717965741601381, "percentage": 1.86, "elapsed_time": "0:30:50", "remaining_time": "1 day, 3:08:23"} +{"current_steps": 141, "total_steps": 7532, "loss": 0.5645807981491089, "lr": 7.427055702917773e-06, "epoch": 0.03744522639755676, "percentage": 1.87, "elapsed_time": "0:31:02", "remaining_time": "1 day, 3:07:19"} +{"current_steps": 142, "total_steps": 7532, "loss": 0.5502692461013794, "lr": 7.480106100795756e-06, "epoch": 0.03771079537909972, "percentage": 1.89, "elapsed_time": "0:31:15", "remaining_time": "1 day, 3:07:02"} +{"current_steps": 143, "total_steps": 7532, "loss": 0.5602732300758362, "lr": 7.533156498673741e-06, "epoch": 0.037976364360642674, "percentage": 1.9, "elapsed_time": "0:31:28", "remaining_time": "1 day, 3:06:14"} +{"current_steps": 144, "total_steps": 7532, "loss": 0.6011391282081604, "lr": 7.586206896551724e-06, "epoch": 0.03824193334218563, "percentage": 1.91, "elapsed_time": "0:31:41", "remaining_time": "1 day, 3:05:53"} +{"current_steps": 145, "total_steps": 7532, "loss": 0.5482327938079834, "lr": 7.639257294429708e-06, "epoch": 0.03850750232372859, "percentage": 1.93, "elapsed_time": "0:31:53", "remaining_time": "1 day, 3:05:01"} +{"current_steps": 146, "total_steps": 7532, "loss": 0.5926344394683838, "lr": 7.692307692307694e-06, "epoch": 0.03877307130527154, "percentage": 1.94, "elapsed_time": "0:32:07", "remaining_time": "1 day, 3:04:48"} +{"current_steps": 147, "total_steps": 7532, "loss": 0.6558316946029663, "lr": 7.745358090185677e-06, "epoch": 0.0390386402868145, "percentage": 1.95, "elapsed_time": "0:32:19", "remaining_time": "1 day, 3:04:00"} +{"current_steps": 148, "total_steps": 7532, "loss": 0.572425365447998, "lr": 7.79840848806366e-06, "epoch": 0.039304209268357454, "percentage": 1.96, "elapsed_time": "0:32:32", "remaining_time": "1 day, 3:03:23"} +{"current_steps": 149, "total_steps": 7532, "loss": 0.5684784650802612, "lr": 7.851458885941646e-06, "epoch": 0.03956977824990041, "percentage": 
1.98, "elapsed_time": "0:32:44", "remaining_time": "1 day, 3:02:28"} +{"current_steps": 150, "total_steps": 7532, "loss": 0.5843643546104431, "lr": 7.904509283819629e-06, "epoch": 0.039835347231443365, "percentage": 1.99, "elapsed_time": "0:32:57", "remaining_time": "1 day, 3:02:11"} +{"current_steps": 151, "total_steps": 7532, "loss": 0.5471494793891907, "lr": 7.957559681697613e-06, "epoch": 0.04010091621298632, "percentage": 2.0, "elapsed_time": "0:33:10", "remaining_time": "1 day, 3:01:19"} +{"current_steps": 152, "total_steps": 7532, "loss": 0.595018744468689, "lr": 8.010610079575598e-06, "epoch": 0.04036648519452928, "percentage": 2.02, "elapsed_time": "0:33:22", "remaining_time": "1 day, 3:00:36"} +{"current_steps": 153, "total_steps": 7532, "loss": 0.5431865453720093, "lr": 8.063660477453583e-06, "epoch": 0.040632054176072234, "percentage": 2.03, "elapsed_time": "0:33:35", "remaining_time": "1 day, 3:00:22"} +{"current_steps": 154, "total_steps": 7532, "loss": 0.5622385740280151, "lr": 8.116710875331566e-06, "epoch": 0.04089762315761519, "percentage": 2.04, "elapsed_time": "0:33:48", "remaining_time": "1 day, 2:59:47"} +{"current_steps": 155, "total_steps": 7532, "loss": 0.5795880556106567, "lr": 8.16976127320955e-06, "epoch": 0.041163192139158145, "percentage": 2.06, "elapsed_time": "0:34:02", "remaining_time": "1 day, 2:59:49"} +{"current_steps": 156, "total_steps": 7532, "loss": 0.5854965448379517, "lr": 8.222811671087533e-06, "epoch": 0.0414287611207011, "percentage": 2.07, "elapsed_time": "0:34:14", "remaining_time": "1 day, 2:59:19"} +{"current_steps": 157, "total_steps": 7532, "loss": 0.5374501943588257, "lr": 8.275862068965518e-06, "epoch": 0.041694330102244055, "percentage": 2.08, "elapsed_time": "0:34:27", "remaining_time": "1 day, 2:59:01"} +{"current_steps": 158, "total_steps": 7532, "loss": 0.5779006481170654, "lr": 8.328912466843502e-06, "epoch": 0.041959899083787014, "percentage": 2.1, "elapsed_time": "0:34:40", "remaining_time": "1 day, 
2:58:32"} +{"current_steps": 159, "total_steps": 7532, "loss": 0.550236701965332, "lr": 8.381962864721485e-06, "epoch": 0.04222546806532997, "percentage": 2.11, "elapsed_time": "0:34:54", "remaining_time": "1 day, 2:58:38"} +{"current_steps": 160, "total_steps": 7532, "loss": 0.557443380355835, "lr": 8.43501326259947e-06, "epoch": 0.042491037046872925, "percentage": 2.12, "elapsed_time": "0:35:06", "remaining_time": "1 day, 2:57:55"} +{"current_steps": 161, "total_steps": 7532, "loss": 0.5875238180160522, "lr": 8.488063660477454e-06, "epoch": 0.04275660602841588, "percentage": 2.14, "elapsed_time": "0:35:19", "remaining_time": "1 day, 2:57:24"} +{"current_steps": 162, "total_steps": 7532, "loss": 0.510900616645813, "lr": 8.541114058355439e-06, "epoch": 0.043022175009958835, "percentage": 2.15, "elapsed_time": "0:35:31", "remaining_time": "1 day, 2:56:27"} +{"current_steps": 163, "total_steps": 7532, "loss": 0.5465859174728394, "lr": 8.594164456233422e-06, "epoch": 0.043287743991501794, "percentage": 2.16, "elapsed_time": "0:35:44", "remaining_time": "1 day, 2:55:43"} +{"current_steps": 164, "total_steps": 7532, "loss": 0.5508615970611572, "lr": 8.647214854111406e-06, "epoch": 0.043553312973044746, "percentage": 2.18, "elapsed_time": "0:35:56", "remaining_time": "1 day, 2:54:50"} +{"current_steps": 165, "total_steps": 7532, "loss": 0.5437714457511902, "lr": 8.700265251989391e-06, "epoch": 0.043818881954587705, "percentage": 2.19, "elapsed_time": "0:36:08", "remaining_time": "1 day, 2:53:58"} +{"current_steps": 166, "total_steps": 7532, "loss": 0.542698323726654, "lr": 8.753315649867374e-06, "epoch": 0.04408445093613066, "percentage": 2.2, "elapsed_time": "0:36:21", "remaining_time": "1 day, 2:53:11"} +{"current_steps": 167, "total_steps": 7532, "loss": 0.5169371962547302, "lr": 8.806366047745358e-06, "epoch": 0.044350019917673615, "percentage": 2.22, "elapsed_time": "0:36:33", "remaining_time": "1 day, 2:52:16"} +{"current_steps": 168, "total_steps": 7532, "loss": 
0.5436176061630249, "lr": 8.859416445623343e-06, "epoch": 0.044615588899216574, "percentage": 2.23, "elapsed_time": "0:36:45", "remaining_time": "1 day, 2:51:36"} +{"current_steps": 169, "total_steps": 7532, "loss": 0.568030834197998, "lr": 8.912466843501327e-06, "epoch": 0.044881157880759526, "percentage": 2.24, "elapsed_time": "0:36:58", "remaining_time": "1 day, 2:50:50"} +{"current_steps": 170, "total_steps": 7532, "loss": 0.5218889713287354, "lr": 8.965517241379312e-06, "epoch": 0.045146726862302484, "percentage": 2.26, "elapsed_time": "0:37:11", "remaining_time": "1 day, 2:50:50"} +{"current_steps": 171, "total_steps": 7532, "loss": 0.5275779962539673, "lr": 9.018567639257295e-06, "epoch": 0.045412295843845436, "percentage": 2.27, "elapsed_time": "0:37:24", "remaining_time": "1 day, 2:50:22"} +{"current_steps": 172, "total_steps": 7532, "loss": 0.5263184905052185, "lr": 9.071618037135279e-06, "epoch": 0.045677864825388395, "percentage": 2.28, "elapsed_time": "0:37:37", "remaining_time": "1 day, 2:50:11"} +{"current_steps": 173, "total_steps": 7532, "loss": 0.563044548034668, "lr": 9.124668435013264e-06, "epoch": 0.045943433806931354, "percentage": 2.3, "elapsed_time": "0:37:50", "remaining_time": "1 day, 2:49:46"} +{"current_steps": 174, "total_steps": 7532, "loss": 0.5896912217140198, "lr": 9.177718832891247e-06, "epoch": 0.046209002788474306, "percentage": 2.31, "elapsed_time": "0:38:03", "remaining_time": "1 day, 2:49:37"} +{"current_steps": 175, "total_steps": 7532, "loss": 0.5163949131965637, "lr": 9.230769230769232e-06, "epoch": 0.046474571770017264, "percentage": 2.32, "elapsed_time": "0:38:16", "remaining_time": "1 day, 2:48:58"} +{"current_steps": 176, "total_steps": 7532, "loss": 0.6203320026397705, "lr": 9.283819628647216e-06, "epoch": 0.046740140751560216, "percentage": 2.34, "elapsed_time": "0:38:28", "remaining_time": "1 day, 2:48:22"} +{"current_steps": 177, "total_steps": 7532, "loss": 0.5528024435043335, "lr": 9.3368700265252e-06, "epoch": 
0.047005709733103175, "percentage": 2.35, "elapsed_time": "0:38:41", "remaining_time": "1 day, 2:47:31"} +{"current_steps": 178, "total_steps": 7532, "loss": 0.5657555460929871, "lr": 9.389920424403184e-06, "epoch": 0.04727127871464613, "percentage": 2.36, "elapsed_time": "0:38:53", "remaining_time": "1 day, 2:47:00"} +{"current_steps": 179, "total_steps": 7532, "loss": 0.5301925539970398, "lr": 9.442970822281168e-06, "epoch": 0.047536847696189086, "percentage": 2.38, "elapsed_time": "0:39:06", "remaining_time": "1 day, 2:46:14"} +{"current_steps": 180, "total_steps": 7532, "loss": 0.5388369560241699, "lr": 9.496021220159151e-06, "epoch": 0.04780241667773204, "percentage": 2.39, "elapsed_time": "0:39:18", "remaining_time": "1 day, 2:45:39"} +{"current_steps": 181, "total_steps": 7532, "loss": 0.5549717545509338, "lr": 9.549071618037136e-06, "epoch": 0.048067985659274996, "percentage": 2.4, "elapsed_time": "0:39:30", "remaining_time": "1 day, 2:44:48"} +{"current_steps": 182, "total_steps": 7532, "loss": 0.5959764719009399, "lr": 9.60212201591512e-06, "epoch": 0.048333554640817955, "percentage": 2.42, "elapsed_time": "0:39:43", "remaining_time": "1 day, 2:44:20"} +{"current_steps": 183, "total_steps": 7532, "loss": 0.5461844205856323, "lr": 9.655172413793105e-06, "epoch": 0.04859912362236091, "percentage": 2.43, "elapsed_time": "0:39:56", "remaining_time": "1 day, 2:43:42"} +{"current_steps": 184, "total_steps": 7532, "loss": 0.5428494811058044, "lr": 9.708222811671088e-06, "epoch": 0.048864692603903866, "percentage": 2.44, "elapsed_time": "0:40:08", "remaining_time": "1 day, 2:43:03"} +{"current_steps": 185, "total_steps": 7532, "loss": 0.552198052406311, "lr": 9.761273209549072e-06, "epoch": 0.04913026158544682, "percentage": 2.46, "elapsed_time": "0:40:20", "remaining_time": "1 day, 2:42:23"} +{"current_steps": 186, "total_steps": 7532, "loss": 0.49992549419403076, "lr": 9.814323607427057e-06, "epoch": 0.049395830566989776, "percentage": 2.47, "elapsed_time": 
"0:40:33", "remaining_time": "1 day, 2:41:33"} +{"current_steps": 187, "total_steps": 7532, "loss": 0.557820200920105, "lr": 9.86737400530504e-06, "epoch": 0.04966139954853273, "percentage": 2.48, "elapsed_time": "0:40:45", "remaining_time": "1 day, 2:40:51"} +{"current_steps": 188, "total_steps": 7532, "loss": 0.5238749384880066, "lr": 9.920424403183024e-06, "epoch": 0.04992696853007569, "percentage": 2.5, "elapsed_time": "0:40:57", "remaining_time": "1 day, 2:39:59"} +{"current_steps": 189, "total_steps": 7532, "loss": 0.5346978902816772, "lr": 9.973474801061009e-06, "epoch": 0.050192537511618646, "percentage": 2.51, "elapsed_time": "0:41:10", "remaining_time": "1 day, 2:39:27"} +{"current_steps": 190, "total_steps": 7532, "loss": 0.5256577730178833, "lr": 1.0026525198938993e-05, "epoch": 0.0504581064931616, "percentage": 2.52, "elapsed_time": "0:41:22", "remaining_time": "1 day, 2:39:03"} +{"current_steps": 191, "total_steps": 7532, "loss": 0.5235393047332764, "lr": 1.0079575596816978e-05, "epoch": 0.050723675474704556, "percentage": 2.54, "elapsed_time": "0:41:36", "remaining_time": "1 day, 2:39:03"} +{"current_steps": 192, "total_steps": 7532, "loss": 0.4984837472438812, "lr": 1.013262599469496e-05, "epoch": 0.05098924445624751, "percentage": 2.55, "elapsed_time": "0:41:48", "remaining_time": "1 day, 2:38:36"} +{"current_steps": 193, "total_steps": 7532, "loss": 0.5209602117538452, "lr": 1.0185676392572945e-05, "epoch": 0.05125481343779047, "percentage": 2.56, "elapsed_time": "0:42:02", "remaining_time": "1 day, 2:38:34"} +{"current_steps": 194, "total_steps": 7532, "loss": 0.5468267202377319, "lr": 1.023872679045093e-05, "epoch": 0.05152038241933342, "percentage": 2.58, "elapsed_time": "0:42:15", "remaining_time": "1 day, 2:38:08"} +{"current_steps": 195, "total_steps": 7532, "loss": 0.5531667470932007, "lr": 1.0291777188328913e-05, "epoch": 0.05178595140087638, "percentage": 2.59, "elapsed_time": "0:42:27", "remaining_time": "1 day, 2:37:45"} 
+{"current_steps": 196, "total_steps": 7532, "loss": 0.5741526484489441, "lr": 1.0344827586206898e-05, "epoch": 0.052051520382419336, "percentage": 2.6, "elapsed_time": "0:42:41", "remaining_time": "1 day, 2:37:40"} +{"current_steps": 197, "total_steps": 7532, "loss": 0.46132561564445496, "lr": 1.039787798408488e-05, "epoch": 0.05231708936396229, "percentage": 2.62, "elapsed_time": "0:42:53", "remaining_time": "1 day, 2:37:18"} +{"current_steps": 198, "total_steps": 7532, "loss": 0.5074198842048645, "lr": 1.0450928381962865e-05, "epoch": 0.05258265834550525, "percentage": 2.63, "elapsed_time": "0:43:06", "remaining_time": "1 day, 2:37:00"} +{"current_steps": 199, "total_steps": 7532, "loss": 0.5193089842796326, "lr": 1.0503978779840849e-05, "epoch": 0.0528482273270482, "percentage": 2.64, "elapsed_time": "0:43:19", "remaining_time": "1 day, 2:36:27"} +{"current_steps": 200, "total_steps": 7532, "loss": 0.5209243297576904, "lr": 1.0557029177718834e-05, "epoch": 0.05311379630859116, "percentage": 2.66, "elapsed_time": "0:43:32", "remaining_time": "1 day, 2:36:07"} +{"current_steps": 201, "total_steps": 7532, "loss": 0.5607191920280457, "lr": 1.0610079575596819e-05, "epoch": 0.05337936529013411, "percentage": 2.67, "elapsed_time": "0:43:49", "remaining_time": "1 day, 2:38:40"} +{"current_steps": 202, "total_steps": 7532, "loss": 0.5482805371284485, "lr": 1.0663129973474802e-05, "epoch": 0.05364493427167707, "percentage": 2.68, "elapsed_time": "0:44:02", "remaining_time": "1 day, 2:38:18"} +{"current_steps": 203, "total_steps": 7532, "loss": 0.5579961538314819, "lr": 1.0716180371352788e-05, "epoch": 0.05391050325322003, "percentage": 2.7, "elapsed_time": "0:44:14", "remaining_time": "1 day, 2:37:32"} +{"current_steps": 204, "total_steps": 7532, "loss": 0.5119072794914246, "lr": 1.076923076923077e-05, "epoch": 0.05417607223476298, "percentage": 2.71, "elapsed_time": "0:44:27", "remaining_time": "1 day, 2:37:13"} +{"current_steps": 205, "total_steps": 7532, "loss": 
0.5129292607307434, "lr": 1.0822281167108754e-05, "epoch": 0.05444164121630594, "percentage": 2.72, "elapsed_time": "0:44:39", "remaining_time": "1 day, 2:36:24"} +{"current_steps": 206, "total_steps": 7532, "loss": 0.500032901763916, "lr": 1.0875331564986738e-05, "epoch": 0.05470721019784889, "percentage": 2.73, "elapsed_time": "0:44:52", "remaining_time": "1 day, 2:35:41"} +{"current_steps": 207, "total_steps": 7532, "loss": 0.5264571309089661, "lr": 1.0928381962864723e-05, "epoch": 0.05497277917939185, "percentage": 2.75, "elapsed_time": "0:45:05", "remaining_time": "1 day, 2:35:21"} +{"current_steps": 208, "total_steps": 7532, "loss": 0.5569590330123901, "lr": 1.0981432360742708e-05, "epoch": 0.0552383481609348, "percentage": 2.76, "elapsed_time": "0:45:17", "remaining_time": "1 day, 2:34:44"} +{"current_steps": 209, "total_steps": 7532, "loss": 0.5429908037185669, "lr": 1.103448275862069e-05, "epoch": 0.05550391714247776, "percentage": 2.77, "elapsed_time": "0:45:30", "remaining_time": "1 day, 2:34:28"} +{"current_steps": 210, "total_steps": 7532, "loss": 0.5073692202568054, "lr": 1.1087533156498675e-05, "epoch": 0.05576948612402072, "percentage": 2.79, "elapsed_time": "0:45:42", "remaining_time": "1 day, 2:33:53"} +{"current_steps": 211, "total_steps": 7532, "loss": 0.5613659620285034, "lr": 1.1140583554376659e-05, "epoch": 0.05603505510556367, "percentage": 2.8, "elapsed_time": "0:45:55", "remaining_time": "1 day, 2:33:31"} +{"current_steps": 212, "total_steps": 7532, "loss": 0.5197691917419434, "lr": 1.1193633952254644e-05, "epoch": 0.05630062408710663, "percentage": 2.81, "elapsed_time": "0:46:08", "remaining_time": "1 day, 2:32:55"} +{"current_steps": 213, "total_steps": 7532, "loss": 0.5410990715026855, "lr": 1.1246684350132625e-05, "epoch": 0.05656619306864958, "percentage": 2.83, "elapsed_time": "0:46:20", "remaining_time": "1 day, 2:32:27"} +{"current_steps": 214, "total_steps": 7532, "loss": 0.5852477550506592, "lr": 1.129973474801061e-05, "epoch": 
0.05683176205019254, "percentage": 2.84, "elapsed_time": "0:46:32", "remaining_time": "1 day, 2:31:47"} +{"current_steps": 215, "total_steps": 7532, "loss": 0.5312488079071045, "lr": 1.1352785145888594e-05, "epoch": 0.05709733103173549, "percentage": 2.85, "elapsed_time": "0:46:45", "remaining_time": "1 day, 2:31:25"} +{"current_steps": 216, "total_steps": 7532, "loss": 0.5075235366821289, "lr": 1.140583554376658e-05, "epoch": 0.05736290001327845, "percentage": 2.87, "elapsed_time": "0:46:58", "remaining_time": "1 day, 2:30:54"} +{"current_steps": 217, "total_steps": 7532, "loss": 0.5213298797607422, "lr": 1.1458885941644564e-05, "epoch": 0.05762846899482141, "percentage": 2.88, "elapsed_time": "0:47:11", "remaining_time": "1 day, 2:30:48"} +{"current_steps": 218, "total_steps": 7532, "loss": 0.5564183592796326, "lr": 1.1511936339522548e-05, "epoch": 0.05789403797636436, "percentage": 2.89, "elapsed_time": "0:47:24", "remaining_time": "1 day, 2:30:41"} +{"current_steps": 219, "total_steps": 7532, "loss": 0.5328387022018433, "lr": 1.1564986737400531e-05, "epoch": 0.05815960695790732, "percentage": 2.91, "elapsed_time": "0:47:37", "remaining_time": "1 day, 2:30:33"} +{"current_steps": 220, "total_steps": 7532, "loss": 0.5010273456573486, "lr": 1.1618037135278515e-05, "epoch": 0.05842517593945027, "percentage": 2.92, "elapsed_time": "0:47:50", "remaining_time": "1 day, 2:30:08"} +{"current_steps": 221, "total_steps": 7532, "loss": 0.5473708510398865, "lr": 1.16710875331565e-05, "epoch": 0.05869074492099323, "percentage": 2.93, "elapsed_time": "0:48:03", "remaining_time": "1 day, 2:30:02"} +{"current_steps": 222, "total_steps": 7532, "loss": 0.5359818339347839, "lr": 1.1724137931034483e-05, "epoch": 0.05895631390253618, "percentage": 2.95, "elapsed_time": "0:48:16", "remaining_time": "1 day, 2:29:38"} +{"current_steps": 223, "total_steps": 7532, "loss": 0.5274665951728821, "lr": 1.1777188328912468e-05, "epoch": 0.05922188288407914, "percentage": 2.96, "elapsed_time": 
"0:48:29", "remaining_time": "1 day, 2:29:36"} +{"current_steps": 224, "total_steps": 7532, "loss": 0.5463781952857971, "lr": 1.1830238726790454e-05, "epoch": 0.0594874518656221, "percentage": 2.97, "elapsed_time": "0:48:42", "remaining_time": "1 day, 2:29:16"} +{"current_steps": 225, "total_steps": 7532, "loss": 0.553212583065033, "lr": 1.1883289124668435e-05, "epoch": 0.05975302084716505, "percentage": 2.99, "elapsed_time": "0:48:55", "remaining_time": "1 day, 2:28:59"} +{"current_steps": 226, "total_steps": 7532, "loss": 0.47144171595573425, "lr": 1.193633952254642e-05, "epoch": 0.06001858982870801, "percentage": 3.0, "elapsed_time": "0:49:09", "remaining_time": "1 day, 2:28:58"} +{"current_steps": 227, "total_steps": 7532, "loss": 0.506844162940979, "lr": 1.1989389920424404e-05, "epoch": 0.06028415881025096, "percentage": 3.01, "elapsed_time": "0:49:22", "remaining_time": "1 day, 2:28:59"} +{"current_steps": 228, "total_steps": 7532, "loss": 0.4965322017669678, "lr": 1.2042440318302389e-05, "epoch": 0.06054972779179392, "percentage": 3.03, "elapsed_time": "0:49:36", "remaining_time": "1 day, 2:29:03"} +{"current_steps": 229, "total_steps": 7532, "loss": 0.4815751612186432, "lr": 1.2095490716180371e-05, "epoch": 0.06081529677333687, "percentage": 3.04, "elapsed_time": "0:49:49", "remaining_time": "1 day, 2:28:49"} +{"current_steps": 230, "total_steps": 7532, "loss": 0.5245312452316284, "lr": 1.2148541114058356e-05, "epoch": 0.06108086575487983, "percentage": 3.05, "elapsed_time": "0:50:02", "remaining_time": "1 day, 2:28:56"} +{"current_steps": 231, "total_steps": 7532, "loss": 0.5215133428573608, "lr": 1.2201591511936341e-05, "epoch": 0.06134643473642279, "percentage": 3.07, "elapsed_time": "0:50:16", "remaining_time": "1 day, 2:28:46"} +{"current_steps": 232, "total_steps": 7532, "loss": 0.5039419531822205, "lr": 1.2254641909814325e-05, "epoch": 0.06161200371796574, "percentage": 3.08, "elapsed_time": "0:50:29", "remaining_time": "1 day, 2:28:52"} 
+{"current_steps": 233, "total_steps": 7532, "loss": 0.5562925338745117, "lr": 1.230769230769231e-05, "epoch": 0.0618775726995087, "percentage": 3.09, "elapsed_time": "0:50:42", "remaining_time": "1 day, 2:28:32"} +{"current_steps": 234, "total_steps": 7532, "loss": 0.5372984409332275, "lr": 1.2360742705570291e-05, "epoch": 0.06214314168105165, "percentage": 3.11, "elapsed_time": "0:50:56", "remaining_time": "1 day, 2:28:37"} +{"current_steps": 235, "total_steps": 7532, "loss": 0.44987717270851135, "lr": 1.2413793103448277e-05, "epoch": 0.06240871066259461, "percentage": 3.12, "elapsed_time": "0:51:09", "remaining_time": "1 day, 2:28:22"} +{"current_steps": 236, "total_steps": 7532, "loss": 0.537068247795105, "lr": 1.246684350132626e-05, "epoch": 0.06267427964413756, "percentage": 3.13, "elapsed_time": "0:51:22", "remaining_time": "1 day, 2:28:14"} +{"current_steps": 237, "total_steps": 7532, "loss": 0.504779577255249, "lr": 1.2519893899204245e-05, "epoch": 0.06293984862568051, "percentage": 3.15, "elapsed_time": "0:51:36", "remaining_time": "1 day, 2:28:18"} +{"current_steps": 238, "total_steps": 7532, "loss": 0.5524113774299622, "lr": 1.257294429708223e-05, "epoch": 0.06320541760722348, "percentage": 3.16, "elapsed_time": "0:51:49", "remaining_time": "1 day, 2:28:02"} +{"current_steps": 239, "total_steps": 7532, "loss": 0.5089439153671265, "lr": 1.2625994694960214e-05, "epoch": 0.06347098658876643, "percentage": 3.17, "elapsed_time": "0:52:02", "remaining_time": "1 day, 2:27:58"} +{"current_steps": 240, "total_steps": 7532, "loss": 0.4501679837703705, "lr": 1.2679045092838197e-05, "epoch": 0.06373655557030938, "percentage": 3.19, "elapsed_time": "0:52:15", "remaining_time": "1 day, 2:27:36"} +{"current_steps": 241, "total_steps": 7532, "loss": 0.5360216498374939, "lr": 1.273209549071618e-05, "epoch": 0.06400212455185235, "percentage": 3.2, "elapsed_time": "0:52:28", "remaining_time": "1 day, 2:27:31"} +{"current_steps": 242, "total_steps": 7532, "loss": 
0.5595712661743164, "lr": 1.2785145888594166e-05, "epoch": 0.0642676935333953, "percentage": 3.21, "elapsed_time": "0:52:40", "remaining_time": "1 day, 2:27:00"} +{"current_steps": 243, "total_steps": 7532, "loss": 0.5010904669761658, "lr": 1.283819628647215e-05, "epoch": 0.06453326251493825, "percentage": 3.23, "elapsed_time": "0:52:54", "remaining_time": "1 day, 2:26:48"} +{"current_steps": 244, "total_steps": 7532, "loss": 0.5053697228431702, "lr": 1.2891246684350134e-05, "epoch": 0.0647988314964812, "percentage": 3.24, "elapsed_time": "0:53:06", "remaining_time": "1 day, 2:26:15"} +{"current_steps": 245, "total_steps": 7532, "loss": 0.5045514106750488, "lr": 1.294429708222812e-05, "epoch": 0.06506440047802417, "percentage": 3.25, "elapsed_time": "0:53:19", "remaining_time": "1 day, 2:26:06"} +{"current_steps": 246, "total_steps": 7532, "loss": 0.5546073913574219, "lr": 1.2997347480106101e-05, "epoch": 0.06532996945956712, "percentage": 3.27, "elapsed_time": "0:53:31", "remaining_time": "1 day, 2:25:31"} +{"current_steps": 247, "total_steps": 7532, "loss": 0.47276046872138977, "lr": 1.3050397877984087e-05, "epoch": 0.06559553844111007, "percentage": 3.28, "elapsed_time": "0:53:44", "remaining_time": "1 day, 2:25:00"} +{"current_steps": 248, "total_steps": 7532, "loss": 0.4757889211177826, "lr": 1.310344827586207e-05, "epoch": 0.06586110742265304, "percentage": 3.29, "elapsed_time": "0:53:57", "remaining_time": "1 day, 2:24:46"} +{"current_steps": 249, "total_steps": 7532, "loss": 0.5078848600387573, "lr": 1.3156498673740055e-05, "epoch": 0.06612667640419599, "percentage": 3.31, "elapsed_time": "0:54:09", "remaining_time": "1 day, 2:24:18"} +{"current_steps": 250, "total_steps": 7532, "loss": 0.4890335202217102, "lr": 1.3209549071618037e-05, "epoch": 0.06639224538573894, "percentage": 3.32, "elapsed_time": "0:54:22", "remaining_time": "1 day, 2:23:57"} +{"current_steps": 251, "total_steps": 7532, "loss": 0.5406580567359924, "lr": 1.3262599469496022e-05, "epoch": 
0.0666578143672819, "percentage": 3.33, "elapsed_time": "0:54:35", "remaining_time": "1 day, 2:23:30"} +{"current_steps": 252, "total_steps": 7532, "loss": 0.5236875414848328, "lr": 1.3315649867374005e-05, "epoch": 0.06692338334882486, "percentage": 3.35, "elapsed_time": "0:54:48", "remaining_time": "1 day, 2:23:10"} +{"current_steps": 253, "total_steps": 7532, "loss": 0.4991317391395569, "lr": 1.336870026525199e-05, "epoch": 0.06718895233036781, "percentage": 3.36, "elapsed_time": "0:55:00", "remaining_time": "1 day, 2:22:39"} +{"current_steps": 254, "total_steps": 7532, "loss": 0.4234679639339447, "lr": 1.3421750663129976e-05, "epoch": 0.06745452131191076, "percentage": 3.37, "elapsed_time": "0:55:13", "remaining_time": "1 day, 2:22:19"} +{"current_steps": 255, "total_steps": 7532, "loss": 0.49749234318733215, "lr": 1.3474801061007958e-05, "epoch": 0.06772009029345373, "percentage": 3.39, "elapsed_time": "0:55:25", "remaining_time": "1 day, 2:21:32"} +{"current_steps": 256, "total_steps": 7532, "loss": 0.5049105286598206, "lr": 1.3527851458885943e-05, "epoch": 0.06798565927499668, "percentage": 3.4, "elapsed_time": "0:55:38", "remaining_time": "1 day, 2:21:23"} +{"current_steps": 257, "total_steps": 7532, "loss": 0.5355304479598999, "lr": 1.3580901856763926e-05, "epoch": 0.06825122825653963, "percentage": 3.41, "elapsed_time": "0:55:50", "remaining_time": "1 day, 2:20:53"} +{"current_steps": 258, "total_steps": 7532, "loss": 0.46302929520606995, "lr": 1.3633952254641911e-05, "epoch": 0.06851679723808259, "percentage": 3.43, "elapsed_time": "0:56:03", "remaining_time": "1 day, 2:20:24"} +{"current_steps": 259, "total_steps": 7532, "loss": 0.5054173469543457, "lr": 1.3687002652519895e-05, "epoch": 0.06878236621962555, "percentage": 3.44, "elapsed_time": "0:56:15", "remaining_time": "1 day, 2:19:45"} +{"current_steps": 260, "total_steps": 7532, "loss": 0.5018566846847534, "lr": 1.374005305039788e-05, "epoch": 0.0690479352011685, "percentage": 3.45, "elapsed_time": 
"0:56:27", "remaining_time": "1 day, 2:19:16"} +{"current_steps": 261, "total_steps": 7532, "loss": 0.4938735365867615, "lr": 1.3793103448275863e-05, "epoch": 0.06931350418271146, "percentage": 3.47, "elapsed_time": "0:56:39", "remaining_time": "1 day, 2:18:37"} +{"current_steps": 262, "total_steps": 7532, "loss": 0.4605029225349426, "lr": 1.3846153846153847e-05, "epoch": 0.06957907316425442, "percentage": 3.48, "elapsed_time": "0:56:52", "remaining_time": "1 day, 2:18:04"} +{"current_steps": 263, "total_steps": 7532, "loss": 0.5056782960891724, "lr": 1.3899204244031832e-05, "epoch": 0.06984464214579737, "percentage": 3.49, "elapsed_time": "0:57:04", "remaining_time": "1 day, 2:17:26"} +{"current_steps": 264, "total_steps": 7532, "loss": 0.5017784833908081, "lr": 1.3952254641909815e-05, "epoch": 0.07011021112734032, "percentage": 3.51, "elapsed_time": "0:57:16", "remaining_time": "1 day, 2:16:45"} +{"current_steps": 265, "total_steps": 7532, "loss": 0.5132012367248535, "lr": 1.40053050397878e-05, "epoch": 0.07037578010888328, "percentage": 3.52, "elapsed_time": "0:57:28", "remaining_time": "1 day, 2:16:16"} +{"current_steps": 266, "total_steps": 7532, "loss": 0.531212329864502, "lr": 1.4058355437665782e-05, "epoch": 0.07064134909042624, "percentage": 3.53, "elapsed_time": "0:57:41", "remaining_time": "1 day, 2:15:40"} +{"current_steps": 267, "total_steps": 7532, "loss": 0.4900968074798584, "lr": 1.4111405835543767e-05, "epoch": 0.0709069180719692, "percentage": 3.54, "elapsed_time": "0:57:53", "remaining_time": "1 day, 2:15:12"} +{"current_steps": 268, "total_steps": 7532, "loss": 0.45277124643325806, "lr": 1.4164456233421753e-05, "epoch": 0.07117248705351215, "percentage": 3.56, "elapsed_time": "0:58:06", "remaining_time": "1 day, 2:14:50"} +{"current_steps": 269, "total_steps": 7532, "loss": 0.48026078939437866, "lr": 1.4217506631299736e-05, "epoch": 0.07143805603505511, "percentage": 3.57, "elapsed_time": "0:58:18", "remaining_time": "1 day, 2:14:30"} 
+{"current_steps": 270, "total_steps": 7532, "loss": 0.5111234784126282, "lr": 1.4270557029177721e-05, "epoch": 0.07170362501659806, "percentage": 3.58, "elapsed_time": "0:58:31", "remaining_time": "1 day, 2:14:01"} +{"current_steps": 271, "total_steps": 7532, "loss": 0.5448082685470581, "lr": 1.4323607427055703e-05, "epoch": 0.07196919399814102, "percentage": 3.6, "elapsed_time": "0:58:44", "remaining_time": "1 day, 2:13:41"} +{"current_steps": 272, "total_steps": 7532, "loss": 0.5242921113967896, "lr": 1.4376657824933688e-05, "epoch": 0.07223476297968397, "percentage": 3.61, "elapsed_time": "0:58:56", "remaining_time": "1 day, 2:13:20"} +{"current_steps": 273, "total_steps": 7532, "loss": 0.5194095373153687, "lr": 1.4429708222811672e-05, "epoch": 0.07250033196122693, "percentage": 3.62, "elapsed_time": "0:59:09", "remaining_time": "1 day, 2:13:12"} +{"current_steps": 274, "total_steps": 7532, "loss": 0.4620330333709717, "lr": 1.4482758620689657e-05, "epoch": 0.07276590094276988, "percentage": 3.64, "elapsed_time": "0:59:23", "remaining_time": "1 day, 2:13:06"} +{"current_steps": 275, "total_steps": 7532, "loss": 0.4654063582420349, "lr": 1.4535809018567642e-05, "epoch": 0.07303146992431284, "percentage": 3.65, "elapsed_time": "0:59:36", "remaining_time": "1 day, 2:12:52"} +{"current_steps": 276, "total_steps": 7532, "loss": 0.4637746810913086, "lr": 1.4588859416445624e-05, "epoch": 0.0732970389058558, "percentage": 3.66, "elapsed_time": "0:59:49", "remaining_time": "1 day, 2:12:45"} +{"current_steps": 277, "total_steps": 7532, "loss": 0.47949421405792236, "lr": 1.4641909814323609e-05, "epoch": 0.07356260788739875, "percentage": 3.68, "elapsed_time": "1:00:02", "remaining_time": "1 day, 2:12:28"} +{"current_steps": 278, "total_steps": 7532, "loss": 0.49565935134887695, "lr": 1.4694960212201592e-05, "epoch": 0.0738281768689417, "percentage": 3.69, "elapsed_time": "1:00:15", "remaining_time": "1 day, 2:12:26"} +{"current_steps": 279, "total_steps": 7532, "loss": 
0.5057941675186157, "lr": 1.4748010610079577e-05, "epoch": 0.07409374585048466, "percentage": 3.7, "elapsed_time": "1:00:28", "remaining_time": "1 day, 2:12:13"} +{"current_steps": 280, "total_steps": 7532, "loss": 0.5495956540107727, "lr": 1.480106100795756e-05, "epoch": 0.07435931483202762, "percentage": 3.72, "elapsed_time": "1:00:42", "remaining_time": "1 day, 2:12:08"} +{"current_steps": 281, "total_steps": 7532, "loss": 0.4502897560596466, "lr": 1.4854111405835546e-05, "epoch": 0.07462488381357057, "percentage": 3.73, "elapsed_time": "1:00:54", "remaining_time": "1 day, 2:11:52"} +{"current_steps": 282, "total_steps": 7532, "loss": 0.4799070954322815, "lr": 1.490716180371353e-05, "epoch": 0.07489045279511353, "percentage": 3.74, "elapsed_time": "1:01:07", "remaining_time": "1 day, 2:11:31"} +{"current_steps": 283, "total_steps": 7532, "loss": 0.45640307664871216, "lr": 1.4960212201591513e-05, "epoch": 0.07515602177665649, "percentage": 3.76, "elapsed_time": "1:01:20", "remaining_time": "1 day, 2:11:03"} +{"current_steps": 284, "total_steps": 7532, "loss": 0.47862207889556885, "lr": 1.5013262599469498e-05, "epoch": 0.07542159075819944, "percentage": 3.77, "elapsed_time": "1:01:32", "remaining_time": "1 day, 2:10:39"} +{"current_steps": 285, "total_steps": 7532, "loss": 0.48195987939834595, "lr": 1.5066312997347481e-05, "epoch": 0.0756871597397424, "percentage": 3.78, "elapsed_time": "1:01:44", "remaining_time": "1 day, 2:09:58"} +{"current_steps": 286, "total_steps": 7532, "loss": 0.518566370010376, "lr": 1.5119363395225467e-05, "epoch": 0.07595272872128535, "percentage": 3.8, "elapsed_time": "1:01:57", "remaining_time": "1 day, 2:09:37"} +{"current_steps": 287, "total_steps": 7532, "loss": 0.5034162402153015, "lr": 1.5172413793103448e-05, "epoch": 0.07621829770282831, "percentage": 3.81, "elapsed_time": "1:02:09", "remaining_time": "1 day, 2:09:02"} +{"current_steps": 288, "total_steps": 7532, "loss": 0.497822642326355, "lr": 1.5225464190981433e-05, "epoch": 
0.07648386668437127, "percentage": 3.82, "elapsed_time": "1:02:21", "remaining_time": "1 day, 2:08:39"} +{"current_steps": 289, "total_steps": 7532, "loss": 0.510530412197113, "lr": 1.5278514588859417e-05, "epoch": 0.07674943566591422, "percentage": 3.84, "elapsed_time": "1:02:34", "remaining_time": "1 day, 2:08:07"} +{"current_steps": 290, "total_steps": 7532, "loss": 0.5163881778717041, "lr": 1.53315649867374e-05, "epoch": 0.07701500464745718, "percentage": 3.85, "elapsed_time": "1:02:46", "remaining_time": "1 day, 2:07:49"} +{"current_steps": 291, "total_steps": 7532, "loss": 0.5161621570587158, "lr": 1.5384615384615387e-05, "epoch": 0.07728057362900013, "percentage": 3.86, "elapsed_time": "1:02:59", "remaining_time": "1 day, 2:07:20"} +{"current_steps": 292, "total_steps": 7532, "loss": 0.5260482430458069, "lr": 1.543766578249337e-05, "epoch": 0.07754614261054309, "percentage": 3.88, "elapsed_time": "1:03:11", "remaining_time": "1 day, 2:07:00"} +{"current_steps": 293, "total_steps": 7532, "loss": 0.4946279227733612, "lr": 1.5490716180371354e-05, "epoch": 0.07781171159208604, "percentage": 3.89, "elapsed_time": "1:03:24", "remaining_time": "1 day, 2:06:39"} +{"current_steps": 294, "total_steps": 7532, "loss": 0.5030514001846313, "lr": 1.5543766578249338e-05, "epoch": 0.078077280573629, "percentage": 3.9, "elapsed_time": "1:03:37", "remaining_time": "1 day, 2:06:12"} +{"current_steps": 295, "total_steps": 7532, "loss": 0.48864102363586426, "lr": 1.559681697612732e-05, "epoch": 0.07834284955517196, "percentage": 3.92, "elapsed_time": "1:03:49", "remaining_time": "1 day, 2:05:43"} +{"current_steps": 296, "total_steps": 7532, "loss": 0.48310425877571106, "lr": 1.5649867374005304e-05, "epoch": 0.07860841853671491, "percentage": 3.93, "elapsed_time": "1:04:01", "remaining_time": "1 day, 2:05:09"} +{"current_steps": 297, "total_steps": 7532, "loss": 0.4451446533203125, "lr": 1.570291777188329e-05, "epoch": 0.07887398751825787, "percentage": 3.94, "elapsed_time": 
"1:04:14", "remaining_time": "1 day, 2:04:50"} +{"current_steps": 298, "total_steps": 7532, "loss": 0.4884604811668396, "lr": 1.5755968169761275e-05, "epoch": 0.07913955649980083, "percentage": 3.96, "elapsed_time": "1:04:26", "remaining_time": "1 day, 2:04:21"} +{"current_steps": 299, "total_steps": 7532, "loss": 0.5047659873962402, "lr": 1.5809018567639258e-05, "epoch": 0.07940512548134378, "percentage": 3.97, "elapsed_time": "1:04:39", "remaining_time": "1 day, 2:04:05"} +{"current_steps": 300, "total_steps": 7532, "loss": 0.49124205112457275, "lr": 1.586206896551724e-05, "epoch": 0.07967069446288673, "percentage": 3.98, "elapsed_time": "1:04:51", "remaining_time": "1 day, 2:03:42"} +{"current_steps": 301, "total_steps": 7532, "loss": 0.5113086700439453, "lr": 1.5915119363395225e-05, "epoch": 0.0799362634444297, "percentage": 4.0, "elapsed_time": "1:05:10", "remaining_time": "1 day, 2:05:52"} +{"current_steps": 302, "total_steps": 7532, "loss": 0.5298338532447815, "lr": 1.5968169761273212e-05, "epoch": 0.08020183242597265, "percentage": 4.01, "elapsed_time": "1:05:24", "remaining_time": "1 day, 2:05:43"} +{"current_steps": 303, "total_steps": 7532, "loss": 0.4673181176185608, "lr": 1.6021220159151195e-05, "epoch": 0.0804674014075156, "percentage": 4.02, "elapsed_time": "1:05:37", "remaining_time": "1 day, 2:05:41"} +{"current_steps": 304, "total_steps": 7532, "loss": 0.45361828804016113, "lr": 1.607427055702918e-05, "epoch": 0.08073297038905856, "percentage": 4.04, "elapsed_time": "1:05:50", "remaining_time": "1 day, 2:05:30"} +{"current_steps": 305, "total_steps": 7532, "loss": 0.5144034624099731, "lr": 1.6127320954907166e-05, "epoch": 0.08099853937060152, "percentage": 4.05, "elapsed_time": "1:06:04", "remaining_time": "1 day, 2:05:35"} +{"current_steps": 306, "total_steps": 7532, "loss": 0.5027451515197754, "lr": 1.6180371352785146e-05, "epoch": 0.08126410835214447, "percentage": 4.06, "elapsed_time": "1:06:17", "remaining_time": "1 day, 2:05:23"} 
+{"current_steps": 307, "total_steps": 7532, "loss": 0.4987551271915436, "lr": 1.6233421750663133e-05, "epoch": 0.08152967733368742, "percentage": 4.08, "elapsed_time": "1:06:30", "remaining_time": "1 day, 2:05:18"} +{"current_steps": 308, "total_steps": 7532, "loss": 0.5433062314987183, "lr": 1.6286472148541116e-05, "epoch": 0.08179524631523039, "percentage": 4.09, "elapsed_time": "1:06:43", "remaining_time": "1 day, 2:05:08"} +{"current_steps": 309, "total_steps": 7532, "loss": 0.49603772163391113, "lr": 1.63395225464191e-05, "epoch": 0.08206081529677334, "percentage": 4.1, "elapsed_time": "1:06:57", "remaining_time": "1 day, 2:05:03"} +{"current_steps": 310, "total_steps": 7532, "loss": 0.47990959882736206, "lr": 1.6392572944297083e-05, "epoch": 0.08232638427831629, "percentage": 4.12, "elapsed_time": "1:07:10", "remaining_time": "1 day, 2:04:46"} +{"current_steps": 311, "total_steps": 7532, "loss": 0.5196831226348877, "lr": 1.6445623342175066e-05, "epoch": 0.08259195325985925, "percentage": 4.13, "elapsed_time": "1:07:23", "remaining_time": "1 day, 2:04:44"} +{"current_steps": 312, "total_steps": 7532, "loss": 0.4664091467857361, "lr": 1.6498673740053053e-05, "epoch": 0.0828575222414022, "percentage": 4.14, "elapsed_time": "1:07:36", "remaining_time": "1 day, 2:04:30"} +{"current_steps": 313, "total_steps": 7532, "loss": 0.4405553936958313, "lr": 1.6551724137931037e-05, "epoch": 0.08312309122294516, "percentage": 4.16, "elapsed_time": "1:07:49", "remaining_time": "1 day, 2:04:19"} +{"current_steps": 314, "total_steps": 7532, "loss": 0.46172815561294556, "lr": 1.660477453580902e-05, "epoch": 0.08338866020448811, "percentage": 4.17, "elapsed_time": "1:08:03", "remaining_time": "1 day, 2:04:19"} +{"current_steps": 315, "total_steps": 7532, "loss": 0.5004327297210693, "lr": 1.6657824933687004e-05, "epoch": 0.08365422918603108, "percentage": 4.18, "elapsed_time": "1:08:16", "remaining_time": "1 day, 2:04:06"} +{"current_steps": 316, "total_steps": 7532, "loss": 
0.4727814197540283, "lr": 1.6710875331564987e-05, "epoch": 0.08391979816757403, "percentage": 4.2, "elapsed_time": "1:08:29", "remaining_time": "1 day, 2:04:12"} +{"current_steps": 317, "total_steps": 7532, "loss": 0.43602120876312256, "lr": 1.676392572944297e-05, "epoch": 0.08418536714911698, "percentage": 4.21, "elapsed_time": "1:08:43", "remaining_time": "1 day, 2:04:01"} +{"current_steps": 318, "total_steps": 7532, "loss": 0.5110410451889038, "lr": 1.6816976127320957e-05, "epoch": 0.08445093613065995, "percentage": 4.22, "elapsed_time": "1:08:56", "remaining_time": "1 day, 2:03:49"} +{"current_steps": 319, "total_steps": 7532, "loss": 0.4798283278942108, "lr": 1.687002652519894e-05, "epoch": 0.0847165051122029, "percentage": 4.24, "elapsed_time": "1:09:08", "remaining_time": "1 day, 2:03:28"} +{"current_steps": 320, "total_steps": 7532, "loss": 0.45690029859542847, "lr": 1.6923076923076924e-05, "epoch": 0.08498207409374585, "percentage": 4.25, "elapsed_time": "1:09:21", "remaining_time": "1 day, 2:03:10"} +{"current_steps": 321, "total_steps": 7532, "loss": 0.4770117998123169, "lr": 1.6976127320954908e-05, "epoch": 0.0852476430752888, "percentage": 4.26, "elapsed_time": "1:09:34", "remaining_time": "1 day, 2:02:47"} +{"current_steps": 322, "total_steps": 7532, "loss": 0.512240469455719, "lr": 1.702917771883289e-05, "epoch": 0.08551321205683177, "percentage": 4.28, "elapsed_time": "1:09:46", "remaining_time": "1 day, 2:02:31"} +{"current_steps": 323, "total_steps": 7532, "loss": 0.4696195423603058, "lr": 1.7082228116710878e-05, "epoch": 0.08577878103837472, "percentage": 4.29, "elapsed_time": "1:09:59", "remaining_time": "1 day, 2:02:01"} +{"current_steps": 324, "total_steps": 7532, "loss": 0.4779578149318695, "lr": 1.713527851458886e-05, "epoch": 0.08604435001991767, "percentage": 4.3, "elapsed_time": "1:10:11", "remaining_time": "1 day, 2:01:41"} +{"current_steps": 325, "total_steps": 7532, "loss": 0.48670440912246704, "lr": 1.7188328912466845e-05, "epoch": 
0.08630991900146064, "percentage": 4.31, "elapsed_time": "1:10:24", "remaining_time": "1 day, 2:01:14"} +{"current_steps": 326, "total_steps": 7532, "loss": 0.5285798907279968, "lr": 1.7241379310344828e-05, "epoch": 0.08657548798300359, "percentage": 4.33, "elapsed_time": "1:10:37", "remaining_time": "1 day, 2:01:01"} +{"current_steps": 327, "total_steps": 7532, "loss": 0.46095865964889526, "lr": 1.7294429708222812e-05, "epoch": 0.08684105696454654, "percentage": 4.34, "elapsed_time": "1:10:49", "remaining_time": "1 day, 2:00:36"} +{"current_steps": 328, "total_steps": 7532, "loss": 0.44342565536499023, "lr": 1.73474801061008e-05, "epoch": 0.08710662594608949, "percentage": 4.35, "elapsed_time": "1:11:02", "remaining_time": "1 day, 2:00:16"} +{"current_steps": 329, "total_steps": 7532, "loss": 0.48974257707595825, "lr": 1.7400530503978782e-05, "epoch": 0.08737219492763246, "percentage": 4.37, "elapsed_time": "1:11:15", "remaining_time": "1 day, 1:59:56"} +{"current_steps": 330, "total_steps": 7532, "loss": 0.4763977527618408, "lr": 1.7453580901856765e-05, "epoch": 0.08763776390917541, "percentage": 4.38, "elapsed_time": "1:11:28", "remaining_time": "1 day, 1:59:44"} +{"current_steps": 331, "total_steps": 7532, "loss": 0.5390856266021729, "lr": 1.750663129973475e-05, "epoch": 0.08790333289071836, "percentage": 4.39, "elapsed_time": "1:11:41", "remaining_time": "1 day, 1:59:41"} +{"current_steps": 332, "total_steps": 7532, "loss": 0.4833192825317383, "lr": 1.7559681697612732e-05, "epoch": 0.08816890187226133, "percentage": 4.41, "elapsed_time": "1:11:54", "remaining_time": "1 day, 1:59:27"} +{"current_steps": 333, "total_steps": 7532, "loss": 0.47842955589294434, "lr": 1.7612732095490716e-05, "epoch": 0.08843447085380428, "percentage": 4.42, "elapsed_time": "1:12:08", "remaining_time": "1 day, 1:59:27"} +{"current_steps": 334, "total_steps": 7532, "loss": 0.4543060064315796, "lr": 1.7665782493368703e-05, "epoch": 0.08870003983534723, "percentage": 4.43, 
"elapsed_time": "1:12:20", "remaining_time": "1 day, 1:59:10"} +{"current_steps": 335, "total_steps": 7532, "loss": 0.4492039978504181, "lr": 1.7718832891246686e-05, "epoch": 0.08896560881689018, "percentage": 4.45, "elapsed_time": "1:12:34", "remaining_time": "1 day, 1:59:02"} +{"current_steps": 336, "total_steps": 7532, "loss": 0.4930066466331482, "lr": 1.777188328912467e-05, "epoch": 0.08923117779843315, "percentage": 4.46, "elapsed_time": "1:12:46", "remaining_time": "1 day, 1:58:40"} +{"current_steps": 337, "total_steps": 7532, "loss": 0.46343356370925903, "lr": 1.7824933687002653e-05, "epoch": 0.0894967467799761, "percentage": 4.47, "elapsed_time": "1:12:59", "remaining_time": "1 day, 1:58:29"} +{"current_steps": 338, "total_steps": 7532, "loss": 0.5118839740753174, "lr": 1.7877984084880636e-05, "epoch": 0.08976231576151905, "percentage": 4.49, "elapsed_time": "1:13:12", "remaining_time": "1 day, 1:58:01"} +{"current_steps": 339, "total_steps": 7532, "loss": 0.4659194350242615, "lr": 1.7931034482758623e-05, "epoch": 0.09002788474306202, "percentage": 4.5, "elapsed_time": "1:13:24", "remaining_time": "1 day, 1:57:35"} +{"current_steps": 340, "total_steps": 7532, "loss": 0.45929303765296936, "lr": 1.7984084880636607e-05, "epoch": 0.09029345372460497, "percentage": 4.51, "elapsed_time": "1:13:36", "remaining_time": "1 day, 1:57:05"} +{"current_steps": 341, "total_steps": 7532, "loss": 0.5072556734085083, "lr": 1.803713527851459e-05, "epoch": 0.09055902270614792, "percentage": 4.53, "elapsed_time": "1:13:49", "remaining_time": "1 day, 1:56:43"} +{"current_steps": 342, "total_steps": 7532, "loss": 0.42370402812957764, "lr": 1.8090185676392577e-05, "epoch": 0.09082459168769087, "percentage": 4.54, "elapsed_time": "1:14:01", "remaining_time": "1 day, 1:56:10"} +{"current_steps": 343, "total_steps": 7532, "loss": 0.5017818212509155, "lr": 1.8143236074270557e-05, "epoch": 0.09109016066923384, "percentage": 4.55, "elapsed_time": "1:14:13", "remaining_time": "1 day, 
1:55:42"} +{"current_steps": 344, "total_steps": 7532, "loss": 0.4746384620666504, "lr": 1.8196286472148544e-05, "epoch": 0.09135572965077679, "percentage": 4.57, "elapsed_time": "1:14:25", "remaining_time": "1 day, 1:55:11"} +{"current_steps": 345, "total_steps": 7532, "loss": 0.49020540714263916, "lr": 1.8249336870026527e-05, "epoch": 0.09162129863231974, "percentage": 4.58, "elapsed_time": "1:14:38", "remaining_time": "1 day, 1:54:45"} +{"current_steps": 346, "total_steps": 7532, "loss": 0.4569393992424011, "lr": 1.830238726790451e-05, "epoch": 0.09188686761386271, "percentage": 4.59, "elapsed_time": "1:14:50", "remaining_time": "1 day, 1:54:13"} +{"current_steps": 347, "total_steps": 7532, "loss": 0.46831727027893066, "lr": 1.8355437665782494e-05, "epoch": 0.09215243659540566, "percentage": 4.61, "elapsed_time": "1:15:02", "remaining_time": "1 day, 1:53:42"} +{"current_steps": 348, "total_steps": 7532, "loss": 0.4795265197753906, "lr": 1.8408488063660478e-05, "epoch": 0.09241800557694861, "percentage": 4.62, "elapsed_time": "1:15:14", "remaining_time": "1 day, 1:53:22"} +{"current_steps": 349, "total_steps": 7532, "loss": 0.5122503042221069, "lr": 1.8461538461538465e-05, "epoch": 0.09268357455849156, "percentage": 4.63, "elapsed_time": "1:15:26", "remaining_time": "1 day, 1:52:49"} +{"current_steps": 350, "total_steps": 7532, "loss": 0.4190404713153839, "lr": 1.8514588859416448e-05, "epoch": 0.09294914354003453, "percentage": 4.65, "elapsed_time": "1:15:39", "remaining_time": "1 day, 1:52:28"} +{"current_steps": 351, "total_steps": 7532, "loss": 0.47778886556625366, "lr": 1.856763925729443e-05, "epoch": 0.09321471252157748, "percentage": 4.66, "elapsed_time": "1:15:51", "remaining_time": "1 day, 1:51:55"} +{"current_steps": 352, "total_steps": 7532, "loss": 0.45851507782936096, "lr": 1.8620689655172415e-05, "epoch": 0.09348028150312043, "percentage": 4.67, "elapsed_time": "1:16:04", "remaining_time": "1 day, 1:51:38"} +{"current_steps": 353, "total_steps": 
7532, "loss": 0.4888782501220703, "lr": 1.86737400530504e-05, "epoch": 0.09374585048466338, "percentage": 4.69, "elapsed_time": "1:16:16", "remaining_time": "1 day, 1:51:08"} +{"current_steps": 354, "total_steps": 7532, "loss": 0.5032983422279358, "lr": 1.8726790450928382e-05, "epoch": 0.09401141946620635, "percentage": 4.7, "elapsed_time": "1:16:28", "remaining_time": "1 day, 1:50:48"} +{"current_steps": 355, "total_steps": 7532, "loss": 0.4754604697227478, "lr": 1.877984084880637e-05, "epoch": 0.0942769884477493, "percentage": 4.71, "elapsed_time": "1:16:41", "remaining_time": "1 day, 1:50:18"} +{"current_steps": 356, "total_steps": 7532, "loss": 0.488397479057312, "lr": 1.8832891246684352e-05, "epoch": 0.09454255742929225, "percentage": 4.73, "elapsed_time": "1:16:53", "remaining_time": "1 day, 1:50:01"} +{"current_steps": 357, "total_steps": 7532, "loss": 0.4775403141975403, "lr": 1.8885941644562336e-05, "epoch": 0.09480812641083522, "percentage": 4.74, "elapsed_time": "1:17:05", "remaining_time": "1 day, 1:49:30"} +{"current_steps": 358, "total_steps": 7532, "loss": 0.47063153982162476, "lr": 1.893899204244032e-05, "epoch": 0.09507369539237817, "percentage": 4.75, "elapsed_time": "1:17:18", "remaining_time": "1 day, 1:49:08"} +{"current_steps": 359, "total_steps": 7532, "loss": 0.4856908321380615, "lr": 1.8992042440318303e-05, "epoch": 0.09533926437392112, "percentage": 4.77, "elapsed_time": "1:17:30", "remaining_time": "1 day, 1:48:38"} +{"current_steps": 360, "total_steps": 7532, "loss": 0.440033495426178, "lr": 1.904509283819629e-05, "epoch": 0.09560483335546408, "percentage": 4.78, "elapsed_time": "1:17:42", "remaining_time": "1 day, 1:48:11"} +{"current_steps": 361, "total_steps": 7532, "loss": 0.4825770854949951, "lr": 1.9098143236074273e-05, "epoch": 0.09587040233700704, "percentage": 4.79, "elapsed_time": "1:17:55", "remaining_time": "1 day, 1:47:51"} +{"current_steps": 362, "total_steps": 7532, "loss": 0.48192232847213745, "lr": 
1.9151193633952256e-05, "epoch": 0.09613597131854999, "percentage": 4.81, "elapsed_time": "1:18:07", "remaining_time": "1 day, 1:47:25"} +{"current_steps": 363, "total_steps": 7532, "loss": 0.4689444899559021, "lr": 1.920424403183024e-05, "epoch": 0.09640154030009294, "percentage": 4.82, "elapsed_time": "1:18:20", "remaining_time": "1 day, 1:47:06"} +{"current_steps": 364, "total_steps": 7532, "loss": 0.47120895981788635, "lr": 1.9257294429708223e-05, "epoch": 0.09666710928163591, "percentage": 4.83, "elapsed_time": "1:18:32", "remaining_time": "1 day, 1:46:42"} +{"current_steps": 365, "total_steps": 7532, "loss": 0.4968941807746887, "lr": 1.931034482758621e-05, "epoch": 0.09693267826317886, "percentage": 4.85, "elapsed_time": "1:18:45", "remaining_time": "1 day, 1:46:22"} +{"current_steps": 366, "total_steps": 7532, "loss": 0.46982288360595703, "lr": 1.9363395225464193e-05, "epoch": 0.09719824724472181, "percentage": 4.86, "elapsed_time": "1:18:57", "remaining_time": "1 day, 1:46:01"} +{"current_steps": 367, "total_steps": 7532, "loss": 0.4541531205177307, "lr": 1.9416445623342177e-05, "epoch": 0.09746381622626477, "percentage": 4.87, "elapsed_time": "1:19:10", "remaining_time": "1 day, 1:45:50"} +{"current_steps": 368, "total_steps": 7532, "loss": 0.45576703548431396, "lr": 1.946949602122016e-05, "epoch": 0.09772938520780773, "percentage": 4.89, "elapsed_time": "1:19:23", "remaining_time": "1 day, 1:45:31"} +{"current_steps": 369, "total_steps": 7532, "loss": 0.48060357570648193, "lr": 1.9522546419098144e-05, "epoch": 0.09799495418935068, "percentage": 4.9, "elapsed_time": "1:19:36", "remaining_time": "1 day, 1:45:23"} +{"current_steps": 370, "total_steps": 7532, "loss": 0.47536781430244446, "lr": 1.9575596816976127e-05, "epoch": 0.09826052317089363, "percentage": 4.91, "elapsed_time": "1:19:49", "remaining_time": "1 day, 1:45:03"} +{"current_steps": 371, "total_steps": 7532, "loss": 0.46463894844055176, "lr": 1.9628647214854114e-05, "epoch": 0.0985260921524366, 
"percentage": 4.93, "elapsed_time": "1:20:02", "remaining_time": "1 day, 1:44:53"} +{"current_steps": 372, "total_steps": 7532, "loss": 0.49570178985595703, "lr": 1.9681697612732098e-05, "epoch": 0.09879166113397955, "percentage": 4.94, "elapsed_time": "1:20:15", "remaining_time": "1 day, 1:44:36"} +{"current_steps": 373, "total_steps": 7532, "loss": 0.4764043390750885, "lr": 1.973474801061008e-05, "epoch": 0.0990572301155225, "percentage": 4.95, "elapsed_time": "1:20:28", "remaining_time": "1 day, 1:44:25"} +{"current_steps": 374, "total_steps": 7532, "loss": 0.43582671880722046, "lr": 1.9787798408488064e-05, "epoch": 0.09932279909706546, "percentage": 4.97, "elapsed_time": "1:20:40", "remaining_time": "1 day, 1:43:58"} +{"current_steps": 375, "total_steps": 7532, "loss": 0.46077725291252136, "lr": 1.9840848806366048e-05, "epoch": 0.09958836807860842, "percentage": 4.98, "elapsed_time": "1:20:52", "remaining_time": "1 day, 1:43:39"} +{"current_steps": 376, "total_steps": 7532, "loss": 0.4794929027557373, "lr": 1.9893899204244035e-05, "epoch": 0.09985393706015137, "percentage": 4.99, "elapsed_time": "1:21:04", "remaining_time": "1 day, 1:43:06"} +{"current_steps": 377, "total_steps": 7532, "loss": 0.43174588680267334, "lr": 1.9946949602122018e-05, "epoch": 0.10011950604169433, "percentage": 5.01, "elapsed_time": "1:21:17", "remaining_time": "1 day, 1:42:46"} +{"current_steps": 378, "total_steps": 7532, "loss": 0.44885915517807007, "lr": 2e-05, "epoch": 0.10038507502323729, "percentage": 5.02, "elapsed_time": "1:21:29", "remaining_time": "1 day, 1:42:20"} +{"current_steps": 379, "total_steps": 7532, "loss": 0.520150899887085, "lr": 1.9999999036058974e-05, "epoch": 0.10065064400478024, "percentage": 5.03, "elapsed_time": "1:21:41", "remaining_time": "1 day, 1:41:52"} +{"current_steps": 380, "total_steps": 7532, "loss": 0.5139277577400208, "lr": 1.9999996144236068e-05, "epoch": 0.1009162129863232, "percentage": 5.05, "elapsed_time": "1:21:54", "remaining_time": "1 
day, 1:41:33"} +{"current_steps": 381, "total_steps": 7532, "loss": 0.48935171961784363, "lr": 1.999999132453184e-05, "epoch": 0.10118178196786615, "percentage": 5.06, "elapsed_time": "1:22:06", "remaining_time": "1 day, 1:41:05"} +{"current_steps": 382, "total_steps": 7532, "loss": 0.4805561304092407, "lr": 1.999998457694723e-05, "epoch": 0.10144735094940911, "percentage": 5.07, "elapsed_time": "1:22:19", "remaining_time": "1 day, 1:40:52"} +{"current_steps": 383, "total_steps": 7532, "loss": 0.4340912997722626, "lr": 1.9999975901483532e-05, "epoch": 0.10171291993095206, "percentage": 5.08, "elapsed_time": "1:22:31", "remaining_time": "1 day, 1:40:31"} +{"current_steps": 384, "total_steps": 7532, "loss": 0.48282474279403687, "lr": 1.999996529814242e-05, "epoch": 0.10197848891249502, "percentage": 5.1, "elapsed_time": "1:22:44", "remaining_time": "1 day, 1:40:17"} +{"current_steps": 385, "total_steps": 7532, "loss": 0.4653206169605255, "lr": 1.999995276692593e-05, "epoch": 0.10224405789403798, "percentage": 5.11, "elapsed_time": "1:22:57", "remaining_time": "1 day, 1:39:54"} +{"current_steps": 386, "total_steps": 7532, "loss": 0.48501014709472656, "lr": 1.999993830783649e-05, "epoch": 0.10250962687558093, "percentage": 5.12, "elapsed_time": "1:23:09", "remaining_time": "1 day, 1:39:37"} +{"current_steps": 387, "total_steps": 7532, "loss": 0.48260143399238586, "lr": 1.9999921920876882e-05, "epoch": 0.10277519585712389, "percentage": 5.14, "elapsed_time": "1:23:22", "remaining_time": "1 day, 1:39:17"} +{"current_steps": 388, "total_steps": 7532, "loss": 0.44557270407676697, "lr": 1.9999903606050267e-05, "epoch": 0.10304076483866684, "percentage": 5.15, "elapsed_time": "1:23:34", "remaining_time": "1 day, 1:38:57"} +{"current_steps": 389, "total_steps": 7532, "loss": 0.4843652546405792, "lr": 1.9999883363360175e-05, "epoch": 0.1033063338202098, "percentage": 5.16, "elapsed_time": "1:23:47", "remaining_time": "1 day, 1:38:29"} +{"current_steps": 390, "total_steps": 
7532, "loss": 0.4536727964878082, "lr": 1.9999861192810508e-05, "epoch": 0.10357190280175275, "percentage": 5.18, "elapsed_time": "1:23:59", "remaining_time": "1 day, 1:38:02"} +{"current_steps": 391, "total_steps": 7532, "loss": 0.49557366967201233, "lr": 1.9999837094405538e-05, "epoch": 0.1038374717832957, "percentage": 5.19, "elapsed_time": "1:24:11", "remaining_time": "1 day, 1:37:45"} +{"current_steps": 392, "total_steps": 7532, "loss": 0.45077240467071533, "lr": 1.9999811068149917e-05, "epoch": 0.10410304076483867, "percentage": 5.2, "elapsed_time": "1:24:24", "remaining_time": "1 day, 1:37:31"} +{"current_steps": 393, "total_steps": 7532, "loss": 0.4554041624069214, "lr": 1.9999783114048658e-05, "epoch": 0.10436860974638162, "percentage": 5.22, "elapsed_time": "1:24:37", "remaining_time": "1 day, 1:37:10"} +{"current_steps": 394, "total_steps": 7532, "loss": 0.43526744842529297, "lr": 1.999975323210715e-05, "epoch": 0.10463417872792458, "percentage": 5.23, "elapsed_time": "1:24:49", "remaining_time": "1 day, 1:36:50"} +{"current_steps": 395, "total_steps": 7532, "loss": 0.4097936749458313, "lr": 1.9999721422331154e-05, "epoch": 0.10489974770946753, "percentage": 5.24, "elapsed_time": "1:25:02", "remaining_time": "1 day, 1:36:32"} +{"current_steps": 396, "total_steps": 7532, "loss": 0.4740130305290222, "lr": 1.9999687684726803e-05, "epoch": 0.1051653166910105, "percentage": 5.26, "elapsed_time": "1:25:14", "remaining_time": "1 day, 1:36:04"} +{"current_steps": 397, "total_steps": 7532, "loss": 0.43374374508857727, "lr": 1.9999652019300604e-05, "epoch": 0.10543088567255345, "percentage": 5.27, "elapsed_time": "1:25:26", "remaining_time": "1 day, 1:35:43"} +{"current_steps": 398, "total_steps": 7532, "loss": 0.4423784911632538, "lr": 1.999961442605943e-05, "epoch": 0.1056964546540964, "percentage": 5.28, "elapsed_time": "1:25:39", "remaining_time": "1 day, 1:35:21"} +{"current_steps": 399, "total_steps": 7532, "loss": 0.4660544693470001, "lr": 
1.999957490501053e-05, "epoch": 0.10596202363563936, "percentage": 5.3, "elapsed_time": "1:25:52", "remaining_time": "1 day, 1:35:06"} +{"current_steps": 400, "total_steps": 7532, "loss": 0.4579896628856659, "lr": 1.999953345616152e-05, "epoch": 0.10622759261718231, "percentage": 5.31, "elapsed_time": "1:26:04", "remaining_time": "1 day, 1:34:44"} +{"current_steps": 401, "total_steps": 7532, "loss": 0.4634096920490265, "lr": 1.9999490079520395e-05, "epoch": 0.10649316159872527, "percentage": 5.32, "elapsed_time": "1:26:22", "remaining_time": "1 day, 1:36:05"} +{"current_steps": 402, "total_steps": 7532, "loss": 0.45374077558517456, "lr": 1.9999444775095517e-05, "epoch": 0.10675873058026822, "percentage": 5.34, "elapsed_time": "1:26:36", "remaining_time": "1 day, 1:36:05"} +{"current_steps": 403, "total_steps": 7532, "loss": 0.49752670526504517, "lr": 1.9999397542895615e-05, "epoch": 0.10702429956181118, "percentage": 5.35, "elapsed_time": "1:26:49", "remaining_time": "1 day, 1:35:48"} +{"current_steps": 404, "total_steps": 7532, "loss": 0.4539335370063782, "lr": 1.99993483829298e-05, "epoch": 0.10728986854335414, "percentage": 5.36, "elapsed_time": "1:27:02", "remaining_time": "1 day, 1:35:42"} +{"current_steps": 405, "total_steps": 7532, "loss": 0.4665772616863251, "lr": 1.999929729520755e-05, "epoch": 0.10755543752489709, "percentage": 5.38, "elapsed_time": "1:27:15", "remaining_time": "1 day, 1:35:30"} +{"current_steps": 406, "total_steps": 7532, "loss": 0.4850832223892212, "lr": 1.9999244279738713e-05, "epoch": 0.10782100650644005, "percentage": 5.39, "elapsed_time": "1:27:29", "remaining_time": "1 day, 1:35:32"} +{"current_steps": 407, "total_steps": 7532, "loss": 0.43974876403808594, "lr": 1.9999189336533508e-05, "epoch": 0.108086575487983, "percentage": 5.4, "elapsed_time": "1:27:42", "remaining_time": "1 day, 1:35:19"} +{"current_steps": 408, "total_steps": 7532, "loss": 0.46823856234550476, "lr": 1.9999132465602526e-05, "epoch": 0.10835214446952596, 
"percentage": 5.42, "elapsed_time": "1:27:55", "remaining_time": "1 day, 1:35:16"} +{"current_steps": 409, "total_steps": 7532, "loss": 0.49704545736312866, "lr": 1.9999073666956734e-05, "epoch": 0.10861771345106891, "percentage": 5.43, "elapsed_time": "1:28:08", "remaining_time": "1 day, 1:35:02"} +{"current_steps": 410, "total_steps": 7532, "loss": 0.3863454759120941, "lr": 1.999901294060747e-05, "epoch": 0.10888328243261187, "percentage": 5.44, "elapsed_time": "1:28:22", "remaining_time": "1 day, 1:35:04"} +{"current_steps": 411, "total_steps": 7532, "loss": 0.4903780221939087, "lr": 1.9998950286566438e-05, "epoch": 0.10914885141415483, "percentage": 5.46, "elapsed_time": "1:28:35", "remaining_time": "1 day, 1:34:51"} +{"current_steps": 412, "total_steps": 7532, "loss": 0.4312375485897064, "lr": 1.9998885704845716e-05, "epoch": 0.10941442039569778, "percentage": 5.47, "elapsed_time": "1:28:48", "remaining_time": "1 day, 1:34:46"} +{"current_steps": 413, "total_steps": 7532, "loss": 0.4350954294204712, "lr": 1.9998819195457756e-05, "epoch": 0.10967998937724074, "percentage": 5.48, "elapsed_time": "1:29:01", "remaining_time": "1 day, 1:34:30"} +{"current_steps": 414, "total_steps": 7532, "loss": 0.4364873766899109, "lr": 1.999875075841538e-05, "epoch": 0.1099455583587837, "percentage": 5.5, "elapsed_time": "1:29:15", "remaining_time": "1 day, 1:34:32"} +{"current_steps": 415, "total_steps": 7532, "loss": 0.42079728841781616, "lr": 1.999868039373178e-05, "epoch": 0.11021112734032665, "percentage": 5.51, "elapsed_time": "1:29:28", "remaining_time": "1 day, 1:34:26"} +{"current_steps": 416, "total_steps": 7532, "loss": 0.4396737515926361, "lr": 1.9998608101420527e-05, "epoch": 0.1104766963218696, "percentage": 5.52, "elapsed_time": "1:29:42", "remaining_time": "1 day, 1:34:26"} +{"current_steps": 417, "total_steps": 7532, "loss": 0.44765806198120117, "lr": 1.9998533881495552e-05, "epoch": 0.11074226530341257, "percentage": 5.54, "elapsed_time": "1:29:55", 
"remaining_time": "1 day, 1:34:17"} +{"current_steps": 418, "total_steps": 7532, "loss": 0.46199291944503784, "lr": 1.999845773397117e-05, "epoch": 0.11100783428495552, "percentage": 5.55, "elapsed_time": "1:30:08", "remaining_time": "1 day, 1:34:15"} +{"current_steps": 419, "total_steps": 7532, "loss": 0.44561129808425903, "lr": 1.9998379658862058e-05, "epoch": 0.11127340326649847, "percentage": 5.56, "elapsed_time": "1:30:22", "remaining_time": "1 day, 1:34:06"} +{"current_steps": 420, "total_steps": 7532, "loss": 0.46025681495666504, "lr": 1.9998299656183263e-05, "epoch": 0.11153897224804143, "percentage": 5.58, "elapsed_time": "1:30:35", "remaining_time": "1 day, 1:33:53"} +{"current_steps": 421, "total_steps": 7532, "loss": 0.4408613443374634, "lr": 1.999821772595022e-05, "epoch": 0.11180454122958439, "percentage": 5.59, "elapsed_time": "1:30:48", "remaining_time": "1 day, 1:33:49"} +{"current_steps": 422, "total_steps": 7532, "loss": 0.4846842586994171, "lr": 1.999813386817871e-05, "epoch": 0.11207011021112734, "percentage": 5.6, "elapsed_time": "1:31:01", "remaining_time": "1 day, 1:33:32"} +{"current_steps": 423, "total_steps": 7532, "loss": 0.44503283500671387, "lr": 1.999804808288491e-05, "epoch": 0.11233567919267029, "percentage": 5.62, "elapsed_time": "1:31:14", "remaining_time": "1 day, 1:33:19"} +{"current_steps": 424, "total_steps": 7532, "loss": 0.4090060293674469, "lr": 1.9997960370085355e-05, "epoch": 0.11260124817421326, "percentage": 5.63, "elapsed_time": "1:31:27", "remaining_time": "1 day, 1:33:08"} +{"current_steps": 425, "total_steps": 7532, "loss": 0.43246471881866455, "lr": 1.999787072979696e-05, "epoch": 0.11286681715575621, "percentage": 5.64, "elapsed_time": "1:31:40", "remaining_time": "1 day, 1:33:05"} +{"current_steps": 426, "total_steps": 7532, "loss": 0.46283262968063354, "lr": 1.9997779162036996e-05, "epoch": 0.11313238613729916, "percentage": 5.66, "elapsed_time": "1:31:53", "remaining_time": "1 day, 1:32:50"} +{"current_steps": 
427, "total_steps": 7532, "loss": 0.3866165578365326, "lr": 1.999768566682313e-05, "epoch": 0.11339795511884213, "percentage": 5.67, "elapsed_time": "1:32:06", "remaining_time": "1 day, 1:32:41"} +{"current_steps": 428, "total_steps": 7532, "loss": 0.4501144289970398, "lr": 1.9997590244173374e-05, "epoch": 0.11366352410038508, "percentage": 5.68, "elapsed_time": "1:32:19", "remaining_time": "1 day, 1:32:30"} +{"current_steps": 429, "total_steps": 7532, "loss": 0.43005290627479553, "lr": 1.9997492894106127e-05, "epoch": 0.11392909308192803, "percentage": 5.7, "elapsed_time": "1:32:33", "remaining_time": "1 day, 1:32:25"} +{"current_steps": 430, "total_steps": 7532, "loss": 0.4427964985370636, "lr": 1.9997393616640165e-05, "epoch": 0.11419466206347098, "percentage": 5.71, "elapsed_time": "1:32:46", "remaining_time": "1 day, 1:32:15"} +{"current_steps": 431, "total_steps": 7532, "loss": 0.4690951108932495, "lr": 1.999729241179462e-05, "epoch": 0.11446023104501395, "percentage": 5.72, "elapsed_time": "1:32:59", "remaining_time": "1 day, 1:31:58"} +{"current_steps": 432, "total_steps": 7532, "loss": 0.456949919462204, "lr": 1.9997189279589003e-05, "epoch": 0.1147258000265569, "percentage": 5.74, "elapsed_time": "1:33:12", "remaining_time": "1 day, 1:31:47"} +{"current_steps": 433, "total_steps": 7532, "loss": 0.456052303314209, "lr": 1.99970842200432e-05, "epoch": 0.11499136900809985, "percentage": 5.75, "elapsed_time": "1:33:24", "remaining_time": "1 day, 1:31:28"} +{"current_steps": 434, "total_steps": 7532, "loss": 0.43220120668411255, "lr": 1.9996977233177466e-05, "epoch": 0.11525693798964282, "percentage": 5.76, "elapsed_time": "1:33:37", "remaining_time": "1 day, 1:31:15"} +{"current_steps": 435, "total_steps": 7532, "loss": 0.4237494170665741, "lr": 1.9996868319012422e-05, "epoch": 0.11552250697118577, "percentage": 5.78, "elapsed_time": "1:33:50", "remaining_time": "1 day, 1:30:54"} +{"current_steps": 436, "total_steps": 7532, "loss": 0.4713878631591797, "lr": 
1.9996757477569072e-05, "epoch": 0.11578807595272872, "percentage": 5.79, "elapsed_time": "1:34:02", "remaining_time": "1 day, 1:30:37"} +{"current_steps": 437, "total_steps": 7532, "loss": 0.4561111330986023, "lr": 1.9996644708868776e-05, "epoch": 0.11605364493427167, "percentage": 5.8, "elapsed_time": "1:34:15", "remaining_time": "1 day, 1:30:19"} +{"current_steps": 438, "total_steps": 7532, "loss": 0.468253493309021, "lr": 1.9996530012933285e-05, "epoch": 0.11631921391581464, "percentage": 5.82, "elapsed_time": "1:34:28", "remaining_time": "1 day, 1:30:03"} +{"current_steps": 439, "total_steps": 7532, "loss": 0.4815019369125366, "lr": 1.9996413389784704e-05, "epoch": 0.11658478289735759, "percentage": 5.83, "elapsed_time": "1:34:40", "remaining_time": "1 day, 1:29:42"} +{"current_steps": 440, "total_steps": 7532, "loss": 0.4235987663269043, "lr": 1.9996294839445518e-05, "epoch": 0.11685035187890054, "percentage": 5.84, "elapsed_time": "1:34:53", "remaining_time": "1 day, 1:29:24"} +{"current_steps": 441, "total_steps": 7532, "loss": 0.40562817454338074, "lr": 1.999617436193858e-05, "epoch": 0.1171159208604435, "percentage": 5.86, "elapsed_time": "1:35:05", "remaining_time": "1 day, 1:29:01"} +{"current_steps": 442, "total_steps": 7532, "loss": 0.424539715051651, "lr": 1.999605195728712e-05, "epoch": 0.11738148984198646, "percentage": 5.87, "elapsed_time": "1:35:18", "remaining_time": "1 day, 1:28:47"} +{"current_steps": 443, "total_steps": 7532, "loss": 0.43677473068237305, "lr": 1.9995927625514736e-05, "epoch": 0.11764705882352941, "percentage": 5.88, "elapsed_time": "1:35:31", "remaining_time": "1 day, 1:28:40"} +{"current_steps": 444, "total_steps": 7532, "loss": 0.47325971722602844, "lr": 1.9995801366645396e-05, "epoch": 0.11791262780507236, "percentage": 5.89, "elapsed_time": "1:35:44", "remaining_time": "1 day, 1:28:25"} +{"current_steps": 445, "total_steps": 7532, "loss": 0.4206562638282776, "lr": 1.9995673180703443e-05, "epoch": 0.11817819678661533, 
"percentage": 5.91, "elapsed_time": "1:35:57", "remaining_time": "1 day, 1:28:17"} +{"current_steps": 446, "total_steps": 7532, "loss": 0.4492834210395813, "lr": 1.999554306771359e-05, "epoch": 0.11844376576815828, "percentage": 5.92, "elapsed_time": "1:36:10", "remaining_time": "1 day, 1:28:03"} +{"current_steps": 447, "total_steps": 7532, "loss": 0.4445284605026245, "lr": 1.9995411027700917e-05, "epoch": 0.11870933474970123, "percentage": 5.93, "elapsed_time": "1:36:23", "remaining_time": "1 day, 1:27:55"} +{"current_steps": 448, "total_steps": 7532, "loss": 0.4038352370262146, "lr": 1.9995277060690885e-05, "epoch": 0.1189749037312442, "percentage": 5.95, "elapsed_time": "1:36:36", "remaining_time": "1 day, 1:27:40"} +{"current_steps": 449, "total_steps": 7532, "loss": 0.4261324405670166, "lr": 1.9995141166709318e-05, "epoch": 0.11924047271278715, "percentage": 5.96, "elapsed_time": "1:36:50", "remaining_time": "1 day, 1:27:34"} +{"current_steps": 450, "total_steps": 7532, "loss": 0.44187062978744507, "lr": 1.9995003345782416e-05, "epoch": 0.1195060416943301, "percentage": 5.97, "elapsed_time": "1:37:02", "remaining_time": "1 day, 1:27:18"} +{"current_steps": 451, "total_steps": 7532, "loss": 0.44672587513923645, "lr": 1.9994863597936752e-05, "epoch": 0.11977161067587305, "percentage": 5.99, "elapsed_time": "1:37:16", "remaining_time": "1 day, 1:27:14"} +{"current_steps": 452, "total_steps": 7532, "loss": 0.44322314858436584, "lr": 1.999472192319926e-05, "epoch": 0.12003717965741602, "percentage": 6.0, "elapsed_time": "1:37:29", "remaining_time": "1 day, 1:27:03"} +{"current_steps": 453, "total_steps": 7532, "loss": 0.4396611154079437, "lr": 1.9994578321597258e-05, "epoch": 0.12030274863895897, "percentage": 6.01, "elapsed_time": "1:37:42", "remaining_time": "1 day, 1:26:58"} +{"current_steps": 454, "total_steps": 7532, "loss": 0.4487733542919159, "lr": 1.9994432793158433e-05, "epoch": 0.12056831762050192, "percentage": 6.03, "elapsed_time": "1:37:55", 
"remaining_time": "1 day, 1:26:47"} +{"current_steps": 455, "total_steps": 7532, "loss": 0.3969653248786926, "lr": 1.999428533791084e-05, "epoch": 0.12083388660204489, "percentage": 6.04, "elapsed_time": "1:38:09", "remaining_time": "1 day, 1:26:47"} +{"current_steps": 456, "total_steps": 7532, "loss": 0.39312344789505005, "lr": 1.9994135955882906e-05, "epoch": 0.12109945558358784, "percentage": 6.05, "elapsed_time": "1:38:22", "remaining_time": "1 day, 1:26:36"} +{"current_steps": 457, "total_steps": 7532, "loss": 0.3979804217815399, "lr": 1.9993984647103425e-05, "epoch": 0.12136502456513079, "percentage": 6.07, "elapsed_time": "1:38:36", "remaining_time": "1 day, 1:26:35"} +{"current_steps": 458, "total_steps": 7532, "loss": 0.4430229365825653, "lr": 1.9993831411601573e-05, "epoch": 0.12163059354667374, "percentage": 6.08, "elapsed_time": "1:38:49", "remaining_time": "1 day, 1:26:24"} +{"current_steps": 459, "total_steps": 7532, "loss": 0.4511718451976776, "lr": 1.9993676249406895e-05, "epoch": 0.12189616252821671, "percentage": 6.09, "elapsed_time": "1:39:02", "remaining_time": "1 day, 1:26:11"} +{"current_steps": 460, "total_steps": 7532, "loss": 0.4686455726623535, "lr": 1.9993519160549298e-05, "epoch": 0.12216173150975966, "percentage": 6.11, "elapsed_time": "1:39:15", "remaining_time": "1 day, 1:25:58"} +{"current_steps": 461, "total_steps": 7532, "loss": 0.4501730501651764, "lr": 1.9993360145059073e-05, "epoch": 0.12242730049130261, "percentage": 6.12, "elapsed_time": "1:39:27", "remaining_time": "1 day, 1:25:33"} +{"current_steps": 462, "total_steps": 7532, "loss": 0.40718767046928406, "lr": 1.999319920296687e-05, "epoch": 0.12269286947284558, "percentage": 6.13, "elapsed_time": "1:39:40", "remaining_time": "1 day, 1:25:19"} +{"current_steps": 463, "total_steps": 7532, "loss": 0.47313761711120605, "lr": 1.9993036334303716e-05, "epoch": 0.12295843845438853, "percentage": 6.15, "elapsed_time": "1:39:53", "remaining_time": "1 day, 1:25:07"} +{"current_steps": 
464, "total_steps": 7532, "loss": 0.47417378425598145, "lr": 1.9992871539101018e-05, "epoch": 0.12322400743593148, "percentage": 6.16, "elapsed_time": "1:40:06", "remaining_time": "1 day, 1:25:01"} +{"current_steps": 465, "total_steps": 7532, "loss": 0.44206154346466064, "lr": 1.999270481739054e-05, "epoch": 0.12348957641747443, "percentage": 6.17, "elapsed_time": "1:40:19", "remaining_time": "1 day, 1:24:41"} +{"current_steps": 466, "total_steps": 7532, "loss": 0.3800848722457886, "lr": 1.9992536169204427e-05, "epoch": 0.1237551453990174, "percentage": 6.19, "elapsed_time": "1:40:32", "remaining_time": "1 day, 1:24:36"} +{"current_steps": 467, "total_steps": 7532, "loss": 0.40339407324790955, "lr": 1.9992365594575194e-05, "epoch": 0.12402071438056035, "percentage": 6.2, "elapsed_time": "1:40:45", "remaining_time": "1 day, 1:24:18"} +{"current_steps": 468, "total_steps": 7532, "loss": 0.45280492305755615, "lr": 1.999219309353572e-05, "epoch": 0.1242862833621033, "percentage": 6.21, "elapsed_time": "1:40:58", "remaining_time": "1 day, 1:24:10"} +{"current_steps": 469, "total_steps": 7532, "loss": 0.4600910544395447, "lr": 1.9992018666119266e-05, "epoch": 0.12455185234364627, "percentage": 6.23, "elapsed_time": "1:41:11", "remaining_time": "1 day, 1:23:49"} +{"current_steps": 470, "total_steps": 7532, "loss": 0.4475003480911255, "lr": 1.9991842312359458e-05, "epoch": 0.12481742132518922, "percentage": 6.24, "elapsed_time": "1:41:24", "remaining_time": "1 day, 1:23:42"} +{"current_steps": 471, "total_steps": 7532, "loss": 0.45377033948898315, "lr": 1.9991664032290297e-05, "epoch": 0.12508299030673217, "percentage": 6.25, "elapsed_time": "1:41:37", "remaining_time": "1 day, 1:23:27"} +{"current_steps": 472, "total_steps": 7532, "loss": 0.4397522509098053, "lr": 1.9991483825946147e-05, "epoch": 0.12534855928827512, "percentage": 6.27, "elapsed_time": "1:41:50", "remaining_time": "1 day, 1:23:12"} +{"current_steps": 473, "total_steps": 7532, "loss": 0.4258221387863159, 
"lr": 1.9991301693361756e-05, "epoch": 0.12561412826981808, "percentage": 6.28, "elapsed_time": "1:42:03", "remaining_time": "1 day, 1:23:04"} +{"current_steps": 474, "total_steps": 7532, "loss": 0.40272068977355957, "lr": 1.9991117634572234e-05, "epoch": 0.12587969725136103, "percentage": 6.29, "elapsed_time": "1:42:15", "remaining_time": "1 day, 1:22:45"} +{"current_steps": 475, "total_steps": 7532, "loss": 0.3721206784248352, "lr": 1.9990931649613067e-05, "epoch": 0.126145266232904, "percentage": 6.31, "elapsed_time": "1:42:29", "remaining_time": "1 day, 1:22:42"} +{"current_steps": 476, "total_steps": 7532, "loss": 0.4530203938484192, "lr": 1.9990743738520115e-05, "epoch": 0.12641083521444696, "percentage": 6.32, "elapsed_time": "1:42:42", "remaining_time": "1 day, 1:22:28"} +{"current_steps": 477, "total_steps": 7532, "loss": 0.4281614422798157, "lr": 1.999055390132959e-05, "epoch": 0.1266764041959899, "percentage": 6.33, "elapsed_time": "1:42:55", "remaining_time": "1 day, 1:22:23"} +{"current_steps": 478, "total_steps": 7532, "loss": 0.41965895891189575, "lr": 1.999036213807811e-05, "epoch": 0.12694197317753286, "percentage": 6.35, "elapsed_time": "1:43:08", "remaining_time": "1 day, 1:22:09"} +{"current_steps": 479, "total_steps": 7532, "loss": 0.40055203437805176, "lr": 1.9990168448802633e-05, "epoch": 0.12720754215907581, "percentage": 6.36, "elapsed_time": "1:43:21", "remaining_time": "1 day, 1:21:59"} +{"current_steps": 480, "total_steps": 7532, "loss": 0.4266522526741028, "lr": 1.99899728335405e-05, "epoch": 0.12747311114061877, "percentage": 6.37, "elapsed_time": "1:43:33", "remaining_time": "1 day, 1:21:33"} +{"current_steps": 481, "total_steps": 7532, "loss": 0.42291250824928284, "lr": 1.9989775292329425e-05, "epoch": 0.12773868012216172, "percentage": 6.39, "elapsed_time": "1:43:46", "remaining_time": "1 day, 1:21:15"} +{"current_steps": 482, "total_steps": 7532, "loss": 0.41346436738967896, "lr": 1.9989575825207494e-05, "epoch": 
0.1280042491037047, "percentage": 6.4, "elapsed_time": "1:43:59", "remaining_time": "1 day, 1:20:56"} +{"current_steps": 483, "total_steps": 7532, "loss": 0.4092825651168823, "lr": 1.998937443221316e-05, "epoch": 0.12826981808524765, "percentage": 6.41, "elapsed_time": "1:44:12", "remaining_time": "1 day, 1:20:44"} +{"current_steps": 484, "total_steps": 7532, "loss": 0.39763280749320984, "lr": 1.998917111338525e-05, "epoch": 0.1285353870667906, "percentage": 6.43, "elapsed_time": "1:44:24", "remaining_time": "1 day, 1:20:28"} +{"current_steps": 485, "total_steps": 7532, "loss": 0.45523273944854736, "lr": 1.9988965868762956e-05, "epoch": 0.12880095604833355, "percentage": 6.44, "elapsed_time": "1:44:37", "remaining_time": "1 day, 1:20:09"} +{"current_steps": 486, "total_steps": 7532, "loss": 0.40181300044059753, "lr": 1.9988758698385854e-05, "epoch": 0.1290665250298765, "percentage": 6.45, "elapsed_time": "1:44:49", "remaining_time": "1 day, 1:19:46"} +{"current_steps": 487, "total_steps": 7532, "loss": 0.42487743496894836, "lr": 1.9988549602293884e-05, "epoch": 0.12933209401141946, "percentage": 6.47, "elapsed_time": "1:45:02", "remaining_time": "1 day, 1:19:27"} +{"current_steps": 488, "total_steps": 7532, "loss": 0.41672298312187195, "lr": 1.998833858052735e-05, "epoch": 0.1295976629929624, "percentage": 6.48, "elapsed_time": "1:45:14", "remaining_time": "1 day, 1:19:06"} +{"current_steps": 489, "total_steps": 7532, "loss": 0.36750108003616333, "lr": 1.998812563312694e-05, "epoch": 0.1298632319745054, "percentage": 6.49, "elapsed_time": "1:45:27", "remaining_time": "1 day, 1:18:53"} +{"current_steps": 490, "total_steps": 7532, "loss": 0.49290573596954346, "lr": 1.9987910760133712e-05, "epoch": 0.13012880095604834, "percentage": 6.51, "elapsed_time": "1:45:40", "remaining_time": "1 day, 1:18:35"} +{"current_steps": 491, "total_steps": 7532, "loss": 0.460039347410202, "lr": 1.9987693961589084e-05, "epoch": 0.1303943699375913, "percentage": 6.52, "elapsed_time": 
"1:45:52", "remaining_time": "1 day, 1:18:20"} +{"current_steps": 492, "total_steps": 7532, "loss": 0.4471668303012848, "lr": 1.998747523753485e-05, "epoch": 0.13065993891913424, "percentage": 6.53, "elapsed_time": "1:46:06", "remaining_time": "1 day, 1:18:15"} +{"current_steps": 493, "total_steps": 7532, "loss": 0.395844966173172, "lr": 1.9987254588013184e-05, "epoch": 0.1309255079006772, "percentage": 6.55, "elapsed_time": "1:46:19", "remaining_time": "1 day, 1:18:02"} +{"current_steps": 494, "total_steps": 7532, "loss": 0.4465745985507965, "lr": 1.9987032013066623e-05, "epoch": 0.13119107688222015, "percentage": 6.56, "elapsed_time": "1:46:32", "remaining_time": "1 day, 1:17:54"} +{"current_steps": 495, "total_steps": 7532, "loss": 0.43123912811279297, "lr": 1.9986807512738075e-05, "epoch": 0.1314566458637631, "percentage": 6.57, "elapsed_time": "1:46:45", "remaining_time": "1 day, 1:17:42"} +{"current_steps": 496, "total_steps": 7532, "loss": 0.40066564083099365, "lr": 1.9986581087070824e-05, "epoch": 0.13172221484530608, "percentage": 6.59, "elapsed_time": "1:46:58", "remaining_time": "1 day, 1:17:35"} +{"current_steps": 497, "total_steps": 7532, "loss": 0.38514643907546997, "lr": 1.9986352736108515e-05, "epoch": 0.13198778382684903, "percentage": 6.6, "elapsed_time": "1:47:11", "remaining_time": "1 day, 1:17:20"} +{"current_steps": 498, "total_steps": 7532, "loss": 0.37397241592407227, "lr": 1.9986122459895182e-05, "epoch": 0.13225335280839198, "percentage": 6.61, "elapsed_time": "1:47:25", "remaining_time": "1 day, 1:17:14"} +{"current_steps": 499, "total_steps": 7532, "loss": 0.44865745306015015, "lr": 1.9985890258475215e-05, "epoch": 0.13251892178993493, "percentage": 6.63, "elapsed_time": "1:47:38", "remaining_time": "1 day, 1:17:00"} +{"current_steps": 500, "total_steps": 7532, "loss": 0.4161406457424164, "lr": 1.9985656131893374e-05, "epoch": 0.1327844907714779, "percentage": 6.64, "elapsed_time": "1:47:51", "remaining_time": "1 day, 1:16:55"} 
+{"current_steps": 501, "total_steps": 7532, "loss": 0.41364359855651855, "lr": 1.9985420080194804e-05, "epoch": 0.13305005975302084, "percentage": 6.65, "elapsed_time": "1:48:10", "remaining_time": "1 day, 1:18:03"} +{"current_steps": 502, "total_steps": 7532, "loss": 0.38466009497642517, "lr": 1.9985182103425007e-05, "epoch": 0.1333156287345638, "percentage": 6.66, "elapsed_time": "1:48:23", "remaining_time": "1 day, 1:17:49"} +{"current_steps": 503, "total_steps": 7532, "loss": 0.4189472794532776, "lr": 1.9984942201629868e-05, "epoch": 0.13358119771610677, "percentage": 6.68, "elapsed_time": "1:48:36", "remaining_time": "1 day, 1:17:45"} +{"current_steps": 504, "total_steps": 7532, "loss": 0.4088754653930664, "lr": 1.998470037485563e-05, "epoch": 0.13384676669764972, "percentage": 6.69, "elapsed_time": "1:48:49", "remaining_time": "1 day, 1:17:23"} +{"current_steps": 505, "total_steps": 7532, "loss": 0.4197084307670593, "lr": 1.9984456623148923e-05, "epoch": 0.13411233567919267, "percentage": 6.7, "elapsed_time": "1:49:02", "remaining_time": "1 day, 1:17:11"} +{"current_steps": 506, "total_steps": 7532, "loss": 0.4318644404411316, "lr": 1.998421094655673e-05, "epoch": 0.13437790466073563, "percentage": 6.72, "elapsed_time": "1:49:14", "remaining_time": "1 day, 1:16:46"} +{"current_steps": 507, "total_steps": 7532, "loss": 0.38180238008499146, "lr": 1.9983963345126423e-05, "epoch": 0.13464347364227858, "percentage": 6.73, "elapsed_time": "1:49:26", "remaining_time": "1 day, 1:16:29"} +{"current_steps": 508, "total_steps": 7532, "loss": 0.38704103231430054, "lr": 1.9983713818905733e-05, "epoch": 0.13490904262382153, "percentage": 6.74, "elapsed_time": "1:49:38", "remaining_time": "1 day, 1:16:05"} +{"current_steps": 509, "total_steps": 7532, "loss": 0.4206693768501282, "lr": 1.998346236794276e-05, "epoch": 0.13517461160536448, "percentage": 6.76, "elapsed_time": "1:49:51", "remaining_time": "1 day, 1:15:49"} +{"current_steps": 510, "total_steps": 7532, "loss": 
0.42818987369537354, "lr": 1.9983208992285993e-05, "epoch": 0.13544018058690746, "percentage": 6.77, "elapsed_time": "1:50:04", "remaining_time": "1 day, 1:15:31"} +{"current_steps": 511, "total_steps": 7532, "loss": 0.44592660665512085, "lr": 1.9982953691984274e-05, "epoch": 0.1357057495684504, "percentage": 6.78, "elapsed_time": "1:50:16", "remaining_time": "1 day, 1:15:15"} +{"current_steps": 512, "total_steps": 7532, "loss": 0.4272580146789551, "lr": 1.9982696467086815e-05, "epoch": 0.13597131854999336, "percentage": 6.8, "elapsed_time": "1:50:29", "remaining_time": "1 day, 1:14:52"} +{"current_steps": 513, "total_steps": 7532, "loss": 0.4416295289993286, "lr": 1.9982437317643218e-05, "epoch": 0.13623688753153632, "percentage": 6.81, "elapsed_time": "1:50:41", "remaining_time": "1 day, 1:14:30"} +{"current_steps": 514, "total_steps": 7532, "loss": 0.45108669996261597, "lr": 1.998217624370343e-05, "epoch": 0.13650245651307927, "percentage": 6.82, "elapsed_time": "1:50:54", "remaining_time": "1 day, 1:14:13"} +{"current_steps": 515, "total_steps": 7532, "loss": 0.40311864018440247, "lr": 1.9981913245317802e-05, "epoch": 0.13676802549462222, "percentage": 6.84, "elapsed_time": "1:51:06", "remaining_time": "1 day, 1:13:51"} +{"current_steps": 516, "total_steps": 7532, "loss": 0.4388020932674408, "lr": 1.9981648322537017e-05, "epoch": 0.13703359447616517, "percentage": 6.85, "elapsed_time": "1:51:18", "remaining_time": "1 day, 1:13:29"} +{"current_steps": 517, "total_steps": 7532, "loss": 0.42741361260414124, "lr": 1.9981381475412162e-05, "epoch": 0.13729916345770815, "percentage": 6.86, "elapsed_time": "1:51:31", "remaining_time": "1 day, 1:13:10"} +{"current_steps": 518, "total_steps": 7532, "loss": 0.3766555190086365, "lr": 1.9981112703994677e-05, "epoch": 0.1375647324392511, "percentage": 6.88, "elapsed_time": "1:51:43", "remaining_time": "1 day, 1:12:51"} +{"current_steps": 519, "total_steps": 7532, "loss": 0.38618308305740356, "lr": 1.998084200833638e-05, 
"epoch": 0.13783030142079405, "percentage": 6.89, "elapsed_time": "1:51:55", "remaining_time": "1 day, 1:12:28"} +{"current_steps": 520, "total_steps": 7532, "loss": 0.4553264379501343, "lr": 1.9980569388489457e-05, "epoch": 0.138095870402337, "percentage": 6.9, "elapsed_time": "1:52:08", "remaining_time": "1 day, 1:12:07"} +{"current_steps": 521, "total_steps": 7532, "loss": 0.44632673263549805, "lr": 1.9980294844506468e-05, "epoch": 0.13836143938387996, "percentage": 6.92, "elapsed_time": "1:52:20", "remaining_time": "1 day, 1:11:41"} +{"current_steps": 522, "total_steps": 7532, "loss": 0.4285067617893219, "lr": 1.998001837644033e-05, "epoch": 0.1386270083654229, "percentage": 6.93, "elapsed_time": "1:52:32", "remaining_time": "1 day, 1:11:19"} +{"current_steps": 523, "total_steps": 7532, "loss": 0.39360538125038147, "lr": 1.9979739984344365e-05, "epoch": 0.13889257734696586, "percentage": 6.94, "elapsed_time": "1:52:44", "remaining_time": "1 day, 1:10:54"} +{"current_steps": 524, "total_steps": 7532, "loss": 0.4007593095302582, "lr": 1.9979459668272226e-05, "epoch": 0.13915814632850884, "percentage": 6.96, "elapsed_time": "1:52:56", "remaining_time": "1 day, 1:10:34"} +{"current_steps": 525, "total_steps": 7532, "loss": 0.40176767110824585, "lr": 1.9979177428277955e-05, "epoch": 0.1394237153100518, "percentage": 6.97, "elapsed_time": "1:53:09", "remaining_time": "1 day, 1:10:13"} +{"current_steps": 526, "total_steps": 7532, "loss": 0.4190528392791748, "lr": 1.9978893264415978e-05, "epoch": 0.13968928429159475, "percentage": 6.98, "elapsed_time": "1:53:22", "remaining_time": "1 day, 1:09:59"} +{"current_steps": 527, "total_steps": 7532, "loss": 0.4139288067817688, "lr": 1.9978607176741063e-05, "epoch": 0.1399548532731377, "percentage": 7.0, "elapsed_time": "1:53:34", "remaining_time": "1 day, 1:09:35"} +{"current_steps": 528, "total_steps": 7532, "loss": 0.3666151463985443, "lr": 1.9978319165308373e-05, "epoch": 0.14022042225468065, "percentage": 7.01, 
"elapsed_time": "1:53:47", "remaining_time": "1 day, 1:09:24"} +{"current_steps": 529, "total_steps": 7532, "loss": 0.44621142745018005, "lr": 1.997802923017343e-05, "epoch": 0.1404859912362236, "percentage": 7.02, "elapsed_time": "1:53:59", "remaining_time": "1 day, 1:09:01"} +{"current_steps": 530, "total_steps": 7532, "loss": 0.4162977635860443, "lr": 1.9977737371392134e-05, "epoch": 0.14075156021776655, "percentage": 7.04, "elapsed_time": "1:54:11", "remaining_time": "1 day, 1:08:40"} +{"current_steps": 531, "total_steps": 7532, "loss": 0.438882052898407, "lr": 1.997744358902075e-05, "epoch": 0.14101712919930953, "percentage": 7.05, "elapsed_time": "1:54:24", "remaining_time": "1 day, 1:08:23"} +{"current_steps": 532, "total_steps": 7532, "loss": 0.43381333351135254, "lr": 1.997714788311591e-05, "epoch": 0.14128269818085248, "percentage": 7.06, "elapsed_time": "1:54:36", "remaining_time": "1 day, 1:08:03"} +{"current_steps": 533, "total_steps": 7532, "loss": 0.41925039887428284, "lr": 1.9976850253734633e-05, "epoch": 0.14154826716239544, "percentage": 7.08, "elapsed_time": "1:54:49", "remaining_time": "1 day, 1:07:46"} +{"current_steps": 534, "total_steps": 7532, "loss": 0.40469998121261597, "lr": 1.997655070093429e-05, "epoch": 0.1418138361439384, "percentage": 7.09, "elapsed_time": "1:55:01", "remaining_time": "1 day, 1:07:25"} +{"current_steps": 535, "total_steps": 7532, "loss": 0.4252749979496002, "lr": 1.9976249224772638e-05, "epoch": 0.14207940512548134, "percentage": 7.1, "elapsed_time": "1:55:14", "remaining_time": "1 day, 1:07:14"} +{"current_steps": 536, "total_steps": 7532, "loss": 0.42437341809272766, "lr": 1.9975945825307788e-05, "epoch": 0.1423449741070243, "percentage": 7.12, "elapsed_time": "1:55:26", "remaining_time": "1 day, 1:06:52"} +{"current_steps": 537, "total_steps": 7532, "loss": 0.3435184955596924, "lr": 1.9975640502598243e-05, "epoch": 0.14261054308856724, "percentage": 7.13, "elapsed_time": "1:55:39", "remaining_time": "1 day, 
1:06:30"} +{"current_steps": 538, "total_steps": 7532, "loss": 0.4677535593509674, "lr": 1.9975333256702864e-05, "epoch": 0.14287611207011022, "percentage": 7.14, "elapsed_time": "1:55:51", "remaining_time": "1 day, 1:06:05"} +{"current_steps": 539, "total_steps": 7532, "loss": 0.3860551118850708, "lr": 1.9975024087680873e-05, "epoch": 0.14314168105165317, "percentage": 7.16, "elapsed_time": "1:56:03", "remaining_time": "1 day, 1:05:44"} +{"current_steps": 540, "total_steps": 7532, "loss": 0.4067271649837494, "lr": 1.9974712995591887e-05, "epoch": 0.14340725003319613, "percentage": 7.17, "elapsed_time": "1:56:15", "remaining_time": "1 day, 1:05:21"} +{"current_steps": 541, "total_steps": 7532, "loss": 0.42236536741256714, "lr": 1.9974399980495877e-05, "epoch": 0.14367281901473908, "percentage": 7.18, "elapsed_time": "1:56:27", "remaining_time": "1 day, 1:05:01"} +{"current_steps": 542, "total_steps": 7532, "loss": 0.45230624079704285, "lr": 1.9974085042453188e-05, "epoch": 0.14393838799628203, "percentage": 7.2, "elapsed_time": "1:56:39", "remaining_time": "1 day, 1:04:36"} +{"current_steps": 543, "total_steps": 7532, "loss": 0.428194522857666, "lr": 1.997376818152453e-05, "epoch": 0.14420395697782498, "percentage": 7.21, "elapsed_time": "1:56:52", "remaining_time": "1 day, 1:04:14"} +{"current_steps": 544, "total_steps": 7532, "loss": 0.40774789452552795, "lr": 1.9973449397771004e-05, "epoch": 0.14446952595936793, "percentage": 7.22, "elapsed_time": "1:57:04", "remaining_time": "1 day, 1:03:51"} +{"current_steps": 545, "total_steps": 7532, "loss": 0.4086815118789673, "lr": 1.9973128691254054e-05, "epoch": 0.1447350949409109, "percentage": 7.24, "elapsed_time": "1:57:16", "remaining_time": "1 day, 1:03:27"} +{"current_steps": 546, "total_steps": 7532, "loss": 0.4045162796974182, "lr": 1.997280606203552e-05, "epoch": 0.14500066392245387, "percentage": 7.25, "elapsed_time": "1:57:28", "remaining_time": "1 day, 1:03:09"} +{"current_steps": 547, "total_steps": 7532, 
"loss": 0.40463268756866455, "lr": 1.9972481510177594e-05, "epoch": 0.14526623290399682, "percentage": 7.26, "elapsed_time": "1:57:41", "remaining_time": "1 day, 1:02:49"} +{"current_steps": 548, "total_steps": 7532, "loss": 0.46733587980270386, "lr": 1.9972155035742847e-05, "epoch": 0.14553180188553977, "percentage": 7.28, "elapsed_time": "1:57:53", "remaining_time": "1 day, 1:02:34"} +{"current_steps": 549, "total_steps": 7532, "loss": 0.45210930705070496, "lr": 1.997182663879422e-05, "epoch": 0.14579737086708272, "percentage": 7.29, "elapsed_time": "1:58:06", "remaining_time": "1 day, 1:02:15"} +{"current_steps": 550, "total_steps": 7532, "loss": 0.39798587560653687, "lr": 1.9971496319395022e-05, "epoch": 0.14606293984862567, "percentage": 7.3, "elapsed_time": "1:58:18", "remaining_time": "1 day, 1:01:58"} +{"current_steps": 551, "total_steps": 7532, "loss": 0.4166080057621002, "lr": 1.9971164077608937e-05, "epoch": 0.14632850883016862, "percentage": 7.32, "elapsed_time": "1:58:31", "remaining_time": "1 day, 1:01:37"} +{"current_steps": 552, "total_steps": 7532, "loss": 0.3995435833930969, "lr": 1.9970829913500017e-05, "epoch": 0.1465940778117116, "percentage": 7.33, "elapsed_time": "1:58:43", "remaining_time": "1 day, 1:01:20"} +{"current_steps": 553, "total_steps": 7532, "loss": 0.39335039258003235, "lr": 1.9970493827132686e-05, "epoch": 0.14685964679325456, "percentage": 7.34, "elapsed_time": "1:58:56", "remaining_time": "1 day, 1:01:03"} +{"current_steps": 554, "total_steps": 7532, "loss": 0.3923008441925049, "lr": 1.9970155818571733e-05, "epoch": 0.1471252157747975, "percentage": 7.36, "elapsed_time": "1:59:09", "remaining_time": "1 day, 1:00:48"} +{"current_steps": 555, "total_steps": 7532, "loss": 0.42148759961128235, "lr": 1.996981588788233e-05, "epoch": 0.14739078475634046, "percentage": 7.37, "elapsed_time": "1:59:21", "remaining_time": "1 day, 1:00:34"} +{"current_steps": 556, "total_steps": 7532, "loss": 0.36099517345428467, "lr": 
1.9969474035130005e-05, "epoch": 0.1476563537378834, "percentage": 7.38, "elapsed_time": "1:59:34", "remaining_time": "1 day, 1:00:18"} +{"current_steps": 557, "total_steps": 7532, "loss": 0.39650559425354004, "lr": 1.9969130260380663e-05, "epoch": 0.14792192271942636, "percentage": 7.4, "elapsed_time": "1:59:46", "remaining_time": "1 day, 0:59:54"} +{"current_steps": 558, "total_steps": 7532, "loss": 0.36410078406333923, "lr": 1.9968784563700586e-05, "epoch": 0.14818749170096931, "percentage": 7.41, "elapsed_time": "1:59:59", "remaining_time": "1 day, 0:59:36"} +{"current_steps": 559, "total_steps": 7532, "loss": 0.41312888264656067, "lr": 1.996843694515641e-05, "epoch": 0.1484530606825123, "percentage": 7.42, "elapsed_time": "2:00:11", "remaining_time": "1 day, 0:59:12"} +{"current_steps": 560, "total_steps": 7532, "loss": 0.3895263373851776, "lr": 1.9968087404815162e-05, "epoch": 0.14871862966405525, "percentage": 7.43, "elapsed_time": "2:00:23", "remaining_time": "1 day, 0:58:54"} +{"current_steps": 561, "total_steps": 7532, "loss": 0.4400597810745239, "lr": 1.9967735942744226e-05, "epoch": 0.1489841986455982, "percentage": 7.45, "elapsed_time": "2:00:35", "remaining_time": "1 day, 0:58:31"} +{"current_steps": 562, "total_steps": 7532, "loss": 0.36712852120399475, "lr": 1.9967382559011356e-05, "epoch": 0.14924976762714115, "percentage": 7.46, "elapsed_time": "2:00:47", "remaining_time": "1 day, 0:58:07"} +{"current_steps": 563, "total_steps": 7532, "loss": 0.4043564200401306, "lr": 1.9967027253684685e-05, "epoch": 0.1495153366086841, "percentage": 7.47, "elapsed_time": "2:01:00", "remaining_time": "1 day, 0:57:49"} +{"current_steps": 564, "total_steps": 7532, "loss": 0.45233044028282166, "lr": 1.9966670026832707e-05, "epoch": 0.14978090559022705, "percentage": 7.49, "elapsed_time": "2:01:12", "remaining_time": "1 day, 0:57:25"} +{"current_steps": 565, "total_steps": 7532, "loss": 0.441600501537323, "lr": 1.9966310878524297e-05, "epoch": 0.15004647457177, 
"percentage": 7.5, "elapsed_time": "2:01:24", "remaining_time": "1 day, 0:57:08"} +{"current_steps": 566, "total_steps": 7532, "loss": 0.4268038868904114, "lr": 1.9965949808828687e-05, "epoch": 0.15031204355331299, "percentage": 7.51, "elapsed_time": "2:01:36", "remaining_time": "1 day, 0:56:42"} +{"current_steps": 567, "total_steps": 7532, "loss": 0.41927874088287354, "lr": 1.9965586817815494e-05, "epoch": 0.15057761253485594, "percentage": 7.53, "elapsed_time": "2:01:48", "remaining_time": "1 day, 0:56:22"} +{"current_steps": 568, "total_steps": 7532, "loss": 0.41488781571388245, "lr": 1.9965221905554695e-05, "epoch": 0.1508431815163989, "percentage": 7.54, "elapsed_time": "2:02:00", "remaining_time": "1 day, 0:55:59"} +{"current_steps": 569, "total_steps": 7532, "loss": 0.3624749779701233, "lr": 1.9964855072116642e-05, "epoch": 0.15110875049794184, "percentage": 7.55, "elapsed_time": "2:02:13", "remaining_time": "1 day, 0:55:47"} +{"current_steps": 570, "total_steps": 7532, "loss": 0.45119866728782654, "lr": 1.996448631757206e-05, "epoch": 0.1513743194794848, "percentage": 7.57, "elapsed_time": "2:02:26", "remaining_time": "1 day, 0:55:31"} +{"current_steps": 571, "total_steps": 7532, "loss": 0.41389739513397217, "lr": 1.996411564199203e-05, "epoch": 0.15163988846102774, "percentage": 7.58, "elapsed_time": "2:02:39", "remaining_time": "1 day, 0:55:12"} +{"current_steps": 572, "total_steps": 7532, "loss": 0.3640916347503662, "lr": 1.996374304544802e-05, "epoch": 0.1519054574425707, "percentage": 7.59, "elapsed_time": "2:02:51", "remaining_time": "1 day, 0:54:50"} +{"current_steps": 573, "total_steps": 7532, "loss": 0.45648565888404846, "lr": 1.9963368528011867e-05, "epoch": 0.15217102642411368, "percentage": 7.61, "elapsed_time": "2:03:03", "remaining_time": "1 day, 0:54:28"} +{"current_steps": 574, "total_steps": 7532, "loss": 0.4335980713367462, "lr": 1.9962992089755765e-05, "epoch": 0.15243659540565663, "percentage": 7.62, "elapsed_time": "2:03:15", 
"remaining_time": "1 day, 0:54:09"} +{"current_steps": 575, "total_steps": 7532, "loss": 0.3908158540725708, "lr": 1.996261373075229e-05, "epoch": 0.15270216438719958, "percentage": 7.63, "elapsed_time": "2:03:27", "remaining_time": "1 day, 0:53:44"} +{"current_steps": 576, "total_steps": 7532, "loss": 0.36533305048942566, "lr": 1.996223345107439e-05, "epoch": 0.15296773336874253, "percentage": 7.65, "elapsed_time": "2:03:39", "remaining_time": "1 day, 0:53:25"} +{"current_steps": 577, "total_steps": 7532, "loss": 0.407212495803833, "lr": 1.9961851250795372e-05, "epoch": 0.15323330235028548, "percentage": 7.66, "elapsed_time": "2:03:52", "remaining_time": "1 day, 0:53:03"} +{"current_steps": 578, "total_steps": 7532, "loss": 0.4266315698623657, "lr": 1.996146712998892e-05, "epoch": 0.15349887133182843, "percentage": 7.67, "elapsed_time": "2:04:04", "remaining_time": "1 day, 0:52:46"} +{"current_steps": 579, "total_steps": 7532, "loss": 0.3806581199169159, "lr": 1.9961081088729092e-05, "epoch": 0.1537644403133714, "percentage": 7.69, "elapsed_time": "2:04:16", "remaining_time": "1 day, 0:52:21"} +{"current_steps": 580, "total_steps": 7532, "loss": 0.40962716937065125, "lr": 1.9960693127090312e-05, "epoch": 0.15403000929491437, "percentage": 7.7, "elapsed_time": "2:04:28", "remaining_time": "1 day, 0:52:04"} +{"current_steps": 581, "total_steps": 7532, "loss": 0.4195394515991211, "lr": 1.996030324514737e-05, "epoch": 0.15429557827645732, "percentage": 7.71, "elapsed_time": "2:04:41", "remaining_time": "1 day, 0:51:45"} +{"current_steps": 582, "total_steps": 7532, "loss": 0.4366803765296936, "lr": 1.995991144297543e-05, "epoch": 0.15456114725800027, "percentage": 7.73, "elapsed_time": "2:04:53", "remaining_time": "1 day, 0:51:25"} +{"current_steps": 583, "total_steps": 7532, "loss": 0.44951680302619934, "lr": 1.995951772065004e-05, "epoch": 0.15482671623954322, "percentage": 7.74, "elapsed_time": "2:05:05", "remaining_time": "1 day, 0:51:02"} +{"current_steps": 584, 
"total_steps": 7532, "loss": 0.42920851707458496, "lr": 1.9959122078247088e-05, "epoch": 0.15509228522108617, "percentage": 7.75, "elapsed_time": "2:05:17", "remaining_time": "1 day, 0:50:38"} +{"current_steps": 585, "total_steps": 7532, "loss": 0.3805098533630371, "lr": 1.9958724515842856e-05, "epoch": 0.15535785420262913, "percentage": 7.77, "elapsed_time": "2:05:30", "remaining_time": "1 day, 0:50:20"} +{"current_steps": 586, "total_steps": 7532, "loss": 0.439333438873291, "lr": 1.995832503351399e-05, "epoch": 0.15562342318417208, "percentage": 7.78, "elapsed_time": "2:05:42", "remaining_time": "1 day, 0:50:00"} +{"current_steps": 587, "total_steps": 7532, "loss": 0.38338547945022583, "lr": 1.9957923631337505e-05, "epoch": 0.15588899216571506, "percentage": 7.79, "elapsed_time": "2:05:54", "remaining_time": "1 day, 0:49:43"} +{"current_steps": 588, "total_steps": 7532, "loss": 0.40603697299957275, "lr": 1.9957520309390786e-05, "epoch": 0.156154561147258, "percentage": 7.81, "elapsed_time": "2:06:06", "remaining_time": "1 day, 0:49:21"} +{"current_steps": 589, "total_steps": 7532, "loss": 0.42816999554634094, "lr": 1.9957115067751594e-05, "epoch": 0.15642013012880096, "percentage": 7.82, "elapsed_time": "2:06:19", "remaining_time": "1 day, 0:49:07"} +{"current_steps": 590, "total_steps": 7532, "loss": 0.42367884516716003, "lr": 1.9956707906498046e-05, "epoch": 0.1566856991103439, "percentage": 7.83, "elapsed_time": "2:06:31", "remaining_time": "1 day, 0:48:46"} +{"current_steps": 591, "total_steps": 7532, "loss": 0.4349297881126404, "lr": 1.995629882570864e-05, "epoch": 0.15695126809188686, "percentage": 7.85, "elapsed_time": "2:06:44", "remaining_time": "1 day, 0:48:27"} +{"current_steps": 592, "total_steps": 7532, "loss": 0.37990960478782654, "lr": 1.995588782546225e-05, "epoch": 0.15721683707342982, "percentage": 7.86, "elapsed_time": "2:06:56", "remaining_time": "1 day, 0:48:09"} +{"current_steps": 593, "total_steps": 7532, "loss": 0.4085468649864197, "lr": 
1.9955474905838102e-05, "epoch": 0.15748240605497277, "percentage": 7.87, "elapsed_time": "2:07:09", "remaining_time": "1 day, 0:47:56"} +{"current_steps": 594, "total_steps": 7532, "loss": 0.41362464427948, "lr": 1.995506006691581e-05, "epoch": 0.15774797503651575, "percentage": 7.89, "elapsed_time": "2:07:22", "remaining_time": "1 day, 0:47:42"} +{"current_steps": 595, "total_steps": 7532, "loss": 0.3830018937587738, "lr": 1.9954643308775342e-05, "epoch": 0.1580135440180587, "percentage": 7.9, "elapsed_time": "2:07:35", "remaining_time": "1 day, 0:47:34"} +{"current_steps": 596, "total_steps": 7532, "loss": 0.48350822925567627, "lr": 1.995422463149705e-05, "epoch": 0.15827911299960165, "percentage": 7.91, "elapsed_time": "2:07:48", "remaining_time": "1 day, 0:47:17"} +{"current_steps": 597, "total_steps": 7532, "loss": 0.4215185344219208, "lr": 1.995380403516165e-05, "epoch": 0.1585446819811446, "percentage": 7.93, "elapsed_time": "2:08:01", "remaining_time": "1 day, 0:47:07"} +{"current_steps": 598, "total_steps": 7532, "loss": 0.42061948776245117, "lr": 1.9953381519850224e-05, "epoch": 0.15881025096268755, "percentage": 7.94, "elapsed_time": "2:08:13", "remaining_time": "1 day, 0:46:53"} +{"current_steps": 599, "total_steps": 7532, "loss": 0.38956254720687866, "lr": 1.995295708564423e-05, "epoch": 0.1590758199442305, "percentage": 7.95, "elapsed_time": "2:08:27", "remaining_time": "1 day, 0:46:43"} +{"current_steps": 600, "total_steps": 7532, "loss": 0.3864685893058777, "lr": 1.9952530732625492e-05, "epoch": 0.15934138892577346, "percentage": 7.97, "elapsed_time": "2:08:39", "remaining_time": "1 day, 0:46:24"} +{"current_steps": 601, "total_steps": 7532, "loss": 0.395724356174469, "lr": 1.9952102460876214e-05, "epoch": 0.15960695790731644, "percentage": 7.98, "elapsed_time": "2:08:57", "remaining_time": "1 day, 0:47:16"} +{"current_steps": 602, "total_steps": 7532, "loss": 0.4220300316810608, "lr": 1.995167227047895e-05, "epoch": 0.1598725268888594, 
"percentage": 7.99, "elapsed_time": "2:09:11", "remaining_time": "1 day, 0:47:13"} +{"current_steps": 603, "total_steps": 7532, "loss": 0.4129142165184021, "lr": 1.9951240161516643e-05, "epoch": 0.16013809587040234, "percentage": 8.01, "elapsed_time": "2:09:24", "remaining_time": "1 day, 0:46:59"} +{"current_steps": 604, "total_steps": 7532, "loss": 0.3951375484466553, "lr": 1.9950806134072595e-05, "epoch": 0.1604036648519453, "percentage": 8.02, "elapsed_time": "2:09:37", "remaining_time": "1 day, 0:46:51"} +{"current_steps": 605, "total_steps": 7532, "loss": 0.4117582142353058, "lr": 1.9950370188230486e-05, "epoch": 0.16066923383348825, "percentage": 8.03, "elapsed_time": "2:09:50", "remaining_time": "1 day, 0:46:39"} +{"current_steps": 606, "total_steps": 7532, "loss": 0.3920668363571167, "lr": 1.994993232407436e-05, "epoch": 0.1609348028150312, "percentage": 8.05, "elapsed_time": "2:10:04", "remaining_time": "1 day, 0:46:35"} +{"current_steps": 607, "total_steps": 7532, "loss": 0.3756999373435974, "lr": 1.9949492541688626e-05, "epoch": 0.16120037179657415, "percentage": 8.06, "elapsed_time": "2:10:17", "remaining_time": "1 day, 0:46:25"} +{"current_steps": 608, "total_steps": 7532, "loss": 0.41009610891342163, "lr": 1.9949050841158078e-05, "epoch": 0.16146594077811713, "percentage": 8.07, "elapsed_time": "2:10:30", "remaining_time": "1 day, 0:46:20"} +{"current_steps": 609, "total_steps": 7532, "loss": 0.3986571729183197, "lr": 1.994860722256786e-05, "epoch": 0.16173150975966008, "percentage": 8.09, "elapsed_time": "2:10:43", "remaining_time": "1 day, 0:46:06"} +{"current_steps": 610, "total_steps": 7532, "loss": 0.3903341591358185, "lr": 1.994816168600351e-05, "epoch": 0.16199707874120303, "percentage": 8.1, "elapsed_time": "2:10:57", "remaining_time": "1 day, 0:45:57"} +{"current_steps": 611, "total_steps": 7532, "loss": 0.39725261926651, "lr": 1.994771423155091e-05, "epoch": 0.16226264772274598, "percentage": 8.11, "elapsed_time": "2:11:09", 
"remaining_time": "1 day, 0:45:42"} +{"current_steps": 612, "total_steps": 7532, "loss": 0.39461129903793335, "lr": 1.994726485929633e-05, "epoch": 0.16252821670428894, "percentage": 8.13, "elapsed_time": "2:11:22", "remaining_time": "1 day, 0:45:29"} +{"current_steps": 613, "total_steps": 7532, "loss": 0.41346144676208496, "lr": 1.99468135693264e-05, "epoch": 0.1627937856858319, "percentage": 8.14, "elapsed_time": "2:11:35", "remaining_time": "1 day, 0:45:15"} +{"current_steps": 614, "total_steps": 7532, "loss": 0.41148197650909424, "lr": 1.9946360361728127e-05, "epoch": 0.16305935466737484, "percentage": 8.15, "elapsed_time": "2:11:48", "remaining_time": "1 day, 0:45:04"} +{"current_steps": 615, "total_steps": 7532, "loss": 0.38204139471054077, "lr": 1.9945905236588884e-05, "epoch": 0.16332492364891782, "percentage": 8.17, "elapsed_time": "2:12:02", "remaining_time": "1 day, 0:45:01"} +{"current_steps": 616, "total_steps": 7532, "loss": 0.41496896743774414, "lr": 1.9945448193996412e-05, "epoch": 0.16359049263046077, "percentage": 8.18, "elapsed_time": "2:12:14", "remaining_time": "1 day, 0:44:48"} +{"current_steps": 617, "total_steps": 7532, "loss": 0.38998982310295105, "lr": 1.994498923403882e-05, "epoch": 0.16385606161200372, "percentage": 8.19, "elapsed_time": "2:12:27", "remaining_time": "1 day, 0:44:36"} +{"current_steps": 618, "total_steps": 7532, "loss": 0.39018991589546204, "lr": 1.99445283568046e-05, "epoch": 0.16412163059354667, "percentage": 8.2, "elapsed_time": "2:12:40", "remaining_time": "1 day, 0:44:15"} +{"current_steps": 619, "total_steps": 7532, "loss": 0.41579991579055786, "lr": 1.9944065562382594e-05, "epoch": 0.16438719957508963, "percentage": 8.22, "elapsed_time": "2:12:52", "remaining_time": "1 day, 0:44:01"} +{"current_steps": 620, "total_steps": 7532, "loss": 0.426283061504364, "lr": 1.9943600850862027e-05, "epoch": 0.16465276855663258, "percentage": 8.23, "elapsed_time": "2:13:05", "remaining_time": "1 day, 0:43:43"} +{"current_steps": 
621, "total_steps": 7532, "loss": 0.418672651052475, "lr": 1.9943134222332493e-05, "epoch": 0.16491833753817553, "percentage": 8.24, "elapsed_time": "2:13:18", "remaining_time": "1 day, 0:43:34"} +{"current_steps": 622, "total_steps": 7532, "loss": 0.4014776349067688, "lr": 1.9942665676883946e-05, "epoch": 0.1651839065197185, "percentage": 8.26, "elapsed_time": "2:13:30", "remaining_time": "1 day, 0:43:15"} +{"current_steps": 623, "total_steps": 7532, "loss": 0.3714776933193207, "lr": 1.994219521460672e-05, "epoch": 0.16544947550126146, "percentage": 8.27, "elapsed_time": "2:13:43", "remaining_time": "1 day, 0:43:01"} +{"current_steps": 624, "total_steps": 7532, "loss": 0.39415785670280457, "lr": 1.9941722835591514e-05, "epoch": 0.1657150444828044, "percentage": 8.28, "elapsed_time": "2:13:56", "remaining_time": "1 day, 0:42:42"} +{"current_steps": 625, "total_steps": 7532, "loss": 0.3706223964691162, "lr": 1.9941248539929395e-05, "epoch": 0.16598061346434737, "percentage": 8.3, "elapsed_time": "2:14:08", "remaining_time": "1 day, 0:42:23"} +{"current_steps": 626, "total_steps": 7532, "loss": 0.4167429208755493, "lr": 1.9940772327711807e-05, "epoch": 0.16624618244589032, "percentage": 8.31, "elapsed_time": "2:14:21", "remaining_time": "1 day, 0:42:09"} +{"current_steps": 627, "total_steps": 7532, "loss": 0.38234227895736694, "lr": 1.9940294199030553e-05, "epoch": 0.16651175142743327, "percentage": 8.32, "elapsed_time": "2:14:33", "remaining_time": "1 day, 0:41:53"} +{"current_steps": 628, "total_steps": 7532, "loss": 0.4139519929885864, "lr": 1.9939814153977813e-05, "epoch": 0.16677732040897622, "percentage": 8.34, "elapsed_time": "2:14:47", "remaining_time": "1 day, 0:41:48"} +{"current_steps": 629, "total_steps": 7532, "loss": 0.44490402936935425, "lr": 1.9939332192646136e-05, "epoch": 0.1670428893905192, "percentage": 8.35, "elapsed_time": "2:15:00", "remaining_time": "1 day, 0:41:37"} +{"current_steps": 630, "total_steps": 7532, "loss": 0.3870658278465271, 
"lr": 1.993884831512843e-05, "epoch": 0.16730845837206215, "percentage": 8.36, "elapsed_time": "2:15:14", "remaining_time": "1 day, 0:41:34"} +{"current_steps": 631, "total_steps": 7532, "loss": 0.3308948278427124, "lr": 1.993836252151799e-05, "epoch": 0.1675740273536051, "percentage": 8.38, "elapsed_time": "2:15:27", "remaining_time": "1 day, 0:41:22"} +{"current_steps": 632, "total_steps": 7532, "loss": 0.3727487623691559, "lr": 1.993787481190847e-05, "epoch": 0.16783959633514806, "percentage": 8.39, "elapsed_time": "2:15:40", "remaining_time": "1 day, 0:41:19"} +{"current_steps": 633, "total_steps": 7532, "loss": 0.4277465343475342, "lr": 1.9937385186393888e-05, "epoch": 0.168105165316691, "percentage": 8.4, "elapsed_time": "2:15:53", "remaining_time": "1 day, 0:41:04"} +{"current_steps": 634, "total_steps": 7532, "loss": 0.4276485741138458, "lr": 1.9936893645068647e-05, "epoch": 0.16837073429823396, "percentage": 8.42, "elapsed_time": "2:16:06", "remaining_time": "1 day, 0:40:57"} +{"current_steps": 635, "total_steps": 7532, "loss": 0.374578058719635, "lr": 1.9936400188027502e-05, "epoch": 0.1686363032797769, "percentage": 8.43, "elapsed_time": "2:16:19", "remaining_time": "1 day, 0:40:45"} +{"current_steps": 636, "total_steps": 7532, "loss": 0.4583400785923004, "lr": 1.993590481536559e-05, "epoch": 0.1689018722613199, "percentage": 8.44, "elapsed_time": "2:16:33", "remaining_time": "1 day, 0:40:37"} +{"current_steps": 637, "total_steps": 7532, "loss": 0.3734489679336548, "lr": 1.9935407527178417e-05, "epoch": 0.16916744124286284, "percentage": 8.46, "elapsed_time": "2:16:46", "remaining_time": "1 day, 0:40:25"} +{"current_steps": 638, "total_steps": 7532, "loss": 0.39524513483047485, "lr": 1.9934908323561846e-05, "epoch": 0.1694330102244058, "percentage": 8.47, "elapsed_time": "2:16:59", "remaining_time": "1 day, 0:40:16"} +{"current_steps": 639, "total_steps": 7532, "loss": 0.42300352454185486, "lr": 1.9934407204612124e-05, "epoch": 0.16969857920594875, 
"percentage": 8.48, "elapsed_time": "2:17:11", "remaining_time": "1 day, 0:39:58"} +{"current_steps": 640, "total_steps": 7532, "loss": 0.4152276813983917, "lr": 1.9933904170425858e-05, "epoch": 0.1699641481874917, "percentage": 8.5, "elapsed_time": "2:17:25", "remaining_time": "1 day, 0:39:54"} +{"current_steps": 641, "total_steps": 7532, "loss": 0.43046653270721436, "lr": 1.9933399221100026e-05, "epoch": 0.17022971716903465, "percentage": 8.51, "elapsed_time": "2:17:38", "remaining_time": "1 day, 0:39:46"} +{"current_steps": 642, "total_steps": 7532, "loss": 0.4134339392185211, "lr": 1.993289235673198e-05, "epoch": 0.1704952861505776, "percentage": 8.52, "elapsed_time": "2:17:52", "remaining_time": "1 day, 0:39:36"} +{"current_steps": 643, "total_steps": 7532, "loss": 0.44028693437576294, "lr": 1.9932383577419432e-05, "epoch": 0.17076085513212058, "percentage": 8.54, "elapsed_time": "2:18:05", "remaining_time": "1 day, 0:39:31"} +{"current_steps": 644, "total_steps": 7532, "loss": 0.3790222704410553, "lr": 1.9931872883260473e-05, "epoch": 0.17102642411366353, "percentage": 8.55, "elapsed_time": "2:18:18", "remaining_time": "1 day, 0:39:19"} +{"current_steps": 645, "total_steps": 7532, "loss": 0.3683086633682251, "lr": 1.9931360274353556e-05, "epoch": 0.17129199309520649, "percentage": 8.56, "elapsed_time": "2:18:31", "remaining_time": "1 day, 0:39:11"} +{"current_steps": 646, "total_steps": 7532, "loss": 0.3630594313144684, "lr": 1.993084575079751e-05, "epoch": 0.17155756207674944, "percentage": 8.58, "elapsed_time": "2:18:44", "remaining_time": "1 day, 0:38:59"} +{"current_steps": 647, "total_steps": 7532, "loss": 0.4398641884326935, "lr": 1.993032931269153e-05, "epoch": 0.1718231310582924, "percentage": 8.59, "elapsed_time": "2:18:58", "remaining_time": "1 day, 0:38:54"} +{"current_steps": 648, "total_steps": 7532, "loss": 0.42222845554351807, "lr": 1.992981096013517e-05, "epoch": 0.17208870003983534, "percentage": 8.6, "elapsed_time": "2:19:11", 
"remaining_time": "1 day, 0:38:46"} +{"current_steps": 649, "total_steps": 7532, "loss": 0.38966643810272217, "lr": 1.992929069322837e-05, "epoch": 0.1723542690213783, "percentage": 8.62, "elapsed_time": "2:19:25", "remaining_time": "1 day, 0:38:40"} +{"current_steps": 650, "total_steps": 7532, "loss": 0.4394804835319519, "lr": 1.992876851207143e-05, "epoch": 0.17261983800292127, "percentage": 8.63, "elapsed_time": "2:19:38", "remaining_time": "1 day, 0:38:31"} +{"current_steps": 651, "total_steps": 7532, "loss": 0.3475287854671478, "lr": 1.9928244416765022e-05, "epoch": 0.17288540698446422, "percentage": 8.64, "elapsed_time": "2:19:52", "remaining_time": "1 day, 0:38:24"} +{"current_steps": 652, "total_steps": 7532, "loss": 0.40047168731689453, "lr": 1.992771840741018e-05, "epoch": 0.17315097596600718, "percentage": 8.66, "elapsed_time": "2:20:05", "remaining_time": "1 day, 0:38:16"} +{"current_steps": 653, "total_steps": 7532, "loss": 0.4028981328010559, "lr": 1.9927190484108315e-05, "epoch": 0.17341654494755013, "percentage": 8.67, "elapsed_time": "2:20:19", "remaining_time": "1 day, 0:38:14"} +{"current_steps": 654, "total_steps": 7532, "loss": 0.3891482949256897, "lr": 1.9926660646961208e-05, "epoch": 0.17368211392909308, "percentage": 8.68, "elapsed_time": "2:20:32", "remaining_time": "1 day, 0:38:04"} +{"current_steps": 655, "total_steps": 7532, "loss": 0.4570680856704712, "lr": 1.9926128896071e-05, "epoch": 0.17394768291063603, "percentage": 8.7, "elapsed_time": "2:20:45", "remaining_time": "1 day, 0:37:53"} +{"current_steps": 656, "total_steps": 7532, "loss": 0.392758309841156, "lr": 1.992559523154021e-05, "epoch": 0.17421325189217898, "percentage": 8.71, "elapsed_time": "2:20:59", "remaining_time": "1 day, 0:37:48"} +{"current_steps": 657, "total_steps": 7532, "loss": 0.39552047848701477, "lr": 1.992505965347172e-05, "epoch": 0.17447882087372196, "percentage": 8.72, "elapsed_time": "2:21:12", "remaining_time": "1 day, 0:37:38"} +{"current_steps": 658, 
"total_steps": 7532, "loss": 0.4412619173526764, "lr": 1.992452216196879e-05, "epoch": 0.17474438985526491, "percentage": 8.74, "elapsed_time": "2:21:26", "remaining_time": "1 day, 0:37:34"} +{"current_steps": 659, "total_steps": 7532, "loss": 0.4075942635536194, "lr": 1.9923982757135028e-05, "epoch": 0.17500995883680787, "percentage": 8.75, "elapsed_time": "2:21:39", "remaining_time": "1 day, 0:37:23"} +{"current_steps": 660, "total_steps": 7532, "loss": 0.44615018367767334, "lr": 1.9923441439074434e-05, "epoch": 0.17527552781835082, "percentage": 8.76, "elapsed_time": "2:21:53", "remaining_time": "1 day, 0:37:20"} +{"current_steps": 661, "total_steps": 7532, "loss": 0.3957441449165344, "lr": 1.992289820789137e-05, "epoch": 0.17554109679989377, "percentage": 8.78, "elapsed_time": "2:22:06", "remaining_time": "1 day, 0:37:09"} +{"current_steps": 662, "total_steps": 7532, "loss": 0.4014820158481598, "lr": 1.992235306369056e-05, "epoch": 0.17580666578143672, "percentage": 8.79, "elapsed_time": "2:22:20", "remaining_time": "1 day, 0:37:05"} +{"current_steps": 663, "total_steps": 7532, "loss": 0.39478158950805664, "lr": 1.9921806006577102e-05, "epoch": 0.17607223476297967, "percentage": 8.8, "elapsed_time": "2:22:32", "remaining_time": "1 day, 0:36:48"} +{"current_steps": 664, "total_steps": 7532, "loss": 0.45742082595825195, "lr": 1.9921257036656463e-05, "epoch": 0.17633780374452265, "percentage": 8.82, "elapsed_time": "2:22:45", "remaining_time": "1 day, 0:36:35"} +{"current_steps": 665, "total_steps": 7532, "loss": 0.36519041657447815, "lr": 1.9920706154034477e-05, "epoch": 0.1766033727260656, "percentage": 8.83, "elapsed_time": "2:22:58", "remaining_time": "1 day, 0:36:19"} +{"current_steps": 666, "total_steps": 7532, "loss": 0.40599358081817627, "lr": 1.992015335881735e-05, "epoch": 0.17686894170760856, "percentage": 8.84, "elapsed_time": "2:23:11", "remaining_time": "1 day, 0:36:10"} +{"current_steps": 667, "total_steps": 7532, "loss": 0.4064781367778778, "lr": 
1.991959865111165e-05, "epoch": 0.1771345106891515, "percentage": 8.86, "elapsed_time": "2:23:24", "remaining_time": "1 day, 0:35:56"} +{"current_steps": 668, "total_steps": 7532, "loss": 0.4076484143733978, "lr": 1.991904203102432e-05, "epoch": 0.17740007967069446, "percentage": 8.87, "elapsed_time": "2:23:37", "remaining_time": "1 day, 0:35:49"} +{"current_steps": 669, "total_steps": 7532, "loss": 0.42157143354415894, "lr": 1.9918483498662678e-05, "epoch": 0.1776656486522374, "percentage": 8.88, "elapsed_time": "2:23:50", "remaining_time": "1 day, 0:35:35"} +{"current_steps": 670, "total_steps": 7532, "loss": 0.3814900517463684, "lr": 1.9917923054134388e-05, "epoch": 0.17793121763378036, "percentage": 8.9, "elapsed_time": "2:24:04", "remaining_time": "1 day, 0:35:32"} +{"current_steps": 671, "total_steps": 7532, "loss": 0.4211175739765167, "lr": 1.9917360697547506e-05, "epoch": 0.17819678661532334, "percentage": 8.91, "elapsed_time": "2:24:17", "remaining_time": "1 day, 0:35:20"} +{"current_steps": 672, "total_steps": 7532, "loss": 0.3975893259048462, "lr": 1.991679642901045e-05, "epoch": 0.1784623555968663, "percentage": 8.92, "elapsed_time": "2:24:30", "remaining_time": "1 day, 0:35:12"} +{"current_steps": 673, "total_steps": 7532, "loss": 0.36090826988220215, "lr": 1.9916230248631993e-05, "epoch": 0.17872792457840925, "percentage": 8.94, "elapsed_time": "2:24:43", "remaining_time": "1 day, 0:35:02"} +{"current_steps": 674, "total_steps": 7532, "loss": 0.36511334776878357, "lr": 1.99156621565213e-05, "epoch": 0.1789934935599522, "percentage": 8.95, "elapsed_time": "2:24:57", "remaining_time": "1 day, 0:34:52"} +{"current_steps": 675, "total_steps": 7532, "loss": 0.4131924510002136, "lr": 1.9915092152787888e-05, "epoch": 0.17925906254149515, "percentage": 8.96, "elapsed_time": "2:25:10", "remaining_time": "1 day, 0:34:46"} +{"current_steps": 676, "total_steps": 7532, "loss": 0.4283728301525116, "lr": 1.9914520237541644e-05, "epoch": 0.1795246315230381, 
"percentage": 8.98, "elapsed_time": "2:25:23", "remaining_time": "1 day, 0:34:35"} +{"current_steps": 677, "total_steps": 7532, "loss": 0.3855544924736023, "lr": 1.991394641089283e-05, "epoch": 0.17979020050458105, "percentage": 8.99, "elapsed_time": "2:25:37", "remaining_time": "1 day, 0:34:34"} +{"current_steps": 678, "total_steps": 7532, "loss": 0.41288501024246216, "lr": 1.9913370672952074e-05, "epoch": 0.18005576948612403, "percentage": 9.0, "elapsed_time": "2:25:50", "remaining_time": "1 day, 0:34:23"} +{"current_steps": 679, "total_steps": 7532, "loss": 0.3824073076248169, "lr": 1.9912793023830365e-05, "epoch": 0.180321338467667, "percentage": 9.01, "elapsed_time": "2:26:04", "remaining_time": "1 day, 0:34:19"} +{"current_steps": 680, "total_steps": 7532, "loss": 0.39005106687545776, "lr": 1.9912213463639077e-05, "epoch": 0.18058690744920994, "percentage": 9.03, "elapsed_time": "2:26:17", "remaining_time": "1 day, 0:34:09"} +{"current_steps": 681, "total_steps": 7532, "loss": 0.3521374464035034, "lr": 1.9911631992489933e-05, "epoch": 0.1808524764307529, "percentage": 9.04, "elapsed_time": "2:26:31", "remaining_time": "1 day, 0:34:04"} +{"current_steps": 682, "total_steps": 7532, "loss": 0.337347149848938, "lr": 1.9911048610495037e-05, "epoch": 0.18111804541229584, "percentage": 9.05, "elapsed_time": "2:26:43", "remaining_time": "1 day, 0:33:47"} +{"current_steps": 683, "total_steps": 7532, "loss": 0.4349983334541321, "lr": 1.9910463317766864e-05, "epoch": 0.1813836143938388, "percentage": 9.07, "elapsed_time": "2:26:56", "remaining_time": "1 day, 0:33:33"} +{"current_steps": 684, "total_steps": 7532, "loss": 0.3783540427684784, "lr": 1.9909876114418242e-05, "epoch": 0.18164918337538175, "percentage": 9.08, "elapsed_time": "2:27:09", "remaining_time": "1 day, 0:33:15"} +{"current_steps": 685, "total_steps": 7532, "loss": 0.4065130054950714, "lr": 1.9909287000562383e-05, "epoch": 0.18191475235692472, "percentage": 9.09, "elapsed_time": "2:27:21", 
"remaining_time": "1 day, 0:32:59"} +{"current_steps": 686, "total_steps": 7532, "loss": 0.3876315653324127, "lr": 1.990869597631286e-05, "epoch": 0.18218032133846768, "percentage": 9.11, "elapsed_time": "2:27:34", "remaining_time": "1 day, 0:32:45"} +{"current_steps": 687, "total_steps": 7532, "loss": 0.3716024160385132, "lr": 1.9908103041783615e-05, "epoch": 0.18244589032001063, "percentage": 9.12, "elapsed_time": "2:27:47", "remaining_time": "1 day, 0:32:30"} +{"current_steps": 688, "total_steps": 7532, "loss": 0.4096733331680298, "lr": 1.990750819708896e-05, "epoch": 0.18271145930155358, "percentage": 9.13, "elapsed_time": "2:28:00", "remaining_time": "1 day, 0:32:19"} +{"current_steps": 689, "total_steps": 7532, "loss": 0.41432395577430725, "lr": 1.9906911442343567e-05, "epoch": 0.18297702828309653, "percentage": 9.15, "elapsed_time": "2:28:12", "remaining_time": "1 day, 0:32:02"} +{"current_steps": 690, "total_steps": 7532, "loss": 0.3934200406074524, "lr": 1.9906312777662493e-05, "epoch": 0.18324259726463948, "percentage": 9.16, "elapsed_time": "2:28:26", "remaining_time": "1 day, 0:31:56"} +{"current_steps": 691, "total_steps": 7532, "loss": 0.4246784746646881, "lr": 1.9905712203161148e-05, "epoch": 0.18350816624618244, "percentage": 9.17, "elapsed_time": "2:28:39", "remaining_time": "1 day, 0:31:41"} +{"current_steps": 692, "total_steps": 7532, "loss": 0.40027567744255066, "lr": 1.9905109718955323e-05, "epoch": 0.18377373522772542, "percentage": 9.19, "elapsed_time": "2:28:52", "remaining_time": "1 day, 0:31:30"} +{"current_steps": 693, "total_steps": 7532, "loss": 0.4162583351135254, "lr": 1.990450532516116e-05, "epoch": 0.18403930420926837, "percentage": 9.2, "elapsed_time": "2:29:05", "remaining_time": "1 day, 0:31:16"} +{"current_steps": 694, "total_steps": 7532, "loss": 0.4133074879646301, "lr": 1.990389902189518e-05, "epoch": 0.18430487319081132, "percentage": 9.21, "elapsed_time": "2:29:18", "remaining_time": "1 day, 0:31:04"} +{"current_steps": 
695, "total_steps": 7532, "loss": 0.333192378282547, "lr": 1.9903290809274277e-05, "epoch": 0.18457044217235427, "percentage": 9.23, "elapsed_time": "2:29:30", "remaining_time": "1 day, 0:30:49"} +{"current_steps": 696, "total_steps": 7532, "loss": 0.39349496364593506, "lr": 1.9902680687415704e-05, "epoch": 0.18483601115389722, "percentage": 9.24, "elapsed_time": "2:29:43", "remaining_time": "1 day, 0:30:31"} +{"current_steps": 697, "total_steps": 7532, "loss": 0.39678412675857544, "lr": 1.9902068656437086e-05, "epoch": 0.18510158013544017, "percentage": 9.25, "elapsed_time": "2:29:56", "remaining_time": "1 day, 0:30:20"} +{"current_steps": 698, "total_steps": 7532, "loss": 0.3553932011127472, "lr": 1.9901454716456415e-05, "epoch": 0.18536714911698313, "percentage": 9.27, "elapsed_time": "2:30:09", "remaining_time": "1 day, 0:30:05"} +{"current_steps": 699, "total_steps": 7532, "loss": 0.4264630079269409, "lr": 1.990083886759205e-05, "epoch": 0.18563271809852608, "percentage": 9.28, "elapsed_time": "2:30:21", "remaining_time": "1 day, 0:29:52"} +{"current_steps": 700, "total_steps": 7532, "loss": 0.3883950412273407, "lr": 1.9900221109962726e-05, "epoch": 0.18589828708006906, "percentage": 9.29, "elapsed_time": "2:30:34", "remaining_time": "1 day, 0:29:34"} +{"current_steps": 701, "total_steps": 7532, "loss": 0.38465407490730286, "lr": 1.989960144368753e-05, "epoch": 0.186163856061612, "percentage": 9.31, "elapsed_time": "2:30:52", "remaining_time": "1 day, 0:30:10"} +{"current_steps": 702, "total_steps": 7532, "loss": 0.39897871017456055, "lr": 1.9898979868885933e-05, "epoch": 0.18642942504315496, "percentage": 9.32, "elapsed_time": "2:31:04", "remaining_time": "1 day, 0:29:52"} +{"current_steps": 703, "total_steps": 7532, "loss": 0.4386023283004761, "lr": 1.9898356385677762e-05, "epoch": 0.1866949940246979, "percentage": 9.33, "elapsed_time": "2:31:17", "remaining_time": "1 day, 0:29:40"} +{"current_steps": 704, "total_steps": 7532, "loss": 0.42621874809265137, 
"lr": 1.989773099418322e-05, "epoch": 0.18696056300624087, "percentage": 9.35, "elapsed_time": "2:31:30", "remaining_time": "1 day, 0:29:25"} +{"current_steps": 705, "total_steps": 7532, "loss": 0.3811546266078949, "lr": 1.9897103694522877e-05, "epoch": 0.18722613198778382, "percentage": 9.36, "elapsed_time": "2:31:43", "remaining_time": "1 day, 0:29:19"} +{"current_steps": 706, "total_steps": 7532, "loss": 0.4018982946872711, "lr": 1.989647448681767e-05, "epoch": 0.18749170096932677, "percentage": 9.37, "elapsed_time": "2:31:56", "remaining_time": "1 day, 0:29:06"} +{"current_steps": 707, "total_steps": 7532, "loss": 0.3920126259326935, "lr": 1.9895843371188897e-05, "epoch": 0.18775726995086975, "percentage": 9.39, "elapsed_time": "2:32:10", "remaining_time": "1 day, 0:29:00"} +{"current_steps": 708, "total_steps": 7532, "loss": 0.3598487973213196, "lr": 1.9895210347758233e-05, "epoch": 0.1880228389324127, "percentage": 9.4, "elapsed_time": "2:32:23", "remaining_time": "1 day, 0:28:48"} +{"current_steps": 709, "total_steps": 7532, "loss": 0.4204316735267639, "lr": 1.9894575416647717e-05, "epoch": 0.18828840791395565, "percentage": 9.41, "elapsed_time": "2:32:36", "remaining_time": "1 day, 0:28:34"} +{"current_steps": 710, "total_steps": 7532, "loss": 0.33814263343811035, "lr": 1.9893938577979755e-05, "epoch": 0.1885539768954986, "percentage": 9.43, "elapsed_time": "2:32:49", "remaining_time": "1 day, 0:28:21"} +{"current_steps": 711, "total_steps": 7532, "loss": 0.3788227140903473, "lr": 1.9893299831877124e-05, "epoch": 0.18881954587704156, "percentage": 9.44, "elapsed_time": "2:33:01", "remaining_time": "1 day, 0:28:04"} +{"current_steps": 712, "total_steps": 7532, "loss": 0.38141176104545593, "lr": 1.989265917846297e-05, "epoch": 0.1890851148585845, "percentage": 9.45, "elapsed_time": "2:33:14", "remaining_time": "1 day, 0:27:55"} +{"current_steps": 713, "total_steps": 7532, "loss": 0.3757280707359314, "lr": 1.9892016617860793e-05, "epoch": 0.18935068384012746, 
"percentage": 9.47, "elapsed_time": "2:33:27", "remaining_time": "1 day, 0:27:41"} +{"current_steps": 714, "total_steps": 7532, "loss": 0.37819087505340576, "lr": 1.989137215019448e-05, "epoch": 0.18961625282167044, "percentage": 9.48, "elapsed_time": "2:33:40", "remaining_time": "1 day, 0:27:29"} +{"current_steps": 715, "total_steps": 7532, "loss": 0.46046000719070435, "lr": 1.9890725775588277e-05, "epoch": 0.1898818218032134, "percentage": 9.49, "elapsed_time": "2:33:53", "remaining_time": "1 day, 0:27:11"} +{"current_steps": 716, "total_steps": 7532, "loss": 0.33967363834381104, "lr": 1.9890077494166792e-05, "epoch": 0.19014739078475634, "percentage": 9.51, "elapsed_time": "2:34:06", "remaining_time": "1 day, 0:26:58"} +{"current_steps": 717, "total_steps": 7532, "loss": 0.36672675609588623, "lr": 1.988942730605501e-05, "epoch": 0.1904129597662993, "percentage": 9.52, "elapsed_time": "2:34:18", "remaining_time": "1 day, 0:26:40"} +{"current_steps": 718, "total_steps": 7532, "loss": 0.38705015182495117, "lr": 1.9888775211378278e-05, "epoch": 0.19067852874784225, "percentage": 9.53, "elapsed_time": "2:34:31", "remaining_time": "1 day, 0:26:26"} +{"current_steps": 719, "total_steps": 7532, "loss": 0.35257095098495483, "lr": 1.9888121210262313e-05, "epoch": 0.1909440977293852, "percentage": 9.55, "elapsed_time": "2:34:43", "remaining_time": "1 day, 0:26:08"} +{"current_steps": 720, "total_steps": 7532, "loss": 0.3803965449333191, "lr": 1.9887465302833194e-05, "epoch": 0.19120966671092815, "percentage": 9.56, "elapsed_time": "2:34:56", "remaining_time": "1 day, 0:25:56"} +{"current_steps": 721, "total_steps": 7532, "loss": 0.38166487216949463, "lr": 1.988680748921738e-05, "epoch": 0.19147523569247113, "percentage": 9.57, "elapsed_time": "2:35:09", "remaining_time": "1 day, 0:25:38"} +{"current_steps": 722, "total_steps": 7532, "loss": 0.4017483592033386, "lr": 1.988614776954169e-05, "epoch": 0.19174080467401408, "percentage": 9.59, "elapsed_time": "2:35:21", 
"remaining_time": "1 day, 0:25:21"} +{"current_steps": 723, "total_steps": 7532, "loss": 0.4343035817146301, "lr": 1.98854861439333e-05, "epoch": 0.19200637365555703, "percentage": 9.6, "elapsed_time": "2:35:34", "remaining_time": "1 day, 0:25:07"} +{"current_steps": 724, "total_steps": 7532, "loss": 0.4017031192779541, "lr": 1.9884822612519773e-05, "epoch": 0.19227194263709999, "percentage": 9.61, "elapsed_time": "2:35:47", "remaining_time": "1 day, 0:24:55"} +{"current_steps": 725, "total_steps": 7532, "loss": 0.32294636964797974, "lr": 1.988415717542903e-05, "epoch": 0.19253751161864294, "percentage": 9.63, "elapsed_time": "2:36:01", "remaining_time": "1 day, 0:24:50"} +{"current_steps": 726, "total_steps": 7532, "loss": 0.34661561250686646, "lr": 1.988348983278935e-05, "epoch": 0.1928030806001859, "percentage": 9.64, "elapsed_time": "2:36:13", "remaining_time": "1 day, 0:24:37"} +{"current_steps": 727, "total_steps": 7532, "loss": 0.3588724434375763, "lr": 1.98828205847294e-05, "epoch": 0.19306864958172884, "percentage": 9.65, "elapsed_time": "2:36:26", "remaining_time": "1 day, 0:24:23"} +{"current_steps": 728, "total_steps": 7532, "loss": 0.45439180731773376, "lr": 1.9882149431378194e-05, "epoch": 0.19333421856327182, "percentage": 9.67, "elapsed_time": "2:36:39", "remaining_time": "1 day, 0:24:06"} +{"current_steps": 729, "total_steps": 7532, "loss": 0.3916742205619812, "lr": 1.988147637286513e-05, "epoch": 0.19359978754481477, "percentage": 9.68, "elapsed_time": "2:36:51", "remaining_time": "1 day, 0:23:51"} +{"current_steps": 730, "total_steps": 7532, "loss": 0.3838115334510803, "lr": 1.988080140931996e-05, "epoch": 0.19386535652635772, "percentage": 9.69, "elapsed_time": "2:37:04", "remaining_time": "1 day, 0:23:34"} +{"current_steps": 731, "total_steps": 7532, "loss": 0.3803096413612366, "lr": 1.9880124540872813e-05, "epoch": 0.19413092550790068, "percentage": 9.71, "elapsed_time": "2:37:17", "remaining_time": "1 day, 0:23:22"} +{"current_steps": 732, 
"total_steps": 7532, "loss": 0.4180675446987152, "lr": 1.987944576765418e-05, "epoch": 0.19439649448944363, "percentage": 9.72, "elapsed_time": "2:37:29", "remaining_time": "1 day, 0:23:06"} +{"current_steps": 733, "total_steps": 7532, "loss": 0.34485924243927, "lr": 1.987876508979492e-05, "epoch": 0.19466206347098658, "percentage": 9.73, "elapsed_time": "2:37:42", "remaining_time": "1 day, 0:22:53"} +{"current_steps": 734, "total_steps": 7532, "loss": 0.3696223795413971, "lr": 1.987808250742626e-05, "epoch": 0.19492763245252953, "percentage": 9.75, "elapsed_time": "2:37:55", "remaining_time": "1 day, 0:22:38"} +{"current_steps": 735, "total_steps": 7532, "loss": 0.39920324087142944, "lr": 1.9877398020679796e-05, "epoch": 0.1951932014340725, "percentage": 9.76, "elapsed_time": "2:38:08", "remaining_time": "1 day, 0:22:29"} +{"current_steps": 736, "total_steps": 7532, "loss": 0.33534419536590576, "lr": 1.987671162968748e-05, "epoch": 0.19545877041561546, "percentage": 9.77, "elapsed_time": "2:38:22", "remaining_time": "1 day, 0:22:20"} +{"current_steps": 737, "total_steps": 7532, "loss": 0.3698185682296753, "lr": 1.9876023334581657e-05, "epoch": 0.19572433939715841, "percentage": 9.78, "elapsed_time": "2:38:35", "remaining_time": "1 day, 0:22:15"} +{"current_steps": 738, "total_steps": 7532, "loss": 0.37388375401496887, "lr": 1.9875333135495e-05, "epoch": 0.19598990837870137, "percentage": 9.8, "elapsed_time": "2:38:49", "remaining_time": "1 day, 0:22:05"} +{"current_steps": 739, "total_steps": 7532, "loss": 0.3285469114780426, "lr": 1.9874641032560594e-05, "epoch": 0.19625547736024432, "percentage": 9.81, "elapsed_time": "2:39:02", "remaining_time": "1 day, 0:21:59"} +{"current_steps": 740, "total_steps": 7532, "loss": 0.3539549708366394, "lr": 1.9873947025911854e-05, "epoch": 0.19652104634178727, "percentage": 9.82, "elapsed_time": "2:39:16", "remaining_time": "1 day, 0:21:49"} +{"current_steps": 741, "total_steps": 7532, "loss": 0.4707021117210388, "lr": 
1.9873251115682577e-05, "epoch": 0.19678661532333022, "percentage": 9.84, "elapsed_time": "2:39:29", "remaining_time": "1 day, 0:21:36"} +{"current_steps": 742, "total_steps": 7532, "loss": 0.3871781826019287, "lr": 1.987255330200693e-05, "epoch": 0.1970521843048732, "percentage": 9.85, "elapsed_time": "2:39:42", "remaining_time": "1 day, 0:21:26"} +{"current_steps": 743, "total_steps": 7532, "loss": 0.3890243172645569, "lr": 1.9871853585019446e-05, "epoch": 0.19731775328641615, "percentage": 9.86, "elapsed_time": "2:39:54", "remaining_time": "1 day, 0:21:11"} +{"current_steps": 744, "total_steps": 7532, "loss": 0.34914374351501465, "lr": 1.9871151964855013e-05, "epoch": 0.1975833222679591, "percentage": 9.88, "elapsed_time": "2:40:08", "remaining_time": "1 day, 0:21:03"} +{"current_steps": 745, "total_steps": 7532, "loss": 0.41009777784347534, "lr": 1.9870448441648905e-05, "epoch": 0.19784889124950206, "percentage": 9.89, "elapsed_time": "2:40:21", "remaining_time": "1 day, 0:20:51"} +{"current_steps": 746, "total_steps": 7532, "loss": 0.39449363946914673, "lr": 1.9869743015536747e-05, "epoch": 0.198114460231045, "percentage": 9.9, "elapsed_time": "2:40:34", "remaining_time": "1 day, 0:20:42"} +{"current_steps": 747, "total_steps": 7532, "loss": 0.3530065417289734, "lr": 1.9869035686654538e-05, "epoch": 0.19838002921258796, "percentage": 9.92, "elapsed_time": "2:40:47", "remaining_time": "1 day, 0:20:29"} +{"current_steps": 748, "total_steps": 7532, "loss": 0.4255196154117584, "lr": 1.986832645513864e-05, "epoch": 0.1986455981941309, "percentage": 9.93, "elapsed_time": "2:41:00", "remaining_time": "1 day, 0:20:19"} +{"current_steps": 749, "total_steps": 7532, "loss": 0.3921143114566803, "lr": 1.9867615321125796e-05, "epoch": 0.1989111671756739, "percentage": 9.94, "elapsed_time": "2:41:13", "remaining_time": "1 day, 0:20:05"} +{"current_steps": 750, "total_steps": 7532, "loss": 0.4157381057739258, "lr": 1.986690228475309e-05, "epoch": 0.19917673615721684, 
"percentage": 9.96, "elapsed_time": "2:41:27", "remaining_time": "1 day, 0:19:59"} +{"current_steps": 751, "total_steps": 7532, "loss": 0.3922047019004822, "lr": 1.986618734615799e-05, "epoch": 0.1994423051387598, "percentage": 9.97, "elapsed_time": "2:41:40", "remaining_time": "1 day, 0:19:47"} +{"current_steps": 752, "total_steps": 7532, "loss": 0.4378710985183716, "lr": 1.9865470505478335e-05, "epoch": 0.19970787412030275, "percentage": 9.98, "elapsed_time": "2:41:52", "remaining_time": "1 day, 0:19:31"} +{"current_steps": 753, "total_steps": 7532, "loss": 0.3636753261089325, "lr": 1.986475176285232e-05, "epoch": 0.1999734431018457, "percentage": 10.0, "elapsed_time": "2:42:06", "remaining_time": "1 day, 0:19:22"} +{"current_steps": 754, "total_steps": 7532, "loss": 0.3509834408760071, "lr": 1.986403111841851e-05, "epoch": 0.20023901208338865, "percentage": 10.01, "elapsed_time": "2:42:19", "remaining_time": "1 day, 0:19:08"} +{"current_steps": 755, "total_steps": 7532, "loss": 0.3539624512195587, "lr": 1.986330857231583e-05, "epoch": 0.2005045810649316, "percentage": 10.02, "elapsed_time": "2:42:32", "remaining_time": "1 day, 0:18:56"} +{"current_steps": 756, "total_steps": 7532, "loss": 0.417904257774353, "lr": 1.9862584124683587e-05, "epoch": 0.20077015004647458, "percentage": 10.04, "elapsed_time": "2:42:44", "remaining_time": "1 day, 0:18:40"} +{"current_steps": 757, "total_steps": 7532, "loss": 0.3602277636528015, "lr": 1.9861857775661442e-05, "epoch": 0.20103571902801753, "percentage": 10.05, "elapsed_time": "2:42:57", "remaining_time": "1 day, 0:18:27"} +{"current_steps": 758, "total_steps": 7532, "loss": 0.41064661741256714, "lr": 1.986112952538943e-05, "epoch": 0.2013012880095605, "percentage": 10.06, "elapsed_time": "2:43:09", "remaining_time": "1 day, 0:18:09"} +{"current_steps": 759, "total_steps": 7532, "loss": 0.36313754320144653, "lr": 1.9860399374007944e-05, "epoch": 0.20156685699110344, "percentage": 10.08, "elapsed_time": "2:43:23", 
"remaining_time": "1 day, 0:17:58"} +{"current_steps": 760, "total_steps": 7532, "loss": 0.39497628808021545, "lr": 1.9859667321657755e-05, "epoch": 0.2018324259726464, "percentage": 10.09, "elapsed_time": "2:43:35", "remaining_time": "1 day, 0:17:41"} +{"current_steps": 761, "total_steps": 7532, "loss": 0.405613511800766, "lr": 1.9858933368479987e-05, "epoch": 0.20209799495418934, "percentage": 10.1, "elapsed_time": "2:43:48", "remaining_time": "1 day, 0:17:26"} +{"current_steps": 762, "total_steps": 7532, "loss": 0.39093440771102905, "lr": 1.9858197514616142e-05, "epoch": 0.2023635639357323, "percentage": 10.12, "elapsed_time": "2:44:00", "remaining_time": "1 day, 0:17:07"} +{"current_steps": 763, "total_steps": 7532, "loss": 0.39908382296562195, "lr": 1.9857459760208084e-05, "epoch": 0.20262913291727527, "percentage": 10.13, "elapsed_time": "2:44:12", "remaining_time": "1 day, 0:16:48"} +{"current_steps": 764, "total_steps": 7532, "loss": 0.36787620186805725, "lr": 1.9856720105398038e-05, "epoch": 0.20289470189881822, "percentage": 10.14, "elapsed_time": "2:44:25", "remaining_time": "1 day, 0:16:35"} +{"current_steps": 765, "total_steps": 7532, "loss": 0.390550822019577, "lr": 1.985597855032861e-05, "epoch": 0.20316027088036118, "percentage": 10.16, "elapsed_time": "2:44:37", "remaining_time": "1 day, 0:16:17"} +{"current_steps": 766, "total_steps": 7532, "loss": 0.4191611409187317, "lr": 1.9855235095142754e-05, "epoch": 0.20342583986190413, "percentage": 10.17, "elapsed_time": "2:44:50", "remaining_time": "1 day, 0:16:00"} +{"current_steps": 767, "total_steps": 7532, "loss": 0.4060766100883484, "lr": 1.985448973998381e-05, "epoch": 0.20369140884344708, "percentage": 10.18, "elapsed_time": "2:45:02", "remaining_time": "1 day, 0:15:43"} +{"current_steps": 768, "total_steps": 7532, "loss": 0.3906163275241852, "lr": 1.985374248499546e-05, "epoch": 0.20395697782499003, "percentage": 10.2, "elapsed_time": "2:45:15", "remaining_time": "1 day, 0:15:31"} 
+{"current_steps": 769, "total_steps": 7532, "loss": 0.3926839828491211, "lr": 1.9852993330321774e-05, "epoch": 0.20422254680653298, "percentage": 10.21, "elapsed_time": "2:45:28", "remaining_time": "1 day, 0:15:18"} +{"current_steps": 770, "total_steps": 7532, "loss": 0.37276068329811096, "lr": 1.9852242276107182e-05, "epoch": 0.20448811578807596, "percentage": 10.22, "elapsed_time": "2:45:41", "remaining_time": "1 day, 0:15:06"} +{"current_steps": 771, "total_steps": 7532, "loss": 0.3765360414981842, "lr": 1.9851489322496476e-05, "epoch": 0.20475368476961892, "percentage": 10.24, "elapsed_time": "2:45:54", "remaining_time": "1 day, 0:14:51"} +{"current_steps": 772, "total_steps": 7532, "loss": 0.35091257095336914, "lr": 1.9850734469634815e-05, "epoch": 0.20501925375116187, "percentage": 10.25, "elapsed_time": "2:46:08", "remaining_time": "1 day, 0:14:44"} +{"current_steps": 773, "total_steps": 7532, "loss": 0.4259791076183319, "lr": 1.9849977717667725e-05, "epoch": 0.20528482273270482, "percentage": 10.26, "elapsed_time": "2:46:21", "remaining_time": "1 day, 0:14:32"} +{"current_steps": 774, "total_steps": 7532, "loss": 0.3563114404678345, "lr": 1.9849219066741102e-05, "epoch": 0.20555039171424777, "percentage": 10.28, "elapsed_time": "2:46:34", "remaining_time": "1 day, 0:14:27"} +{"current_steps": 775, "total_steps": 7532, "loss": 0.4148223102092743, "lr": 1.9848458517001203e-05, "epoch": 0.20581596069579072, "percentage": 10.29, "elapsed_time": "2:46:47", "remaining_time": "1 day, 0:14:16"} +{"current_steps": 776, "total_steps": 7532, "loss": 0.3817785382270813, "lr": 1.9847696068594655e-05, "epoch": 0.20608152967733367, "percentage": 10.3, "elapsed_time": "2:47:01", "remaining_time": "1 day, 0:14:11"} +{"current_steps": 777, "total_steps": 7532, "loss": 0.41741886734962463, "lr": 1.984693172166845e-05, "epoch": 0.20634709865887665, "percentage": 10.32, "elapsed_time": "2:47:14", "remaining_time": "1 day, 0:14:00"} +{"current_steps": 778, "total_steps": 7532, 
"loss": 0.34800025820732117, "lr": 1.9846165476369938e-05, "epoch": 0.2066126676404196, "percentage": 10.33, "elapsed_time": "2:47:28", "remaining_time": "1 day, 0:13:52"} +{"current_steps": 779, "total_steps": 7532, "loss": 0.38093405961990356, "lr": 1.9845397332846848e-05, "epoch": 0.20687823662196256, "percentage": 10.34, "elapsed_time": "2:47:41", "remaining_time": "1 day, 0:13:37"} +{"current_steps": 780, "total_steps": 7532, "loss": 0.40733009576797485, "lr": 1.9844627291247268e-05, "epoch": 0.2071438056035055, "percentage": 10.36, "elapsed_time": "2:47:54", "remaining_time": "1 day, 0:13:24"} +{"current_steps": 781, "total_steps": 7532, "loss": 0.3829066753387451, "lr": 1.9843855351719655e-05, "epoch": 0.20740937458504846, "percentage": 10.37, "elapsed_time": "2:48:07", "remaining_time": "1 day, 0:13:16"} +{"current_steps": 782, "total_steps": 7532, "loss": 0.3574868440628052, "lr": 1.9843081514412827e-05, "epoch": 0.2076749435665914, "percentage": 10.38, "elapsed_time": "2:48:19", "remaining_time": "1 day, 0:12:59"} +{"current_steps": 783, "total_steps": 7532, "loss": 0.3675144612789154, "lr": 1.984230577947597e-05, "epoch": 0.20794051254813437, "percentage": 10.4, "elapsed_time": "2:48:32", "remaining_time": "1 day, 0:12:44"} +{"current_steps": 784, "total_steps": 7532, "loss": 0.36120525002479553, "lr": 1.9841528147058638e-05, "epoch": 0.20820608152967734, "percentage": 10.41, "elapsed_time": "2:48:45", "remaining_time": "1 day, 0:12:29"} +{"current_steps": 785, "total_steps": 7532, "loss": 0.3651392459869385, "lr": 1.984074861731075e-05, "epoch": 0.2084716505112203, "percentage": 10.42, "elapsed_time": "2:48:58", "remaining_time": "1 day, 0:12:15"} +{"current_steps": 786, "total_steps": 7532, "loss": 0.4204651117324829, "lr": 1.983996719038259e-05, "epoch": 0.20873721949276325, "percentage": 10.44, "elapsed_time": "2:49:10", "remaining_time": "1 day, 0:11:57"} +{"current_steps": 787, "total_steps": 7532, "loss": 0.4452149271965027, "lr": 
1.9839183866424806e-05, "epoch": 0.2090027884743062, "percentage": 10.45, "elapsed_time": "2:49:23", "remaining_time": "1 day, 0:11:43"} +{"current_steps": 788, "total_steps": 7532, "loss": 0.3931270241737366, "lr": 1.9838398645588418e-05, "epoch": 0.20926835745584915, "percentage": 10.46, "elapsed_time": "2:49:35", "remaining_time": "1 day, 0:11:26"} +{"current_steps": 789, "total_steps": 7532, "loss": 0.3680538535118103, "lr": 1.98376115280248e-05, "epoch": 0.2095339264373921, "percentage": 10.48, "elapsed_time": "2:49:48", "remaining_time": "1 day, 0:11:13"} +{"current_steps": 790, "total_steps": 7532, "loss": 0.3766820728778839, "lr": 1.9836822513885704e-05, "epoch": 0.20979949541893506, "percentage": 10.49, "elapsed_time": "2:50:00", "remaining_time": "1 day, 0:10:54"} +{"current_steps": 791, "total_steps": 7532, "loss": 0.3602439761161804, "lr": 1.9836031603323245e-05, "epoch": 0.21006506440047804, "percentage": 10.5, "elapsed_time": "2:50:13", "remaining_time": "1 day, 0:10:40"} +{"current_steps": 792, "total_steps": 7532, "loss": 0.38925549387931824, "lr": 1.98352387964899e-05, "epoch": 0.210330633382021, "percentage": 10.52, "elapsed_time": "2:50:26", "remaining_time": "1 day, 0:10:30"} +{"current_steps": 793, "total_steps": 7532, "loss": 0.3569640517234802, "lr": 1.9834444093538504e-05, "epoch": 0.21059620236356394, "percentage": 10.53, "elapsed_time": "2:50:39", "remaining_time": "1 day, 0:10:19"} +{"current_steps": 794, "total_steps": 7532, "loss": 0.3543340265750885, "lr": 1.9833647494622275e-05, "epoch": 0.2108617713451069, "percentage": 10.54, "elapsed_time": "2:50:53", "remaining_time": "1 day, 0:10:14"} +{"current_steps": 795, "total_steps": 7532, "loss": 0.37313222885131836, "lr": 1.983284899989479e-05, "epoch": 0.21112734032664984, "percentage": 10.55, "elapsed_time": "2:51:06", "remaining_time": "1 day, 0:10:02"} +{"current_steps": 796, "total_steps": 7532, "loss": 0.3874257802963257, "lr": 1.983204860950998e-05, "epoch": 0.2113929093081928, 
"percentage": 10.57, "elapsed_time": "2:51:20", "remaining_time": "1 day, 0:09:58"} +{"current_steps": 797, "total_steps": 7532, "loss": 0.3815164864063263, "lr": 1.983124632362216e-05, "epoch": 0.21165847828973575, "percentage": 10.58, "elapsed_time": "2:51:33", "remaining_time": "1 day, 0:09:47"} +{"current_steps": 798, "total_steps": 7532, "loss": 0.39476731419563293, "lr": 1.9830442142386e-05, "epoch": 0.21192404727127873, "percentage": 10.59, "elapsed_time": "2:51:47", "remaining_time": "1 day, 0:09:43"} +{"current_steps": 799, "total_steps": 7532, "loss": 0.399338036775589, "lr": 1.9829636065956527e-05, "epoch": 0.21218961625282168, "percentage": 10.61, "elapsed_time": "2:52:01", "remaining_time": "1 day, 0:09:34"} +{"current_steps": 800, "total_steps": 7532, "loss": 0.3940344452857971, "lr": 1.9828828094489157e-05, "epoch": 0.21245518523436463, "percentage": 10.62, "elapsed_time": "2:52:14", "remaining_time": "1 day, 0:09:24"} +{"current_steps": 801, "total_steps": 7532, "loss": 0.35597044229507446, "lr": 1.9828018228139647e-05, "epoch": 0.21272075421590758, "percentage": 10.63, "elapsed_time": "2:52:33", "remaining_time": "1 day, 0:10:02"} +{"current_steps": 802, "total_steps": 7532, "loss": 0.4309435784816742, "lr": 1.9827206467064133e-05, "epoch": 0.21298632319745053, "percentage": 10.65, "elapsed_time": "2:52:47", "remaining_time": "1 day, 0:09:55"} +{"current_steps": 803, "total_steps": 7532, "loss": 0.37327438592910767, "lr": 1.9826392811419113e-05, "epoch": 0.21325189217899349, "percentage": 10.66, "elapsed_time": "2:53:00", "remaining_time": "1 day, 0:09:47"} +{"current_steps": 804, "total_steps": 7532, "loss": 0.35214242339134216, "lr": 1.9825577261361454e-05, "epoch": 0.21351746116053644, "percentage": 10.67, "elapsed_time": "2:53:14", "remaining_time": "1 day, 0:09:40"} +{"current_steps": 805, "total_steps": 7532, "loss": 0.41114968061447144, "lr": 1.982475981704838e-05, "epoch": 0.21378303014207942, "percentage": 10.69, "elapsed_time": "2:53:27", 
"remaining_time": "1 day, 0:09:29"} +{"current_steps": 806, "total_steps": 7532, "loss": 0.3632299304008484, "lr": 1.9823940478637486e-05, "epoch": 0.21404859912362237, "percentage": 10.7, "elapsed_time": "2:53:41", "remaining_time": "1 day, 0:09:22"} +{"current_steps": 807, "total_steps": 7532, "loss": 0.39640772342681885, "lr": 1.9823119246286727e-05, "epoch": 0.21431416810516532, "percentage": 10.71, "elapsed_time": "2:53:53", "remaining_time": "1 day, 0:09:06"} +{"current_steps": 808, "total_steps": 7532, "loss": 0.39356929063796997, "lr": 1.9822296120154433e-05, "epoch": 0.21457973708670827, "percentage": 10.73, "elapsed_time": "2:54:06", "remaining_time": "1 day, 0:08:50"} +{"current_steps": 809, "total_steps": 7532, "loss": 0.3710761070251465, "lr": 1.9821471100399294e-05, "epoch": 0.21484530606825122, "percentage": 10.74, "elapsed_time": "2:54:19", "remaining_time": "1 day, 0:08:39"} +{"current_steps": 810, "total_steps": 7532, "loss": 0.35515087842941284, "lr": 1.9820644187180354e-05, "epoch": 0.21511087504979418, "percentage": 10.75, "elapsed_time": "2:54:32", "remaining_time": "1 day, 0:08:25"} +{"current_steps": 811, "total_steps": 7532, "loss": 0.3803205192089081, "lr": 1.981981538065704e-05, "epoch": 0.21537644403133713, "percentage": 10.77, "elapsed_time": "2:54:45", "remaining_time": "1 day, 0:08:15"} +{"current_steps": 812, "total_steps": 7532, "loss": 0.40275394916534424, "lr": 1.9818984680989134e-05, "epoch": 0.2156420130128801, "percentage": 10.78, "elapsed_time": "2:54:57", "remaining_time": "1 day, 0:07:59"} +{"current_steps": 813, "total_steps": 7532, "loss": 0.3711051344871521, "lr": 1.9818152088336786e-05, "epoch": 0.21590758199442306, "percentage": 10.79, "elapsed_time": "2:55:11", "remaining_time": "1 day, 0:07:47"} +{"current_steps": 814, "total_steps": 7532, "loss": 0.4198985695838928, "lr": 1.9817317602860512e-05, "epoch": 0.216173150975966, "percentage": 10.81, "elapsed_time": "2:55:23", "remaining_time": "1 day, 0:07:32"} 
+{"current_steps": 815, "total_steps": 7532, "loss": 0.38333773612976074, "lr": 1.9816481224721185e-05, "epoch": 0.21643871995750896, "percentage": 10.82, "elapsed_time": "2:55:37", "remaining_time": "1 day, 0:07:25"} +{"current_steps": 816, "total_steps": 7532, "loss": 0.3959774971008301, "lr": 1.9815642954080055e-05, "epoch": 0.21670428893905191, "percentage": 10.83, "elapsed_time": "2:55:50", "remaining_time": "1 day, 0:07:11"} +{"current_steps": 817, "total_steps": 7532, "loss": 0.3475337326526642, "lr": 1.9814802791098728e-05, "epoch": 0.21696985792059487, "percentage": 10.85, "elapsed_time": "2:56:03", "remaining_time": "1 day, 0:07:02"} +{"current_steps": 818, "total_steps": 7532, "loss": 0.369370698928833, "lr": 1.981396073593918e-05, "epoch": 0.21723542690213782, "percentage": 10.86, "elapsed_time": "2:56:16", "remaining_time": "1 day, 0:06:48"} +{"current_steps": 819, "total_steps": 7532, "loss": 0.3515776991844177, "lr": 1.9813116788763744e-05, "epoch": 0.2175009958836808, "percentage": 10.87, "elapsed_time": "2:56:29", "remaining_time": "1 day, 0:06:34"} +{"current_steps": 820, "total_steps": 7532, "loss": 0.3637402355670929, "lr": 1.9812270949735124e-05, "epoch": 0.21776656486522375, "percentage": 10.89, "elapsed_time": "2:56:42", "remaining_time": "1 day, 0:06:27"} +{"current_steps": 821, "total_steps": 7532, "loss": 0.3930947780609131, "lr": 1.9811423219016395e-05, "epoch": 0.2180321338467667, "percentage": 10.9, "elapsed_time": "2:56:55", "remaining_time": "1 day, 0:06:13"} +{"current_steps": 822, "total_steps": 7532, "loss": 0.40081048011779785, "lr": 1.981057359677098e-05, "epoch": 0.21829770282830965, "percentage": 10.91, "elapsed_time": "2:57:09", "remaining_time": "1 day, 0:06:05"} +{"current_steps": 823, "total_steps": 7532, "loss": 0.3831724226474762, "lr": 1.9809722083162682e-05, "epoch": 0.2185632718098526, "percentage": 10.93, "elapsed_time": "2:57:22", "remaining_time": "1 day, 0:05:55"} +{"current_steps": 824, "total_steps": 7532, 
"loss": 0.3919270932674408, "lr": 1.9808868678355662e-05, "epoch": 0.21882884079139556, "percentage": 10.94, "elapsed_time": "2:57:36", "remaining_time": "1 day, 0:05:50"} +{"current_steps": 825, "total_steps": 7532, "loss": 0.41782522201538086, "lr": 1.9808013382514448e-05, "epoch": 0.2190944097729385, "percentage": 10.95, "elapsed_time": "2:57:49", "remaining_time": "1 day, 0:05:39"} +{"current_steps": 826, "total_steps": 7532, "loss": 0.3751329779624939, "lr": 1.9807156195803926e-05, "epoch": 0.2193599787544815, "percentage": 10.97, "elapsed_time": "2:58:03", "remaining_time": "1 day, 0:05:34"} +{"current_steps": 827, "total_steps": 7532, "loss": 0.36451685428619385, "lr": 1.9806297118389353e-05, "epoch": 0.21962554773602444, "percentage": 10.98, "elapsed_time": "2:58:16", "remaining_time": "1 day, 0:05:24"} +{"current_steps": 828, "total_steps": 7532, "loss": 0.3924056887626648, "lr": 1.9805436150436352e-05, "epoch": 0.2198911167175674, "percentage": 10.99, "elapsed_time": "2:58:30", "remaining_time": "1 day, 0:05:17"} +{"current_steps": 829, "total_steps": 7532, "loss": 0.34744757413864136, "lr": 1.9804573292110906e-05, "epoch": 0.22015668569911034, "percentage": 11.01, "elapsed_time": "2:58:43", "remaining_time": "1 day, 0:05:04"} +{"current_steps": 830, "total_steps": 7532, "loss": 0.4162982702255249, "lr": 1.980370854357936e-05, "epoch": 0.2204222546806533, "percentage": 11.02, "elapsed_time": "2:58:56", "remaining_time": "1 day, 0:04:51"} +{"current_steps": 831, "total_steps": 7532, "loss": 0.36572596430778503, "lr": 1.9802841905008434e-05, "epoch": 0.22068782366219625, "percentage": 11.03, "elapsed_time": "2:59:09", "remaining_time": "1 day, 0:04:39"} +{"current_steps": 832, "total_steps": 7532, "loss": 0.3535170555114746, "lr": 1.98019733765652e-05, "epoch": 0.2209533926437392, "percentage": 11.05, "elapsed_time": "2:59:22", "remaining_time": "1 day, 0:04:28"} +{"current_steps": 833, "total_steps": 7532, "loss": 0.3906480073928833, "lr": 
1.9801102958417107e-05, "epoch": 0.22121896162528218, "percentage": 11.06, "elapsed_time": "2:59:35", "remaining_time": "1 day, 0:04:18"} +{"current_steps": 834, "total_steps": 7532, "loss": 0.34185755252838135, "lr": 1.980023065073195e-05, "epoch": 0.22148453060682513, "percentage": 11.07, "elapsed_time": "2:59:48", "remaining_time": "1 day, 0:04:06"} +{"current_steps": 835, "total_steps": 7532, "loss": 0.4216359853744507, "lr": 1.9799356453677913e-05, "epoch": 0.22175009958836808, "percentage": 11.09, "elapsed_time": "3:00:02", "remaining_time": "1 day, 0:04:00"} +{"current_steps": 836, "total_steps": 7532, "loss": 0.365469366312027, "lr": 1.979848036742352e-05, "epoch": 0.22201566856991103, "percentage": 11.1, "elapsed_time": "3:00:15", "remaining_time": "1 day, 0:03:51"} +{"current_steps": 837, "total_steps": 7532, "loss": 0.3570204973220825, "lr": 1.9797602392137678e-05, "epoch": 0.222281237551454, "percentage": 11.11, "elapsed_time": "3:00:29", "remaining_time": "1 day, 0:03:45"} +{"current_steps": 838, "total_steps": 7532, "loss": 0.3929975926876068, "lr": 1.9796722527989646e-05, "epoch": 0.22254680653299694, "percentage": 11.13, "elapsed_time": "3:00:42", "remaining_time": "1 day, 0:03:34"} +{"current_steps": 839, "total_steps": 7532, "loss": 0.39064258337020874, "lr": 1.979584077514905e-05, "epoch": 0.2228123755145399, "percentage": 11.14, "elapsed_time": "3:00:56", "remaining_time": "1 day, 0:03:25"} +{"current_steps": 840, "total_steps": 7532, "loss": 0.3626471757888794, "lr": 1.9794957133785884e-05, "epoch": 0.22307794449608287, "percentage": 11.15, "elapsed_time": "3:01:09", "remaining_time": "1 day, 0:03:13"} +{"current_steps": 841, "total_steps": 7532, "loss": 0.4337238371372223, "lr": 1.9794071604070506e-05, "epoch": 0.22334351347762582, "percentage": 11.17, "elapsed_time": "3:01:22", "remaining_time": "1 day, 0:03:03"} +{"current_steps": 842, "total_steps": 7532, "loss": 0.3361967206001282, "lr": 1.9793184186173632e-05, "epoch": 
0.22360908245916877, "percentage": 11.18, "elapsed_time": "3:01:35", "remaining_time": "1 day, 0:02:51"} +{"current_steps": 843, "total_steps": 7532, "loss": 0.3429332971572876, "lr": 1.9792294880266346e-05, "epoch": 0.22387465144071172, "percentage": 11.19, "elapsed_time": "3:01:49", "remaining_time": "1 day, 0:02:43"} +{"current_steps": 844, "total_steps": 7532, "loss": 0.39196616411209106, "lr": 1.97914036865201e-05, "epoch": 0.22414022042225468, "percentage": 11.21, "elapsed_time": "3:02:02", "remaining_time": "1 day, 0:02:34"} +{"current_steps": 845, "total_steps": 7532, "loss": 0.3763045072555542, "lr": 1.9790510605106697e-05, "epoch": 0.22440578940379763, "percentage": 11.22, "elapsed_time": "3:02:16", "remaining_time": "1 day, 0:02:27"} +{"current_steps": 846, "total_steps": 7532, "loss": 0.41614070534706116, "lr": 1.978961563619832e-05, "epoch": 0.22467135838534058, "percentage": 11.23, "elapsed_time": "3:02:29", "remaining_time": "1 day, 0:02:14"} +{"current_steps": 847, "total_steps": 7532, "loss": 0.3834165334701538, "lr": 1.9788718779967506e-05, "epoch": 0.22493692736688356, "percentage": 11.25, "elapsed_time": "3:02:42", "remaining_time": "1 day, 0:02:01"} +{"current_steps": 848, "total_steps": 7532, "loss": 0.3552364110946655, "lr": 1.978782003658716e-05, "epoch": 0.2252024963484265, "percentage": 11.26, "elapsed_time": "3:02:55", "remaining_time": "1 day, 0:01:51"} +{"current_steps": 849, "total_steps": 7532, "loss": 0.3857925534248352, "lr": 1.9786919406230544e-05, "epoch": 0.22546806532996946, "percentage": 11.27, "elapsed_time": "3:03:08", "remaining_time": "1 day, 0:01:35"} +{"current_steps": 850, "total_steps": 7532, "loss": 0.3501393795013428, "lr": 1.9786016889071294e-05, "epoch": 0.22573363431151242, "percentage": 11.29, "elapsed_time": "3:03:21", "remaining_time": "1 day, 0:01:21"} +{"current_steps": 851, "total_steps": 7532, "loss": 0.36280643939971924, "lr": 1.9785112485283404e-05, "epoch": 0.22599920329305537, "percentage": 11.3, 
"elapsed_time": "3:03:33", "remaining_time": "1 day, 0:01:03"} +{"current_steps": 852, "total_steps": 7532, "loss": 0.3713894486427307, "lr": 1.978420619504123e-05, "epoch": 0.22626477227459832, "percentage": 11.31, "elapsed_time": "3:03:46", "remaining_time": "1 day, 0:00:49"} +{"current_steps": 853, "total_steps": 7532, "loss": 0.3668733537197113, "lr": 1.97832980185195e-05, "epoch": 0.22653034125614127, "percentage": 11.33, "elapsed_time": "3:03:59", "remaining_time": "1 day, 0:00:37"} +{"current_steps": 854, "total_steps": 7532, "loss": 0.4054701626300812, "lr": 1.978238795589329e-05, "epoch": 0.22679591023768425, "percentage": 11.34, "elapsed_time": "3:04:12", "remaining_time": "1 day, 0:00:30"} +{"current_steps": 855, "total_steps": 7532, "loss": 0.3824681043624878, "lr": 1.9781476007338058e-05, "epoch": 0.2270614792192272, "percentage": 11.35, "elapsed_time": "3:04:26", "remaining_time": "1 day, 0:00:18"} +{"current_steps": 856, "total_steps": 7532, "loss": 0.4009544253349304, "lr": 1.978056217302961e-05, "epoch": 0.22732704820077015, "percentage": 11.36, "elapsed_time": "3:04:38", "remaining_time": "1 day, 0:00:04"} +{"current_steps": 857, "total_steps": 7532, "loss": 0.34773316979408264, "lr": 1.9779646453144133e-05, "epoch": 0.2275926171823131, "percentage": 11.38, "elapsed_time": "3:04:51", "remaining_time": "23:59:51"} +{"current_steps": 858, "total_steps": 7532, "loss": 0.4067278206348419, "lr": 1.977872884785815e-05, "epoch": 0.22785818616385606, "percentage": 11.39, "elapsed_time": "3:05:04", "remaining_time": "23:59:38"} +{"current_steps": 859, "total_steps": 7532, "loss": 0.3843458890914917, "lr": 1.9777809357348584e-05, "epoch": 0.228123755145399, "percentage": 11.4, "elapsed_time": "3:05:17", "remaining_time": "23:59:23"} +{"current_steps": 860, "total_steps": 7532, "loss": 0.4261704683303833, "lr": 1.977688798179269e-05, "epoch": 0.22838932412694196, "percentage": 11.42, "elapsed_time": "3:05:30", "remaining_time": "23:59:08"} +{"current_steps": 
861, "total_steps": 7532, "loss": 0.39109086990356445, "lr": 1.9775964721368098e-05, "epoch": 0.22865489310848494, "percentage": 11.43, "elapsed_time": "3:05:43", "remaining_time": "23:58:57"} +{"current_steps": 862, "total_steps": 7532, "loss": 0.39436954259872437, "lr": 1.9775039576252807e-05, "epoch": 0.2289204620900279, "percentage": 11.44, "elapsed_time": "3:05:55", "remaining_time": "23:58:42"} +{"current_steps": 863, "total_steps": 7532, "loss": 0.3787967562675476, "lr": 1.9774112546625168e-05, "epoch": 0.22918603107157084, "percentage": 11.46, "elapsed_time": "3:06:08", "remaining_time": "23:58:28"} +{"current_steps": 864, "total_steps": 7532, "loss": 0.3729320466518402, "lr": 1.9773183632663907e-05, "epoch": 0.2294516000531138, "percentage": 11.47, "elapsed_time": "3:06:21", "remaining_time": "23:58:13"} +{"current_steps": 865, "total_steps": 7532, "loss": 0.3817081153392792, "lr": 1.9772252834548108e-05, "epoch": 0.22971716903465675, "percentage": 11.48, "elapsed_time": "3:06:34", "remaining_time": "23:58:02"} +{"current_steps": 866, "total_steps": 7532, "loss": 0.3362218737602234, "lr": 1.9771320152457212e-05, "epoch": 0.2299827380161997, "percentage": 11.5, "elapsed_time": "3:06:47", "remaining_time": "23:57:48"} +{"current_steps": 867, "total_steps": 7532, "loss": 0.37274059653282166, "lr": 1.9770385586571033e-05, "epoch": 0.23024830699774265, "percentage": 11.51, "elapsed_time": "3:07:00", "remaining_time": "23:57:33"} +{"current_steps": 868, "total_steps": 7532, "loss": 0.3832330107688904, "lr": 1.9769449137069746e-05, "epoch": 0.23051387597928563, "percentage": 11.52, "elapsed_time": "3:07:12", "remaining_time": "23:57:19"} +{"current_steps": 869, "total_steps": 7532, "loss": 0.37420010566711426, "lr": 1.9768510804133886e-05, "epoch": 0.23077944496082858, "percentage": 11.54, "elapsed_time": "3:07:26", "remaining_time": "23:57:10"} +{"current_steps": 870, "total_steps": 7532, "loss": 0.35314565896987915, "lr": 1.976757058794435e-05, "epoch": 
0.23104501394237154, "percentage": 11.55, "elapsed_time": "3:07:38", "remaining_time": "23:56:53"} +{"current_steps": 871, "total_steps": 7532, "loss": 0.34667372703552246, "lr": 1.97666284886824e-05, "epoch": 0.2313105829239145, "percentage": 11.56, "elapsed_time": "3:07:51", "remaining_time": "23:56:41"} +{"current_steps": 872, "total_steps": 7532, "loss": 0.3465980589389801, "lr": 1.976568450652967e-05, "epoch": 0.23157615190545744, "percentage": 11.58, "elapsed_time": "3:08:04", "remaining_time": "23:56:24"} +{"current_steps": 873, "total_steps": 7532, "loss": 0.40539389848709106, "lr": 1.9764738641668137e-05, "epoch": 0.2318417208870004, "percentage": 11.59, "elapsed_time": "3:08:17", "remaining_time": "23:56:10"} +{"current_steps": 874, "total_steps": 7532, "loss": 0.35154545307159424, "lr": 1.976379089428016e-05, "epoch": 0.23210728986854334, "percentage": 11.6, "elapsed_time": "3:08:29", "remaining_time": "23:55:56"} +{"current_steps": 875, "total_steps": 7532, "loss": 0.39748087525367737, "lr": 1.9762841264548453e-05, "epoch": 0.23237285885008632, "percentage": 11.62, "elapsed_time": "3:08:43", "remaining_time": "23:55:45"} +{"current_steps": 876, "total_steps": 7532, "loss": 0.41628387570381165, "lr": 1.976188975265609e-05, "epoch": 0.23263842783162927, "percentage": 11.63, "elapsed_time": "3:08:56", "remaining_time": "23:55:33"} +{"current_steps": 877, "total_steps": 7532, "loss": 0.4076233208179474, "lr": 1.976093635878652e-05, "epoch": 0.23290399681317223, "percentage": 11.64, "elapsed_time": "3:09:09", "remaining_time": "23:55:26"} +{"current_steps": 878, "total_steps": 7532, "loss": 0.3262259364128113, "lr": 1.9759981083123533e-05, "epoch": 0.23316956579471518, "percentage": 11.66, "elapsed_time": "3:09:22", "remaining_time": "23:55:15"} +{"current_steps": 879, "total_steps": 7532, "loss": 0.36561673879623413, "lr": 1.9759023925851302e-05, "epoch": 0.23343513477625813, "percentage": 11.67, "elapsed_time": "3:09:35", "remaining_time": "23:55:02"} 
+{"current_steps": 880, "total_steps": 7532, "loss": 0.36661773920059204, "lr": 1.9758064887154358e-05, "epoch": 0.23370070375780108, "percentage": 11.68, "elapsed_time": "3:09:49", "remaining_time": "23:54:55"} +{"current_steps": 881, "total_steps": 7532, "loss": 0.34671685099601746, "lr": 1.9757103967217587e-05, "epoch": 0.23396627273934403, "percentage": 11.7, "elapsed_time": "3:10:02", "remaining_time": "23:54:43"} +{"current_steps": 882, "total_steps": 7532, "loss": 0.3486331105232239, "lr": 1.9756141166226246e-05, "epoch": 0.234231841720887, "percentage": 11.71, "elapsed_time": "3:10:16", "remaining_time": "23:54:36"} +{"current_steps": 883, "total_steps": 7532, "loss": 0.3883505165576935, "lr": 1.9755176484365953e-05, "epoch": 0.23449741070242996, "percentage": 11.72, "elapsed_time": "3:10:29", "remaining_time": "23:54:26"} +{"current_steps": 884, "total_steps": 7532, "loss": 0.3832106590270996, "lr": 1.9754209921822683e-05, "epoch": 0.23476297968397292, "percentage": 11.74, "elapsed_time": "3:10:44", "remaining_time": "23:54:24"} +{"current_steps": 885, "total_steps": 7532, "loss": 0.37876033782958984, "lr": 1.975324147878278e-05, "epoch": 0.23502854866551587, "percentage": 11.75, "elapsed_time": "3:10:57", "remaining_time": "23:54:13"} +{"current_steps": 886, "total_steps": 7532, "loss": 0.38931846618652344, "lr": 1.975227115543295e-05, "epoch": 0.23529411764705882, "percentage": 11.76, "elapsed_time": "3:11:10", "remaining_time": "23:54:00"} +{"current_steps": 887, "total_steps": 7532, "loss": 0.3581021726131439, "lr": 1.9751298951960258e-05, "epoch": 0.23555968662860177, "percentage": 11.78, "elapsed_time": "3:11:23", "remaining_time": "23:53:45"} +{"current_steps": 888, "total_steps": 7532, "loss": 0.35196465253829956, "lr": 1.9750324868552133e-05, "epoch": 0.23582525561014472, "percentage": 11.79, "elapsed_time": "3:11:35", "remaining_time": "23:53:32"} +{"current_steps": 889, "total_steps": 7532, "loss": 0.3635658025741577, "lr": 
1.974934890539637e-05, "epoch": 0.2360908245916877, "percentage": 11.8, "elapsed_time": "3:11:48", "remaining_time": "23:53:15"} +{"current_steps": 890, "total_steps": 7532, "loss": 0.345594197511673, "lr": 1.9748371062681122e-05, "epoch": 0.23635639357323066, "percentage": 11.82, "elapsed_time": "3:12:01", "remaining_time": "23:53:01"} +{"current_steps": 891, "total_steps": 7532, "loss": 0.357181191444397, "lr": 1.97473913405949e-05, "epoch": 0.2366219625547736, "percentage": 11.83, "elapsed_time": "3:12:13", "remaining_time": "23:52:47"} +{"current_steps": 892, "total_steps": 7532, "loss": 0.3264622986316681, "lr": 1.974640973932659e-05, "epoch": 0.23688753153631656, "percentage": 11.84, "elapsed_time": "3:12:26", "remaining_time": "23:52:31"} +{"current_steps": 893, "total_steps": 7532, "loss": 0.37950894236564636, "lr": 1.9745426259065434e-05, "epoch": 0.2371531005178595, "percentage": 11.86, "elapsed_time": "3:12:39", "remaining_time": "23:52:16"} +{"current_steps": 894, "total_steps": 7532, "loss": 0.37400782108306885, "lr": 1.9744440900001027e-05, "epoch": 0.23741866949940246, "percentage": 11.87, "elapsed_time": "3:12:51", "remaining_time": "23:51:59"} +{"current_steps": 895, "total_steps": 7532, "loss": 0.3455463945865631, "lr": 1.974345366232334e-05, "epoch": 0.23768423848094541, "percentage": 11.88, "elapsed_time": "3:13:04", "remaining_time": "23:51:46"} +{"current_steps": 896, "total_steps": 7532, "loss": 0.3605351150035858, "lr": 1.9742464546222702e-05, "epoch": 0.2379498074624884, "percentage": 11.9, "elapsed_time": "3:13:16", "remaining_time": "23:51:30"} +{"current_steps": 897, "total_steps": 7532, "loss": 0.3839051127433777, "lr": 1.97414735518898e-05, "epoch": 0.23821537644403135, "percentage": 11.91, "elapsed_time": "3:13:29", "remaining_time": "23:51:17"} +{"current_steps": 898, "total_steps": 7532, "loss": 0.34562867879867554, "lr": 1.974048067951569e-05, "epoch": 0.2384809454255743, "percentage": 11.92, "elapsed_time": "3:13:42", 
"remaining_time": "23:51:01"} +{"current_steps": 899, "total_steps": 7532, "loss": 0.3986506760120392, "lr": 1.9739485929291778e-05, "epoch": 0.23874651440711725, "percentage": 11.94, "elapsed_time": "3:13:55", "remaining_time": "23:50:48"} +{"current_steps": 900, "total_steps": 7532, "loss": 0.3955162465572357, "lr": 1.9738489301409848e-05, "epoch": 0.2390120833886602, "percentage": 11.95, "elapsed_time": "3:14:07", "remaining_time": "23:50:32"} +{"current_steps": 901, "total_steps": 7532, "loss": 0.370066374540329, "lr": 1.9737490796062036e-05, "epoch": 0.23927765237020315, "percentage": 11.96, "elapsed_time": "3:14:25", "remaining_time": "23:50:53"} +{"current_steps": 902, "total_steps": 7532, "loss": 0.3777826726436615, "lr": 1.973649041344084e-05, "epoch": 0.2395432213517461, "percentage": 11.98, "elapsed_time": "3:14:38", "remaining_time": "23:50:43"} +{"current_steps": 903, "total_steps": 7532, "loss": 0.327572226524353, "lr": 1.9735488153739128e-05, "epoch": 0.23980879033328908, "percentage": 11.99, "elapsed_time": "3:14:51", "remaining_time": "23:50:31"} +{"current_steps": 904, "total_steps": 7532, "loss": 0.3921743929386139, "lr": 1.973448401715011e-05, "epoch": 0.24007435931483204, "percentage": 12.0, "elapsed_time": "3:15:05", "remaining_time": "23:50:21"} +{"current_steps": 905, "total_steps": 7532, "loss": 0.3683379888534546, "lr": 1.973347800386739e-05, "epoch": 0.240339928296375, "percentage": 12.02, "elapsed_time": "3:15:18", "remaining_time": "23:50:08"} +{"current_steps": 906, "total_steps": 7532, "loss": 0.390872597694397, "lr": 1.9732470114084905e-05, "epoch": 0.24060549727791794, "percentage": 12.03, "elapsed_time": "3:15:31", "remaining_time": "23:50:00"} +{"current_steps": 907, "total_steps": 7532, "loss": 0.3772459626197815, "lr": 1.9731460347996964e-05, "epoch": 0.2408710662594609, "percentage": 12.04, "elapsed_time": "3:15:44", "remaining_time": "23:49:47"} +{"current_steps": 908, "total_steps": 7532, "loss": 0.37990954518318176, "lr": 
1.973044870579824e-05, "epoch": 0.24113663524100384, "percentage": 12.06, "elapsed_time": "3:15:58", "remaining_time": "23:49:38"} +{"current_steps": 909, "total_steps": 7532, "loss": 0.3380817770957947, "lr": 1.972943518768377e-05, "epoch": 0.2414022042225468, "percentage": 12.07, "elapsed_time": "3:16:11", "remaining_time": "23:49:25"} +{"current_steps": 910, "total_steps": 7532, "loss": 0.3348115384578705, "lr": 1.9728419793848935e-05, "epoch": 0.24166777320408978, "percentage": 12.08, "elapsed_time": "3:16:24", "remaining_time": "23:49:15"} +{"current_steps": 911, "total_steps": 7532, "loss": 0.36936551332473755, "lr": 1.9727402524489505e-05, "epoch": 0.24193334218563273, "percentage": 12.1, "elapsed_time": "3:16:37", "remaining_time": "23:49:01"} +{"current_steps": 912, "total_steps": 7532, "loss": 0.3871539235115051, "lr": 1.9726383379801593e-05, "epoch": 0.24219891116717568, "percentage": 12.11, "elapsed_time": "3:16:50", "remaining_time": "23:48:51"} +{"current_steps": 913, "total_steps": 7532, "loss": 0.37087059020996094, "lr": 1.9725362359981676e-05, "epoch": 0.24246448014871863, "percentage": 12.12, "elapsed_time": "3:17:03", "remaining_time": "23:48:39"} +{"current_steps": 914, "total_steps": 7532, "loss": 0.35582688450813293, "lr": 1.9724339465226595e-05, "epoch": 0.24273004913026158, "percentage": 12.13, "elapsed_time": "3:17:16", "remaining_time": "23:48:27"} +{"current_steps": 915, "total_steps": 7532, "loss": 0.38500669598579407, "lr": 1.9723314695733557e-05, "epoch": 0.24299561811180453, "percentage": 12.15, "elapsed_time": "3:17:30", "remaining_time": "23:48:19"} +{"current_steps": 916, "total_steps": 7532, "loss": 0.32470762729644775, "lr": 1.9722288051700116e-05, "epoch": 0.2432611870933475, "percentage": 12.16, "elapsed_time": "3:17:43", "remaining_time": "23:48:06"} +{"current_steps": 917, "total_steps": 7532, "loss": 0.3822774589061737, "lr": 1.9721259533324207e-05, "epoch": 0.24352675607489047, "percentage": 12.17, "elapsed_time": 
"3:17:57", "remaining_time": "23:47:58"} +{"current_steps": 918, "total_steps": 7532, "loss": 0.38374873995780945, "lr": 1.972022914080411e-05, "epoch": 0.24379232505643342, "percentage": 12.19, "elapsed_time": "3:18:10", "remaining_time": "23:47:46"} +{"current_steps": 919, "total_steps": 7532, "loss": 0.3419352173805237, "lr": 1.9719196874338472e-05, "epoch": 0.24405789403797637, "percentage": 12.2, "elapsed_time": "3:18:23", "remaining_time": "23:47:34"} +{"current_steps": 920, "total_steps": 7532, "loss": 0.3294275403022766, "lr": 1.9718162734126308e-05, "epoch": 0.24432346301951932, "percentage": 12.21, "elapsed_time": "3:18:36", "remaining_time": "23:47:23"} +{"current_steps": 921, "total_steps": 7532, "loss": 0.3585365414619446, "lr": 1.9717126720366982e-05, "epoch": 0.24458903200106227, "percentage": 12.23, "elapsed_time": "3:18:50", "remaining_time": "23:47:16"} +{"current_steps": 922, "total_steps": 7532, "loss": 0.38130316138267517, "lr": 1.9716088833260225e-05, "epoch": 0.24485460098260522, "percentage": 12.24, "elapsed_time": "3:19:03", "remaining_time": "23:47:03"} +{"current_steps": 923, "total_steps": 7532, "loss": 0.3745136260986328, "lr": 1.9715049073006133e-05, "epoch": 0.24512016996414818, "percentage": 12.25, "elapsed_time": "3:19:16", "remaining_time": "23:46:54"} +{"current_steps": 924, "total_steps": 7532, "loss": 0.3771660327911377, "lr": 1.971400743980516e-05, "epoch": 0.24538573894569116, "percentage": 12.27, "elapsed_time": "3:19:29", "remaining_time": "23:46:43"} +{"current_steps": 925, "total_steps": 7532, "loss": 0.29661691188812256, "lr": 1.971296393385812e-05, "epoch": 0.2456513079272341, "percentage": 12.28, "elapsed_time": "3:19:43", "remaining_time": "23:46:35"} +{"current_steps": 926, "total_steps": 7532, "loss": 0.33783960342407227, "lr": 1.9711918555366184e-05, "epoch": 0.24591687690877706, "percentage": 12.29, "elapsed_time": "3:19:56", "remaining_time": "23:46:23"} +{"current_steps": 927, "total_steps": 7532, "loss": 
0.42983683943748474, "lr": 1.971087130453089e-05, "epoch": 0.24618244589032, "percentage": 12.31, "elapsed_time": "3:20:10", "remaining_time": "23:46:13"} +{"current_steps": 928, "total_steps": 7532, "loss": 0.32242363691329956, "lr": 1.9709822181554142e-05, "epoch": 0.24644801487186296, "percentage": 12.32, "elapsed_time": "3:20:22", "remaining_time": "23:45:58"} +{"current_steps": 929, "total_steps": 7532, "loss": 0.3576955795288086, "lr": 1.970877118663819e-05, "epoch": 0.24671358385340592, "percentage": 12.33, "elapsed_time": "3:20:36", "remaining_time": "23:45:53"} +{"current_steps": 930, "total_steps": 7532, "loss": 0.4185359477996826, "lr": 1.9707718319985663e-05, "epoch": 0.24697915283494887, "percentage": 12.35, "elapsed_time": "3:20:49", "remaining_time": "23:45:38"} +{"current_steps": 931, "total_steps": 7532, "loss": 0.35377705097198486, "lr": 1.970666358179953e-05, "epoch": 0.24724472181649185, "percentage": 12.36, "elapsed_time": "3:21:02", "remaining_time": "23:45:27"} +{"current_steps": 932, "total_steps": 7532, "loss": 0.3860151171684265, "lr": 1.9705606972283143e-05, "epoch": 0.2475102907980348, "percentage": 12.37, "elapsed_time": "3:21:15", "remaining_time": "23:45:14"} +{"current_steps": 933, "total_steps": 7532, "loss": 0.39463168382644653, "lr": 1.9704548491640195e-05, "epoch": 0.24777585977957775, "percentage": 12.39, "elapsed_time": "3:21:28", "remaining_time": "23:45:03"} +{"current_steps": 934, "total_steps": 7532, "loss": 0.3670084774494171, "lr": 1.9703488140074752e-05, "epoch": 0.2480414287611207, "percentage": 12.4, "elapsed_time": "3:21:42", "remaining_time": "23:44:53"} +{"current_steps": 935, "total_steps": 7532, "loss": 0.388730525970459, "lr": 1.9702425917791242e-05, "epoch": 0.24830699774266365, "percentage": 12.41, "elapsed_time": "3:21:55", "remaining_time": "23:44:40"} +{"current_steps": 936, "total_steps": 7532, "loss": 0.38767656683921814, "lr": 1.970136182499444e-05, "epoch": 0.2485725667242066, "percentage": 12.43, 
"elapsed_time": "3:22:08", "remaining_time": "23:44:28"} +{"current_steps": 937, "total_steps": 7532, "loss": 0.35394930839538574, "lr": 1.9700295861889497e-05, "epoch": 0.24883813570574956, "percentage": 12.44, "elapsed_time": "3:22:20", "remaining_time": "23:44:11"} +{"current_steps": 938, "total_steps": 7532, "loss": 0.3360324501991272, "lr": 1.9699228028681917e-05, "epoch": 0.24910370468729254, "percentage": 12.45, "elapsed_time": "3:22:33", "remaining_time": "23:43:58"} +{"current_steps": 939, "total_steps": 7532, "loss": 0.390169233083725, "lr": 1.9698158325577563e-05, "epoch": 0.2493692736688355, "percentage": 12.47, "elapsed_time": "3:22:46", "remaining_time": "23:43:42"} +{"current_steps": 940, "total_steps": 7532, "loss": 0.3921571671962738, "lr": 1.9697086752782666e-05, "epoch": 0.24963484265037844, "percentage": 12.48, "elapsed_time": "3:22:59", "remaining_time": "23:43:29"} +{"current_steps": 941, "total_steps": 7532, "loss": 0.3795739710330963, "lr": 1.9696013310503808e-05, "epoch": 0.2499004116319214, "percentage": 12.49, "elapsed_time": "3:23:11", "remaining_time": "23:43:14"} +{"current_steps": 942, "total_steps": 7532, "loss": 0.3891025185585022, "lr": 1.9694937998947935e-05, "epoch": 0.25016598061346434, "percentage": 12.51, "elapsed_time": "3:23:25", "remaining_time": "23:43:03"} +{"current_steps": 943, "total_steps": 7532, "loss": 0.3548225164413452, "lr": 1.9693860818322357e-05, "epoch": 0.2504315495950073, "percentage": 12.52, "elapsed_time": "3:23:38", "remaining_time": "23:42:51"} +{"current_steps": 944, "total_steps": 7532, "loss": 0.3696819543838501, "lr": 1.9692781768834747e-05, "epoch": 0.25069711857655025, "percentage": 12.53, "elapsed_time": "3:23:51", "remaining_time": "23:42:41"} +{"current_steps": 945, "total_steps": 7532, "loss": 0.3906037211418152, "lr": 1.9691700850693126e-05, "epoch": 0.2509626875580932, "percentage": 12.55, "elapsed_time": "3:24:04", "remaining_time": "23:42:26"} +{"current_steps": 946, "total_steps": 7532, 
"loss": 0.38181206583976746, "lr": 1.9690618064105883e-05, "epoch": 0.25122825653963615, "percentage": 12.56, "elapsed_time": "3:24:17", "remaining_time": "23:42:12"} +{"current_steps": 947, "total_steps": 7532, "loss": 0.36904582381248474, "lr": 1.9689533409281765e-05, "epoch": 0.2514938255211791, "percentage": 12.57, "elapsed_time": "3:24:30", "remaining_time": "23:42:01"} +{"current_steps": 948, "total_steps": 7532, "loss": 0.3635823130607605, "lr": 1.9688446886429885e-05, "epoch": 0.25175939450272206, "percentage": 12.59, "elapsed_time": "3:24:42", "remaining_time": "23:41:46"} +{"current_steps": 949, "total_steps": 7532, "loss": 0.3527260422706604, "lr": 1.9687358495759713e-05, "epoch": 0.25202496348426506, "percentage": 12.6, "elapsed_time": "3:24:55", "remaining_time": "23:41:32"} +{"current_steps": 950, "total_steps": 7532, "loss": 0.3781110346317291, "lr": 1.968626823748107e-05, "epoch": 0.252290532465808, "percentage": 12.61, "elapsed_time": "3:25:08", "remaining_time": "23:41:16"} +{"current_steps": 951, "total_steps": 7532, "loss": 0.3931560814380646, "lr": 1.968517611180415e-05, "epoch": 0.25255610144735097, "percentage": 12.63, "elapsed_time": "3:25:21", "remaining_time": "23:41:04"} +{"current_steps": 952, "total_steps": 7532, "loss": 0.39111074805259705, "lr": 1.9684082118939503e-05, "epoch": 0.2528216704288939, "percentage": 12.64, "elapsed_time": "3:25:33", "remaining_time": "23:40:48"} +{"current_steps": 953, "total_steps": 7532, "loss": 0.385967880487442, "lr": 1.9682986259098037e-05, "epoch": 0.25308723941043687, "percentage": 12.65, "elapsed_time": "3:25:46", "remaining_time": "23:40:35"} +{"current_steps": 954, "total_steps": 7532, "loss": 0.34006553888320923, "lr": 1.9681888532491022e-05, "epoch": 0.2533528083919798, "percentage": 12.67, "elapsed_time": "3:25:59", "remaining_time": "23:40:18"} +{"current_steps": 955, "total_steps": 7532, "loss": 0.36069998145103455, "lr": 1.9680788939330086e-05, "epoch": 0.2536183773735228, "percentage": 
12.68, "elapsed_time": "3:26:11", "remaining_time": "23:40:04"} +{"current_steps": 956, "total_steps": 7532, "loss": 0.3354898691177368, "lr": 1.9679687479827212e-05, "epoch": 0.2538839463550657, "percentage": 12.69, "elapsed_time": "3:26:24", "remaining_time": "23:39:49"} +{"current_steps": 957, "total_steps": 7532, "loss": 0.35667335987091064, "lr": 1.9678584154194756e-05, "epoch": 0.2541495153366087, "percentage": 12.71, "elapsed_time": "3:26:37", "remaining_time": "23:39:37"} +{"current_steps": 958, "total_steps": 7532, "loss": 0.4003029465675354, "lr": 1.9677478962645422e-05, "epoch": 0.25441508431815163, "percentage": 12.72, "elapsed_time": "3:26:51", "remaining_time": "23:39:27"} +{"current_steps": 959, "total_steps": 7532, "loss": 0.34397056698799133, "lr": 1.9676371905392278e-05, "epoch": 0.2546806532996946, "percentage": 12.73, "elapsed_time": "3:27:04", "remaining_time": "23:39:15"} +{"current_steps": 960, "total_steps": 7532, "loss": 0.35319578647613525, "lr": 1.9675262982648757e-05, "epoch": 0.25494622228123753, "percentage": 12.75, "elapsed_time": "3:27:17", "remaining_time": "23:39:07"} +{"current_steps": 961, "total_steps": 7532, "loss": 0.34840327501296997, "lr": 1.967415219462864e-05, "epoch": 0.2552117912627805, "percentage": 12.76, "elapsed_time": "3:27:30", "remaining_time": "23:38:54"} +{"current_steps": 962, "total_steps": 7532, "loss": 0.3298989534378052, "lr": 1.9673039541546076e-05, "epoch": 0.25547736024432344, "percentage": 12.77, "elapsed_time": "3:27:44", "remaining_time": "23:38:43"} +{"current_steps": 963, "total_steps": 7532, "loss": 0.38438719511032104, "lr": 1.9671925023615572e-05, "epoch": 0.25574292922586644, "percentage": 12.79, "elapsed_time": "3:27:56", "remaining_time": "23:38:29"} +{"current_steps": 964, "total_steps": 7532, "loss": 0.3834493160247803, "lr": 1.9670808641051994e-05, "epoch": 0.2560084982074094, "percentage": 12.8, "elapsed_time": "3:28:10", "remaining_time": "23:38:19"} +{"current_steps": 965, "total_steps": 
7532, "loss": 0.3713288903236389, "lr": 1.9669690394070564e-05, "epoch": 0.25627406718895235, "percentage": 12.81, "elapsed_time": "3:28:23", "remaining_time": "23:38:07"} +{"current_steps": 966, "total_steps": 7532, "loss": 0.37564241886138916, "lr": 1.966857028288687e-05, "epoch": 0.2565396361704953, "percentage": 12.83, "elapsed_time": "3:28:36", "remaining_time": "23:37:55"} +{"current_steps": 967, "total_steps": 7532, "loss": 0.30162689089775085, "lr": 1.9667448307716857e-05, "epoch": 0.25680520515203825, "percentage": 12.84, "elapsed_time": "3:28:49", "remaining_time": "23:37:41"} +{"current_steps": 968, "total_steps": 7532, "loss": 0.35969680547714233, "lr": 1.9666324468776826e-05, "epoch": 0.2570707741335812, "percentage": 12.85, "elapsed_time": "3:29:01", "remaining_time": "23:37:26"} +{"current_steps": 969, "total_steps": 7532, "loss": 0.40947285294532776, "lr": 1.9665198766283444e-05, "epoch": 0.25733634311512416, "percentage": 12.87, "elapsed_time": "3:29:15", "remaining_time": "23:37:15"} +{"current_steps": 970, "total_steps": 7532, "loss": 0.35868343710899353, "lr": 1.9664071200453726e-05, "epoch": 0.2576019120966671, "percentage": 12.88, "elapsed_time": "3:29:27", "remaining_time": "23:37:01"} +{"current_steps": 971, "total_steps": 7532, "loss": 0.3569234311580658, "lr": 1.966294177150506e-05, "epoch": 0.25786748107821006, "percentage": 12.89, "elapsed_time": "3:29:41", "remaining_time": "23:36:51"} +{"current_steps": 972, "total_steps": 7532, "loss": 0.3381764888763428, "lr": 1.9661810479655184e-05, "epoch": 0.258133050059753, "percentage": 12.9, "elapsed_time": "3:29:54", "remaining_time": "23:36:38"} +{"current_steps": 973, "total_steps": 7532, "loss": 0.39847785234451294, "lr": 1.9660677325122196e-05, "epoch": 0.25839861904129596, "percentage": 12.92, "elapsed_time": "3:30:07", "remaining_time": "23:36:30"} +{"current_steps": 974, "total_steps": 7532, "loss": 0.33162468671798706, "lr": 1.965954230812456e-05, "epoch": 0.2586641880228389, 
"percentage": 12.93, "elapsed_time": "3:30:21", "remaining_time": "23:36:18"} +{"current_steps": 975, "total_steps": 7532, "loss": 0.3627605438232422, "lr": 1.9658405428881087e-05, "epoch": 0.25892975700438187, "percentage": 12.94, "elapsed_time": "3:30:34", "remaining_time": "23:36:09"} +{"current_steps": 976, "total_steps": 7532, "loss": 0.3253796100616455, "lr": 1.9657266687610965e-05, "epoch": 0.2591953259859248, "percentage": 12.96, "elapsed_time": "3:30:47", "remaining_time": "23:35:56"} +{"current_steps": 977, "total_steps": 7532, "loss": 0.3341265916824341, "lr": 1.9656126084533716e-05, "epoch": 0.2594608949674678, "percentage": 12.97, "elapsed_time": "3:31:01", "remaining_time": "23:35:48"} +{"current_steps": 978, "total_steps": 7532, "loss": 0.3714970052242279, "lr": 1.9654983619869242e-05, "epoch": 0.2597264639490108, "percentage": 12.98, "elapsed_time": "3:31:14", "remaining_time": "23:35:36"} +{"current_steps": 979, "total_steps": 7532, "loss": 0.3360912501811981, "lr": 1.9653839293837798e-05, "epoch": 0.25999203293055373, "percentage": 13.0, "elapsed_time": "3:31:28", "remaining_time": "23:35:30"} +{"current_steps": 980, "total_steps": 7532, "loss": 0.3780854642391205, "lr": 1.9652693106659995e-05, "epoch": 0.2602576019120967, "percentage": 13.01, "elapsed_time": "3:31:41", "remaining_time": "23:35:17"} +{"current_steps": 981, "total_steps": 7532, "loss": 0.33595478534698486, "lr": 1.9651545058556803e-05, "epoch": 0.26052317089363963, "percentage": 13.02, "elapsed_time": "3:31:55", "remaining_time": "23:35:10"} +{"current_steps": 982, "total_steps": 7532, "loss": 0.3608357012271881, "lr": 1.965039514974955e-05, "epoch": 0.2607887398751826, "percentage": 13.04, "elapsed_time": "3:32:08", "remaining_time": "23:34:58"} +{"current_steps": 983, "total_steps": 7532, "loss": 0.3807666599750519, "lr": 1.964924338045993e-05, "epoch": 0.26105430885672554, "percentage": 13.05, "elapsed_time": "3:32:21", "remaining_time": "23:34:44"} +{"current_steps": 984, 
"total_steps": 7532, "loss": 0.3551647663116455, "lr": 1.964808975090999e-05, "epoch": 0.2613198778382685, "percentage": 13.06, "elapsed_time": "3:32:33", "remaining_time": "23:34:27"} +{"current_steps": 985, "total_steps": 7532, "loss": 0.3771904706954956, "lr": 1.9646934261322135e-05, "epoch": 0.26158544681981144, "percentage": 13.08, "elapsed_time": "3:32:45", "remaining_time": "23:34:09"} +{"current_steps": 986, "total_steps": 7532, "loss": 0.41103222966194153, "lr": 1.964577691191913e-05, "epoch": 0.2618510158013544, "percentage": 13.09, "elapsed_time": "3:32:58", "remaining_time": "23:33:58"} +{"current_steps": 987, "total_steps": 7532, "loss": 0.34439292550086975, "lr": 1.9644617702924093e-05, "epoch": 0.26211658478289734, "percentage": 13.1, "elapsed_time": "3:33:11", "remaining_time": "23:33:44"} +{"current_steps": 988, "total_steps": 7532, "loss": 0.41214391589164734, "lr": 1.9643456634560515e-05, "epoch": 0.2623821537644403, "percentage": 13.12, "elapsed_time": "3:33:24", "remaining_time": "23:33:31"} +{"current_steps": 989, "total_steps": 7532, "loss": 0.3186502754688263, "lr": 1.9642293707052232e-05, "epoch": 0.26264772274598325, "percentage": 13.13, "elapsed_time": "3:33:37", "remaining_time": "23:33:16"} +{"current_steps": 990, "total_steps": 7532, "loss": 0.3534559905529022, "lr": 1.9641128920623438e-05, "epoch": 0.2629132917275262, "percentage": 13.14, "elapsed_time": "3:33:50", "remaining_time": "23:33:04"} +{"current_steps": 991, "total_steps": 7532, "loss": 0.35217320919036865, "lr": 1.96399622754987e-05, "epoch": 0.2631788607090692, "percentage": 13.16, "elapsed_time": "3:34:03", "remaining_time": "23:32:49"} +{"current_steps": 992, "total_steps": 7532, "loss": 0.31661587953567505, "lr": 1.9638793771902924e-05, "epoch": 0.26344442969061216, "percentage": 13.17, "elapsed_time": "3:34:16", "remaining_time": "23:32:41"} +{"current_steps": 993, "total_steps": 7532, "loss": 0.32468482851982117, "lr": 1.9637623410061392e-05, "epoch": 
0.2637099986721551, "percentage": 13.18, "elapsed_time": "3:34:29", "remaining_time": "23:32:27"} +{"current_steps": 994, "total_steps": 7532, "loss": 0.346771776676178, "lr": 1.9636451190199727e-05, "epoch": 0.26397556765369806, "percentage": 13.2, "elapsed_time": "3:34:43", "remaining_time": "23:32:20"} +{"current_steps": 995, "total_steps": 7532, "loss": 0.36409270763397217, "lr": 1.9635277112543928e-05, "epoch": 0.264241136635241, "percentage": 13.21, "elapsed_time": "3:34:56", "remaining_time": "23:32:08"} +{"current_steps": 996, "total_steps": 7532, "loss": 0.404967725276947, "lr": 1.963410117732034e-05, "epoch": 0.26450670561678397, "percentage": 13.22, "elapsed_time": "3:35:10", "remaining_time": "23:31:59"} +{"current_steps": 997, "total_steps": 7532, "loss": 0.39506661891937256, "lr": 1.9632923384755666e-05, "epoch": 0.2647722745983269, "percentage": 13.24, "elapsed_time": "3:35:23", "remaining_time": "23:31:47"} +{"current_steps": 998, "total_steps": 7532, "loss": 0.3833203911781311, "lr": 1.9631743735076972e-05, "epoch": 0.26503784357986987, "percentage": 13.25, "elapsed_time": "3:35:36", "remaining_time": "23:31:33"} +{"current_steps": 999, "total_steps": 7532, "loss": 0.34522518515586853, "lr": 1.9630562228511682e-05, "epoch": 0.2653034125614128, "percentage": 13.26, "elapsed_time": "3:35:49", "remaining_time": "23:31:24"} +{"current_steps": 1000, "total_steps": 7532, "loss": 0.3818400800228119, "lr": 1.962937886528758e-05, "epoch": 0.2655689815429558, "percentage": 13.28, "elapsed_time": "3:36:02", "remaining_time": "23:31:10"} +{"current_steps": 1001, "total_steps": 7532, "loss": 0.40827828645706177, "lr": 1.9628193645632796e-05, "epoch": 0.2658345505244987, "percentage": 13.29, "elapsed_time": "3:36:21", "remaining_time": "23:31:38"} +{"current_steps": 1002, "total_steps": 7532, "loss": 0.3448852002620697, "lr": 1.962700656977583e-05, "epoch": 0.2661001195060417, "percentage": 13.3, "elapsed_time": "3:36:34", "remaining_time": "23:31:26"} 
+{"current_steps": 1003, "total_steps": 7532, "loss": 0.36560773849487305, "lr": 1.9625817637945542e-05, "epoch": 0.26636568848758463, "percentage": 13.32, "elapsed_time": "3:36:48", "remaining_time": "23:31:16"} +{"current_steps": 1004, "total_steps": 7532, "loss": 0.38305893540382385, "lr": 1.962462685037114e-05, "epoch": 0.2666312574691276, "percentage": 13.33, "elapsed_time": "3:37:00", "remaining_time": "23:31:01"} +{"current_steps": 1005, "total_steps": 7532, "loss": 0.3562568426132202, "lr": 1.962343420728219e-05, "epoch": 0.2668968264506706, "percentage": 13.34, "elapsed_time": "3:37:13", "remaining_time": "23:30:49"} +{"current_steps": 1006, "total_steps": 7532, "loss": 0.37458860874176025, "lr": 1.9622239708908626e-05, "epoch": 0.26716239543221354, "percentage": 13.36, "elapsed_time": "3:37:26", "remaining_time": "23:30:34"} +{"current_steps": 1007, "total_steps": 7532, "loss": 0.35852503776550293, "lr": 1.9621043355480726e-05, "epoch": 0.2674279644137565, "percentage": 13.37, "elapsed_time": "3:37:39", "remaining_time": "23:30:20"} +{"current_steps": 1008, "total_steps": 7532, "loss": 0.4056578278541565, "lr": 1.961984514722914e-05, "epoch": 0.26769353339529944, "percentage": 13.38, "elapsed_time": "3:37:52", "remaining_time": "23:30:05"} +{"current_steps": 1009, "total_steps": 7532, "loss": 0.4531296491622925, "lr": 1.9618645084384863e-05, "epoch": 0.2679591023768424, "percentage": 13.4, "elapsed_time": "3:38:04", "remaining_time": "23:29:48"} +{"current_steps": 1010, "total_steps": 7532, "loss": 0.3356376886367798, "lr": 1.9617443167179256e-05, "epoch": 0.26822467135838535, "percentage": 13.41, "elapsed_time": "3:38:18", "remaining_time": "23:29:41"} +{"current_steps": 1011, "total_steps": 7532, "loss": 0.38045161962509155, "lr": 1.9616239395844033e-05, "epoch": 0.2684902403399283, "percentage": 13.42, "elapsed_time": "3:38:30", "remaining_time": "23:29:26"} +{"current_steps": 1012, "total_steps": 7532, "loss": 0.3549511730670929, "lr": 
1.9615033770611268e-05, "epoch": 0.26875580932147125, "percentage": 13.44, "elapsed_time": "3:38:44", "remaining_time": "23:29:16"} +{"current_steps": 1013, "total_steps": 7532, "loss": 0.33363252878189087, "lr": 1.9613826291713393e-05, "epoch": 0.2690213783030142, "percentage": 13.45, "elapsed_time": "3:38:56", "remaining_time": "23:29:01"} +{"current_steps": 1014, "total_steps": 7532, "loss": 0.3443339467048645, "lr": 1.961261695938319e-05, "epoch": 0.26928694728455715, "percentage": 13.46, "elapsed_time": "3:39:10", "remaining_time": "23:28:48"} +{"current_steps": 1015, "total_steps": 7532, "loss": 0.3258364796638489, "lr": 1.9611405773853807e-05, "epoch": 0.2695525162661001, "percentage": 13.48, "elapsed_time": "3:39:23", "remaining_time": "23:28:37"} +{"current_steps": 1016, "total_steps": 7532, "loss": 0.357122540473938, "lr": 1.961019273535875e-05, "epoch": 0.26981808524764306, "percentage": 13.49, "elapsed_time": "3:39:36", "remaining_time": "23:28:24"} +{"current_steps": 1017, "total_steps": 7532, "loss": 0.32092082500457764, "lr": 1.9608977844131875e-05, "epoch": 0.270083654229186, "percentage": 13.5, "elapsed_time": "3:39:48", "remaining_time": "23:28:09"} +{"current_steps": 1018, "total_steps": 7532, "loss": 0.36354511976242065, "lr": 1.96077611004074e-05, "epoch": 0.27034922321072896, "percentage": 13.52, "elapsed_time": "3:40:02", "remaining_time": "23:27:58"} +{"current_steps": 1019, "total_steps": 7532, "loss": 0.37128758430480957, "lr": 1.9606542504419895e-05, "epoch": 0.27061479219227197, "percentage": 13.53, "elapsed_time": "3:40:14", "remaining_time": "23:27:42"} +{"current_steps": 1020, "total_steps": 7532, "loss": 0.3732859790325165, "lr": 1.9605322056404294e-05, "epoch": 0.2708803611738149, "percentage": 13.54, "elapsed_time": "3:40:27", "remaining_time": "23:27:26"} +{"current_steps": 1021, "total_steps": 7532, "loss": 0.32642674446105957, "lr": 1.9604099756595885e-05, "epoch": 0.2711459301553579, "percentage": 13.56, "elapsed_time": 
"3:40:39", "remaining_time": "23:27:10"} +{"current_steps": 1022, "total_steps": 7532, "loss": 0.376791775226593, "lr": 1.9602875605230313e-05, "epoch": 0.2714114991369008, "percentage": 13.57, "elapsed_time": "3:40:52", "remaining_time": "23:26:54"} +{"current_steps": 1023, "total_steps": 7532, "loss": 0.34514784812927246, "lr": 1.960164960254358e-05, "epoch": 0.2716770681184438, "percentage": 13.58, "elapsed_time": "3:41:05", "remaining_time": "23:26:44"} +{"current_steps": 1024, "total_steps": 7532, "loss": 0.3752189576625824, "lr": 1.9600421748772044e-05, "epoch": 0.27194263709998673, "percentage": 13.6, "elapsed_time": "3:41:18", "remaining_time": "23:26:30"} +{"current_steps": 1025, "total_steps": 7532, "loss": 0.33100831508636475, "lr": 1.959919204415242e-05, "epoch": 0.2722082060815297, "percentage": 13.61, "elapsed_time": "3:41:31", "remaining_time": "23:26:17"} +{"current_steps": 1026, "total_steps": 7532, "loss": 0.42713654041290283, "lr": 1.9597960488921785e-05, "epoch": 0.27247377506307263, "percentage": 13.62, "elapsed_time": "3:41:43", "remaining_time": "23:25:59"} +{"current_steps": 1027, "total_steps": 7532, "loss": 0.3746519684791565, "lr": 1.9596727083317565e-05, "epoch": 0.2727393440446156, "percentage": 13.64, "elapsed_time": "3:41:56", "remaining_time": "23:25:44"} +{"current_steps": 1028, "total_steps": 7532, "loss": 0.39962098002433777, "lr": 1.9595491827577543e-05, "epoch": 0.27300491302615854, "percentage": 13.65, "elapsed_time": "3:42:08", "remaining_time": "23:25:25"} +{"current_steps": 1029, "total_steps": 7532, "loss": 0.35112401843070984, "lr": 1.9594254721939866e-05, "epoch": 0.2732704820077015, "percentage": 13.66, "elapsed_time": "3:42:21", "remaining_time": "23:25:13"} +{"current_steps": 1030, "total_steps": 7532, "loss": 0.3648139238357544, "lr": 1.9593015766643037e-05, "epoch": 0.27353605098924444, "percentage": 13.67, "elapsed_time": "3:42:34", "remaining_time": "23:25:00"} +{"current_steps": 1031, "total_steps": 7532, "loss": 
0.31544098258018494, "lr": 1.9591774961925902e-05, "epoch": 0.2738016199707874, "percentage": 13.69, "elapsed_time": "3:42:47", "remaining_time": "23:24:49"} +{"current_steps": 1032, "total_steps": 7532, "loss": 0.3593738079071045, "lr": 1.959053230802768e-05, "epoch": 0.27406718895233034, "percentage": 13.7, "elapsed_time": "3:43:01", "remaining_time": "23:24:40"} +{"current_steps": 1033, "total_steps": 7532, "loss": 0.39784368872642517, "lr": 1.958928780518794e-05, "epoch": 0.27433275793387335, "percentage": 13.71, "elapsed_time": "3:43:14", "remaining_time": "23:24:27"} +{"current_steps": 1034, "total_steps": 7532, "loss": 0.3869936168193817, "lr": 1.9588041453646606e-05, "epoch": 0.2745983269154163, "percentage": 13.73, "elapsed_time": "3:43:27", "remaining_time": "23:24:18"} +{"current_steps": 1035, "total_steps": 7532, "loss": 0.31108593940734863, "lr": 1.958679325364396e-05, "epoch": 0.27486389589695925, "percentage": 13.74, "elapsed_time": "3:43:40", "remaining_time": "23:24:07"} +{"current_steps": 1036, "total_steps": 7532, "loss": 0.3917708098888397, "lr": 1.958554320542064e-05, "epoch": 0.2751294648785022, "percentage": 13.75, "elapsed_time": "3:43:54", "remaining_time": "23:23:57"} +{"current_steps": 1037, "total_steps": 7532, "loss": 0.36782944202423096, "lr": 1.958429130921764e-05, "epoch": 0.27539503386004516, "percentage": 13.77, "elapsed_time": "3:44:07", "remaining_time": "23:23:43"} +{"current_steps": 1038, "total_steps": 7532, "loss": 0.36196422576904297, "lr": 1.9583037565276314e-05, "epoch": 0.2756606028415881, "percentage": 13.78, "elapsed_time": "3:44:20", "remaining_time": "23:23:33"} +{"current_steps": 1039, "total_steps": 7532, "loss": 0.32208555936813354, "lr": 1.9581781973838368e-05, "epoch": 0.27592617182313106, "percentage": 13.79, "elapsed_time": "3:44:33", "remaining_time": "23:23:18"} +{"current_steps": 1040, "total_steps": 7532, "loss": 0.33451759815216064, "lr": 1.958052453514586e-05, "epoch": 0.276191740804674, "percentage": 
13.81, "elapsed_time": "3:44:46", "remaining_time": "23:23:08"} +{"current_steps": 1041, "total_steps": 7532, "loss": 0.3228047788143158, "lr": 1.9579265249441216e-05, "epoch": 0.27645730978621696, "percentage": 13.82, "elapsed_time": "3:44:59", "remaining_time": "23:22:55"} +{"current_steps": 1042, "total_steps": 7532, "loss": 0.36992791295051575, "lr": 1.957800411696721e-05, "epoch": 0.2767228787677599, "percentage": 13.83, "elapsed_time": "3:45:13", "remaining_time": "23:22:44"} +{"current_steps": 1043, "total_steps": 7532, "loss": 0.3072342276573181, "lr": 1.9576741137966967e-05, "epoch": 0.27698844774930287, "percentage": 13.85, "elapsed_time": "3:45:26", "remaining_time": "23:22:31"} +{"current_steps": 1044, "total_steps": 7532, "loss": 0.3372080326080322, "lr": 1.9575476312683985e-05, "epoch": 0.2772540167308458, "percentage": 13.86, "elapsed_time": "3:45:38", "remaining_time": "23:22:18"} +{"current_steps": 1045, "total_steps": 7532, "loss": 0.34725332260131836, "lr": 1.95742096413621e-05, "epoch": 0.27751958571238877, "percentage": 13.87, "elapsed_time": "3:45:51", "remaining_time": "23:22:01"} +{"current_steps": 1046, "total_steps": 7532, "loss": 0.36714982986450195, "lr": 1.9572941124245516e-05, "epoch": 0.2777851546939317, "percentage": 13.89, "elapsed_time": "3:46:04", "remaining_time": "23:21:47"} +{"current_steps": 1047, "total_steps": 7532, "loss": 0.4163498282432556, "lr": 1.957167076157878e-05, "epoch": 0.27805072367547473, "percentage": 13.9, "elapsed_time": "3:46:16", "remaining_time": "23:21:32"} +{"current_steps": 1048, "total_steps": 7532, "loss": 0.40059348940849304, "lr": 1.9570398553606815e-05, "epoch": 0.2783162926570177, "percentage": 13.91, "elapsed_time": "3:46:29", "remaining_time": "23:21:18"} +{"current_steps": 1049, "total_steps": 7532, "loss": 0.3622320294380188, "lr": 1.956912450057488e-05, "epoch": 0.27858186163856063, "percentage": 13.93, "elapsed_time": "3:46:42", "remaining_time": "23:21:04"} +{"current_steps": 1050, 
"total_steps": 7532, "loss": 0.35159534215927124, "lr": 1.9567848602728595e-05, "epoch": 0.2788474306201036, "percentage": 13.94, "elapsed_time": "3:46:55", "remaining_time": "23:20:50"} +{"current_steps": 1051, "total_steps": 7532, "loss": 0.3093762993812561, "lr": 1.9566570860313944e-05, "epoch": 0.27911299960164654, "percentage": 13.95, "elapsed_time": "3:47:07", "remaining_time": "23:20:35"} +{"current_steps": 1052, "total_steps": 7532, "loss": 0.341474324464798, "lr": 1.9565291273577255e-05, "epoch": 0.2793785685831895, "percentage": 13.97, "elapsed_time": "3:47:20", "remaining_time": "23:20:19"} +{"current_steps": 1053, "total_steps": 7532, "loss": 0.35376566648483276, "lr": 1.9564009842765225e-05, "epoch": 0.27964413756473244, "percentage": 13.98, "elapsed_time": "3:47:33", "remaining_time": "23:20:05"} +{"current_steps": 1054, "total_steps": 7532, "loss": 0.3487662374973297, "lr": 1.9562726568124892e-05, "epoch": 0.2799097065462754, "percentage": 13.99, "elapsed_time": "3:47:45", "remaining_time": "23:19:50"} +{"current_steps": 1055, "total_steps": 7532, "loss": 0.3610745370388031, "lr": 1.956144144990366e-05, "epoch": 0.28017527552781835, "percentage": 14.01, "elapsed_time": "3:47:58", "remaining_time": "23:19:36"} +{"current_steps": 1056, "total_steps": 7532, "loss": 0.33230137825012207, "lr": 1.9560154488349284e-05, "epoch": 0.2804408445093613, "percentage": 14.02, "elapsed_time": "3:48:11", "remaining_time": "23:19:21"} +{"current_steps": 1057, "total_steps": 7532, "loss": 0.310351699590683, "lr": 1.9558865683709875e-05, "epoch": 0.28070641349090425, "percentage": 14.03, "elapsed_time": "3:48:23", "remaining_time": "23:19:07"} +{"current_steps": 1058, "total_steps": 7532, "loss": 0.39930224418640137, "lr": 1.9557575036233897e-05, "epoch": 0.2809719824724472, "percentage": 14.05, "elapsed_time": "3:48:36", "remaining_time": "23:18:51"} +{"current_steps": 1059, "total_steps": 7532, "loss": 0.3345295488834381, "lr": 1.955628254617017e-05, "epoch": 
0.28123755145399015, "percentage": 14.06, "elapsed_time": "3:48:48", "remaining_time": "23:18:36"} +{"current_steps": 1060, "total_steps": 7532, "loss": 0.37963107228279114, "lr": 1.9554988213767875e-05, "epoch": 0.2815031204355331, "percentage": 14.07, "elapsed_time": "3:49:01", "remaining_time": "23:18:19"} +{"current_steps": 1061, "total_steps": 7532, "loss": 0.3923654854297638, "lr": 1.9553692039276545e-05, "epoch": 0.2817686894170761, "percentage": 14.09, "elapsed_time": "3:49:13", "remaining_time": "23:18:02"} +{"current_steps": 1062, "total_steps": 7532, "loss": 0.363646924495697, "lr": 1.9552394022946068e-05, "epoch": 0.28203425839861906, "percentage": 14.1, "elapsed_time": "3:49:26", "remaining_time": "23:17:46"} +{"current_steps": 1063, "total_steps": 7532, "loss": 0.35486382246017456, "lr": 1.9551094165026677e-05, "epoch": 0.282299827380162, "percentage": 14.11, "elapsed_time": "3:49:38", "remaining_time": "23:17:30"} +{"current_steps": 1064, "total_steps": 7532, "loss": 0.35215455293655396, "lr": 1.954979246576898e-05, "epoch": 0.28256539636170497, "percentage": 14.13, "elapsed_time": "3:49:51", "remaining_time": "23:17:20"} +{"current_steps": 1065, "total_steps": 7532, "loss": 0.3936809003353119, "lr": 1.9548488925423924e-05, "epoch": 0.2828309653432479, "percentage": 14.14, "elapsed_time": "3:50:05", "remaining_time": "23:17:09"} +{"current_steps": 1066, "total_steps": 7532, "loss": 0.36852866411209106, "lr": 1.9547183544242817e-05, "epoch": 0.28309653432479087, "percentage": 14.15, "elapsed_time": "3:50:18", "remaining_time": "23:16:58"} +{"current_steps": 1067, "total_steps": 7532, "loss": 0.3552001714706421, "lr": 1.954587632247732e-05, "epoch": 0.2833621033063338, "percentage": 14.17, "elapsed_time": "3:50:31", "remaining_time": "23:16:46"} +{"current_steps": 1068, "total_steps": 7532, "loss": 0.3684498965740204, "lr": 1.9544567260379455e-05, "epoch": 0.2836276722878768, "percentage": 14.18, "elapsed_time": "3:50:45", "remaining_time": "23:16:38"} 
+{"current_steps": 1069, "total_steps": 7532, "loss": 0.3367026448249817, "lr": 1.9543256358201586e-05, "epoch": 0.2838932412694197, "percentage": 14.19, "elapsed_time": "3:50:58", "remaining_time": "23:16:25"} +{"current_steps": 1070, "total_steps": 7532, "loss": 0.3702335059642792, "lr": 1.9541943616196443e-05, "epoch": 0.2841588102509627, "percentage": 14.21, "elapsed_time": "3:51:11", "remaining_time": "23:16:13"} +{"current_steps": 1071, "total_steps": 7532, "loss": 0.3430984318256378, "lr": 1.9540629034617108e-05, "epoch": 0.28442437923250563, "percentage": 14.22, "elapsed_time": "3:51:24", "remaining_time": "23:16:00"} +{"current_steps": 1072, "total_steps": 7532, "loss": 0.36514735221862793, "lr": 1.953931261371702e-05, "epoch": 0.2846899482140486, "percentage": 14.23, "elapsed_time": "3:51:37", "remaining_time": "23:15:50"} +{"current_steps": 1073, "total_steps": 7532, "loss": 0.3524945080280304, "lr": 1.9537994353749963e-05, "epoch": 0.28495551719559153, "percentage": 14.25, "elapsed_time": "3:51:51", "remaining_time": "23:15:38"} +{"current_steps": 1074, "total_steps": 7532, "loss": 0.32405683398246765, "lr": 1.9536674254970088e-05, "epoch": 0.2852210861771345, "percentage": 14.26, "elapsed_time": "3:52:04", "remaining_time": "23:15:28"} +{"current_steps": 1075, "total_steps": 7532, "loss": 0.30863165855407715, "lr": 1.9535352317631888e-05, "epoch": 0.2854866551586775, "percentage": 14.27, "elapsed_time": "3:52:17", "remaining_time": "23:15:14"} +{"current_steps": 1076, "total_steps": 7532, "loss": 0.34343889355659485, "lr": 1.953402854199022e-05, "epoch": 0.28575222414022045, "percentage": 14.29, "elapsed_time": "3:52:30", "remaining_time": "23:15:05"} +{"current_steps": 1077, "total_steps": 7532, "loss": 0.3639434576034546, "lr": 1.9532702928300292e-05, "epoch": 0.2860177931217634, "percentage": 14.3, "elapsed_time": "3:52:44", "remaining_time": "23:14:53"} +{"current_steps": 1078, "total_steps": 7532, "loss": 0.3380300998687744, "lr": 
1.9531375476817667e-05, "epoch": 0.28628336210330635, "percentage": 14.31, "elapsed_time": "3:52:57", "remaining_time": "23:14:46"} +{"current_steps": 1079, "total_steps": 7532, "loss": 0.3323265016078949, "lr": 1.9530046187798267e-05, "epoch": 0.2865489310848493, "percentage": 14.33, "elapsed_time": "3:53:10", "remaining_time": "23:14:33"} +{"current_steps": 1080, "total_steps": 7532, "loss": 0.3439220190048218, "lr": 1.9528715061498355e-05, "epoch": 0.28681450006639225, "percentage": 14.34, "elapsed_time": "3:53:24", "remaining_time": "23:14:21"} +{"current_steps": 1081, "total_steps": 7532, "loss": 0.36376965045928955, "lr": 1.952738209817456e-05, "epoch": 0.2870800690479352, "percentage": 14.35, "elapsed_time": "3:53:37", "remaining_time": "23:14:11"} +{"current_steps": 1082, "total_steps": 7532, "loss": 0.3281211853027344, "lr": 1.952604729808386e-05, "epoch": 0.28734563802947816, "percentage": 14.37, "elapsed_time": "3:53:49", "remaining_time": "23:13:55"} +{"current_steps": 1083, "total_steps": 7532, "loss": 0.3538089990615845, "lr": 1.9524710661483594e-05, "epoch": 0.2876112070110211, "percentage": 14.38, "elapsed_time": "3:54:02", "remaining_time": "23:13:41"} +{"current_steps": 1084, "total_steps": 7532, "loss": 0.3982803225517273, "lr": 1.9523372188631442e-05, "epoch": 0.28787677599256406, "percentage": 14.39, "elapsed_time": "3:54:15", "remaining_time": "23:13:28"} +{"current_steps": 1085, "total_steps": 7532, "loss": 0.3958810567855835, "lr": 1.9522031879785453e-05, "epoch": 0.288142344974107, "percentage": 14.41, "elapsed_time": "3:54:29", "remaining_time": "23:13:17"} +{"current_steps": 1086, "total_steps": 7532, "loss": 0.40133988857269287, "lr": 1.9520689735204016e-05, "epoch": 0.28840791395564996, "percentage": 14.42, "elapsed_time": "3:54:42", "remaining_time": "23:13:04"} +{"current_steps": 1087, "total_steps": 7532, "loss": 0.32411646842956543, "lr": 1.9519345755145886e-05, "epoch": 0.2886734829371929, "percentage": 14.43, "elapsed_time": 
"3:54:55", "remaining_time": "23:12:54"} +{"current_steps": 1088, "total_steps": 7532, "loss": 0.38678207993507385, "lr": 1.9517999939870166e-05, "epoch": 0.28893905191873587, "percentage": 14.45, "elapsed_time": "3:55:08", "remaining_time": "23:12:41"} +{"current_steps": 1089, "total_steps": 7532, "loss": 0.36829686164855957, "lr": 1.951665228963631e-05, "epoch": 0.2892046209002789, "percentage": 14.46, "elapsed_time": "3:55:22", "remaining_time": "23:12:32"} +{"current_steps": 1090, "total_steps": 7532, "loss": 0.38631704449653625, "lr": 1.9515302804704134e-05, "epoch": 0.2894701898818218, "percentage": 14.47, "elapsed_time": "3:55:35", "remaining_time": "23:12:19"} +{"current_steps": 1091, "total_steps": 7532, "loss": 0.39288902282714844, "lr": 1.9513951485333798e-05, "epoch": 0.2897357588633648, "percentage": 14.48, "elapsed_time": "3:55:48", "remaining_time": "23:12:07"} +{"current_steps": 1092, "total_steps": 7532, "loss": 0.3655658960342407, "lr": 1.9512598331785822e-05, "epoch": 0.29000132784490773, "percentage": 14.5, "elapsed_time": "3:56:01", "remaining_time": "23:11:57"} +{"current_steps": 1093, "total_steps": 7532, "loss": 0.3263852596282959, "lr": 1.9511243344321076e-05, "epoch": 0.2902668968264507, "percentage": 14.51, "elapsed_time": "3:56:14", "remaining_time": "23:11:43"} +{"current_steps": 1094, "total_steps": 7532, "loss": 0.37939125299453735, "lr": 1.9509886523200792e-05, "epoch": 0.29053246580799363, "percentage": 14.52, "elapsed_time": "3:56:27", "remaining_time": "23:11:32"} +{"current_steps": 1095, "total_steps": 7532, "loss": 0.34218865633010864, "lr": 1.9508527868686543e-05, "epoch": 0.2907980347895366, "percentage": 14.54, "elapsed_time": "3:56:40", "remaining_time": "23:11:17"} +{"current_steps": 1096, "total_steps": 7532, "loss": 0.368261456489563, "lr": 1.9507167381040263e-05, "epoch": 0.29106360377107954, "percentage": 14.55, "elapsed_time": "3:56:53", "remaining_time": "23:11:06"} +{"current_steps": 1097, "total_steps": 7532, 
"loss": 0.36133286356925964, "lr": 1.950580506052424e-05, "epoch": 0.2913291727526225, "percentage": 14.56, "elapsed_time": "3:57:06", "remaining_time": "23:10:52"} +{"current_steps": 1098, "total_steps": 7532, "loss": 0.3667418658733368, "lr": 1.9504440907401113e-05, "epoch": 0.29159474173416544, "percentage": 14.58, "elapsed_time": "3:57:19", "remaining_time": "23:10:39"} +{"current_steps": 1099, "total_steps": 7532, "loss": 0.34444570541381836, "lr": 1.950307492193387e-05, "epoch": 0.2918603107157084, "percentage": 14.59, "elapsed_time": "3:57:32", "remaining_time": "23:10:28"} +{"current_steps": 1100, "total_steps": 7532, "loss": 0.41261589527130127, "lr": 1.9501707104385863e-05, "epoch": 0.29212587969725134, "percentage": 14.6, "elapsed_time": "3:57:45", "remaining_time": "23:10:14"} +{"current_steps": 1101, "total_steps": 7532, "loss": 0.3762981593608856, "lr": 1.9500337455020788e-05, "epoch": 0.2923914486787943, "percentage": 14.62, "elapsed_time": "3:58:03", "remaining_time": "23:10:29"} +{"current_steps": 1102, "total_steps": 7532, "loss": 0.3527417480945587, "lr": 1.9498965974102697e-05, "epoch": 0.29265701766033725, "percentage": 14.63, "elapsed_time": "3:58:16", "remaining_time": "23:10:18"} +{"current_steps": 1103, "total_steps": 7532, "loss": 0.34812286496162415, "lr": 1.9497592661895996e-05, "epoch": 0.29292258664188026, "percentage": 14.64, "elapsed_time": "3:58:29", "remaining_time": "23:10:02"} +{"current_steps": 1104, "total_steps": 7532, "loss": 0.33663398027420044, "lr": 1.9496217518665444e-05, "epoch": 0.2931881556234232, "percentage": 14.66, "elapsed_time": "3:58:41", "remaining_time": "23:09:47"} +{"current_steps": 1105, "total_steps": 7532, "loss": 0.3632991313934326, "lr": 1.9494840544676156e-05, "epoch": 0.29345372460496616, "percentage": 14.67, "elapsed_time": "3:58:54", "remaining_time": "23:09:35"} +{"current_steps": 1106, "total_steps": 7532, "loss": 0.37389490008354187, "lr": 1.9493461740193587e-05, "epoch": 0.2937192935865091, 
"percentage": 14.68, "elapsed_time": "3:59:07", "remaining_time": "23:09:22"} +{"current_steps": 1107, "total_steps": 7532, "loss": 0.3634020686149597, "lr": 1.949208110548356e-05, "epoch": 0.29398486256805206, "percentage": 14.7, "elapsed_time": "3:59:21", "remaining_time": "23:09:14"} +{"current_steps": 1108, "total_steps": 7532, "loss": 0.36032742261886597, "lr": 1.9490698640812247e-05, "epoch": 0.294250431549595, "percentage": 14.71, "elapsed_time": "3:59:34", "remaining_time": "23:09:01"} +{"current_steps": 1109, "total_steps": 7532, "loss": 0.3385765552520752, "lr": 1.9489314346446164e-05, "epoch": 0.29451600053113797, "percentage": 14.72, "elapsed_time": "3:59:48", "remaining_time": "23:08:52"} +{"current_steps": 1110, "total_steps": 7532, "loss": 0.3751915991306305, "lr": 1.9487928222652195e-05, "epoch": 0.2947815695126809, "percentage": 14.74, "elapsed_time": "4:00:01", "remaining_time": "23:08:40"} +{"current_steps": 1111, "total_steps": 7532, "loss": 0.36069825291633606, "lr": 1.9486540269697564e-05, "epoch": 0.29504713849422387, "percentage": 14.75, "elapsed_time": "4:00:14", "remaining_time": "23:08:30"} +{"current_steps": 1112, "total_steps": 7532, "loss": 0.32703787088394165, "lr": 1.948515048784985e-05, "epoch": 0.2953127074757668, "percentage": 14.76, "elapsed_time": "4:00:27", "remaining_time": "23:08:18"} +{"current_steps": 1113, "total_steps": 7532, "loss": 0.312494158744812, "lr": 1.948375887737699e-05, "epoch": 0.2955782764573098, "percentage": 14.78, "elapsed_time": "4:00:41", "remaining_time": "23:08:08"} +{"current_steps": 1114, "total_steps": 7532, "loss": 0.30626165866851807, "lr": 1.9482365438547272e-05, "epoch": 0.2958438454388527, "percentage": 14.79, "elapsed_time": "4:00:54", "remaining_time": "23:07:56"} +{"current_steps": 1115, "total_steps": 7532, "loss": 0.3625817894935608, "lr": 1.948097017162933e-05, "epoch": 0.2961094144203957, "percentage": 14.8, "elapsed_time": "4:01:08", "remaining_time": "23:07:47"} +{"current_steps": 
1116, "total_steps": 7532, "loss": 0.38403773307800293, "lr": 1.9479573076892152e-05, "epoch": 0.29637498340193863, "percentage": 14.82, "elapsed_time": "4:01:21", "remaining_time": "23:07:34"} +{"current_steps": 1117, "total_steps": 7532, "loss": 0.3645164966583252, "lr": 1.9478174154605093e-05, "epoch": 0.2966405523834816, "percentage": 14.83, "elapsed_time": "4:01:35", "remaining_time": "23:07:26"} +{"current_steps": 1118, "total_steps": 7532, "loss": 0.3714389503002167, "lr": 1.9476773405037836e-05, "epoch": 0.2969061213650246, "percentage": 14.84, "elapsed_time": "4:01:48", "remaining_time": "23:07:14"} +{"current_steps": 1119, "total_steps": 7532, "loss": 0.39809900522232056, "lr": 1.9475370828460436e-05, "epoch": 0.29717169034656754, "percentage": 14.86, "elapsed_time": "4:02:02", "remaining_time": "23:07:06"} +{"current_steps": 1120, "total_steps": 7532, "loss": 0.3698490262031555, "lr": 1.9473966425143292e-05, "epoch": 0.2974372593281105, "percentage": 14.87, "elapsed_time": "4:02:15", "remaining_time": "23:06:53"} +{"current_steps": 1121, "total_steps": 7532, "loss": 0.3072658181190491, "lr": 1.947256019535716e-05, "epoch": 0.29770282830965344, "percentage": 14.88, "elapsed_time": "4:02:28", "remaining_time": "23:06:42"} +{"current_steps": 1122, "total_steps": 7532, "loss": 0.3294365406036377, "lr": 1.947115213937314e-05, "epoch": 0.2979683972911964, "percentage": 14.9, "elapsed_time": "4:02:42", "remaining_time": "23:06:33"} +{"current_steps": 1123, "total_steps": 7532, "loss": 0.34933674335479736, "lr": 1.9469742257462684e-05, "epoch": 0.29823396627273935, "percentage": 14.91, "elapsed_time": "4:02:54", "remaining_time": "23:06:19"} +{"current_steps": 1124, "total_steps": 7532, "loss": 0.34586772322654724, "lr": 1.946833054989761e-05, "epoch": 0.2984995352542823, "percentage": 14.92, "elapsed_time": "4:03:08", "remaining_time": "23:06:09"} +{"current_steps": 1125, "total_steps": 7532, "loss": 0.33158159255981445, "lr": 1.9466917016950076e-05, "epoch": 
0.29876510423582525, "percentage": 14.94, "elapsed_time": "4:03:21", "remaining_time": "23:05:55"} +{"current_steps": 1126, "total_steps": 7532, "loss": 0.32665887475013733, "lr": 1.946550165889259e-05, "epoch": 0.2990306732173682, "percentage": 14.95, "elapsed_time": "4:03:34", "remaining_time": "23:05:45"} +{"current_steps": 1127, "total_steps": 7532, "loss": 0.3333032429218292, "lr": 1.946408447599802e-05, "epoch": 0.29929624219891116, "percentage": 14.96, "elapsed_time": "4:03:47", "remaining_time": "23:05:32"} +{"current_steps": 1128, "total_steps": 7532, "loss": 0.3747228980064392, "lr": 1.9462665468539582e-05, "epoch": 0.2995618111804541, "percentage": 14.98, "elapsed_time": "4:04:00", "remaining_time": "23:05:21"} +{"current_steps": 1129, "total_steps": 7532, "loss": 0.34040436148643494, "lr": 1.9461244636790845e-05, "epoch": 0.29982738016199706, "percentage": 14.99, "elapsed_time": "4:04:13", "remaining_time": "23:05:08"} +{"current_steps": 1130, "total_steps": 7532, "loss": 0.3279584050178528, "lr": 1.9459821981025723e-05, "epoch": 0.30009294914354, "percentage": 15.0, "elapsed_time": "4:04:27", "remaining_time": "23:04:58"} +{"current_steps": 1131, "total_steps": 7532, "loss": 0.33507707715034485, "lr": 1.9458397501518496e-05, "epoch": 0.30035851812508296, "percentage": 15.02, "elapsed_time": "4:04:40", "remaining_time": "23:04:43"} +{"current_steps": 1132, "total_steps": 7532, "loss": 0.3511529862880707, "lr": 1.945697119854378e-05, "epoch": 0.30062408710662597, "percentage": 15.03, "elapsed_time": "4:04:53", "remaining_time": "23:04:32"} +{"current_steps": 1133, "total_steps": 7532, "loss": 0.33260345458984375, "lr": 1.945554307237655e-05, "epoch": 0.3008896560881689, "percentage": 15.04, "elapsed_time": "4:05:06", "remaining_time": "23:04:18"} +{"current_steps": 1134, "total_steps": 7532, "loss": 0.37698423862457275, "lr": 1.9454113123292133e-05, "epoch": 0.3011552250697119, "percentage": 15.06, "elapsed_time": "4:05:19", "remaining_time": "23:04:08"} 
+{"current_steps": 1135, "total_steps": 7532, "loss": 0.34843316674232483, "lr": 1.945268135156621e-05, "epoch": 0.3014207940512548, "percentage": 15.07, "elapsed_time": "4:05:32", "remaining_time": "23:03:55"} +{"current_steps": 1136, "total_steps": 7532, "loss": 0.38723987340927124, "lr": 1.9451247757474805e-05, "epoch": 0.3016863630327978, "percentage": 15.08, "elapsed_time": "4:05:46", "remaining_time": "23:03:45"} +{"current_steps": 1137, "total_steps": 7532, "loss": 0.3836795389652252, "lr": 1.9449812341294302e-05, "epoch": 0.30195193201434073, "percentage": 15.1, "elapsed_time": "4:05:59", "remaining_time": "23:03:31"} +{"current_steps": 1138, "total_steps": 7532, "loss": 0.3362433612346649, "lr": 1.9448375103301424e-05, "epoch": 0.3022175009958837, "percentage": 15.11, "elapsed_time": "4:06:12", "remaining_time": "23:03:19"} +{"current_steps": 1139, "total_steps": 7532, "loss": 0.3615792393684387, "lr": 1.9446936043773264e-05, "epoch": 0.30248306997742663, "percentage": 15.12, "elapsed_time": "4:06:25", "remaining_time": "23:03:08"} +{"current_steps": 1140, "total_steps": 7532, "loss": 0.33693915605545044, "lr": 1.944549516298725e-05, "epoch": 0.3027486389589696, "percentage": 15.14, "elapsed_time": "4:06:38", "remaining_time": "23:02:54"} +{"current_steps": 1141, "total_steps": 7532, "loss": 0.32611170411109924, "lr": 1.9444052461221167e-05, "epoch": 0.30301420794051254, "percentage": 15.15, "elapsed_time": "4:06:51", "remaining_time": "23:02:44"} +{"current_steps": 1142, "total_steps": 7532, "loss": 0.3504132032394409, "lr": 1.9442607938753153e-05, "epoch": 0.3032797769220555, "percentage": 15.16, "elapsed_time": "4:07:04", "remaining_time": "23:02:30"} +{"current_steps": 1143, "total_steps": 7532, "loss": 0.3598168194293976, "lr": 1.944116159586169e-05, "epoch": 0.30354534590359844, "percentage": 15.18, "elapsed_time": "4:07:18", "remaining_time": "23:02:19"} +{"current_steps": 1144, "total_steps": 7532, "loss": 0.33447909355163574, "lr": 
1.9439713432825625e-05, "epoch": 0.3038109148851414, "percentage": 15.19, "elapsed_time": "4:07:31", "remaining_time": "23:02:08"} +{"current_steps": 1145, "total_steps": 7532, "loss": 0.34026333689689636, "lr": 1.943826344992414e-05, "epoch": 0.30407648386668434, "percentage": 15.2, "elapsed_time": "4:07:44", "remaining_time": "23:01:58"} +{"current_steps": 1146, "total_steps": 7532, "loss": 0.323203980922699, "lr": 1.9436811647436772e-05, "epoch": 0.30434205284822735, "percentage": 15.22, "elapsed_time": "4:07:57", "remaining_time": "23:01:43"} +{"current_steps": 1147, "total_steps": 7532, "loss": 0.332398921251297, "lr": 1.943535802564342e-05, "epoch": 0.3046076218297703, "percentage": 15.23, "elapsed_time": "4:08:10", "remaining_time": "23:01:33"} +{"current_steps": 1148, "total_steps": 7532, "loss": 0.3882995545864105, "lr": 1.9433902584824316e-05, "epoch": 0.30487319081131325, "percentage": 15.24, "elapsed_time": "4:08:23", "remaining_time": "23:01:20"} +{"current_steps": 1149, "total_steps": 7532, "loss": 0.35262739658355713, "lr": 1.943244532526006e-05, "epoch": 0.3051387597928562, "percentage": 15.25, "elapsed_time": "4:08:37", "remaining_time": "23:01:10"} +{"current_steps": 1150, "total_steps": 7532, "loss": 0.39694511890411377, "lr": 1.9430986247231586e-05, "epoch": 0.30540432877439916, "percentage": 15.27, "elapsed_time": "4:08:50", "remaining_time": "23:00:56"} +{"current_steps": 1151, "total_steps": 7532, "loss": 0.3692580759525299, "lr": 1.9429525351020197e-05, "epoch": 0.3056698977559421, "percentage": 15.28, "elapsed_time": "4:09:03", "remaining_time": "23:00:46"} +{"current_steps": 1152, "total_steps": 7532, "loss": 0.3685402572154999, "lr": 1.9428062636907526e-05, "epoch": 0.30593546673748506, "percentage": 15.29, "elapsed_time": "4:09:16", "remaining_time": "23:00:32"} +{"current_steps": 1153, "total_steps": 7532, "loss": 0.37557253241539, "lr": 1.9426598105175575e-05, "epoch": 0.306201035719028, "percentage": 15.31, "elapsed_time": "4:09:30", 
"remaining_time": "23:00:21"} +{"current_steps": 1154, "total_steps": 7532, "loss": 0.3323203921318054, "lr": 1.9425131756106687e-05, "epoch": 0.30646660470057097, "percentage": 15.32, "elapsed_time": "4:09:42", "remaining_time": "23:00:08"} +{"current_steps": 1155, "total_steps": 7532, "loss": 0.37262290716171265, "lr": 1.9423663589983554e-05, "epoch": 0.3067321736821139, "percentage": 15.33, "elapsed_time": "4:09:55", "remaining_time": "22:59:54"} +{"current_steps": 1156, "total_steps": 7532, "loss": 0.36621618270874023, "lr": 1.9422193607089224e-05, "epoch": 0.30699774266365687, "percentage": 15.35, "elapsed_time": "4:10:08", "remaining_time": "22:59:42"} +{"current_steps": 1157, "total_steps": 7532, "loss": 0.3844982385635376, "lr": 1.942072180770709e-05, "epoch": 0.3072633116451998, "percentage": 15.36, "elapsed_time": "4:10:21", "remaining_time": "22:59:27"} +{"current_steps": 1158, "total_steps": 7532, "loss": 0.3229531943798065, "lr": 1.94192481921209e-05, "epoch": 0.3075288806267428, "percentage": 15.37, "elapsed_time": "4:10:34", "remaining_time": "22:59:16"} +{"current_steps": 1159, "total_steps": 7532, "loss": 0.34862661361694336, "lr": 1.9417772760614745e-05, "epoch": 0.3077944496082857, "percentage": 15.39, "elapsed_time": "4:10:47", "remaining_time": "22:59:02"} +{"current_steps": 1160, "total_steps": 7532, "loss": 0.35496509075164795, "lr": 1.941629551347308e-05, "epoch": 0.30806001858982873, "percentage": 15.4, "elapsed_time": "4:11:01", "remaining_time": "22:58:51"} +{"current_steps": 1161, "total_steps": 7532, "loss": 0.3695065975189209, "lr": 1.9414816450980686e-05, "epoch": 0.3083255875713717, "percentage": 15.41, "elapsed_time": "4:11:13", "remaining_time": "22:58:38"} +{"current_steps": 1162, "total_steps": 7532, "loss": 0.3472525179386139, "lr": 1.9413335573422723e-05, "epoch": 0.30859115655291464, "percentage": 15.43, "elapsed_time": "4:11:27", "remaining_time": "22:58:28"} +{"current_steps": 1163, "total_steps": 7532, "loss": 
0.3447483479976654, "lr": 1.9411852881084683e-05, "epoch": 0.3088567255344576, "percentage": 15.44, "elapsed_time": "4:11:40", "remaining_time": "22:58:14"} +{"current_steps": 1164, "total_steps": 7532, "loss": 0.31047824025154114, "lr": 1.941036837425241e-05, "epoch": 0.30912229451600054, "percentage": 15.45, "elapsed_time": "4:11:53", "remaining_time": "22:58:03"} +{"current_steps": 1165, "total_steps": 7532, "loss": 0.34502410888671875, "lr": 1.9408882053212094e-05, "epoch": 0.3093878634975435, "percentage": 15.47, "elapsed_time": "4:12:06", "remaining_time": "22:57:49"} +{"current_steps": 1166, "total_steps": 7532, "loss": 0.3663109540939331, "lr": 1.940739391825029e-05, "epoch": 0.30965343247908644, "percentage": 15.48, "elapsed_time": "4:12:19", "remaining_time": "22:57:38"} +{"current_steps": 1167, "total_steps": 7532, "loss": 0.3635792136192322, "lr": 1.9405903969653887e-05, "epoch": 0.3099190014606294, "percentage": 15.49, "elapsed_time": "4:12:32", "remaining_time": "22:57:24"} +{"current_steps": 1168, "total_steps": 7532, "loss": 0.359528124332428, "lr": 1.940441220771013e-05, "epoch": 0.31018457044217235, "percentage": 15.51, "elapsed_time": "4:12:45", "remaining_time": "22:57:11"} +{"current_steps": 1169, "total_steps": 7532, "loss": 0.32566630840301514, "lr": 1.9402918632706618e-05, "epoch": 0.3104501394237153, "percentage": 15.52, "elapsed_time": "4:12:58", "remaining_time": "22:56:59"} +{"current_steps": 1170, "total_steps": 7532, "loss": 0.34758460521698, "lr": 1.940142324493129e-05, "epoch": 0.31071570840525825, "percentage": 15.53, "elapsed_time": "4:13:11", "remaining_time": "22:56:46"} +{"current_steps": 1171, "total_steps": 7532, "loss": 0.3484055995941162, "lr": 1.9399926044672438e-05, "epoch": 0.3109812773868012, "percentage": 15.55, "elapsed_time": "4:13:25", "remaining_time": "22:56:37"} +{"current_steps": 1172, "total_steps": 7532, "loss": 0.41958773136138916, "lr": 1.93984270322187e-05, "epoch": 0.31124684636834415, "percentage": 15.56, 
"elapsed_time": "4:13:38", "remaining_time": "22:56:24"} +{"current_steps": 1173, "total_steps": 7532, "loss": 0.3578398525714874, "lr": 1.9396926207859085e-05, "epoch": 0.3115124153498871, "percentage": 15.57, "elapsed_time": "4:13:52", "remaining_time": "22:56:15"} +{"current_steps": 1174, "total_steps": 7532, "loss": 0.38140422105789185, "lr": 1.9395423571882917e-05, "epoch": 0.3117779843314301, "percentage": 15.59, "elapsed_time": "4:14:05", "remaining_time": "22:56:03"} +{"current_steps": 1175, "total_steps": 7532, "loss": 0.3782861828804016, "lr": 1.9393919124579898e-05, "epoch": 0.31204355331297307, "percentage": 15.6, "elapsed_time": "4:14:19", "remaining_time": "22:55:55"} +{"current_steps": 1176, "total_steps": 7532, "loss": 0.3211040496826172, "lr": 1.939241286624006e-05, "epoch": 0.312309122294516, "percentage": 15.61, "elapsed_time": "4:14:32", "remaining_time": "22:55:43"} +{"current_steps": 1177, "total_steps": 7532, "loss": 0.3090783953666687, "lr": 1.9390904797153795e-05, "epoch": 0.31257469127605897, "percentage": 15.63, "elapsed_time": "4:14:45", "remaining_time": "22:55:32"} +{"current_steps": 1178, "total_steps": 7532, "loss": 0.3542889654636383, "lr": 1.938939491761184e-05, "epoch": 0.3128402602576019, "percentage": 15.64, "elapsed_time": "4:14:58", "remaining_time": "22:55:19"} +{"current_steps": 1179, "total_steps": 7532, "loss": 0.369164377450943, "lr": 1.9387883227905285e-05, "epoch": 0.3131058292391449, "percentage": 15.65, "elapsed_time": "4:15:12", "remaining_time": "22:55:08"} +{"current_steps": 1180, "total_steps": 7532, "loss": 0.35200801491737366, "lr": 1.9386369728325562e-05, "epoch": 0.3133713982206878, "percentage": 15.67, "elapsed_time": "4:15:25", "remaining_time": "22:54:59"} +{"current_steps": 1181, "total_steps": 7532, "loss": 0.3696276843547821, "lr": 1.9384854419164454e-05, "epoch": 0.3136369672022308, "percentage": 15.68, "elapsed_time": "4:15:39", "remaining_time": "22:54:50"} +{"current_steps": 1182, "total_steps": 
7532, "loss": 0.3403652012348175, "lr": 1.9383337300714104e-05, "epoch": 0.31390253618377373, "percentage": 15.69, "elapsed_time": "4:15:52", "remaining_time": "22:54:37"} +{"current_steps": 1183, "total_steps": 7532, "loss": 0.3307063579559326, "lr": 1.9381818373266987e-05, "epoch": 0.3141681051653167, "percentage": 15.71, "elapsed_time": "4:16:06", "remaining_time": "22:54:28"} +{"current_steps": 1184, "total_steps": 7532, "loss": 0.3223465085029602, "lr": 1.9380297637115933e-05, "epoch": 0.31443367414685963, "percentage": 15.72, "elapsed_time": "4:16:19", "remaining_time": "22:54:17"} +{"current_steps": 1185, "total_steps": 7532, "loss": 0.4013838768005371, "lr": 1.9378775092554124e-05, "epoch": 0.3146992431284026, "percentage": 15.73, "elapsed_time": "4:16:33", "remaining_time": "22:54:08"} +{"current_steps": 1186, "total_steps": 7532, "loss": 0.3596574664115906, "lr": 1.9377250739875095e-05, "epoch": 0.31496481210994554, "percentage": 15.75, "elapsed_time": "4:16:46", "remaining_time": "22:53:55"} +{"current_steps": 1187, "total_steps": 7532, "loss": 0.41639968752861023, "lr": 1.937572457937271e-05, "epoch": 0.3152303810914885, "percentage": 15.76, "elapsed_time": "4:16:59", "remaining_time": "22:53:44"} +{"current_steps": 1188, "total_steps": 7532, "loss": 0.3001318573951721, "lr": 1.9374196611341212e-05, "epoch": 0.3154959500730315, "percentage": 15.77, "elapsed_time": "4:17:13", "remaining_time": "22:53:35"} +{"current_steps": 1189, "total_steps": 7532, "loss": 0.33238667249679565, "lr": 1.937266683607516e-05, "epoch": 0.31576151905457445, "percentage": 15.79, "elapsed_time": "4:17:26", "remaining_time": "22:53:25"} +{"current_steps": 1190, "total_steps": 7532, "loss": 0.33638086915016174, "lr": 1.9371135253869483e-05, "epoch": 0.3160270880361174, "percentage": 15.8, "elapsed_time": "4:17:41", "remaining_time": "22:53:18"} +{"current_steps": 1191, "total_steps": 7532, "loss": 0.34445878863334656, "lr": 1.9369601865019452e-05, "epoch": 0.31629265701766035, 
"percentage": 15.81, "elapsed_time": "4:17:54", "remaining_time": "22:53:06"} +{"current_steps": 1192, "total_steps": 7532, "loss": 0.33554553985595703, "lr": 1.9368066669820684e-05, "epoch": 0.3165582259992033, "percentage": 15.83, "elapsed_time": "4:18:08", "remaining_time": "22:52:58"} +{"current_steps": 1193, "total_steps": 7532, "loss": 0.3668493628501892, "lr": 1.936652966856915e-05, "epoch": 0.31682379498074625, "percentage": 15.84, "elapsed_time": "4:18:21", "remaining_time": "22:52:47"} +{"current_steps": 1194, "total_steps": 7532, "loss": 0.3813396990299225, "lr": 1.9364990861561163e-05, "epoch": 0.3170893639622892, "percentage": 15.85, "elapsed_time": "4:18:35", "remaining_time": "22:52:38"} +{"current_steps": 1195, "total_steps": 7532, "loss": 0.33625900745391846, "lr": 1.936345024909339e-05, "epoch": 0.31735493294383216, "percentage": 15.87, "elapsed_time": "4:18:48", "remaining_time": "22:52:27"} +{"current_steps": 1196, "total_steps": 7532, "loss": 0.31131428480148315, "lr": 1.9361907831462836e-05, "epoch": 0.3176205019253751, "percentage": 15.88, "elapsed_time": "4:19:02", "remaining_time": "22:52:19"} +{"current_steps": 1197, "total_steps": 7532, "loss": 0.32571589946746826, "lr": 1.936036360896687e-05, "epoch": 0.31788607090691806, "percentage": 15.89, "elapsed_time": "4:19:15", "remaining_time": "22:52:07"} +{"current_steps": 1198, "total_steps": 7532, "loss": 0.36207717657089233, "lr": 1.9358817581903193e-05, "epoch": 0.318151639888461, "percentage": 15.91, "elapsed_time": "4:19:29", "remaining_time": "22:51:56"} +{"current_steps": 1199, "total_steps": 7532, "loss": 0.3743855059146881, "lr": 1.9357269750569864e-05, "epoch": 0.31841720887000396, "percentage": 15.92, "elapsed_time": "4:19:42", "remaining_time": "22:51:47"} +{"current_steps": 1200, "total_steps": 7532, "loss": 0.3862137794494629, "lr": 1.9355720115265283e-05, "epoch": 0.3186827778515469, "percentage": 15.93, "elapsed_time": "4:19:56", "remaining_time": "22:51:35"} 
+{"current_steps": 1201, "total_steps": 7532, "loss": 0.33353424072265625, "lr": 1.935416867628821e-05, "epoch": 0.31894834683308987, "percentage": 15.95, "elapsed_time": "4:20:15", "remaining_time": "22:51:56"} +{"current_steps": 1202, "total_steps": 7532, "loss": 0.3277953267097473, "lr": 1.9352615433937733e-05, "epoch": 0.3192139158146329, "percentage": 15.96, "elapsed_time": "4:20:28", "remaining_time": "22:51:44"} +{"current_steps": 1203, "total_steps": 7532, "loss": 0.38247692584991455, "lr": 1.9351060388513304e-05, "epoch": 0.3194794847961758, "percentage": 15.97, "elapsed_time": "4:20:42", "remaining_time": "22:51:33"} +{"current_steps": 1204, "total_steps": 7532, "loss": 0.3330709934234619, "lr": 1.9349503540314724e-05, "epoch": 0.3197450537777188, "percentage": 15.99, "elapsed_time": "4:20:54", "remaining_time": "22:51:19"} +{"current_steps": 1205, "total_steps": 7532, "loss": 0.3809449076652527, "lr": 1.9347944889642125e-05, "epoch": 0.32001062275926173, "percentage": 16.0, "elapsed_time": "4:21:08", "remaining_time": "22:51:08"} +{"current_steps": 1206, "total_steps": 7532, "loss": 0.33623188734054565, "lr": 1.9346384436796e-05, "epoch": 0.3202761917408047, "percentage": 16.01, "elapsed_time": "4:21:21", "remaining_time": "22:50:56"} +{"current_steps": 1207, "total_steps": 7532, "loss": 0.35465264320373535, "lr": 1.9344822182077184e-05, "epoch": 0.32054176072234764, "percentage": 16.02, "elapsed_time": "4:21:35", "remaining_time": "22:50:46"} +{"current_steps": 1208, "total_steps": 7532, "loss": 0.3532233238220215, "lr": 1.9343258125786866e-05, "epoch": 0.3208073297038906, "percentage": 16.04, "elapsed_time": "4:21:48", "remaining_time": "22:50:33"} +{"current_steps": 1209, "total_steps": 7532, "loss": 0.3498903512954712, "lr": 1.9341692268226572e-05, "epoch": 0.32107289868543354, "percentage": 16.05, "elapsed_time": "4:22:01", "remaining_time": "22:50:22"} +{"current_steps": 1210, "total_steps": 7532, "loss": 0.36124879121780396, "lr": 
1.9340124609698185e-05, "epoch": 0.3213384676669765, "percentage": 16.06, "elapsed_time": "4:22:14", "remaining_time": "22:50:08"} +{"current_steps": 1211, "total_steps": 7532, "loss": 0.38535434007644653, "lr": 1.933855515050393e-05, "epoch": 0.32160403664851944, "percentage": 16.08, "elapsed_time": "4:22:27", "remaining_time": "22:49:58"} +{"current_steps": 1212, "total_steps": 7532, "loss": 0.39999911189079285, "lr": 1.9336983890946383e-05, "epoch": 0.3218696056300624, "percentage": 16.09, "elapsed_time": "4:22:40", "remaining_time": "22:49:45"} +{"current_steps": 1213, "total_steps": 7532, "loss": 0.3519791066646576, "lr": 1.9335410831328457e-05, "epoch": 0.32213517461160535, "percentage": 16.1, "elapsed_time": "4:22:53", "remaining_time": "22:49:30"} +{"current_steps": 1214, "total_steps": 7532, "loss": 0.35882368683815, "lr": 1.9333835971953424e-05, "epoch": 0.3224007435931483, "percentage": 16.12, "elapsed_time": "4:23:06", "remaining_time": "22:49:19"} +{"current_steps": 1215, "total_steps": 7532, "loss": 0.36132001876831055, "lr": 1.93322593131249e-05, "epoch": 0.32266631257469125, "percentage": 16.13, "elapsed_time": "4:23:19", "remaining_time": "22:49:03"} +{"current_steps": 1216, "total_steps": 7532, "loss": 0.36840832233428955, "lr": 1.9330680855146845e-05, "epoch": 0.32293188155623426, "percentage": 16.14, "elapsed_time": "4:23:33", "remaining_time": "22:48:54"} +{"current_steps": 1217, "total_steps": 7532, "loss": 0.3755963444709778, "lr": 1.9329100598323563e-05, "epoch": 0.3231974505377772, "percentage": 16.16, "elapsed_time": "4:23:46", "remaining_time": "22:48:41"} +{"current_steps": 1218, "total_steps": 7532, "loss": 0.400601863861084, "lr": 1.9327518542959717e-05, "epoch": 0.32346301951932016, "percentage": 16.17, "elapsed_time": "4:23:59", "remaining_time": "22:48:29"} +{"current_steps": 1219, "total_steps": 7532, "loss": 0.3100128769874573, "lr": 1.93259346893603e-05, "epoch": 0.3237285885008631, "percentage": 16.18, "elapsed_time": "4:24:12", 
"remaining_time": "22:48:15"} +{"current_steps": 1220, "total_steps": 7532, "loss": 0.3439880609512329, "lr": 1.9324349037830665e-05, "epoch": 0.32399415748240606, "percentage": 16.2, "elapsed_time": "4:24:25", "remaining_time": "22:48:02"} +{"current_steps": 1221, "total_steps": 7532, "loss": 0.3612631559371948, "lr": 1.9322761588676505e-05, "epoch": 0.324259726463949, "percentage": 16.21, "elapsed_time": "4:24:37", "remaining_time": "22:47:45"} +{"current_steps": 1222, "total_steps": 7532, "loss": 0.38202327489852905, "lr": 1.9321172342203863e-05, "epoch": 0.32452529544549197, "percentage": 16.22, "elapsed_time": "4:24:50", "remaining_time": "22:47:32"} +{"current_steps": 1223, "total_steps": 7532, "loss": 0.3405265808105469, "lr": 1.9319581298719127e-05, "epoch": 0.3247908644270349, "percentage": 16.24, "elapsed_time": "4:25:03", "remaining_time": "22:47:17"} +{"current_steps": 1224, "total_steps": 7532, "loss": 0.4110907018184662, "lr": 1.931798845852903e-05, "epoch": 0.32505643340857787, "percentage": 16.25, "elapsed_time": "4:25:16", "remaining_time": "22:47:06"} +{"current_steps": 1225, "total_steps": 7532, "loss": 0.3007548451423645, "lr": 1.9316393821940654e-05, "epoch": 0.3253220023901208, "percentage": 16.26, "elapsed_time": "4:25:28", "remaining_time": "22:46:50"} +{"current_steps": 1226, "total_steps": 7532, "loss": 0.32769858837127686, "lr": 1.9314797389261426e-05, "epoch": 0.3255875713716638, "percentage": 16.28, "elapsed_time": "4:25:41", "remaining_time": "22:46:36"} +{"current_steps": 1227, "total_steps": 7532, "loss": 0.3619830310344696, "lr": 1.931319916079912e-05, "epoch": 0.3258531403532067, "percentage": 16.29, "elapsed_time": "4:25:55", "remaining_time": "22:46:26"} +{"current_steps": 1228, "total_steps": 7532, "loss": 0.3470210134983063, "lr": 1.9311599136861853e-05, "epoch": 0.3261187093347497, "percentage": 16.3, "elapsed_time": "4:26:07", "remaining_time": "22:46:12"} +{"current_steps": 1229, "total_steps": 7532, "loss": 
0.3471665382385254, "lr": 1.9309997317758093e-05, "epoch": 0.32638427831629263, "percentage": 16.32, "elapsed_time": "4:26:21", "remaining_time": "22:46:01"} +{"current_steps": 1230, "total_steps": 7532, "loss": 0.3717760443687439, "lr": 1.930839370379665e-05, "epoch": 0.32664984729783564, "percentage": 16.33, "elapsed_time": "4:26:34", "remaining_time": "22:45:47"} +{"current_steps": 1231, "total_steps": 7532, "loss": 0.37279975414276123, "lr": 1.9306788295286687e-05, "epoch": 0.3269154162793786, "percentage": 16.34, "elapsed_time": "4:26:47", "remaining_time": "22:45:35"} +{"current_steps": 1232, "total_steps": 7532, "loss": 0.3884522020816803, "lr": 1.93051810925377e-05, "epoch": 0.32718098526092154, "percentage": 16.36, "elapsed_time": "4:27:00", "remaining_time": "22:45:23"} +{"current_steps": 1233, "total_steps": 7532, "loss": 0.4277604818344116, "lr": 1.9303572095859545e-05, "epoch": 0.3274465542424645, "percentage": 16.37, "elapsed_time": "4:27:13", "remaining_time": "22:45:12"} +{"current_steps": 1234, "total_steps": 7532, "loss": 0.2888818681240082, "lr": 1.9301961305562415e-05, "epoch": 0.32771212322400745, "percentage": 16.38, "elapsed_time": "4:27:26", "remaining_time": "22:44:58"} +{"current_steps": 1235, "total_steps": 7532, "loss": 0.3134511709213257, "lr": 1.9300348721956854e-05, "epoch": 0.3279776922055504, "percentage": 16.4, "elapsed_time": "4:27:40", "remaining_time": "22:44:47"} +{"current_steps": 1236, "total_steps": 7532, "loss": 0.38525280356407166, "lr": 1.9298734345353745e-05, "epoch": 0.32824326118709335, "percentage": 16.41, "elapsed_time": "4:27:53", "remaining_time": "22:44:34"} +{"current_steps": 1237, "total_steps": 7532, "loss": 0.3692918121814728, "lr": 1.9297118176064324e-05, "epoch": 0.3285088301686363, "percentage": 16.42, "elapsed_time": "4:28:06", "remaining_time": "22:44:22"} +{"current_steps": 1238, "total_steps": 7532, "loss": 0.3443421721458435, "lr": 1.9295500214400165e-05, "epoch": 0.32877439915017925, "percentage": 
16.44, "elapsed_time": "4:28:19", "remaining_time": "22:44:10"} +{"current_steps": 1239, "total_steps": 7532, "loss": 0.3228621184825897, "lr": 1.9293880460673197e-05, "epoch": 0.3290399681317222, "percentage": 16.45, "elapsed_time": "4:28:32", "remaining_time": "22:43:59"} +{"current_steps": 1240, "total_steps": 7532, "loss": 0.330943763256073, "lr": 1.9292258915195688e-05, "epoch": 0.32930553711326516, "percentage": 16.46, "elapsed_time": "4:28:45", "remaining_time": "22:43:45"} +{"current_steps": 1241, "total_steps": 7532, "loss": 0.356637567281723, "lr": 1.929063557828025e-05, "epoch": 0.3295711060948081, "percentage": 16.48, "elapsed_time": "4:28:59", "remaining_time": "22:43:34"} +{"current_steps": 1242, "total_steps": 7532, "loss": 0.3481113910675049, "lr": 1.9289010450239843e-05, "epoch": 0.32983667507635106, "percentage": 16.49, "elapsed_time": "4:29:12", "remaining_time": "22:43:20"} +{"current_steps": 1243, "total_steps": 7532, "loss": 0.36579906940460205, "lr": 1.928738353138778e-05, "epoch": 0.330102244057894, "percentage": 16.5, "elapsed_time": "4:29:25", "remaining_time": "22:43:09"} +{"current_steps": 1244, "total_steps": 7532, "loss": 0.33025234937667847, "lr": 1.9285754822037705e-05, "epoch": 0.330367813039437, "percentage": 16.52, "elapsed_time": "4:29:38", "remaining_time": "22:42:56"} +{"current_steps": 1245, "total_steps": 7532, "loss": 0.34848469495773315, "lr": 1.9284124322503613e-05, "epoch": 0.33063338202097997, "percentage": 16.53, "elapsed_time": "4:29:51", "remaining_time": "22:42:43"} +{"current_steps": 1246, "total_steps": 7532, "loss": 0.3523876368999481, "lr": 1.928249203309985e-05, "epoch": 0.3308989510025229, "percentage": 16.54, "elapsed_time": "4:30:04", "remaining_time": "22:42:32"} +{"current_steps": 1247, "total_steps": 7532, "loss": 0.3695565462112427, "lr": 1.92808579541411e-05, "epoch": 0.3311645199840659, "percentage": 16.56, "elapsed_time": "4:30:17", "remaining_time": "22:42:18"} +{"current_steps": 1248, "total_steps": 
7532, "loss": 0.3557945191860199, "lr": 1.9279222085942396e-05, "epoch": 0.3314300889656088, "percentage": 16.57, "elapsed_time": "4:30:30", "remaining_time": "22:42:06"} +{"current_steps": 1249, "total_steps": 7532, "loss": 0.3015502989292145, "lr": 1.9277584428819113e-05, "epoch": 0.3316956579471518, "percentage": 16.58, "elapsed_time": "4:30:43", "remaining_time": "22:41:53"} +{"current_steps": 1250, "total_steps": 7532, "loss": 0.31333664059638977, "lr": 1.9275944983086964e-05, "epoch": 0.33196122692869473, "percentage": 16.6, "elapsed_time": "4:30:57", "remaining_time": "22:41:41"} +{"current_steps": 1251, "total_steps": 7532, "loss": 0.36595287919044495, "lr": 1.9274303749062028e-05, "epoch": 0.3322267959102377, "percentage": 16.61, "elapsed_time": "4:31:09", "remaining_time": "22:41:24"} +{"current_steps": 1252, "total_steps": 7532, "loss": 0.3400266170501709, "lr": 1.9272660727060705e-05, "epoch": 0.33249236489178063, "percentage": 16.62, "elapsed_time": "4:31:21", "remaining_time": "22:41:10"} +{"current_steps": 1253, "total_steps": 7532, "loss": 0.3642529547214508, "lr": 1.927101591739976e-05, "epoch": 0.3327579338733236, "percentage": 16.64, "elapsed_time": "4:31:34", "remaining_time": "22:40:55"} +{"current_steps": 1254, "total_steps": 7532, "loss": 0.3418777287006378, "lr": 1.926936932039628e-05, "epoch": 0.33302350285486654, "percentage": 16.65, "elapsed_time": "4:31:47", "remaining_time": "22:40:43"} +{"current_steps": 1255, "total_steps": 7532, "loss": 0.33382388949394226, "lr": 1.9267720936367723e-05, "epoch": 0.3332890718364095, "percentage": 16.66, "elapsed_time": "4:32:01", "remaining_time": "22:40:31"} +{"current_steps": 1256, "total_steps": 7532, "loss": 0.36257779598236084, "lr": 1.926607076563187e-05, "epoch": 0.33355464081795244, "percentage": 16.68, "elapsed_time": "4:32:14", "remaining_time": "22:40:20"} +{"current_steps": 1257, "total_steps": 7532, "loss": 0.3018002510070801, "lr": 1.926441880850686e-05, "epoch": 0.3338202097994954, 
"percentage": 16.69, "elapsed_time": "4:32:27", "remaining_time": "22:40:05"} +{"current_steps": 1258, "total_steps": 7532, "loss": 0.3373662233352661, "lr": 1.9262765065311165e-05, "epoch": 0.3340857787810384, "percentage": 16.7, "elapsed_time": "4:32:40", "remaining_time": "22:39:52"} +{"current_steps": 1259, "total_steps": 7532, "loss": 0.3555397391319275, "lr": 1.9261109536363613e-05, "epoch": 0.33435134776258135, "percentage": 16.72, "elapsed_time": "4:32:53", "remaining_time": "22:39:42"} +{"current_steps": 1260, "total_steps": 7532, "loss": 0.3004256784915924, "lr": 1.925945222198336e-05, "epoch": 0.3346169167441243, "percentage": 16.73, "elapsed_time": "4:33:06", "remaining_time": "22:39:28"} +{"current_steps": 1261, "total_steps": 7532, "loss": 0.33299940824508667, "lr": 1.925779312248993e-05, "epoch": 0.33488248572566726, "percentage": 16.74, "elapsed_time": "4:33:19", "remaining_time": "22:39:17"} +{"current_steps": 1262, "total_steps": 7532, "loss": 0.3715725541114807, "lr": 1.9256132238203166e-05, "epoch": 0.3351480547072102, "percentage": 16.76, "elapsed_time": "4:33:32", "remaining_time": "22:39:03"} +{"current_steps": 1263, "total_steps": 7532, "loss": 0.35133951902389526, "lr": 1.9254469569443274e-05, "epoch": 0.33541362368875316, "percentage": 16.77, "elapsed_time": "4:33:46", "remaining_time": "22:38:54"} +{"current_steps": 1264, "total_steps": 7532, "loss": 0.3328818380832672, "lr": 1.92528051165308e-05, "epoch": 0.3356791926702961, "percentage": 16.78, "elapsed_time": "4:33:59", "remaining_time": "22:38:40"} +{"current_steps": 1265, "total_steps": 7532, "loss": 0.3665468692779541, "lr": 1.925113887978662e-05, "epoch": 0.33594476165183906, "percentage": 16.8, "elapsed_time": "4:34:12", "remaining_time": "22:38:29"} +{"current_steps": 1266, "total_steps": 7532, "loss": 0.3489571511745453, "lr": 1.9249470859531976e-05, "epoch": 0.336210330633382, "percentage": 16.81, "elapsed_time": "4:34:25", "remaining_time": "22:38:15"} +{"current_steps": 1267, 
"total_steps": 7532, "loss": 0.30038982629776, "lr": 1.9247801056088433e-05, "epoch": 0.33647589961492497, "percentage": 16.82, "elapsed_time": "4:34:38", "remaining_time": "22:38:01"} +{"current_steps": 1268, "total_steps": 7532, "loss": 0.4163355827331543, "lr": 1.9246129469777918e-05, "epoch": 0.3367414685964679, "percentage": 16.83, "elapsed_time": "4:34:50", "remaining_time": "22:37:46"} +{"current_steps": 1269, "total_steps": 7532, "loss": 0.33687612414360046, "lr": 1.924445610092269e-05, "epoch": 0.33700703757801087, "percentage": 16.85, "elapsed_time": "4:35:03", "remaining_time": "22:37:31"} +{"current_steps": 1270, "total_steps": 7532, "loss": 0.3448297679424286, "lr": 1.924278094984535e-05, "epoch": 0.3372726065595538, "percentage": 16.86, "elapsed_time": "4:35:16", "remaining_time": "22:37:18"} +{"current_steps": 1271, "total_steps": 7532, "loss": 0.35257208347320557, "lr": 1.9241104016868853e-05, "epoch": 0.3375381755410968, "percentage": 16.87, "elapsed_time": "4:35:29", "remaining_time": "22:37:02"} +{"current_steps": 1272, "total_steps": 7532, "loss": 0.34880566596984863, "lr": 1.9239425302316487e-05, "epoch": 0.3378037445226398, "percentage": 16.89, "elapsed_time": "4:35:41", "remaining_time": "22:36:48"} +{"current_steps": 1273, "total_steps": 7532, "loss": 0.33643782138824463, "lr": 1.9237744806511895e-05, "epoch": 0.33806931350418273, "percentage": 16.9, "elapsed_time": "4:35:54", "remaining_time": "22:36:33"} +{"current_steps": 1274, "total_steps": 7532, "loss": 0.32345050573349, "lr": 1.9236062529779057e-05, "epoch": 0.3383348824857257, "percentage": 16.91, "elapsed_time": "4:36:07", "remaining_time": "22:36:19"} +{"current_steps": 1275, "total_steps": 7532, "loss": 0.33983978629112244, "lr": 1.9234378472442286e-05, "epoch": 0.33860045146726864, "percentage": 16.93, "elapsed_time": "4:36:19", "remaining_time": "22:36:04"} +{"current_steps": 1276, "total_steps": 7532, "loss": 0.32825571298599243, "lr": 1.923269263482626e-05, "epoch": 
0.3388660204488116, "percentage": 16.94, "elapsed_time": "4:36:32", "remaining_time": "22:35:50"} +{"current_steps": 1277, "total_steps": 7532, "loss": 0.3434044122695923, "lr": 1.923100501725598e-05, "epoch": 0.33913158943035454, "percentage": 16.95, "elapsed_time": "4:36:45", "remaining_time": "22:35:34"} +{"current_steps": 1278, "total_steps": 7532, "loss": 0.3463204503059387, "lr": 1.9229315620056805e-05, "epoch": 0.3393971584118975, "percentage": 16.97, "elapsed_time": "4:36:58", "remaining_time": "22:35:21"} +{"current_steps": 1279, "total_steps": 7532, "loss": 0.3608240485191345, "lr": 1.9227624443554425e-05, "epoch": 0.33966272739344044, "percentage": 16.98, "elapsed_time": "4:37:10", "remaining_time": "22:35:07"} +{"current_steps": 1280, "total_steps": 7532, "loss": 0.36131763458251953, "lr": 1.9225931488074882e-05, "epoch": 0.3399282963749834, "percentage": 16.99, "elapsed_time": "4:37:23", "remaining_time": "22:34:54"} +{"current_steps": 1281, "total_steps": 7532, "loss": 0.3270101547241211, "lr": 1.922423675394456e-05, "epoch": 0.34019386535652635, "percentage": 17.01, "elapsed_time": "4:37:37", "remaining_time": "22:34:44"} +{"current_steps": 1282, "total_steps": 7532, "loss": 0.3551778495311737, "lr": 1.922254024149018e-05, "epoch": 0.3404594343380693, "percentage": 17.02, "elapsed_time": "4:37:50", "remaining_time": "22:34:30"} +{"current_steps": 1283, "total_steps": 7532, "loss": 0.3686622381210327, "lr": 1.9220841951038815e-05, "epoch": 0.34072500331961225, "percentage": 17.03, "elapsed_time": "4:38:03", "remaining_time": "22:34:18"} +{"current_steps": 1284, "total_steps": 7532, "loss": 0.35161536931991577, "lr": 1.921914188291787e-05, "epoch": 0.3409905723011552, "percentage": 17.05, "elapsed_time": "4:38:16", "remaining_time": "22:34:04"} +{"current_steps": 1285, "total_steps": 7532, "loss": 0.3549870550632477, "lr": 1.92174400374551e-05, "epoch": 0.34125614128269816, "percentage": 17.06, "elapsed_time": "4:38:29", "remaining_time": "22:33:52"} 
+{"current_steps": 1286, "total_steps": 7532, "loss": 0.36780738830566406, "lr": 1.9215736414978593e-05, "epoch": 0.34152171026424116, "percentage": 17.07, "elapsed_time": "4:38:42", "remaining_time": "22:33:37"} +{"current_steps": 1287, "total_steps": 7532, "loss": 0.36060047149658203, "lr": 1.9214031015816803e-05, "epoch": 0.3417872792457841, "percentage": 17.09, "elapsed_time": "4:38:55", "remaining_time": "22:33:26"} +{"current_steps": 1288, "total_steps": 7532, "loss": 0.32578715682029724, "lr": 1.9212323840298502e-05, "epoch": 0.34205284822732707, "percentage": 17.1, "elapsed_time": "4:39:07", "remaining_time": "22:33:09"} +{"current_steps": 1289, "total_steps": 7532, "loss": 0.3505493402481079, "lr": 1.9210614888752813e-05, "epoch": 0.34231841720887, "percentage": 17.11, "elapsed_time": "4:39:21", "remaining_time": "22:32:59"} +{"current_steps": 1290, "total_steps": 7532, "loss": 0.32681795954704285, "lr": 1.9208904161509203e-05, "epoch": 0.34258398619041297, "percentage": 17.13, "elapsed_time": "4:39:33", "remaining_time": "22:32:44"} +{"current_steps": 1291, "total_steps": 7532, "loss": 0.34808459877967834, "lr": 1.9207191658897473e-05, "epoch": 0.3428495551719559, "percentage": 17.14, "elapsed_time": "4:39:47", "remaining_time": "22:32:32"} +{"current_steps": 1292, "total_steps": 7532, "loss": 0.3588678240776062, "lr": 1.920547738124779e-05, "epoch": 0.3431151241534989, "percentage": 17.15, "elapsed_time": "4:39:59", "remaining_time": "22:32:18"} +{"current_steps": 1293, "total_steps": 7532, "loss": 0.3528832495212555, "lr": 1.9203761328890626e-05, "epoch": 0.3433806931350418, "percentage": 17.17, "elapsed_time": "4:40:13", "remaining_time": "22:32:07"} +{"current_steps": 1294, "total_steps": 7532, "loss": 0.33549001812934875, "lr": 1.9202043502156833e-05, "epoch": 0.3436462621165848, "percentage": 17.18, "elapsed_time": "4:40:26", "remaining_time": "22:31:54"} +{"current_steps": 1295, "total_steps": 7532, "loss": 0.3466021418571472, "lr": 
1.920032390137758e-05, "epoch": 0.34391183109812773, "percentage": 17.19, "elapsed_time": "4:40:39", "remaining_time": "22:31:43"} +{"current_steps": 1296, "total_steps": 7532, "loss": 0.35646146535873413, "lr": 1.9198602526884388e-05, "epoch": 0.3441774000796707, "percentage": 17.21, "elapsed_time": "4:40:52", "remaining_time": "22:31:31"} +{"current_steps": 1297, "total_steps": 7532, "loss": 0.3442128300666809, "lr": 1.9196879379009112e-05, "epoch": 0.34444296906121363, "percentage": 17.22, "elapsed_time": "4:41:05", "remaining_time": "22:31:17"} +{"current_steps": 1298, "total_steps": 7532, "loss": 0.3854391872882843, "lr": 1.9195154458083962e-05, "epoch": 0.3447085380427566, "percentage": 17.23, "elapsed_time": "4:41:19", "remaining_time": "22:31:08"} +{"current_steps": 1299, "total_steps": 7532, "loss": 0.376137375831604, "lr": 1.9193427764441477e-05, "epoch": 0.34497410702429954, "percentage": 17.25, "elapsed_time": "4:41:32", "remaining_time": "22:30:55"} +{"current_steps": 1300, "total_steps": 7532, "loss": 0.3115769028663635, "lr": 1.9191699298414547e-05, "epoch": 0.34523967600584254, "percentage": 17.26, "elapsed_time": "4:41:45", "remaining_time": "22:30:44"} +{"current_steps": 1301, "total_steps": 7532, "loss": 0.32553282380104065, "lr": 1.9189969060336396e-05, "epoch": 0.3455052449873855, "percentage": 17.27, "elapsed_time": "4:42:04", "remaining_time": "22:30:57"} +{"current_steps": 1302, "total_steps": 7532, "loss": 0.39529356360435486, "lr": 1.9188237050540597e-05, "epoch": 0.34577081396892845, "percentage": 17.29, "elapsed_time": "4:42:17", "remaining_time": "22:30:47"} +{"current_steps": 1303, "total_steps": 7532, "loss": 0.3027458190917969, "lr": 1.9186503269361063e-05, "epoch": 0.3460363829504714, "percentage": 17.3, "elapsed_time": "4:42:30", "remaining_time": "22:30:34"} +{"current_steps": 1304, "total_steps": 7532, "loss": 0.39317795634269714, "lr": 1.918476771713204e-05, "epoch": 0.34630195193201435, "percentage": 17.31, "elapsed_time": 
"4:42:44", "remaining_time": "22:30:22"} +{"current_steps": 1305, "total_steps": 7532, "loss": 0.3730325698852539, "lr": 1.918303039418813e-05, "epoch": 0.3465675209135573, "percentage": 17.33, "elapsed_time": "4:42:56", "remaining_time": "22:30:08"} +{"current_steps": 1306, "total_steps": 7532, "loss": 0.34862780570983887, "lr": 1.918129130086426e-05, "epoch": 0.34683308989510025, "percentage": 17.34, "elapsed_time": "4:43:10", "remaining_time": "22:29:55"} +{"current_steps": 1307, "total_steps": 7532, "loss": 0.32139018177986145, "lr": 1.9179550437495707e-05, "epoch": 0.3470986588766432, "percentage": 17.35, "elapsed_time": "4:43:22", "remaining_time": "22:29:41"} +{"current_steps": 1308, "total_steps": 7532, "loss": 0.37246090173721313, "lr": 1.91778078044181e-05, "epoch": 0.34736422785818616, "percentage": 17.37, "elapsed_time": "4:43:36", "remaining_time": "22:29:31"} +{"current_steps": 1309, "total_steps": 7532, "loss": 0.30985957384109497, "lr": 1.9176063401967386e-05, "epoch": 0.3476297968397291, "percentage": 17.38, "elapsed_time": "4:43:49", "remaining_time": "22:29:17"} +{"current_steps": 1310, "total_steps": 7532, "loss": 0.3713758587837219, "lr": 1.917431723047987e-05, "epoch": 0.34789536582127206, "percentage": 17.39, "elapsed_time": "4:44:01", "remaining_time": "22:29:02"} +{"current_steps": 1311, "total_steps": 7532, "loss": 0.3465833067893982, "lr": 1.9172569290292193e-05, "epoch": 0.348160934802815, "percentage": 17.41, "elapsed_time": "4:44:15", "remaining_time": "22:28:51"} +{"current_steps": 1312, "total_steps": 7532, "loss": 0.34807220101356506, "lr": 1.917081958174134e-05, "epoch": 0.34842650378435797, "percentage": 17.42, "elapsed_time": "4:44:27", "remaining_time": "22:28:36"} +{"current_steps": 1313, "total_steps": 7532, "loss": 0.3369640111923218, "lr": 1.9169068105164627e-05, "epoch": 0.3486920727659009, "percentage": 17.43, "elapsed_time": "4:44:41", "remaining_time": "22:28:25"} +{"current_steps": 1314, "total_steps": 7532, "loss": 
0.3521544337272644, "lr": 1.9167314860899724e-05, "epoch": 0.3489576417474439, "percentage": 17.45, "elapsed_time": "4:44:54", "remaining_time": "22:28:11"} +{"current_steps": 1315, "total_steps": 7532, "loss": 0.3256300687789917, "lr": 1.9165559849284635e-05, "epoch": 0.3492232107289869, "percentage": 17.46, "elapsed_time": "4:45:07", "remaining_time": "22:28:00"} +{"current_steps": 1316, "total_steps": 7532, "loss": 0.32401931285858154, "lr": 1.9163803070657706e-05, "epoch": 0.34948877971052983, "percentage": 17.47, "elapsed_time": "4:45:20", "remaining_time": "22:27:45"} +{"current_steps": 1317, "total_steps": 7532, "loss": 0.372749924659729, "lr": 1.916204452535762e-05, "epoch": 0.3497543486920728, "percentage": 17.49, "elapsed_time": "4:45:33", "remaining_time": "22:27:33"} +{"current_steps": 1318, "total_steps": 7532, "loss": 0.35853224992752075, "lr": 1.9160284213723407e-05, "epoch": 0.35001991767361573, "percentage": 17.5, "elapsed_time": "4:45:46", "remaining_time": "22:27:19"} +{"current_steps": 1319, "total_steps": 7532, "loss": 0.32850801944732666, "lr": 1.9158522136094433e-05, "epoch": 0.3502854866551587, "percentage": 17.51, "elapsed_time": "4:45:59", "remaining_time": "22:27:07"} +{"current_steps": 1320, "total_steps": 7532, "loss": 0.3548474907875061, "lr": 1.9156758292810404e-05, "epoch": 0.35055105563670164, "percentage": 17.53, "elapsed_time": "4:46:12", "remaining_time": "22:26:53"} +{"current_steps": 1321, "total_steps": 7532, "loss": 0.38709041476249695, "lr": 1.9154992684211372e-05, "epoch": 0.3508166246182446, "percentage": 17.54, "elapsed_time": "4:46:25", "remaining_time": "22:26:40"} +{"current_steps": 1322, "total_steps": 7532, "loss": 0.40369266271591187, "lr": 1.9153225310637726e-05, "epoch": 0.35108219359978754, "percentage": 17.55, "elapsed_time": "4:46:38", "remaining_time": "22:26:29"} +{"current_steps": 1323, "total_steps": 7532, "loss": 0.3570155203342438, "lr": 1.9151456172430186e-05, "epoch": 0.3513477625813305, "percentage": 
17.57, "elapsed_time": "4:46:51", "remaining_time": "22:26:15"} +{"current_steps": 1324, "total_steps": 7532, "loss": 0.34426411986351013, "lr": 1.9149685269929833e-05, "epoch": 0.35161333156287344, "percentage": 17.58, "elapsed_time": "4:47:04", "remaining_time": "22:26:04"} +{"current_steps": 1325, "total_steps": 7532, "loss": 0.35666006803512573, "lr": 1.9147912603478066e-05, "epoch": 0.3518789005444164, "percentage": 17.59, "elapsed_time": "4:47:17", "remaining_time": "22:25:50"} +{"current_steps": 1326, "total_steps": 7532, "loss": 0.36225512623786926, "lr": 1.9146138173416643e-05, "epoch": 0.35214446952595935, "percentage": 17.6, "elapsed_time": "4:47:31", "remaining_time": "22:25:39"} +{"current_steps": 1327, "total_steps": 7532, "loss": 0.3312349319458008, "lr": 1.9144361980087643e-05, "epoch": 0.3524100385075023, "percentage": 17.62, "elapsed_time": "4:47:43", "remaining_time": "22:25:25"} +{"current_steps": 1328, "total_steps": 7532, "loss": 0.3590523302555084, "lr": 1.9142584023833506e-05, "epoch": 0.3526756074890453, "percentage": 17.63, "elapsed_time": "4:47:57", "remaining_time": "22:25:13"} +{"current_steps": 1329, "total_steps": 7532, "loss": 0.341480016708374, "lr": 1.9140804304996997e-05, "epoch": 0.35294117647058826, "percentage": 17.64, "elapsed_time": "4:48:09", "remaining_time": "22:24:59"} +{"current_steps": 1330, "total_steps": 7532, "loss": 0.37246501445770264, "lr": 1.913902282392122e-05, "epoch": 0.3532067454521312, "percentage": 17.66, "elapsed_time": "4:48:23", "remaining_time": "22:24:46"} +{"current_steps": 1331, "total_steps": 7532, "loss": 0.33834031224250793, "lr": 1.913723958094963e-05, "epoch": 0.35347231443367416, "percentage": 17.67, "elapsed_time": "4:48:35", "remaining_time": "22:24:31"} +{"current_steps": 1332, "total_steps": 7532, "loss": 0.29285067319869995, "lr": 1.913545457642601e-05, "epoch": 0.3537378834152171, "percentage": 17.68, "elapsed_time": "4:48:48", "remaining_time": "22:24:20"} +{"current_steps": 1333, 
"total_steps": 7532, "loss": 0.2903720736503601, "lr": 1.913366781069449e-05, "epoch": 0.35400345239676007, "percentage": 17.7, "elapsed_time": "4:49:01", "remaining_time": "22:24:05"} +{"current_steps": 1334, "total_steps": 7532, "loss": 0.36428314447402954, "lr": 1.913187928409954e-05, "epoch": 0.354269021378303, "percentage": 17.71, "elapsed_time": "4:49:14", "remaining_time": "22:23:50"} +{"current_steps": 1335, "total_steps": 7532, "loss": 0.3379477560520172, "lr": 1.9130088996985967e-05, "epoch": 0.35453459035984597, "percentage": 17.72, "elapsed_time": "4:49:26", "remaining_time": "22:23:35"} +{"current_steps": 1336, "total_steps": 7532, "loss": 0.35286659002304077, "lr": 1.912829694969891e-05, "epoch": 0.3548001593413889, "percentage": 17.74, "elapsed_time": "4:49:39", "remaining_time": "22:23:23"} +{"current_steps": 1337, "total_steps": 7532, "loss": 0.3670174479484558, "lr": 1.9126503142583864e-05, "epoch": 0.3550657283229319, "percentage": 17.75, "elapsed_time": "4:49:52", "remaining_time": "22:23:07"} +{"current_steps": 1338, "total_steps": 7532, "loss": 0.3422902226448059, "lr": 1.9124707575986642e-05, "epoch": 0.3553312973044748, "percentage": 17.76, "elapsed_time": "4:50:04", "remaining_time": "22:22:52"} +{"current_steps": 1339, "total_steps": 7532, "loss": 0.29778385162353516, "lr": 1.912291025025342e-05, "epoch": 0.3555968662860178, "percentage": 17.78, "elapsed_time": "4:50:18", "remaining_time": "22:22:40"} +{"current_steps": 1340, "total_steps": 7532, "loss": 0.36249661445617676, "lr": 1.91211111657307e-05, "epoch": 0.35586243526756073, "percentage": 17.79, "elapsed_time": "4:50:30", "remaining_time": "22:22:25"} +{"current_steps": 1341, "total_steps": 7532, "loss": 0.340925395488739, "lr": 1.9119310322765315e-05, "epoch": 0.3561280042491037, "percentage": 17.8, "elapsed_time": "4:50:43", "remaining_time": "22:22:12"} +{"current_steps": 1342, "total_steps": 7532, "loss": 0.35674089193344116, "lr": 1.9117507721704455e-05, "epoch": 
0.3563935732306467, "percentage": 17.82, "elapsed_time": "4:50:56", "remaining_time": "22:21:56"} +{"current_steps": 1343, "total_steps": 7532, "loss": 0.3602067828178406, "lr": 1.9115703362895636e-05, "epoch": 0.35665914221218964, "percentage": 17.83, "elapsed_time": "4:51:08", "remaining_time": "22:21:42"} +{"current_steps": 1344, "total_steps": 7532, "loss": 0.35211697220802307, "lr": 1.9113897246686716e-05, "epoch": 0.3569247111937326, "percentage": 17.84, "elapsed_time": "4:51:21", "remaining_time": "22:21:28"} +{"current_steps": 1345, "total_steps": 7532, "loss": 0.3706115484237671, "lr": 1.91120893734259e-05, "epoch": 0.35719028017527554, "percentage": 17.86, "elapsed_time": "4:51:35", "remaining_time": "22:21:17"} +{"current_steps": 1346, "total_steps": 7532, "loss": 0.3365110754966736, "lr": 1.9110279743461717e-05, "epoch": 0.3574558491568185, "percentage": 17.87, "elapsed_time": "4:51:47", "remaining_time": "22:21:02"} +{"current_steps": 1347, "total_steps": 7532, "loss": 0.40012121200561523, "lr": 1.9108468357143047e-05, "epoch": 0.35772141813836145, "percentage": 17.88, "elapsed_time": "4:52:00", "remaining_time": "22:20:48"} +{"current_steps": 1348, "total_steps": 7532, "loss": 0.4003351926803589, "lr": 1.91066552148191e-05, "epoch": 0.3579869871199044, "percentage": 17.9, "elapsed_time": "4:52:12", "remaining_time": "22:20:31"} +{"current_steps": 1349, "total_steps": 7532, "loss": 0.3574616014957428, "lr": 1.910484031683943e-05, "epoch": 0.35825255610144735, "percentage": 17.91, "elapsed_time": "4:52:25", "remaining_time": "22:20:17"} +{"current_steps": 1350, "total_steps": 7532, "loss": 0.3345073461532593, "lr": 1.910302366355393e-05, "epoch": 0.3585181250829903, "percentage": 17.92, "elapsed_time": "4:52:37", "remaining_time": "22:20:02"} +{"current_steps": 1351, "total_steps": 7532, "loss": 0.3467676341533661, "lr": 1.910120525531283e-05, "epoch": 0.35878369406453325, "percentage": 17.94, "elapsed_time": "4:52:50", "remaining_time": "22:19:48"} 
+{"current_steps": 1352, "total_steps": 7532, "loss": 0.32433655858039856, "lr": 1.9099385092466695e-05, "epoch": 0.3590492630460762, "percentage": 17.95, "elapsed_time": "4:53:04", "remaining_time": "22:19:37"} +{"current_steps": 1353, "total_steps": 7532, "loss": 0.3366447985172272, "lr": 1.909756317536643e-05, "epoch": 0.35931483202761916, "percentage": 17.96, "elapsed_time": "4:53:17", "remaining_time": "22:19:23"} +{"current_steps": 1354, "total_steps": 7532, "loss": 0.310118168592453, "lr": 1.909573950436328e-05, "epoch": 0.3595804010091621, "percentage": 17.98, "elapsed_time": "4:53:30", "remaining_time": "22:19:10"} +{"current_steps": 1355, "total_steps": 7532, "loss": 0.3503451943397522, "lr": 1.909391407980883e-05, "epoch": 0.35984596999070506, "percentage": 17.99, "elapsed_time": "4:53:42", "remaining_time": "22:18:54"} +{"current_steps": 1356, "total_steps": 7532, "loss": 0.3375343978404999, "lr": 1.9092086902054996e-05, "epoch": 0.36011153897224807, "percentage": 18.0, "elapsed_time": "4:53:55", "remaining_time": "22:18:41"} +{"current_steps": 1357, "total_steps": 7532, "loss": 0.3056451082229614, "lr": 1.909025797145404e-05, "epoch": 0.360377107953791, "percentage": 18.02, "elapsed_time": "4:54:08", "remaining_time": "22:18:27"} +{"current_steps": 1358, "total_steps": 7532, "loss": 0.3063391447067261, "lr": 1.9088427288358556e-05, "epoch": 0.360642676935334, "percentage": 18.03, "elapsed_time": "4:54:21", "remaining_time": "22:18:17"} +{"current_steps": 1359, "total_steps": 7532, "loss": 0.3055405616760254, "lr": 1.908659485312148e-05, "epoch": 0.3609082459168769, "percentage": 18.04, "elapsed_time": "4:54:34", "remaining_time": "22:18:02"} +{"current_steps": 1360, "total_steps": 7532, "loss": 0.38323235511779785, "lr": 1.908476066609608e-05, "epoch": 0.3611738148984199, "percentage": 18.06, "elapsed_time": "4:54:47", "remaining_time": "22:17:48"} +{"current_steps": 1361, "total_steps": 7532, "loss": 0.33526092767715454, "lr": 1.908292472763597e-05, 
"epoch": 0.36143938387996283, "percentage": 18.07, "elapsed_time": "4:54:59", "remaining_time": "22:17:32"} +{"current_steps": 1362, "total_steps": 7532, "loss": 0.34485238790512085, "lr": 1.9081087038095094e-05, "epoch": 0.3617049528615058, "percentage": 18.08, "elapsed_time": "4:55:12", "remaining_time": "22:17:17"} +{"current_steps": 1363, "total_steps": 7532, "loss": 0.2963239252567291, "lr": 1.907924759782774e-05, "epoch": 0.36197052184304873, "percentage": 18.1, "elapsed_time": "4:55:24", "remaining_time": "22:17:00"} +{"current_steps": 1364, "total_steps": 7532, "loss": 0.3536864221096039, "lr": 1.9077406407188532e-05, "epoch": 0.3622360908245917, "percentage": 18.11, "elapsed_time": "4:55:36", "remaining_time": "22:16:44"} +{"current_steps": 1365, "total_steps": 7532, "loss": 0.3724798858165741, "lr": 1.907556346653242e-05, "epoch": 0.36250165980613464, "percentage": 18.12, "elapsed_time": "4:55:48", "remaining_time": "22:16:27"} +{"current_steps": 1366, "total_steps": 7532, "loss": 0.36241161823272705, "lr": 1.9073718776214717e-05, "epoch": 0.3627672287876776, "percentage": 18.14, "elapsed_time": "4:56:01", "remaining_time": "22:16:11"} +{"current_steps": 1367, "total_steps": 7532, "loss": 0.3484225273132324, "lr": 1.9071872336591042e-05, "epoch": 0.36303279776922054, "percentage": 18.15, "elapsed_time": "4:56:13", "remaining_time": "22:15:54"} +{"current_steps": 1368, "total_steps": 7532, "loss": 0.33606311678886414, "lr": 1.9070024148017375e-05, "epoch": 0.3632983667507635, "percentage": 18.16, "elapsed_time": "4:56:26", "remaining_time": "22:15:43"} +{"current_steps": 1369, "total_steps": 7532, "loss": 0.3263503909111023, "lr": 1.906817421085002e-05, "epoch": 0.36356393573230644, "percentage": 18.18, "elapsed_time": "4:56:38", "remaining_time": "22:15:27"} +{"current_steps": 1370, "total_steps": 7532, "loss": 0.33454492688179016, "lr": 1.906632252544563e-05, "epoch": 0.36382950471384945, "percentage": 18.19, "elapsed_time": "4:56:51", "remaining_time": 
"22:15:12"} +{"current_steps": 1371, "total_steps": 7532, "loss": 0.34858438372612, "lr": 1.9064469092161185e-05, "epoch": 0.3640950736953924, "percentage": 18.2, "elapsed_time": "4:57:04", "remaining_time": "22:15:01"} +{"current_steps": 1372, "total_steps": 7532, "loss": 0.3466234505176544, "lr": 1.9062613911354005e-05, "epoch": 0.36436064267693535, "percentage": 18.22, "elapsed_time": "4:57:18", "remaining_time": "22:14:49"} +{"current_steps": 1373, "total_steps": 7532, "loss": 0.33574312925338745, "lr": 1.9060756983381743e-05, "epoch": 0.3646262116584783, "percentage": 18.23, "elapsed_time": "4:57:31", "remaining_time": "22:14:39"} +{"current_steps": 1374, "total_steps": 7532, "loss": 0.3012363016605377, "lr": 1.90588983086024e-05, "epoch": 0.36489178064002126, "percentage": 18.24, "elapsed_time": "4:57:44", "remaining_time": "22:14:25"} +{"current_steps": 1375, "total_steps": 7532, "loss": 0.3050191402435303, "lr": 1.90570378873743e-05, "epoch": 0.3651573496215642, "percentage": 18.26, "elapsed_time": "4:57:57", "remaining_time": "22:14:11"} +{"current_steps": 1376, "total_steps": 7532, "loss": 0.35090070962905884, "lr": 1.905517572005611e-05, "epoch": 0.36542291860310716, "percentage": 18.27, "elapsed_time": "4:58:10", "remaining_time": "22:13:58"} +{"current_steps": 1377, "total_steps": 7532, "loss": 0.3276262581348419, "lr": 1.9053311807006845e-05, "epoch": 0.3656884875846501, "percentage": 18.28, "elapsed_time": "4:58:23", "remaining_time": "22:13:46"} +{"current_steps": 1378, "total_steps": 7532, "loss": 0.3303500711917877, "lr": 1.9051446148585833e-05, "epoch": 0.36595405656619306, "percentage": 18.3, "elapsed_time": "4:58:36", "remaining_time": "22:13:31"} +{"current_steps": 1379, "total_steps": 7532, "loss": 0.3748486042022705, "lr": 1.9049578745152754e-05, "epoch": 0.366219625547736, "percentage": 18.31, "elapsed_time": "4:58:48", "remaining_time": "22:13:17"} +{"current_steps": 1380, "total_steps": 7532, "loss": 0.30339744687080383, "lr": 
1.9047709597067628e-05, "epoch": 0.36648519452927897, "percentage": 18.32, "elapsed_time": "4:59:01", "remaining_time": "22:13:02"} +{"current_steps": 1381, "total_steps": 7532, "loss": 0.31811147928237915, "lr": 1.9045838704690796e-05, "epoch": 0.3667507635108219, "percentage": 18.34, "elapsed_time": "4:59:13", "remaining_time": "22:12:45"} +{"current_steps": 1382, "total_steps": 7532, "loss": 0.3541119694709778, "lr": 1.9043966068382945e-05, "epoch": 0.36701633249236487, "percentage": 18.35, "elapsed_time": "4:59:26", "remaining_time": "22:12:30"} +{"current_steps": 1383, "total_steps": 7532, "loss": 0.36639657616615295, "lr": 1.9042091688505104e-05, "epoch": 0.3672819014739078, "percentage": 18.36, "elapsed_time": "4:59:38", "remaining_time": "22:12:13"} +{"current_steps": 1384, "total_steps": 7532, "loss": 0.35859787464141846, "lr": 1.9040215565418628e-05, "epoch": 0.36754747045545083, "percentage": 18.37, "elapsed_time": "4:59:50", "remaining_time": "22:11:57"} +{"current_steps": 1385, "total_steps": 7532, "loss": 0.3210521340370178, "lr": 1.9038337699485207e-05, "epoch": 0.3678130394369938, "percentage": 18.39, "elapsed_time": "5:00:02", "remaining_time": "22:11:41"} +{"current_steps": 1386, "total_steps": 7532, "loss": 0.3207433819770813, "lr": 1.9036458091066875e-05, "epoch": 0.36807860841853673, "percentage": 18.4, "elapsed_time": "5:00:15", "remaining_time": "22:11:26"} +{"current_steps": 1387, "total_steps": 7532, "loss": 0.3475082218647003, "lr": 1.9034576740526e-05, "epoch": 0.3683441774000797, "percentage": 18.41, "elapsed_time": "5:00:27", "remaining_time": "22:11:09"} +{"current_steps": 1388, "total_steps": 7532, "loss": 0.33252987265586853, "lr": 1.903269364822528e-05, "epoch": 0.36860974638162264, "percentage": 18.43, "elapsed_time": "5:00:39", "remaining_time": "22:10:54"} +{"current_steps": 1389, "total_steps": 7532, "loss": 0.32200103998184204, "lr": 1.903080881452776e-05, "epoch": 0.3688753153631656, "percentage": 18.44, "elapsed_time": 
"5:00:51", "remaining_time": "22:10:36"} +{"current_steps": 1390, "total_steps": 7532, "loss": 0.34780022501945496, "lr": 1.9028922239796803e-05, "epoch": 0.36914088434470854, "percentage": 18.45, "elapsed_time": "5:01:04", "remaining_time": "22:10:22"} +{"current_steps": 1391, "total_steps": 7532, "loss": 0.35411912202835083, "lr": 1.902703392439613e-05, "epoch": 0.3694064533262515, "percentage": 18.47, "elapsed_time": "5:01:17", "remaining_time": "22:10:06"} +{"current_steps": 1392, "total_steps": 7532, "loss": 0.35232803225517273, "lr": 1.9025143868689773e-05, "epoch": 0.36967202230779445, "percentage": 18.48, "elapsed_time": "5:01:29", "remaining_time": "22:09:50"} +{"current_steps": 1393, "total_steps": 7532, "loss": 0.38561391830444336, "lr": 1.9023252073042128e-05, "epoch": 0.3699375912893374, "percentage": 18.49, "elapsed_time": "5:01:42", "remaining_time": "22:09:38"} +{"current_steps": 1394, "total_steps": 7532, "loss": 0.3184170126914978, "lr": 1.9021358537817897e-05, "epoch": 0.37020316027088035, "percentage": 18.51, "elapsed_time": "5:01:55", "remaining_time": "22:09:23"} +{"current_steps": 1395, "total_steps": 7532, "loss": 0.32455068826675415, "lr": 1.9019463263382142e-05, "epoch": 0.3704687292524233, "percentage": 18.52, "elapsed_time": "5:02:07", "remaining_time": "22:09:09"} +{"current_steps": 1396, "total_steps": 7532, "loss": 0.32998934388160706, "lr": 1.901756625010024e-05, "epoch": 0.37073429823396625, "percentage": 18.53, "elapsed_time": "5:02:20", "remaining_time": "22:08:53"} +{"current_steps": 1397, "total_steps": 7532, "loss": 0.3361780643463135, "lr": 1.901566749833792e-05, "epoch": 0.3709998672155092, "percentage": 18.55, "elapsed_time": "5:02:33", "remaining_time": "22:08:41"} +{"current_steps": 1398, "total_steps": 7532, "loss": 0.3618711829185486, "lr": 1.9013767008461236e-05, "epoch": 0.37126543619705216, "percentage": 18.56, "elapsed_time": "5:02:45", "remaining_time": "22:08:26"} +{"current_steps": 1399, "total_steps": 7532, 
"loss": 0.3904131054878235, "lr": 1.901186478083658e-05, "epoch": 0.37153100517859516, "percentage": 18.57, "elapsed_time": "5:02:59", "remaining_time": "22:08:14"} +{"current_steps": 1400, "total_steps": 7532, "loss": 0.35742759704589844, "lr": 1.9009960815830676e-05, "epoch": 0.3717965741601381, "percentage": 18.59, "elapsed_time": "5:03:12", "remaining_time": "22:08:02"} +{"current_steps": 1401, "total_steps": 7532, "loss": 0.32880812883377075, "lr": 1.9008055113810595e-05, "epoch": 0.37206214314168107, "percentage": 18.6, "elapsed_time": "5:03:31", "remaining_time": "22:08:18"} +{"current_steps": 1402, "total_steps": 7532, "loss": 0.3379839360713959, "lr": 1.9006147675143724e-05, "epoch": 0.372327712123224, "percentage": 18.61, "elapsed_time": "5:03:44", "remaining_time": "22:08:02"} +{"current_steps": 1403, "total_steps": 7532, "loss": 0.3635789453983307, "lr": 1.90042385001978e-05, "epoch": 0.37259328110476697, "percentage": 18.63, "elapsed_time": "5:03:57", "remaining_time": "22:07:49"} +{"current_steps": 1404, "total_steps": 7532, "loss": 0.3462461233139038, "lr": 1.900232758934089e-05, "epoch": 0.3728588500863099, "percentage": 18.64, "elapsed_time": "5:04:09", "remaining_time": "22:07:31"} +{"current_steps": 1405, "total_steps": 7532, "loss": 0.34578579664230347, "lr": 1.900041494294139e-05, "epoch": 0.3731244190678529, "percentage": 18.65, "elapsed_time": "5:04:22", "remaining_time": "22:07:19"} +{"current_steps": 1406, "total_steps": 7532, "loss": 0.36266931891441345, "lr": 1.899850056136804e-05, "epoch": 0.3733899880493958, "percentage": 18.67, "elapsed_time": "5:04:35", "remaining_time": "22:07:05"} +{"current_steps": 1407, "total_steps": 7532, "loss": 0.34019365906715393, "lr": 1.899658444498991e-05, "epoch": 0.3736555570309388, "percentage": 18.68, "elapsed_time": "5:04:48", "remaining_time": "22:06:53"} +{"current_steps": 1408, "total_steps": 7532, "loss": 0.3057953119277954, "lr": 1.8994666594176404e-05, "epoch": 0.37392112601248173, "percentage": 
18.69, "elapsed_time": "5:05:01", "remaining_time": "22:06:39"} +{"current_steps": 1409, "total_steps": 7532, "loss": 0.3663131892681122, "lr": 1.8992747009297265e-05, "epoch": 0.3741866949940247, "percentage": 18.71, "elapsed_time": "5:05:13", "remaining_time": "22:06:25"} +{"current_steps": 1410, "total_steps": 7532, "loss": 0.3402065634727478, "lr": 1.8990825690722557e-05, "epoch": 0.37445226397556763, "percentage": 18.72, "elapsed_time": "5:05:27", "remaining_time": "22:06:15"} +{"current_steps": 1411, "total_steps": 7532, "loss": 0.3437868654727936, "lr": 1.8988902638822693e-05, "epoch": 0.3747178329571106, "percentage": 18.73, "elapsed_time": "5:05:40", "remaining_time": "22:06:02"} +{"current_steps": 1412, "total_steps": 7532, "loss": 0.40972524881362915, "lr": 1.8986977853968416e-05, "epoch": 0.37498340193865354, "percentage": 18.75, "elapsed_time": "5:05:53", "remaining_time": "22:05:49"} +{"current_steps": 1413, "total_steps": 7532, "loss": 0.3237977921962738, "lr": 1.89850513365308e-05, "epoch": 0.37524897092019655, "percentage": 18.76, "elapsed_time": "5:06:06", "remaining_time": "22:05:35"} +{"current_steps": 1414, "total_steps": 7532, "loss": 0.3146173357963562, "lr": 1.8983123086881254e-05, "epoch": 0.3755145399017395, "percentage": 18.77, "elapsed_time": "5:06:19", "remaining_time": "22:05:25"} +{"current_steps": 1415, "total_steps": 7532, "loss": 0.33485543727874756, "lr": 1.8981193105391524e-05, "epoch": 0.37578010888328245, "percentage": 18.79, "elapsed_time": "5:06:32", "remaining_time": "22:05:10"} +{"current_steps": 1416, "total_steps": 7532, "loss": 0.36379897594451904, "lr": 1.8979261392433685e-05, "epoch": 0.3760456778648254, "percentage": 18.8, "elapsed_time": "5:06:45", "remaining_time": "22:04:59"} +{"current_steps": 1417, "total_steps": 7532, "loss": 0.2737882137298584, "lr": 1.8977327948380154e-05, "epoch": 0.37631124684636835, "percentage": 18.81, "elapsed_time": "5:06:59", "remaining_time": "22:04:46"} +{"current_steps": 1418, 
"total_steps": 7532, "loss": 0.3554575443267822, "lr": 1.897539277360367e-05, "epoch": 0.3765768158279113, "percentage": 18.83, "elapsed_time": "5:07:12", "remaining_time": "22:04:34"} +{"current_steps": 1419, "total_steps": 7532, "loss": 0.3297621011734009, "lr": 1.897345586847731e-05, "epoch": 0.37684238480945426, "percentage": 18.84, "elapsed_time": "5:07:25", "remaining_time": "22:04:21"} +{"current_steps": 1420, "total_steps": 7532, "loss": 0.32272985577583313, "lr": 1.8971517233374497e-05, "epoch": 0.3771079537909972, "percentage": 18.85, "elapsed_time": "5:07:38", "remaining_time": "22:04:10"} +{"current_steps": 1421, "total_steps": 7532, "loss": 0.32175642251968384, "lr": 1.8969576868668967e-05, "epoch": 0.37737352277254016, "percentage": 18.87, "elapsed_time": "5:07:51", "remaining_time": "22:03:56"} +{"current_steps": 1422, "total_steps": 7532, "loss": 0.35973137617111206, "lr": 1.8967634774734807e-05, "epoch": 0.3776390917540831, "percentage": 18.88, "elapsed_time": "5:08:04", "remaining_time": "22:03:43"} +{"current_steps": 1423, "total_steps": 7532, "loss": 0.3385169506072998, "lr": 1.8965690951946424e-05, "epoch": 0.37790466073562606, "percentage": 18.89, "elapsed_time": "5:08:17", "remaining_time": "22:03:29"} +{"current_steps": 1424, "total_steps": 7532, "loss": 0.3683067560195923, "lr": 1.8963745400678564e-05, "epoch": 0.378170229717169, "percentage": 18.91, "elapsed_time": "5:08:29", "remaining_time": "22:03:14"} +{"current_steps": 1425, "total_steps": 7532, "loss": 0.3711622357368469, "lr": 1.896179812130631e-05, "epoch": 0.37843579869871197, "percentage": 18.92, "elapsed_time": "5:08:42", "remaining_time": "22:03:00"} +{"current_steps": 1426, "total_steps": 7532, "loss": 0.30416572093963623, "lr": 1.895984911420507e-05, "epoch": 0.3787013676802549, "percentage": 18.93, "elapsed_time": "5:08:54", "remaining_time": "22:02:44"} +{"current_steps": 1427, "total_steps": 7532, "loss": 0.3439522385597229, "lr": 1.8957898379750598e-05, "epoch": 
0.3789669366617979, "percentage": 18.95, "elapsed_time": "5:09:07", "remaining_time": "22:02:29"} +{"current_steps": 1428, "total_steps": 7532, "loss": 0.3663806617259979, "lr": 1.895594591831896e-05, "epoch": 0.3792325056433409, "percentage": 18.96, "elapsed_time": "5:09:20", "remaining_time": "22:02:15"} +{"current_steps": 1429, "total_steps": 7532, "loss": 0.32132354378700256, "lr": 1.895399173028658e-05, "epoch": 0.37949807462488383, "percentage": 18.97, "elapsed_time": "5:09:33", "remaining_time": "22:02:02"} +{"current_steps": 1430, "total_steps": 7532, "loss": 0.3040635585784912, "lr": 1.8952035816030196e-05, "epoch": 0.3797636436064268, "percentage": 18.99, "elapsed_time": "5:09:46", "remaining_time": "22:01:49"} +{"current_steps": 1431, "total_steps": 7532, "loss": 0.3548869788646698, "lr": 1.8950078175926886e-05, "epoch": 0.38002921258796973, "percentage": 19.0, "elapsed_time": "5:09:59", "remaining_time": "22:01:38"} +{"current_steps": 1432, "total_steps": 7532, "loss": 0.3114319443702698, "lr": 1.894811881035406e-05, "epoch": 0.3802947815695127, "percentage": 19.01, "elapsed_time": "5:10:12", "remaining_time": "22:01:24"} +{"current_steps": 1433, "total_steps": 7532, "loss": 0.3589673936367035, "lr": 1.894615771968946e-05, "epoch": 0.38056035055105564, "percentage": 19.03, "elapsed_time": "5:10:25", "remaining_time": "22:01:13"} +{"current_steps": 1434, "total_steps": 7532, "loss": 0.3073863983154297, "lr": 1.894419490431116e-05, "epoch": 0.3808259195325986, "percentage": 19.04, "elapsed_time": "5:10:38", "remaining_time": "22:00:59"} +{"current_steps": 1435, "total_steps": 7532, "loss": 0.32474076747894287, "lr": 1.8942230364597572e-05, "epoch": 0.38109148851414154, "percentage": 19.05, "elapsed_time": "5:10:52", "remaining_time": "22:00:48"} +{"current_steps": 1436, "total_steps": 7532, "loss": 0.3363546133041382, "lr": 1.8940264100927432e-05, "epoch": 0.3813570574956845, "percentage": 19.07, "elapsed_time": "5:11:05", "remaining_time": "22:00:36"} 
+{"current_steps": 1437, "total_steps": 7532, "loss": 0.33679312467575073, "lr": 1.8938296113679814e-05, "epoch": 0.38162262647722744, "percentage": 19.08, "elapsed_time": "5:11:18", "remaining_time": "22:00:26"} +{"current_steps": 1438, "total_steps": 7532, "loss": 0.33171382546424866, "lr": 1.8936326403234125e-05, "epoch": 0.3818881954587704, "percentage": 19.09, "elapsed_time": "5:11:31", "remaining_time": "22:00:12"} +{"current_steps": 1439, "total_steps": 7532, "loss": 0.3717402219772339, "lr": 1.8934354969970097e-05, "epoch": 0.38215376444031335, "percentage": 19.11, "elapsed_time": "5:11:44", "remaining_time": "21:59:58"} +{"current_steps": 1440, "total_steps": 7532, "loss": 0.335337370634079, "lr": 1.8932381814267802e-05, "epoch": 0.3824193334218563, "percentage": 19.12, "elapsed_time": "5:11:57", "remaining_time": "21:59:44"} +{"current_steps": 1441, "total_steps": 7532, "loss": 0.32745444774627686, "lr": 1.893040693650764e-05, "epoch": 0.3826849024033993, "percentage": 19.13, "elapsed_time": "5:12:10", "remaining_time": "21:59:31"} +{"current_steps": 1442, "total_steps": 7532, "loss": 0.34863507747650146, "lr": 1.892843033707035e-05, "epoch": 0.38295047138494226, "percentage": 19.14, "elapsed_time": "5:12:23", "remaining_time": "21:59:20"} +{"current_steps": 1443, "total_steps": 7532, "loss": 0.3428313732147217, "lr": 1.8926452016336987e-05, "epoch": 0.3832160403664852, "percentage": 19.16, "elapsed_time": "5:12:36", "remaining_time": "21:59:05"} +{"current_steps": 1444, "total_steps": 7532, "loss": 0.3223801851272583, "lr": 1.8924471974688956e-05, "epoch": 0.38348160934802816, "percentage": 19.17, "elapsed_time": "5:12:49", "remaining_time": "21:58:55"} +{"current_steps": 1445, "total_steps": 7532, "loss": 0.33248746395111084, "lr": 1.8922490212507983e-05, "epoch": 0.3837471783295711, "percentage": 19.18, "elapsed_time": "5:13:02", "remaining_time": "21:58:39"} +{"current_steps": 1446, "total_steps": 7532, "loss": 0.3472076654434204, "lr": 
1.8920506730176125e-05, "epoch": 0.38401274731111407, "percentage": 19.2, "elapsed_time": "5:13:14", "remaining_time": "21:58:24"} +{"current_steps": 1447, "total_steps": 7532, "loss": 0.4385136365890503, "lr": 1.891852152807578e-05, "epoch": 0.384278316292657, "percentage": 19.21, "elapsed_time": "5:13:27", "remaining_time": "21:58:09"} +{"current_steps": 1448, "total_steps": 7532, "loss": 0.36871540546417236, "lr": 1.8916534606589666e-05, "epoch": 0.38454388527419997, "percentage": 19.22, "elapsed_time": "5:13:39", "remaining_time": "21:57:54"} +{"current_steps": 1449, "total_steps": 7532, "loss": 0.3136710524559021, "lr": 1.8914545966100843e-05, "epoch": 0.3848094542557429, "percentage": 19.24, "elapsed_time": "5:13:52", "remaining_time": "21:57:38"} +{"current_steps": 1450, "total_steps": 7532, "loss": 0.3236457109451294, "lr": 1.891255560699269e-05, "epoch": 0.3850750232372859, "percentage": 19.25, "elapsed_time": "5:14:04", "remaining_time": "21:57:23"} +{"current_steps": 1451, "total_steps": 7532, "loss": 0.3176822066307068, "lr": 1.8910563529648933e-05, "epoch": 0.3853405922188288, "percentage": 19.26, "elapsed_time": "5:14:17", "remaining_time": "21:57:08"} +{"current_steps": 1452, "total_steps": 7532, "loss": 0.3531719744205475, "lr": 1.890856973445362e-05, "epoch": 0.3856061612003718, "percentage": 19.28, "elapsed_time": "5:14:29", "remaining_time": "21:56:51"} +{"current_steps": 1453, "total_steps": 7532, "loss": 0.2911416292190552, "lr": 1.8906574221791127e-05, "epoch": 0.38587173018191473, "percentage": 19.29, "elapsed_time": "5:14:41", "remaining_time": "21:56:37"} +{"current_steps": 1454, "total_steps": 7532, "loss": 0.3522392511367798, "lr": 1.890457699204617e-05, "epoch": 0.3861372991634577, "percentage": 19.3, "elapsed_time": "5:14:54", "remaining_time": "21:56:21"} +{"current_steps": 1455, "total_steps": 7532, "loss": 0.3724471628665924, "lr": 1.8902578045603787e-05, "epoch": 0.3864028681450007, "percentage": 19.32, "elapsed_time": "5:15:06", 
"remaining_time": "21:56:06"} +{"current_steps": 1456, "total_steps": 7532, "loss": 0.2935449481010437, "lr": 1.890057738284935e-05, "epoch": 0.38666843712654364, "percentage": 19.33, "elapsed_time": "5:15:19", "remaining_time": "21:55:50"} +{"current_steps": 1457, "total_steps": 7532, "loss": 0.3413137197494507, "lr": 1.8898575004168568e-05, "epoch": 0.3869340061080866, "percentage": 19.34, "elapsed_time": "5:15:31", "remaining_time": "21:55:36"} +{"current_steps": 1458, "total_steps": 7532, "loss": 0.32282277941703796, "lr": 1.8896570909947477e-05, "epoch": 0.38719957508962954, "percentage": 19.36, "elapsed_time": "5:15:44", "remaining_time": "21:55:20"} +{"current_steps": 1459, "total_steps": 7532, "loss": 0.3285476565361023, "lr": 1.8894565100572435e-05, "epoch": 0.3874651440711725, "percentage": 19.37, "elapsed_time": "5:15:56", "remaining_time": "21:55:06"} +{"current_steps": 1460, "total_steps": 7532, "loss": 0.29517480731010437, "lr": 1.8892557576430147e-05, "epoch": 0.38773071305271545, "percentage": 19.38, "elapsed_time": "5:16:08", "remaining_time": "21:54:49"} +{"current_steps": 1461, "total_steps": 7532, "loss": 0.2913149297237396, "lr": 1.8890548337907636e-05, "epoch": 0.3879962820342584, "percentage": 19.4, "elapsed_time": "5:16:21", "remaining_time": "21:54:36"} +{"current_steps": 1462, "total_steps": 7532, "loss": 0.32154160737991333, "lr": 1.8888537385392258e-05, "epoch": 0.38826185101580135, "percentage": 19.41, "elapsed_time": "5:16:33", "remaining_time": "21:54:20"} +{"current_steps": 1463, "total_steps": 7532, "loss": 0.30677905678749084, "lr": 1.88865247192717e-05, "epoch": 0.3885274199973443, "percentage": 19.42, "elapsed_time": "5:16:46", "remaining_time": "21:54:03"} +{"current_steps": 1464, "total_steps": 7532, "loss": 0.37568169832229614, "lr": 1.888451033993399e-05, "epoch": 0.38879298897888726, "percentage": 19.44, "elapsed_time": "5:16:58", "remaining_time": "21:53:48"} +{"current_steps": 1465, "total_steps": 7532, "loss": 
0.34972083568573, "lr": 1.8882494247767465e-05, "epoch": 0.3890585579604302, "percentage": 19.45, "elapsed_time": "5:17:11", "remaining_time": "21:53:33"} +{"current_steps": 1466, "total_steps": 7532, "loss": 0.3198736906051636, "lr": 1.888047644316081e-05, "epoch": 0.38932412694197316, "percentage": 19.46, "elapsed_time": "5:17:23", "remaining_time": "21:53:19"} +{"current_steps": 1467, "total_steps": 7532, "loss": 0.3405846953392029, "lr": 1.887845692650303e-05, "epoch": 0.3895896959235161, "percentage": 19.48, "elapsed_time": "5:17:36", "remaining_time": "21:53:04"} +{"current_steps": 1468, "total_steps": 7532, "loss": 0.3600257337093353, "lr": 1.8876435698183465e-05, "epoch": 0.38985526490505906, "percentage": 19.49, "elapsed_time": "5:17:48", "remaining_time": "21:52:49"} +{"current_steps": 1469, "total_steps": 7532, "loss": 0.32415103912353516, "lr": 1.887441275859179e-05, "epoch": 0.39012083388660207, "percentage": 19.5, "elapsed_time": "5:18:01", "remaining_time": "21:52:33"} +{"current_steps": 1470, "total_steps": 7532, "loss": 0.3450891673564911, "lr": 1.8872388108117995e-05, "epoch": 0.390386402868145, "percentage": 19.52, "elapsed_time": "5:18:13", "remaining_time": "21:52:18"} +{"current_steps": 1471, "total_steps": 7532, "loss": 0.3210057318210602, "lr": 1.8870361747152416e-05, "epoch": 0.390651971849688, "percentage": 19.53, "elapsed_time": "5:18:25", "remaining_time": "21:52:02"} +{"current_steps": 1472, "total_steps": 7532, "loss": 0.3615706264972687, "lr": 1.8868333676085707e-05, "epoch": 0.3909175408312309, "percentage": 19.54, "elapsed_time": "5:18:38", "remaining_time": "21:51:47"} +{"current_steps": 1473, "total_steps": 7532, "loss": 0.34149813652038574, "lr": 1.8866303895308856e-05, "epoch": 0.3911831098127739, "percentage": 19.56, "elapsed_time": "5:18:50", "remaining_time": "21:51:32"} +{"current_steps": 1474, "total_steps": 7532, "loss": 0.2795295715332031, "lr": 1.8864272405213188e-05, "epoch": 0.39144867879431683, "percentage": 19.57, 
"elapsed_time": "5:19:03", "remaining_time": "21:51:17"} +{"current_steps": 1475, "total_steps": 7532, "loss": 0.3459053933620453, "lr": 1.8862239206190337e-05, "epoch": 0.3917142477758598, "percentage": 19.58, "elapsed_time": "5:19:15", "remaining_time": "21:51:01"} +{"current_steps": 1476, "total_steps": 7532, "loss": 0.3531072735786438, "lr": 1.8860204298632294e-05, "epoch": 0.39197981675740273, "percentage": 19.6, "elapsed_time": "5:19:28", "remaining_time": "21:50:49"} +{"current_steps": 1477, "total_steps": 7532, "loss": 0.3788977265357971, "lr": 1.8858167682931357e-05, "epoch": 0.3922453857389457, "percentage": 19.61, "elapsed_time": "5:19:41", "remaining_time": "21:50:33"} +{"current_steps": 1478, "total_steps": 7532, "loss": 0.3210671544075012, "lr": 1.8856129359480163e-05, "epoch": 0.39251095472048864, "percentage": 19.62, "elapsed_time": "5:19:54", "remaining_time": "21:50:21"} +{"current_steps": 1479, "total_steps": 7532, "loss": 0.3442102074623108, "lr": 1.8854089328671673e-05, "epoch": 0.3927765237020316, "percentage": 19.64, "elapsed_time": "5:20:06", "remaining_time": "21:50:05"} +{"current_steps": 1480, "total_steps": 7532, "loss": 0.29128211736679077, "lr": 1.885204759089919e-05, "epoch": 0.39304209268357454, "percentage": 19.65, "elapsed_time": "5:20:19", "remaining_time": "21:49:51"} +{"current_steps": 1481, "total_steps": 7532, "loss": 0.3601154088973999, "lr": 1.885000414655633e-05, "epoch": 0.3933076616651175, "percentage": 19.66, "elapsed_time": "5:20:32", "remaining_time": "21:49:38"} +{"current_steps": 1482, "total_steps": 7532, "loss": 0.3173052668571472, "lr": 1.8847958996037042e-05, "epoch": 0.39357323064666044, "percentage": 19.68, "elapsed_time": "5:20:44", "remaining_time": "21:49:21"} +{"current_steps": 1483, "total_steps": 7532, "loss": 0.32759106159210205, "lr": 1.8845912139735616e-05, "epoch": 0.39383879962820345, "percentage": 19.69, "elapsed_time": "5:20:57", "remaining_time": "21:49:07"} +{"current_steps": 1484, "total_steps": 
7532, "loss": 0.3213586211204529, "lr": 1.8843863578046657e-05, "epoch": 0.3941043686097464, "percentage": 19.7, "elapsed_time": "5:21:09", "remaining_time": "21:48:51"} +{"current_steps": 1485, "total_steps": 7532, "loss": 0.342970073223114, "lr": 1.8841813311365105e-05, "epoch": 0.39436993759128935, "percentage": 19.72, "elapsed_time": "5:21:22", "remaining_time": "21:48:37"} +{"current_steps": 1486, "total_steps": 7532, "loss": 0.3852401375770569, "lr": 1.883976134008622e-05, "epoch": 0.3946355065728323, "percentage": 19.73, "elapsed_time": "5:21:34", "remaining_time": "21:48:22"} +{"current_steps": 1487, "total_steps": 7532, "loss": 0.2965390682220459, "lr": 1.883770766460561e-05, "epoch": 0.39490107555437526, "percentage": 19.74, "elapsed_time": "5:21:47", "remaining_time": "21:48:07"} +{"current_steps": 1488, "total_steps": 7532, "loss": 0.3899655044078827, "lr": 1.883565228531919e-05, "epoch": 0.3951666445359182, "percentage": 19.76, "elapsed_time": "5:21:58", "remaining_time": "21:47:50"} +{"current_steps": 1489, "total_steps": 7532, "loss": 0.339199423789978, "lr": 1.8833595202623222e-05, "epoch": 0.39543221351746116, "percentage": 19.77, "elapsed_time": "5:22:11", "remaining_time": "21:47:33"} +{"current_steps": 1490, "total_steps": 7532, "loss": 0.3121682405471802, "lr": 1.8831536416914278e-05, "epoch": 0.3956977824990041, "percentage": 19.78, "elapsed_time": "5:22:23", "remaining_time": "21:47:16"} +{"current_steps": 1491, "total_steps": 7532, "loss": 0.31947991251945496, "lr": 1.8829475928589272e-05, "epoch": 0.39596335148054707, "percentage": 19.8, "elapsed_time": "5:22:35", "remaining_time": "21:47:00"} +{"current_steps": 1492, "total_steps": 7532, "loss": 0.3569333553314209, "lr": 1.882741373804544e-05, "epoch": 0.39622892046209, "percentage": 19.81, "elapsed_time": "5:22:46", "remaining_time": "21:46:42"} +{"current_steps": 1493, "total_steps": 7532, "loss": 0.3739020526409149, "lr": 1.882534984568035e-05, "epoch": 0.39649448944363297, 
"percentage": 19.82, "elapsed_time": "5:22:58", "remaining_time": "21:46:24"} +{"current_steps": 1494, "total_steps": 7532, "loss": 0.34350353479385376, "lr": 1.882328425189189e-05, "epoch": 0.3967600584251759, "percentage": 19.84, "elapsed_time": "5:23:11", "remaining_time": "21:46:10"} +{"current_steps": 1495, "total_steps": 7532, "loss": 0.3103981614112854, "lr": 1.882121695707829e-05, "epoch": 0.3970256274067189, "percentage": 19.85, "elapsed_time": "5:23:23", "remaining_time": "21:45:55"} +{"current_steps": 1496, "total_steps": 7532, "loss": 0.33847716450691223, "lr": 1.8819147961638104e-05, "epoch": 0.3972911963882618, "percentage": 19.86, "elapsed_time": "5:23:36", "remaining_time": "21:45:41"} +{"current_steps": 1497, "total_steps": 7532, "loss": 0.3080996870994568, "lr": 1.8817077265970196e-05, "epoch": 0.39755676536980483, "percentage": 19.88, "elapsed_time": "5:23:48", "remaining_time": "21:45:25"} +{"current_steps": 1498, "total_steps": 7532, "loss": 0.3247831463813782, "lr": 1.8815004870473777e-05, "epoch": 0.3978223343513478, "percentage": 19.89, "elapsed_time": "5:24:01", "remaining_time": "21:45:11"} +{"current_steps": 1499, "total_steps": 7532, "loss": 0.2919698655605316, "lr": 1.8812930775548387e-05, "epoch": 0.39808790333289074, "percentage": 19.9, "elapsed_time": "5:24:13", "remaining_time": "21:44:55"} +{"current_steps": 1500, "total_steps": 7532, "loss": 0.3627319931983948, "lr": 1.8810854981593883e-05, "epoch": 0.3983534723144337, "percentage": 19.92, "elapsed_time": "5:24:26", "remaining_time": "21:44:41"} +{"current_steps": 1501, "total_steps": 7532, "loss": 0.3619319796562195, "lr": 1.880877748901045e-05, "epoch": 0.39861904129597664, "percentage": 19.93, "elapsed_time": "5:24:43", "remaining_time": "21:44:46"} +{"current_steps": 1502, "total_steps": 7532, "loss": 0.3393789827823639, "lr": 1.8806698298198608e-05, "epoch": 0.3988846102775196, "percentage": 19.94, "elapsed_time": "5:24:56", "remaining_time": "21:44:32"} +{"current_steps": 
1503, "total_steps": 7532, "loss": 0.3736116886138916, "lr": 1.88046174095592e-05, "epoch": 0.39915017925906254, "percentage": 19.95, "elapsed_time": "5:25:09", "remaining_time": "21:44:17"} +{"current_steps": 1504, "total_steps": 7532, "loss": 0.32829388976097107, "lr": 1.8802534823493395e-05, "epoch": 0.3994157482406055, "percentage": 19.97, "elapsed_time": "5:25:22", "remaining_time": "21:44:07"} +{"current_steps": 1505, "total_steps": 7532, "loss": 0.340041846036911, "lr": 1.8800450540402694e-05, "epoch": 0.39968131722214845, "percentage": 19.98, "elapsed_time": "5:25:35", "remaining_time": "21:43:54"} +{"current_steps": 1506, "total_steps": 7532, "loss": 0.2830736041069031, "lr": 1.8798364560688917e-05, "epoch": 0.3999468862036914, "percentage": 19.99, "elapsed_time": "5:25:48", "remaining_time": "21:43:41"} +{"current_steps": 1507, "total_steps": 7532, "loss": 0.33011579513549805, "lr": 1.8796276884754224e-05, "epoch": 0.40021245518523435, "percentage": 20.01, "elapsed_time": "5:26:01", "remaining_time": "21:43:28"} +{"current_steps": 1508, "total_steps": 7532, "loss": 0.2893834114074707, "lr": 1.8794187513001088e-05, "epoch": 0.4004780241667773, "percentage": 20.02, "elapsed_time": "5:26:15", "remaining_time": "21:43:19"} +{"current_steps": 1509, "total_steps": 7532, "loss": 0.3590015172958374, "lr": 1.8792096445832317e-05, "epoch": 0.40074359314832025, "percentage": 20.03, "elapsed_time": "5:26:29", "remaining_time": "21:43:07"} +{"current_steps": 1510, "total_steps": 7532, "loss": 0.3968508541584015, "lr": 1.8790003683651045e-05, "epoch": 0.4010091621298632, "percentage": 20.05, "elapsed_time": "5:26:42", "remaining_time": "21:42:57"} +{"current_steps": 1511, "total_steps": 7532, "loss": 0.324398934841156, "lr": 1.878790922686073e-05, "epoch": 0.4012747311114062, "percentage": 20.06, "elapsed_time": "5:26:55", "remaining_time": "21:42:45"} +{"current_steps": 1512, "total_steps": 7532, "loss": 0.35111895203590393, "lr": 1.8785813075865164e-05, "epoch": 
0.40154030009294917, "percentage": 20.07, "elapsed_time": "5:27:09", "remaining_time": "21:42:33"} +{"current_steps": 1513, "total_steps": 7532, "loss": 0.28124356269836426, "lr": 1.8783715231068452e-05, "epoch": 0.4018058690744921, "percentage": 20.09, "elapsed_time": "5:27:22", "remaining_time": "21:42:22"} +{"current_steps": 1514, "total_steps": 7532, "loss": 0.28962311148643494, "lr": 1.878161569287504e-05, "epoch": 0.40207143805603507, "percentage": 20.1, "elapsed_time": "5:27:36", "remaining_time": "21:42:10"} +{"current_steps": 1515, "total_steps": 7532, "loss": 0.3646606206893921, "lr": 1.877951446168969e-05, "epoch": 0.402337007037578, "percentage": 20.11, "elapsed_time": "5:27:49", "remaining_time": "21:42:00"} +{"current_steps": 1516, "total_steps": 7532, "loss": 0.2815355360507965, "lr": 1.8777411537917497e-05, "epoch": 0.402602576019121, "percentage": 20.13, "elapsed_time": "5:28:02", "remaining_time": "21:41:47"} +{"current_steps": 1517, "total_steps": 7532, "loss": 0.33208370208740234, "lr": 1.877530692196388e-05, "epoch": 0.4028681450006639, "percentage": 20.14, "elapsed_time": "5:28:16", "remaining_time": "21:41:37"} +{"current_steps": 1518, "total_steps": 7532, "loss": 0.33741289377212524, "lr": 1.8773200614234587e-05, "epoch": 0.4031337139822069, "percentage": 20.15, "elapsed_time": "5:28:29", "remaining_time": "21:41:24"} +{"current_steps": 1519, "total_steps": 7532, "loss": 0.31304073333740234, "lr": 1.877109261513568e-05, "epoch": 0.40339928296374983, "percentage": 20.17, "elapsed_time": "5:28:42", "remaining_time": "21:41:13"} +{"current_steps": 1520, "total_steps": 7532, "loss": 0.32556387782096863, "lr": 1.8768982925073566e-05, "epoch": 0.4036648519452928, "percentage": 20.18, "elapsed_time": "5:28:55", "remaining_time": "21:40:58"} +{"current_steps": 1521, "total_steps": 7532, "loss": 0.3584224581718445, "lr": 1.8766871544454963e-05, "epoch": 0.40393042092683573, "percentage": 20.19, "elapsed_time": "5:29:09", "remaining_time": "21:40:48"} 
+{"current_steps": 1522, "total_steps": 7532, "loss": 0.2864416837692261, "lr": 1.8764758473686918e-05, "epoch": 0.4041959899083787, "percentage": 20.21, "elapsed_time": "5:29:22", "remaining_time": "21:40:36"} +{"current_steps": 1523, "total_steps": 7532, "loss": 0.28925320506095886, "lr": 1.8762643713176815e-05, "epoch": 0.40446155888992164, "percentage": 20.22, "elapsed_time": "5:29:35", "remaining_time": "21:40:25"} +{"current_steps": 1524, "total_steps": 7532, "loss": 0.30940550565719604, "lr": 1.876052726333235e-05, "epoch": 0.4047271278714646, "percentage": 20.23, "elapsed_time": "5:29:49", "remaining_time": "21:40:14"} +{"current_steps": 1525, "total_steps": 7532, "loss": 0.3463154733181, "lr": 1.875840912456155e-05, "epoch": 0.4049926968530076, "percentage": 20.25, "elapsed_time": "5:30:02", "remaining_time": "21:40:00"} +{"current_steps": 1526, "total_steps": 7532, "loss": 0.3349658250808716, "lr": 1.8756289297272764e-05, "epoch": 0.40525826583455055, "percentage": 20.26, "elapsed_time": "5:30:15", "remaining_time": "21:39:47"} +{"current_steps": 1527, "total_steps": 7532, "loss": 0.32588714361190796, "lr": 1.8754167781874674e-05, "epoch": 0.4055238348160935, "percentage": 20.27, "elapsed_time": "5:30:28", "remaining_time": "21:39:35"} +{"current_steps": 1528, "total_steps": 7532, "loss": 0.33787310123443604, "lr": 1.875204457877628e-05, "epoch": 0.40578940379763645, "percentage": 20.29, "elapsed_time": "5:30:41", "remaining_time": "21:39:23"} +{"current_steps": 1529, "total_steps": 7532, "loss": 0.3223261833190918, "lr": 1.8749919688386912e-05, "epoch": 0.4060549727791794, "percentage": 20.3, "elapsed_time": "5:30:54", "remaining_time": "21:39:09"} +{"current_steps": 1530, "total_steps": 7532, "loss": 0.38505882024765015, "lr": 1.8747793111116226e-05, "epoch": 0.40632054176072235, "percentage": 20.31, "elapsed_time": "5:31:06", "remaining_time": "21:38:55"} +{"current_steps": 1531, "total_steps": 7532, "loss": 0.33071833848953247, "lr": 
1.8745664847374197e-05, "epoch": 0.4065861107422653, "percentage": 20.33, "elapsed_time": "5:31:19", "remaining_time": "21:38:41"} +{"current_steps": 1532, "total_steps": 7532, "loss": 0.36603987216949463, "lr": 1.874353489757113e-05, "epoch": 0.40685167972380826, "percentage": 20.34, "elapsed_time": "5:31:32", "remaining_time": "21:38:29"} +{"current_steps": 1533, "total_steps": 7532, "loss": 0.3103085160255432, "lr": 1.874140326211766e-05, "epoch": 0.4071172487053512, "percentage": 20.35, "elapsed_time": "5:31:45", "remaining_time": "21:38:14"} +{"current_steps": 1534, "total_steps": 7532, "loss": 0.3471127152442932, "lr": 1.873926994142473e-05, "epoch": 0.40738281768689416, "percentage": 20.37, "elapsed_time": "5:31:57", "remaining_time": "21:37:59"} +{"current_steps": 1535, "total_steps": 7532, "loss": 0.33152899146080017, "lr": 1.873713493590363e-05, "epoch": 0.4076483866684371, "percentage": 20.38, "elapsed_time": "5:32:10", "remaining_time": "21:37:45"} +{"current_steps": 1536, "total_steps": 7532, "loss": 0.340177059173584, "lr": 1.8734998245965958e-05, "epoch": 0.40791395564998006, "percentage": 20.39, "elapsed_time": "5:32:23", "remaining_time": "21:37:32"} +{"current_steps": 1537, "total_steps": 7532, "loss": 0.3331618010997772, "lr": 1.8732859872023644e-05, "epoch": 0.408179524631523, "percentage": 20.41, "elapsed_time": "5:32:36", "remaining_time": "21:37:20"} +{"current_steps": 1538, "total_steps": 7532, "loss": 0.3911997675895691, "lr": 1.8730719814488937e-05, "epoch": 0.40844509361306597, "percentage": 20.42, "elapsed_time": "5:32:49", "remaining_time": "21:37:06"} +{"current_steps": 1539, "total_steps": 7532, "loss": 0.3699817955493927, "lr": 1.8728578073774427e-05, "epoch": 0.408710662594609, "percentage": 20.43, "elapsed_time": "5:33:02", "remaining_time": "21:36:54"} +{"current_steps": 1540, "total_steps": 7532, "loss": 0.31567275524139404, "lr": 1.8726434650293e-05, "epoch": 0.4089762315761519, "percentage": 20.45, "elapsed_time": "5:33:15", 
"remaining_time": "21:36:40"} +{"current_steps": 1541, "total_steps": 7532, "loss": 0.3387305438518524, "lr": 1.8724289544457897e-05, "epoch": 0.4092418005576949, "percentage": 20.46, "elapsed_time": "5:33:29", "remaining_time": "21:36:30"} +{"current_steps": 1542, "total_steps": 7532, "loss": 0.3460234999656677, "lr": 1.8722142756682663e-05, "epoch": 0.40950736953923783, "percentage": 20.47, "elapsed_time": "5:33:41", "remaining_time": "21:36:16"} +{"current_steps": 1543, "total_steps": 7532, "loss": 0.35653382539749146, "lr": 1.8719994287381173e-05, "epoch": 0.4097729385207808, "percentage": 20.49, "elapsed_time": "5:33:55", "remaining_time": "21:36:03"} +{"current_steps": 1544, "total_steps": 7532, "loss": 0.3828277885913849, "lr": 1.8717844136967626e-05, "epoch": 0.41003850750232373, "percentage": 20.5, "elapsed_time": "5:34:07", "remaining_time": "21:35:48"} +{"current_steps": 1545, "total_steps": 7532, "loss": 0.35883858799934387, "lr": 1.871569230585655e-05, "epoch": 0.4103040764838667, "percentage": 20.51, "elapsed_time": "5:34:19", "remaining_time": "21:35:33"} +{"current_steps": 1546, "total_steps": 7532, "loss": 0.27414464950561523, "lr": 1.8713538794462783e-05, "epoch": 0.41056964546540964, "percentage": 20.53, "elapsed_time": "5:34:32", "remaining_time": "21:35:19"} +{"current_steps": 1547, "total_steps": 7532, "loss": 0.2924337387084961, "lr": 1.871138360320151e-05, "epoch": 0.4108352144469526, "percentage": 20.54, "elapsed_time": "5:34:45", "remaining_time": "21:35:04"} +{"current_steps": 1548, "total_steps": 7532, "loss": 0.34270918369293213, "lr": 1.8709226732488216e-05, "epoch": 0.41110078342849554, "percentage": 20.55, "elapsed_time": "5:34:57", "remaining_time": "21:34:49"} +{"current_steps": 1549, "total_steps": 7532, "loss": 0.33866482973098755, "lr": 1.870706818273872e-05, "epoch": 0.4113663524100385, "percentage": 20.57, "elapsed_time": "5:35:09", "remaining_time": "21:34:33"} +{"current_steps": 1550, "total_steps": 7532, "loss": 
0.3350633382797241, "lr": 1.8704907954369176e-05, "epoch": 0.41163192139158145, "percentage": 20.58, "elapsed_time": "5:35:22", "remaining_time": "21:34:19"} +{"current_steps": 1551, "total_steps": 7532, "loss": 0.32763785123825073, "lr": 1.870274604779604e-05, "epoch": 0.4118974903731244, "percentage": 20.59, "elapsed_time": "5:35:34", "remaining_time": "21:34:03"} +{"current_steps": 1552, "total_steps": 7532, "loss": 0.3130378723144531, "lr": 1.8700582463436102e-05, "epoch": 0.41216305935466735, "percentage": 20.61, "elapsed_time": "5:35:47", "remaining_time": "21:33:49"} +{"current_steps": 1553, "total_steps": 7532, "loss": 0.34318777918815613, "lr": 1.8698417201706484e-05, "epoch": 0.41242862833621036, "percentage": 20.62, "elapsed_time": "5:35:59", "remaining_time": "21:33:33"} +{"current_steps": 1554, "total_steps": 7532, "loss": 0.3250104784965515, "lr": 1.8696250263024617e-05, "epoch": 0.4126941973177533, "percentage": 20.63, "elapsed_time": "5:36:12", "remaining_time": "21:33:20"} +{"current_steps": 1555, "total_steps": 7532, "loss": 0.3409217298030853, "lr": 1.869408164780826e-05, "epoch": 0.41295976629929626, "percentage": 20.65, "elapsed_time": "5:36:24", "remaining_time": "21:33:04"} +{"current_steps": 1556, "total_steps": 7532, "loss": 0.2885017395019531, "lr": 1.86919113564755e-05, "epoch": 0.4132253352808392, "percentage": 20.66, "elapsed_time": "5:36:37", "remaining_time": "21:32:52"} +{"current_steps": 1557, "total_steps": 7532, "loss": 0.31912562251091003, "lr": 1.8689739389444744e-05, "epoch": 0.41349090426238216, "percentage": 20.67, "elapsed_time": "5:36:50", "remaining_time": "21:32:38"} +{"current_steps": 1558, "total_steps": 7532, "loss": 0.29874011874198914, "lr": 1.8687565747134716e-05, "epoch": 0.4137564732439251, "percentage": 20.69, "elapsed_time": "5:37:03", "remaining_time": "21:32:26"} +{"current_steps": 1559, "total_steps": 7532, "loss": 0.3132701516151428, "lr": 1.8685390429964473e-05, "epoch": 0.41402204222546807, "percentage": 
20.7, "elapsed_time": "5:37:16", "remaining_time": "21:32:11"} +{"current_steps": 1560, "total_steps": 7532, "loss": 0.31158843636512756, "lr": 1.868321343835339e-05, "epoch": 0.414287611207011, "percentage": 20.71, "elapsed_time": "5:37:29", "remaining_time": "21:31:57"} +{"current_steps": 1561, "total_steps": 7532, "loss": 0.30490344762802124, "lr": 1.8681034772721167e-05, "epoch": 0.41455318018855397, "percentage": 20.72, "elapsed_time": "5:37:41", "remaining_time": "21:31:43"} +{"current_steps": 1562, "total_steps": 7532, "loss": 0.3150998055934906, "lr": 1.867885443348782e-05, "epoch": 0.4148187491700969, "percentage": 20.74, "elapsed_time": "5:37:54", "remaining_time": "21:31:30"} +{"current_steps": 1563, "total_steps": 7532, "loss": 0.3391645550727844, "lr": 1.86766724210737e-05, "epoch": 0.4150843181516399, "percentage": 20.75, "elapsed_time": "5:38:07", "remaining_time": "21:31:16"} +{"current_steps": 1564, "total_steps": 7532, "loss": 0.35013002157211304, "lr": 1.8674488735899466e-05, "epoch": 0.4153498871331828, "percentage": 20.76, "elapsed_time": "5:38:20", "remaining_time": "21:31:02"} +{"current_steps": 1565, "total_steps": 7532, "loss": 0.3455789387226105, "lr": 1.867230337838611e-05, "epoch": 0.4156154561147258, "percentage": 20.78, "elapsed_time": "5:38:33", "remaining_time": "21:30:50"} +{"current_steps": 1566, "total_steps": 7532, "loss": 0.3179319500923157, "lr": 1.8670116348954945e-05, "epoch": 0.41588102509626873, "percentage": 20.79, "elapsed_time": "5:38:46", "remaining_time": "21:30:36"} +{"current_steps": 1567, "total_steps": 7532, "loss": 0.3628920018672943, "lr": 1.8667927648027596e-05, "epoch": 0.41614659407781174, "percentage": 20.8, "elapsed_time": "5:38:59", "remaining_time": "21:30:23"} +{"current_steps": 1568, "total_steps": 7532, "loss": 0.33599400520324707, "lr": 1.8665737276026033e-05, "epoch": 0.4164121630593547, "percentage": 20.82, "elapsed_time": "5:39:11", "remaining_time": "21:30:07"} +{"current_steps": 1569, 
"total_steps": 7532, "loss": 0.31519144773483276, "lr": 1.8663545233372524e-05, "epoch": 0.41667773204089764, "percentage": 20.83, "elapsed_time": "5:39:24", "remaining_time": "21:29:54"} +{"current_steps": 1570, "total_steps": 7532, "loss": 0.3326237201690674, "lr": 1.8661351520489667e-05, "epoch": 0.4169433010224406, "percentage": 20.84, "elapsed_time": "5:39:36", "remaining_time": "21:29:40"} +{"current_steps": 1571, "total_steps": 7532, "loss": 0.35254499316215515, "lr": 1.865915613780039e-05, "epoch": 0.41720887000398355, "percentage": 20.86, "elapsed_time": "5:39:49", "remaining_time": "21:29:27"} +{"current_steps": 1572, "total_steps": 7532, "loss": 0.36689436435699463, "lr": 1.8656959085727936e-05, "epoch": 0.4174744389855265, "percentage": 20.87, "elapsed_time": "5:40:02", "remaining_time": "21:29:12"} +{"current_steps": 1573, "total_steps": 7532, "loss": 0.3113600015640259, "lr": 1.8654760364695873e-05, "epoch": 0.41774000796706945, "percentage": 20.88, "elapsed_time": "5:40:15", "remaining_time": "21:28:59"} +{"current_steps": 1574, "total_steps": 7532, "loss": 0.3336432874202728, "lr": 1.865255997512808e-05, "epoch": 0.4180055769486124, "percentage": 20.9, "elapsed_time": "5:40:28", "remaining_time": "21:28:46"} +{"current_steps": 1575, "total_steps": 7532, "loss": 0.3657492995262146, "lr": 1.8650357917448774e-05, "epoch": 0.41827114593015535, "percentage": 20.91, "elapsed_time": "5:40:41", "remaining_time": "21:28:34"} +{"current_steps": 1576, "total_steps": 7532, "loss": 0.3087846338748932, "lr": 1.864815419208248e-05, "epoch": 0.4185367149116983, "percentage": 20.92, "elapsed_time": "5:40:54", "remaining_time": "21:28:21"} +{"current_steps": 1577, "total_steps": 7532, "loss": 0.31422343850135803, "lr": 1.8645948799454058e-05, "epoch": 0.41880228389324126, "percentage": 20.94, "elapsed_time": "5:41:08", "remaining_time": "21:28:10"} +{"current_steps": 1578, "total_steps": 7532, "loss": 0.3172760009765625, "lr": 1.8643741739988672e-05, "epoch": 
0.4190678528747842, "percentage": 20.95, "elapsed_time": "5:41:21", "remaining_time": "21:27:58"} +{"current_steps": 1579, "total_steps": 7532, "loss": 0.36819136142730713, "lr": 1.8641533014111824e-05, "epoch": 0.41933342185632716, "percentage": 20.96, "elapsed_time": "5:41:34", "remaining_time": "21:27:45"} +{"current_steps": 1580, "total_steps": 7532, "loss": 0.29081088304519653, "lr": 1.863932262224933e-05, "epoch": 0.4195989908378701, "percentage": 20.98, "elapsed_time": "5:41:47", "remaining_time": "21:27:33"} +{"current_steps": 1581, "total_steps": 7532, "loss": 0.3209632635116577, "lr": 1.8637110564827325e-05, "epoch": 0.4198645598194131, "percentage": 20.99, "elapsed_time": "5:42:01", "remaining_time": "21:27:23"} +{"current_steps": 1582, "total_steps": 7532, "loss": 0.3357914686203003, "lr": 1.863489684227227e-05, "epoch": 0.42013012880095607, "percentage": 21.0, "elapsed_time": "5:42:14", "remaining_time": "21:27:10"} +{"current_steps": 1583, "total_steps": 7532, "loss": 0.285677969455719, "lr": 1.8632681455010937e-05, "epoch": 0.420395697782499, "percentage": 21.02, "elapsed_time": "5:42:27", "remaining_time": "21:26:58"} +{"current_steps": 1584, "total_steps": 7532, "loss": 0.377876341342926, "lr": 1.8630464403470435e-05, "epoch": 0.420661266764042, "percentage": 21.03, "elapsed_time": "5:42:41", "remaining_time": "21:26:48"} +{"current_steps": 1585, "total_steps": 7532, "loss": 0.3141768276691437, "lr": 1.8628245688078187e-05, "epoch": 0.4209268357455849, "percentage": 21.04, "elapsed_time": "5:42:54", "remaining_time": "21:26:36"} +{"current_steps": 1586, "total_steps": 7532, "loss": 0.34249693155288696, "lr": 1.8626025309261927e-05, "epoch": 0.4211924047271279, "percentage": 21.06, "elapsed_time": "5:43:07", "remaining_time": "21:26:25"} +{"current_steps": 1587, "total_steps": 7532, "loss": 0.32564717531204224, "lr": 1.8623803267449722e-05, "epoch": 0.42145797370867083, "percentage": 21.07, "elapsed_time": "5:43:20", "remaining_time": "21:26:12"} 
+{"current_steps": 1588, "total_steps": 7532, "loss": 0.3425004184246063, "lr": 1.8621579563069957e-05, "epoch": 0.4217235426902138, "percentage": 21.08, "elapsed_time": "5:43:34", "remaining_time": "21:26:02"} +{"current_steps": 1589, "total_steps": 7532, "loss": 0.3676222562789917, "lr": 1.8619354196551333e-05, "epoch": 0.42198911167175673, "percentage": 21.1, "elapsed_time": "5:43:47", "remaining_time": "21:25:49"} +{"current_steps": 1590, "total_steps": 7532, "loss": 0.28915971517562866, "lr": 1.8617127168322877e-05, "epoch": 0.4222546806532997, "percentage": 21.11, "elapsed_time": "5:44:01", "remaining_time": "21:25:38"} +{"current_steps": 1591, "total_steps": 7532, "loss": 0.3387221097946167, "lr": 1.8614898478813933e-05, "epoch": 0.42252024963484264, "percentage": 21.12, "elapsed_time": "5:44:14", "remaining_time": "21:25:25"} +{"current_steps": 1592, "total_steps": 7532, "loss": 0.33886784315109253, "lr": 1.8612668128454164e-05, "epoch": 0.4227858186163856, "percentage": 21.14, "elapsed_time": "5:44:27", "remaining_time": "21:25:15"} +{"current_steps": 1593, "total_steps": 7532, "loss": 0.3364121913909912, "lr": 1.8610436117673557e-05, "epoch": 0.42305138759792854, "percentage": 21.15, "elapsed_time": "5:44:41", "remaining_time": "21:25:03"} +{"current_steps": 1594, "total_steps": 7532, "loss": 0.3661370873451233, "lr": 1.8608202446902418e-05, "epoch": 0.4233169565794715, "percentage": 21.16, "elapsed_time": "5:44:53", "remaining_time": "21:24:48"} +{"current_steps": 1595, "total_steps": 7532, "loss": 0.2980557680130005, "lr": 1.8605967116571372e-05, "epoch": 0.4235825255610145, "percentage": 21.18, "elapsed_time": "5:45:06", "remaining_time": "21:24:34"} +{"current_steps": 1596, "total_steps": 7532, "loss": 0.36112043261528015, "lr": 1.8603730127111363e-05, "epoch": 0.42384809454255745, "percentage": 21.19, "elapsed_time": "5:45:18", "remaining_time": "21:24:19"} +{"current_steps": 1597, "total_steps": 7532, "loss": 0.30641958117485046, "lr": 
1.860149147895366e-05, "epoch": 0.4241136635241004, "percentage": 21.2, "elapsed_time": "5:45:32", "remaining_time": "21:24:10"} +{"current_steps": 1598, "total_steps": 7532, "loss": 0.3312561511993408, "lr": 1.8599251172529836e-05, "epoch": 0.42437923250564336, "percentage": 21.22, "elapsed_time": "5:45:45", "remaining_time": "21:23:57"} +{"current_steps": 1599, "total_steps": 7532, "loss": 0.3757131099700928, "lr": 1.859700920827181e-05, "epoch": 0.4246448014871863, "percentage": 21.23, "elapsed_time": "5:45:59", "remaining_time": "21:23:47"} +{"current_steps": 1600, "total_steps": 7532, "loss": 0.3225080370903015, "lr": 1.8594765586611805e-05, "epoch": 0.42491037046872926, "percentage": 21.24, "elapsed_time": "5:46:12", "remaining_time": "21:23:35"} +{"current_steps": 1601, "total_steps": 7532, "loss": 0.35943928360939026, "lr": 1.859252030798236e-05, "epoch": 0.4251759394502722, "percentage": 21.26, "elapsed_time": "5:46:31", "remaining_time": "21:23:44"} +{"current_steps": 1602, "total_steps": 7532, "loss": 0.29319390654563904, "lr": 1.859027337281633e-05, "epoch": 0.42544150843181516, "percentage": 21.27, "elapsed_time": "5:46:44", "remaining_time": "21:23:31"} +{"current_steps": 1603, "total_steps": 7532, "loss": 0.32320237159729004, "lr": 1.8588024781546914e-05, "epoch": 0.4257070774133581, "percentage": 21.28, "elapsed_time": "5:46:58", "remaining_time": "21:23:20"} +{"current_steps": 1604, "total_steps": 7532, "loss": 0.3381520211696625, "lr": 1.8585774534607606e-05, "epoch": 0.42597264639490107, "percentage": 21.3, "elapsed_time": "5:47:11", "remaining_time": "21:23:08"} +{"current_steps": 1605, "total_steps": 7532, "loss": 0.30010825395584106, "lr": 1.858352263243223e-05, "epoch": 0.426238215376444, "percentage": 21.31, "elapsed_time": "5:47:25", "remaining_time": "21:22:57"} +{"current_steps": 1606, "total_steps": 7532, "loss": 0.26282748579978943, "lr": 1.8581269075454918e-05, "epoch": 0.42650378435798697, "percentage": 21.32, "elapsed_time": 
"5:47:38", "remaining_time": "21:22:47"} +{"current_steps": 1607, "total_steps": 7532, "loss": 0.33613401651382446, "lr": 1.857901386411014e-05, "epoch": 0.4267693533395299, "percentage": 21.34, "elapsed_time": "5:47:51", "remaining_time": "21:22:34"} +{"current_steps": 1608, "total_steps": 7532, "loss": 0.34522315859794617, "lr": 1.8576756998832667e-05, "epoch": 0.4270349223210729, "percentage": 21.35, "elapsed_time": "5:48:05", "remaining_time": "21:22:23"} +{"current_steps": 1609, "total_steps": 7532, "loss": 0.3253153860569, "lr": 1.8574498480057598e-05, "epoch": 0.4273004913026159, "percentage": 21.36, "elapsed_time": "5:48:18", "remaining_time": "21:22:11"} +{"current_steps": 1610, "total_steps": 7532, "loss": 0.32180655002593994, "lr": 1.8572238308220347e-05, "epoch": 0.42756606028415883, "percentage": 21.38, "elapsed_time": "5:48:32", "remaining_time": "21:22:00"} +{"current_steps": 1611, "total_steps": 7532, "loss": 0.3274008333683014, "lr": 1.856997648375665e-05, "epoch": 0.4278316292657018, "percentage": 21.39, "elapsed_time": "5:48:45", "remaining_time": "21:21:48"} +{"current_steps": 1612, "total_steps": 7532, "loss": 0.3196510374546051, "lr": 1.8567713007102565e-05, "epoch": 0.42809719824724474, "percentage": 21.4, "elapsed_time": "5:48:59", "remaining_time": "21:21:39"} +{"current_steps": 1613, "total_steps": 7532, "loss": 0.2759617567062378, "lr": 1.8565447878694455e-05, "epoch": 0.4283627672287877, "percentage": 21.42, "elapsed_time": "5:49:12", "remaining_time": "21:21:26"} +{"current_steps": 1614, "total_steps": 7532, "loss": 0.35069289803504944, "lr": 1.8563181098969017e-05, "epoch": 0.42862833621033064, "percentage": 21.43, "elapsed_time": "5:49:26", "remaining_time": "21:21:16"} +{"current_steps": 1615, "total_steps": 7532, "loss": 0.3388484716415405, "lr": 1.8560912668363253e-05, "epoch": 0.4288939051918736, "percentage": 21.44, "elapsed_time": "5:49:39", "remaining_time": "21:21:04"} +{"current_steps": 1616, "total_steps": 7532, "loss": 
0.34116029739379883, "lr": 1.8558642587314496e-05, "epoch": 0.42915947417341654, "percentage": 21.46, "elapsed_time": "5:49:52", "remaining_time": "21:20:52"} +{"current_steps": 1617, "total_steps": 7532, "loss": 0.30212706327438354, "lr": 1.8556370856260387e-05, "epoch": 0.4294250431549595, "percentage": 21.47, "elapsed_time": "5:50:05", "remaining_time": "21:20:38"} +{"current_steps": 1618, "total_steps": 7532, "loss": 0.32250338792800903, "lr": 1.855409747563889e-05, "epoch": 0.42969061213650245, "percentage": 21.48, "elapsed_time": "5:50:19", "remaining_time": "21:20:29"} +{"current_steps": 1619, "total_steps": 7532, "loss": 0.35972943902015686, "lr": 1.8551822445888285e-05, "epoch": 0.4299561811180454, "percentage": 21.49, "elapsed_time": "5:50:32", "remaining_time": "21:20:17"} +{"current_steps": 1620, "total_steps": 7532, "loss": 0.3112533390522003, "lr": 1.8549545767447174e-05, "epoch": 0.43022175009958835, "percentage": 21.51, "elapsed_time": "5:50:46", "remaining_time": "21:20:06"} +{"current_steps": 1621, "total_steps": 7532, "loss": 0.3044458031654358, "lr": 1.854726744075447e-05, "epoch": 0.4304873190811313, "percentage": 21.52, "elapsed_time": "5:50:59", "remaining_time": "21:19:54"} +{"current_steps": 1622, "total_steps": 7532, "loss": 0.3261772096157074, "lr": 1.8544987466249412e-05, "epoch": 0.43075288806267426, "percentage": 21.53, "elapsed_time": "5:51:12", "remaining_time": "21:19:41"} +{"current_steps": 1623, "total_steps": 7532, "loss": 0.3485907018184662, "lr": 1.8542705844371544e-05, "epoch": 0.43101845704421726, "percentage": 21.55, "elapsed_time": "5:51:26", "remaining_time": "21:19:32"} +{"current_steps": 1624, "total_steps": 7532, "loss": 0.3016113340854645, "lr": 1.8540422575560747e-05, "epoch": 0.4312840260257602, "percentage": 21.56, "elapsed_time": "5:51:40", "remaining_time": "21:19:20"} +{"current_steps": 1625, "total_steps": 7532, "loss": 0.35383081436157227, "lr": 1.8538137660257198e-05, "epoch": 0.43154959500730317, 
"percentage": 21.57, "elapsed_time": "5:51:53", "remaining_time": "21:19:10"} +{"current_steps": 1626, "total_steps": 7532, "loss": 0.32015109062194824, "lr": 1.8535851098901406e-05, "epoch": 0.4318151639888461, "percentage": 21.59, "elapsed_time": "5:52:06", "remaining_time": "21:18:57"} +{"current_steps": 1627, "total_steps": 7532, "loss": 0.3801743984222412, "lr": 1.8533562891934195e-05, "epoch": 0.43208073297038907, "percentage": 21.6, "elapsed_time": "5:52:20", "remaining_time": "21:18:46"} +{"current_steps": 1628, "total_steps": 7532, "loss": 0.33140939474105835, "lr": 1.85312730397967e-05, "epoch": 0.432346301951932, "percentage": 21.61, "elapsed_time": "5:52:33", "remaining_time": "21:18:33"} +{"current_steps": 1629, "total_steps": 7532, "loss": 0.4052904546260834, "lr": 1.8528981542930382e-05, "epoch": 0.432611870933475, "percentage": 21.63, "elapsed_time": "5:52:47", "remaining_time": "21:18:23"} +{"current_steps": 1630, "total_steps": 7532, "loss": 0.3661607801914215, "lr": 1.8526688401777014e-05, "epoch": 0.4328774399150179, "percentage": 21.64, "elapsed_time": "5:53:00", "remaining_time": "21:18:10"} +{"current_steps": 1631, "total_steps": 7532, "loss": 0.33260756731033325, "lr": 1.852439361677868e-05, "epoch": 0.4331430088965609, "percentage": 21.65, "elapsed_time": "5:53:13", "remaining_time": "21:18:00"} +{"current_steps": 1632, "total_steps": 7532, "loss": 0.30222776532173157, "lr": 1.85220971883778e-05, "epoch": 0.43340857787810383, "percentage": 21.67, "elapsed_time": "5:53:26", "remaining_time": "21:17:46"} +{"current_steps": 1633, "total_steps": 7532, "loss": 0.3444751799106598, "lr": 1.8519799117017086e-05, "epoch": 0.4336741468596468, "percentage": 21.68, "elapsed_time": "5:53:40", "remaining_time": "21:17:35"} +{"current_steps": 1634, "total_steps": 7532, "loss": 0.33887404203414917, "lr": 1.8517499403139586e-05, "epoch": 0.43393971584118973, "percentage": 21.69, "elapsed_time": "5:53:53", "remaining_time": "21:17:22"} +{"current_steps": 
1635, "total_steps": 7532, "loss": 0.287893146276474, "lr": 1.8515198047188652e-05, "epoch": 0.4342052848227327, "percentage": 21.71, "elapsed_time": "5:54:06", "remaining_time": "21:17:09"} +{"current_steps": 1636, "total_steps": 7532, "loss": 0.32236215472221375, "lr": 1.8512895049607965e-05, "epoch": 0.43447085380427564, "percentage": 21.72, "elapsed_time": "5:54:19", "remaining_time": "21:16:58"} +{"current_steps": 1637, "total_steps": 7532, "loss": 0.30670079588890076, "lr": 1.8510590410841515e-05, "epoch": 0.43473642278581864, "percentage": 21.73, "elapsed_time": "5:54:32", "remaining_time": "21:16:43"} +{"current_steps": 1638, "total_steps": 7532, "loss": 0.34104713797569275, "lr": 1.8508284131333604e-05, "epoch": 0.4350019917673616, "percentage": 21.75, "elapsed_time": "5:54:44", "remaining_time": "21:16:29"} +{"current_steps": 1639, "total_steps": 7532, "loss": 0.3402378559112549, "lr": 1.8505976211528857e-05, "epoch": 0.43526756074890455, "percentage": 21.76, "elapsed_time": "5:54:57", "remaining_time": "21:16:13"} +{"current_steps": 1640, "total_steps": 7532, "loss": 0.35236096382141113, "lr": 1.8503666651872217e-05, "epoch": 0.4355331297304475, "percentage": 21.77, "elapsed_time": "5:55:09", "remaining_time": "21:15:59"} +{"current_steps": 1641, "total_steps": 7532, "loss": 0.3385634422302246, "lr": 1.850135545280894e-05, "epoch": 0.43579869871199045, "percentage": 21.79, "elapsed_time": "5:55:22", "remaining_time": "21:15:46"} +{"current_steps": 1642, "total_steps": 7532, "loss": 0.32222414016723633, "lr": 1.849904261478459e-05, "epoch": 0.4360642676935334, "percentage": 21.8, "elapsed_time": "5:55:35", "remaining_time": "21:15:33"} +{"current_steps": 1643, "total_steps": 7532, "loss": 0.3251120448112488, "lr": 1.8496728138245062e-05, "epoch": 0.43632983667507635, "percentage": 21.81, "elapsed_time": "5:55:48", "remaining_time": "21:15:20"} +{"current_steps": 1644, "total_steps": 7532, "loss": 0.3199063837528229, "lr": 1.8494412023636563e-05, "epoch": 
0.4365954056566193, "percentage": 21.83, "elapsed_time": "5:56:02", "remaining_time": "21:15:08"} +{"current_steps": 1645, "total_steps": 7532, "loss": 0.3470883071422577, "lr": 1.8492094271405605e-05, "epoch": 0.43686097463816226, "percentage": 21.84, "elapsed_time": "5:56:15", "remaining_time": "21:14:56"} +{"current_steps": 1646, "total_steps": 7532, "loss": 0.319596529006958, "lr": 1.848977488199903e-05, "epoch": 0.4371265436197052, "percentage": 21.85, "elapsed_time": "5:56:28", "remaining_time": "21:14:44"} +{"current_steps": 1647, "total_steps": 7532, "loss": 0.3445591628551483, "lr": 1.848745385586398e-05, "epoch": 0.43739211260124816, "percentage": 21.87, "elapsed_time": "5:56:41", "remaining_time": "21:14:30"} +{"current_steps": 1648, "total_steps": 7532, "loss": 0.35861149430274963, "lr": 1.848513119344793e-05, "epoch": 0.4376576815827911, "percentage": 21.88, "elapsed_time": "5:56:54", "remaining_time": "21:14:18"} +{"current_steps": 1649, "total_steps": 7532, "loss": 0.36727622151374817, "lr": 1.8482806895198658e-05, "epoch": 0.43792325056433407, "percentage": 21.89, "elapsed_time": "5:57:07", "remaining_time": "21:14:05"} +{"current_steps": 1650, "total_steps": 7532, "loss": 0.3505704402923584, "lr": 1.848048096156426e-05, "epoch": 0.438188819545877, "percentage": 21.91, "elapsed_time": "5:57:21", "remaining_time": "21:13:56"} +{"current_steps": 1651, "total_steps": 7532, "loss": 0.3508742153644562, "lr": 1.8478153392993154e-05, "epoch": 0.43845438852742, "percentage": 21.92, "elapsed_time": "5:57:34", "remaining_time": "21:13:42"} +{"current_steps": 1652, "total_steps": 7532, "loss": 0.32757264375686646, "lr": 1.8475824189934063e-05, "epoch": 0.438719957508963, "percentage": 21.93, "elapsed_time": "5:57:47", "remaining_time": "21:13:29"} +{"current_steps": 1653, "total_steps": 7532, "loss": 0.3117530643939972, "lr": 1.8473493352836032e-05, "epoch": 0.43898552649050593, "percentage": 21.95, "elapsed_time": "5:57:59", "remaining_time": "21:13:14"} 
+{"current_steps": 1654, "total_steps": 7532, "loss": 0.3506043553352356, "lr": 1.8471160882148417e-05, "epoch": 0.4392510954720489, "percentage": 21.96, "elapsed_time": "5:58:12", "remaining_time": "21:12:59"} +{"current_steps": 1655, "total_steps": 7532, "loss": 0.33997148275375366, "lr": 1.8468826778320892e-05, "epoch": 0.43951666445359183, "percentage": 21.97, "elapsed_time": "5:58:25", "remaining_time": "21:12:46"} +{"current_steps": 1656, "total_steps": 7532, "loss": 0.30060335993766785, "lr": 1.8466491041803446e-05, "epoch": 0.4397822334351348, "percentage": 21.99, "elapsed_time": "5:58:37", "remaining_time": "21:12:32"} +{"current_steps": 1657, "total_steps": 7532, "loss": 0.3057805597782135, "lr": 1.846415367304638e-05, "epoch": 0.44004780241667774, "percentage": 22.0, "elapsed_time": "5:58:50", "remaining_time": "21:12:18"} +{"current_steps": 1658, "total_steps": 7532, "loss": 0.30772098898887634, "lr": 1.846181467250031e-05, "epoch": 0.4403133713982207, "percentage": 22.01, "elapsed_time": "5:59:02", "remaining_time": "21:12:02"} +{"current_steps": 1659, "total_steps": 7532, "loss": 0.3183813989162445, "lr": 1.845947404061617e-05, "epoch": 0.44057894037976364, "percentage": 22.03, "elapsed_time": "5:59:15", "remaining_time": "21:11:48"} +{"current_steps": 1660, "total_steps": 7532, "loss": 0.2986184358596802, "lr": 1.8457131777845204e-05, "epoch": 0.4408445093613066, "percentage": 22.04, "elapsed_time": "5:59:27", "remaining_time": "21:11:33"} +{"current_steps": 1661, "total_steps": 7532, "loss": 0.33342432975769043, "lr": 1.8454787884638973e-05, "epoch": 0.44111007834284954, "percentage": 22.05, "elapsed_time": "5:59:40", "remaining_time": "21:11:19"} +{"current_steps": 1662, "total_steps": 7532, "loss": 0.33435192704200745, "lr": 1.8452442361449353e-05, "epoch": 0.4413756473243925, "percentage": 22.07, "elapsed_time": "5:59:53", "remaining_time": "21:11:05"} +{"current_steps": 1663, "total_steps": 7532, "loss": 0.31596100330352783, "lr": 
1.8450095208728537e-05, "epoch": 0.44164121630593545, "percentage": 22.08, "elapsed_time": "6:00:06", "remaining_time": "21:10:53"} +{"current_steps": 1664, "total_steps": 7532, "loss": 0.29850512742996216, "lr": 1.8447746426929022e-05, "epoch": 0.4419067852874784, "percentage": 22.09, "elapsed_time": "6:00:18", "remaining_time": "21:10:37"} +{"current_steps": 1665, "total_steps": 7532, "loss": 0.34898555278778076, "lr": 1.8445396016503628e-05, "epoch": 0.4421723542690214, "percentage": 22.11, "elapsed_time": "6:00:31", "remaining_time": "21:10:22"} +{"current_steps": 1666, "total_steps": 7532, "loss": 0.283272385597229, "lr": 1.8443043977905484e-05, "epoch": 0.44243792325056436, "percentage": 22.12, "elapsed_time": "6:00:43", "remaining_time": "21:10:07"} +{"current_steps": 1667, "total_steps": 7532, "loss": 0.32765433192253113, "lr": 1.844069031158804e-05, "epoch": 0.4427034922321073, "percentage": 22.13, "elapsed_time": "6:00:55", "remaining_time": "21:09:51"} +{"current_steps": 1668, "total_steps": 7532, "loss": 0.347957044839859, "lr": 1.8438335018005052e-05, "epoch": 0.44296906121365026, "percentage": 22.15, "elapsed_time": "6:01:08", "remaining_time": "21:09:37"} +{"current_steps": 1669, "total_steps": 7532, "loss": 0.36188018321990967, "lr": 1.8435978097610594e-05, "epoch": 0.4432346301951932, "percentage": 22.16, "elapsed_time": "6:01:20", "remaining_time": "21:09:21"} +{"current_steps": 1670, "total_steps": 7532, "loss": 0.35944315791130066, "lr": 1.843361955085905e-05, "epoch": 0.44350019917673617, "percentage": 22.17, "elapsed_time": "6:01:33", "remaining_time": "21:09:08"} +{"current_steps": 1671, "total_steps": 7532, "loss": 0.33441367745399475, "lr": 1.8431259378205122e-05, "epoch": 0.4437657681582791, "percentage": 22.19, "elapsed_time": "6:01:45", "remaining_time": "21:08:52"} +{"current_steps": 1672, "total_steps": 7532, "loss": 0.3157849907875061, "lr": 1.8428897580103827e-05, "epoch": 0.44403133713982207, "percentage": 22.2, "elapsed_time": 
"6:01:58", "remaining_time": "21:08:38"} +{"current_steps": 1673, "total_steps": 7532, "loss": 0.33416497707366943, "lr": 1.8426534157010486e-05, "epoch": 0.444296906121365, "percentage": 22.21, "elapsed_time": "6:02:10", "remaining_time": "21:08:23"} +{"current_steps": 1674, "total_steps": 7532, "loss": 0.3611617684364319, "lr": 1.842416910938074e-05, "epoch": 0.444562475102908, "percentage": 22.23, "elapsed_time": "6:02:23", "remaining_time": "21:08:10"} +{"current_steps": 1675, "total_steps": 7532, "loss": 0.3030395805835724, "lr": 1.8421802437670546e-05, "epoch": 0.4448280440844509, "percentage": 22.24, "elapsed_time": "6:02:36", "remaining_time": "21:07:54"} +{"current_steps": 1676, "total_steps": 7532, "loss": 0.30281510949134827, "lr": 1.8419434142336167e-05, "epoch": 0.4450936130659939, "percentage": 22.25, "elapsed_time": "6:02:48", "remaining_time": "21:07:39"} +{"current_steps": 1677, "total_steps": 7532, "loss": 0.3489738404750824, "lr": 1.8417064223834184e-05, "epoch": 0.44535918204753683, "percentage": 22.27, "elapsed_time": "6:03:01", "remaining_time": "21:07:25"} +{"current_steps": 1678, "total_steps": 7532, "loss": 0.30453425645828247, "lr": 1.8414692682621487e-05, "epoch": 0.4456247510290798, "percentage": 22.28, "elapsed_time": "6:03:13", "remaining_time": "21:07:10"} +{"current_steps": 1679, "total_steps": 7532, "loss": 0.28717339038848877, "lr": 1.841231951915528e-05, "epoch": 0.44589032001062273, "percentage": 22.29, "elapsed_time": "6:03:26", "remaining_time": "21:06:56"} +{"current_steps": 1680, "total_steps": 7532, "loss": 0.3227912187576294, "lr": 1.840994473389309e-05, "epoch": 0.44615588899216574, "percentage": 22.3, "elapsed_time": "6:03:38", "remaining_time": "21:06:40"} +{"current_steps": 1681, "total_steps": 7532, "loss": 0.3575928807258606, "lr": 1.8407568327292737e-05, "epoch": 0.4464214579737087, "percentage": 22.32, "elapsed_time": "6:03:51", "remaining_time": "21:06:26"} +{"current_steps": 1682, "total_steps": 7532, "loss": 
0.35601454973220825, "lr": 1.840519029981237e-05, "epoch": 0.44668702695525164, "percentage": 22.33, "elapsed_time": "6:04:03", "remaining_time": "21:06:12"} +{"current_steps": 1683, "total_steps": 7532, "loss": 0.34867429733276367, "lr": 1.8402810651910444e-05, "epoch": 0.4469525959367946, "percentage": 22.34, "elapsed_time": "6:04:16", "remaining_time": "21:05:59"} +{"current_steps": 1684, "total_steps": 7532, "loss": 0.3333359360694885, "lr": 1.8400429384045724e-05, "epoch": 0.44721816491833755, "percentage": 22.36, "elapsed_time": "6:04:29", "remaining_time": "21:05:45"} +{"current_steps": 1685, "total_steps": 7532, "loss": 0.3269057273864746, "lr": 1.8398046496677296e-05, "epoch": 0.4474837338998805, "percentage": 22.37, "elapsed_time": "6:04:43", "remaining_time": "21:05:35"} +{"current_steps": 1686, "total_steps": 7532, "loss": 0.3507213890552521, "lr": 1.839566199026455e-05, "epoch": 0.44774930288142345, "percentage": 22.38, "elapsed_time": "6:04:56", "remaining_time": "21:05:23"} +{"current_steps": 1687, "total_steps": 7532, "loss": 0.32935822010040283, "lr": 1.8393275865267185e-05, "epoch": 0.4480148718629664, "percentage": 22.4, "elapsed_time": "6:05:10", "remaining_time": "21:05:13"} +{"current_steps": 1688, "total_steps": 7532, "loss": 0.3780096769332886, "lr": 1.8390888122145225e-05, "epoch": 0.44828044084450935, "percentage": 22.41, "elapsed_time": "6:05:23", "remaining_time": "21:05:01"} +{"current_steps": 1689, "total_steps": 7532, "loss": 0.3412250578403473, "lr": 1.8388498761358997e-05, "epoch": 0.4485460098260523, "percentage": 22.42, "elapsed_time": "6:05:36", "remaining_time": "21:04:49"} +{"current_steps": 1690, "total_steps": 7532, "loss": 0.33751022815704346, "lr": 1.838610778336914e-05, "epoch": 0.44881157880759526, "percentage": 22.44, "elapsed_time": "6:05:49", "remaining_time": "21:04:36"} +{"current_steps": 1691, "total_steps": 7532, "loss": 0.35736170411109924, "lr": 1.8383715188636608e-05, "epoch": 0.4490771477891382, "percentage": 
22.45, "elapsed_time": "6:06:03", "remaining_time": "21:04:24"} +{"current_steps": 1692, "total_steps": 7532, "loss": 0.3133913278579712, "lr": 1.8381320977622664e-05, "epoch": 0.44934271677068116, "percentage": 22.46, "elapsed_time": "6:06:15", "remaining_time": "21:04:10"} +{"current_steps": 1693, "total_steps": 7532, "loss": 0.2890821099281311, "lr": 1.8378925150788886e-05, "epoch": 0.4496082857522241, "percentage": 22.48, "elapsed_time": "6:06:28", "remaining_time": "21:03:56"} +{"current_steps": 1694, "total_steps": 7532, "loss": 0.34016966819763184, "lr": 1.8376527708597155e-05, "epoch": 0.4498738547337671, "percentage": 22.49, "elapsed_time": "6:06:41", "remaining_time": "21:03:45"} +{"current_steps": 1695, "total_steps": 7532, "loss": 0.3502900302410126, "lr": 1.8374128651509676e-05, "epoch": 0.45013942371531007, "percentage": 22.5, "elapsed_time": "6:06:55", "remaining_time": "21:03:32"} +{"current_steps": 1696, "total_steps": 7532, "loss": 0.31828251481056213, "lr": 1.8371727979988957e-05, "epoch": 0.450404992696853, "percentage": 22.52, "elapsed_time": "6:07:08", "remaining_time": "21:03:21"} +{"current_steps": 1697, "total_steps": 7532, "loss": 0.33322471380233765, "lr": 1.836932569449782e-05, "epoch": 0.450670561678396, "percentage": 22.53, "elapsed_time": "6:07:22", "remaining_time": "21:03:09"} +{"current_steps": 1698, "total_steps": 7532, "loss": 0.28489458560943604, "lr": 1.8366921795499394e-05, "epoch": 0.4509361306599389, "percentage": 22.54, "elapsed_time": "6:07:35", "remaining_time": "21:03:00"} +{"current_steps": 1699, "total_steps": 7532, "loss": 0.3125787079334259, "lr": 1.8364516283457127e-05, "epoch": 0.4512016996414819, "percentage": 22.56, "elapsed_time": "6:07:49", "remaining_time": "21:02:48"} +{"current_steps": 1700, "total_steps": 7532, "loss": 0.3352596163749695, "lr": 1.8362109158834767e-05, "epoch": 0.45146726862302483, "percentage": 22.57, "elapsed_time": "6:08:03", "remaining_time": "21:02:37"} +{"current_steps": 1701, 
"total_steps": 7532, "loss": 0.2986747622489929, "lr": 1.8359700422096385e-05, "epoch": 0.4517328376045678, "percentage": 22.58, "elapsed_time": "6:08:22", "remaining_time": "21:02:45"} +{"current_steps": 1702, "total_steps": 7532, "loss": 0.3276829123497009, "lr": 1.8357290073706355e-05, "epoch": 0.45199840658611073, "percentage": 22.6, "elapsed_time": "6:08:35", "remaining_time": "21:02:34"} +{"current_steps": 1703, "total_steps": 7532, "loss": 0.3183029890060425, "lr": 1.8354878114129368e-05, "epoch": 0.4522639755676537, "percentage": 22.61, "elapsed_time": "6:08:48", "remaining_time": "21:02:20"} +{"current_steps": 1704, "total_steps": 7532, "loss": 0.32149460911750793, "lr": 1.835246454383041e-05, "epoch": 0.45252954454919664, "percentage": 22.62, "elapsed_time": "6:09:01", "remaining_time": "21:02:09"} +{"current_steps": 1705, "total_steps": 7532, "loss": 0.2963859438896179, "lr": 1.8350049363274802e-05, "epoch": 0.4527951135307396, "percentage": 22.64, "elapsed_time": "6:09:14", "remaining_time": "21:01:55"} +{"current_steps": 1706, "total_steps": 7532, "loss": 0.35251080989837646, "lr": 1.8347632572928154e-05, "epoch": 0.45306068251228254, "percentage": 22.65, "elapsed_time": "6:09:28", "remaining_time": "21:01:44"} +{"current_steps": 1707, "total_steps": 7532, "loss": 0.3585474491119385, "lr": 1.8345214173256395e-05, "epoch": 0.4533262514938255, "percentage": 22.66, "elapsed_time": "6:09:41", "remaining_time": "21:01:32"} +{"current_steps": 1708, "total_steps": 7532, "loss": 0.32339078187942505, "lr": 1.834279416472577e-05, "epoch": 0.4535918204753685, "percentage": 22.68, "elapsed_time": "6:09:53", "remaining_time": "21:01:17"} +{"current_steps": 1709, "total_steps": 7532, "loss": 0.3473295569419861, "lr": 1.8340372547802822e-05, "epoch": 0.45385738945691145, "percentage": 22.69, "elapsed_time": "6:10:07", "remaining_time": "21:01:05"} +{"current_steps": 1710, "total_steps": 7532, "loss": 0.35146117210388184, "lr": 1.833794932295441e-05, "epoch": 
0.4541229584384544, "percentage": 22.7, "elapsed_time": "6:10:19", "remaining_time": "21:00:51"} +{"current_steps": 1711, "total_steps": 7532, "loss": 0.29697534441947937, "lr": 1.833552449064771e-05, "epoch": 0.45438852741999736, "percentage": 22.72, "elapsed_time": "6:10:32", "remaining_time": "21:00:38"} +{"current_steps": 1712, "total_steps": 7532, "loss": 0.30980685353279114, "lr": 1.8333098051350197e-05, "epoch": 0.4546540964015403, "percentage": 22.73, "elapsed_time": "6:10:45", "remaining_time": "21:00:24"} +{"current_steps": 1713, "total_steps": 7532, "loss": 0.3271983861923218, "lr": 1.8330670005529657e-05, "epoch": 0.45491966538308326, "percentage": 22.74, "elapsed_time": "6:10:58", "remaining_time": "21:00:12"} +{"current_steps": 1714, "total_steps": 7532, "loss": 0.3421804904937744, "lr": 1.8328240353654193e-05, "epoch": 0.4551852343646262, "percentage": 22.76, "elapsed_time": "6:11:11", "remaining_time": "20:59:57"} +{"current_steps": 1715, "total_steps": 7532, "loss": 0.2949771285057068, "lr": 1.8325809096192207e-05, "epoch": 0.45545080334616916, "percentage": 22.77, "elapsed_time": "6:11:24", "remaining_time": "20:59:45"} +{"current_steps": 1716, "total_steps": 7532, "loss": 0.35578668117523193, "lr": 1.832337623361242e-05, "epoch": 0.4557163723277121, "percentage": 22.78, "elapsed_time": "6:11:37", "remaining_time": "20:59:32"} +{"current_steps": 1717, "total_steps": 7532, "loss": 0.3714647889137268, "lr": 1.832094176638387e-05, "epoch": 0.45598194130925507, "percentage": 22.8, "elapsed_time": "6:11:50", "remaining_time": "20:59:20"} +{"current_steps": 1718, "total_steps": 7532, "loss": 0.36253875494003296, "lr": 1.8318505694975877e-05, "epoch": 0.456247510290798, "percentage": 22.81, "elapsed_time": "6:12:03", "remaining_time": "20:59:07"} +{"current_steps": 1719, "total_steps": 7532, "loss": 0.3148016035556793, "lr": 1.8316068019858093e-05, "epoch": 0.45651307927234097, "percentage": 22.82, "elapsed_time": "6:12:17", "remaining_time": "20:58:56"} 
+{"current_steps": 1720, "total_steps": 7532, "loss": 0.3420512080192566, "lr": 1.8313628741500476e-05, "epoch": 0.4567786482538839, "percentage": 22.84, "elapsed_time": "6:12:30", "remaining_time": "20:58:43"} +{"current_steps": 1721, "total_steps": 7532, "loss": 0.2941698431968689, "lr": 1.831118786037329e-05, "epoch": 0.4570442172354269, "percentage": 22.85, "elapsed_time": "6:12:43", "remaining_time": "20:58:31"} +{"current_steps": 1722, "total_steps": 7532, "loss": 0.3033481240272522, "lr": 1.83087453769471e-05, "epoch": 0.4573097862169699, "percentage": 22.86, "elapsed_time": "6:12:56", "remaining_time": "20:58:18"} +{"current_steps": 1723, "total_steps": 7532, "loss": 0.3405943810939789, "lr": 1.8306301291692798e-05, "epoch": 0.45757535519851283, "percentage": 22.88, "elapsed_time": "6:13:09", "remaining_time": "20:58:05"} +{"current_steps": 1724, "total_steps": 7532, "loss": 0.32217931747436523, "lr": 1.8303855605081567e-05, "epoch": 0.4578409241800558, "percentage": 22.89, "elapsed_time": "6:13:22", "remaining_time": "20:57:51"} +{"current_steps": 1725, "total_steps": 7532, "loss": 0.3627573847770691, "lr": 1.8301408317584913e-05, "epoch": 0.45810649316159874, "percentage": 22.9, "elapsed_time": "6:13:35", "remaining_time": "20:57:38"} +{"current_steps": 1726, "total_steps": 7532, "loss": 0.3512224853038788, "lr": 1.829895942967464e-05, "epoch": 0.4583720621431417, "percentage": 22.92, "elapsed_time": "6:13:49", "remaining_time": "20:57:27"} +{"current_steps": 1727, "total_steps": 7532, "loss": 0.35433265566825867, "lr": 1.8296508941822868e-05, "epoch": 0.45863763112468464, "percentage": 22.93, "elapsed_time": "6:14:01", "remaining_time": "20:57:14"} +{"current_steps": 1728, "total_steps": 7532, "loss": 0.33105185627937317, "lr": 1.829405685450202e-05, "epoch": 0.4589032001062276, "percentage": 22.94, "elapsed_time": "6:14:15", "remaining_time": "20:57:03"} +{"current_steps": 1729, "total_steps": 7532, "loss": 0.31765925884246826, "lr": 
1.829160316818483e-05, "epoch": 0.45916876908777055, "percentage": 22.96, "elapsed_time": "6:14:28", "remaining_time": "20:56:49"} +{"current_steps": 1730, "total_steps": 7532, "loss": 0.3276101350784302, "lr": 1.8289147883344338e-05, "epoch": 0.4594343380693135, "percentage": 22.97, "elapsed_time": "6:14:41", "remaining_time": "20:56:38"} +{"current_steps": 1731, "total_steps": 7532, "loss": 0.2921130061149597, "lr": 1.8286691000453895e-05, "epoch": 0.45969990705085645, "percentage": 22.98, "elapsed_time": "6:14:54", "remaining_time": "20:56:24"} +{"current_steps": 1732, "total_steps": 7532, "loss": 0.3025062382221222, "lr": 1.828423251998716e-05, "epoch": 0.4599654760323994, "percentage": 23.0, "elapsed_time": "6:15:08", "remaining_time": "20:56:13"} +{"current_steps": 1733, "total_steps": 7532, "loss": 0.3128702640533447, "lr": 1.82817724424181e-05, "epoch": 0.46023104501394235, "percentage": 23.01, "elapsed_time": "6:15:21", "remaining_time": "20:56:01"} +{"current_steps": 1734, "total_steps": 7532, "loss": 0.31156033277511597, "lr": 1.8279310768220987e-05, "epoch": 0.4604966139954853, "percentage": 23.02, "elapsed_time": "6:15:34", "remaining_time": "20:55:49"} +{"current_steps": 1735, "total_steps": 7532, "loss": 0.30409976840019226, "lr": 1.82768474978704e-05, "epoch": 0.46076218297702826, "percentage": 23.04, "elapsed_time": "6:15:47", "remaining_time": "20:55:36"} +{"current_steps": 1736, "total_steps": 7532, "loss": 0.305557519197464, "lr": 1.827438263184124e-05, "epoch": 0.46102775195857126, "percentage": 23.05, "elapsed_time": "6:16:00", "remaining_time": "20:55:22"} +{"current_steps": 1737, "total_steps": 7532, "loss": 0.36079999804496765, "lr": 1.827191617060869e-05, "epoch": 0.4612933209401142, "percentage": 23.06, "elapsed_time": "6:16:13", "remaining_time": "20:55:11"} +{"current_steps": 1738, "total_steps": 7532, "loss": 0.3341830372810364, "lr": 1.8269448114648264e-05, "epoch": 0.46155888992165717, "percentage": 23.07, "elapsed_time": "6:16:26", 
"remaining_time": "20:54:57"} +{"current_steps": 1739, "total_steps": 7532, "loss": 0.3222450017929077, "lr": 1.8266978464435764e-05, "epoch": 0.4618244589032001, "percentage": 23.09, "elapsed_time": "6:16:40", "remaining_time": "20:54:46"} +{"current_steps": 1740, "total_steps": 7532, "loss": 0.34665441513061523, "lr": 1.826450722044732e-05, "epoch": 0.46209002788474307, "percentage": 23.1, "elapsed_time": "6:16:52", "remaining_time": "20:54:32"} +{"current_steps": 1741, "total_steps": 7532, "loss": 0.31024169921875, "lr": 1.8262034383159357e-05, "epoch": 0.462355596866286, "percentage": 23.11, "elapsed_time": "6:17:06", "remaining_time": "20:54:20"} +{"current_steps": 1742, "total_steps": 7532, "loss": 0.2950369119644165, "lr": 1.8259559953048606e-05, "epoch": 0.462621165847829, "percentage": 23.13, "elapsed_time": "6:17:19", "remaining_time": "20:54:07"} +{"current_steps": 1743, "total_steps": 7532, "loss": 0.3378523886203766, "lr": 1.8257083930592102e-05, "epoch": 0.4628867348293719, "percentage": 23.14, "elapsed_time": "6:17:32", "remaining_time": "20:53:56"} +{"current_steps": 1744, "total_steps": 7532, "loss": 0.2930060923099518, "lr": 1.8254606316267204e-05, "epoch": 0.4631523038109149, "percentage": 23.15, "elapsed_time": "6:17:45", "remaining_time": "20:53:43"} +{"current_steps": 1745, "total_steps": 7532, "loss": 0.3236517012119293, "lr": 1.8252127110551564e-05, "epoch": 0.46341787279245783, "percentage": 23.17, "elapsed_time": "6:17:59", "remaining_time": "20:53:32"} +{"current_steps": 1746, "total_steps": 7532, "loss": 0.3010406196117401, "lr": 1.824964631392314e-05, "epoch": 0.4636834417740008, "percentage": 23.18, "elapsed_time": "6:18:12", "remaining_time": "20:53:18"} +{"current_steps": 1747, "total_steps": 7532, "loss": 0.3269607424736023, "lr": 1.8247163926860204e-05, "epoch": 0.46394901075554373, "percentage": 23.19, "elapsed_time": "6:18:24", "remaining_time": "20:53:04"} +{"current_steps": 1748, "total_steps": 7532, "loss": 0.3437904715538025, 
"lr": 1.8244679949841328e-05, "epoch": 0.4642145797370867, "percentage": 23.21, "elapsed_time": "6:18:38", "remaining_time": "20:52:54"} +{"current_steps": 1749, "total_steps": 7532, "loss": 0.37820738554000854, "lr": 1.8242194383345394e-05, "epoch": 0.46448014871862964, "percentage": 23.22, "elapsed_time": "6:18:51", "remaining_time": "20:52:40"} +{"current_steps": 1750, "total_steps": 7532, "loss": 0.3365899920463562, "lr": 1.8239707227851592e-05, "epoch": 0.46474571770017264, "percentage": 23.23, "elapsed_time": "6:19:04", "remaining_time": "20:52:28"} +{"current_steps": 1751, "total_steps": 7532, "loss": 0.30418774485588074, "lr": 1.8237218483839414e-05, "epoch": 0.4650112866817156, "percentage": 23.25, "elapsed_time": "6:19:17", "remaining_time": "20:52:14"} +{"current_steps": 1752, "total_steps": 7532, "loss": 0.2923222780227661, "lr": 1.823472815178866e-05, "epoch": 0.46527685566325855, "percentage": 23.26, "elapsed_time": "6:19:30", "remaining_time": "20:52:02"} +{"current_steps": 1753, "total_steps": 7532, "loss": 0.3358995020389557, "lr": 1.823223623217944e-05, "epoch": 0.4655424246448015, "percentage": 23.27, "elapsed_time": "6:19:43", "remaining_time": "20:51:48"} +{"current_steps": 1754, "total_steps": 7532, "loss": 0.3413343131542206, "lr": 1.822974272549216e-05, "epoch": 0.46580799362634445, "percentage": 23.29, "elapsed_time": "6:19:56", "remaining_time": "20:51:36"} +{"current_steps": 1755, "total_steps": 7532, "loss": 0.33553364872932434, "lr": 1.822724763220755e-05, "epoch": 0.4660735626078874, "percentage": 23.3, "elapsed_time": "6:20:09", "remaining_time": "20:51:22"} +{"current_steps": 1756, "total_steps": 7532, "loss": 0.35896626114845276, "lr": 1.8224750952806626e-05, "epoch": 0.46633913158943036, "percentage": 23.31, "elapsed_time": "6:20:22", "remaining_time": "20:51:10"} +{"current_steps": 1757, "total_steps": 7532, "loss": 0.35345566272735596, "lr": 1.8222252687770718e-05, "epoch": 0.4666047005709733, "percentage": 23.33, "elapsed_time": 
"6:20:35", "remaining_time": "20:50:58"} +{"current_steps": 1758, "total_steps": 7532, "loss": 0.3146013617515564, "lr": 1.8219752837581466e-05, "epoch": 0.46687026955251626, "percentage": 23.34, "elapsed_time": "6:20:49", "remaining_time": "20:50:46"} +{"current_steps": 1759, "total_steps": 7532, "loss": 0.33270642161369324, "lr": 1.8217251402720807e-05, "epoch": 0.4671358385340592, "percentage": 23.35, "elapsed_time": "6:21:02", "remaining_time": "20:50:32"} +{"current_steps": 1760, "total_steps": 7532, "loss": 0.3172033727169037, "lr": 1.821474838367099e-05, "epoch": 0.46740140751560216, "percentage": 23.37, "elapsed_time": "6:21:15", "remaining_time": "20:50:20"} +{"current_steps": 1761, "total_steps": 7532, "loss": 0.3277033567428589, "lr": 1.8212243780914578e-05, "epoch": 0.4676669764971451, "percentage": 23.38, "elapsed_time": "6:21:28", "remaining_time": "20:50:07"} +{"current_steps": 1762, "total_steps": 7532, "loss": 0.3523799777030945, "lr": 1.820973759493441e-05, "epoch": 0.46793254547868807, "percentage": 23.39, "elapsed_time": "6:21:41", "remaining_time": "20:49:55"} +{"current_steps": 1763, "total_steps": 7532, "loss": 0.32437676191329956, "lr": 1.8207229826213664e-05, "epoch": 0.468198114460231, "percentage": 23.41, "elapsed_time": "6:21:54", "remaining_time": "20:49:41"} +{"current_steps": 1764, "total_steps": 7532, "loss": 0.34185051918029785, "lr": 1.82047204752358e-05, "epoch": 0.468463683441774, "percentage": 23.42, "elapsed_time": "6:22:07", "remaining_time": "20:49:28"} +{"current_steps": 1765, "total_steps": 7532, "loss": 0.32034197449684143, "lr": 1.8202209542484594e-05, "epoch": 0.468729252423317, "percentage": 23.43, "elapsed_time": "6:22:20", "remaining_time": "20:49:17"} +{"current_steps": 1766, "total_steps": 7532, "loss": 0.30969515442848206, "lr": 1.8199697028444125e-05, "epoch": 0.46899482140485993, "percentage": 23.45, "elapsed_time": "6:22:33", "remaining_time": "20:49:02"} +{"current_steps": 1767, "total_steps": 7532, "loss": 
0.24751389026641846, "lr": 1.8197182933598776e-05, "epoch": 0.4692603903864029, "percentage": 23.46, "elapsed_time": "6:22:46", "remaining_time": "20:48:50"} +{"current_steps": 1768, "total_steps": 7532, "loss": 0.3859948217868805, "lr": 1.8194667258433235e-05, "epoch": 0.46952595936794583, "percentage": 23.47, "elapsed_time": "6:22:59", "remaining_time": "20:48:37"} +{"current_steps": 1769, "total_steps": 7532, "loss": 0.29364967346191406, "lr": 1.819215000343249e-05, "epoch": 0.4697915283494888, "percentage": 23.49, "elapsed_time": "6:23:12", "remaining_time": "20:48:24"} +{"current_steps": 1770, "total_steps": 7532, "loss": 0.3560323715209961, "lr": 1.8189631169081845e-05, "epoch": 0.47005709733103174, "percentage": 23.5, "elapsed_time": "6:23:25", "remaining_time": "20:48:10"} +{"current_steps": 1771, "total_steps": 7532, "loss": 0.3458098769187927, "lr": 1.8187110755866898e-05, "epoch": 0.4703226663125747, "percentage": 23.51, "elapsed_time": "6:23:38", "remaining_time": "20:47:57"} +{"current_steps": 1772, "total_steps": 7532, "loss": 0.32131001353263855, "lr": 1.8184588764273555e-05, "epoch": 0.47058823529411764, "percentage": 23.53, "elapsed_time": "6:23:50", "remaining_time": "20:47:43"} +{"current_steps": 1773, "total_steps": 7532, "loss": 0.3011054992675781, "lr": 1.8182065194788024e-05, "epoch": 0.4708538042756606, "percentage": 23.54, "elapsed_time": "6:24:04", "remaining_time": "20:47:32"} +{"current_steps": 1774, "total_steps": 7532, "loss": 0.3314674496650696, "lr": 1.8179540047896827e-05, "epoch": 0.47111937325720354, "percentage": 23.55, "elapsed_time": "6:24:17", "remaining_time": "20:47:19"} +{"current_steps": 1775, "total_steps": 7532, "loss": 0.3437536060810089, "lr": 1.8177013324086774e-05, "epoch": 0.4713849422387465, "percentage": 23.57, "elapsed_time": "6:24:31", "remaining_time": "20:47:08"} +{"current_steps": 1776, "total_steps": 7532, "loss": 0.36137935519218445, "lr": 1.8174485023844993e-05, "epoch": 0.47165051122028945, "percentage": 
23.58, "elapsed_time": "6:24:43", "remaining_time": "20:46:54"} +{"current_steps": 1777, "total_steps": 7532, "loss": 0.34018874168395996, "lr": 1.8171955147658905e-05, "epoch": 0.4719160802018324, "percentage": 23.59, "elapsed_time": "6:24:56", "remaining_time": "20:46:41"} +{"current_steps": 1778, "total_steps": 7532, "loss": 0.33298587799072266, "lr": 1.8169423696016245e-05, "epoch": 0.4721816491833754, "percentage": 23.61, "elapsed_time": "6:25:10", "remaining_time": "20:46:29"} +{"current_steps": 1779, "total_steps": 7532, "loss": 0.3649418354034424, "lr": 1.816689066940505e-05, "epoch": 0.47244721816491836, "percentage": 23.62, "elapsed_time": "6:25:22", "remaining_time": "20:46:15"} +{"current_steps": 1780, "total_steps": 7532, "loss": 0.32419171929359436, "lr": 1.8164356068313646e-05, "epoch": 0.4727127871464613, "percentage": 23.63, "elapsed_time": "6:25:36", "remaining_time": "20:46:03"} +{"current_steps": 1781, "total_steps": 7532, "loss": 0.288555383682251, "lr": 1.8161819893230688e-05, "epoch": 0.47297835612800426, "percentage": 23.65, "elapsed_time": "6:25:48", "remaining_time": "20:45:49"} +{"current_steps": 1782, "total_steps": 7532, "loss": 0.3231011629104614, "lr": 1.815928214464511e-05, "epoch": 0.4732439251095472, "percentage": 23.66, "elapsed_time": "6:26:01", "remaining_time": "20:45:36"} +{"current_steps": 1783, "total_steps": 7532, "loss": 0.29310134053230286, "lr": 1.815674282304617e-05, "epoch": 0.47350949409109017, "percentage": 23.67, "elapsed_time": "6:26:14", "remaining_time": "20:45:22"} +{"current_steps": 1784, "total_steps": 7532, "loss": 0.32683852314949036, "lr": 1.815420192892341e-05, "epoch": 0.4737750630726331, "percentage": 23.69, "elapsed_time": "6:26:27", "remaining_time": "20:45:08"} +{"current_steps": 1785, "total_steps": 7532, "loss": 0.3200969099998474, "lr": 1.8151659462766685e-05, "epoch": 0.47404063205417607, "percentage": 23.7, "elapsed_time": "6:26:39", "remaining_time": "20:44:54"} +{"current_steps": 1786, 
"total_steps": 7532, "loss": 0.3091360032558441, "lr": 1.814911542506616e-05, "epoch": 0.474306201035719, "percentage": 23.71, "elapsed_time": "6:26:52", "remaining_time": "20:44:40"} +{"current_steps": 1787, "total_steps": 7532, "loss": 0.3679049611091614, "lr": 1.814656981631229e-05, "epoch": 0.474571770017262, "percentage": 23.73, "elapsed_time": "6:27:05", "remaining_time": "20:44:26"} +{"current_steps": 1788, "total_steps": 7532, "loss": 0.290119469165802, "lr": 1.814402263699584e-05, "epoch": 0.4748373389988049, "percentage": 23.74, "elapsed_time": "6:27:18", "remaining_time": "20:44:13"} +{"current_steps": 1789, "total_steps": 7532, "loss": 0.31878861784935, "lr": 1.8141473887607874e-05, "epoch": 0.4751029079803479, "percentage": 23.75, "elapsed_time": "6:27:31", "remaining_time": "20:44:00"} +{"current_steps": 1790, "total_steps": 7532, "loss": 0.35820287466049194, "lr": 1.8138923568639763e-05, "epoch": 0.47536847696189083, "percentage": 23.77, "elapsed_time": "6:27:44", "remaining_time": "20:43:48"} +{"current_steps": 1791, "total_steps": 7532, "loss": 0.2924647629261017, "lr": 1.8136371680583176e-05, "epoch": 0.4756340459434338, "percentage": 23.78, "elapsed_time": "6:27:57", "remaining_time": "20:43:34"} +{"current_steps": 1792, "total_steps": 7532, "loss": 0.3799927234649658, "lr": 1.8133818223930092e-05, "epoch": 0.4758996149249768, "percentage": 23.79, "elapsed_time": "6:28:10", "remaining_time": "20:43:23"} +{"current_steps": 1793, "total_steps": 7532, "loss": 0.3505420386791229, "lr": 1.8131263199172783e-05, "epoch": 0.47616518390651974, "percentage": 23.81, "elapsed_time": "6:28:23", "remaining_time": "20:43:09"} +{"current_steps": 1794, "total_steps": 7532, "loss": 0.3291688859462738, "lr": 1.8128706606803823e-05, "epoch": 0.4764307528880627, "percentage": 23.82, "elapsed_time": "6:28:36", "remaining_time": "20:42:57"} +{"current_steps": 1795, "total_steps": 7532, "loss": 0.34079697728157043, "lr": 1.8126148447316104e-05, "epoch": 
0.47669632186960564, "percentage": 23.83, "elapsed_time": "6:28:49", "remaining_time": "20:42:43"} +{"current_steps": 1796, "total_steps": 7532, "loss": 0.2898064851760864, "lr": 1.8123588721202802e-05, "epoch": 0.4769618908511486, "percentage": 23.84, "elapsed_time": "6:29:01", "remaining_time": "20:42:28"} +{"current_steps": 1797, "total_steps": 7532, "loss": 0.32089224457740784, "lr": 1.8121027428957402e-05, "epoch": 0.47722745983269155, "percentage": 23.86, "elapsed_time": "6:29:14", "remaining_time": "20:42:14"} +{"current_steps": 1798, "total_steps": 7532, "loss": 0.3402039408683777, "lr": 1.8118464571073697e-05, "epoch": 0.4774930288142345, "percentage": 23.87, "elapsed_time": "6:29:27", "remaining_time": "20:41:59"} +{"current_steps": 1799, "total_steps": 7532, "loss": 0.29904159903526306, "lr": 1.8115900148045767e-05, "epoch": 0.47775859779577745, "percentage": 23.88, "elapsed_time": "6:29:39", "remaining_time": "20:41:46"} +{"current_steps": 1800, "total_steps": 7532, "loss": 0.34074240922927856, "lr": 1.8113334160368007e-05, "epoch": 0.4780241667773204, "percentage": 23.9, "elapsed_time": "6:29:52", "remaining_time": "20:41:32"} +{"current_steps": 1801, "total_steps": 7532, "loss": 0.28566253185272217, "lr": 1.811076660853511e-05, "epoch": 0.47828973575886335, "percentage": 23.91, "elapsed_time": "6:30:10", "remaining_time": "20:41:35"} +{"current_steps": 1802, "total_steps": 7532, "loss": 0.34523358941078186, "lr": 1.8108197493042065e-05, "epoch": 0.4785553047404063, "percentage": 23.92, "elapsed_time": "6:30:23", "remaining_time": "20:41:22"} +{"current_steps": 1803, "total_steps": 7532, "loss": 0.3261171281337738, "lr": 1.8105626814384173e-05, "epoch": 0.47882087372194926, "percentage": 23.94, "elapsed_time": "6:30:36", "remaining_time": "20:41:09"} +{"current_steps": 1804, "total_steps": 7532, "loss": 0.2915942966938019, "lr": 1.8103054573057027e-05, "epoch": 0.4790864427034922, "percentage": 23.95, "elapsed_time": "6:30:49", "remaining_time": 
"20:40:54"} +{"current_steps": 1805, "total_steps": 7532, "loss": 0.2999255657196045, "lr": 1.810048076955653e-05, "epoch": 0.47935201168503516, "percentage": 23.96, "elapsed_time": "6:31:02", "remaining_time": "20:40:43"} +{"current_steps": 1806, "total_steps": 7532, "loss": 0.3294594883918762, "lr": 1.8097905404378874e-05, "epoch": 0.47961758066657817, "percentage": 23.98, "elapsed_time": "6:31:15", "remaining_time": "20:40:31"} +{"current_steps": 1807, "total_steps": 7532, "loss": 0.30720093846321106, "lr": 1.8095328478020563e-05, "epoch": 0.4798831496481211, "percentage": 23.99, "elapsed_time": "6:31:29", "remaining_time": "20:40:19"} +{"current_steps": 1808, "total_steps": 7532, "loss": 0.31076985597610474, "lr": 1.8092749990978395e-05, "epoch": 0.4801487186296641, "percentage": 24.0, "elapsed_time": "6:31:43", "remaining_time": "20:40:09"} +{"current_steps": 1809, "total_steps": 7532, "loss": 0.3182013928890228, "lr": 1.8090169943749477e-05, "epoch": 0.480414287611207, "percentage": 24.02, "elapsed_time": "6:31:56", "remaining_time": "20:39:56"} +{"current_steps": 1810, "total_steps": 7532, "loss": 0.325716108083725, "lr": 1.8087588336831206e-05, "epoch": 0.48067985659275, "percentage": 24.03, "elapsed_time": "6:32:10", "remaining_time": "20:39:46"} +{"current_steps": 1811, "total_steps": 7532, "loss": 0.3148769736289978, "lr": 1.8085005170721287e-05, "epoch": 0.48094542557429293, "percentage": 24.04, "elapsed_time": "6:32:23", "remaining_time": "20:39:34"} +{"current_steps": 1812, "total_steps": 7532, "loss": 0.30645644664764404, "lr": 1.8082420445917727e-05, "epoch": 0.4812109945558359, "percentage": 24.06, "elapsed_time": "6:32:37", "remaining_time": "20:39:24"} +{"current_steps": 1813, "total_steps": 7532, "loss": 0.2978900969028473, "lr": 1.807983416291883e-05, "epoch": 0.48147656353737883, "percentage": 24.07, "elapsed_time": "6:32:50", "remaining_time": "20:39:11"} +{"current_steps": 1814, "total_steps": 7532, "loss": 0.34340181946754456, "lr": 
1.8077246322223194e-05, "epoch": 0.4817421325189218, "percentage": 24.08, "elapsed_time": "6:33:04", "remaining_time": "20:39:00"} +{"current_steps": 1815, "total_steps": 7532, "loss": 0.3272106349468231, "lr": 1.8074656924329733e-05, "epoch": 0.48200770150046474, "percentage": 24.1, "elapsed_time": "6:33:17", "remaining_time": "20:38:48"} +{"current_steps": 1816, "total_steps": 7532, "loss": 0.31061962246894836, "lr": 1.807206596973765e-05, "epoch": 0.4822732704820077, "percentage": 24.11, "elapsed_time": "6:33:31", "remaining_time": "20:38:37"} +{"current_steps": 1817, "total_steps": 7532, "loss": 0.28947243094444275, "lr": 1.8069473458946445e-05, "epoch": 0.48253883946355064, "percentage": 24.12, "elapsed_time": "6:33:44", "remaining_time": "20:38:24"} +{"current_steps": 1818, "total_steps": 7532, "loss": 0.35057532787323, "lr": 1.8066879392455932e-05, "epoch": 0.4828044084450936, "percentage": 24.14, "elapsed_time": "6:33:57", "remaining_time": "20:38:12"} +{"current_steps": 1819, "total_steps": 7532, "loss": 0.31032001972198486, "lr": 1.8064283770766212e-05, "epoch": 0.48306997742663654, "percentage": 24.15, "elapsed_time": "6:34:11", "remaining_time": "20:38:02"} +{"current_steps": 1820, "total_steps": 7532, "loss": 0.3802293539047241, "lr": 1.8061686594377685e-05, "epoch": 0.48333554640817955, "percentage": 24.16, "elapsed_time": "6:34:24", "remaining_time": "20:37:50"} +{"current_steps": 1821, "total_steps": 7532, "loss": 0.3306402564048767, "lr": 1.8059087863791066e-05, "epoch": 0.4836011153897225, "percentage": 24.18, "elapsed_time": "6:34:38", "remaining_time": "20:37:40"} +{"current_steps": 1822, "total_steps": 7532, "loss": 0.32170724868774414, "lr": 1.8056487579507352e-05, "epoch": 0.48386668437126545, "percentage": 24.19, "elapsed_time": "6:34:51", "remaining_time": "20:37:27"} +{"current_steps": 1823, "total_steps": 7532, "loss": 0.35058924555778503, "lr": 1.8053885742027854e-05, "epoch": 0.4841322533528084, "percentage": 24.2, "elapsed_time": 
"6:35:05", "remaining_time": "20:37:17"} +{"current_steps": 1824, "total_steps": 7532, "loss": 0.3796595335006714, "lr": 1.8051282351854168e-05, "epoch": 0.48439782233435136, "percentage": 24.22, "elapsed_time": "6:35:18", "remaining_time": "20:37:05"} +{"current_steps": 1825, "total_steps": 7532, "loss": 0.28997284173965454, "lr": 1.8048677409488205e-05, "epoch": 0.4846633913158943, "percentage": 24.23, "elapsed_time": "6:35:32", "remaining_time": "20:36:54"} +{"current_steps": 1826, "total_steps": 7532, "loss": 0.35110151767730713, "lr": 1.804607091543216e-05, "epoch": 0.48492896029743726, "percentage": 24.24, "elapsed_time": "6:35:45", "remaining_time": "20:36:41"} +{"current_steps": 1827, "total_steps": 7532, "loss": 0.3194088637828827, "lr": 1.8043462870188535e-05, "epoch": 0.4851945292789802, "percentage": 24.26, "elapsed_time": "6:35:59", "remaining_time": "20:36:30"} +{"current_steps": 1828, "total_steps": 7532, "loss": 0.28777945041656494, "lr": 1.8040853274260137e-05, "epoch": 0.48546009826052317, "percentage": 24.27, "elapsed_time": "6:36:12", "remaining_time": "20:36:17"} +{"current_steps": 1829, "total_steps": 7532, "loss": 0.3642069697380066, "lr": 1.803824212815006e-05, "epoch": 0.4857256672420661, "percentage": 24.28, "elapsed_time": "6:36:26", "remaining_time": "20:36:07"} +{"current_steps": 1830, "total_steps": 7532, "loss": 0.32396575808525085, "lr": 1.80356294323617e-05, "epoch": 0.48599123622360907, "percentage": 24.3, "elapsed_time": "6:36:39", "remaining_time": "20:35:54"} +{"current_steps": 1831, "total_steps": 7532, "loss": 0.36421436071395874, "lr": 1.8033015187398758e-05, "epoch": 0.486256805205152, "percentage": 24.31, "elapsed_time": "6:36:52", "remaining_time": "20:35:43"} +{"current_steps": 1832, "total_steps": 7532, "loss": 0.3170832395553589, "lr": 1.8030399393765227e-05, "epoch": 0.486522374186695, "percentage": 24.32, "elapsed_time": "6:37:05", "remaining_time": "20:35:30"} +{"current_steps": 1833, "total_steps": 7532, "loss": 
0.3003416359424591, "lr": 1.8027782051965408e-05, "epoch": 0.4867879431682379, "percentage": 24.34, "elapsed_time": "6:37:19", "remaining_time": "20:35:20"} +{"current_steps": 1834, "total_steps": 7532, "loss": 0.30362898111343384, "lr": 1.802516316250388e-05, "epoch": 0.48705351214978093, "percentage": 24.35, "elapsed_time": "6:37:32", "remaining_time": "20:35:07"} +{"current_steps": 1835, "total_steps": 7532, "loss": 0.32721444964408875, "lr": 1.802254272588555e-05, "epoch": 0.4873190811313239, "percentage": 24.36, "elapsed_time": "6:37:46", "remaining_time": "20:34:55"} +{"current_steps": 1836, "total_steps": 7532, "loss": 0.317483514547348, "lr": 1.8019920742615596e-05, "epoch": 0.48758465011286684, "percentage": 24.38, "elapsed_time": "6:37:59", "remaining_time": "20:34:44"} +{"current_steps": 1837, "total_steps": 7532, "loss": 0.2928479015827179, "lr": 1.801729721319951e-05, "epoch": 0.4878502190944098, "percentage": 24.39, "elapsed_time": "6:38:12", "remaining_time": "20:34:31"} +{"current_steps": 1838, "total_steps": 7532, "loss": 0.3425772190093994, "lr": 1.8014672138143073e-05, "epoch": 0.48811578807595274, "percentage": 24.4, "elapsed_time": "6:38:26", "remaining_time": "20:34:21"} +{"current_steps": 1839, "total_steps": 7532, "loss": 0.334087997674942, "lr": 1.801204551795238e-05, "epoch": 0.4883813570574957, "percentage": 24.42, "elapsed_time": "6:38:39", "remaining_time": "20:34:08"} +{"current_steps": 1840, "total_steps": 7532, "loss": 0.3186641335487366, "lr": 1.80094173531338e-05, "epoch": 0.48864692603903864, "percentage": 24.43, "elapsed_time": "6:38:53", "remaining_time": "20:33:57"} +{"current_steps": 1841, "total_steps": 7532, "loss": 0.3153733015060425, "lr": 1.800678764419401e-05, "epoch": 0.4889124950205816, "percentage": 24.44, "elapsed_time": "6:39:06", "remaining_time": "20:33:44"} +{"current_steps": 1842, "total_steps": 7532, "loss": 0.3323214054107666, "lr": 1.8004156391640004e-05, "epoch": 0.48917806400212455, "percentage": 24.46, 
"elapsed_time": "6:39:20", "remaining_time": "20:33:34"} +{"current_steps": 1843, "total_steps": 7532, "loss": 0.2856762409210205, "lr": 1.8001523595979043e-05, "epoch": 0.4894436329836675, "percentage": 24.47, "elapsed_time": "6:39:33", "remaining_time": "20:33:22"} +{"current_steps": 1844, "total_steps": 7532, "loss": 0.32493725419044495, "lr": 1.79988892577187e-05, "epoch": 0.48970920196521045, "percentage": 24.48, "elapsed_time": "6:39:47", "remaining_time": "20:33:10"} +{"current_steps": 1845, "total_steps": 7532, "loss": 0.350448876619339, "lr": 1.7996253377366846e-05, "epoch": 0.4899747709467534, "percentage": 24.5, "elapsed_time": "6:40:00", "remaining_time": "20:32:58"} +{"current_steps": 1846, "total_steps": 7532, "loss": 0.32246965169906616, "lr": 1.7993615955431648e-05, "epoch": 0.49024033992829635, "percentage": 24.51, "elapsed_time": "6:40:13", "remaining_time": "20:32:47"} +{"current_steps": 1847, "total_steps": 7532, "loss": 0.302636057138443, "lr": 1.799097699242157e-05, "epoch": 0.4905059089098393, "percentage": 24.52, "elapsed_time": "6:40:27", "remaining_time": "20:32:34"} +{"current_steps": 1848, "total_steps": 7532, "loss": 0.34280693531036377, "lr": 1.7988336488845374e-05, "epoch": 0.4907714778913823, "percentage": 24.54, "elapsed_time": "6:40:40", "remaining_time": "20:32:22"} +{"current_steps": 1849, "total_steps": 7532, "loss": 0.3650673031806946, "lr": 1.7985694445212118e-05, "epoch": 0.49103704687292526, "percentage": 24.55, "elapsed_time": "6:40:54", "remaining_time": "20:32:11"} +{"current_steps": 1850, "total_steps": 7532, "loss": 0.33800822496414185, "lr": 1.798305086203115e-05, "epoch": 0.4913026158544682, "percentage": 24.56, "elapsed_time": "6:41:07", "remaining_time": "20:31:59"} +{"current_steps": 1851, "total_steps": 7532, "loss": 0.31522083282470703, "lr": 1.7980405739812134e-05, "epoch": 0.49156818483601117, "percentage": 24.58, "elapsed_time": "6:41:21", "remaining_time": "20:31:48"} +{"current_steps": 1852, "total_steps": 
7532, "loss": 0.3374335765838623, "lr": 1.7977759079065003e-05, "epoch": 0.4918337538175541, "percentage": 24.59, "elapsed_time": "6:41:34", "remaining_time": "20:31:35"} +{"current_steps": 1853, "total_steps": 7532, "loss": 0.33803191781044006, "lr": 1.7975110880300018e-05, "epoch": 0.49209932279909707, "percentage": 24.6, "elapsed_time": "6:41:47", "remaining_time": "20:31:24"} +{"current_steps": 1854, "total_steps": 7532, "loss": 0.37764933705329895, "lr": 1.797246114402771e-05, "epoch": 0.49236489178064, "percentage": 24.61, "elapsed_time": "6:42:00", "remaining_time": "20:31:12"} +{"current_steps": 1855, "total_steps": 7532, "loss": 0.3075840473175049, "lr": 1.796980987075892e-05, "epoch": 0.492630460762183, "percentage": 24.63, "elapsed_time": "6:42:14", "remaining_time": "20:31:00"} +{"current_steps": 1856, "total_steps": 7532, "loss": 0.306305855512619, "lr": 1.7967157061004782e-05, "epoch": 0.4928960297437259, "percentage": 24.64, "elapsed_time": "6:42:27", "remaining_time": "20:30:48"} +{"current_steps": 1857, "total_steps": 7532, "loss": 0.3474302291870117, "lr": 1.796450271527673e-05, "epoch": 0.4931615987252689, "percentage": 24.65, "elapsed_time": "6:42:40", "remaining_time": "20:30:36"} +{"current_steps": 1858, "total_steps": 7532, "loss": 0.31059685349464417, "lr": 1.7961846834086483e-05, "epoch": 0.49342716770681183, "percentage": 24.67, "elapsed_time": "6:42:53", "remaining_time": "20:30:22"} +{"current_steps": 1859, "total_steps": 7532, "loss": 0.346218079328537, "lr": 1.795918941794607e-05, "epoch": 0.4936927366883548, "percentage": 24.68, "elapsed_time": "6:43:07", "remaining_time": "20:30:10"} +{"current_steps": 1860, "total_steps": 7532, "loss": 0.28371214866638184, "lr": 1.7956530467367805e-05, "epoch": 0.49395830566989773, "percentage": 24.69, "elapsed_time": "6:43:20", "remaining_time": "20:29:56"} +{"current_steps": 1861, "total_steps": 7532, "loss": 0.27775150537490845, "lr": 1.7953869982864306e-05, "epoch": 0.4942238746514407, 
"percentage": 24.71, "elapsed_time": "6:43:33", "remaining_time": "20:29:45"} +{"current_steps": 1862, "total_steps": 7532, "loss": 0.328782856464386, "lr": 1.795120796494848e-05, "epoch": 0.4944894436329837, "percentage": 24.72, "elapsed_time": "6:43:47", "remaining_time": "20:29:33"} +{"current_steps": 1863, "total_steps": 7532, "loss": 0.33220064640045166, "lr": 1.7948544414133534e-05, "epoch": 0.49475501261452665, "percentage": 24.73, "elapsed_time": "6:44:00", "remaining_time": "20:29:22"} +{"current_steps": 1864, "total_steps": 7532, "loss": 0.32681554555892944, "lr": 1.794587933093297e-05, "epoch": 0.4950205815960696, "percentage": 24.75, "elapsed_time": "6:44:13", "remaining_time": "20:29:09"} +{"current_steps": 1865, "total_steps": 7532, "loss": 0.32202866673469543, "lr": 1.7943212715860586e-05, "epoch": 0.49528615057761255, "percentage": 24.76, "elapsed_time": "6:44:27", "remaining_time": "20:28:59"} +{"current_steps": 1866, "total_steps": 7532, "loss": 0.3051350712776184, "lr": 1.7940544569430468e-05, "epoch": 0.4955517195591555, "percentage": 24.77, "elapsed_time": "6:44:40", "remaining_time": "20:28:46"} +{"current_steps": 1867, "total_steps": 7532, "loss": 0.3924705386161804, "lr": 1.793787489215701e-05, "epoch": 0.49581728854069845, "percentage": 24.79, "elapsed_time": "6:44:53", "remaining_time": "20:28:34"} +{"current_steps": 1868, "total_steps": 7532, "loss": 0.30267882347106934, "lr": 1.793520368455489e-05, "epoch": 0.4960828575222414, "percentage": 24.8, "elapsed_time": "6:45:07", "remaining_time": "20:28:23"} +{"current_steps": 1869, "total_steps": 7532, "loss": 0.3150729238986969, "lr": 1.793253094713909e-05, "epoch": 0.49634842650378436, "percentage": 24.81, "elapsed_time": "6:45:20", "remaining_time": "20:28:11"} +{"current_steps": 1870, "total_steps": 7532, "loss": 0.33814147114753723, "lr": 1.7929856680424872e-05, "epoch": 0.4966139954853273, "percentage": 24.83, "elapsed_time": "6:45:34", "remaining_time": "20:28:00"} +{"current_steps": 
1871, "total_steps": 7532, "loss": 0.31929856538772583, "lr": 1.7927180884927814e-05, "epoch": 0.49687956446687026, "percentage": 24.84, "elapsed_time": "6:45:48", "remaining_time": "20:27:48"} +{"current_steps": 1872, "total_steps": 7532, "loss": 0.3797461688518524, "lr": 1.7924503561163775e-05, "epoch": 0.4971451334484132, "percentage": 24.85, "elapsed_time": "6:46:01", "remaining_time": "20:27:37"} +{"current_steps": 1873, "total_steps": 7532, "loss": 0.3056377172470093, "lr": 1.792182470964891e-05, "epoch": 0.49741070242995616, "percentage": 24.87, "elapsed_time": "6:46:15", "remaining_time": "20:27:26"} +{"current_steps": 1874, "total_steps": 7532, "loss": 0.3526398539543152, "lr": 1.7919144330899668e-05, "epoch": 0.4976762714114991, "percentage": 24.88, "elapsed_time": "6:46:29", "remaining_time": "20:27:15"} +{"current_steps": 1875, "total_steps": 7532, "loss": 0.3183595538139343, "lr": 1.79164624254328e-05, "epoch": 0.49794184039304207, "percentage": 24.89, "elapsed_time": "6:46:42", "remaining_time": "20:27:03"} +{"current_steps": 1876, "total_steps": 7532, "loss": 0.3604113459587097, "lr": 1.791377899376534e-05, "epoch": 0.4982074093745851, "percentage": 24.91, "elapsed_time": "6:46:56", "remaining_time": "20:26:52"} +{"current_steps": 1877, "total_steps": 7532, "loss": 0.3219848573207855, "lr": 1.7911094036414623e-05, "epoch": 0.498472978356128, "percentage": 24.92, "elapsed_time": "6:47:09", "remaining_time": "20:26:40"} +{"current_steps": 1878, "total_steps": 7532, "loss": 0.28773394227027893, "lr": 1.7908407553898282e-05, "epoch": 0.498738547337671, "percentage": 24.93, "elapsed_time": "6:47:22", "remaining_time": "20:26:28"} +{"current_steps": 1879, "total_steps": 7532, "loss": 0.31453996896743774, "lr": 1.7905719546734233e-05, "epoch": 0.49900411631921393, "percentage": 24.95, "elapsed_time": "6:47:36", "remaining_time": "20:26:18"} +{"current_steps": 1880, "total_steps": 7532, "loss": 0.2947153151035309, "lr": 1.7903030015440696e-05, "epoch": 
0.4992696853007569, "percentage": 24.96, "elapsed_time": "6:47:50", "remaining_time": "20:26:06"} +{"current_steps": 1881, "total_steps": 7532, "loss": 0.313723087310791, "lr": 1.7900338960536178e-05, "epoch": 0.49953525428229983, "percentage": 24.97, "elapsed_time": "6:48:04", "remaining_time": "20:25:56"} +{"current_steps": 1882, "total_steps": 7532, "loss": 0.3385108709335327, "lr": 1.7897646382539485e-05, "epoch": 0.4998008232638428, "percentage": 24.99, "elapsed_time": "6:48:17", "remaining_time": "20:25:44"} +{"current_steps": 1883, "total_steps": 7532, "loss": 0.31417039036750793, "lr": 1.7894952281969712e-05, "epoch": 0.5000663922453857, "percentage": 25.0, "elapsed_time": "6:48:31", "remaining_time": "20:25:33"} +{"current_steps": 1884, "total_steps": 7532, "loss": 0.3555717468261719, "lr": 1.7892256659346253e-05, "epoch": 0.5003319612269287, "percentage": 25.01, "elapsed_time": "6:48:44", "remaining_time": "20:25:20"} +{"current_steps": 1885, "total_steps": 7532, "loss": 0.3724518120288849, "lr": 1.7889559515188793e-05, "epoch": 0.5005975302084716, "percentage": 25.03, "elapsed_time": "6:48:57", "remaining_time": "20:25:09"} +{"current_steps": 1886, "total_steps": 7532, "loss": 0.32646167278289795, "lr": 1.7886860850017306e-05, "epoch": 0.5008630991900146, "percentage": 25.04, "elapsed_time": "6:49:10", "remaining_time": "20:24:56"} +{"current_steps": 1887, "total_steps": 7532, "loss": 0.31072959303855896, "lr": 1.7884160664352062e-05, "epoch": 0.5011286681715575, "percentage": 25.05, "elapsed_time": "6:49:24", "remaining_time": "20:24:46"} +{"current_steps": 1888, "total_steps": 7532, "loss": 0.26987242698669434, "lr": 1.7881458958713628e-05, "epoch": 0.5013942371531005, "percentage": 25.07, "elapsed_time": "6:49:37", "remaining_time": "20:24:33"} +{"current_steps": 1889, "total_steps": 7532, "loss": 0.30105817317962646, "lr": 1.787875573362286e-05, "epoch": 0.5016598061346434, "percentage": 25.08, "elapsed_time": "6:49:51", "remaining_time": "20:24:21"} 
+{"current_steps": 1890, "total_steps": 7532, "loss": 0.31277188658714294, "lr": 1.7876050989600908e-05, "epoch": 0.5019253751161864, "percentage": 25.09, "elapsed_time": "6:50:05", "remaining_time": "20:24:10"} +{"current_steps": 1891, "total_steps": 7532, "loss": 0.31068161129951477, "lr": 1.7873344727169214e-05, "epoch": 0.5021909440977294, "percentage": 25.11, "elapsed_time": "6:50:18", "remaining_time": "20:23:58"} +{"current_steps": 1892, "total_steps": 7532, "loss": 0.3491121530532837, "lr": 1.7870636946849512e-05, "epoch": 0.5024565130792723, "percentage": 25.12, "elapsed_time": "6:50:31", "remaining_time": "20:23:47"} +{"current_steps": 1893, "total_steps": 7532, "loss": 0.3223581612110138, "lr": 1.7867927649163838e-05, "epoch": 0.5027220820608153, "percentage": 25.13, "elapsed_time": "6:50:44", "remaining_time": "20:23:33"} +{"current_steps": 1894, "total_steps": 7532, "loss": 0.345224529504776, "lr": 1.7865216834634506e-05, "epoch": 0.5029876510423582, "percentage": 25.15, "elapsed_time": "6:50:57", "remaining_time": "20:23:19"} +{"current_steps": 1895, "total_steps": 7532, "loss": 0.3408205211162567, "lr": 1.7862504503784123e-05, "epoch": 0.5032532200239012, "percentage": 25.16, "elapsed_time": "6:51:10", "remaining_time": "20:23:06"} +{"current_steps": 1896, "total_steps": 7532, "loss": 0.2680068016052246, "lr": 1.7859790657135608e-05, "epoch": 0.5035187890054441, "percentage": 25.17, "elapsed_time": "6:51:24", "remaining_time": "20:22:55"} +{"current_steps": 1897, "total_steps": 7532, "loss": 0.29733535647392273, "lr": 1.7857075295212148e-05, "epoch": 0.5037843579869872, "percentage": 25.19, "elapsed_time": "6:51:37", "remaining_time": "20:22:43"} +{"current_steps": 1898, "total_steps": 7532, "loss": 0.34820133447647095, "lr": 1.785435841853724e-05, "epoch": 0.5040499269685301, "percentage": 25.2, "elapsed_time": "6:51:51", "remaining_time": "20:22:31"} +{"current_steps": 1899, "total_steps": 7532, "loss": 0.3306594491004944, "lr": 
1.785164002763466e-05, "epoch": 0.5043154959500731, "percentage": 25.21, "elapsed_time": "6:52:04", "remaining_time": "20:22:18"} +{"current_steps": 1900, "total_steps": 7532, "loss": 0.3166846036911011, "lr": 1.7848920123028482e-05, "epoch": 0.504581064931616, "percentage": 25.23, "elapsed_time": "6:52:17", "remaining_time": "20:22:07"} +{"current_steps": 1901, "total_steps": 7532, "loss": 0.3406408727169037, "lr": 1.784619870524308e-05, "epoch": 0.504846633913159, "percentage": 25.24, "elapsed_time": "6:52:35", "remaining_time": "20:22:09"} +{"current_steps": 1902, "total_steps": 7532, "loss": 0.36358171701431274, "lr": 1.78434757748031e-05, "epoch": 0.5051122028947019, "percentage": 25.25, "elapsed_time": "6:52:49", "remaining_time": "20:21:57"} +{"current_steps": 1903, "total_steps": 7532, "loss": 0.34045761823654175, "lr": 1.7840751332233498e-05, "epoch": 0.5053777718762449, "percentage": 25.27, "elapsed_time": "6:53:01", "remaining_time": "20:21:43"} +{"current_steps": 1904, "total_steps": 7532, "loss": 0.3442475199699402, "lr": 1.783802537805951e-05, "epoch": 0.5056433408577878, "percentage": 25.28, "elapsed_time": "6:53:15", "remaining_time": "20:21:31"} +{"current_steps": 1905, "total_steps": 7532, "loss": 0.3488585650920868, "lr": 1.7835297912806675e-05, "epoch": 0.5059089098393308, "percentage": 25.29, "elapsed_time": "6:53:27", "remaining_time": "20:21:16"} +{"current_steps": 1906, "total_steps": 7532, "loss": 0.3340107500553131, "lr": 1.7832568937000808e-05, "epoch": 0.5061744788208737, "percentage": 25.31, "elapsed_time": "6:53:40", "remaining_time": "20:21:03"} +{"current_steps": 1907, "total_steps": 7532, "loss": 0.3206177353858948, "lr": 1.7829838451168027e-05, "epoch": 0.5064400478024167, "percentage": 25.32, "elapsed_time": "6:53:54", "remaining_time": "20:20:52"} +{"current_steps": 1908, "total_steps": 7532, "loss": 0.2851010262966156, "lr": 1.782710645583473e-05, "epoch": 0.5067056167839596, "percentage": 25.33, "elapsed_time": "6:54:06", 
"remaining_time": "20:20:38"} +{"current_steps": 1909, "total_steps": 7532, "loss": 0.31850844621658325, "lr": 1.782437295152763e-05, "epoch": 0.5069711857655026, "percentage": 25.35, "elapsed_time": "6:54:20", "remaining_time": "20:20:26"} +{"current_steps": 1910, "total_steps": 7532, "loss": 0.3343108892440796, "lr": 1.7821637938773704e-05, "epoch": 0.5072367547470455, "percentage": 25.36, "elapsed_time": "6:54:32", "remaining_time": "20:20:12"} +{"current_steps": 1911, "total_steps": 7532, "loss": 0.3423745930194855, "lr": 1.781890141810023e-05, "epoch": 0.5075023237285885, "percentage": 25.37, "elapsed_time": "6:54:46", "remaining_time": "20:19:59"} +{"current_steps": 1912, "total_steps": 7532, "loss": 0.30980780720710754, "lr": 1.7816163390034775e-05, "epoch": 0.5077678927101315, "percentage": 25.39, "elapsed_time": "6:54:59", "remaining_time": "20:19:46"} +{"current_steps": 1913, "total_steps": 7532, "loss": 0.31217479705810547, "lr": 1.7813423855105203e-05, "epoch": 0.5080334616916744, "percentage": 25.4, "elapsed_time": "6:55:12", "remaining_time": "20:19:33"} +{"current_steps": 1914, "total_steps": 7532, "loss": 0.34741947054862976, "lr": 1.7810682813839664e-05, "epoch": 0.5082990306732174, "percentage": 25.41, "elapsed_time": "6:55:25", "remaining_time": "20:19:20"} +{"current_steps": 1915, "total_steps": 7532, "loss": 0.32275527715682983, "lr": 1.7807940266766595e-05, "epoch": 0.5085645996547603, "percentage": 25.42, "elapsed_time": "6:55:38", "remaining_time": "20:19:08"} +{"current_steps": 1916, "total_steps": 7532, "loss": 0.32760411500930786, "lr": 1.7805196214414728e-05, "epoch": 0.5088301686363033, "percentage": 25.44, "elapsed_time": "6:55:50", "remaining_time": "20:18:53"} +{"current_steps": 1917, "total_steps": 7532, "loss": 0.3877720832824707, "lr": 1.7802450657313086e-05, "epoch": 0.5090957376178462, "percentage": 25.45, "elapsed_time": "6:56:03", "remaining_time": "20:18:40"} +{"current_steps": 1918, "total_steps": 7532, "loss": 
0.33458876609802246, "lr": 1.779970359599098e-05, "epoch": 0.5093613065993892, "percentage": 25.46, "elapsed_time": "6:56:16", "remaining_time": "20:18:26"} +{"current_steps": 1919, "total_steps": 7532, "loss": 0.30603206157684326, "lr": 1.7796955030978007e-05, "epoch": 0.5096268755809321, "percentage": 25.48, "elapsed_time": "6:56:29", "remaining_time": "20:18:12"} +{"current_steps": 1920, "total_steps": 7532, "loss": 0.2920286953449249, "lr": 1.7794204962804063e-05, "epoch": 0.5098924445624751, "percentage": 25.49, "elapsed_time": "6:56:42", "remaining_time": "20:17:59"} +{"current_steps": 1921, "total_steps": 7532, "loss": 0.32407981157302856, "lr": 1.7791453391999325e-05, "epoch": 0.510158013544018, "percentage": 25.5, "elapsed_time": "6:56:54", "remaining_time": "20:17:44"} +{"current_steps": 1922, "total_steps": 7532, "loss": 0.30423563718795776, "lr": 1.7788700319094263e-05, "epoch": 0.510423582525561, "percentage": 25.52, "elapsed_time": "6:57:07", "remaining_time": "20:17:31"} +{"current_steps": 1923, "total_steps": 7532, "loss": 0.34691399335861206, "lr": 1.7785945744619642e-05, "epoch": 0.5106891515071039, "percentage": 25.53, "elapsed_time": "6:57:20", "remaining_time": "20:17:17"} +{"current_steps": 1924, "total_steps": 7532, "loss": 0.3217603266239166, "lr": 1.7783189669106503e-05, "epoch": 0.5109547204886469, "percentage": 25.54, "elapsed_time": "6:57:33", "remaining_time": "20:17:04"} +{"current_steps": 1925, "total_steps": 7532, "loss": 0.365132212638855, "lr": 1.7780432093086198e-05, "epoch": 0.5112202894701899, "percentage": 25.56, "elapsed_time": "6:57:46", "remaining_time": "20:16:50"} +{"current_steps": 1926, "total_steps": 7532, "loss": 0.29662930965423584, "lr": 1.7777673017090344e-05, "epoch": 0.5114858584517329, "percentage": 25.57, "elapsed_time": "6:57:59", "remaining_time": "20:16:38"} +{"current_steps": 1927, "total_steps": 7532, "loss": 0.3324819803237915, "lr": 1.7774912441650857e-05, "epoch": 0.5117514274332758, "percentage": 25.58, 
"elapsed_time": "6:58:12", "remaining_time": "20:16:24"} +{"current_steps": 1928, "total_steps": 7532, "loss": 0.29331067204475403, "lr": 1.7772150367299953e-05, "epoch": 0.5120169964148188, "percentage": 25.6, "elapsed_time": "6:58:25", "remaining_time": "20:16:11"} +{"current_steps": 1929, "total_steps": 7532, "loss": 0.3158259987831116, "lr": 1.7769386794570117e-05, "epoch": 0.5122825653963617, "percentage": 25.61, "elapsed_time": "6:58:38", "remaining_time": "20:15:58"} +{"current_steps": 1930, "total_steps": 7532, "loss": 0.2824791967868805, "lr": 1.7766621723994145e-05, "epoch": 0.5125481343779047, "percentage": 25.62, "elapsed_time": "6:58:51", "remaining_time": "20:15:45"} +{"current_steps": 1931, "total_steps": 7532, "loss": 0.2690732777118683, "lr": 1.7763855156105097e-05, "epoch": 0.5128137033594476, "percentage": 25.64, "elapsed_time": "6:59:03", "remaining_time": "20:15:31"} +{"current_steps": 1932, "total_steps": 7532, "loss": 0.31360942125320435, "lr": 1.7761087091436346e-05, "epoch": 0.5130792723409906, "percentage": 25.65, "elapsed_time": "6:59:16", "remaining_time": "20:15:18"} +{"current_steps": 1933, "total_steps": 7532, "loss": 0.28334349393844604, "lr": 1.7758317530521535e-05, "epoch": 0.5133448413225336, "percentage": 25.66, "elapsed_time": "6:59:29", "remaining_time": "20:15:03"} +{"current_steps": 1934, "total_steps": 7532, "loss": 0.3857404589653015, "lr": 1.7755546473894604e-05, "epoch": 0.5136104103040765, "percentage": 25.68, "elapsed_time": "6:59:42", "remaining_time": "20:14:50"} +{"current_steps": 1935, "total_steps": 7532, "loss": 0.2852492332458496, "lr": 1.7752773922089784e-05, "epoch": 0.5138759792856195, "percentage": 25.69, "elapsed_time": "6:59:54", "remaining_time": "20:14:36"} +{"current_steps": 1936, "total_steps": 7532, "loss": 0.2959831953048706, "lr": 1.7749999875641585e-05, "epoch": 0.5141415482671624, "percentage": 25.7, "elapsed_time": "7:00:08", "remaining_time": "20:14:23"} +{"current_steps": 1937, "total_steps": 
7532, "loss": 0.3129635453224182, "lr": 1.7747224335084815e-05, "epoch": 0.5144071172487054, "percentage": 25.72, "elapsed_time": "7:00:20", "remaining_time": "20:14:09"} +{"current_steps": 1938, "total_steps": 7532, "loss": 0.31391531229019165, "lr": 1.774444730095456e-05, "epoch": 0.5146726862302483, "percentage": 25.73, "elapsed_time": "7:00:33", "remaining_time": "20:13:55"} +{"current_steps": 1939, "total_steps": 7532, "loss": 0.30274757742881775, "lr": 1.7741668773786202e-05, "epoch": 0.5149382552117913, "percentage": 25.74, "elapsed_time": "7:00:46", "remaining_time": "20:13:42"} +{"current_steps": 1940, "total_steps": 7532, "loss": 0.29162222146987915, "lr": 1.7738888754115413e-05, "epoch": 0.5152038241933342, "percentage": 25.76, "elapsed_time": "7:00:58", "remaining_time": "20:13:27"} +{"current_steps": 1941, "total_steps": 7532, "loss": 0.30358970165252686, "lr": 1.7736107242478143e-05, "epoch": 0.5154693931748772, "percentage": 25.77, "elapsed_time": "7:01:11", "remaining_time": "20:13:14"} +{"current_steps": 1942, "total_steps": 7532, "loss": 0.32268065214157104, "lr": 1.7733324239410634e-05, "epoch": 0.5157349621564201, "percentage": 25.78, "elapsed_time": "7:01:24", "remaining_time": "20:12:59"} +{"current_steps": 1943, "total_steps": 7532, "loss": 0.31925222277641296, "lr": 1.7730539745449417e-05, "epoch": 0.5160005311379631, "percentage": 25.8, "elapsed_time": "7:01:37", "remaining_time": "20:12:46"} +{"current_steps": 1944, "total_steps": 7532, "loss": 0.32883748412132263, "lr": 1.7727753761131312e-05, "epoch": 0.516266100119506, "percentage": 25.81, "elapsed_time": "7:01:49", "remaining_time": "20:12:32"} +{"current_steps": 1945, "total_steps": 7532, "loss": 0.3212829530239105, "lr": 1.7724966286993425e-05, "epoch": 0.516531669101049, "percentage": 25.82, "elapsed_time": "7:02:02", "remaining_time": "20:12:18"} +{"current_steps": 1946, "total_steps": 7532, "loss": 0.32909759879112244, "lr": 1.772217732357314e-05, "epoch": 0.5167972380825919, 
"percentage": 25.84, "elapsed_time": "7:02:15", "remaining_time": "20:12:05"} +{"current_steps": 1947, "total_steps": 7532, "loss": 0.3451213538646698, "lr": 1.7719386871408147e-05, "epoch": 0.5170628070641349, "percentage": 25.85, "elapsed_time": "7:02:29", "remaining_time": "20:11:55"} +{"current_steps": 1948, "total_steps": 7532, "loss": 0.318422794342041, "lr": 1.7716594931036402e-05, "epoch": 0.5173283760456778, "percentage": 25.86, "elapsed_time": "7:02:41", "remaining_time": "20:11:40"} +{"current_steps": 1949, "total_steps": 7532, "loss": 0.3165292739868164, "lr": 1.7713801502996166e-05, "epoch": 0.5175939450272208, "percentage": 25.88, "elapsed_time": "7:02:53", "remaining_time": "20:11:24"} +{"current_steps": 1950, "total_steps": 7532, "loss": 0.3116700351238251, "lr": 1.7711006587825975e-05, "epoch": 0.5178595140087637, "percentage": 25.89, "elapsed_time": "7:03:06", "remaining_time": "20:11:10"} +{"current_steps": 1951, "total_steps": 7532, "loss": 0.32102686166763306, "lr": 1.7708210186064656e-05, "epoch": 0.5181250829903067, "percentage": 25.9, "elapsed_time": "7:03:18", "remaining_time": "20:10:55"} +{"current_steps": 1952, "total_steps": 7532, "loss": 0.33025500178337097, "lr": 1.7705412298251323e-05, "epoch": 0.5183906519718496, "percentage": 25.92, "elapsed_time": "7:03:31", "remaining_time": "20:10:40"} +{"current_steps": 1953, "total_steps": 7532, "loss": 0.36113062500953674, "lr": 1.7702612924925377e-05, "epoch": 0.5186562209533926, "percentage": 25.93, "elapsed_time": "7:03:43", "remaining_time": "20:10:25"} +{"current_steps": 1954, "total_steps": 7532, "loss": 0.3092479109764099, "lr": 1.7699812066626503e-05, "epoch": 0.5189217899349357, "percentage": 25.94, "elapsed_time": "7:03:56", "remaining_time": "20:10:11"} +{"current_steps": 1955, "total_steps": 7532, "loss": 0.3389117419719696, "lr": 1.769700972389467e-05, "epoch": 0.5191873589164786, "percentage": 25.96, "elapsed_time": "7:04:08", "remaining_time": "20:09:55"} +{"current_steps": 
1956, "total_steps": 7532, "loss": 0.3225803077220917, "lr": 1.7694205897270147e-05, "epoch": 0.5194529278980216, "percentage": 25.97, "elapsed_time": "7:04:20", "remaining_time": "20:09:41"} +{"current_steps": 1957, "total_steps": 7532, "loss": 0.3226786255836487, "lr": 1.7691400587293467e-05, "epoch": 0.5197184968795645, "percentage": 25.98, "elapsed_time": "7:04:33", "remaining_time": "20:09:26"} +{"current_steps": 1958, "total_steps": 7532, "loss": 0.27708399295806885, "lr": 1.7688593794505466e-05, "epoch": 0.5199840658611075, "percentage": 26.0, "elapsed_time": "7:04:46", "remaining_time": "20:09:14"} +{"current_steps": 1959, "total_steps": 7532, "loss": 0.36100950837135315, "lr": 1.768578551944726e-05, "epoch": 0.5202496348426504, "percentage": 26.01, "elapsed_time": "7:04:59", "remaining_time": "20:09:00"} +{"current_steps": 1960, "total_steps": 7532, "loss": 0.3138211965560913, "lr": 1.768297576266025e-05, "epoch": 0.5205152038241934, "percentage": 26.02, "elapsed_time": "7:05:11", "remaining_time": "20:08:46"} +{"current_steps": 1961, "total_steps": 7532, "loss": 0.33959656953811646, "lr": 1.7680164524686128e-05, "epoch": 0.5207807728057363, "percentage": 26.04, "elapsed_time": "7:05:25", "remaining_time": "20:08:33"} +{"current_steps": 1962, "total_steps": 7532, "loss": 0.3093605637550354, "lr": 1.7677351806066863e-05, "epoch": 0.5210463417872793, "percentage": 26.05, "elapsed_time": "7:05:37", "remaining_time": "20:08:19"} +{"current_steps": 1963, "total_steps": 7532, "loss": 0.3098641633987427, "lr": 1.7674537607344717e-05, "epoch": 0.5213119107688222, "percentage": 26.06, "elapsed_time": "7:05:50", "remaining_time": "20:08:06"} +{"current_steps": 1964, "total_steps": 7532, "loss": 0.35172683000564575, "lr": 1.767172192906223e-05, "epoch": 0.5215774797503652, "percentage": 26.08, "elapsed_time": "7:06:02", "remaining_time": "20:07:51"} +{"current_steps": 1965, "total_steps": 7532, "loss": 0.3535798192024231, "lr": 1.7668904771762242e-05, "epoch": 
0.5218430487319081, "percentage": 26.09, "elapsed_time": "7:06:15", "remaining_time": "20:07:36"} +{"current_steps": 1966, "total_steps": 7532, "loss": 0.36183854937553406, "lr": 1.766608613598785e-05, "epoch": 0.5221086177134511, "percentage": 26.1, "elapsed_time": "7:06:27", "remaining_time": "20:07:21"} +{"current_steps": 1967, "total_steps": 7532, "loss": 0.35995131731033325, "lr": 1.7663266022282473e-05, "epoch": 0.522374186694994, "percentage": 26.12, "elapsed_time": "7:06:40", "remaining_time": "20:07:08"} +{"current_steps": 1968, "total_steps": 7532, "loss": 0.38672733306884766, "lr": 1.766044443118978e-05, "epoch": 0.522639755676537, "percentage": 26.13, "elapsed_time": "7:06:53", "remaining_time": "20:06:55"} +{"current_steps": 1969, "total_steps": 7532, "loss": 0.3389524221420288, "lr": 1.765762136325375e-05, "epoch": 0.5229053246580799, "percentage": 26.14, "elapsed_time": "7:07:06", "remaining_time": "20:06:42"} +{"current_steps": 1970, "total_steps": 7532, "loss": 0.3325779139995575, "lr": 1.7654796819018635e-05, "epoch": 0.5231708936396229, "percentage": 26.16, "elapsed_time": "7:07:19", "remaining_time": "20:06:29"} +{"current_steps": 1971, "total_steps": 7532, "loss": 0.328407347202301, "lr": 1.7651970799028976e-05, "epoch": 0.5234364626211658, "percentage": 26.17, "elapsed_time": "7:07:33", "remaining_time": "20:06:18"} +{"current_steps": 1972, "total_steps": 7532, "loss": 0.3050537705421448, "lr": 1.764914330382959e-05, "epoch": 0.5237020316027088, "percentage": 26.18, "elapsed_time": "7:07:46", "remaining_time": "20:06:05"} +{"current_steps": 1973, "total_steps": 7532, "loss": 0.35500285029411316, "lr": 1.7646314333965588e-05, "epoch": 0.5239676005842517, "percentage": 26.19, "elapsed_time": "7:07:59", "remaining_time": "20:05:54"} +{"current_steps": 1974, "total_steps": 7532, "loss": 0.30319780111312866, "lr": 1.7643483889982364e-05, "epoch": 0.5242331695657947, "percentage": 26.21, "elapsed_time": "7:08:13", "remaining_time": "20:05:42"} 
+{"current_steps": 1975, "total_steps": 7532, "loss": 0.315757691860199, "lr": 1.7640651972425592e-05, "epoch": 0.5244987385473376, "percentage": 26.22, "elapsed_time": "7:08:26", "remaining_time": "20:05:30"} +{"current_steps": 1976, "total_steps": 7532, "loss": 0.28562331199645996, "lr": 1.7637818581841234e-05, "epoch": 0.5247643075288806, "percentage": 26.23, "elapsed_time": "7:08:39", "remaining_time": "20:05:16"} +{"current_steps": 1977, "total_steps": 7532, "loss": 0.29798296093940735, "lr": 1.763498371877553e-05, "epoch": 0.5250298765104235, "percentage": 26.25, "elapsed_time": "7:08:51", "remaining_time": "20:05:01"} +{"current_steps": 1978, "total_steps": 7532, "loss": 0.2923639416694641, "lr": 1.763214738377501e-05, "epoch": 0.5252954454919665, "percentage": 26.26, "elapsed_time": "7:09:05", "remaining_time": "20:04:49"} +{"current_steps": 1979, "total_steps": 7532, "loss": 0.2858009934425354, "lr": 1.7629309577386492e-05, "epoch": 0.5255610144735094, "percentage": 26.27, "elapsed_time": "7:09:17", "remaining_time": "20:04:35"} +{"current_steps": 1980, "total_steps": 7532, "loss": 0.3615952134132385, "lr": 1.7626470300157064e-05, "epoch": 0.5258265834550524, "percentage": 26.29, "elapsed_time": "7:09:30", "remaining_time": "20:04:22"} +{"current_steps": 1981, "total_steps": 7532, "loss": 0.36142098903656006, "lr": 1.762362955263411e-05, "epoch": 0.5260921524365954, "percentage": 26.3, "elapsed_time": "7:09:43", "remaining_time": "20:04:07"} +{"current_steps": 1982, "total_steps": 7532, "loss": 0.3335961699485779, "lr": 1.762078733536529e-05, "epoch": 0.5263577214181384, "percentage": 26.31, "elapsed_time": "7:09:56", "remaining_time": "20:03:55"} +{"current_steps": 1983, "total_steps": 7532, "loss": 0.34549272060394287, "lr": 1.761794364889855e-05, "epoch": 0.5266232903996814, "percentage": 26.33, "elapsed_time": "7:10:09", "remaining_time": "20:03:41"} +{"current_steps": 1984, "total_steps": 7532, "loss": 0.3177812993526459, "lr": 1.761509849378212e-05, 
"epoch": 0.5268888593812243, "percentage": 26.34, "elapsed_time": "7:10:22", "remaining_time": "20:03:28"} +{"current_steps": 1985, "total_steps": 7532, "loss": 0.33191388845443726, "lr": 1.7612251870564515e-05, "epoch": 0.5271544283627673, "percentage": 26.35, "elapsed_time": "7:10:34", "remaining_time": "20:03:14"} +{"current_steps": 1986, "total_steps": 7532, "loss": 0.30732038617134094, "lr": 1.7609403779794523e-05, "epoch": 0.5274199973443102, "percentage": 26.37, "elapsed_time": "7:10:48", "remaining_time": "20:03:02"} +{"current_steps": 1987, "total_steps": 7532, "loss": 0.33012068271636963, "lr": 1.7606554222021226e-05, "epoch": 0.5276855663258532, "percentage": 26.38, "elapsed_time": "7:11:00", "remaining_time": "20:02:48"} +{"current_steps": 1988, "total_steps": 7532, "loss": 0.3396066427230835, "lr": 1.760370319779399e-05, "epoch": 0.5279511353073961, "percentage": 26.39, "elapsed_time": "7:11:14", "remaining_time": "20:02:36"} +{"current_steps": 1989, "total_steps": 7532, "loss": 0.29053401947021484, "lr": 1.7600850707662454e-05, "epoch": 0.5282167042889391, "percentage": 26.41, "elapsed_time": "7:11:26", "remaining_time": "20:02:22"} +{"current_steps": 1990, "total_steps": 7532, "loss": 0.32927206158638, "lr": 1.7597996752176545e-05, "epoch": 0.528482273270482, "percentage": 26.42, "elapsed_time": "7:11:39", "remaining_time": "20:02:07"} +{"current_steps": 1991, "total_steps": 7532, "loss": 0.309224933385849, "lr": 1.759514133188647e-05, "epoch": 0.528747842252025, "percentage": 26.43, "elapsed_time": "7:11:52", "remaining_time": "20:01:54"} +{"current_steps": 1992, "total_steps": 7532, "loss": 0.31973862648010254, "lr": 1.7592284447342725e-05, "epoch": 0.5290134112335679, "percentage": 26.45, "elapsed_time": "7:12:05", "remaining_time": "20:01:40"} +{"current_steps": 1993, "total_steps": 7532, "loss": 0.3331080377101898, "lr": 1.758942609909608e-05, "epoch": 0.5292789802151109, "percentage": 26.46, "elapsed_time": "7:12:18", "remaining_time": 
"20:01:27"} +{"current_steps": 1994, "total_steps": 7532, "loss": 0.32755160331726074, "lr": 1.7586566287697592e-05, "epoch": 0.5295445491966538, "percentage": 26.47, "elapsed_time": "7:12:30", "remaining_time": "20:01:14"} +{"current_steps": 1995, "total_steps": 7532, "loss": 0.31942498683929443, "lr": 1.7583705013698602e-05, "epoch": 0.5298101181781968, "percentage": 26.49, "elapsed_time": "7:12:43", "remaining_time": "20:01:01"} +{"current_steps": 1996, "total_steps": 7532, "loss": 0.3199199438095093, "lr": 1.7580842277650723e-05, "epoch": 0.5300756871597397, "percentage": 26.5, "elapsed_time": "7:12:56", "remaining_time": "20:00:46"} +{"current_steps": 1997, "total_steps": 7532, "loss": 0.28153708577156067, "lr": 1.7577978080105864e-05, "epoch": 0.5303412561412827, "percentage": 26.51, "elapsed_time": "7:13:09", "remaining_time": "20:00:33"} +{"current_steps": 1998, "total_steps": 7532, "loss": 0.3050921559333801, "lr": 1.7575112421616203e-05, "epoch": 0.5306068251228256, "percentage": 26.53, "elapsed_time": "7:13:21", "remaining_time": "20:00:19"} +{"current_steps": 1999, "total_steps": 7532, "loss": 0.3242149353027344, "lr": 1.7572245302734208e-05, "epoch": 0.5308723941043686, "percentage": 26.54, "elapsed_time": "7:13:35", "remaining_time": "20:00:06"} +{"current_steps": 2000, "total_steps": 7532, "loss": 0.29947227239608765, "lr": 1.7569376724012622e-05, "epoch": 0.5311379630859115, "percentage": 26.55, "elapsed_time": "7:13:47", "remaining_time": "19:59:52"} +{"current_steps": 2001, "total_steps": 7532, "loss": 0.3229755163192749, "lr": 1.756650668600448e-05, "epoch": 0.5314035320674545, "percentage": 26.57, "elapsed_time": "7:14:06", "remaining_time": "19:59:56"} +{"current_steps": 2002, "total_steps": 7532, "loss": 0.3544544577598572, "lr": 1.7563635189263086e-05, "epoch": 0.5316691010489975, "percentage": 26.58, "elapsed_time": "7:14:19", "remaining_time": "19:59:41"} +{"current_steps": 2003, "total_steps": 7532, "loss": 0.32807621359825134, "lr": 
1.756076223434203e-05, "epoch": 0.5319346700305404, "percentage": 26.59, "elapsed_time": "7:14:31", "remaining_time": "19:59:27"} +{"current_steps": 2004, "total_steps": 7532, "loss": 0.3057190477848053, "lr": 1.7557887821795192e-05, "epoch": 0.5322002390120834, "percentage": 26.61, "elapsed_time": "7:14:44", "remaining_time": "19:59:12"} +{"current_steps": 2005, "total_steps": 7532, "loss": 0.29419198632240295, "lr": 1.7555011952176716e-05, "epoch": 0.5324658079936263, "percentage": 26.62, "elapsed_time": "7:14:57", "remaining_time": "19:59:00"} +{"current_steps": 2006, "total_steps": 7532, "loss": 0.3232089877128601, "lr": 1.755213462604104e-05, "epoch": 0.5327313769751693, "percentage": 26.63, "elapsed_time": "7:15:09", "remaining_time": "19:58:45"} +{"current_steps": 2007, "total_steps": 7532, "loss": 0.29784274101257324, "lr": 1.7549255843942875e-05, "epoch": 0.5329969459567122, "percentage": 26.65, "elapsed_time": "7:15:22", "remaining_time": "19:58:31"} +{"current_steps": 2008, "total_steps": 7532, "loss": 0.31421899795532227, "lr": 1.7546375606437216e-05, "epoch": 0.5332625149382552, "percentage": 26.66, "elapsed_time": "7:15:34", "remaining_time": "19:58:15"} +{"current_steps": 2009, "total_steps": 7532, "loss": 0.30681121349334717, "lr": 1.7543493914079345e-05, "epoch": 0.5335280839197981, "percentage": 26.67, "elapsed_time": "7:15:46", "remaining_time": "19:58:01"} +{"current_steps": 2010, "total_steps": 7532, "loss": 0.3114027976989746, "lr": 1.7540610767424813e-05, "epoch": 0.5337936529013412, "percentage": 26.69, "elapsed_time": "7:15:59", "remaining_time": "19:57:47"} +{"current_steps": 2011, "total_steps": 7532, "loss": 0.3030378520488739, "lr": 1.753772616702946e-05, "epoch": 0.5340592218828841, "percentage": 26.7, "elapsed_time": "7:16:12", "remaining_time": "19:57:32"} +{"current_steps": 2012, "total_steps": 7532, "loss": 0.30272024869918823, "lr": 1.75348401134494e-05, "epoch": 0.5343247908644271, "percentage": 26.71, "elapsed_time": "7:16:24", 
"remaining_time": "19:57:18"} +{"current_steps": 2013, "total_steps": 7532, "loss": 0.35117241740226746, "lr": 1.7531952607241033e-05, "epoch": 0.53459035984597, "percentage": 26.73, "elapsed_time": "7:16:37", "remaining_time": "19:57:03"} +{"current_steps": 2014, "total_steps": 7532, "loss": 0.297889769077301, "lr": 1.7529063648961035e-05, "epoch": 0.534855928827513, "percentage": 26.74, "elapsed_time": "7:16:49", "remaining_time": "19:56:49"} +{"current_steps": 2015, "total_steps": 7532, "loss": 0.32858210802078247, "lr": 1.752617323916636e-05, "epoch": 0.5351214978090559, "percentage": 26.75, "elapsed_time": "7:17:02", "remaining_time": "19:56:35"} +{"current_steps": 2016, "total_steps": 7532, "loss": 0.3095484673976898, "lr": 1.7523281378414246e-05, "epoch": 0.5353870667905989, "percentage": 26.77, "elapsed_time": "7:17:14", "remaining_time": "19:56:21"} +{"current_steps": 2017, "total_steps": 7532, "loss": 0.34490731358528137, "lr": 1.752038806726222e-05, "epoch": 0.5356526357721418, "percentage": 26.78, "elapsed_time": "7:17:26", "remaining_time": "19:56:05"} +{"current_steps": 2018, "total_steps": 7532, "loss": 0.35144859552383423, "lr": 1.751749330626806e-05, "epoch": 0.5359182047536848, "percentage": 26.79, "elapsed_time": "7:17:39", "remaining_time": "19:55:51"} +{"current_steps": 2019, "total_steps": 7532, "loss": 0.26337549090385437, "lr": 1.751459709598985e-05, "epoch": 0.5361837737352277, "percentage": 26.81, "elapsed_time": "7:17:51", "remaining_time": "19:55:36"} +{"current_steps": 2020, "total_steps": 7532, "loss": 0.3235297203063965, "lr": 1.7511699436985952e-05, "epoch": 0.5364493427167707, "percentage": 26.82, "elapsed_time": "7:18:03", "remaining_time": "19:55:20"} +{"current_steps": 2021, "total_steps": 7532, "loss": 0.35195302963256836, "lr": 1.7508800329814993e-05, "epoch": 0.5367149116983136, "percentage": 26.83, "elapsed_time": "7:18:16", "remaining_time": "19:55:06"} +{"current_steps": 2022, "total_steps": 7532, "loss": 
0.3226467967033386, "lr": 1.7505899775035887e-05, "epoch": 0.5369804806798566, "percentage": 26.85, "elapsed_time": "7:18:28", "remaining_time": "19:54:51"} +{"current_steps": 2023, "total_steps": 7532, "loss": 0.30616605281829834, "lr": 1.750299777320783e-05, "epoch": 0.5372460496613995, "percentage": 26.86, "elapsed_time": "7:18:41", "remaining_time": "19:54:37"} +{"current_steps": 2024, "total_steps": 7532, "loss": 0.3007400333881378, "lr": 1.7500094324890294e-05, "epoch": 0.5375116186429425, "percentage": 26.87, "elapsed_time": "7:18:53", "remaining_time": "19:54:22"} +{"current_steps": 2025, "total_steps": 7532, "loss": 0.35409432649612427, "lr": 1.7497189430643025e-05, "epoch": 0.5377771876244855, "percentage": 26.89, "elapsed_time": "7:19:06", "remaining_time": "19:54:08"} +{"current_steps": 2026, "total_steps": 7532, "loss": 0.33718281984329224, "lr": 1.7494283091026053e-05, "epoch": 0.5380427566060284, "percentage": 26.9, "elapsed_time": "7:19:18", "remaining_time": "19:53:53"} +{"current_steps": 2027, "total_steps": 7532, "loss": 0.3589650094509125, "lr": 1.749137530659969e-05, "epoch": 0.5383083255875714, "percentage": 26.91, "elapsed_time": "7:19:30", "remaining_time": "19:53:39"} +{"current_steps": 2028, "total_steps": 7532, "loss": 0.35314273834228516, "lr": 1.7488466077924525e-05, "epoch": 0.5385738945691143, "percentage": 26.93, "elapsed_time": "7:19:43", "remaining_time": "19:53:24"} +{"current_steps": 2029, "total_steps": 7532, "loss": 0.28393587470054626, "lr": 1.7485555405561412e-05, "epoch": 0.5388394635506573, "percentage": 26.94, "elapsed_time": "7:19:55", "remaining_time": "19:53:10"} +{"current_steps": 2030, "total_steps": 7532, "loss": 0.3262496292591095, "lr": 1.7482643290071503e-05, "epoch": 0.5391050325322002, "percentage": 26.95, "elapsed_time": "7:20:08", "remaining_time": "19:52:54"} +{"current_steps": 2031, "total_steps": 7532, "loss": 0.3549670875072479, "lr": 1.7479729732016218e-05, "epoch": 0.5393706015137432, "percentage": 
26.96, "elapsed_time": "7:20:20", "remaining_time": "19:52:40"} +{"current_steps": 2032, "total_steps": 7532, "loss": 0.30668947100639343, "lr": 1.7476814731957253e-05, "epoch": 0.5396361704952861, "percentage": 26.98, "elapsed_time": "7:20:33", "remaining_time": "19:52:26"} +{"current_steps": 2033, "total_steps": 7532, "loss": 0.2942228317260742, "lr": 1.747389829045659e-05, "epoch": 0.5399017394768291, "percentage": 26.99, "elapsed_time": "7:20:45", "remaining_time": "19:52:12"} +{"current_steps": 2034, "total_steps": 7532, "loss": 0.3166583478450775, "lr": 1.7470980408076484e-05, "epoch": 0.540167308458372, "percentage": 27.0, "elapsed_time": "7:20:58", "remaining_time": "19:51:58"} +{"current_steps": 2035, "total_steps": 7532, "loss": 0.35149675607681274, "lr": 1.7468061085379467e-05, "epoch": 0.540432877439915, "percentage": 27.02, "elapsed_time": "7:21:10", "remaining_time": "19:51:42"} +{"current_steps": 2036, "total_steps": 7532, "loss": 0.32645004987716675, "lr": 1.7465140322928353e-05, "epoch": 0.5406984464214579, "percentage": 27.03, "elapsed_time": "7:21:23", "remaining_time": "19:51:28"} +{"current_steps": 2037, "total_steps": 7532, "loss": 0.3078027367591858, "lr": 1.7462218121286224e-05, "epoch": 0.5409640154030009, "percentage": 27.04, "elapsed_time": "7:21:35", "remaining_time": "19:51:14"} +{"current_steps": 2038, "total_steps": 7532, "loss": 0.28726300597190857, "lr": 1.7459294481016452e-05, "epoch": 0.5412295843845439, "percentage": 27.06, "elapsed_time": "7:21:48", "remaining_time": "19:51:00"} +{"current_steps": 2039, "total_steps": 7532, "loss": 0.29330572485923767, "lr": 1.7456369402682675e-05, "epoch": 0.5414951533660869, "percentage": 27.07, "elapsed_time": "7:22:00", "remaining_time": "19:50:45"} +{"current_steps": 2040, "total_steps": 7532, "loss": 0.3151019215583801, "lr": 1.7453442886848818e-05, "epoch": 0.5417607223476298, "percentage": 27.08, "elapsed_time": "7:22:13", "remaining_time": "19:50:31"} +{"current_steps": 2041, 
"total_steps": 7532, "loss": 0.3267561197280884, "lr": 1.745051493407908e-05, "epoch": 0.5420262913291728, "percentage": 27.1, "elapsed_time": "7:22:25", "remaining_time": "19:50:15"} +{"current_steps": 2042, "total_steps": 7532, "loss": 0.2834410071372986, "lr": 1.7447585544937933e-05, "epoch": 0.5422918603107157, "percentage": 27.11, "elapsed_time": "7:22:37", "remaining_time": "19:50:01"} +{"current_steps": 2043, "total_steps": 7532, "loss": 0.29896080493927, "lr": 1.7444654719990128e-05, "epoch": 0.5425574292922587, "percentage": 27.12, "elapsed_time": "7:22:50", "remaining_time": "19:49:46"} +{"current_steps": 2044, "total_steps": 7532, "loss": 0.3084600865840912, "lr": 1.7441722459800695e-05, "epoch": 0.5428229982738016, "percentage": 27.14, "elapsed_time": "7:23:02", "remaining_time": "19:49:32"} +{"current_steps": 2045, "total_steps": 7532, "loss": 0.3178163170814514, "lr": 1.743878876493494e-05, "epoch": 0.5430885672553446, "percentage": 27.15, "elapsed_time": "7:23:15", "remaining_time": "19:49:18"} +{"current_steps": 2046, "total_steps": 7532, "loss": 0.32886385917663574, "lr": 1.743585363595844e-05, "epoch": 0.5433541362368876, "percentage": 27.16, "elapsed_time": "7:23:28", "remaining_time": "19:49:07"} +{"current_steps": 2047, "total_steps": 7532, "loss": 0.31810784339904785, "lr": 1.743291707343706e-05, "epoch": 0.5436197052184305, "percentage": 27.18, "elapsed_time": "7:23:41", "remaining_time": "19:48:54"} +{"current_steps": 2048, "total_steps": 7532, "loss": 0.3003198504447937, "lr": 1.7429979077936928e-05, "epoch": 0.5438852741999735, "percentage": 27.19, "elapsed_time": "7:23:54", "remaining_time": "19:48:41"} +{"current_steps": 2049, "total_steps": 7532, "loss": 0.33889323472976685, "lr": 1.7427039650024462e-05, "epoch": 0.5441508431815164, "percentage": 27.2, "elapsed_time": "7:24:08", "remaining_time": "19:48:29"} +{"current_steps": 2050, "total_steps": 7532, "loss": 0.3238763213157654, "lr": 1.7424098790266343e-05, "epoch": 
0.5444164121630594, "percentage": 27.22, "elapsed_time": "7:24:21", "remaining_time": "19:48:16"} +{"current_steps": 2051, "total_steps": 7532, "loss": 0.34304776787757874, "lr": 1.742115649922954e-05, "epoch": 0.5446819811446023, "percentage": 27.23, "elapsed_time": "7:24:35", "remaining_time": "19:48:05"} +{"current_steps": 2052, "total_steps": 7532, "loss": 0.31528347730636597, "lr": 1.741821277748128e-05, "epoch": 0.5449475501261453, "percentage": 27.24, "elapsed_time": "7:24:48", "remaining_time": "19:47:52"} +{"current_steps": 2053, "total_steps": 7532, "loss": 0.2992726266384125, "lr": 1.7415267625589094e-05, "epoch": 0.5452131191076882, "percentage": 27.26, "elapsed_time": "7:25:01", "remaining_time": "19:47:41"} +{"current_steps": 2054, "total_steps": 7532, "loss": 0.31706419587135315, "lr": 1.741232104412076e-05, "epoch": 0.5454786880892312, "percentage": 27.27, "elapsed_time": "7:25:14", "remaining_time": "19:47:26"} +{"current_steps": 2055, "total_steps": 7532, "loss": 0.2887676954269409, "lr": 1.7409373033644355e-05, "epoch": 0.5457442570707741, "percentage": 27.28, "elapsed_time": "7:25:27", "remaining_time": "19:47:13"} +{"current_steps": 2056, "total_steps": 7532, "loss": 0.3410964906215668, "lr": 1.740642359472821e-05, "epoch": 0.5460098260523171, "percentage": 27.3, "elapsed_time": "7:25:39", "remaining_time": "19:46:59"} +{"current_steps": 2057, "total_steps": 7532, "loss": 0.3711693286895752, "lr": 1.740347272794095e-05, "epoch": 0.54627539503386, "percentage": 27.31, "elapsed_time": "7:25:52", "remaining_time": "19:46:46"} +{"current_steps": 2058, "total_steps": 7532, "loss": 0.3512499928474426, "lr": 1.7400520433851457e-05, "epoch": 0.546540964015403, "percentage": 27.32, "elapsed_time": "7:26:05", "remaining_time": "19:46:31"} +{"current_steps": 2059, "total_steps": 7532, "loss": 0.3136678636074066, "lr": 1.739756671302891e-05, "epoch": 0.5468065329969459, "percentage": 27.34, "elapsed_time": "7:26:18", "remaining_time": "19:46:18"} 
+{"current_steps": 2060, "total_steps": 7532, "loss": 0.2983730435371399, "lr": 1.7394611566042748e-05, "epoch": 0.5470721019784889, "percentage": 27.35, "elapsed_time": "7:26:30", "remaining_time": "19:46:04"} +{"current_steps": 2061, "total_steps": 7532, "loss": 0.36603933572769165, "lr": 1.7391654993462686e-05, "epoch": 0.5473376709600318, "percentage": 27.36, "elapsed_time": "7:26:43", "remaining_time": "19:45:50"} +{"current_steps": 2062, "total_steps": 7532, "loss": 0.3651789128780365, "lr": 1.7388696995858717e-05, "epoch": 0.5476032399415748, "percentage": 27.38, "elapsed_time": "7:26:55", "remaining_time": "19:45:36"} +{"current_steps": 2063, "total_steps": 7532, "loss": 0.30580615997314453, "lr": 1.7385737573801108e-05, "epoch": 0.5478688089231177, "percentage": 27.39, "elapsed_time": "7:27:08", "remaining_time": "19:45:21"} +{"current_steps": 2064, "total_steps": 7532, "loss": 0.2630755305290222, "lr": 1.7382776727860406e-05, "epoch": 0.5481343779046607, "percentage": 27.4, "elapsed_time": "7:27:21", "remaining_time": "19:45:08"} +{"current_steps": 2065, "total_steps": 7532, "loss": 0.2947537899017334, "lr": 1.7379814458607416e-05, "epoch": 0.5483999468862036, "percentage": 27.42, "elapsed_time": "7:27:34", "remaining_time": "19:44:54"} +{"current_steps": 2066, "total_steps": 7532, "loss": 0.3119455873966217, "lr": 1.737685076661324e-05, "epoch": 0.5486655158677467, "percentage": 27.43, "elapsed_time": "7:27:46", "remaining_time": "19:44:41"} +{"current_steps": 2067, "total_steps": 7532, "loss": 0.3162347972393036, "lr": 1.7373885652449237e-05, "epoch": 0.5489310848492897, "percentage": 27.44, "elapsed_time": "7:27:59", "remaining_time": "19:44:27"} +{"current_steps": 2068, "total_steps": 7532, "loss": 0.34120452404022217, "lr": 1.7370919116687047e-05, "epoch": 0.5491966538308326, "percentage": 27.46, "elapsed_time": "7:28:12", "remaining_time": "19:44:14"} +{"current_steps": 2069, "total_steps": 7532, "loss": 0.3126780092716217, "lr": 
1.7367951159898583e-05, "epoch": 0.5494622228123756, "percentage": 27.47, "elapsed_time": "7:28:25", "remaining_time": "19:44:00"} +{"current_steps": 2070, "total_steps": 7532, "loss": 0.2833349406719208, "lr": 1.7364981782656033e-05, "epoch": 0.5497277917939185, "percentage": 27.48, "elapsed_time": "7:28:38", "remaining_time": "19:43:47"} +{"current_steps": 2071, "total_steps": 7532, "loss": 0.31617453694343567, "lr": 1.7362010985531855e-05, "epoch": 0.5499933607754615, "percentage": 27.5, "elapsed_time": "7:28:51", "remaining_time": "19:43:34"} +{"current_steps": 2072, "total_steps": 7532, "loss": 0.31372442841529846, "lr": 1.735903876909879e-05, "epoch": 0.5502589297570044, "percentage": 27.51, "elapsed_time": "7:29:04", "remaining_time": "19:43:21"} +{"current_steps": 2073, "total_steps": 7532, "loss": 0.3500489592552185, "lr": 1.735606513392984e-05, "epoch": 0.5505244987385474, "percentage": 27.52, "elapsed_time": "7:29:17", "remaining_time": "19:43:08"} +{"current_steps": 2074, "total_steps": 7532, "loss": 0.3219031095504761, "lr": 1.735309008059829e-05, "epoch": 0.5507900677200903, "percentage": 27.54, "elapsed_time": "7:29:30", "remaining_time": "19:42:57"} +{"current_steps": 2075, "total_steps": 7532, "loss": 0.32419610023498535, "lr": 1.7350113609677694e-05, "epoch": 0.5510556367016333, "percentage": 27.55, "elapsed_time": "7:29:43", "remaining_time": "19:42:43"} +{"current_steps": 2076, "total_steps": 7532, "loss": 0.34804612398147583, "lr": 1.7347135721741874e-05, "epoch": 0.5513212056831762, "percentage": 27.56, "elapsed_time": "7:29:56", "remaining_time": "19:42:30"} +{"current_steps": 2077, "total_steps": 7532, "loss": 0.33105939626693726, "lr": 1.7344156417364946e-05, "epoch": 0.5515867746647192, "percentage": 27.58, "elapsed_time": "7:30:09", "remaining_time": "19:42:17"} +{"current_steps": 2078, "total_steps": 7532, "loss": 0.3426011800765991, "lr": 1.7341175697121273e-05, "epoch": 0.5518523436462621, "percentage": 27.59, "elapsed_time": 
"7:30:22", "remaining_time": "19:42:05"} +{"current_steps": 2079, "total_steps": 7532, "loss": 0.33207643032073975, "lr": 1.7338193561585507e-05, "epoch": 0.5521179126278051, "percentage": 27.6, "elapsed_time": "7:30:35", "remaining_time": "19:41:51"} +{"current_steps": 2080, "total_steps": 7532, "loss": 0.31849467754364014, "lr": 1.7335210011332573e-05, "epoch": 0.552383481609348, "percentage": 27.62, "elapsed_time": "7:30:48", "remaining_time": "19:41:37"} +{"current_steps": 2081, "total_steps": 7532, "loss": 0.3549337685108185, "lr": 1.7332225046937655e-05, "epoch": 0.552649050590891, "percentage": 27.63, "elapsed_time": "7:31:01", "remaining_time": "19:41:25"} +{"current_steps": 2082, "total_steps": 7532, "loss": 0.2850857377052307, "lr": 1.7329238668976224e-05, "epoch": 0.5529146195724339, "percentage": 27.64, "elapsed_time": "7:31:14", "remaining_time": "19:41:11"} +{"current_steps": 2083, "total_steps": 7532, "loss": 0.3277609348297119, "lr": 1.732625087802402e-05, "epoch": 0.5531801885539769, "percentage": 27.66, "elapsed_time": "7:31:27", "remaining_time": "19:40:58"} +{"current_steps": 2084, "total_steps": 7532, "loss": 0.2951444983482361, "lr": 1.732326167465705e-05, "epoch": 0.5534457575355198, "percentage": 27.67, "elapsed_time": "7:31:39", "remaining_time": "19:40:44"} +{"current_steps": 2085, "total_steps": 7532, "loss": 0.36634138226509094, "lr": 1.7320271059451597e-05, "epoch": 0.5537113265170628, "percentage": 27.68, "elapsed_time": "7:31:53", "remaining_time": "19:40:31"} +{"current_steps": 2086, "total_steps": 7532, "loss": 0.3407907783985138, "lr": 1.7317279032984222e-05, "epoch": 0.5539768954986057, "percentage": 27.7, "elapsed_time": "7:32:06", "remaining_time": "19:40:18"} +{"current_steps": 2087, "total_steps": 7532, "loss": 0.34038978815078735, "lr": 1.7314285595831747e-05, "epoch": 0.5542424644801487, "percentage": 27.71, "elapsed_time": "7:32:19", "remaining_time": "19:40:05"} +{"current_steps": 2088, "total_steps": 7532, "loss": 
0.337898313999176, "lr": 1.7311290748571273e-05, "epoch": 0.5545080334616916, "percentage": 27.72, "elapsed_time": "7:32:31", "remaining_time": "19:39:52"} +{"current_steps": 2089, "total_steps": 7532, "loss": 0.3250765800476074, "lr": 1.7308294491780175e-05, "epoch": 0.5547736024432346, "percentage": 27.73, "elapsed_time": "7:32:45", "remaining_time": "19:39:40"} +{"current_steps": 2090, "total_steps": 7532, "loss": 0.31562721729278564, "lr": 1.730529682603609e-05, "epoch": 0.5550391714247775, "percentage": 27.75, "elapsed_time": "7:32:57", "remaining_time": "19:39:26"} +{"current_steps": 2091, "total_steps": 7532, "loss": 0.32757896184921265, "lr": 1.730229775191693e-05, "epoch": 0.5553047404063205, "percentage": 27.76, "elapsed_time": "7:33:10", "remaining_time": "19:39:12"} +{"current_steps": 2092, "total_steps": 7532, "loss": 0.35861605405807495, "lr": 1.7299297270000894e-05, "epoch": 0.5555703093878634, "percentage": 27.77, "elapsed_time": "7:33:23", "remaining_time": "19:38:59"} +{"current_steps": 2093, "total_steps": 7532, "loss": 0.3383220434188843, "lr": 1.7296295380866425e-05, "epoch": 0.5558358783694064, "percentage": 27.79, "elapsed_time": "7:33:35", "remaining_time": "19:38:44"} +{"current_steps": 2094, "total_steps": 7532, "loss": 0.30144187808036804, "lr": 1.7293292085092263e-05, "epoch": 0.5561014473509495, "percentage": 27.8, "elapsed_time": "7:33:48", "remaining_time": "19:38:32"} +{"current_steps": 2095, "total_steps": 7532, "loss": 0.2626546323299408, "lr": 1.72902873832574e-05, "epoch": 0.5563670163324924, "percentage": 27.81, "elapsed_time": "7:34:01", "remaining_time": "19:38:16"} +{"current_steps": 2096, "total_steps": 7532, "loss": 0.3289363980293274, "lr": 1.7287281275941112e-05, "epoch": 0.5566325853140354, "percentage": 27.83, "elapsed_time": "7:34:13", "remaining_time": "19:38:02"} +{"current_steps": 2097, "total_steps": 7532, "loss": 0.26631784439086914, "lr": 1.7284273763722943e-05, "epoch": 0.5568981542955783, "percentage": 27.84, 
"elapsed_time": "7:34:26", "remaining_time": "19:37:47"} +{"current_steps": 2098, "total_steps": 7532, "loss": 0.3051939606666565, "lr": 1.7281264847182697e-05, "epoch": 0.5571637232771213, "percentage": 27.85, "elapsed_time": "7:34:38", "remaining_time": "19:37:33"} +{"current_steps": 2099, "total_steps": 7532, "loss": 0.34456121921539307, "lr": 1.7278254526900468e-05, "epoch": 0.5574292922586642, "percentage": 27.87, "elapsed_time": "7:34:50", "remaining_time": "19:37:18"} +{"current_steps": 2100, "total_steps": 7532, "loss": 0.2747807502746582, "lr": 1.72752428034566e-05, "epoch": 0.5576948612402072, "percentage": 27.88, "elapsed_time": "7:35:03", "remaining_time": "19:37:04"} +{"current_steps": 2101, "total_steps": 7532, "loss": 0.31111812591552734, "lr": 1.7272229677431723e-05, "epoch": 0.5579604302217501, "percentage": 27.89, "elapsed_time": "7:35:22", "remaining_time": "19:37:06"} +{"current_steps": 2102, "total_steps": 7532, "loss": 0.29648226499557495, "lr": 1.7269215149406737e-05, "epoch": 0.5582259992032931, "percentage": 27.91, "elapsed_time": "7:35:35", "remaining_time": "19:36:53"} +{"current_steps": 2103, "total_steps": 7532, "loss": 0.28303876519203186, "lr": 1.72661992199628e-05, "epoch": 0.558491568184836, "percentage": 27.92, "elapsed_time": "7:35:47", "remaining_time": "19:36:39"} +{"current_steps": 2104, "total_steps": 7532, "loss": 0.30540165305137634, "lr": 1.726318188968135e-05, "epoch": 0.558757137166379, "percentage": 27.93, "elapsed_time": "7:36:00", "remaining_time": "19:36:25"} +{"current_steps": 2105, "total_steps": 7532, "loss": 0.31810393929481506, "lr": 1.726016315914409e-05, "epoch": 0.5590227061479219, "percentage": 27.95, "elapsed_time": "7:36:13", "remaining_time": "19:36:13"} +{"current_steps": 2106, "total_steps": 7532, "loss": 0.33605068922042847, "lr": 1.7257143028933004e-05, "epoch": 0.5592882751294649, "percentage": 27.96, "elapsed_time": "7:36:26", "remaining_time": "19:35:59"} +{"current_steps": 2107, "total_steps": 
7532, "loss": 0.3340590298175812, "lr": 1.725412149963033e-05, "epoch": 0.5595538441110078, "percentage": 27.97, "elapsed_time": "7:36:39", "remaining_time": "19:35:46"} +{"current_steps": 2108, "total_steps": 7532, "loss": 0.29560500383377075, "lr": 1.7251098571818586e-05, "epoch": 0.5598194130925508, "percentage": 27.99, "elapsed_time": "7:36:51", "remaining_time": "19:35:31"} +{"current_steps": 2109, "total_steps": 7532, "loss": 0.30100107192993164, "lr": 1.7248074246080555e-05, "epoch": 0.5600849820740937, "percentage": 28.0, "elapsed_time": "7:37:04", "remaining_time": "19:35:18"} +{"current_steps": 2110, "total_steps": 7532, "loss": 0.35551172494888306, "lr": 1.7245048522999294e-05, "epoch": 0.5603505510556367, "percentage": 28.01, "elapsed_time": "7:37:16", "remaining_time": "19:35:03"} +{"current_steps": 2111, "total_steps": 7532, "loss": 0.3182663023471832, "lr": 1.724202140315812e-05, "epoch": 0.5606161200371796, "percentage": 28.03, "elapsed_time": "7:37:29", "remaining_time": "19:34:49"} +{"current_steps": 2112, "total_steps": 7532, "loss": 0.3160201609134674, "lr": 1.723899288714064e-05, "epoch": 0.5608816890187226, "percentage": 28.04, "elapsed_time": "7:37:41", "remaining_time": "19:34:35"} +{"current_steps": 2113, "total_steps": 7532, "loss": 0.3126063942909241, "lr": 1.72359629755307e-05, "epoch": 0.5611472580002655, "percentage": 28.05, "elapsed_time": "7:37:55", "remaining_time": "19:34:22"} +{"current_steps": 2114, "total_steps": 7532, "loss": 0.3222552239894867, "lr": 1.723293166891244e-05, "epoch": 0.5614128269818085, "percentage": 28.07, "elapsed_time": "7:38:08", "remaining_time": "19:34:10"} +{"current_steps": 2115, "total_steps": 7532, "loss": 0.33601805567741394, "lr": 1.722989896787026e-05, "epoch": 0.5616783959633515, "percentage": 28.08, "elapsed_time": "7:38:21", "remaining_time": "19:33:57"} +{"current_steps": 2116, "total_steps": 7532, "loss": 0.28679755330085754, "lr": 1.722686487298883e-05, "epoch": 0.5619439649448944, 
"percentage": 28.09, "elapsed_time": "7:38:34", "remaining_time": "19:33:45"} +{"current_steps": 2117, "total_steps": 7532, "loss": 0.2895340323448181, "lr": 1.722382938485308e-05, "epoch": 0.5622095339264374, "percentage": 28.11, "elapsed_time": "7:38:47", "remaining_time": "19:33:32"} +{"current_steps": 2118, "total_steps": 7532, "loss": 0.310183048248291, "lr": 1.7220792504048227e-05, "epoch": 0.5624751029079803, "percentage": 28.12, "elapsed_time": "7:39:01", "remaining_time": "19:33:20"} +{"current_steps": 2119, "total_steps": 7532, "loss": 0.2768586277961731, "lr": 1.7217754231159737e-05, "epoch": 0.5627406718895233, "percentage": 28.13, "elapsed_time": "7:39:13", "remaining_time": "19:33:05"} +{"current_steps": 2120, "total_steps": 7532, "loss": 0.2785574793815613, "lr": 1.7214714566773358e-05, "epoch": 0.5630062408710662, "percentage": 28.15, "elapsed_time": "7:39:26", "remaining_time": "19:32:53"} +{"current_steps": 2121, "total_steps": 7532, "loss": 0.30544358491897583, "lr": 1.72116735114751e-05, "epoch": 0.5632718098526092, "percentage": 28.16, "elapsed_time": "7:39:39", "remaining_time": "19:32:39"} +{"current_steps": 2122, "total_steps": 7532, "loss": 0.31662559509277344, "lr": 1.7208631065851243e-05, "epoch": 0.5635373788341522, "percentage": 28.17, "elapsed_time": "7:39:52", "remaining_time": "19:32:26"} +{"current_steps": 2123, "total_steps": 7532, "loss": 0.31466105580329895, "lr": 1.7205587230488335e-05, "epoch": 0.5638029478156952, "percentage": 28.19, "elapsed_time": "7:40:05", "remaining_time": "19:32:13"} +{"current_steps": 2124, "total_steps": 7532, "loss": 0.3471367359161377, "lr": 1.720254200597319e-05, "epoch": 0.5640685167972381, "percentage": 28.2, "elapsed_time": "7:40:19", "remaining_time": "19:32:02"} +{"current_steps": 2125, "total_steps": 7532, "loss": 0.3325269818305969, "lr": 1.7199495392892892e-05, "epoch": 0.5643340857787811, "percentage": 28.21, "elapsed_time": "7:40:32", "remaining_time": "19:31:50"} +{"current_steps": 2126, 
"total_steps": 7532, "loss": 0.32423460483551025, "lr": 1.7196447391834797e-05, "epoch": 0.564599654760324, "percentage": 28.23, "elapsed_time": "7:40:45", "remaining_time": "19:31:37"} +{"current_steps": 2127, "total_steps": 7532, "loss": 0.3083527088165283, "lr": 1.7193398003386514e-05, "epoch": 0.564865223741867, "percentage": 28.24, "elapsed_time": "7:40:59", "remaining_time": "19:31:26"} +{"current_steps": 2128, "total_steps": 7532, "loss": 0.3418716490268707, "lr": 1.7190347228135933e-05, "epoch": 0.5651307927234099, "percentage": 28.25, "elapsed_time": "7:41:12", "remaining_time": "19:31:14"} +{"current_steps": 2129, "total_steps": 7532, "loss": 0.33037957549095154, "lr": 1.7187295066671214e-05, "epoch": 0.5653963617049529, "percentage": 28.27, "elapsed_time": "7:41:26", "remaining_time": "19:31:02"} +{"current_steps": 2130, "total_steps": 7532, "loss": 0.3383673131465912, "lr": 1.7184241519580767e-05, "epoch": 0.5656619306864958, "percentage": 28.28, "elapsed_time": "7:41:39", "remaining_time": "19:30:50"} +{"current_steps": 2131, "total_steps": 7532, "loss": 0.27756133675575256, "lr": 1.718118658745329e-05, "epoch": 0.5659274996680388, "percentage": 28.29, "elapsed_time": "7:41:53", "remaining_time": "19:30:39"} +{"current_steps": 2132, "total_steps": 7532, "loss": 0.2987852692604065, "lr": 1.717813027087773e-05, "epoch": 0.5661930686495817, "percentage": 28.31, "elapsed_time": "7:42:06", "remaining_time": "19:30:26"} +{"current_steps": 2133, "total_steps": 7532, "loss": 0.30016621947288513, "lr": 1.717507257044331e-05, "epoch": 0.5664586376311247, "percentage": 28.32, "elapsed_time": "7:42:20", "remaining_time": "19:30:15"} +{"current_steps": 2134, "total_steps": 7532, "loss": 0.31592345237731934, "lr": 1.7172013486739528e-05, "epoch": 0.5667242066126676, "percentage": 28.33, "elapsed_time": "7:42:32", "remaining_time": "19:30:01"} +{"current_steps": 2135, "total_steps": 7532, "loss": 0.3500048816204071, "lr": 1.716895302035613e-05, "epoch": 
0.5669897755942106, "percentage": 28.35, "elapsed_time": "7:42:45", "remaining_time": "19:29:47"} +{"current_steps": 2136, "total_steps": 7532, "loss": 0.32069307565689087, "lr": 1.7165891171883134e-05, "epoch": 0.5672553445757536, "percentage": 28.36, "elapsed_time": "7:42:58", "remaining_time": "19:29:34"} +{"current_steps": 2137, "total_steps": 7532, "loss": 0.3100130558013916, "lr": 1.7162827941910837e-05, "epoch": 0.5675209135572965, "percentage": 28.37, "elapsed_time": "7:43:11", "remaining_time": "19:29:21"} +{"current_steps": 2138, "total_steps": 7532, "loss": 0.3205985128879547, "lr": 1.715976333102979e-05, "epoch": 0.5677864825388395, "percentage": 28.39, "elapsed_time": "7:43:24", "remaining_time": "19:29:07"} +{"current_steps": 2139, "total_steps": 7532, "loss": 0.3243224024772644, "lr": 1.715669733983081e-05, "epoch": 0.5680520515203824, "percentage": 28.4, "elapsed_time": "7:43:36", "remaining_time": "19:28:54"} +{"current_steps": 2140, "total_steps": 7532, "loss": 0.3278832733631134, "lr": 1.7153629968904997e-05, "epoch": 0.5683176205019254, "percentage": 28.41, "elapsed_time": "7:43:49", "remaining_time": "19:28:39"} +{"current_steps": 2141, "total_steps": 7532, "loss": 0.29137033224105835, "lr": 1.7150561218843693e-05, "epoch": 0.5685831894834683, "percentage": 28.43, "elapsed_time": "7:44:02", "remaining_time": "19:28:26"} +{"current_steps": 2142, "total_steps": 7532, "loss": 0.3065168857574463, "lr": 1.7147491090238516e-05, "epoch": 0.5688487584650113, "percentage": 28.44, "elapsed_time": "7:44:14", "remaining_time": "19:28:11"} +{"current_steps": 2143, "total_steps": 7532, "loss": 0.3367912173271179, "lr": 1.7144419583681354e-05, "epoch": 0.5691143274465542, "percentage": 28.45, "elapsed_time": "7:44:27", "remaining_time": "19:27:58"} +{"current_steps": 2144, "total_steps": 7532, "loss": 0.32278239727020264, "lr": 1.7141346699764357e-05, "epoch": 0.5693798964280972, "percentage": 28.47, "elapsed_time": "7:44:40", "remaining_time": "19:27:45"} 
+{"current_steps": 2145, "total_steps": 7532, "loss": 0.2887166440486908, "lr": 1.713827243907994e-05, "epoch": 0.5696454654096401, "percentage": 28.48, "elapsed_time": "7:44:53", "remaining_time": "19:27:32"} +{"current_steps": 2146, "total_steps": 7532, "loss": 0.33214619755744934, "lr": 1.713519680222079e-05, "epoch": 0.5699110343911831, "percentage": 28.49, "elapsed_time": "7:45:06", "remaining_time": "19:27:19"} +{"current_steps": 2147, "total_steps": 7532, "loss": 0.2865470051765442, "lr": 1.7132119789779846e-05, "epoch": 0.570176603372726, "percentage": 28.51, "elapsed_time": "7:45:19", "remaining_time": "19:27:06"} +{"current_steps": 2148, "total_steps": 7532, "loss": 0.32746967673301697, "lr": 1.7129041402350317e-05, "epoch": 0.570442172354269, "percentage": 28.52, "elapsed_time": "7:45:32", "remaining_time": "19:26:53"} +{"current_steps": 2149, "total_steps": 7532, "loss": 0.3029513359069824, "lr": 1.712596164052569e-05, "epoch": 0.5707077413358119, "percentage": 28.53, "elapsed_time": "7:45:45", "remaining_time": "19:26:40"} +{"current_steps": 2150, "total_steps": 7532, "loss": 0.3052698075771332, "lr": 1.7122880504899698e-05, "epoch": 0.570973310317355, "percentage": 28.54, "elapsed_time": "7:45:58", "remaining_time": "19:26:27"} +{"current_steps": 2151, "total_steps": 7532, "loss": 0.29221272468566895, "lr": 1.7119797996066355e-05, "epoch": 0.5712388792988979, "percentage": 28.56, "elapsed_time": "7:46:11", "remaining_time": "19:26:13"} +{"current_steps": 2152, "total_steps": 7532, "loss": 0.3165368139743805, "lr": 1.711671411461993e-05, "epoch": 0.5715044482804409, "percentage": 28.57, "elapsed_time": "7:46:24", "remaining_time": "19:26:01"} +{"current_steps": 2153, "total_steps": 7532, "loss": 0.30877187848091125, "lr": 1.7113628861154953e-05, "epoch": 0.5717700172619838, "percentage": 28.58, "elapsed_time": "7:46:37", "remaining_time": "19:25:48"} +{"current_steps": 2154, "total_steps": 7532, "loss": 0.2985781729221344, "lr": 1.711054223626623e-05, 
"epoch": 0.5720355862435268, "percentage": 28.6, "elapsed_time": "7:46:50", "remaining_time": "19:25:36"} +{"current_steps": 2155, "total_steps": 7532, "loss": 0.3449699878692627, "lr": 1.7107454240548825e-05, "epoch": 0.5723011552250697, "percentage": 28.61, "elapsed_time": "7:47:03", "remaining_time": "19:25:22"} +{"current_steps": 2156, "total_steps": 7532, "loss": 0.3219606578350067, "lr": 1.7104364874598066e-05, "epoch": 0.5725667242066127, "percentage": 28.62, "elapsed_time": "7:47:16", "remaining_time": "19:25:10"} +{"current_steps": 2157, "total_steps": 7532, "loss": 0.3059350550174713, "lr": 1.710127413900955e-05, "epoch": 0.5728322931881557, "percentage": 28.64, "elapsed_time": "7:47:30", "remaining_time": "19:24:58"} +{"current_steps": 2158, "total_steps": 7532, "loss": 0.29461371898651123, "lr": 1.7098182034379132e-05, "epoch": 0.5730978621696986, "percentage": 28.65, "elapsed_time": "7:47:43", "remaining_time": "19:24:46"} +{"current_steps": 2159, "total_steps": 7532, "loss": 0.2998795509338379, "lr": 1.709508856130293e-05, "epoch": 0.5733634311512416, "percentage": 28.66, "elapsed_time": "7:47:57", "remaining_time": "19:24:34"} +{"current_steps": 2160, "total_steps": 7532, "loss": 0.28214582800865173, "lr": 1.7091993720377336e-05, "epoch": 0.5736290001327845, "percentage": 28.68, "elapsed_time": "7:48:10", "remaining_time": "19:24:22"} +{"current_steps": 2161, "total_steps": 7532, "loss": 0.3036864697933197, "lr": 1.708889751219899e-05, "epoch": 0.5738945691143275, "percentage": 28.69, "elapsed_time": "7:48:24", "remaining_time": "19:24:12"} +{"current_steps": 2162, "total_steps": 7532, "loss": 0.34146320819854736, "lr": 1.7085799937364815e-05, "epoch": 0.5741601380958704, "percentage": 28.7, "elapsed_time": "7:48:38", "remaining_time": "19:24:00"} +{"current_steps": 2163, "total_steps": 7532, "loss": 0.33996909856796265, "lr": 1.708270099647198e-05, "epoch": 0.5744257070774134, "percentage": 28.72, "elapsed_time": "7:48:52", "remaining_time": 
"19:23:49"} +{"current_steps": 2164, "total_steps": 7532, "loss": 0.3308744728565216, "lr": 1.7079600690117924e-05, "epoch": 0.5746912760589563, "percentage": 28.73, "elapsed_time": "7:49:05", "remaining_time": "19:23:36"} +{"current_steps": 2165, "total_steps": 7532, "loss": 0.2945587933063507, "lr": 1.707649901890035e-05, "epoch": 0.5749568450404993, "percentage": 28.74, "elapsed_time": "7:49:18", "remaining_time": "19:23:24"} +{"current_steps": 2166, "total_steps": 7532, "loss": 0.30348697304725647, "lr": 1.7073395983417227e-05, "epoch": 0.5752224140220422, "percentage": 28.76, "elapsed_time": "7:49:31", "remaining_time": "19:23:10"} +{"current_steps": 2167, "total_steps": 7532, "loss": 0.28789055347442627, "lr": 1.707029158426678e-05, "epoch": 0.5754879830035852, "percentage": 28.77, "elapsed_time": "7:49:44", "remaining_time": "19:22:58"} +{"current_steps": 2168, "total_steps": 7532, "loss": 0.3026643693447113, "lr": 1.7067185822047502e-05, "epoch": 0.5757535519851281, "percentage": 28.78, "elapsed_time": "7:49:57", "remaining_time": "19:22:46"} +{"current_steps": 2169, "total_steps": 7532, "loss": 0.34021061658859253, "lr": 1.7064078697358147e-05, "epoch": 0.5760191209666711, "percentage": 28.8, "elapsed_time": "7:50:11", "remaining_time": "19:22:35"} +{"current_steps": 2170, "total_steps": 7532, "loss": 0.32793867588043213, "lr": 1.7060970210797735e-05, "epoch": 0.576284689948214, "percentage": 28.81, "elapsed_time": "7:50:24", "remaining_time": "19:22:22"} +{"current_steps": 2171, "total_steps": 7532, "loss": 0.36144691705703735, "lr": 1.705786036296554e-05, "epoch": 0.576550258929757, "percentage": 28.82, "elapsed_time": "7:50:38", "remaining_time": "19:22:12"} +{"current_steps": 2172, "total_steps": 7532, "loss": 0.3630291223526001, "lr": 1.7054749154461105e-05, "epoch": 0.5768158279112999, "percentage": 28.84, "elapsed_time": "7:50:52", "remaining_time": "19:21:59"} +{"current_steps": 2173, "total_steps": 7532, "loss": 0.34964969754219055, "lr": 
1.705163658588424e-05, "epoch": 0.5770813968928429, "percentage": 28.85, "elapsed_time": "7:51:05", "remaining_time": "19:21:48"} +{"current_steps": 2174, "total_steps": 7532, "loss": 0.2877815067768097, "lr": 1.7048522657835004e-05, "epoch": 0.5773469658743858, "percentage": 28.86, "elapsed_time": "7:51:19", "remaining_time": "19:21:36"} +{"current_steps": 2175, "total_steps": 7532, "loss": 0.3185664713382721, "lr": 1.7045407370913732e-05, "epoch": 0.5776125348559288, "percentage": 28.88, "elapsed_time": "7:51:32", "remaining_time": "19:21:23"} +{"current_steps": 2176, "total_steps": 7532, "loss": 0.3035257160663605, "lr": 1.704229072572101e-05, "epoch": 0.5778781038374717, "percentage": 28.89, "elapsed_time": "7:51:45", "remaining_time": "19:21:11"} +{"current_steps": 2177, "total_steps": 7532, "loss": 0.325702965259552, "lr": 1.7039172722857695e-05, "epoch": 0.5781436728190147, "percentage": 28.9, "elapsed_time": "7:51:59", "remaining_time": "19:20:59"} +{"current_steps": 2178, "total_steps": 7532, "loss": 0.32837462425231934, "lr": 1.7036053362924896e-05, "epoch": 0.5784092418005577, "percentage": 28.92, "elapsed_time": "7:52:12", "remaining_time": "19:20:48"} +{"current_steps": 2179, "total_steps": 7532, "loss": 0.3430028259754181, "lr": 1.703293264652399e-05, "epoch": 0.5786748107821007, "percentage": 28.93, "elapsed_time": "7:52:25", "remaining_time": "19:20:35"} +{"current_steps": 2180, "total_steps": 7532, "loss": 0.32792964577674866, "lr": 1.702981057425662e-05, "epoch": 0.5789403797636437, "percentage": 28.94, "elapsed_time": "7:52:39", "remaining_time": "19:20:24"} +{"current_steps": 2181, "total_steps": 7532, "loss": 0.3037140965461731, "lr": 1.7026687146724675e-05, "epoch": 0.5792059487451866, "percentage": 28.96, "elapsed_time": "7:52:52", "remaining_time": "19:20:11"} +{"current_steps": 2182, "total_steps": 7532, "loss": 0.33083540201187134, "lr": 1.7023562364530322e-05, "epoch": 0.5794715177267296, "percentage": 28.97, "elapsed_time": "7:53:06", 
"remaining_time": "19:19:59"} +{"current_steps": 2183, "total_steps": 7532, "loss": 0.3108663260936737, "lr": 1.702043622827598e-05, "epoch": 0.5797370867082725, "percentage": 28.98, "elapsed_time": "7:53:19", "remaining_time": "19:19:47"} +{"current_steps": 2184, "total_steps": 7532, "loss": 0.2939792573451996, "lr": 1.7017308738564336e-05, "epoch": 0.5800026556898155, "percentage": 29.0, "elapsed_time": "7:53:33", "remaining_time": "19:19:36"} +{"current_steps": 2185, "total_steps": 7532, "loss": 0.3686106503009796, "lr": 1.7014179895998322e-05, "epoch": 0.5802682246713584, "percentage": 29.01, "elapsed_time": "7:53:46", "remaining_time": "19:19:24"} +{"current_steps": 2186, "total_steps": 7532, "loss": 0.3497159779071808, "lr": 1.7011049701181152e-05, "epoch": 0.5805337936529014, "percentage": 29.02, "elapsed_time": "7:53:59", "remaining_time": "19:19:11"} +{"current_steps": 2187, "total_steps": 7532, "loss": 0.31730401515960693, "lr": 1.7007918154716286e-05, "epoch": 0.5807993626344443, "percentage": 29.04, "elapsed_time": "7:54:12", "remaining_time": "19:18:58"} +{"current_steps": 2188, "total_steps": 7532, "loss": 0.3064701557159424, "lr": 1.7004785257207456e-05, "epoch": 0.5810649316159873, "percentage": 29.05, "elapsed_time": "7:54:25", "remaining_time": "19:18:44"} +{"current_steps": 2189, "total_steps": 7532, "loss": 0.37174129486083984, "lr": 1.7001651009258635e-05, "epoch": 0.5813305005975302, "percentage": 29.06, "elapsed_time": "7:54:38", "remaining_time": "19:18:32"} +{"current_steps": 2190, "total_steps": 7532, "loss": 0.3548140823841095, "lr": 1.699851541147408e-05, "epoch": 0.5815960695790732, "percentage": 29.08, "elapsed_time": "7:54:51", "remaining_time": "19:18:18"} +{"current_steps": 2191, "total_steps": 7532, "loss": 0.3486049473285675, "lr": 1.6995378464458292e-05, "epoch": 0.5818616385606161, "percentage": 29.09, "elapsed_time": "7:55:04", "remaining_time": "19:18:05"} +{"current_steps": 2192, "total_steps": 7532, "loss": 
0.3083210587501526, "lr": 1.6992240168816037e-05, "epoch": 0.5821272075421591, "percentage": 29.1, "elapsed_time": "7:55:17", "remaining_time": "19:17:52"} +{"current_steps": 2193, "total_steps": 7532, "loss": 0.3006829619407654, "lr": 1.6989100525152346e-05, "epoch": 0.582392776523702, "percentage": 29.12, "elapsed_time": "7:55:30", "remaining_time": "19:17:40"} +{"current_steps": 2194, "total_steps": 7532, "loss": 0.32856425642967224, "lr": 1.6985959534072502e-05, "epoch": 0.582658345505245, "percentage": 29.13, "elapsed_time": "7:55:43", "remaining_time": "19:17:27"} +{"current_steps": 2195, "total_steps": 7532, "loss": 0.3382526934146881, "lr": 1.6982817196182052e-05, "epoch": 0.5829239144867879, "percentage": 29.14, "elapsed_time": "7:55:57", "remaining_time": "19:17:14"} +{"current_steps": 2196, "total_steps": 7532, "loss": 0.3311583399772644, "lr": 1.69796735120868e-05, "epoch": 0.5831894834683309, "percentage": 29.16, "elapsed_time": "7:56:10", "remaining_time": "19:17:01"} +{"current_steps": 2197, "total_steps": 7532, "loss": 0.312778115272522, "lr": 1.6976528482392815e-05, "epoch": 0.5834550524498738, "percentage": 29.17, "elapsed_time": "7:56:23", "remaining_time": "19:16:48"} +{"current_steps": 2198, "total_steps": 7532, "loss": 0.2996736466884613, "lr": 1.697338210770642e-05, "epoch": 0.5837206214314168, "percentage": 29.18, "elapsed_time": "7:56:36", "remaining_time": "19:16:36"} +{"current_steps": 2199, "total_steps": 7532, "loss": 0.344571590423584, "lr": 1.6970234388634192e-05, "epoch": 0.5839861904129597, "percentage": 29.2, "elapsed_time": "7:56:49", "remaining_time": "19:16:22"} +{"current_steps": 2200, "total_steps": 7532, "loss": 0.25299468636512756, "lr": 1.6967085325782984e-05, "epoch": 0.5842517593945027, "percentage": 29.21, "elapsed_time": "7:57:02", "remaining_time": "19:16:10"} +{"current_steps": 2201, "total_steps": 7532, "loss": 0.3080691695213318, "lr": 1.6963934919759896e-05, "epoch": 0.5845173283760456, "percentage": 29.22, 
"elapsed_time": "7:57:19", "remaining_time": "19:16:07"} +{"current_steps": 2202, "total_steps": 7532, "loss": 0.27491697669029236, "lr": 1.6960783171172286e-05, "epoch": 0.5847828973575886, "percentage": 29.24, "elapsed_time": "7:57:32", "remaining_time": "19:15:54"} +{"current_steps": 2203, "total_steps": 7532, "loss": 0.3422500193119049, "lr": 1.6957630080627772e-05, "epoch": 0.5850484663391315, "percentage": 29.25, "elapsed_time": "7:57:44", "remaining_time": "19:15:39"} +{"current_steps": 2204, "total_steps": 7532, "loss": 0.27703234553337097, "lr": 1.695447564873424e-05, "epoch": 0.5853140353206745, "percentage": 29.26, "elapsed_time": "7:57:57", "remaining_time": "19:15:26"} +{"current_steps": 2205, "total_steps": 7532, "loss": 0.3088543117046356, "lr": 1.6951319876099825e-05, "epoch": 0.5855796043022174, "percentage": 29.28, "elapsed_time": "7:58:10", "remaining_time": "19:15:13"} +{"current_steps": 2206, "total_steps": 7532, "loss": 0.29875609278678894, "lr": 1.694816276333292e-05, "epoch": 0.5858451732837605, "percentage": 29.29, "elapsed_time": "7:58:24", "remaining_time": "19:15:01"} +{"current_steps": 2207, "total_steps": 7532, "loss": 0.30804386734962463, "lr": 1.6945004311042176e-05, "epoch": 0.5861107422653035, "percentage": 29.3, "elapsed_time": "7:58:36", "remaining_time": "19:14:47"} +{"current_steps": 2208, "total_steps": 7532, "loss": 0.3324572741985321, "lr": 1.694184451983651e-05, "epoch": 0.5863763112468464, "percentage": 29.31, "elapsed_time": "7:58:49", "remaining_time": "19:14:33"} +{"current_steps": 2209, "total_steps": 7532, "loss": 0.30302488803863525, "lr": 1.6938683390325096e-05, "epoch": 0.5866418802283894, "percentage": 29.33, "elapsed_time": "7:59:03", "remaining_time": "19:14:21"} +{"current_steps": 2210, "total_steps": 7532, "loss": 0.3264358341693878, "lr": 1.6935520923117355e-05, "epoch": 0.5869074492099323, "percentage": 29.34, "elapsed_time": "7:59:15", "remaining_time": "19:14:08"} +{"current_steps": 2211, "total_steps": 
7532, "loss": 0.3172164261341095, "lr": 1.693235711882298e-05, "epoch": 0.5871730181914753, "percentage": 29.35, "elapsed_time": "7:59:29", "remaining_time": "19:13:57"} +{"current_steps": 2212, "total_steps": 7532, "loss": 0.300851047039032, "lr": 1.6929191978051908e-05, "epoch": 0.5874385871730182, "percentage": 29.37, "elapsed_time": "7:59:43", "remaining_time": "19:13:45"} +{"current_steps": 2213, "total_steps": 7532, "loss": 0.2887764871120453, "lr": 1.6926025501414352e-05, "epoch": 0.5877041561545612, "percentage": 29.38, "elapsed_time": "7:59:56", "remaining_time": "19:13:33"} +{"current_steps": 2214, "total_steps": 7532, "loss": 0.3246796727180481, "lr": 1.692285768952076e-05, "epoch": 0.5879697251361041, "percentage": 29.39, "elapsed_time": "8:00:10", "remaining_time": "19:13:21"} +{"current_steps": 2215, "total_steps": 7532, "loss": 0.30595412850379944, "lr": 1.6919688542981852e-05, "epoch": 0.5882352941176471, "percentage": 29.41, "elapsed_time": "8:00:23", "remaining_time": "19:13:09"} +{"current_steps": 2216, "total_steps": 7532, "loss": 0.2885501980781555, "lr": 1.6916518062408604e-05, "epoch": 0.58850086309919, "percentage": 29.42, "elapsed_time": "8:00:35", "remaining_time": "19:12:54"} +{"current_steps": 2217, "total_steps": 7532, "loss": 0.34449082612991333, "lr": 1.6913346248412245e-05, "epoch": 0.588766432080733, "percentage": 29.43, "elapsed_time": "8:00:48", "remaining_time": "19:12:40"} +{"current_steps": 2218, "total_steps": 7532, "loss": 0.29410409927368164, "lr": 1.6910173101604267e-05, "epoch": 0.5890320010622759, "percentage": 29.45, "elapsed_time": "8:01:00", "remaining_time": "19:12:25"} +{"current_steps": 2219, "total_steps": 7532, "loss": 0.3250378370285034, "lr": 1.690699862259641e-05, "epoch": 0.5892975700438189, "percentage": 29.46, "elapsed_time": "8:01:12", "remaining_time": "19:12:10"} +{"current_steps": 2220, "total_steps": 7532, "loss": 0.34420648217201233, "lr": 1.690382281200068e-05, "epoch": 0.5895631390253618, 
"percentage": 29.47, "elapsed_time": "8:01:25", "remaining_time": "19:11:55"} +{"current_steps": 2221, "total_steps": 7532, "loss": 0.33951860666275024, "lr": 1.6900645670429338e-05, "epoch": 0.5898287080069048, "percentage": 29.49, "elapsed_time": "8:01:36", "remaining_time": "19:11:40"} +{"current_steps": 2222, "total_steps": 7532, "loss": 0.35045644640922546, "lr": 1.6897467198494892e-05, "epoch": 0.5900942769884477, "percentage": 29.5, "elapsed_time": "8:01:49", "remaining_time": "19:11:25"} +{"current_steps": 2223, "total_steps": 7532, "loss": 0.3262789845466614, "lr": 1.689428739681012e-05, "epoch": 0.5903598459699907, "percentage": 29.51, "elapsed_time": "8:02:01", "remaining_time": "19:11:10"} +{"current_steps": 2224, "total_steps": 7532, "loss": 0.2959234118461609, "lr": 1.689110626598805e-05, "epoch": 0.5906254149515336, "percentage": 29.53, "elapsed_time": "8:02:14", "remaining_time": "19:10:56"} +{"current_steps": 2225, "total_steps": 7532, "loss": 0.3185187876224518, "lr": 1.6887923806641965e-05, "epoch": 0.5908909839330766, "percentage": 29.54, "elapsed_time": "8:02:26", "remaining_time": "19:10:43"} +{"current_steps": 2226, "total_steps": 7532, "loss": 0.2861860692501068, "lr": 1.6884740019385403e-05, "epoch": 0.5911565529146195, "percentage": 29.55, "elapsed_time": "8:02:39", "remaining_time": "19:10:29"} +{"current_steps": 2227, "total_steps": 7532, "loss": 0.28718897700309753, "lr": 1.6881554904832163e-05, "epoch": 0.5914221218961625, "percentage": 29.57, "elapsed_time": "8:02:52", "remaining_time": "19:10:15"} +{"current_steps": 2228, "total_steps": 7532, "loss": 0.2919235825538635, "lr": 1.68783684635963e-05, "epoch": 0.5916876908777055, "percentage": 29.58, "elapsed_time": "8:03:04", "remaining_time": "19:10:01"} +{"current_steps": 2229, "total_steps": 7532, "loss": 0.29265689849853516, "lr": 1.687518069629212e-05, "epoch": 0.5919532598592484, "percentage": 29.59, "elapsed_time": "8:03:17", "remaining_time": "19:09:47"} +{"current_steps": 2230, 
"total_steps": 7532, "loss": 0.3257937431335449, "lr": 1.6871991603534183e-05, "epoch": 0.5922188288407914, "percentage": 29.61, "elapsed_time": "8:03:30", "remaining_time": "19:09:34"} +{"current_steps": 2231, "total_steps": 7532, "loss": 0.30709922313690186, "lr": 1.6868801185937318e-05, "epoch": 0.5924843978223343, "percentage": 29.62, "elapsed_time": "8:03:42", "remaining_time": "19:09:20"} +{"current_steps": 2232, "total_steps": 7532, "loss": 0.34016695618629456, "lr": 1.6865609444116594e-05, "epoch": 0.5927499668038773, "percentage": 29.63, "elapsed_time": "8:03:55", "remaining_time": "19:09:05"} +{"current_steps": 2233, "total_steps": 7532, "loss": 0.27988332509994507, "lr": 1.686241637868734e-05, "epoch": 0.5930155357854202, "percentage": 29.65, "elapsed_time": "8:04:07", "remaining_time": "19:08:51"} +{"current_steps": 2234, "total_steps": 7532, "loss": 0.33241748809814453, "lr": 1.685922199026514e-05, "epoch": 0.5932811047669632, "percentage": 29.66, "elapsed_time": "8:04:20", "remaining_time": "19:08:37"} +{"current_steps": 2235, "total_steps": 7532, "loss": 0.29636645317077637, "lr": 1.685602627946584e-05, "epoch": 0.5935466737485062, "percentage": 29.67, "elapsed_time": "8:04:32", "remaining_time": "19:08:23"} +{"current_steps": 2236, "total_steps": 7532, "loss": 0.32173705101013184, "lr": 1.6852829246905532e-05, "epoch": 0.5938122427300492, "percentage": 29.69, "elapsed_time": "8:04:45", "remaining_time": "19:08:08"} +{"current_steps": 2237, "total_steps": 7532, "loss": 0.318726122379303, "lr": 1.6849630893200567e-05, "epoch": 0.5940778117115921, "percentage": 29.7, "elapsed_time": "8:04:57", "remaining_time": "19:07:54"} +{"current_steps": 2238, "total_steps": 7532, "loss": 0.3085494339466095, "lr": 1.684643121896755e-05, "epoch": 0.5943433806931351, "percentage": 29.71, "elapsed_time": "8:05:09", "remaining_time": "19:07:39"} +{"current_steps": 2239, "total_steps": 7532, "loss": 0.3402160406112671, "lr": 1.684323022482334e-05, "epoch": 
0.594608949674678, "percentage": 29.73, "elapsed_time": "8:05:22", "remaining_time": "19:07:26"} +{"current_steps": 2240, "total_steps": 7532, "loss": 0.28099578619003296, "lr": 1.684002791138505e-05, "epoch": 0.594874518656221, "percentage": 29.74, "elapsed_time": "8:05:35", "remaining_time": "19:07:12"} +{"current_steps": 2241, "total_steps": 7532, "loss": 0.3049670159816742, "lr": 1.6836824279270053e-05, "epoch": 0.5951400876377639, "percentage": 29.75, "elapsed_time": "8:05:48", "remaining_time": "19:06:58"} +{"current_steps": 2242, "total_steps": 7532, "loss": 0.2999834716320038, "lr": 1.6833619329095966e-05, "epoch": 0.5954056566193069, "percentage": 29.77, "elapsed_time": "8:06:00", "remaining_time": "19:06:43"} +{"current_steps": 2243, "total_steps": 7532, "loss": 0.2976648509502411, "lr": 1.6830413061480663e-05, "epoch": 0.5956712256008498, "percentage": 29.78, "elapsed_time": "8:06:12", "remaining_time": "19:06:29"} +{"current_steps": 2244, "total_steps": 7532, "loss": 0.2937200963497162, "lr": 1.6827205477042282e-05, "epoch": 0.5959367945823928, "percentage": 29.79, "elapsed_time": "8:06:24", "remaining_time": "19:06:14"} +{"current_steps": 2245, "total_steps": 7532, "loss": 0.27944231033325195, "lr": 1.6823996576399208e-05, "epoch": 0.5962023635639357, "percentage": 29.81, "elapsed_time": "8:06:37", "remaining_time": "19:06:01"} +{"current_steps": 2246, "total_steps": 7532, "loss": 0.37821248173713684, "lr": 1.6820786360170073e-05, "epoch": 0.5964679325454787, "percentage": 29.82, "elapsed_time": "8:06:49", "remaining_time": "19:05:45"} +{"current_steps": 2247, "total_steps": 7532, "loss": 0.31929296255111694, "lr": 1.681757482897377e-05, "epoch": 0.5967335015270216, "percentage": 29.83, "elapsed_time": "8:07:02", "remaining_time": "19:05:31"} +{"current_steps": 2248, "total_steps": 7532, "loss": 0.29905542731285095, "lr": 1.6814361983429446e-05, "epoch": 0.5969990705085646, "percentage": 29.85, "elapsed_time": "8:07:14", "remaining_time": "19:05:16"} 
+{"current_steps": 2249, "total_steps": 7532, "loss": 0.31056714057922363, "lr": 1.6811147824156503e-05, "epoch": 0.5972646394901076, "percentage": 29.86, "elapsed_time": "8:07:26", "remaining_time": "19:05:02"} +{"current_steps": 2250, "total_steps": 7532, "loss": 0.3311445415019989, "lr": 1.6807932351774585e-05, "epoch": 0.5975302084716505, "percentage": 29.87, "elapsed_time": "8:07:39", "remaining_time": "19:04:47"} +{"current_steps": 2251, "total_steps": 7532, "loss": 0.28413334488868713, "lr": 1.6804715566903603e-05, "epoch": 0.5977957774531935, "percentage": 29.89, "elapsed_time": "8:07:51", "remaining_time": "19:04:33"} +{"current_steps": 2252, "total_steps": 7532, "loss": 0.27681154012680054, "lr": 1.6801497470163717e-05, "epoch": 0.5980613464347364, "percentage": 29.9, "elapsed_time": "8:08:05", "remaining_time": "19:04:21"} +{"current_steps": 2253, "total_steps": 7532, "loss": 0.290216863155365, "lr": 1.679827806217533e-05, "epoch": 0.5983269154162794, "percentage": 29.91, "elapsed_time": "8:08:18", "remaining_time": "19:04:08"} +{"current_steps": 2254, "total_steps": 7532, "loss": 0.31263259053230286, "lr": 1.6795057343559115e-05, "epoch": 0.5985924843978223, "percentage": 29.93, "elapsed_time": "8:08:31", "remaining_time": "19:03:55"} +{"current_steps": 2255, "total_steps": 7532, "loss": 0.31527474522590637, "lr": 1.6791835314935984e-05, "epoch": 0.5988580533793653, "percentage": 29.94, "elapsed_time": "8:08:44", "remaining_time": "19:03:42"} +{"current_steps": 2256, "total_steps": 7532, "loss": 0.308803915977478, "lr": 1.6788611976927104e-05, "epoch": 0.5991236223609082, "percentage": 29.95, "elapsed_time": "8:08:57", "remaining_time": "19:03:29"} +{"current_steps": 2257, "total_steps": 7532, "loss": 0.3038686215877533, "lr": 1.6785387330153898e-05, "epoch": 0.5993891913424512, "percentage": 29.97, "elapsed_time": "8:09:10", "remaining_time": "19:03:16"} +{"current_steps": 2258, "total_steps": 7532, "loss": 0.32485973834991455, "lr": 
1.6782161375238045e-05, "epoch": 0.5996547603239941, "percentage": 29.98, "elapsed_time": "8:09:23", "remaining_time": "19:03:04"} +{"current_steps": 2259, "total_steps": 7532, "loss": 0.32350587844848633, "lr": 1.6778934112801467e-05, "epoch": 0.5999203293055371, "percentage": 29.99, "elapsed_time": "8:09:36", "remaining_time": "19:02:50"} +{"current_steps": 2260, "total_steps": 7532, "loss": 0.31593745946884155, "lr": 1.6775705543466337e-05, "epoch": 0.60018589828708, "percentage": 30.01, "elapsed_time": "8:09:49", "remaining_time": "19:02:37"} +{"current_steps": 2261, "total_steps": 7532, "loss": 0.3266843855381012, "lr": 1.6772475667855098e-05, "epoch": 0.600451467268623, "percentage": 30.02, "elapsed_time": "8:10:02", "remaining_time": "19:02:24"} +{"current_steps": 2262, "total_steps": 7532, "loss": 0.3334394693374634, "lr": 1.676924448659042e-05, "epoch": 0.6007170362501659, "percentage": 30.03, "elapsed_time": "8:10:15", "remaining_time": "19:02:12"} +{"current_steps": 2263, "total_steps": 7532, "loss": 0.29688704013824463, "lr": 1.676601200029524e-05, "epoch": 0.600982605231709, "percentage": 30.05, "elapsed_time": "8:10:28", "remaining_time": "19:01:58"} +{"current_steps": 2264, "total_steps": 7532, "loss": 0.3163599967956543, "lr": 1.6762778209592744e-05, "epoch": 0.6012481742132519, "percentage": 30.06, "elapsed_time": "8:10:41", "remaining_time": "19:01:45"} +{"current_steps": 2265, "total_steps": 7532, "loss": 0.3001909554004669, "lr": 1.675954311510637e-05, "epoch": 0.6015137431947949, "percentage": 30.07, "elapsed_time": "8:10:54", "remaining_time": "19:01:33"} +{"current_steps": 2266, "total_steps": 7532, "loss": 0.306442528963089, "lr": 1.6756306717459804e-05, "epoch": 0.6017793121763378, "percentage": 30.08, "elapsed_time": "8:11:07", "remaining_time": "19:01:19"} +{"current_steps": 2267, "total_steps": 7532, "loss": 0.32714736461639404, "lr": 1.6753069017276988e-05, "epoch": 0.6020448811578808, "percentage": 30.1, "elapsed_time": "8:11:20", 
"remaining_time": "19:01:08"} +{"current_steps": 2268, "total_steps": 7532, "loss": 0.3276352286338806, "lr": 1.6749830015182106e-05, "epoch": 0.6023104501394237, "percentage": 30.11, "elapsed_time": "8:11:33", "remaining_time": "19:00:54"} +{"current_steps": 2269, "total_steps": 7532, "loss": 0.3151017427444458, "lr": 1.6746589711799607e-05, "epoch": 0.6025760191209667, "percentage": 30.12, "elapsed_time": "8:11:46", "remaining_time": "19:00:42"} +{"current_steps": 2270, "total_steps": 7532, "loss": 0.30252715945243835, "lr": 1.674334810775418e-05, "epoch": 0.6028415881025097, "percentage": 30.14, "elapsed_time": "8:11:59", "remaining_time": "19:00:28"} +{"current_steps": 2271, "total_steps": 7532, "loss": 0.28994205594062805, "lr": 1.674010520367077e-05, "epoch": 0.6031071570840526, "percentage": 30.15, "elapsed_time": "8:12:13", "remaining_time": "19:00:17"} +{"current_steps": 2272, "total_steps": 7532, "loss": 0.31821542978286743, "lr": 1.6736861000174566e-05, "epoch": 0.6033727260655956, "percentage": 30.16, "elapsed_time": "8:12:26", "remaining_time": "19:00:03"} +{"current_steps": 2273, "total_steps": 7532, "loss": 0.33488404750823975, "lr": 1.6733615497891018e-05, "epoch": 0.6036382950471385, "percentage": 30.18, "elapsed_time": "8:12:39", "remaining_time": "18:59:51"} +{"current_steps": 2274, "total_steps": 7532, "loss": 0.32545825839042664, "lr": 1.6730368697445815e-05, "epoch": 0.6039038640286815, "percentage": 30.19, "elapsed_time": "8:12:52", "remaining_time": "18:59:38"} +{"current_steps": 2275, "total_steps": 7532, "loss": 0.3229105770587921, "lr": 1.6727120599464904e-05, "epoch": 0.6041694330102244, "percentage": 30.2, "elapsed_time": "8:13:05", "remaining_time": "18:59:24"} +{"current_steps": 2276, "total_steps": 7532, "loss": 0.29090648889541626, "lr": 1.672387120457448e-05, "epoch": 0.6044350019917674, "percentage": 30.22, "elapsed_time": "8:13:19", "remaining_time": "18:59:13"} +{"current_steps": 2277, "total_steps": 7532, "loss": 
0.3102695345878601, "lr": 1.6720620513400993e-05, "epoch": 0.6047005709733103, "percentage": 30.23, "elapsed_time": "8:13:31", "remaining_time": "18:58:59"} +{"current_steps": 2278, "total_steps": 7532, "loss": 0.3104533851146698, "lr": 1.6717368526571133e-05, "epoch": 0.6049661399548533, "percentage": 30.24, "elapsed_time": "8:13:45", "remaining_time": "18:58:47"} +{"current_steps": 2279, "total_steps": 7532, "loss": 0.3340798616409302, "lr": 1.671411524471184e-05, "epoch": 0.6052317089363962, "percentage": 30.26, "elapsed_time": "8:13:57", "remaining_time": "18:58:33"} +{"current_steps": 2280, "total_steps": 7532, "loss": 0.2807982563972473, "lr": 1.6710860668450318e-05, "epoch": 0.6054972779179392, "percentage": 30.27, "elapsed_time": "8:14:11", "remaining_time": "18:58:21"} +{"current_steps": 2281, "total_steps": 7532, "loss": 0.28892064094543457, "lr": 1.6707604798414005e-05, "epoch": 0.6057628468994821, "percentage": 30.28, "elapsed_time": "8:14:23", "remaining_time": "18:58:07"} +{"current_steps": 2282, "total_steps": 7532, "loss": 0.29660698771476746, "lr": 1.6704347635230594e-05, "epoch": 0.6060284158810251, "percentage": 30.3, "elapsed_time": "8:14:36", "remaining_time": "18:57:54"} +{"current_steps": 2283, "total_steps": 7532, "loss": 0.32079893350601196, "lr": 1.6701089179528032e-05, "epoch": 0.606293984862568, "percentage": 30.31, "elapsed_time": "8:14:49", "remaining_time": "18:57:42"} +{"current_steps": 2284, "total_steps": 7532, "loss": 0.3464012145996094, "lr": 1.6697829431934508e-05, "epoch": 0.606559553844111, "percentage": 30.32, "elapsed_time": "8:15:02", "remaining_time": "18:57:28"} +{"current_steps": 2285, "total_steps": 7532, "loss": 0.3378494381904602, "lr": 1.669456839307846e-05, "epoch": 0.6068251228256539, "percentage": 30.34, "elapsed_time": "8:15:15", "remaining_time": "18:57:14"} +{"current_steps": 2286, "total_steps": 7532, "loss": 0.2856704294681549, "lr": 1.6691306063588583e-05, "epoch": 0.6070906918071969, "percentage": 30.35, 
"elapsed_time": "8:15:27", "remaining_time": "18:57:00"} +{"current_steps": 2287, "total_steps": 7532, "loss": 0.317970871925354, "lr": 1.6688042444093816e-05, "epoch": 0.6073562607887398, "percentage": 30.36, "elapsed_time": "8:15:40", "remaining_time": "18:56:47"} +{"current_steps": 2288, "total_steps": 7532, "loss": 0.3067381978034973, "lr": 1.6684777535223338e-05, "epoch": 0.6076218297702828, "percentage": 30.38, "elapsed_time": "8:15:53", "remaining_time": "18:56:33"} +{"current_steps": 2289, "total_steps": 7532, "loss": 0.28682243824005127, "lr": 1.6681511337606594e-05, "epoch": 0.6078873987518257, "percentage": 30.39, "elapsed_time": "8:16:05", "remaining_time": "18:56:19"} +{"current_steps": 2290, "total_steps": 7532, "loss": 0.30516478419303894, "lr": 1.667824385187327e-05, "epoch": 0.6081529677333687, "percentage": 30.4, "elapsed_time": "8:16:18", "remaining_time": "18:56:04"} +{"current_steps": 2291, "total_steps": 7532, "loss": 0.3114034831523895, "lr": 1.6674975078653284e-05, "epoch": 0.6084185367149118, "percentage": 30.42, "elapsed_time": "8:16:30", "remaining_time": "18:55:50"} +{"current_steps": 2292, "total_steps": 7532, "loss": 0.3119916617870331, "lr": 1.6671705018576837e-05, "epoch": 0.6086841056964547, "percentage": 30.43, "elapsed_time": "8:16:43", "remaining_time": "18:55:36"} +{"current_steps": 2293, "total_steps": 7532, "loss": 0.2695278823375702, "lr": 1.666843367227434e-05, "epoch": 0.6089496746779977, "percentage": 30.44, "elapsed_time": "8:16:56", "remaining_time": "18:55:23"} +{"current_steps": 2294, "total_steps": 7532, "loss": 0.32162508368492126, "lr": 1.6665161040376483e-05, "epoch": 0.6092152436595406, "percentage": 30.46, "elapsed_time": "8:17:08", "remaining_time": "18:55:09"} +{"current_steps": 2295, "total_steps": 7532, "loss": 0.3115222752094269, "lr": 1.6661887123514183e-05, "epoch": 0.6094808126410836, "percentage": 30.47, "elapsed_time": "8:17:21", "remaining_time": "18:54:56"} +{"current_steps": 2296, "total_steps": 
7532, "loss": 0.3239362835884094, "lr": 1.6658611922318618e-05, "epoch": 0.6097463816226265, "percentage": 30.48, "elapsed_time": "8:17:34", "remaining_time": "18:54:42"} +{"current_steps": 2297, "total_steps": 7532, "loss": 0.29716256260871887, "lr": 1.66553354374212e-05, "epoch": 0.6100119506041695, "percentage": 30.5, "elapsed_time": "8:17:46", "remaining_time": "18:54:28"} +{"current_steps": 2298, "total_steps": 7532, "loss": 0.3337557911872864, "lr": 1.6652057669453606e-05, "epoch": 0.6102775195857124, "percentage": 30.51, "elapsed_time": "8:17:58", "remaining_time": "18:54:12"} +{"current_steps": 2299, "total_steps": 7532, "loss": 0.30258649587631226, "lr": 1.6648778619047747e-05, "epoch": 0.6105430885672554, "percentage": 30.52, "elapsed_time": "8:18:11", "remaining_time": "18:53:58"} +{"current_steps": 2300, "total_steps": 7532, "loss": 0.3151426315307617, "lr": 1.6645498286835784e-05, "epoch": 0.6108086575487983, "percentage": 30.54, "elapsed_time": "8:18:23", "remaining_time": "18:53:43"} +{"current_steps": 2301, "total_steps": 7532, "loss": 0.274954617023468, "lr": 1.664221667345013e-05, "epoch": 0.6110742265303413, "percentage": 30.55, "elapsed_time": "8:18:42", "remaining_time": "18:53:44"} +{"current_steps": 2302, "total_steps": 7532, "loss": 0.3055363893508911, "lr": 1.6638933779523437e-05, "epoch": 0.6113397955118842, "percentage": 30.56, "elapsed_time": "8:18:54", "remaining_time": "18:53:30"} +{"current_steps": 2303, "total_steps": 7532, "loss": 0.30296921730041504, "lr": 1.663564960568861e-05, "epoch": 0.6116053644934272, "percentage": 30.58, "elapsed_time": "8:19:07", "remaining_time": "18:53:17"} +{"current_steps": 2304, "total_steps": 7532, "loss": 0.3118343651294708, "lr": 1.66323641525788e-05, "epoch": 0.6118709334749701, "percentage": 30.59, "elapsed_time": "8:19:21", "remaining_time": "18:53:04"} +{"current_steps": 2305, "total_steps": 7532, "loss": 0.3277447819709778, "lr": 1.6629077420827405e-05, "epoch": 0.6121365024565131, 
"percentage": 30.6, "elapsed_time": "8:19:33", "remaining_time": "18:52:51"} +{"current_steps": 2306, "total_steps": 7532, "loss": 0.307643860578537, "lr": 1.6625789411068063e-05, "epoch": 0.612402071438056, "percentage": 30.62, "elapsed_time": "8:19:47", "remaining_time": "18:52:39"} +{"current_steps": 2307, "total_steps": 7532, "loss": 0.3043777346611023, "lr": 1.6622500123934665e-05, "epoch": 0.612667640419599, "percentage": 30.63, "elapsed_time": "8:20:00", "remaining_time": "18:52:26"} +{"current_steps": 2308, "total_steps": 7532, "loss": 0.28634852170944214, "lr": 1.6619209560061352e-05, "epoch": 0.6129332094011419, "percentage": 30.64, "elapsed_time": "8:20:14", "remaining_time": "18:52:14"} +{"current_steps": 2309, "total_steps": 7532, "loss": 0.33200016617774963, "lr": 1.6615917720082503e-05, "epoch": 0.6131987783826849, "percentage": 30.66, "elapsed_time": "8:20:27", "remaining_time": "18:52:01"} +{"current_steps": 2310, "total_steps": 7532, "loss": 0.26568055152893066, "lr": 1.661262460463274e-05, "epoch": 0.6134643473642278, "percentage": 30.67, "elapsed_time": "8:20:40", "remaining_time": "18:51:49"} +{"current_steps": 2311, "total_steps": 7532, "loss": 0.2772855758666992, "lr": 1.6609330214346945e-05, "epoch": 0.6137299163457708, "percentage": 30.68, "elapsed_time": "8:20:53", "remaining_time": "18:51:36"} +{"current_steps": 2312, "total_steps": 7532, "loss": 0.3330409824848175, "lr": 1.6606034549860236e-05, "epoch": 0.6139954853273137, "percentage": 30.7, "elapsed_time": "8:21:07", "remaining_time": "18:51:25"} +{"current_steps": 2313, "total_steps": 7532, "loss": 0.27702978253364563, "lr": 1.6602737611807975e-05, "epoch": 0.6142610543088567, "percentage": 30.71, "elapsed_time": "8:21:20", "remaining_time": "18:51:12"} +{"current_steps": 2314, "total_steps": 7532, "loss": 0.29985183477401733, "lr": 1.6599439400825775e-05, "epoch": 0.6145266232903996, "percentage": 30.72, "elapsed_time": "8:21:33", "remaining_time": "18:51:00"} +{"current_steps": 
2315, "total_steps": 7532, "loss": 0.2666100859642029, "lr": 1.659613991754949e-05, "epoch": 0.6147921922719426, "percentage": 30.74, "elapsed_time": "8:21:46", "remaining_time": "18:50:47"} +{"current_steps": 2316, "total_steps": 7532, "loss": 0.2968613803386688, "lr": 1.6592839162615223e-05, "epoch": 0.6150577612534855, "percentage": 30.75, "elapsed_time": "8:21:59", "remaining_time": "18:50:34"} +{"current_steps": 2317, "total_steps": 7532, "loss": 0.2693714499473572, "lr": 1.6589537136659326e-05, "epoch": 0.6153233302350285, "percentage": 30.76, "elapsed_time": "8:22:13", "remaining_time": "18:50:22"} +{"current_steps": 2318, "total_steps": 7532, "loss": 0.3192713260650635, "lr": 1.658623384031838e-05, "epoch": 0.6155888992165715, "percentage": 30.78, "elapsed_time": "8:22:26", "remaining_time": "18:50:09"} +{"current_steps": 2319, "total_steps": 7532, "loss": 0.2958469092845917, "lr": 1.658292927422923e-05, "epoch": 0.6158544681981145, "percentage": 30.79, "elapsed_time": "8:22:40", "remaining_time": "18:49:58"} +{"current_steps": 2320, "total_steps": 7532, "loss": 0.28580743074417114, "lr": 1.657962343902895e-05, "epoch": 0.6161200371796575, "percentage": 30.8, "elapsed_time": "8:22:52", "remaining_time": "18:49:45"} +{"current_steps": 2321, "total_steps": 7532, "loss": 0.34325680136680603, "lr": 1.6576316335354875e-05, "epoch": 0.6163856061612004, "percentage": 30.82, "elapsed_time": "8:23:06", "remaining_time": "18:49:33"} +{"current_steps": 2322, "total_steps": 7532, "loss": 0.3220894932746887, "lr": 1.657300796384457e-05, "epoch": 0.6166511751427434, "percentage": 30.83, "elapsed_time": "8:23:19", "remaining_time": "18:49:20"} +{"current_steps": 2323, "total_steps": 7532, "loss": 0.2934642434120178, "lr": 1.656969832513585e-05, "epoch": 0.6169167441242863, "percentage": 30.84, "elapsed_time": "8:23:33", "remaining_time": "18:49:08"} +{"current_steps": 2324, "total_steps": 7532, "loss": 0.3066999912261963, "lr": 1.656638741986677e-05, "epoch": 
0.6171823131058293, "percentage": 30.86, "elapsed_time": "8:23:46", "remaining_time": "18:48:56"} +{"current_steps": 2325, "total_steps": 7532, "loss": 0.2947896122932434, "lr": 1.6563075248675645e-05, "epoch": 0.6174478820873722, "percentage": 30.87, "elapsed_time": "8:24:00", "remaining_time": "18:48:44"} +{"current_steps": 2326, "total_steps": 7532, "loss": 0.33616161346435547, "lr": 1.6559761812201018e-05, "epoch": 0.6177134510689152, "percentage": 30.88, "elapsed_time": "8:24:13", "remaining_time": "18:48:33"} +{"current_steps": 2327, "total_steps": 7532, "loss": 0.29555875062942505, "lr": 1.6556447111081678e-05, "epoch": 0.6179790200504581, "percentage": 30.89, "elapsed_time": "8:24:26", "remaining_time": "18:48:20"} +{"current_steps": 2328, "total_steps": 7532, "loss": 0.276498019695282, "lr": 1.655313114595666e-05, "epoch": 0.6182445890320011, "percentage": 30.91, "elapsed_time": "8:24:40", "remaining_time": "18:48:08"} +{"current_steps": 2329, "total_steps": 7532, "loss": 0.3081165552139282, "lr": 1.6549813917465242e-05, "epoch": 0.618510158013544, "percentage": 30.92, "elapsed_time": "8:24:53", "remaining_time": "18:47:55"} +{"current_steps": 2330, "total_steps": 7532, "loss": 0.3610053062438965, "lr": 1.654649542624695e-05, "epoch": 0.618775726995087, "percentage": 30.93, "elapsed_time": "8:25:07", "remaining_time": "18:47:44"} +{"current_steps": 2331, "total_steps": 7532, "loss": 0.2775106430053711, "lr": 1.654317567294155e-05, "epoch": 0.6190412959766299, "percentage": 30.95, "elapsed_time": "8:25:20", "remaining_time": "18:47:31"} +{"current_steps": 2332, "total_steps": 7532, "loss": 0.2915893793106079, "lr": 1.653985465818905e-05, "epoch": 0.6193068649581729, "percentage": 30.96, "elapsed_time": "8:25:33", "remaining_time": "18:47:19"} +{"current_steps": 2333, "total_steps": 7532, "loss": 0.30868977308273315, "lr": 1.6536532382629696e-05, "epoch": 0.6195724339397158, "percentage": 30.97, "elapsed_time": "8:25:47", "remaining_time": "18:47:07"} 
+{"current_steps": 2334, "total_steps": 7532, "loss": 0.3083038330078125, "lr": 1.6533208846903996e-05, "epoch": 0.6198380029212588, "percentage": 30.99, "elapsed_time": "8:26:00", "remaining_time": "18:46:55"} +{"current_steps": 2335, "total_steps": 7532, "loss": 0.25192466378211975, "lr": 1.652988405165268e-05, "epoch": 0.6201035719028017, "percentage": 31.0, "elapsed_time": "8:26:13", "remaining_time": "18:46:43"} +{"current_steps": 2336, "total_steps": 7532, "loss": 0.32154130935668945, "lr": 1.6526557997516737e-05, "epoch": 0.6203691408843447, "percentage": 31.01, "elapsed_time": "8:26:27", "remaining_time": "18:46:31"} +{"current_steps": 2337, "total_steps": 7532, "loss": 0.2860945165157318, "lr": 1.6523230685137382e-05, "epoch": 0.6206347098658876, "percentage": 31.03, "elapsed_time": "8:26:40", "remaining_time": "18:46:17"} +{"current_steps": 2338, "total_steps": 7532, "loss": 0.3279789984226227, "lr": 1.6519902115156084e-05, "epoch": 0.6209002788474306, "percentage": 31.04, "elapsed_time": "8:26:53", "remaining_time": "18:46:05"} +{"current_steps": 2339, "total_steps": 7532, "loss": 0.3082200884819031, "lr": 1.6516572288214555e-05, "epoch": 0.6211658478289735, "percentage": 31.05, "elapsed_time": "8:27:06", "remaining_time": "18:45:52"} +{"current_steps": 2340, "total_steps": 7532, "loss": 0.29032304883003235, "lr": 1.6513241204954745e-05, "epoch": 0.6214314168105165, "percentage": 31.07, "elapsed_time": "8:27:19", "remaining_time": "18:45:40"} +{"current_steps": 2341, "total_steps": 7532, "loss": 0.3096848130226135, "lr": 1.6509908866018843e-05, "epoch": 0.6216969857920595, "percentage": 31.08, "elapsed_time": "8:27:33", "remaining_time": "18:45:27"} +{"current_steps": 2342, "total_steps": 7532, "loss": 0.309989333152771, "lr": 1.6506575272049294e-05, "epoch": 0.6219625547736024, "percentage": 31.09, "elapsed_time": "8:27:46", "remaining_time": "18:45:15"} +{"current_steps": 2343, "total_steps": 7532, "loss": 0.311350554227829, "lr": 
1.6503240423688768e-05, "epoch": 0.6222281237551454, "percentage": 31.11, "elapsed_time": "8:27:59", "remaining_time": "18:45:02"} +{"current_steps": 2344, "total_steps": 7532, "loss": 0.3313952386379242, "lr": 1.6499904321580187e-05, "epoch": 0.6224936927366883, "percentage": 31.12, "elapsed_time": "8:28:12", "remaining_time": "18:44:49"} +{"current_steps": 2345, "total_steps": 7532, "loss": 0.2984781265258789, "lr": 1.649656696636671e-05, "epoch": 0.6227592617182313, "percentage": 31.13, "elapsed_time": "8:28:25", "remaining_time": "18:44:36"} +{"current_steps": 2346, "total_steps": 7532, "loss": 0.3058238625526428, "lr": 1.6493228358691748e-05, "epoch": 0.6230248306997742, "percentage": 31.15, "elapsed_time": "8:28:38", "remaining_time": "18:44:22"} +{"current_steps": 2347, "total_steps": 7532, "loss": 0.33439138531684875, "lr": 1.6489888499198935e-05, "epoch": 0.6232903996813173, "percentage": 31.16, "elapsed_time": "8:28:51", "remaining_time": "18:44:10"} +{"current_steps": 2348, "total_steps": 7532, "loss": 0.2883133292198181, "lr": 1.6486547388532157e-05, "epoch": 0.6235559686628602, "percentage": 31.17, "elapsed_time": "8:29:04", "remaining_time": "18:43:57"} +{"current_steps": 2349, "total_steps": 7532, "loss": 0.30258435010910034, "lr": 1.648320502733555e-05, "epoch": 0.6238215376444032, "percentage": 31.19, "elapsed_time": "8:29:18", "remaining_time": "18:43:45"} +{"current_steps": 2350, "total_steps": 7532, "loss": 0.316353440284729, "lr": 1.6479861416253476e-05, "epoch": 0.6240871066259461, "percentage": 31.2, "elapsed_time": "8:29:30", "remaining_time": "18:43:32"} +{"current_steps": 2351, "total_steps": 7532, "loss": 0.3230556547641754, "lr": 1.647651655593054e-05, "epoch": 0.6243526756074891, "percentage": 31.21, "elapsed_time": "8:29:44", "remaining_time": "18:43:20"} +{"current_steps": 2352, "total_steps": 7532, "loss": 0.3327128291130066, "lr": 1.6473170447011593e-05, "epoch": 0.624618244589032, "percentage": 31.23, "elapsed_time": "8:29:57", 
"remaining_time": "18:43:08"} +{"current_steps": 2353, "total_steps": 7532, "loss": 0.3152993619441986, "lr": 1.6469823090141733e-05, "epoch": 0.624883813570575, "percentage": 31.24, "elapsed_time": "8:30:11", "remaining_time": "18:42:56"} +{"current_steps": 2354, "total_steps": 7532, "loss": 0.26792511343955994, "lr": 1.6466474485966286e-05, "epoch": 0.6251493825521179, "percentage": 31.25, "elapsed_time": "8:30:24", "remaining_time": "18:42:44"} +{"current_steps": 2355, "total_steps": 7532, "loss": 0.31665652990341187, "lr": 1.6463124635130824e-05, "epoch": 0.6254149515336609, "percentage": 31.27, "elapsed_time": "8:30:38", "remaining_time": "18:42:32"} +{"current_steps": 2356, "total_steps": 7532, "loss": 0.29573655128479004, "lr": 1.645977353828115e-05, "epoch": 0.6256805205152038, "percentage": 31.28, "elapsed_time": "8:30:51", "remaining_time": "18:42:19"} +{"current_steps": 2357, "total_steps": 7532, "loss": 0.3210436999797821, "lr": 1.6456421196063334e-05, "epoch": 0.6259460894967468, "percentage": 31.29, "elapsed_time": "8:31:04", "remaining_time": "18:42:06"} +{"current_steps": 2358, "total_steps": 7532, "loss": 0.2837316691875458, "lr": 1.6453067609123656e-05, "epoch": 0.6262116584782897, "percentage": 31.31, "elapsed_time": "8:31:16", "remaining_time": "18:41:52"} +{"current_steps": 2359, "total_steps": 7532, "loss": 0.2885812520980835, "lr": 1.6449712778108645e-05, "epoch": 0.6264772274598327, "percentage": 31.32, "elapsed_time": "8:31:29", "remaining_time": "18:41:37"} +{"current_steps": 2360, "total_steps": 7532, "loss": 0.34908249974250793, "lr": 1.6446356703665078e-05, "epoch": 0.6267427964413756, "percentage": 31.33, "elapsed_time": "8:31:42", "remaining_time": "18:41:24"} +{"current_steps": 2361, "total_steps": 7532, "loss": 0.30398470163345337, "lr": 1.6442999386439967e-05, "epoch": 0.6270083654229186, "percentage": 31.35, "elapsed_time": "8:31:54", "remaining_time": "18:41:10"} +{"current_steps": 2362, "total_steps": 7532, "loss": 
0.2780487537384033, "lr": 1.6439640827080565e-05, "epoch": 0.6272739344044616, "percentage": 31.36, "elapsed_time": "8:32:07", "remaining_time": "18:40:58"} +{"current_steps": 2363, "total_steps": 7532, "loss": 0.2575770616531372, "lr": 1.6436281026234357e-05, "epoch": 0.6275395033860045, "percentage": 31.37, "elapsed_time": "8:32:20", "remaining_time": "18:40:44"} +{"current_steps": 2364, "total_steps": 7532, "loss": 0.2888547480106354, "lr": 1.6432919984549077e-05, "epoch": 0.6278050723675475, "percentage": 31.39, "elapsed_time": "8:32:33", "remaining_time": "18:40:31"} +{"current_steps": 2365, "total_steps": 7532, "loss": 0.3259009122848511, "lr": 1.6429557702672694e-05, "epoch": 0.6280706413490904, "percentage": 31.4, "elapsed_time": "8:32:46", "remaining_time": "18:40:17"} +{"current_steps": 2366, "total_steps": 7532, "loss": 0.2899959683418274, "lr": 1.6426194181253415e-05, "epoch": 0.6283362103306334, "percentage": 31.41, "elapsed_time": "8:32:59", "remaining_time": "18:40:04"} +{"current_steps": 2367, "total_steps": 7532, "loss": 0.28471851348876953, "lr": 1.6422829420939688e-05, "epoch": 0.6286017793121763, "percentage": 31.43, "elapsed_time": "8:33:12", "remaining_time": "18:39:51"} +{"current_steps": 2368, "total_steps": 7532, "loss": 0.2958947420120239, "lr": 1.64194634223802e-05, "epoch": 0.6288673482937193, "percentage": 31.44, "elapsed_time": "8:33:25", "remaining_time": "18:39:39"} +{"current_steps": 2369, "total_steps": 7532, "loss": 0.3089750111103058, "lr": 1.6416096186223872e-05, "epoch": 0.6291329172752622, "percentage": 31.45, "elapsed_time": "8:33:38", "remaining_time": "18:39:25"} +{"current_steps": 2370, "total_steps": 7532, "loss": 0.31597089767456055, "lr": 1.641272771311987e-05, "epoch": 0.6293984862568052, "percentage": 31.47, "elapsed_time": "8:33:51", "remaining_time": "18:39:12"} +{"current_steps": 2371, "total_steps": 7532, "loss": 0.2968488931655884, "lr": 1.6409358003717598e-05, "epoch": 0.6296640552383481, "percentage": 31.48, 
"elapsed_time": "8:34:04", "remaining_time": "18:39:00"} +{"current_steps": 2372, "total_steps": 7532, "loss": 0.27532660961151123, "lr": 1.6405987058666694e-05, "epoch": 0.6299296242198911, "percentage": 31.49, "elapsed_time": "8:34:18", "remaining_time": "18:38:48"} +{"current_steps": 2373, "total_steps": 7532, "loss": 0.2800731956958771, "lr": 1.6402614878617037e-05, "epoch": 0.630195193201434, "percentage": 31.51, "elapsed_time": "8:34:31", "remaining_time": "18:38:36"} +{"current_steps": 2374, "total_steps": 7532, "loss": 0.31728652119636536, "lr": 1.6399241464218744e-05, "epoch": 0.630460762182977, "percentage": 31.52, "elapsed_time": "8:34:44", "remaining_time": "18:38:23"} +{"current_steps": 2375, "total_steps": 7532, "loss": 0.2776367664337158, "lr": 1.6395866816122167e-05, "epoch": 0.63072633116452, "percentage": 31.53, "elapsed_time": "8:34:58", "remaining_time": "18:38:11"} +{"current_steps": 2376, "total_steps": 7532, "loss": 0.3308418095111847, "lr": 1.63924909349779e-05, "epoch": 0.630991900146063, "percentage": 31.55, "elapsed_time": "8:35:11", "remaining_time": "18:37:58"} +{"current_steps": 2377, "total_steps": 7532, "loss": 0.31589487195014954, "lr": 1.6389113821436775e-05, "epoch": 0.6312574691276059, "percentage": 31.56, "elapsed_time": "8:35:25", "remaining_time": "18:37:47"} +{"current_steps": 2378, "total_steps": 7532, "loss": 0.27181899547576904, "lr": 1.6385735476149855e-05, "epoch": 0.6315230381091489, "percentage": 31.57, "elapsed_time": "8:35:38", "remaining_time": "18:37:34"} +{"current_steps": 2379, "total_steps": 7532, "loss": 0.2603747546672821, "lr": 1.638235589976845e-05, "epoch": 0.6317886070906918, "percentage": 31.59, "elapsed_time": "8:35:51", "remaining_time": "18:37:22"} +{"current_steps": 2380, "total_steps": 7532, "loss": 0.29050707817077637, "lr": 1.63789750929441e-05, "epoch": 0.6320541760722348, "percentage": 31.6, "elapsed_time": "8:36:04", "remaining_time": "18:37:09"} +{"current_steps": 2381, "total_steps": 7532, 
"loss": 0.2979413866996765, "lr": 1.6375593056328586e-05, "epoch": 0.6323197450537777, "percentage": 31.61, "elapsed_time": "8:36:17", "remaining_time": "18:36:55"} +{"current_steps": 2382, "total_steps": 7532, "loss": 0.30875420570373535, "lr": 1.6372209790573926e-05, "epoch": 0.6325853140353207, "percentage": 31.63, "elapsed_time": "8:36:30", "remaining_time": "18:36:43"} +{"current_steps": 2383, "total_steps": 7532, "loss": 0.2755935788154602, "lr": 1.6368825296332366e-05, "epoch": 0.6328508830168637, "percentage": 31.64, "elapsed_time": "8:36:43", "remaining_time": "18:36:29"} +{"current_steps": 2384, "total_steps": 7532, "loss": 0.3459136486053467, "lr": 1.6365439574256406e-05, "epoch": 0.6331164519984066, "percentage": 31.65, "elapsed_time": "8:36:56", "remaining_time": "18:36:17"} +{"current_steps": 2385, "total_steps": 7532, "loss": 0.29043829441070557, "lr": 1.6362052624998767e-05, "epoch": 0.6333820209799496, "percentage": 31.66, "elapsed_time": "8:37:09", "remaining_time": "18:36:03"} +{"current_steps": 2386, "total_steps": 7532, "loss": 0.321551114320755, "lr": 1.635866444921242e-05, "epoch": 0.6336475899614925, "percentage": 31.68, "elapsed_time": "8:37:22", "remaining_time": "18:35:51"} +{"current_steps": 2387, "total_steps": 7532, "loss": 0.28478139638900757, "lr": 1.6355275047550553e-05, "epoch": 0.6339131589430355, "percentage": 31.69, "elapsed_time": "8:37:35", "remaining_time": "18:35:36"} +{"current_steps": 2388, "total_steps": 7532, "loss": 0.30913087725639343, "lr": 1.6351884420666616e-05, "epoch": 0.6341787279245784, "percentage": 31.7, "elapsed_time": "8:37:47", "remaining_time": "18:35:23"} +{"current_steps": 2389, "total_steps": 7532, "loss": 0.328342467546463, "lr": 1.6348492569214275e-05, "epoch": 0.6344442969061214, "percentage": 31.72, "elapsed_time": "8:37:59", "remaining_time": "18:35:08"} +{"current_steps": 2390, "total_steps": 7532, "loss": 0.3291119933128357, "lr": 1.634509949384744e-05, "epoch": 0.6347098658876643, "percentage": 
31.73, "elapsed_time": "8:38:12", "remaining_time": "18:34:54"} +{"current_steps": 2391, "total_steps": 7532, "loss": 0.3542378544807434, "lr": 1.6341705195220257e-05, "epoch": 0.6349754348692073, "percentage": 31.74, "elapsed_time": "8:38:25", "remaining_time": "18:34:40"} +{"current_steps": 2392, "total_steps": 7532, "loss": 0.3167935609817505, "lr": 1.63383096739871e-05, "epoch": 0.6352410038507502, "percentage": 31.76, "elapsed_time": "8:38:38", "remaining_time": "18:34:27"} +{"current_steps": 2393, "total_steps": 7532, "loss": 0.27623263001441956, "lr": 1.63349129308026e-05, "epoch": 0.6355065728322932, "percentage": 31.77, "elapsed_time": "8:38:51", "remaining_time": "18:34:14"} +{"current_steps": 2394, "total_steps": 7532, "loss": 0.3615761399269104, "lr": 1.6331514966321596e-05, "epoch": 0.6357721418138361, "percentage": 31.78, "elapsed_time": "8:39:05", "remaining_time": "18:34:03"} +{"current_steps": 2395, "total_steps": 7532, "loss": 0.3503292500972748, "lr": 1.632811578119918e-05, "epoch": 0.6360377107953791, "percentage": 31.8, "elapsed_time": "8:39:18", "remaining_time": "18:33:51"} +{"current_steps": 2396, "total_steps": 7532, "loss": 0.2994767129421234, "lr": 1.6324715376090673e-05, "epoch": 0.636303279776922, "percentage": 31.81, "elapsed_time": "8:39:32", "remaining_time": "18:33:40"} +{"current_steps": 2397, "total_steps": 7532, "loss": 0.29903143644332886, "lr": 1.6321313751651638e-05, "epoch": 0.636568848758465, "percentage": 31.82, "elapsed_time": "8:39:45", "remaining_time": "18:33:27"} +{"current_steps": 2398, "total_steps": 7532, "loss": 0.310536652803421, "lr": 1.6317910908537865e-05, "epoch": 0.6368344177400079, "percentage": 31.84, "elapsed_time": "8:39:58", "remaining_time": "18:33:14"} +{"current_steps": 2399, "total_steps": 7532, "loss": 0.32584354281425476, "lr": 1.6314506847405382e-05, "epoch": 0.6370999867215509, "percentage": 31.85, "elapsed_time": "8:40:12", "remaining_time": "18:33:03"} +{"current_steps": 2400, "total_steps": 
7532, "loss": 0.3536352217197418, "lr": 1.6311101568910448e-05, "epoch": 0.6373655557030938, "percentage": 31.86, "elapsed_time": "8:40:25", "remaining_time": "18:32:50"} +{"current_steps": 2401, "total_steps": 7532, "loss": 0.3198434114456177, "lr": 1.6307695073709565e-05, "epoch": 0.6376311246846368, "percentage": 31.88, "elapsed_time": "8:40:45", "remaining_time": "18:32:52"} +{"current_steps": 2402, "total_steps": 7532, "loss": 0.264182448387146, "lr": 1.6304287362459462e-05, "epoch": 0.6378966936661797, "percentage": 31.89, "elapsed_time": "8:40:58", "remaining_time": "18:32:40"} +{"current_steps": 2403, "total_steps": 7532, "loss": 0.31182044744491577, "lr": 1.6300878435817115e-05, "epoch": 0.6381622626477228, "percentage": 31.9, "elapsed_time": "8:41:12", "remaining_time": "18:32:28"} +{"current_steps": 2404, "total_steps": 7532, "loss": 0.28221404552459717, "lr": 1.6297468294439708e-05, "epoch": 0.6384278316292658, "percentage": 31.92, "elapsed_time": "8:41:25", "remaining_time": "18:32:16"} +{"current_steps": 2405, "total_steps": 7532, "loss": 0.27788785099983215, "lr": 1.6294056938984693e-05, "epoch": 0.6386934006108087, "percentage": 31.93, "elapsed_time": "8:41:39", "remaining_time": "18:32:05"} +{"current_steps": 2406, "total_steps": 7532, "loss": 0.3300796151161194, "lr": 1.6290644370109728e-05, "epoch": 0.6389589695923517, "percentage": 31.94, "elapsed_time": "8:41:53", "remaining_time": "18:31:52"} +{"current_steps": 2407, "total_steps": 7532, "loss": 0.32170963287353516, "lr": 1.628723058847272e-05, "epoch": 0.6392245385738946, "percentage": 31.96, "elapsed_time": "8:42:06", "remaining_time": "18:31:41"} +{"current_steps": 2408, "total_steps": 7532, "loss": 0.3243589997291565, "lr": 1.628381559473181e-05, "epoch": 0.6394901075554376, "percentage": 31.97, "elapsed_time": "8:42:19", "remaining_time": "18:31:28"} +{"current_steps": 2409, "total_steps": 7532, "loss": 0.311046838760376, "lr": 1.6280399389545358e-05, "epoch": 0.6397556765369805, 
"percentage": 31.98, "elapsed_time": "8:42:33", "remaining_time": "18:31:17"} +{"current_steps": 2410, "total_steps": 7532, "loss": 0.2642543911933899, "lr": 1.6276981973571973e-05, "epoch": 0.6400212455185235, "percentage": 32.0, "elapsed_time": "8:42:47", "remaining_time": "18:31:04"} +{"current_steps": 2411, "total_steps": 7532, "loss": 0.3593730926513672, "lr": 1.62735633474705e-05, "epoch": 0.6402868145000664, "percentage": 32.01, "elapsed_time": "8:43:00", "remaining_time": "18:30:53"} +{"current_steps": 2412, "total_steps": 7532, "loss": 0.3147425353527069, "lr": 1.62701435119e-05, "epoch": 0.6405523834816094, "percentage": 32.02, "elapsed_time": "8:43:13", "remaining_time": "18:30:40"} +{"current_steps": 2413, "total_steps": 7532, "loss": 0.32639142870903015, "lr": 1.6266722467519783e-05, "epoch": 0.6408179524631523, "percentage": 32.04, "elapsed_time": "8:43:27", "remaining_time": "18:30:28"} +{"current_steps": 2414, "total_steps": 7532, "loss": 0.32113659381866455, "lr": 1.626330021498938e-05, "epoch": 0.6410835214446953, "percentage": 32.05, "elapsed_time": "8:43:40", "remaining_time": "18:30:15"} +{"current_steps": 2415, "total_steps": 7532, "loss": 0.3188290297985077, "lr": 1.6259876754968568e-05, "epoch": 0.6413490904262382, "percentage": 32.06, "elapsed_time": "8:43:54", "remaining_time": "18:30:03"} +{"current_steps": 2416, "total_steps": 7532, "loss": 0.3145543932914734, "lr": 1.625645208811734e-05, "epoch": 0.6416146594077812, "percentage": 32.08, "elapsed_time": "8:44:07", "remaining_time": "18:29:50"} +{"current_steps": 2417, "total_steps": 7532, "loss": 0.30433323979377747, "lr": 1.6253026215095943e-05, "epoch": 0.6418802283893241, "percentage": 32.09, "elapsed_time": "8:44:19", "remaining_time": "18:29:37"} +{"current_steps": 2418, "total_steps": 7532, "loss": 0.30946728587150574, "lr": 1.6249599136564837e-05, "epoch": 0.6421457973708671, "percentage": 32.1, "elapsed_time": "8:44:33", "remaining_time": "18:29:25"} +{"current_steps": 2419, 
"total_steps": 7532, "loss": 0.26245906949043274, "lr": 1.6246170853184726e-05, "epoch": 0.64241136635241, "percentage": 32.12, "elapsed_time": "8:44:46", "remaining_time": "18:29:12"} +{"current_steps": 2420, "total_steps": 7532, "loss": 0.31468862295150757, "lr": 1.624274136561654e-05, "epoch": 0.642676935333953, "percentage": 32.13, "elapsed_time": "8:44:59", "remaining_time": "18:28:59"} +{"current_steps": 2421, "total_steps": 7532, "loss": 0.28946155309677124, "lr": 1.6239310674521443e-05, "epoch": 0.6429425043154959, "percentage": 32.14, "elapsed_time": "8:45:12", "remaining_time": "18:28:45"} +{"current_steps": 2422, "total_steps": 7532, "loss": 0.26272106170654297, "lr": 1.6235878780560835e-05, "epoch": 0.6432080732970389, "percentage": 32.16, "elapsed_time": "8:45:24", "remaining_time": "18:28:32"} +{"current_steps": 2423, "total_steps": 7532, "loss": 0.3094574213027954, "lr": 1.6232445684396347e-05, "epoch": 0.6434736422785818, "percentage": 32.17, "elapsed_time": "8:45:37", "remaining_time": "18:28:17"} +{"current_steps": 2424, "total_steps": 7532, "loss": 0.2503833770751953, "lr": 1.6229011386689832e-05, "epoch": 0.6437392112601248, "percentage": 32.18, "elapsed_time": "8:45:50", "remaining_time": "18:28:04"} +{"current_steps": 2425, "total_steps": 7532, "loss": 0.2655009627342224, "lr": 1.6225575888103387e-05, "epoch": 0.6440047802416677, "percentage": 32.2, "elapsed_time": "8:46:02", "remaining_time": "18:27:50"} +{"current_steps": 2426, "total_steps": 7532, "loss": 0.2819611728191376, "lr": 1.6222139189299336e-05, "epoch": 0.6442703492232107, "percentage": 32.21, "elapsed_time": "8:46:15", "remaining_time": "18:27:36"} +{"current_steps": 2427, "total_steps": 7532, "loss": 0.2956068217754364, "lr": 1.6218701290940232e-05, "epoch": 0.6445359182047536, "percentage": 32.22, "elapsed_time": "8:46:27", "remaining_time": "18:27:22"} +{"current_steps": 2428, "total_steps": 7532, "loss": 0.3330997824668884, "lr": 1.6215262193688862e-05, "epoch": 
0.6448014871862966, "percentage": 32.24, "elapsed_time": "8:46:40", "remaining_time": "18:27:08"} +{"current_steps": 2429, "total_steps": 7532, "loss": 0.25897055864334106, "lr": 1.6211821898208242e-05, "epoch": 0.6450670561678395, "percentage": 32.25, "elapsed_time": "8:46:53", "remaining_time": "18:26:56"} +{"current_steps": 2430, "total_steps": 7532, "loss": 0.3119947016239166, "lr": 1.6208380405161623e-05, "epoch": 0.6453326251493825, "percentage": 32.26, "elapsed_time": "8:47:06", "remaining_time": "18:26:43"} +{"current_steps": 2431, "total_steps": 7532, "loss": 0.30833956599235535, "lr": 1.6204937715212482e-05, "epoch": 0.6455981941309256, "percentage": 32.28, "elapsed_time": "8:47:19", "remaining_time": "18:26:30"} +{"current_steps": 2432, "total_steps": 7532, "loss": 0.2935214638710022, "lr": 1.620149382902453e-05, "epoch": 0.6458637631124685, "percentage": 32.29, "elapsed_time": "8:47:32", "remaining_time": "18:26:17"} +{"current_steps": 2433, "total_steps": 7532, "loss": 0.24297356605529785, "lr": 1.619804874726171e-05, "epoch": 0.6461293320940115, "percentage": 32.3, "elapsed_time": "8:47:45", "remaining_time": "18:26:03"} +{"current_steps": 2434, "total_steps": 7532, "loss": 0.319774866104126, "lr": 1.6194602470588186e-05, "epoch": 0.6463949010755544, "percentage": 32.32, "elapsed_time": "8:47:58", "remaining_time": "18:25:49"} +{"current_steps": 2435, "total_steps": 7532, "loss": 0.29197463393211365, "lr": 1.6191154999668368e-05, "epoch": 0.6466604700570974, "percentage": 32.33, "elapsed_time": "8:48:11", "remaining_time": "18:25:36"} +{"current_steps": 2436, "total_steps": 7532, "loss": 0.2939727306365967, "lr": 1.6187706335166882e-05, "epoch": 0.6469260390386403, "percentage": 32.34, "elapsed_time": "8:48:24", "remaining_time": "18:25:23"} +{"current_steps": 2437, "total_steps": 7532, "loss": 0.2941162586212158, "lr": 1.6184256477748595e-05, "epoch": 0.6471916080201833, "percentage": 32.36, "elapsed_time": "8:48:37", "remaining_time": "18:25:10"} 
+{"current_steps": 2438, "total_steps": 7532, "loss": 0.2823144197463989, "lr": 1.6180805428078593e-05, "epoch": 0.6474571770017262, "percentage": 32.37, "elapsed_time": "8:48:50", "remaining_time": "18:24:57"} +{"current_steps": 2439, "total_steps": 7532, "loss": 0.30048274993896484, "lr": 1.61773531868222e-05, "epoch": 0.6477227459832692, "percentage": 32.38, "elapsed_time": "8:49:03", "remaining_time": "18:24:44"} +{"current_steps": 2440, "total_steps": 7532, "loss": 0.30927354097366333, "lr": 1.617389975464497e-05, "epoch": 0.6479883149648121, "percentage": 32.4, "elapsed_time": "8:49:15", "remaining_time": "18:24:31"} +{"current_steps": 2441, "total_steps": 7532, "loss": 0.34835004806518555, "lr": 1.6170445132212678e-05, "epoch": 0.6482538839463551, "percentage": 32.41, "elapsed_time": "8:49:28", "remaining_time": "18:24:17"} +{"current_steps": 2442, "total_steps": 7532, "loss": 0.2890225648880005, "lr": 1.616698932019134e-05, "epoch": 0.648519452927898, "percentage": 32.42, "elapsed_time": "8:49:41", "remaining_time": "18:24:04"} +{"current_steps": 2443, "total_steps": 7532, "loss": 0.31410521268844604, "lr": 1.6163532319247195e-05, "epoch": 0.648785021909441, "percentage": 32.43, "elapsed_time": "8:49:54", "remaining_time": "18:23:51"} +{"current_steps": 2444, "total_steps": 7532, "loss": 0.267375111579895, "lr": 1.616007413004671e-05, "epoch": 0.6490505908909839, "percentage": 32.45, "elapsed_time": "8:50:07", "remaining_time": "18:23:38"} +{"current_steps": 2445, "total_steps": 7532, "loss": 0.3300023376941681, "lr": 1.6156614753256583e-05, "epoch": 0.6493161598725269, "percentage": 32.46, "elapsed_time": "8:50:20", "remaining_time": "18:23:25"} +{"current_steps": 2446, "total_steps": 7532, "loss": 0.2822847366333008, "lr": 1.615315418954374e-05, "epoch": 0.6495817288540698, "percentage": 32.47, "elapsed_time": "8:50:33", "remaining_time": "18:23:12"} +{"current_steps": 2447, "total_steps": 7532, "loss": 0.3093401789665222, "lr": 1.6149692439575348e-05, 
"epoch": 0.6498472978356128, "percentage": 32.49, "elapsed_time": "8:50:46", "remaining_time": "18:22:57"} +{"current_steps": 2448, "total_steps": 7532, "loss": 0.2892506718635559, "lr": 1.6146229504018777e-05, "epoch": 0.6501128668171557, "percentage": 32.5, "elapsed_time": "8:50:59", "remaining_time": "18:22:44"} +{"current_steps": 2449, "total_steps": 7532, "loss": 0.2805558741092682, "lr": 1.6142765383541643e-05, "epoch": 0.6503784357986987, "percentage": 32.51, "elapsed_time": "8:51:11", "remaining_time": "18:22:30"} +{"current_steps": 2450, "total_steps": 7532, "loss": 0.29852935671806335, "lr": 1.6139300078811794e-05, "epoch": 0.6506440047802416, "percentage": 32.53, "elapsed_time": "8:51:24", "remaining_time": "18:22:17"} +{"current_steps": 2451, "total_steps": 7532, "loss": 0.3567991256713867, "lr": 1.6135833590497295e-05, "epoch": 0.6509095737617846, "percentage": 32.54, "elapsed_time": "8:51:36", "remaining_time": "18:22:02"} +{"current_steps": 2452, "total_steps": 7532, "loss": 0.29564782977104187, "lr": 1.6132365919266442e-05, "epoch": 0.6511751427433276, "percentage": 32.55, "elapsed_time": "8:51:49", "remaining_time": "18:21:49"} +{"current_steps": 2453, "total_steps": 7532, "loss": 0.30027297139167786, "lr": 1.612889706578777e-05, "epoch": 0.6514407117248705, "percentage": 32.57, "elapsed_time": "8:52:02", "remaining_time": "18:21:35"} +{"current_steps": 2454, "total_steps": 7532, "loss": 0.3318096697330475, "lr": 1.6125427030730027e-05, "epoch": 0.6517062807064135, "percentage": 32.58, "elapsed_time": "8:52:14", "remaining_time": "18:21:22"} +{"current_steps": 2455, "total_steps": 7532, "loss": 0.30962997674942017, "lr": 1.612195581476219e-05, "epoch": 0.6519718496879564, "percentage": 32.59, "elapsed_time": "8:52:27", "remaining_time": "18:21:08"} +{"current_steps": 2456, "total_steps": 7532, "loss": 0.3152836859226227, "lr": 1.6118483418553476e-05, "epoch": 0.6522374186694994, "percentage": 32.61, "elapsed_time": "8:52:40", "remaining_time": 
"18:20:54"} +{"current_steps": 2457, "total_steps": 7532, "loss": 0.26117920875549316, "lr": 1.6115009842773322e-05, "epoch": 0.6525029876510423, "percentage": 32.62, "elapsed_time": "8:52:52", "remaining_time": "18:20:40"} +{"current_steps": 2458, "total_steps": 7532, "loss": 0.2705717384815216, "lr": 1.6111535088091388e-05, "epoch": 0.6527685566325853, "percentage": 32.63, "elapsed_time": "8:53:05", "remaining_time": "18:20:27"} +{"current_steps": 2459, "total_steps": 7532, "loss": 0.3281205892562866, "lr": 1.6108059155177568e-05, "epoch": 0.6530341256141283, "percentage": 32.65, "elapsed_time": "8:53:18", "remaining_time": "18:20:13"} +{"current_steps": 2460, "total_steps": 7532, "loss": 0.3300125002861023, "lr": 1.6104582044701983e-05, "epoch": 0.6532996945956713, "percentage": 32.66, "elapsed_time": "8:53:31", "remaining_time": "18:20:01"} +{"current_steps": 2461, "total_steps": 7532, "loss": 0.29286977648735046, "lr": 1.6101103757334973e-05, "epoch": 0.6535652635772142, "percentage": 32.67, "elapsed_time": "8:53:45", "remaining_time": "18:19:49"} +{"current_steps": 2462, "total_steps": 7532, "loss": 0.2920498847961426, "lr": 1.6097624293747115e-05, "epoch": 0.6538308325587572, "percentage": 32.69, "elapsed_time": "8:53:58", "remaining_time": "18:19:36"} +{"current_steps": 2463, "total_steps": 7532, "loss": 0.31018689274787903, "lr": 1.609414365460921e-05, "epoch": 0.6540964015403001, "percentage": 32.7, "elapsed_time": "8:54:11", "remaining_time": "18:19:23"} +{"current_steps": 2464, "total_steps": 7532, "loss": 0.26806512475013733, "lr": 1.609066184059228e-05, "epoch": 0.6543619705218431, "percentage": 32.71, "elapsed_time": "8:54:24", "remaining_time": "18:19:11"} +{"current_steps": 2465, "total_steps": 7532, "loss": 0.29770639538764954, "lr": 1.608717885236758e-05, "epoch": 0.654627539503386, "percentage": 32.73, "elapsed_time": "8:54:37", "remaining_time": "18:18:58"} +{"current_steps": 2466, "total_steps": 7532, "loss": 0.36161965131759644, "lr": 
1.6083694690606592e-05, "epoch": 0.654893108484929, "percentage": 32.74, "elapsed_time": "8:54:50", "remaining_time": "18:18:45"} +{"current_steps": 2467, "total_steps": 7532, "loss": 0.36114081740379333, "lr": 1.6080209355981016e-05, "epoch": 0.6551586774664719, "percentage": 32.75, "elapsed_time": "8:55:04", "remaining_time": "18:18:32"} +{"current_steps": 2468, "total_steps": 7532, "loss": 0.28924882411956787, "lr": 1.6076722849162786e-05, "epoch": 0.6554242464480149, "percentage": 32.77, "elapsed_time": "8:55:17", "remaining_time": "18:18:19"} +{"current_steps": 2469, "total_steps": 7532, "loss": 0.3088049292564392, "lr": 1.6073235170824058e-05, "epoch": 0.6556898154295578, "percentage": 32.78, "elapsed_time": "8:55:30", "remaining_time": "18:18:07"} +{"current_steps": 2470, "total_steps": 7532, "loss": 0.2684907615184784, "lr": 1.6069746321637216e-05, "epoch": 0.6559553844111008, "percentage": 32.79, "elapsed_time": "8:55:43", "remaining_time": "18:17:54"} +{"current_steps": 2471, "total_steps": 7532, "loss": 0.2674641013145447, "lr": 1.6066256302274873e-05, "epoch": 0.6562209533926437, "percentage": 32.81, "elapsed_time": "8:55:56", "remaining_time": "18:17:42"} +{"current_steps": 2472, "total_steps": 7532, "loss": 0.2865106165409088, "lr": 1.6062765113409854e-05, "epoch": 0.6564865223741867, "percentage": 32.82, "elapsed_time": "8:56:09", "remaining_time": "18:17:28"} +{"current_steps": 2473, "total_steps": 7532, "loss": 0.33163607120513916, "lr": 1.605927275571523e-05, "epoch": 0.6567520913557297, "percentage": 32.83, "elapsed_time": "8:56:23", "remaining_time": "18:17:16"} +{"current_steps": 2474, "total_steps": 7532, "loss": 0.32725927233695984, "lr": 1.6055779229864276e-05, "epoch": 0.6570176603372726, "percentage": 32.85, "elapsed_time": "8:56:36", "remaining_time": "18:17:04"} +{"current_steps": 2475, "total_steps": 7532, "loss": 0.31537747383117676, "lr": 1.605228453653051e-05, "epoch": 0.6572832293188156, "percentage": 32.86, "elapsed_time": 
"8:56:49", "remaining_time": "18:16:51"} +{"current_steps": 2476, "total_steps": 7532, "loss": 0.29331761598587036, "lr": 1.604878867638767e-05, "epoch": 0.6575487983003585, "percentage": 32.87, "elapsed_time": "8:57:02", "remaining_time": "18:16:38"} +{"current_steps": 2477, "total_steps": 7532, "loss": 0.315193772315979, "lr": 1.6045291650109706e-05, "epoch": 0.6578143672819015, "percentage": 32.89, "elapsed_time": "8:57:14", "remaining_time": "18:16:23"} +{"current_steps": 2478, "total_steps": 7532, "loss": 0.3595796227455139, "lr": 1.6041793458370812e-05, "epoch": 0.6580799362634444, "percentage": 32.9, "elapsed_time": "8:57:27", "remaining_time": "18:16:10"} +{"current_steps": 2479, "total_steps": 7532, "loss": 0.3069949150085449, "lr": 1.6038294101845394e-05, "epoch": 0.6583455052449874, "percentage": 32.91, "elapsed_time": "8:57:40", "remaining_time": "18:15:56"} +{"current_steps": 2480, "total_steps": 7532, "loss": 0.3154812455177307, "lr": 1.603479358120809e-05, "epoch": 0.6586110742265303, "percentage": 32.93, "elapsed_time": "8:57:52", "remaining_time": "18:15:43"} +{"current_steps": 2481, "total_steps": 7532, "loss": 0.3005039691925049, "lr": 1.6031291897133756e-05, "epoch": 0.6588766432080733, "percentage": 32.94, "elapsed_time": "8:58:05", "remaining_time": "18:15:29"} +{"current_steps": 2482, "total_steps": 7532, "loss": 0.2885095775127411, "lr": 1.6027789050297476e-05, "epoch": 0.6591422121896162, "percentage": 32.95, "elapsed_time": "8:58:18", "remaining_time": "18:15:16"} +{"current_steps": 2483, "total_steps": 7532, "loss": 0.291950523853302, "lr": 1.602428504137456e-05, "epoch": 0.6594077811711592, "percentage": 32.97, "elapsed_time": "8:58:31", "remaining_time": "18:15:02"} +{"current_steps": 2484, "total_steps": 7532, "loss": 0.31630760431289673, "lr": 1.6020779871040538e-05, "epoch": 0.6596733501527021, "percentage": 32.98, "elapsed_time": "8:58:44", "remaining_time": "18:14:49"} +{"current_steps": 2485, "total_steps": 7532, "loss": 
0.29767507314682007, "lr": 1.6017273539971167e-05, "epoch": 0.6599389191342451, "percentage": 32.99, "elapsed_time": "8:58:56", "remaining_time": "18:14:34"} +{"current_steps": 2486, "total_steps": 7532, "loss": 0.2882775664329529, "lr": 1.601376604884242e-05, "epoch": 0.660204488115788, "percentage": 33.01, "elapsed_time": "8:59:09", "remaining_time": "18:14:22"} +{"current_steps": 2487, "total_steps": 7532, "loss": 0.325736403465271, "lr": 1.601025739833051e-05, "epoch": 0.6604700570973311, "percentage": 33.02, "elapsed_time": "8:59:22", "remaining_time": "18:14:08"} +{"current_steps": 2488, "total_steps": 7532, "loss": 0.3007255792617798, "lr": 1.6006747589111854e-05, "epoch": 0.660735626078874, "percentage": 33.03, "elapsed_time": "8:59:34", "remaining_time": "18:13:54"} +{"current_steps": 2489, "total_steps": 7532, "loss": 0.33199968934059143, "lr": 1.6003236621863107e-05, "epoch": 0.661001195060417, "percentage": 33.05, "elapsed_time": "8:59:48", "remaining_time": "18:13:42"} +{"current_steps": 2490, "total_steps": 7532, "loss": 0.3784569799900055, "lr": 1.5999724497261138e-05, "epoch": 0.6612667640419599, "percentage": 33.06, "elapsed_time": "9:00:00", "remaining_time": "18:13:28"} +{"current_steps": 2491, "total_steps": 7532, "loss": 0.28146931529045105, "lr": 1.5996211215983052e-05, "epoch": 0.6615323330235029, "percentage": 33.07, "elapsed_time": "9:00:14", "remaining_time": "18:13:16"} +{"current_steps": 2492, "total_steps": 7532, "loss": 0.32187730073928833, "lr": 1.599269677870616e-05, "epoch": 0.6617979020050458, "percentage": 33.09, "elapsed_time": "9:00:27", "remaining_time": "18:13:02"} +{"current_steps": 2493, "total_steps": 7532, "loss": 0.3021823465824127, "lr": 1.5989181186108003e-05, "epoch": 0.6620634709865888, "percentage": 33.1, "elapsed_time": "9:00:40", "remaining_time": "18:12:50"} +{"current_steps": 2494, "total_steps": 7532, "loss": 0.3309648334980011, "lr": 1.5985664438866354e-05, "epoch": 0.6623290399681317, "percentage": 33.11, 
"elapsed_time": "9:00:52", "remaining_time": "18:12:35"} +{"current_steps": 2495, "total_steps": 7532, "loss": 0.2939694821834564, "lr": 1.598214653765919e-05, "epoch": 0.6625946089496747, "percentage": 33.13, "elapsed_time": "9:01:05", "remaining_time": "18:12:22"} +{"current_steps": 2496, "total_steps": 7532, "loss": 0.31219810247421265, "lr": 1.597862748316473e-05, "epoch": 0.6628601779312177, "percentage": 33.14, "elapsed_time": "9:01:18", "remaining_time": "18:12:09"} +{"current_steps": 2497, "total_steps": 7532, "loss": 0.29435622692108154, "lr": 1.5975107276061405e-05, "epoch": 0.6631257469127606, "percentage": 33.15, "elapsed_time": "9:01:31", "remaining_time": "18:11:56"} +{"current_steps": 2498, "total_steps": 7532, "loss": 0.27167004346847534, "lr": 1.5971585917027864e-05, "epoch": 0.6633913158943036, "percentage": 33.17, "elapsed_time": "9:01:43", "remaining_time": "18:11:42"} +{"current_steps": 2499, "total_steps": 7532, "loss": 0.3360658884048462, "lr": 1.5968063406742988e-05, "epoch": 0.6636568848758465, "percentage": 33.18, "elapsed_time": "9:01:56", "remaining_time": "18:11:29"} +{"current_steps": 2500, "total_steps": 7532, "loss": 0.2994089424610138, "lr": 1.596453974588587e-05, "epoch": 0.6639224538573895, "percentage": 33.19, "elapsed_time": "9:02:09", "remaining_time": "18:11:15"} +{"current_steps": 2501, "total_steps": 7532, "loss": 0.32302889227867126, "lr": 1.596101493513584e-05, "epoch": 0.6641880228389324, "percentage": 33.2, "elapsed_time": "9:02:27", "remaining_time": "18:11:12"} +{"current_steps": 2502, "total_steps": 7532, "loss": 0.3122987747192383, "lr": 1.595748897517243e-05, "epoch": 0.6644535918204754, "percentage": 33.22, "elapsed_time": "9:02:40", "remaining_time": "18:10:59"} +{"current_steps": 2503, "total_steps": 7532, "loss": 0.2746438980102539, "lr": 1.5953961866675408e-05, "epoch": 0.6647191608020183, "percentage": 33.23, "elapsed_time": "9:02:53", "remaining_time": "18:10:46"} +{"current_steps": 2504, "total_steps": 7532, 
"loss": 0.3043097257614136, "lr": 1.5950433610324758e-05, "epoch": 0.6649847297835613, "percentage": 33.24, "elapsed_time": "9:03:05", "remaining_time": "18:10:32"} +{"current_steps": 2505, "total_steps": 7532, "loss": 0.3208698332309723, "lr": 1.594690420680069e-05, "epoch": 0.6652502987651042, "percentage": 33.26, "elapsed_time": "9:03:18", "remaining_time": "18:10:17"} +{"current_steps": 2506, "total_steps": 7532, "loss": 0.317341148853302, "lr": 1.5943373656783628e-05, "epoch": 0.6655158677466472, "percentage": 33.27, "elapsed_time": "9:03:31", "remaining_time": "18:10:04"} +{"current_steps": 2507, "total_steps": 7532, "loss": 0.3250347673892975, "lr": 1.5939841960954218e-05, "epoch": 0.6657814367281901, "percentage": 33.28, "elapsed_time": "9:03:43", "remaining_time": "18:09:51"} +{"current_steps": 2508, "total_steps": 7532, "loss": 0.32255828380584717, "lr": 1.5936309119993333e-05, "epoch": 0.6660470057097331, "percentage": 33.3, "elapsed_time": "9:03:56", "remaining_time": "18:09:38"} +{"current_steps": 2509, "total_steps": 7532, "loss": 0.3247614800930023, "lr": 1.593277513458206e-05, "epoch": 0.666312574691276, "percentage": 33.31, "elapsed_time": "9:04:09", "remaining_time": "18:09:23"} +{"current_steps": 2510, "total_steps": 7532, "loss": 0.34171730279922485, "lr": 1.5929240005401715e-05, "epoch": 0.666578143672819, "percentage": 33.32, "elapsed_time": "9:04:22", "remaining_time": "18:09:10"} +{"current_steps": 2511, "total_steps": 7532, "loss": 0.30671584606170654, "lr": 1.5925703733133823e-05, "epoch": 0.6668437126543619, "percentage": 33.34, "elapsed_time": "9:04:34", "remaining_time": "18:08:56"} +{"current_steps": 2512, "total_steps": 7532, "loss": 0.3387908339500427, "lr": 1.5922166318460138e-05, "epoch": 0.6671092816359049, "percentage": 33.35, "elapsed_time": "9:04:48", "remaining_time": "18:08:44"} +{"current_steps": 2513, "total_steps": 7532, "loss": 0.2772873044013977, "lr": 1.5918627762062635e-05, "epoch": 0.6673748506174478, "percentage": 
33.36, "elapsed_time": "9:05:01", "remaining_time": "18:08:31"} +{"current_steps": 2514, "total_steps": 7532, "loss": 0.31555238366127014, "lr": 1.59150880646235e-05, "epoch": 0.6676404195989908, "percentage": 33.38, "elapsed_time": "9:05:15", "remaining_time": "18:08:20"} +{"current_steps": 2515, "total_steps": 7532, "loss": 0.2821594476699829, "lr": 1.5911547226825154e-05, "epoch": 0.6679059885805337, "percentage": 33.39, "elapsed_time": "9:05:28", "remaining_time": "18:08:06"} +{"current_steps": 2516, "total_steps": 7532, "loss": 0.3176054358482361, "lr": 1.5908005249350217e-05, "epoch": 0.6681715575620768, "percentage": 33.4, "elapsed_time": "9:05:42", "remaining_time": "18:07:55"} +{"current_steps": 2517, "total_steps": 7532, "loss": 0.28484907746315, "lr": 1.590446213288155e-05, "epoch": 0.6684371265436198, "percentage": 33.42, "elapsed_time": "9:05:55", "remaining_time": "18:07:42"} +{"current_steps": 2518, "total_steps": 7532, "loss": 0.25227850675582886, "lr": 1.590091787810222e-05, "epoch": 0.6687026955251627, "percentage": 33.43, "elapsed_time": "9:06:09", "remaining_time": "18:07:31"} +{"current_steps": 2519, "total_steps": 7532, "loss": 0.276819109916687, "lr": 1.5897372485695514e-05, "epoch": 0.6689682645067057, "percentage": 33.44, "elapsed_time": "9:06:21", "remaining_time": "18:07:17"} +{"current_steps": 2520, "total_steps": 7532, "loss": 0.27944183349609375, "lr": 1.589382595634495e-05, "epoch": 0.6692338334882486, "percentage": 33.46, "elapsed_time": "9:06:34", "remaining_time": "18:07:04"} +{"current_steps": 2521, "total_steps": 7532, "loss": 0.295337975025177, "lr": 1.589027829073425e-05, "epoch": 0.6694994024697916, "percentage": 33.47, "elapsed_time": "9:06:47", "remaining_time": "18:06:51"} +{"current_steps": 2522, "total_steps": 7532, "loss": 0.31168580055236816, "lr": 1.5886729489547365e-05, "epoch": 0.6697649714513345, "percentage": 33.48, "elapsed_time": "9:07:00", "remaining_time": "18:06:38"} +{"current_steps": 2523, "total_steps": 
7532, "loss": 0.34520941972732544, "lr": 1.5883179553468465e-05, "epoch": 0.6700305404328775, "percentage": 33.5, "elapsed_time": "9:07:13", "remaining_time": "18:06:25"} +{"current_steps": 2524, "total_steps": 7532, "loss": 0.3121863901615143, "lr": 1.587962848318193e-05, "epoch": 0.6702961094144204, "percentage": 33.51, "elapsed_time": "9:07:26", "remaining_time": "18:06:12"} +{"current_steps": 2525, "total_steps": 7532, "loss": 0.3450377583503723, "lr": 1.587607627937237e-05, "epoch": 0.6705616783959634, "percentage": 33.52, "elapsed_time": "9:07:40", "remaining_time": "18:06:00"} +{"current_steps": 2526, "total_steps": 7532, "loss": 0.33431196212768555, "lr": 1.58725229427246e-05, "epoch": 0.6708272473775063, "percentage": 33.54, "elapsed_time": "9:07:53", "remaining_time": "18:05:47"} +{"current_steps": 2527, "total_steps": 7532, "loss": 0.2753226161003113, "lr": 1.5868968473923675e-05, "epoch": 0.6710928163590493, "percentage": 33.55, "elapsed_time": "9:08:06", "remaining_time": "18:05:35"} +{"current_steps": 2528, "total_steps": 7532, "loss": 0.31394219398498535, "lr": 1.586541287365484e-05, "epoch": 0.6713583853405922, "percentage": 33.56, "elapsed_time": "9:08:19", "remaining_time": "18:05:23"} +{"current_steps": 2529, "total_steps": 7532, "loss": 0.352859765291214, "lr": 1.586185614260358e-05, "epoch": 0.6716239543221352, "percentage": 33.58, "elapsed_time": "9:08:33", "remaining_time": "18:05:10"} +{"current_steps": 2530, "total_steps": 7532, "loss": 0.3182204067707062, "lr": 1.5858298281455592e-05, "epoch": 0.6718895233036781, "percentage": 33.59, "elapsed_time": "9:08:46", "remaining_time": "18:04:57"} +{"current_steps": 2531, "total_steps": 7532, "loss": 0.3107008934020996, "lr": 1.5854739290896785e-05, "epoch": 0.6721550922852211, "percentage": 33.6, "elapsed_time": "9:08:59", "remaining_time": "18:04:44"} +{"current_steps": 2532, "total_steps": 7532, "loss": 0.2737328112125397, "lr": 1.5851179171613294e-05, "epoch": 0.672420661266764, "percentage": 
33.62, "elapsed_time": "9:09:11", "remaining_time": "18:04:30"} +{"current_steps": 2533, "total_steps": 7532, "loss": 0.2744509279727936, "lr": 1.5847617924291466e-05, "epoch": 0.672686230248307, "percentage": 33.63, "elapsed_time": "9:09:24", "remaining_time": "18:04:17"} +{"current_steps": 2534, "total_steps": 7532, "loss": 0.3149082660675049, "lr": 1.584405554961787e-05, "epoch": 0.6729517992298499, "percentage": 33.64, "elapsed_time": "9:09:37", "remaining_time": "18:04:03"} +{"current_steps": 2535, "total_steps": 7532, "loss": 0.32643741369247437, "lr": 1.584049204827929e-05, "epoch": 0.6732173682113929, "percentage": 33.66, "elapsed_time": "9:09:50", "remaining_time": "18:03:50"} +{"current_steps": 2536, "total_steps": 7532, "loss": 0.31901559233665466, "lr": 1.583692742096272e-05, "epoch": 0.6734829371929358, "percentage": 33.67, "elapsed_time": "9:10:03", "remaining_time": "18:03:38"} +{"current_steps": 2537, "total_steps": 7532, "loss": 0.3020802140235901, "lr": 1.583336166835539e-05, "epoch": 0.6737485061744788, "percentage": 33.68, "elapsed_time": "9:10:16", "remaining_time": "18:03:24"} +{"current_steps": 2538, "total_steps": 7532, "loss": 0.29683804512023926, "lr": 1.5829794791144723e-05, "epoch": 0.6740140751560217, "percentage": 33.7, "elapsed_time": "9:10:29", "remaining_time": "18:03:12"} +{"current_steps": 2539, "total_steps": 7532, "loss": 0.2898966073989868, "lr": 1.582622679001838e-05, "epoch": 0.6742796441375647, "percentage": 33.71, "elapsed_time": "9:10:42", "remaining_time": "18:02:58"} +{"current_steps": 2540, "total_steps": 7532, "loss": 0.2665000855922699, "lr": 1.582265766566422e-05, "epoch": 0.6745452131191076, "percentage": 33.72, "elapsed_time": "9:10:55", "remaining_time": "18:02:44"} +{"current_steps": 2541, "total_steps": 7532, "loss": 0.2987207770347595, "lr": 1.581908741877034e-05, "epoch": 0.6748107821006506, "percentage": 33.74, "elapsed_time": "9:11:07", "remaining_time": "18:02:31"} +{"current_steps": 2542, "total_steps": 
7532, "loss": 0.32591086626052856, "lr": 1.5815516050025032e-05, "epoch": 0.6750763510821935, "percentage": 33.75, "elapsed_time": "9:11:20", "remaining_time": "18:02:17"} +{"current_steps": 2543, "total_steps": 7532, "loss": 0.28181299567222595, "lr": 1.581194356011682e-05, "epoch": 0.6753419200637365, "percentage": 33.76, "elapsed_time": "9:11:32", "remaining_time": "18:02:03"} +{"current_steps": 2544, "total_steps": 7532, "loss": 0.3256041407585144, "lr": 1.5808369949734433e-05, "epoch": 0.6756074890452796, "percentage": 33.78, "elapsed_time": "9:11:46", "remaining_time": "18:01:51"} +{"current_steps": 2545, "total_steps": 7532, "loss": 0.3079703152179718, "lr": 1.5804795219566825e-05, "epoch": 0.6758730580268225, "percentage": 33.79, "elapsed_time": "9:11:58", "remaining_time": "18:01:37"} +{"current_steps": 2546, "total_steps": 7532, "loss": 0.3364162743091583, "lr": 1.580121937030316e-05, "epoch": 0.6761386270083655, "percentage": 33.8, "elapsed_time": "9:12:12", "remaining_time": "18:01:25"} +{"current_steps": 2547, "total_steps": 7532, "loss": 0.2774898111820221, "lr": 1.5797642402632816e-05, "epoch": 0.6764041959899084, "percentage": 33.82, "elapsed_time": "9:12:25", "remaining_time": "18:01:12"} +{"current_steps": 2548, "total_steps": 7532, "loss": 0.33260244131088257, "lr": 1.5794064317245396e-05, "epoch": 0.6766697649714514, "percentage": 33.83, "elapsed_time": "9:12:38", "remaining_time": "18:01:00"} +{"current_steps": 2549, "total_steps": 7532, "loss": 0.3327571153640747, "lr": 1.5790485114830708e-05, "epoch": 0.6769353339529943, "percentage": 33.84, "elapsed_time": "9:12:52", "remaining_time": "18:00:47"} +{"current_steps": 2550, "total_steps": 7532, "loss": 0.28527912497520447, "lr": 1.5786904796078783e-05, "epoch": 0.6772009029345373, "percentage": 33.86, "elapsed_time": "9:13:05", "remaining_time": "18:00:35"} +{"current_steps": 2551, "total_steps": 7532, "loss": 0.3100908100605011, "lr": 1.5783323361679865e-05, "epoch": 0.6774664719160802, 
"percentage": 33.87, "elapsed_time": "9:13:18", "remaining_time": "18:00:21"} +{"current_steps": 2552, "total_steps": 7532, "loss": 0.3434574007987976, "lr": 1.577974081232441e-05, "epoch": 0.6777320408976232, "percentage": 33.88, "elapsed_time": "9:13:31", "remaining_time": "18:00:08"} +{"current_steps": 2553, "total_steps": 7532, "loss": 0.3151341676712036, "lr": 1.5776157148703094e-05, "epoch": 0.6779976098791661, "percentage": 33.9, "elapsed_time": "9:13:44", "remaining_time": "17:59:56"} +{"current_steps": 2554, "total_steps": 7532, "loss": 0.33334124088287354, "lr": 1.5772572371506803e-05, "epoch": 0.6782631788607091, "percentage": 33.91, "elapsed_time": "9:13:57", "remaining_time": "17:59:42"} +{"current_steps": 2555, "total_steps": 7532, "loss": 0.26933547854423523, "lr": 1.576898648142664e-05, "epoch": 0.678528747842252, "percentage": 33.92, "elapsed_time": "9:14:10", "remaining_time": "17:59:30"} +{"current_steps": 2556, "total_steps": 7532, "loss": 0.3087029755115509, "lr": 1.576539947915392e-05, "epoch": 0.678794316823795, "percentage": 33.94, "elapsed_time": "9:14:23", "remaining_time": "17:59:17"} +{"current_steps": 2557, "total_steps": 7532, "loss": 0.32620540261268616, "lr": 1.576181136538018e-05, "epoch": 0.6790598858053379, "percentage": 33.95, "elapsed_time": "9:14:37", "remaining_time": "17:59:05"} +{"current_steps": 2558, "total_steps": 7532, "loss": 0.29112139344215393, "lr": 1.575822214079716e-05, "epoch": 0.6793254547868809, "percentage": 33.96, "elapsed_time": "9:14:50", "remaining_time": "17:58:52"} +{"current_steps": 2559, "total_steps": 7532, "loss": 0.3394843339920044, "lr": 1.5754631806096822e-05, "epoch": 0.6795910237684238, "percentage": 33.98, "elapsed_time": "9:15:03", "remaining_time": "17:58:39"} +{"current_steps": 2560, "total_steps": 7532, "loss": 0.32754629850387573, "lr": 1.5751040361971342e-05, "epoch": 0.6798565927499668, "percentage": 33.99, "elapsed_time": "9:15:15", "remaining_time": "17:58:25"} +{"current_steps": 2561, 
"total_steps": 7532, "loss": 0.2829592823982239, "lr": 1.574744780911311e-05, "epoch": 0.6801221617315097, "percentage": 34.0, "elapsed_time": "9:15:28", "remaining_time": "17:58:12"} +{"current_steps": 2562, "total_steps": 7532, "loss": 0.2718046307563782, "lr": 1.5743854148214724e-05, "epoch": 0.6803877307130527, "percentage": 34.01, "elapsed_time": "9:15:41", "remaining_time": "17:57:59"} +{"current_steps": 2563, "total_steps": 7532, "loss": 0.29244256019592285, "lr": 1.5740259379969002e-05, "epoch": 0.6806532996945956, "percentage": 34.03, "elapsed_time": "9:15:55", "remaining_time": "17:57:46"} +{"current_steps": 2564, "total_steps": 7532, "loss": 0.2925388514995575, "lr": 1.5736663505068972e-05, "epoch": 0.6809188686761386, "percentage": 34.04, "elapsed_time": "9:16:08", "remaining_time": "17:57:33"} +{"current_steps": 2565, "total_steps": 7532, "loss": 0.26742440462112427, "lr": 1.5733066524207875e-05, "epoch": 0.6811844376576816, "percentage": 34.05, "elapsed_time": "9:16:21", "remaining_time": "17:57:21"} +{"current_steps": 2566, "total_steps": 7532, "loss": 0.33688807487487793, "lr": 1.5729468438079167e-05, "epoch": 0.6814500066392245, "percentage": 34.07, "elapsed_time": "9:16:34", "remaining_time": "17:57:09"} +{"current_steps": 2567, "total_steps": 7532, "loss": 0.2953096330165863, "lr": 1.5725869247376514e-05, "epoch": 0.6817155756207675, "percentage": 34.08, "elapsed_time": "9:16:47", "remaining_time": "17:56:55"} +{"current_steps": 2568, "total_steps": 7532, "loss": 0.321500301361084, "lr": 1.5722268952793806e-05, "epoch": 0.6819811446023104, "percentage": 34.09, "elapsed_time": "9:17:00", "remaining_time": "17:56:42"} +{"current_steps": 2569, "total_steps": 7532, "loss": 0.29148590564727783, "lr": 1.5718667555025127e-05, "epoch": 0.6822467135838534, "percentage": 34.11, "elapsed_time": "9:17:13", "remaining_time": "17:56:28"} +{"current_steps": 2570, "total_steps": 7532, "loss": 0.26887139678001404, "lr": 1.5715065054764792e-05, "epoch": 
0.6825122825653963, "percentage": 34.12, "elapsed_time": "9:17:25", "remaining_time": "17:56:15"} +{"current_steps": 2571, "total_steps": 7532, "loss": 0.2698139250278473, "lr": 1.5711461452707316e-05, "epoch": 0.6827778515469393, "percentage": 34.13, "elapsed_time": "9:17:38", "remaining_time": "17:56:01"} +{"current_steps": 2572, "total_steps": 7532, "loss": 0.264956533908844, "lr": 1.5707856749547433e-05, "epoch": 0.6830434205284823, "percentage": 34.15, "elapsed_time": "9:17:51", "remaining_time": "17:55:48"} +{"current_steps": 2573, "total_steps": 7532, "loss": 0.32535314559936523, "lr": 1.5704250945980085e-05, "epoch": 0.6833089895100253, "percentage": 34.16, "elapsed_time": "9:18:04", "remaining_time": "17:55:35"} +{"current_steps": 2574, "total_steps": 7532, "loss": 0.30529654026031494, "lr": 1.5700644042700432e-05, "epoch": 0.6835745584915682, "percentage": 34.17, "elapsed_time": "9:18:17", "remaining_time": "17:55:23"} +{"current_steps": 2575, "total_steps": 7532, "loss": 0.27253150939941406, "lr": 1.569703604040384e-05, "epoch": 0.6838401274731112, "percentage": 34.19, "elapsed_time": "9:18:30", "remaining_time": "17:55:09"} +{"current_steps": 2576, "total_steps": 7532, "loss": 0.27451053261756897, "lr": 1.5693426939785886e-05, "epoch": 0.6841056964546541, "percentage": 34.2, "elapsed_time": "9:18:43", "remaining_time": "17:54:56"} +{"current_steps": 2577, "total_steps": 7532, "loss": 0.33280283212661743, "lr": 1.5689816741542374e-05, "epoch": 0.6843712654361971, "percentage": 34.21, "elapsed_time": "9:18:56", "remaining_time": "17:54:43"} +{"current_steps": 2578, "total_steps": 7532, "loss": 0.2911887764930725, "lr": 1.5686205446369293e-05, "epoch": 0.68463683441774, "percentage": 34.23, "elapsed_time": "9:19:09", "remaining_time": "17:54:29"} +{"current_steps": 2579, "total_steps": 7532, "loss": 0.2950279116630554, "lr": 1.5682593054962866e-05, "epoch": 0.684902403399283, "percentage": 34.24, "elapsed_time": "9:19:22", "remaining_time": "17:54:17"} 
+{"current_steps": 2580, "total_steps": 7532, "loss": 0.3267458975315094, "lr": 1.5678979568019518e-05, "epoch": 0.6851679723808259, "percentage": 34.25, "elapsed_time": "9:19:35", "remaining_time": "17:54:04"} +{"current_steps": 2581, "total_steps": 7532, "loss": 0.3209132254123688, "lr": 1.5675364986235887e-05, "epoch": 0.6854335413623689, "percentage": 34.27, "elapsed_time": "9:19:48", "remaining_time": "17:53:52"} +{"current_steps": 2582, "total_steps": 7532, "loss": 0.3186662197113037, "lr": 1.5671749310308818e-05, "epoch": 0.6856991103439118, "percentage": 34.28, "elapsed_time": "9:20:01", "remaining_time": "17:53:38"} +{"current_steps": 2583, "total_steps": 7532, "loss": 0.24875827133655548, "lr": 1.566813254093538e-05, "epoch": 0.6859646793254548, "percentage": 34.29, "elapsed_time": "9:20:15", "remaining_time": "17:53:26"} +{"current_steps": 2584, "total_steps": 7532, "loss": 0.26657983660697937, "lr": 1.5664514678812835e-05, "epoch": 0.6862302483069977, "percentage": 34.31, "elapsed_time": "9:20:27", "remaining_time": "17:53:12"} +{"current_steps": 2585, "total_steps": 7532, "loss": 0.2889682650566101, "lr": 1.5660895724638666e-05, "epoch": 0.6864958172885407, "percentage": 34.32, "elapsed_time": "9:20:41", "remaining_time": "17:53:00"} +{"current_steps": 2586, "total_steps": 7532, "loss": 0.32035061717033386, "lr": 1.5657275679110564e-05, "epoch": 0.6867613862700837, "percentage": 34.33, "elapsed_time": "9:20:54", "remaining_time": "17:52:47"} +{"current_steps": 2587, "total_steps": 7532, "loss": 0.2844264507293701, "lr": 1.5653654542926435e-05, "epoch": 0.6870269552516266, "percentage": 34.35, "elapsed_time": "9:21:07", "remaining_time": "17:52:33"} +{"current_steps": 2588, "total_steps": 7532, "loss": 0.27645713090896606, "lr": 1.5650032316784388e-05, "epoch": 0.6872925242331696, "percentage": 34.36, "elapsed_time": "9:21:20", "remaining_time": "17:52:21"} +{"current_steps": 2589, "total_steps": 7532, "loss": 0.29902809858322144, "lr": 
1.5646409001382745e-05, "epoch": 0.6875580932147125, "percentage": 34.37, "elapsed_time": "9:21:33", "remaining_time": "17:52:08"} +{"current_steps": 2590, "total_steps": 7532, "loss": 0.28179824352264404, "lr": 1.564278459742004e-05, "epoch": 0.6878236621962555, "percentage": 34.39, "elapsed_time": "9:21:47", "remaining_time": "17:51:56"} +{"current_steps": 2591, "total_steps": 7532, "loss": 0.30527305603027344, "lr": 1.563915910559502e-05, "epoch": 0.6880892311777984, "percentage": 34.4, "elapsed_time": "9:21:59", "remaining_time": "17:51:43"} +{"current_steps": 2592, "total_steps": 7532, "loss": 0.29411792755126953, "lr": 1.5635532526606625e-05, "epoch": 0.6883548001593414, "percentage": 34.41, "elapsed_time": "9:22:13", "remaining_time": "17:51:30"} +{"current_steps": 2593, "total_steps": 7532, "loss": 0.32294154167175293, "lr": 1.563190486115403e-05, "epoch": 0.6886203691408843, "percentage": 34.43, "elapsed_time": "9:22:25", "remaining_time": "17:51:17"} +{"current_steps": 2594, "total_steps": 7532, "loss": 0.31873172521591187, "lr": 1.5628276109936594e-05, "epoch": 0.6888859381224273, "percentage": 34.44, "elapsed_time": "9:22:39", "remaining_time": "17:51:04"} +{"current_steps": 2595, "total_steps": 7532, "loss": 0.37790048122406006, "lr": 1.5624646273653908e-05, "epoch": 0.6891515071039702, "percentage": 34.45, "elapsed_time": "9:22:51", "remaining_time": "17:50:51"} +{"current_steps": 2596, "total_steps": 7532, "loss": 0.27596205472946167, "lr": 1.5621015353005754e-05, "epoch": 0.6894170760855132, "percentage": 34.47, "elapsed_time": "9:23:05", "remaining_time": "17:50:38"} +{"current_steps": 2597, "total_steps": 7532, "loss": 0.30952686071395874, "lr": 1.5617383348692135e-05, "epoch": 0.6896826450670561, "percentage": 34.48, "elapsed_time": "9:23:18", "remaining_time": "17:50:25"} +{"current_steps": 2598, "total_steps": 7532, "loss": 0.2933235764503479, "lr": 1.5613750261413256e-05, "epoch": 0.6899482140485991, "percentage": 34.49, "elapsed_time": 
"9:23:31", "remaining_time": "17:50:13"} +{"current_steps": 2599, "total_steps": 7532, "loss": 0.2961776554584503, "lr": 1.5610116091869538e-05, "epoch": 0.690213783030142, "percentage": 34.51, "elapsed_time": "9:23:44", "remaining_time": "17:49:59"} +{"current_steps": 2600, "total_steps": 7532, "loss": 0.2843313217163086, "lr": 1.56064808407616e-05, "epoch": 0.6904793520116851, "percentage": 34.52, "elapsed_time": "9:23:57", "remaining_time": "17:49:47"} +{"current_steps": 2601, "total_steps": 7532, "loss": 0.29366564750671387, "lr": 1.560284450879028e-05, "epoch": 0.690744920993228, "percentage": 34.53, "elapsed_time": "9:24:16", "remaining_time": "17:49:45"} +{"current_steps": 2602, "total_steps": 7532, "loss": 0.32668614387512207, "lr": 1.5599207096656614e-05, "epoch": 0.691010489974771, "percentage": 34.55, "elapsed_time": "9:24:29", "remaining_time": "17:49:32"} +{"current_steps": 2603, "total_steps": 7532, "loss": 0.344367653131485, "lr": 1.5595568605061858e-05, "epoch": 0.6912760589563139, "percentage": 34.56, "elapsed_time": "9:24:42", "remaining_time": "17:49:18"} +{"current_steps": 2604, "total_steps": 7532, "loss": 0.2875809371471405, "lr": 1.5591929034707468e-05, "epoch": 0.6915416279378569, "percentage": 34.57, "elapsed_time": "9:24:54", "remaining_time": "17:49:05"} +{"current_steps": 2605, "total_steps": 7532, "loss": 0.2688799202442169, "lr": 1.5588288386295113e-05, "epoch": 0.6918071969193998, "percentage": 34.59, "elapsed_time": "9:25:08", "remaining_time": "17:48:52"} +{"current_steps": 2606, "total_steps": 7532, "loss": 0.28575828671455383, "lr": 1.558464666052667e-05, "epoch": 0.6920727659009428, "percentage": 34.6, "elapsed_time": "9:25:21", "remaining_time": "17:48:39"} +{"current_steps": 2607, "total_steps": 7532, "loss": 0.2800632119178772, "lr": 1.5581003858104203e-05, "epoch": 0.6923383348824858, "percentage": 34.61, "elapsed_time": "9:25:34", "remaining_time": "17:48:27"} +{"current_steps": 2608, "total_steps": 7532, "loss": 
0.3066416382789612, "lr": 1.5577359979730022e-05, "epoch": 0.6926039038640287, "percentage": 34.63, "elapsed_time": "9:25:47", "remaining_time": "17:48:13"} +{"current_steps": 2609, "total_steps": 7532, "loss": 0.3164110779762268, "lr": 1.5573715026106617e-05, "epoch": 0.6928694728455717, "percentage": 34.64, "elapsed_time": "9:26:01", "remaining_time": "17:48:02"} +{"current_steps": 2610, "total_steps": 7532, "loss": 0.2908422350883484, "lr": 1.5570068997936686e-05, "epoch": 0.6931350418271146, "percentage": 34.65, "elapsed_time": "9:26:13", "remaining_time": "17:47:48"} +{"current_steps": 2611, "total_steps": 7532, "loss": 0.29055240750312805, "lr": 1.5566421895923148e-05, "epoch": 0.6934006108086576, "percentage": 34.67, "elapsed_time": "9:26:27", "remaining_time": "17:47:36"} +{"current_steps": 2612, "total_steps": 7532, "loss": 0.3247227370738983, "lr": 1.556277372076912e-05, "epoch": 0.6936661797902005, "percentage": 34.68, "elapsed_time": "9:26:40", "remaining_time": "17:47:23"} +{"current_steps": 2613, "total_steps": 7532, "loss": 0.29944315552711487, "lr": 1.555912447317792e-05, "epoch": 0.6939317487717435, "percentage": 34.69, "elapsed_time": "9:26:53", "remaining_time": "17:47:11"} +{"current_steps": 2614, "total_steps": 7532, "loss": 0.2984931170940399, "lr": 1.5555474153853092e-05, "epoch": 0.6941973177532864, "percentage": 34.71, "elapsed_time": "9:27:06", "remaining_time": "17:46:58"} +{"current_steps": 2615, "total_steps": 7532, "loss": 0.301285982131958, "lr": 1.5551822763498364e-05, "epoch": 0.6944628867348294, "percentage": 34.72, "elapsed_time": "9:27:20", "remaining_time": "17:46:45"} +{"current_steps": 2616, "total_steps": 7532, "loss": 0.2862967252731323, "lr": 1.5548170302817683e-05, "epoch": 0.6947284557163723, "percentage": 34.73, "elapsed_time": "9:27:32", "remaining_time": "17:46:32"} +{"current_steps": 2617, "total_steps": 7532, "loss": 0.3071482181549072, "lr": 1.5544516772515207e-05, "epoch": 0.6949940246979153, "percentage": 34.75, 
"elapsed_time": "9:27:45", "remaining_time": "17:46:19"} +{"current_steps": 2618, "total_steps": 7532, "loss": 0.33668914437294006, "lr": 1.5540862173295285e-05, "epoch": 0.6952595936794582, "percentage": 34.76, "elapsed_time": "9:27:59", "remaining_time": "17:46:06"} +{"current_steps": 2619, "total_steps": 7532, "loss": 0.32204627990722656, "lr": 1.5537206505862486e-05, "epoch": 0.6955251626610012, "percentage": 34.77, "elapsed_time": "9:28:12", "remaining_time": "17:45:53"} +{"current_steps": 2620, "total_steps": 7532, "loss": 0.30210041999816895, "lr": 1.5533549770921576e-05, "epoch": 0.6957907316425441, "percentage": 34.78, "elapsed_time": "9:28:25", "remaining_time": "17:45:41"} +{"current_steps": 2621, "total_steps": 7532, "loss": 0.3116886019706726, "lr": 1.5529891969177535e-05, "epoch": 0.6960563006240871, "percentage": 34.8, "elapsed_time": "9:28:38", "remaining_time": "17:45:28"} +{"current_steps": 2622, "total_steps": 7532, "loss": 0.3460058867931366, "lr": 1.5526233101335543e-05, "epoch": 0.69632186960563, "percentage": 34.81, "elapsed_time": "9:28:52", "remaining_time": "17:45:16"} +{"current_steps": 2623, "total_steps": 7532, "loss": 0.30080512166023254, "lr": 1.552257316810098e-05, "epoch": 0.696587438587173, "percentage": 34.82, "elapsed_time": "9:29:04", "remaining_time": "17:45:02"} +{"current_steps": 2624, "total_steps": 7532, "loss": 0.3381347954273224, "lr": 1.5518912170179447e-05, "epoch": 0.6968530075687159, "percentage": 34.84, "elapsed_time": "9:29:18", "remaining_time": "17:44:50"} +{"current_steps": 2625, "total_steps": 7532, "loss": 0.30345672369003296, "lr": 1.5515250108276733e-05, "epoch": 0.6971185765502589, "percentage": 34.85, "elapsed_time": "9:29:30", "remaining_time": "17:44:36"} +{"current_steps": 2626, "total_steps": 7532, "loss": 0.3002641797065735, "lr": 1.5511586983098847e-05, "epoch": 0.6973841455318018, "percentage": 34.86, "elapsed_time": "9:29:44", "remaining_time": "17:44:24"} +{"current_steps": 2627, "total_steps": 
7532, "loss": 0.2848126292228699, "lr": 1.5507922795351992e-05, "epoch": 0.6976497145133448, "percentage": 34.88, "elapsed_time": "9:29:57", "remaining_time": "17:44:11"} +{"current_steps": 2628, "total_steps": 7532, "loss": 0.32360371947288513, "lr": 1.5504257545742585e-05, "epoch": 0.6979152834948879, "percentage": 34.89, "elapsed_time": "9:30:10", "remaining_time": "17:43:59"} +{"current_steps": 2629, "total_steps": 7532, "loss": 0.2970595955848694, "lr": 1.5500591234977237e-05, "epoch": 0.6981808524764308, "percentage": 34.9, "elapsed_time": "9:30:23", "remaining_time": "17:43:45"} +{"current_steps": 2630, "total_steps": 7532, "loss": 0.35431474447250366, "lr": 1.5496923863762773e-05, "epoch": 0.6984464214579738, "percentage": 34.92, "elapsed_time": "9:30:36", "remaining_time": "17:43:32"} +{"current_steps": 2631, "total_steps": 7532, "loss": 0.30133551359176636, "lr": 1.549325543280622e-05, "epoch": 0.6987119904395167, "percentage": 34.93, "elapsed_time": "9:30:49", "remaining_time": "17:43:20"} +{"current_steps": 2632, "total_steps": 7532, "loss": 0.3013160824775696, "lr": 1.5489585942814807e-05, "epoch": 0.6989775594210597, "percentage": 34.94, "elapsed_time": "9:31:02", "remaining_time": "17:43:07"} +{"current_steps": 2633, "total_steps": 7532, "loss": 0.3291313052177429, "lr": 1.5485915394495967e-05, "epoch": 0.6992431284026026, "percentage": 34.96, "elapsed_time": "9:31:15", "remaining_time": "17:42:54"} +{"current_steps": 2634, "total_steps": 7532, "loss": 0.32308053970336914, "lr": 1.5482243788557336e-05, "epoch": 0.6995086973841456, "percentage": 34.97, "elapsed_time": "9:31:28", "remaining_time": "17:42:41"} +{"current_steps": 2635, "total_steps": 7532, "loss": 0.321450412273407, "lr": 1.5478571125706762e-05, "epoch": 0.6997742663656885, "percentage": 34.98, "elapsed_time": "9:31:42", "remaining_time": "17:42:28"} +{"current_steps": 2636, "total_steps": 7532, "loss": 0.30871254205703735, "lr": 1.547489740665229e-05, "epoch": 0.7000398353472315, 
"percentage": 35.0, "elapsed_time": "9:31:55", "remaining_time": "17:42:15"} +{"current_steps": 2637, "total_steps": 7532, "loss": 0.29414835572242737, "lr": 1.5471222632102168e-05, "epoch": 0.7003054043287744, "percentage": 35.01, "elapsed_time": "9:32:08", "remaining_time": "17:42:02"} +{"current_steps": 2638, "total_steps": 7532, "loss": 0.2841604948043823, "lr": 1.546754680276485e-05, "epoch": 0.7005709733103174, "percentage": 35.02, "elapsed_time": "9:32:20", "remaining_time": "17:41:48"} +{"current_steps": 2639, "total_steps": 7532, "loss": 0.2895316183567047, "lr": 1.546386991934899e-05, "epoch": 0.7008365422918603, "percentage": 35.04, "elapsed_time": "9:32:34", "remaining_time": "17:41:36"} +{"current_steps": 2640, "total_steps": 7532, "loss": 0.310278058052063, "lr": 1.546019198256345e-05, "epoch": 0.7011021112734033, "percentage": 35.05, "elapsed_time": "9:32:47", "remaining_time": "17:41:23"} +{"current_steps": 2641, "total_steps": 7532, "loss": 0.3000732660293579, "lr": 1.5456512993117297e-05, "epoch": 0.7013676802549462, "percentage": 35.06, "elapsed_time": "9:33:00", "remaining_time": "17:41:11"} +{"current_steps": 2642, "total_steps": 7532, "loss": 0.2650133967399597, "lr": 1.545283295171979e-05, "epoch": 0.7016332492364892, "percentage": 35.08, "elapsed_time": "9:33:13", "remaining_time": "17:40:58"} +{"current_steps": 2643, "total_steps": 7532, "loss": 0.3414345681667328, "lr": 1.5449151859080395e-05, "epoch": 0.7018988182180321, "percentage": 35.09, "elapsed_time": "9:33:26", "remaining_time": "17:40:45"} +{"current_steps": 2644, "total_steps": 7532, "loss": 0.26955321431159973, "lr": 1.5445469715908793e-05, "epoch": 0.7021643871995751, "percentage": 35.1, "elapsed_time": "9:33:39", "remaining_time": "17:40:32"} +{"current_steps": 2645, "total_steps": 7532, "loss": 0.3028743863105774, "lr": 1.5441786522914855e-05, "epoch": 0.702429956181118, "percentage": 35.12, "elapsed_time": "9:33:53", "remaining_time": "17:40:19"} +{"current_steps": 2646, 
"total_steps": 7532, "loss": 0.28710106015205383, "lr": 1.5438102280808653e-05, "epoch": 0.702695525162661, "percentage": 35.13, "elapsed_time": "9:34:06", "remaining_time": "17:40:06"} +{"current_steps": 2647, "total_steps": 7532, "loss": 0.33343076705932617, "lr": 1.543441699030047e-05, "epoch": 0.7029610941442039, "percentage": 35.14, "elapsed_time": "9:34:19", "remaining_time": "17:39:54"} +{"current_steps": 2648, "total_steps": 7532, "loss": 0.27760642766952515, "lr": 1.543073065210078e-05, "epoch": 0.7032266631257469, "percentage": 35.16, "elapsed_time": "9:34:32", "remaining_time": "17:39:40"} +{"current_steps": 2649, "total_steps": 7532, "loss": 0.2844334840774536, "lr": 1.5427043266920276e-05, "epoch": 0.7034922321072898, "percentage": 35.17, "elapsed_time": "9:34:44", "remaining_time": "17:39:27"} +{"current_steps": 2650, "total_steps": 7532, "loss": 0.28979432582855225, "lr": 1.542335483546983e-05, "epoch": 0.7037578010888328, "percentage": 35.18, "elapsed_time": "9:34:58", "remaining_time": "17:39:14"} +{"current_steps": 2651, "total_steps": 7532, "loss": 0.313267320394516, "lr": 1.5419665358460537e-05, "epoch": 0.7040233700703757, "percentage": 35.2, "elapsed_time": "9:35:10", "remaining_time": "17:39:01"} +{"current_steps": 2652, "total_steps": 7532, "loss": 0.26702141761779785, "lr": 1.5415974836603676e-05, "epoch": 0.7042889390519187, "percentage": 35.21, "elapsed_time": "9:35:24", "remaining_time": "17:38:48"} +{"current_steps": 2653, "total_steps": 7532, "loss": 0.3256012499332428, "lr": 1.5412283270610752e-05, "epoch": 0.7045545080334616, "percentage": 35.22, "elapsed_time": "9:35:37", "remaining_time": "17:38:35"} +{"current_steps": 2654, "total_steps": 7532, "loss": 0.3035642206668854, "lr": 1.540859066119344e-05, "epoch": 0.7048200770150046, "percentage": 35.24, "elapsed_time": "9:35:50", "remaining_time": "17:38:23"} +{"current_steps": 2655, "total_steps": 7532, "loss": 0.32206645607948303, "lr": 1.5404897009063636e-05, "epoch": 
0.7050856459965475, "percentage": 35.25, "elapsed_time": "9:36:03", "remaining_time": "17:38:09"} +{"current_steps": 2656, "total_steps": 7532, "loss": 0.3023940920829773, "lr": 1.5401202314933436e-05, "epoch": 0.7053512149780906, "percentage": 35.26, "elapsed_time": "9:36:16", "remaining_time": "17:37:57"} +{"current_steps": 2657, "total_steps": 7532, "loss": 0.2839987277984619, "lr": 1.539750657951513e-05, "epoch": 0.7056167839596336, "percentage": 35.28, "elapsed_time": "9:36:29", "remaining_time": "17:37:43"} +{"current_steps": 2658, "total_steps": 7532, "loss": 0.2488149106502533, "lr": 1.5393809803521213e-05, "epoch": 0.7058823529411765, "percentage": 35.29, "elapsed_time": "9:36:42", "remaining_time": "17:37:30"} +{"current_steps": 2659, "total_steps": 7532, "loss": 0.27156201004981995, "lr": 1.539011198766438e-05, "epoch": 0.7061479219227195, "percentage": 35.3, "elapsed_time": "9:36:55", "remaining_time": "17:37:17"} +{"current_steps": 2660, "total_steps": 7532, "loss": 0.3038437068462372, "lr": 1.5386413132657528e-05, "epoch": 0.7064134909042624, "percentage": 35.32, "elapsed_time": "9:37:08", "remaining_time": "17:37:04"} +{"current_steps": 2661, "total_steps": 7532, "loss": 0.27626922726631165, "lr": 1.5382713239213746e-05, "epoch": 0.7066790598858054, "percentage": 35.33, "elapsed_time": "9:37:21", "remaining_time": "17:36:51"} +{"current_steps": 2662, "total_steps": 7532, "loss": 0.27338162064552307, "lr": 1.537901230804634e-05, "epoch": 0.7069446288673483, "percentage": 35.34, "elapsed_time": "9:37:34", "remaining_time": "17:36:38"} +{"current_steps": 2663, "total_steps": 7532, "loss": 0.2635098099708557, "lr": 1.5375310339868798e-05, "epoch": 0.7072101978488913, "percentage": 35.36, "elapsed_time": "9:37:47", "remaining_time": "17:36:25"} +{"current_steps": 2664, "total_steps": 7532, "loss": 0.3245551288127899, "lr": 1.537160733539482e-05, "epoch": 0.7074757668304342, "percentage": 35.37, "elapsed_time": "9:37:59", "remaining_time": "17:36:11"} 
+{"current_steps": 2665, "total_steps": 7532, "loss": 0.3226238787174225, "lr": 1.53679032953383e-05, "epoch": 0.7077413358119772, "percentage": 35.38, "elapsed_time": "9:38:13", "remaining_time": "17:35:58"} +{"current_steps": 2666, "total_steps": 7532, "loss": 0.31588318943977356, "lr": 1.536419822041333e-05, "epoch": 0.7080069047935201, "percentage": 35.4, "elapsed_time": "9:38:25", "remaining_time": "17:35:45"} +{"current_steps": 2667, "total_steps": 7532, "loss": 0.2494429647922516, "lr": 1.536049211133421e-05, "epoch": 0.7082724737750631, "percentage": 35.41, "elapsed_time": "9:38:38", "remaining_time": "17:35:32"} +{"current_steps": 2668, "total_steps": 7532, "loss": 0.30966901779174805, "lr": 1.5356784968815436e-05, "epoch": 0.708538042756606, "percentage": 35.42, "elapsed_time": "9:38:51", "remaining_time": "17:35:18"} +{"current_steps": 2669, "total_steps": 7532, "loss": 0.29383328557014465, "lr": 1.5353076793571692e-05, "epoch": 0.708803611738149, "percentage": 35.44, "elapsed_time": "9:39:05", "remaining_time": "17:35:06"} +{"current_steps": 2670, "total_steps": 7532, "loss": 0.30337825417518616, "lr": 1.5349367586317875e-05, "epoch": 0.7090691807196919, "percentage": 35.45, "elapsed_time": "9:39:17", "remaining_time": "17:34:53"} +{"current_steps": 2671, "total_steps": 7532, "loss": 0.28128665685653687, "lr": 1.5345657347769082e-05, "epoch": 0.7093347497012349, "percentage": 35.46, "elapsed_time": "9:39:31", "remaining_time": "17:34:40"} +{"current_steps": 2672, "total_steps": 7532, "loss": 0.35167062282562256, "lr": 1.5341946078640594e-05, "epoch": 0.7096003186827778, "percentage": 35.48, "elapsed_time": "9:39:44", "remaining_time": "17:34:27"} +{"current_steps": 2673, "total_steps": 7532, "loss": 0.30409517884254456, "lr": 1.533823377964791e-05, "epoch": 0.7098658876643208, "percentage": 35.49, "elapsed_time": "9:39:56", "remaining_time": "17:34:13"} +{"current_steps": 2674, "total_steps": 7532, "loss": 0.2667735815048218, "lr": 
1.5334520451506706e-05, "epoch": 0.7101314566458637, "percentage": 35.5, "elapsed_time": "9:40:10", "remaining_time": "17:34:01"} +{"current_steps": 2675, "total_steps": 7532, "loss": 0.290219247341156, "lr": 1.5330806094932876e-05, "epoch": 0.7103970256274067, "percentage": 35.52, "elapsed_time": "9:40:22", "remaining_time": "17:33:48"} +{"current_steps": 2676, "total_steps": 7532, "loss": 0.33118927478790283, "lr": 1.5327090710642503e-05, "epoch": 0.7106625946089496, "percentage": 35.53, "elapsed_time": "9:40:36", "remaining_time": "17:33:35"} +{"current_steps": 2677, "total_steps": 7532, "loss": 0.34287041425704956, "lr": 1.5323374299351867e-05, "epoch": 0.7109281635904926, "percentage": 35.54, "elapsed_time": "9:40:49", "remaining_time": "17:33:22"} +{"current_steps": 2678, "total_steps": 7532, "loss": 0.27093711495399475, "lr": 1.531965686177745e-05, "epoch": 0.7111937325720356, "percentage": 35.55, "elapsed_time": "9:41:02", "remaining_time": "17:33:09"} +{"current_steps": 2679, "total_steps": 7532, "loss": 0.2987911105155945, "lr": 1.531593839863593e-05, "epoch": 0.7114593015535785, "percentage": 35.57, "elapsed_time": "9:41:15", "remaining_time": "17:32:56"} +{"current_steps": 2680, "total_steps": 7532, "loss": 0.2914583086967468, "lr": 1.5312218910644185e-05, "epoch": 0.7117248705351215, "percentage": 35.58, "elapsed_time": "9:41:29", "remaining_time": "17:32:45"} +{"current_steps": 2681, "total_steps": 7532, "loss": 0.34159964323043823, "lr": 1.530849839851928e-05, "epoch": 0.7119904395166644, "percentage": 35.59, "elapsed_time": "9:41:42", "remaining_time": "17:32:31"} +{"current_steps": 2682, "total_steps": 7532, "loss": 0.28327372670173645, "lr": 1.5304776862978496e-05, "epoch": 0.7122560084982074, "percentage": 35.61, "elapsed_time": "9:41:55", "remaining_time": "17:32:19"} +{"current_steps": 2683, "total_steps": 7532, "loss": 0.2902851104736328, "lr": 1.5301054304739292e-05, "epoch": 0.7125215774797503, "percentage": 35.62, "elapsed_time": "9:42:08", 
"remaining_time": "17:32:05"} +{"current_steps": 2684, "total_steps": 7532, "loss": 0.3192726969718933, "lr": 1.5297330724519344e-05, "epoch": 0.7127871464612934, "percentage": 35.63, "elapsed_time": "9:42:21", "remaining_time": "17:31:52"} +{"current_steps": 2685, "total_steps": 7532, "loss": 0.30242764949798584, "lr": 1.5293606123036508e-05, "epoch": 0.7130527154428363, "percentage": 35.65, "elapsed_time": "9:42:34", "remaining_time": "17:31:40"} +{"current_steps": 2686, "total_steps": 7532, "loss": 0.2718653082847595, "lr": 1.528988050100884e-05, "epoch": 0.7133182844243793, "percentage": 35.66, "elapsed_time": "9:42:47", "remaining_time": "17:31:27"} +{"current_steps": 2687, "total_steps": 7532, "loss": 0.3014821708202362, "lr": 1.52861538591546e-05, "epoch": 0.7135838534059222, "percentage": 35.67, "elapsed_time": "9:43:00", "remaining_time": "17:31:15"} +{"current_steps": 2688, "total_steps": 7532, "loss": 0.3378177881240845, "lr": 1.528242619819224e-05, "epoch": 0.7138494223874652, "percentage": 35.69, "elapsed_time": "9:43:13", "remaining_time": "17:31:01"} +{"current_steps": 2689, "total_steps": 7532, "loss": 0.29286471009254456, "lr": 1.5278697518840415e-05, "epoch": 0.7141149913690081, "percentage": 35.7, "elapsed_time": "9:43:27", "remaining_time": "17:30:49"} +{"current_steps": 2690, "total_steps": 7532, "loss": 0.3371768593788147, "lr": 1.527496782181796e-05, "epoch": 0.7143805603505511, "percentage": 35.71, "elapsed_time": "9:43:40", "remaining_time": "17:30:36"} +{"current_steps": 2691, "total_steps": 7532, "loss": 0.30571556091308594, "lr": 1.5271237107843925e-05, "epoch": 0.714646129332094, "percentage": 35.73, "elapsed_time": "9:43:53", "remaining_time": "17:30:24"} +{"current_steps": 2692, "total_steps": 7532, "loss": 0.33064618706703186, "lr": 1.526750537763754e-05, "epoch": 0.714911698313637, "percentage": 35.74, "elapsed_time": "9:44:06", "remaining_time": "17:30:11"} +{"current_steps": 2693, "total_steps": 7532, "loss": 0.3369274139404297, 
"lr": 1.5263772631918242e-05, "epoch": 0.7151772672951799, "percentage": 35.75, "elapsed_time": "9:44:20", "remaining_time": "17:29:59"} +{"current_steps": 2694, "total_steps": 7532, "loss": 0.2422705739736557, "lr": 1.5260038871405663e-05, "epoch": 0.7154428362767229, "percentage": 35.77, "elapsed_time": "9:44:33", "remaining_time": "17:29:45"} +{"current_steps": 2695, "total_steps": 7532, "loss": 0.35786008834838867, "lr": 1.5256304096819628e-05, "epoch": 0.7157084052582658, "percentage": 35.78, "elapsed_time": "9:44:46", "remaining_time": "17:29:33"} +{"current_steps": 2696, "total_steps": 7532, "loss": 0.2853243052959442, "lr": 1.5252568308880155e-05, "epoch": 0.7159739742398088, "percentage": 35.79, "elapsed_time": "9:44:59", "remaining_time": "17:29:21"} +{"current_steps": 2697, "total_steps": 7532, "loss": 0.2903040051460266, "lr": 1.5248831508307459e-05, "epoch": 0.7162395432213517, "percentage": 35.81, "elapsed_time": "9:45:13", "remaining_time": "17:29:08"} +{"current_steps": 2698, "total_steps": 7532, "loss": 0.3375359773635864, "lr": 1.5245093695821954e-05, "epoch": 0.7165051122028947, "percentage": 35.82, "elapsed_time": "9:45:26", "remaining_time": "17:28:55"} +{"current_steps": 2699, "total_steps": 7532, "loss": 0.27855974435806274, "lr": 1.5241354872144242e-05, "epoch": 0.7167706811844377, "percentage": 35.83, "elapsed_time": "9:45:39", "remaining_time": "17:28:43"} +{"current_steps": 2700, "total_steps": 7532, "loss": 0.32226768136024475, "lr": 1.5237615037995129e-05, "epoch": 0.7170362501659806, "percentage": 35.85, "elapsed_time": "9:45:52", "remaining_time": "17:28:30"} +{"current_steps": 2701, "total_steps": 7532, "loss": 0.32856303453445435, "lr": 1.5233874194095606e-05, "epoch": 0.7173018191475236, "percentage": 35.86, "elapsed_time": "9:46:11", "remaining_time": "17:28:27"} +{"current_steps": 2702, "total_steps": 7532, "loss": 0.31619006395339966, "lr": 1.5230132341166868e-05, "epoch": 0.7175673881290665, "percentage": 35.87, "elapsed_time": 
"9:46:24", "remaining_time": "17:28:15"} +{"current_steps": 2703, "total_steps": 7532, "loss": 0.29736411571502686, "lr": 1.5226389479930296e-05, "epoch": 0.7178329571106095, "percentage": 35.89, "elapsed_time": "9:46:37", "remaining_time": "17:28:01"} +{"current_steps": 2704, "total_steps": 7532, "loss": 0.2767728865146637, "lr": 1.5222645611107477e-05, "epoch": 0.7180985260921524, "percentage": 35.9, "elapsed_time": "9:46:50", "remaining_time": "17:27:49"} +{"current_steps": 2705, "total_steps": 7532, "loss": 0.30994221568107605, "lr": 1.5218900735420174e-05, "epoch": 0.7183640950736954, "percentage": 35.91, "elapsed_time": "9:47:03", "remaining_time": "17:27:35"} +{"current_steps": 2706, "total_steps": 7532, "loss": 0.3419484496116638, "lr": 1.5215154853590362e-05, "epoch": 0.7186296640552383, "percentage": 35.93, "elapsed_time": "9:47:17", "remaining_time": "17:27:23"} +{"current_steps": 2707, "total_steps": 7532, "loss": 0.3063664436340332, "lr": 1.5211407966340203e-05, "epoch": 0.7188952330367813, "percentage": 35.94, "elapsed_time": "9:47:30", "remaining_time": "17:27:10"} +{"current_steps": 2708, "total_steps": 7532, "loss": 0.2856604754924774, "lr": 1.520766007439205e-05, "epoch": 0.7191608020183242, "percentage": 35.95, "elapsed_time": "9:47:43", "remaining_time": "17:26:57"} +{"current_steps": 2709, "total_steps": 7532, "loss": 0.23257851600646973, "lr": 1.5203911178468453e-05, "epoch": 0.7194263709998672, "percentage": 35.97, "elapsed_time": "9:47:56", "remaining_time": "17:26:44"} +{"current_steps": 2710, "total_steps": 7532, "loss": 0.31451839208602905, "lr": 1.5200161279292154e-05, "epoch": 0.7196919399814101, "percentage": 35.98, "elapsed_time": "9:48:09", "remaining_time": "17:26:32"} +{"current_steps": 2711, "total_steps": 7532, "loss": 0.30298277735710144, "lr": 1.5196410377586095e-05, "epoch": 0.7199575089629531, "percentage": 35.99, "elapsed_time": "9:48:22", "remaining_time": "17:26:18"} +{"current_steps": 2712, "total_steps": 7532, "loss": 
0.28654640913009644, "lr": 1.5192658474073398e-05, "epoch": 0.7202230779444961, "percentage": 36.01, "elapsed_time": "9:48:35", "remaining_time": "17:26:06"} +{"current_steps": 2713, "total_steps": 7532, "loss": 0.3148455023765564, "lr": 1.5188905569477391e-05, "epoch": 0.7204886469260391, "percentage": 36.02, "elapsed_time": "9:48:48", "remaining_time": "17:25:53"} +{"current_steps": 2714, "total_steps": 7532, "loss": 0.3004840612411499, "lr": 1.5185151664521585e-05, "epoch": 0.720754215907582, "percentage": 36.03, "elapsed_time": "9:49:01", "remaining_time": "17:25:39"} +{"current_steps": 2715, "total_steps": 7532, "loss": 0.3378010392189026, "lr": 1.518139675992969e-05, "epoch": 0.721019784889125, "percentage": 36.05, "elapsed_time": "9:49:14", "remaining_time": "17:25:27"} +{"current_steps": 2716, "total_steps": 7532, "loss": 0.3084215223789215, "lr": 1.517764085642561e-05, "epoch": 0.721285353870668, "percentage": 36.06, "elapsed_time": "9:49:28", "remaining_time": "17:25:15"} +{"current_steps": 2717, "total_steps": 7532, "loss": 0.3434324264526367, "lr": 1.517388395473344e-05, "epoch": 0.7215509228522109, "percentage": 36.07, "elapsed_time": "9:49:41", "remaining_time": "17:25:02"} +{"current_steps": 2718, "total_steps": 7532, "loss": 0.2862265706062317, "lr": 1.517012605557746e-05, "epoch": 0.7218164918337538, "percentage": 36.09, "elapsed_time": "9:49:54", "remaining_time": "17:24:48"} +{"current_steps": 2719, "total_steps": 7532, "loss": 0.2760370671749115, "lr": 1.5166367159682156e-05, "epoch": 0.7220820608152968, "percentage": 36.1, "elapsed_time": "9:50:07", "remaining_time": "17:24:35"} +{"current_steps": 2720, "total_steps": 7532, "loss": 0.26659202575683594, "lr": 1.5162607267772194e-05, "epoch": 0.7223476297968398, "percentage": 36.11, "elapsed_time": "9:50:19", "remaining_time": "17:24:21"} +{"current_steps": 2721, "total_steps": 7532, "loss": 0.31900978088378906, "lr": 1.5158846380572439e-05, "epoch": 0.7226131987783827, "percentage": 36.13, 
"elapsed_time": "9:50:32", "remaining_time": "17:24:08"} +{"current_steps": 2722, "total_steps": 7532, "loss": 0.2983658015727997, "lr": 1.5155084498807941e-05, "epoch": 0.7228787677599257, "percentage": 36.14, "elapsed_time": "9:50:45", "remaining_time": "17:23:54"} +{"current_steps": 2723, "total_steps": 7532, "loss": 0.3086162805557251, "lr": 1.5151321623203953e-05, "epoch": 0.7231443367414686, "percentage": 36.15, "elapsed_time": "9:50:58", "remaining_time": "17:23:42"} +{"current_steps": 2724, "total_steps": 7532, "loss": 0.3233461380004883, "lr": 1.5147557754485908e-05, "epoch": 0.7234099057230116, "percentage": 36.17, "elapsed_time": "9:51:11", "remaining_time": "17:23:29"} +{"current_steps": 2725, "total_steps": 7532, "loss": 0.2979195713996887, "lr": 1.5143792893379441e-05, "epoch": 0.7236754747045545, "percentage": 36.18, "elapsed_time": "9:51:24", "remaining_time": "17:23:16"} +{"current_steps": 2726, "total_steps": 7532, "loss": 0.27854713797569275, "lr": 1.5140027040610367e-05, "epoch": 0.7239410436860975, "percentage": 36.19, "elapsed_time": "9:51:38", "remaining_time": "17:23:04"} +{"current_steps": 2727, "total_steps": 7532, "loss": 0.293560266494751, "lr": 1.5136260196904704e-05, "epoch": 0.7242066126676404, "percentage": 36.21, "elapsed_time": "9:51:51", "remaining_time": "17:22:50"} +{"current_steps": 2728, "total_steps": 7532, "loss": 0.3033742308616638, "lr": 1.513249236298865e-05, "epoch": 0.7244721816491834, "percentage": 36.22, "elapsed_time": "9:52:04", "remaining_time": "17:22:38"} +{"current_steps": 2729, "total_steps": 7532, "loss": 0.27958324551582336, "lr": 1.51287235395886e-05, "epoch": 0.7247377506307263, "percentage": 36.23, "elapsed_time": "9:52:17", "remaining_time": "17:22:24"} +{"current_steps": 2730, "total_steps": 7532, "loss": 0.3063122034072876, "lr": 1.512495372743114e-05, "epoch": 0.7250033196122693, "percentage": 36.25, "elapsed_time": "9:52:30", "remaining_time": "17:22:11"} +{"current_steps": 2731, "total_steps": 7532, 
"loss": 0.29126864671707153, "lr": 1.5121182927243043e-05, "epoch": 0.7252688885938122, "percentage": 36.26, "elapsed_time": "9:52:42", "remaining_time": "17:21:57"} +{"current_steps": 2732, "total_steps": 7532, "loss": 0.27507084608078003, "lr": 1.5117411139751279e-05, "epoch": 0.7255344575753552, "percentage": 36.27, "elapsed_time": "9:52:55", "remaining_time": "17:21:44"} +{"current_steps": 2733, "total_steps": 7532, "loss": 0.3432404398918152, "lr": 1.5113638365682996e-05, "epoch": 0.7258000265568981, "percentage": 36.29, "elapsed_time": "9:53:08", "remaining_time": "17:21:30"} +{"current_steps": 2734, "total_steps": 7532, "loss": 0.27633196115493774, "lr": 1.5109864605765552e-05, "epoch": 0.7260655955384411, "percentage": 36.3, "elapsed_time": "9:53:20", "remaining_time": "17:21:17"} +{"current_steps": 2735, "total_steps": 7532, "loss": 0.274509072303772, "lr": 1.5106089860726474e-05, "epoch": 0.726331164519984, "percentage": 36.31, "elapsed_time": "9:53:33", "remaining_time": "17:21:03"} +{"current_steps": 2736, "total_steps": 7532, "loss": 0.26650723814964294, "lr": 1.5102314131293494e-05, "epoch": 0.726596733501527, "percentage": 36.33, "elapsed_time": "9:53:47", "remaining_time": "17:20:51"} +{"current_steps": 2737, "total_steps": 7532, "loss": 0.24476298689842224, "lr": 1.5098537418194524e-05, "epoch": 0.7268623024830699, "percentage": 36.34, "elapsed_time": "9:53:59", "remaining_time": "17:20:37"} +{"current_steps": 2738, "total_steps": 7532, "loss": 0.3337150812149048, "lr": 1.5094759722157671e-05, "epoch": 0.7271278714646129, "percentage": 36.35, "elapsed_time": "9:54:13", "remaining_time": "17:20:25"} +{"current_steps": 2739, "total_steps": 7532, "loss": 0.3147660195827484, "lr": 1.509098104391123e-05, "epoch": 0.7273934404461558, "percentage": 36.36, "elapsed_time": "9:54:25", "remaining_time": "17:20:12"} +{"current_steps": 2740, "total_steps": 7532, "loss": 0.2613281309604645, "lr": 1.5087201384183687e-05, "epoch": 0.7276590094276989, "percentage": 
36.38, "elapsed_time": "9:54:39", "remaining_time": "17:19:59"} +{"current_steps": 2741, "total_steps": 7532, "loss": 0.2773926854133606, "lr": 1.5083420743703717e-05, "epoch": 0.7279245784092419, "percentage": 36.39, "elapsed_time": "9:54:52", "remaining_time": "17:19:46"} +{"current_steps": 2742, "total_steps": 7532, "loss": 0.30515575408935547, "lr": 1.5079639123200179e-05, "epoch": 0.7281901473907848, "percentage": 36.4, "elapsed_time": "9:55:05", "remaining_time": "17:19:33"} +{"current_steps": 2743, "total_steps": 7532, "loss": 0.3174355626106262, "lr": 1.5075856523402128e-05, "epoch": 0.7284557163723278, "percentage": 36.42, "elapsed_time": "9:55:18", "remaining_time": "17:19:20"} +{"current_steps": 2744, "total_steps": 7532, "loss": 0.25163760781288147, "lr": 1.5072072945038802e-05, "epoch": 0.7287212853538707, "percentage": 36.43, "elapsed_time": "9:55:31", "remaining_time": "17:19:06"} +{"current_steps": 2745, "total_steps": 7532, "loss": 0.28822118043899536, "lr": 1.5068288388839634e-05, "epoch": 0.7289868543354137, "percentage": 36.44, "elapsed_time": "9:55:44", "remaining_time": "17:18:54"} +{"current_steps": 2746, "total_steps": 7532, "loss": 0.3129134476184845, "lr": 1.5064502855534237e-05, "epoch": 0.7292524233169566, "percentage": 36.46, "elapsed_time": "9:55:56", "remaining_time": "17:18:40"} +{"current_steps": 2747, "total_steps": 7532, "loss": 0.332313597202301, "lr": 1.5060716345852423e-05, "epoch": 0.7295179922984996, "percentage": 36.47, "elapsed_time": "9:56:09", "remaining_time": "17:18:27"} +{"current_steps": 2748, "total_steps": 7532, "loss": 0.3425176739692688, "lr": 1.5056928860524181e-05, "epoch": 0.7297835612800425, "percentage": 36.48, "elapsed_time": "9:56:22", "remaining_time": "17:18:14"} +{"current_steps": 2749, "total_steps": 7532, "loss": 0.2737991511821747, "lr": 1.5053140400279693e-05, "epoch": 0.7300491302615855, "percentage": 36.5, "elapsed_time": "9:56:36", "remaining_time": "17:18:02"} +{"current_steps": 2750, 
"total_steps": 7532, "loss": 0.27506589889526367, "lr": 1.5049350965849337e-05, "epoch": 0.7303146992431284, "percentage": 36.51, "elapsed_time": "9:56:49", "remaining_time": "17:17:48"} +{"current_steps": 2751, "total_steps": 7532, "loss": 0.25581830739974976, "lr": 1.5045560557963663e-05, "epoch": 0.7305802682246714, "percentage": 36.52, "elapsed_time": "9:57:02", "remaining_time": "17:17:35"} +{"current_steps": 2752, "total_steps": 7532, "loss": 0.31746333837509155, "lr": 1.5041769177353423e-05, "epoch": 0.7308458372062143, "percentage": 36.54, "elapsed_time": "9:57:15", "remaining_time": "17:17:23"} +{"current_steps": 2753, "total_steps": 7532, "loss": 0.3119337260723114, "lr": 1.5037976824749545e-05, "epoch": 0.7311114061877573, "percentage": 36.55, "elapsed_time": "9:57:28", "remaining_time": "17:17:11"} +{"current_steps": 2754, "total_steps": 7532, "loss": 0.3330266773700714, "lr": 1.5034183500883153e-05, "epoch": 0.7313769751693002, "percentage": 36.56, "elapsed_time": "9:57:41", "remaining_time": "17:16:57"} +{"current_steps": 2755, "total_steps": 7532, "loss": 0.2794867753982544, "lr": 1.5030389206485554e-05, "epoch": 0.7316425441508432, "percentage": 36.58, "elapsed_time": "9:57:54", "remaining_time": "17:16:44"} +{"current_steps": 2756, "total_steps": 7532, "loss": 0.33273079991340637, "lr": 1.5026593942288248e-05, "epoch": 0.7319081131323861, "percentage": 36.59, "elapsed_time": "9:58:07", "remaining_time": "17:16:31"} +{"current_steps": 2757, "total_steps": 7532, "loss": 0.30673256516456604, "lr": 1.502279770902291e-05, "epoch": 0.7321736821139291, "percentage": 36.6, "elapsed_time": "9:58:20", "remaining_time": "17:16:18"} +{"current_steps": 2758, "total_steps": 7532, "loss": 0.3126910924911499, "lr": 1.5019000507421412e-05, "epoch": 0.732439251095472, "percentage": 36.62, "elapsed_time": "9:58:34", "remaining_time": "17:16:06"} +{"current_steps": 2759, "total_steps": 7532, "loss": 0.35423290729522705, "lr": 1.5015202338215811e-05, "epoch": 
0.732704820077015, "percentage": 36.63, "elapsed_time": "9:58:47", "remaining_time": "17:15:53"} +{"current_steps": 2760, "total_steps": 7532, "loss": 0.31541377305984497, "lr": 1.5011403202138346e-05, "epoch": 0.7329703890585579, "percentage": 36.64, "elapsed_time": "9:59:00", "remaining_time": "17:15:41"} +{"current_steps": 2761, "total_steps": 7532, "loss": 0.31460440158843994, "lr": 1.5007603099921451e-05, "epoch": 0.7332359580401009, "percentage": 36.66, "elapsed_time": "9:59:13", "remaining_time": "17:15:27"} +{"current_steps": 2762, "total_steps": 7532, "loss": 0.2786293923854828, "lr": 1.5003802032297735e-05, "epoch": 0.7335015270216438, "percentage": 36.67, "elapsed_time": "9:59:27", "remaining_time": "17:15:15"} +{"current_steps": 2763, "total_steps": 7532, "loss": 0.27977997064590454, "lr": 1.5000000000000002e-05, "epoch": 0.7337670960031868, "percentage": 36.68, "elapsed_time": "9:59:40", "remaining_time": "17:15:03"} +{"current_steps": 2764, "total_steps": 7532, "loss": 0.2933383584022522, "lr": 1.4996197003761237e-05, "epoch": 0.7340326649847297, "percentage": 36.7, "elapsed_time": "9:59:54", "remaining_time": "17:14:51"} +{"current_steps": 2765, "total_steps": 7532, "loss": 0.30623573064804077, "lr": 1.4992393044314617e-05, "epoch": 0.7342982339662727, "percentage": 36.71, "elapsed_time": "10:00:07", "remaining_time": "17:14:38"} +{"current_steps": 2766, "total_steps": 7532, "loss": 0.28665077686309814, "lr": 1.4988588122393497e-05, "epoch": 0.7345638029478156, "percentage": 36.72, "elapsed_time": "10:00:20", "remaining_time": "17:14:25"} +{"current_steps": 2767, "total_steps": 7532, "loss": 0.3245697021484375, "lr": 1.4984782238731422e-05, "epoch": 0.7348293719293586, "percentage": 36.74, "elapsed_time": "10:00:33", "remaining_time": "17:14:11"} +{"current_steps": 2768, "total_steps": 7532, "loss": 0.29477447271347046, "lr": 1.4980975394062122e-05, "epoch": 0.7350949409109017, "percentage": 36.75, "elapsed_time": "10:00:45", "remaining_time": 
"17:13:58"} +{"current_steps": 2769, "total_steps": 7532, "loss": 0.29174134135246277, "lr": 1.4977167589119508e-05, "epoch": 0.7353605098924446, "percentage": 36.76, "elapsed_time": "10:00:59", "remaining_time": "17:13:45"} +{"current_steps": 2770, "total_steps": 7532, "loss": 0.29473474621772766, "lr": 1.4973358824637687e-05, "epoch": 0.7356260788739876, "percentage": 36.78, "elapsed_time": "10:01:11", "remaining_time": "17:13:32"} +{"current_steps": 2771, "total_steps": 7532, "loss": 0.3095156252384186, "lr": 1.4969549101350938e-05, "epoch": 0.7358916478555305, "percentage": 36.79, "elapsed_time": "10:01:24", "remaining_time": "17:13:19"} +{"current_steps": 2772, "total_steps": 7532, "loss": 0.26295265555381775, "lr": 1.4965738419993733e-05, "epoch": 0.7361572168370735, "percentage": 36.8, "elapsed_time": "10:01:37", "remaining_time": "17:13:06"} +{"current_steps": 2773, "total_steps": 7532, "loss": 0.2989509701728821, "lr": 1.4961926781300723e-05, "epoch": 0.7364227858186164, "percentage": 36.82, "elapsed_time": "10:01:51", "remaining_time": "17:12:53"} +{"current_steps": 2774, "total_steps": 7532, "loss": 0.31087079644203186, "lr": 1.4958114186006756e-05, "epoch": 0.7366883548001594, "percentage": 36.83, "elapsed_time": "10:02:04", "remaining_time": "17:12:40"} +{"current_steps": 2775, "total_steps": 7532, "loss": 0.3063197433948517, "lr": 1.4954300634846845e-05, "epoch": 0.7369539237817023, "percentage": 36.84, "elapsed_time": "10:02:17", "remaining_time": "17:12:27"} +{"current_steps": 2776, "total_steps": 7532, "loss": 0.3149424195289612, "lr": 1.4950486128556208e-05, "epoch": 0.7372194927632453, "percentage": 36.86, "elapsed_time": "10:02:29", "remaining_time": "17:12:14"} +{"current_steps": 2777, "total_steps": 7532, "loss": 0.2724878191947937, "lr": 1.4946670667870224e-05, "epoch": 0.7374850617447882, "percentage": 36.87, "elapsed_time": "10:02:42", "remaining_time": "17:12:01"} +{"current_steps": 2778, "total_steps": 7532, "loss": 0.2556690275669098, 
"lr": 1.4942854253524479e-05, "epoch": 0.7377506307263312, "percentage": 36.88, "elapsed_time": "10:02:55", "remaining_time": "17:11:47"} +{"current_steps": 2779, "total_steps": 7532, "loss": 0.2704542875289917, "lr": 1.4939036886254727e-05, "epoch": 0.7380161997078741, "percentage": 36.9, "elapsed_time": "10:03:08", "remaining_time": "17:11:34"} +{"current_steps": 2780, "total_steps": 7532, "loss": 0.26762163639068604, "lr": 1.4935218566796918e-05, "epoch": 0.7382817686894171, "percentage": 36.91, "elapsed_time": "10:03:21", "remaining_time": "17:11:21"} +{"current_steps": 2781, "total_steps": 7532, "loss": 0.3376831114292145, "lr": 1.4931399295887172e-05, "epoch": 0.73854733767096, "percentage": 36.92, "elapsed_time": "10:03:34", "remaining_time": "17:11:07"} +{"current_steps": 2782, "total_steps": 7532, "loss": 0.2980082631111145, "lr": 1.4927579074261803e-05, "epoch": 0.738812906652503, "percentage": 36.94, "elapsed_time": "10:03:47", "remaining_time": "17:10:54"} +{"current_steps": 2783, "total_steps": 7532, "loss": 0.27693796157836914, "lr": 1.4923757902657306e-05, "epoch": 0.7390784756340459, "percentage": 36.95, "elapsed_time": "10:04:00", "remaining_time": "17:10:41"} +{"current_steps": 2784, "total_steps": 7532, "loss": 0.3109282851219177, "lr": 1.4919935781810353e-05, "epoch": 0.7393440446155889, "percentage": 36.96, "elapsed_time": "10:04:13", "remaining_time": "17:10:28"} +{"current_steps": 2785, "total_steps": 7532, "loss": 0.3123949468135834, "lr": 1.4916112712457807e-05, "epoch": 0.7396096135971318, "percentage": 36.98, "elapsed_time": "10:04:26", "remaining_time": "17:10:15"} +{"current_steps": 2786, "total_steps": 7532, "loss": 0.3232062757015228, "lr": 1.4912288695336709e-05, "epoch": 0.7398751825786748, "percentage": 36.99, "elapsed_time": "10:04:39", "remaining_time": "17:10:02"} +{"current_steps": 2787, "total_steps": 7532, "loss": 0.2685563862323761, "lr": 1.4908463731184287e-05, "epoch": 0.7401407515602177, "percentage": 37.0, 
"elapsed_time": "10:04:51", "remaining_time": "17:09:48"} +{"current_steps": 2788, "total_steps": 7532, "loss": 0.25752881169319153, "lr": 1.4904637820737945e-05, "epoch": 0.7404063205417607, "percentage": 37.02, "elapsed_time": "10:05:04", "remaining_time": "17:09:35"} +{"current_steps": 2789, "total_steps": 7532, "loss": 0.2887497544288635, "lr": 1.4900810964735279e-05, "epoch": 0.7406718895233037, "percentage": 37.03, "elapsed_time": "10:05:17", "remaining_time": "17:09:22"} +{"current_steps": 2790, "total_steps": 7532, "loss": 0.28804779052734375, "lr": 1.489698316391406e-05, "epoch": 0.7409374585048466, "percentage": 37.04, "elapsed_time": "10:05:30", "remaining_time": "17:09:09"} +{"current_steps": 2791, "total_steps": 7532, "loss": 0.2684408724308014, "lr": 1.489315441901224e-05, "epoch": 0.7412030274863896, "percentage": 37.06, "elapsed_time": "10:05:43", "remaining_time": "17:08:56"} +{"current_steps": 2792, "total_steps": 7532, "loss": 0.31945526599884033, "lr": 1.4889324730767959e-05, "epoch": 0.7414685964679325, "percentage": 37.07, "elapsed_time": "10:05:56", "remaining_time": "17:08:43"} +{"current_steps": 2793, "total_steps": 7532, "loss": 0.34446024894714355, "lr": 1.488549409991953e-05, "epoch": 0.7417341654494755, "percentage": 37.08, "elapsed_time": "10:06:09", "remaining_time": "17:08:29"} +{"current_steps": 2794, "total_steps": 7532, "loss": 0.28849151730537415, "lr": 1.488166252720546e-05, "epoch": 0.7419997344310184, "percentage": 37.1, "elapsed_time": "10:06:22", "remaining_time": "17:08:17"} +{"current_steps": 2795, "total_steps": 7532, "loss": 0.2793633043766022, "lr": 1.4877830013364429e-05, "epoch": 0.7422653034125614, "percentage": 37.11, "elapsed_time": "10:06:35", "remaining_time": "17:08:03"} +{"current_steps": 2796, "total_steps": 7532, "loss": 0.3211687505245209, "lr": 1.4873996559135298e-05, "epoch": 0.7425308723941043, "percentage": 37.12, "elapsed_time": "10:06:47", "remaining_time": "17:07:49"} +{"current_steps": 2797, 
"total_steps": 7532, "loss": 0.26225876808166504, "lr": 1.4870162165257114e-05, "epoch": 0.7427964413756474, "percentage": 37.13, "elapsed_time": "10:07:00", "remaining_time": "17:07:36"} +{"current_steps": 2798, "total_steps": 7532, "loss": 0.3100029528141022, "lr": 1.4866326832469105e-05, "epoch": 0.7430620103571903, "percentage": 37.15, "elapsed_time": "10:07:13", "remaining_time": "17:07:22"} +{"current_steps": 2799, "total_steps": 7532, "loss": 0.29399827122688293, "lr": 1.4862490561510675e-05, "epoch": 0.7433275793387333, "percentage": 37.16, "elapsed_time": "10:07:26", "remaining_time": "17:07:09"} +{"current_steps": 2800, "total_steps": 7532, "loss": 0.27357399463653564, "lr": 1.4858653353121412e-05, "epoch": 0.7435931483202762, "percentage": 37.17, "elapsed_time": "10:07:39", "remaining_time": "17:06:56"} +{"current_steps": 2801, "total_steps": 7532, "loss": 0.34575730562210083, "lr": 1.4854815208041087e-05, "epoch": 0.7438587173018192, "percentage": 37.19, "elapsed_time": "10:07:57", "remaining_time": "17:06:51"} +{"current_steps": 2802, "total_steps": 7532, "loss": 0.28487247228622437, "lr": 1.4850976127009644e-05, "epoch": 0.7441242862833621, "percentage": 37.2, "elapsed_time": "10:08:10", "remaining_time": "17:06:38"} +{"current_steps": 2803, "total_steps": 7532, "loss": 0.264443576335907, "lr": 1.484713611076722e-05, "epoch": 0.7443898552649051, "percentage": 37.21, "elapsed_time": "10:08:22", "remaining_time": "17:06:24"} +{"current_steps": 2804, "total_steps": 7532, "loss": 0.32750973105430603, "lr": 1.4843295160054116e-05, "epoch": 0.744655424246448, "percentage": 37.23, "elapsed_time": "10:08:35", "remaining_time": "17:06:11"} +{"current_steps": 2805, "total_steps": 7532, "loss": 0.24080191552639008, "lr": 1.4839453275610827e-05, "epoch": 0.744920993227991, "percentage": 37.24, "elapsed_time": "10:08:49", "remaining_time": "17:05:59"} +{"current_steps": 2806, "total_steps": 7532, "loss": 0.31667011976242065, "lr": 1.4835610458178025e-05, "epoch": 
0.7451865622095339, "percentage": 37.25, "elapsed_time": "10:09:02", "remaining_time": "17:05:45"} +{"current_steps": 2807, "total_steps": 7532, "loss": 0.2754175066947937, "lr": 1.4831766708496553e-05, "epoch": 0.7454521311910769, "percentage": 37.27, "elapsed_time": "10:09:15", "remaining_time": "17:05:33"} +{"current_steps": 2808, "total_steps": 7532, "loss": 0.2890132963657379, "lr": 1.482792202730745e-05, "epoch": 0.7457177001726198, "percentage": 37.28, "elapsed_time": "10:09:28", "remaining_time": "17:05:20"} +{"current_steps": 2809, "total_steps": 7532, "loss": 0.3402877748012543, "lr": 1.4824076415351918e-05, "epoch": 0.7459832691541628, "percentage": 37.29, "elapsed_time": "10:09:40", "remaining_time": "17:05:06"} +{"current_steps": 2810, "total_steps": 7532, "loss": 0.3167210519313812, "lr": 1.4820229873371347e-05, "epoch": 0.7462488381357057, "percentage": 37.31, "elapsed_time": "10:09:54", "remaining_time": "17:04:54"} +{"current_steps": 2811, "total_steps": 7532, "loss": 0.2653643786907196, "lr": 1.4816382402107308e-05, "epoch": 0.7465144071172487, "percentage": 37.32, "elapsed_time": "10:10:07", "remaining_time": "17:04:41"} +{"current_steps": 2812, "total_steps": 7532, "loss": 0.3202674984931946, "lr": 1.4812534002301547e-05, "epoch": 0.7467799760987917, "percentage": 37.33, "elapsed_time": "10:10:20", "remaining_time": "17:04:28"} +{"current_steps": 2813, "total_steps": 7532, "loss": 0.2942724823951721, "lr": 1.4808684674695985e-05, "epoch": 0.7470455450803346, "percentage": 37.35, "elapsed_time": "10:10:33", "remaining_time": "17:04:15"} +{"current_steps": 2814, "total_steps": 7532, "loss": 0.28640663623809814, "lr": 1.480483442003273e-05, "epoch": 0.7473111140618776, "percentage": 37.36, "elapsed_time": "10:10:46", "remaining_time": "17:04:02"} +{"current_steps": 2815, "total_steps": 7532, "loss": 0.26214420795440674, "lr": 1.4800983239054071e-05, "epoch": 0.7475766830434205, "percentage": 37.37, "elapsed_time": "10:10:59", "remaining_time": 
"17:03:49"} +{"current_steps": 2816, "total_steps": 7532, "loss": 0.3288992643356323, "lr": 1.4797131132502464e-05, "epoch": 0.7478422520249635, "percentage": 37.39, "elapsed_time": "10:11:13", "remaining_time": "17:03:36"} +{"current_steps": 2817, "total_steps": 7532, "loss": 0.2622208297252655, "lr": 1.4793278101120551e-05, "epoch": 0.7481078210065064, "percentage": 37.4, "elapsed_time": "10:11:25", "remaining_time": "17:03:23"} +{"current_steps": 2818, "total_steps": 7532, "loss": 0.3223533034324646, "lr": 1.4789424145651152e-05, "epoch": 0.7483733899880494, "percentage": 37.41, "elapsed_time": "10:11:38", "remaining_time": "17:03:10"} +{"current_steps": 2819, "total_steps": 7532, "loss": 0.25849875807762146, "lr": 1.4785569266837264e-05, "epoch": 0.7486389589695923, "percentage": 37.43, "elapsed_time": "10:11:51", "remaining_time": "17:02:56"} +{"current_steps": 2820, "total_steps": 7532, "loss": 0.3477833569049835, "lr": 1.478171346542206e-05, "epoch": 0.7489045279511353, "percentage": 37.44, "elapsed_time": "10:12:04", "remaining_time": "17:02:44"} +{"current_steps": 2821, "total_steps": 7532, "loss": 0.2799205780029297, "lr": 1.4777856742148897e-05, "epoch": 0.7491700969326782, "percentage": 37.45, "elapsed_time": "10:12:17", "remaining_time": "17:02:30"} +{"current_steps": 2822, "total_steps": 7532, "loss": 0.2591988444328308, "lr": 1.4773999097761304e-05, "epoch": 0.7494356659142212, "percentage": 37.47, "elapsed_time": "10:12:30", "remaining_time": "17:02:18"} +{"current_steps": 2823, "total_steps": 7532, "loss": 0.30161747336387634, "lr": 1.477014053300299e-05, "epoch": 0.7497012348957641, "percentage": 37.48, "elapsed_time": "10:12:43", "remaining_time": "17:02:04"} +{"current_steps": 2824, "total_steps": 7532, "loss": 0.28202176094055176, "lr": 1.4766281048617837e-05, "epoch": 0.7499668038773071, "percentage": 37.49, "elapsed_time": "10:12:57", "remaining_time": "17:01:52"} +{"current_steps": 2825, "total_steps": 7532, "loss": 0.26074907183647156, 
"lr": 1.4762420645349912e-05, "epoch": 0.7502323728588501, "percentage": 37.51, "elapsed_time": "10:13:10", "remaining_time": "17:01:39"} +{"current_steps": 2826, "total_steps": 7532, "loss": 0.2822819948196411, "lr": 1.4758559323943455e-05, "epoch": 0.7504979418403931, "percentage": 37.52, "elapsed_time": "10:13:22", "remaining_time": "17:01:26"} +{"current_steps": 2827, "total_steps": 7532, "loss": 0.2704991102218628, "lr": 1.4754697085142879e-05, "epoch": 0.750763510821936, "percentage": 37.53, "elapsed_time": "10:13:36", "remaining_time": "17:01:13"} +{"current_steps": 2828, "total_steps": 7532, "loss": 0.2627401053905487, "lr": 1.4750833929692785e-05, "epoch": 0.751029079803479, "percentage": 37.55, "elapsed_time": "10:13:49", "remaining_time": "17:01:00"} +{"current_steps": 2829, "total_steps": 7532, "loss": 0.2898240089416504, "lr": 1.474696985833794e-05, "epoch": 0.751294648785022, "percentage": 37.56, "elapsed_time": "10:14:02", "remaining_time": "17:00:48"} +{"current_steps": 2830, "total_steps": 7532, "loss": 0.30080029368400574, "lr": 1.4743104871823291e-05, "epoch": 0.7515602177665649, "percentage": 37.57, "elapsed_time": "10:14:15", "remaining_time": "17:00:35"} +{"current_steps": 2831, "total_steps": 7532, "loss": 0.2950359284877777, "lr": 1.473923897089396e-05, "epoch": 0.7518257867481078, "percentage": 37.59, "elapsed_time": "10:14:29", "remaining_time": "17:00:22"} +{"current_steps": 2832, "total_steps": 7532, "loss": 0.31936827301979065, "lr": 1.4735372156295253e-05, "epoch": 0.7520913557296508, "percentage": 37.6, "elapsed_time": "10:14:42", "remaining_time": "17:00:09"} +{"current_steps": 2833, "total_steps": 7532, "loss": 0.2771468460559845, "lr": 1.4731504428772642e-05, "epoch": 0.7523569247111938, "percentage": 37.61, "elapsed_time": "10:14:55", "remaining_time": "16:59:57"} +{"current_steps": 2834, "total_steps": 7532, "loss": 0.3135997951030731, "lr": 1.4727635789071779e-05, "epoch": 0.7526224936927367, "percentage": 37.63, "elapsed_time": 
"10:15:08", "remaining_time": "16:59:44"} +{"current_steps": 2835, "total_steps": 7532, "loss": 0.29874372482299805, "lr": 1.4723766237938495e-05, "epoch": 0.7528880626742797, "percentage": 37.64, "elapsed_time": "10:15:21", "remaining_time": "16:59:31"} +{"current_steps": 2836, "total_steps": 7532, "loss": 0.249681293964386, "lr": 1.4719895776118789e-05, "epoch": 0.7531536316558226, "percentage": 37.65, "elapsed_time": "10:15:34", "remaining_time": "16:59:18"} +{"current_steps": 2837, "total_steps": 7532, "loss": 0.28544771671295166, "lr": 1.4716024404358847e-05, "epoch": 0.7534192006373656, "percentage": 37.67, "elapsed_time": "10:15:48", "remaining_time": "16:59:06"} +{"current_steps": 2838, "total_steps": 7532, "loss": 0.32532355189323425, "lr": 1.4712152123405018e-05, "epoch": 0.7536847696189085, "percentage": 37.68, "elapsed_time": "10:16:00", "remaining_time": "16:58:52"} +{"current_steps": 2839, "total_steps": 7532, "loss": 0.31663140654563904, "lr": 1.4708278934003835e-05, "epoch": 0.7539503386004515, "percentage": 37.69, "elapsed_time": "10:16:13", "remaining_time": "16:58:39"} +{"current_steps": 2840, "total_steps": 7532, "loss": 0.30552318692207336, "lr": 1.4704404836902005e-05, "epoch": 0.7542159075819944, "percentage": 37.71, "elapsed_time": "10:16:26", "remaining_time": "16:58:26"} +{"current_steps": 2841, "total_steps": 7532, "loss": 0.3019601106643677, "lr": 1.47005298328464e-05, "epoch": 0.7544814765635374, "percentage": 37.72, "elapsed_time": "10:16:39", "remaining_time": "16:58:13"} +{"current_steps": 2842, "total_steps": 7532, "loss": 0.321606308221817, "lr": 1.4696653922584084e-05, "epoch": 0.7547470455450803, "percentage": 37.73, "elapsed_time": "10:16:52", "remaining_time": "16:58:00"} +{"current_steps": 2843, "total_steps": 7532, "loss": 0.2709462642669678, "lr": 1.4692777106862281e-05, "epoch": 0.7550126145266233, "percentage": 37.75, "elapsed_time": "10:17:05", "remaining_time": "16:57:47"} +{"current_steps": 2844, "total_steps": 7532, 
"loss": 0.2882609963417053, "lr": 1.46888993864284e-05, "epoch": 0.7552781835081662, "percentage": 37.76, "elapsed_time": "10:17:19", "remaining_time": "16:57:34"} +{"current_steps": 2845, "total_steps": 7532, "loss": 0.25843000411987305, "lr": 1.4685020762030019e-05, "epoch": 0.7555437524897092, "percentage": 37.77, "elapsed_time": "10:17:32", "remaining_time": "16:57:21"} +{"current_steps": 2846, "total_steps": 7532, "loss": 0.30962038040161133, "lr": 1.4681141234414889e-05, "epoch": 0.7558093214712521, "percentage": 37.79, "elapsed_time": "10:17:45", "remaining_time": "16:57:08"} +{"current_steps": 2847, "total_steps": 7532, "loss": 0.304874062538147, "lr": 1.4677260804330938e-05, "epoch": 0.7560748904527951, "percentage": 37.8, "elapsed_time": "10:17:57", "remaining_time": "16:56:54"} +{"current_steps": 2848, "total_steps": 7532, "loss": 0.3425619602203369, "lr": 1.4673379472526268e-05, "epoch": 0.756340459434338, "percentage": 37.81, "elapsed_time": "10:18:10", "remaining_time": "16:56:42"} +{"current_steps": 2849, "total_steps": 7532, "loss": 0.3002302050590515, "lr": 1.4669497239749153e-05, "epoch": 0.756606028415881, "percentage": 37.83, "elapsed_time": "10:18:23", "remaining_time": "16:56:28"} +{"current_steps": 2850, "total_steps": 7532, "loss": 0.31008803844451904, "lr": 1.4665614106748038e-05, "epoch": 0.7568715973974239, "percentage": 37.84, "elapsed_time": "10:18:36", "remaining_time": "16:56:16"} +{"current_steps": 2851, "total_steps": 7532, "loss": 0.27829408645629883, "lr": 1.4661730074271551e-05, "epoch": 0.7571371663789669, "percentage": 37.85, "elapsed_time": "10:18:50", "remaining_time": "16:56:03"} +{"current_steps": 2852, "total_steps": 7532, "loss": 0.25915467739105225, "lr": 1.4657845143068488e-05, "epoch": 0.7574027353605098, "percentage": 37.87, "elapsed_time": "10:19:03", "remaining_time": "16:55:50"} +{"current_steps": 2853, "total_steps": 7532, "loss": 0.2843416929244995, "lr": 1.4653959313887813e-05, "epoch": 0.7576683043420529, 
"percentage": 37.88, "elapsed_time": "10:19:16", "remaining_time": "16:55:37"} +{"current_steps": 2854, "total_steps": 7532, "loss": 0.2851647138595581, "lr": 1.465007258747867e-05, "epoch": 0.7579338733235959, "percentage": 37.89, "elapsed_time": "10:19:29", "remaining_time": "16:55:25"} +{"current_steps": 2855, "total_steps": 7532, "loss": 0.266017884016037, "lr": 1.4646184964590378e-05, "epoch": 0.7581994423051388, "percentage": 37.9, "elapsed_time": "10:19:42", "remaining_time": "16:55:12"} +{"current_steps": 2856, "total_steps": 7532, "loss": 0.30142179131507874, "lr": 1.4642296445972421e-05, "epoch": 0.7584650112866818, "percentage": 37.92, "elapsed_time": "10:19:56", "remaining_time": "16:54:59"} +{"current_steps": 2857, "total_steps": 7532, "loss": 0.2878327965736389, "lr": 1.463840703237446e-05, "epoch": 0.7587305802682247, "percentage": 37.93, "elapsed_time": "10:20:09", "remaining_time": "16:54:46"} +{"current_steps": 2858, "total_steps": 7532, "loss": 0.2919169068336487, "lr": 1.4634516724546326e-05, "epoch": 0.7589961492497677, "percentage": 37.94, "elapsed_time": "10:20:21", "remaining_time": "16:54:33"} +{"current_steps": 2859, "total_steps": 7532, "loss": 0.2530924081802368, "lr": 1.4630625523238027e-05, "epoch": 0.7592617182313106, "percentage": 37.96, "elapsed_time": "10:20:35", "remaining_time": "16:54:20"} +{"current_steps": 2860, "total_steps": 7532, "loss": 0.3009106516838074, "lr": 1.462673342919974e-05, "epoch": 0.7595272872128536, "percentage": 37.97, "elapsed_time": "10:20:48", "remaining_time": "16:54:07"} +{"current_steps": 2861, "total_steps": 7532, "loss": 0.3114222288131714, "lr": 1.4622840443181817e-05, "epoch": 0.7597928561943965, "percentage": 37.98, "elapsed_time": "10:21:01", "remaining_time": "16:53:55"} +{"current_steps": 2862, "total_steps": 7532, "loss": 0.344540536403656, "lr": 1.4618946565934775e-05, "epoch": 0.7600584251759395, "percentage": 38.0, "elapsed_time": "10:21:14", "remaining_time": "16:53:41"} +{"current_steps": 
2863, "total_steps": 7532, "loss": 0.263607919216156, "lr": 1.4615051798209312e-05, "epoch": 0.7603239941574824, "percentage": 38.01, "elapsed_time": "10:21:28", "remaining_time": "16:53:29"} +{"current_steps": 2864, "total_steps": 7532, "loss": 0.2685706317424774, "lr": 1.4611156140756293e-05, "epoch": 0.7605895631390254, "percentage": 38.02, "elapsed_time": "10:21:41", "remaining_time": "16:53:16"} +{"current_steps": 2865, "total_steps": 7532, "loss": 0.32342326641082764, "lr": 1.4607259594326752e-05, "epoch": 0.7608551321205683, "percentage": 38.04, "elapsed_time": "10:21:54", "remaining_time": "16:53:04"} +{"current_steps": 2866, "total_steps": 7532, "loss": 0.3088849186897278, "lr": 1.4603362159671902e-05, "epoch": 0.7611207011021113, "percentage": 38.05, "elapsed_time": "10:22:07", "remaining_time": "16:52:51"} +{"current_steps": 2867, "total_steps": 7532, "loss": 0.26718589663505554, "lr": 1.4599463837543114e-05, "epoch": 0.7613862700836542, "percentage": 38.06, "elapsed_time": "10:22:20", "remaining_time": "16:52:38"} +{"current_steps": 2868, "total_steps": 7532, "loss": 0.29511263966560364, "lr": 1.4595564628691944e-05, "epoch": 0.7616518390651972, "percentage": 38.08, "elapsed_time": "10:22:34", "remaining_time": "16:52:25"} +{"current_steps": 2869, "total_steps": 7532, "loss": 0.2940484285354614, "lr": 1.4591664533870118e-05, "epoch": 0.7619174080467401, "percentage": 38.09, "elapsed_time": "10:22:46", "remaining_time": "16:52:12"} +{"current_steps": 2870, "total_steps": 7532, "loss": 0.28167295455932617, "lr": 1.4587763553829521e-05, "epoch": 0.7621829770282831, "percentage": 38.1, "elapsed_time": "10:23:00", "remaining_time": "16:51:59"} +{"current_steps": 2871, "total_steps": 7532, "loss": 0.3362962007522583, "lr": 1.4583861689322219e-05, "epoch": 0.762448546009826, "percentage": 38.12, "elapsed_time": "10:23:12", "remaining_time": "16:51:46"} +{"current_steps": 2872, "total_steps": 7532, "loss": 0.3003339171409607, "lr": 1.4579958941100445e-05, 
"epoch": 0.762714114991369, "percentage": 38.13, "elapsed_time": "10:23:26", "remaining_time": "16:51:34"} +{"current_steps": 2873, "total_steps": 7532, "loss": 0.3191443979740143, "lr": 1.4576055309916602e-05, "epoch": 0.7629796839729119, "percentage": 38.14, "elapsed_time": "10:23:39", "remaining_time": "16:51:20"} +{"current_steps": 2874, "total_steps": 7532, "loss": 0.30804574489593506, "lr": 1.4572150796523265e-05, "epoch": 0.7632452529544549, "percentage": 38.16, "elapsed_time": "10:23:52", "remaining_time": "16:51:08"} +{"current_steps": 2875, "total_steps": 7532, "loss": 0.32462549209594727, "lr": 1.4568245401673178e-05, "epoch": 0.7635108219359978, "percentage": 38.17, "elapsed_time": "10:24:05", "remaining_time": "16:50:55"} +{"current_steps": 2876, "total_steps": 7532, "loss": 0.27751386165618896, "lr": 1.4564339126119254e-05, "epoch": 0.7637763909175408, "percentage": 38.18, "elapsed_time": "10:24:19", "remaining_time": "16:50:43"} +{"current_steps": 2877, "total_steps": 7532, "loss": 0.27194011211395264, "lr": 1.4560431970614578e-05, "epoch": 0.7640419598990837, "percentage": 38.2, "elapsed_time": "10:24:31", "remaining_time": "16:50:29"} +{"current_steps": 2878, "total_steps": 7532, "loss": 0.28701072931289673, "lr": 1.4556523935912406e-05, "epoch": 0.7643075288806267, "percentage": 38.21, "elapsed_time": "10:24:45", "remaining_time": "16:50:17"} +{"current_steps": 2879, "total_steps": 7532, "loss": 0.3278783857822418, "lr": 1.4552615022766156e-05, "epoch": 0.7645730978621696, "percentage": 38.22, "elapsed_time": "10:24:58", "remaining_time": "16:50:04"} +{"current_steps": 2880, "total_steps": 7532, "loss": 0.3292006254196167, "lr": 1.4548705231929426e-05, "epoch": 0.7648386668437126, "percentage": 38.24, "elapsed_time": "10:25:11", "remaining_time": "16:49:51"} +{"current_steps": 2881, "total_steps": 7532, "loss": 0.33038759231567383, "lr": 1.4544794564155971e-05, "epoch": 0.7651042358252557, "percentage": 38.25, "elapsed_time": "10:25:24", 
"remaining_time": "16:49:38"} +{"current_steps": 2882, "total_steps": 7532, "loss": 0.29183000326156616, "lr": 1.4540883020199725e-05, "epoch": 0.7653698048067986, "percentage": 38.26, "elapsed_time": "10:25:37", "remaining_time": "16:49:25"} +{"current_steps": 2883, "total_steps": 7532, "loss": 0.28066399693489075, "lr": 1.4536970600814789e-05, "epoch": 0.7656353737883416, "percentage": 38.28, "elapsed_time": "10:25:50", "remaining_time": "16:49:12"} +{"current_steps": 2884, "total_steps": 7532, "loss": 0.2832046151161194, "lr": 1.4533057306755427e-05, "epoch": 0.7659009427698845, "percentage": 38.29, "elapsed_time": "10:26:03", "remaining_time": "16:48:59"} +{"current_steps": 2885, "total_steps": 7532, "loss": 0.3006540834903717, "lr": 1.4529143138776078e-05, "epoch": 0.7661665117514275, "percentage": 38.3, "elapsed_time": "10:26:17", "remaining_time": "16:48:47"} +{"current_steps": 2886, "total_steps": 7532, "loss": 0.2793240547180176, "lr": 1.4525228097631351e-05, "epoch": 0.7664320807329704, "percentage": 38.32, "elapsed_time": "10:26:30", "remaining_time": "16:48:34"} +{"current_steps": 2887, "total_steps": 7532, "loss": 0.2895192503929138, "lr": 1.452131218407602e-05, "epoch": 0.7666976497145134, "percentage": 38.33, "elapsed_time": "10:26:43", "remaining_time": "16:48:21"} +{"current_steps": 2888, "total_steps": 7532, "loss": 0.27707618474960327, "lr": 1.4517395398865022e-05, "epoch": 0.7669632186960563, "percentage": 38.34, "elapsed_time": "10:26:56", "remaining_time": "16:48:08"} +{"current_steps": 2889, "total_steps": 7532, "loss": 0.29167065024375916, "lr": 1.4513477742753465e-05, "epoch": 0.7672287876775993, "percentage": 38.36, "elapsed_time": "10:27:09", "remaining_time": "16:47:56"} +{"current_steps": 2890, "total_steps": 7532, "loss": 0.2670987844467163, "lr": 1.4509559216496631e-05, "epoch": 0.7674943566591422, "percentage": 38.37, "elapsed_time": "10:27:23", "remaining_time": "16:47:43"} +{"current_steps": 2891, "total_steps": 7532, "loss": 
0.3025206923484802, "lr": 1.4505639820849968e-05, "epoch": 0.7677599256406852, "percentage": 38.38, "elapsed_time": "10:27:36", "remaining_time": "16:47:30"} +{"current_steps": 2892, "total_steps": 7532, "loss": 0.3104705512523651, "lr": 1.4501719556569087e-05, "epoch": 0.7680254946222281, "percentage": 38.4, "elapsed_time": "10:27:49", "remaining_time": "16:47:17"} +{"current_steps": 2893, "total_steps": 7532, "loss": 0.2972267270088196, "lr": 1.4497798424409766e-05, "epoch": 0.7682910636037711, "percentage": 38.41, "elapsed_time": "10:28:02", "remaining_time": "16:47:05"} +{"current_steps": 2894, "total_steps": 7532, "loss": 0.34956347942352295, "lr": 1.4493876425127957e-05, "epoch": 0.768556632585314, "percentage": 38.42, "elapsed_time": "10:28:15", "remaining_time": "16:46:52"} +{"current_steps": 2895, "total_steps": 7532, "loss": 0.3122873902320862, "lr": 1.4489953559479775e-05, "epoch": 0.768822201566857, "percentage": 38.44, "elapsed_time": "10:28:29", "remaining_time": "16:46:39"} +{"current_steps": 2896, "total_steps": 7532, "loss": 0.29645755887031555, "lr": 1.4486029828221497e-05, "epoch": 0.7690877705483999, "percentage": 38.45, "elapsed_time": "10:28:41", "remaining_time": "16:46:26"} +{"current_steps": 2897, "total_steps": 7532, "loss": 0.33357223868370056, "lr": 1.448210523210958e-05, "epoch": 0.7693533395299429, "percentage": 38.46, "elapsed_time": "10:28:54", "remaining_time": "16:46:13"} +{"current_steps": 2898, "total_steps": 7532, "loss": 0.2780191898345947, "lr": 1.4478179771900634e-05, "epoch": 0.7696189085114858, "percentage": 38.48, "elapsed_time": "10:29:08", "remaining_time": "16:46:00"} +{"current_steps": 2899, "total_steps": 7532, "loss": 0.31503236293792725, "lr": 1.447425344835144e-05, "epoch": 0.7698844774930288, "percentage": 38.49, "elapsed_time": "10:29:21", "remaining_time": "16:45:48"} +{"current_steps": 2900, "total_steps": 7532, "loss": 0.2843332290649414, "lr": 1.4470326262218955e-05, "epoch": 0.7701500464745717, "percentage": 
38.5, "elapsed_time": "10:29:34", "remaining_time": "16:45:35"} +{"current_steps": 2901, "total_steps": 7532, "loss": 0.305475652217865, "lr": 1.4466398214260286e-05, "epoch": 0.7704156154561147, "percentage": 38.52, "elapsed_time": "10:29:53", "remaining_time": "16:45:30"} +{"current_steps": 2902, "total_steps": 7532, "loss": 0.28418007493019104, "lr": 1.446246930523272e-05, "epoch": 0.7706811844376577, "percentage": 38.53, "elapsed_time": "10:30:06", "remaining_time": "16:45:18"} +{"current_steps": 2903, "total_steps": 7532, "loss": 0.28237032890319824, "lr": 1.44585395358937e-05, "epoch": 0.7709467534192006, "percentage": 38.54, "elapsed_time": "10:30:19", "remaining_time": "16:45:04"} +{"current_steps": 2904, "total_steps": 7532, "loss": 0.33727777004241943, "lr": 1.4454608907000843e-05, "epoch": 0.7712123224007436, "percentage": 38.56, "elapsed_time": "10:30:32", "remaining_time": "16:44:52"} +{"current_steps": 2905, "total_steps": 7532, "loss": 0.2977198660373688, "lr": 1.4450677419311925e-05, "epoch": 0.7714778913822865, "percentage": 38.57, "elapsed_time": "10:30:45", "remaining_time": "16:44:39"} +{"current_steps": 2906, "total_steps": 7532, "loss": 0.3095981776714325, "lr": 1.4446745073584891e-05, "epoch": 0.7717434603638295, "percentage": 38.58, "elapsed_time": "10:30:58", "remaining_time": "16:44:26"} +{"current_steps": 2907, "total_steps": 7532, "loss": 0.29808440804481506, "lr": 1.4442811870577851e-05, "epoch": 0.7720090293453724, "percentage": 38.6, "elapsed_time": "10:31:11", "remaining_time": "16:44:13"} +{"current_steps": 2908, "total_steps": 7532, "loss": 0.32444530725479126, "lr": 1.4438877811049079e-05, "epoch": 0.7722745983269154, "percentage": 38.61, "elapsed_time": "10:31:24", "remaining_time": "16:44:00"} +{"current_steps": 2909, "total_steps": 7532, "loss": 0.24782602488994598, "lr": 1.443494289575702e-05, "epoch": 0.7725401673084584, "percentage": 38.62, "elapsed_time": "10:31:37", "remaining_time": "16:43:47"} +{"current_steps": 2910, 
"total_steps": 7532, "loss": 0.31289762258529663, "lr": 1.4431007125460274e-05, "epoch": 0.7728057362900014, "percentage": 38.64, "elapsed_time": "10:31:50", "remaining_time": "16:43:33"} +{"current_steps": 2911, "total_steps": 7532, "loss": 0.31444042921066284, "lr": 1.4427070500917615e-05, "epoch": 0.7730713052715443, "percentage": 38.65, "elapsed_time": "10:32:03", "remaining_time": "16:43:20"} +{"current_steps": 2912, "total_steps": 7532, "loss": 0.31347882747650146, "lr": 1.4423133022887973e-05, "epoch": 0.7733368742530873, "percentage": 38.66, "elapsed_time": "10:32:16", "remaining_time": "16:43:07"} +{"current_steps": 2913, "total_steps": 7532, "loss": 0.3025411367416382, "lr": 1.4419194692130453e-05, "epoch": 0.7736024432346302, "percentage": 38.67, "elapsed_time": "10:32:29", "remaining_time": "16:42:54"} +{"current_steps": 2914, "total_steps": 7532, "loss": 0.2954581081867218, "lr": 1.4415255509404316e-05, "epoch": 0.7738680122161732, "percentage": 38.69, "elapsed_time": "10:32:42", "remaining_time": "16:42:41"} +{"current_steps": 2915, "total_steps": 7532, "loss": 0.2675531506538391, "lr": 1.4411315475468988e-05, "epoch": 0.7741335811977161, "percentage": 38.7, "elapsed_time": "10:32:56", "remaining_time": "16:42:29"} +{"current_steps": 2916, "total_steps": 7532, "loss": 0.29307854175567627, "lr": 1.4407374591084064e-05, "epoch": 0.7743991501792591, "percentage": 38.71, "elapsed_time": "10:33:09", "remaining_time": "16:42:16"} +{"current_steps": 2917, "total_steps": 7532, "loss": 0.2805953025817871, "lr": 1.4403432857009295e-05, "epoch": 0.774664719160802, "percentage": 38.73, "elapsed_time": "10:33:22", "remaining_time": "16:42:03"} +{"current_steps": 2918, "total_steps": 7532, "loss": 0.30805838108062744, "lr": 1.439949027400461e-05, "epoch": 0.774930288142345, "percentage": 38.74, "elapsed_time": "10:33:35", "remaining_time": "16:41:50"} +{"current_steps": 2919, "total_steps": 7532, "loss": 0.31501835584640503, "lr": 1.4395546842830085e-05, "epoch": 
0.7751958571238879, "percentage": 38.75, "elapsed_time": "10:33:48", "remaining_time": "16:41:38"} +{"current_steps": 2920, "total_steps": 7532, "loss": 0.2719186246395111, "lr": 1.4391602564245975e-05, "epoch": 0.7754614261054309, "percentage": 38.77, "elapsed_time": "10:34:01", "remaining_time": "16:41:25"} +{"current_steps": 2921, "total_steps": 7532, "loss": 0.29554325342178345, "lr": 1.4387657439012677e-05, "epoch": 0.7757269950869738, "percentage": 38.78, "elapsed_time": "10:34:15", "remaining_time": "16:41:13"} +{"current_steps": 2922, "total_steps": 7532, "loss": 0.2993816137313843, "lr": 1.4383711467890776e-05, "epoch": 0.7759925640685168, "percentage": 38.79, "elapsed_time": "10:34:28", "remaining_time": "16:41:00"} +{"current_steps": 2923, "total_steps": 7532, "loss": 0.3412264883518219, "lr": 1.4379764651641004e-05, "epoch": 0.7762581330500598, "percentage": 38.81, "elapsed_time": "10:34:42", "remaining_time": "16:40:48"} +{"current_steps": 2924, "total_steps": 7532, "loss": 0.3137913942337036, "lr": 1.4375816991024263e-05, "epoch": 0.7765237020316027, "percentage": 38.82, "elapsed_time": "10:34:55", "remaining_time": "16:40:35"} +{"current_steps": 2925, "total_steps": 7532, "loss": 0.2710151672363281, "lr": 1.4371868486801611e-05, "epoch": 0.7767892710131457, "percentage": 38.83, "elapsed_time": "10:35:08", "remaining_time": "16:40:23"} +{"current_steps": 2926, "total_steps": 7532, "loss": 0.28521692752838135, "lr": 1.4367919139734279e-05, "epoch": 0.7770548399946886, "percentage": 38.85, "elapsed_time": "10:35:22", "remaining_time": "16:40:10"} +{"current_steps": 2927, "total_steps": 7532, "loss": 0.2889919579029083, "lr": 1.4363968950583651e-05, "epoch": 0.7773204089762316, "percentage": 38.86, "elapsed_time": "10:35:34", "remaining_time": "16:39:56"} +{"current_steps": 2928, "total_steps": 7532, "loss": 0.31562381982803345, "lr": 1.436001792011128e-05, "epoch": 0.7775859779577745, "percentage": 38.87, "elapsed_time": "10:35:48", "remaining_time": 
"16:39:44"} +{"current_steps": 2929, "total_steps": 7532, "loss": 0.2747528553009033, "lr": 1.4356066049078871e-05, "epoch": 0.7778515469393175, "percentage": 38.89, "elapsed_time": "10:36:01", "remaining_time": "16:39:31"} +{"current_steps": 2930, "total_steps": 7532, "loss": 0.2918938398361206, "lr": 1.4352113338248303e-05, "epoch": 0.7781171159208604, "percentage": 38.9, "elapsed_time": "10:36:14", "remaining_time": "16:39:19"} +{"current_steps": 2931, "total_steps": 7532, "loss": 0.3348507285118103, "lr": 1.4348159788381615e-05, "epoch": 0.7783826849024034, "percentage": 38.91, "elapsed_time": "10:36:27", "remaining_time": "16:39:05"} +{"current_steps": 2932, "total_steps": 7532, "loss": 0.27206242084503174, "lr": 1.4344205400241e-05, "epoch": 0.7786482538839463, "percentage": 38.93, "elapsed_time": "10:36:40", "remaining_time": "16:38:53"} +{"current_steps": 2933, "total_steps": 7532, "loss": 0.28496092557907104, "lr": 1.434025017458882e-05, "epoch": 0.7789138228654893, "percentage": 38.94, "elapsed_time": "10:36:53", "remaining_time": "16:38:40"} +{"current_steps": 2934, "total_steps": 7532, "loss": 0.3080131411552429, "lr": 1.4336294112187595e-05, "epoch": 0.7791793918470322, "percentage": 38.95, "elapsed_time": "10:37:07", "remaining_time": "16:38:27"} +{"current_steps": 2935, "total_steps": 7532, "loss": 0.3116779029369354, "lr": 1.4332337213800008e-05, "epoch": 0.7794449608285752, "percentage": 38.97, "elapsed_time": "10:37:20", "remaining_time": "16:38:14"} +{"current_steps": 2936, "total_steps": 7532, "loss": 0.26526543498039246, "lr": 1.43283794801889e-05, "epoch": 0.7797105298101181, "percentage": 38.98, "elapsed_time": "10:37:33", "remaining_time": "16:38:01"} +{"current_steps": 2937, "total_steps": 7532, "loss": 0.2829325497150421, "lr": 1.4324420912117274e-05, "epoch": 0.7799760987916612, "percentage": 38.99, "elapsed_time": "10:37:46", "remaining_time": "16:37:48"} +{"current_steps": 2938, "total_steps": 7532, "loss": 0.34146445989608765, "lr": 
1.43204615103483e-05, "epoch": 0.7802416677732041, "percentage": 39.01, "elapsed_time": "10:37:59", "remaining_time": "16:37:36"} +{"current_steps": 2939, "total_steps": 7532, "loss": 0.316609650850296, "lr": 1.43165012756453e-05, "epoch": 0.7805072367547471, "percentage": 39.02, "elapsed_time": "10:38:13", "remaining_time": "16:37:23"} +{"current_steps": 2940, "total_steps": 7532, "loss": 0.3215107321739197, "lr": 1.4312540208771766e-05, "epoch": 0.78077280573629, "percentage": 39.03, "elapsed_time": "10:38:26", "remaining_time": "16:37:10"} +{"current_steps": 2941, "total_steps": 7532, "loss": 0.2834000587463379, "lr": 1.4308578310491342e-05, "epoch": 0.781038374717833, "percentage": 39.05, "elapsed_time": "10:38:40", "remaining_time": "16:36:58"} +{"current_steps": 2942, "total_steps": 7532, "loss": 0.30184993147850037, "lr": 1.430461558156783e-05, "epoch": 0.781303943699376, "percentage": 39.06, "elapsed_time": "10:38:52", "remaining_time": "16:36:45"} +{"current_steps": 2943, "total_steps": 7532, "loss": 0.3299996256828308, "lr": 1.4300652022765207e-05, "epoch": 0.7815695126809189, "percentage": 39.07, "elapsed_time": "10:39:06", "remaining_time": "16:36:33"} +{"current_steps": 2944, "total_steps": 7532, "loss": 0.27565228939056396, "lr": 1.4296687634847592e-05, "epoch": 0.7818350816624619, "percentage": 39.09, "elapsed_time": "10:39:19", "remaining_time": "16:36:19"} +{"current_steps": 2945, "total_steps": 7532, "loss": 0.30347493290901184, "lr": 1.4292722418579278e-05, "epoch": 0.7821006506440048, "percentage": 39.1, "elapsed_time": "10:39:32", "remaining_time": "16:36:07"} +{"current_steps": 2946, "total_steps": 7532, "loss": 0.31469428539276123, "lr": 1.4288756374724709e-05, "epoch": 0.7823662196255478, "percentage": 39.11, "elapsed_time": "10:39:46", "remaining_time": "16:35:54"} +{"current_steps": 2947, "total_steps": 7532, "loss": 0.27361029386520386, "lr": 1.4284789504048493e-05, "epoch": 0.7826317886070907, "percentage": 39.13, "elapsed_time": 
"10:39:59", "remaining_time": "16:35:42"} +{"current_steps": 2948, "total_steps": 7532, "loss": 0.29180705547332764, "lr": 1.428082180731539e-05, "epoch": 0.7828973575886337, "percentage": 39.14, "elapsed_time": "10:40:12", "remaining_time": "16:35:29"} +{"current_steps": 2949, "total_steps": 7532, "loss": 0.281120628118515, "lr": 1.4276853285290334e-05, "epoch": 0.7831629265701766, "percentage": 39.15, "elapsed_time": "10:40:26", "remaining_time": "16:35:17"} +{"current_steps": 2950, "total_steps": 7532, "loss": 0.26144471764564514, "lr": 1.4272883938738406e-05, "epoch": 0.7834284955517196, "percentage": 39.17, "elapsed_time": "10:40:38", "remaining_time": "16:35:03"} +{"current_steps": 2951, "total_steps": 7532, "loss": 0.3118991255760193, "lr": 1.4268913768424848e-05, "epoch": 0.7836940645332625, "percentage": 39.18, "elapsed_time": "10:40:52", "remaining_time": "16:34:51"} +{"current_steps": 2952, "total_steps": 7532, "loss": 0.29352328181266785, "lr": 1.4264942775115065e-05, "epoch": 0.7839596335148055, "percentage": 39.19, "elapsed_time": "10:41:05", "remaining_time": "16:34:38"} +{"current_steps": 2953, "total_steps": 7532, "loss": 0.2687748968601227, "lr": 1.426097095957461e-05, "epoch": 0.7842252024963484, "percentage": 39.21, "elapsed_time": "10:41:18", "remaining_time": "16:34:26"} +{"current_steps": 2954, "total_steps": 7532, "loss": 0.3106890916824341, "lr": 1.4256998322569212e-05, "epoch": 0.7844907714778914, "percentage": 39.22, "elapsed_time": "10:41:31", "remaining_time": "16:34:13"} +{"current_steps": 2955, "total_steps": 7532, "loss": 0.2522161304950714, "lr": 1.4253024864864742e-05, "epoch": 0.7847563404594343, "percentage": 39.23, "elapsed_time": "10:41:45", "remaining_time": "16:34:00"} +{"current_steps": 2956, "total_steps": 7532, "loss": 0.2994377613067627, "lr": 1.424905058722724e-05, "epoch": 0.7850219094409773, "percentage": 39.25, "elapsed_time": "10:41:58", "remaining_time": "16:33:47"} +{"current_steps": 2957, "total_steps": 7532, 
"loss": 0.3753565549850464, "lr": 1.4245075490422893e-05, "epoch": 0.7852874784225202, "percentage": 39.26, "elapsed_time": "10:42:11", "remaining_time": "16:33:35"} +{"current_steps": 2958, "total_steps": 7532, "loss": 0.29544737935066223, "lr": 1.424109957521806e-05, "epoch": 0.7855530474040632, "percentage": 39.27, "elapsed_time": "10:42:24", "remaining_time": "16:33:22"} +{"current_steps": 2959, "total_steps": 7532, "loss": 0.307847797870636, "lr": 1.423712284237925e-05, "epoch": 0.7858186163856061, "percentage": 39.29, "elapsed_time": "10:42:37", "remaining_time": "16:33:08"} +{"current_steps": 2960, "total_steps": 7532, "loss": 0.31758183240890503, "lr": 1.4233145292673127e-05, "epoch": 0.7860841853671491, "percentage": 39.3, "elapsed_time": "10:42:50", "remaining_time": "16:32:56"} +{"current_steps": 2961, "total_steps": 7532, "loss": 0.307254433631897, "lr": 1.4229166926866517e-05, "epoch": 0.786349754348692, "percentage": 39.31, "elapsed_time": "10:43:03", "remaining_time": "16:32:42"} +{"current_steps": 2962, "total_steps": 7532, "loss": 0.3513748049736023, "lr": 1.42251877457264e-05, "epoch": 0.786615323330235, "percentage": 39.33, "elapsed_time": "10:43:16", "remaining_time": "16:32:29"} +{"current_steps": 2963, "total_steps": 7532, "loss": 0.3025718629360199, "lr": 1.422120775001992e-05, "epoch": 0.7868808923117779, "percentage": 39.34, "elapsed_time": "10:43:29", "remaining_time": "16:32:16"} +{"current_steps": 2964, "total_steps": 7532, "loss": 0.2922811508178711, "lr": 1.4217226940514367e-05, "epoch": 0.7871464612933209, "percentage": 39.35, "elapsed_time": "10:43:42", "remaining_time": "16:32:04"} +{"current_steps": 2965, "total_steps": 7532, "loss": 0.29599297046661377, "lr": 1.42132453179772e-05, "epoch": 0.787412030274864, "percentage": 39.37, "elapsed_time": "10:43:55", "remaining_time": "16:31:50"} +{"current_steps": 2966, "total_steps": 7532, "loss": 0.28336548805236816, "lr": 1.4209262883176025e-05, "epoch": 0.7876775992564069, "percentage": 
39.38, "elapsed_time": "10:44:09", "remaining_time": "16:31:38"} +{"current_steps": 2967, "total_steps": 7532, "loss": 0.3100801110267639, "lr": 1.4205279636878613e-05, "epoch": 0.7879431682379499, "percentage": 39.39, "elapsed_time": "10:44:22", "remaining_time": "16:31:25"} +{"current_steps": 2968, "total_steps": 7532, "loss": 0.33067989349365234, "lr": 1.4201295579852881e-05, "epoch": 0.7882087372194928, "percentage": 39.41, "elapsed_time": "10:44:35", "remaining_time": "16:31:12"} +{"current_steps": 2969, "total_steps": 7532, "loss": 0.282347172498703, "lr": 1.4197310712866909e-05, "epoch": 0.7884743062010358, "percentage": 39.42, "elapsed_time": "10:44:48", "remaining_time": "16:30:58"} +{"current_steps": 2970, "total_steps": 7532, "loss": 0.30585426092147827, "lr": 1.419332503668894e-05, "epoch": 0.7887398751825787, "percentage": 39.43, "elapsed_time": "10:45:00", "remaining_time": "16:30:45"} +{"current_steps": 2971, "total_steps": 7532, "loss": 0.3011561632156372, "lr": 1.4189338552087351e-05, "epoch": 0.7890054441641217, "percentage": 39.45, "elapsed_time": "10:45:14", "remaining_time": "16:30:32"} +{"current_steps": 2972, "total_steps": 7532, "loss": 0.2700524926185608, "lr": 1.4185351259830705e-05, "epoch": 0.7892710131456646, "percentage": 39.46, "elapsed_time": "10:45:26", "remaining_time": "16:30:19"} +{"current_steps": 2973, "total_steps": 7532, "loss": 0.2963382303714752, "lr": 1.4181363160687693e-05, "epoch": 0.7895365821272076, "percentage": 39.47, "elapsed_time": "10:45:40", "remaining_time": "16:30:06"} +{"current_steps": 2974, "total_steps": 7532, "loss": 0.27132824063301086, "lr": 1.4177374255427183e-05, "epoch": 0.7898021511087505, "percentage": 39.48, "elapsed_time": "10:45:52", "remaining_time": "16:29:52"} +{"current_steps": 2975, "total_steps": 7532, "loss": 0.2539706826210022, "lr": 1.417338454481818e-05, "epoch": 0.7900677200902935, "percentage": 39.5, "elapsed_time": "10:46:05", "remaining_time": "16:29:40"} +{"current_steps": 2976, 
"total_steps": 7532, "loss": 0.28465601801872253, "lr": 1.416939402962986e-05, "epoch": 0.7903332890718364, "percentage": 39.51, "elapsed_time": "10:46:18", "remaining_time": "16:29:26"} +{"current_steps": 2977, "total_steps": 7532, "loss": 0.3020748198032379, "lr": 1.4165402710631544e-05, "epoch": 0.7905988580533794, "percentage": 39.52, "elapsed_time": "10:46:31", "remaining_time": "16:29:13"} +{"current_steps": 2978, "total_steps": 7532, "loss": 0.3157690465450287, "lr": 1.416141058859271e-05, "epoch": 0.7908644270349223, "percentage": 39.54, "elapsed_time": "10:46:44", "remaining_time": "16:29:00"} +{"current_steps": 2979, "total_steps": 7532, "loss": 0.2720191776752472, "lr": 1.4157417664282994e-05, "epoch": 0.7911299960164653, "percentage": 39.55, "elapsed_time": "10:46:58", "remaining_time": "16:28:48"} +{"current_steps": 2980, "total_steps": 7532, "loss": 0.2931746542453766, "lr": 1.4153423938472185e-05, "epoch": 0.7913955649980082, "percentage": 39.56, "elapsed_time": "10:47:11", "remaining_time": "16:28:36"} +{"current_steps": 2981, "total_steps": 7532, "loss": 0.2683875560760498, "lr": 1.4149429411930226e-05, "epoch": 0.7916611339795512, "percentage": 39.58, "elapsed_time": "10:47:24", "remaining_time": "16:28:23"} +{"current_steps": 2982, "total_steps": 7532, "loss": 0.2559819519519806, "lr": 1.4145434085427216e-05, "epoch": 0.7919267029610941, "percentage": 39.59, "elapsed_time": "10:47:38", "remaining_time": "16:28:10"} +{"current_steps": 2983, "total_steps": 7532, "loss": 0.2845582365989685, "lr": 1.4141437959733404e-05, "epoch": 0.7921922719426371, "percentage": 39.6, "elapsed_time": "10:47:50", "remaining_time": "16:27:57"} +{"current_steps": 2984, "total_steps": 7532, "loss": 0.26766544580459595, "lr": 1.4137441035619197e-05, "epoch": 0.79245784092418, "percentage": 39.62, "elapsed_time": "10:48:04", "remaining_time": "16:27:44"} +{"current_steps": 2985, "total_steps": 7532, "loss": 0.32089024782180786, "lr": 1.4133443313855155e-05, "epoch": 
0.792723409905723, "percentage": 39.63, "elapsed_time": "10:48:16", "remaining_time": "16:27:30"} +{"current_steps": 2986, "total_steps": 7532, "loss": 0.2756182551383972, "lr": 1.4129444795211993e-05, "epoch": 0.7929889788872659, "percentage": 39.64, "elapsed_time": "10:48:30", "remaining_time": "16:27:18"} +{"current_steps": 2987, "total_steps": 7532, "loss": 0.29487302899360657, "lr": 1.4125445480460573e-05, "epoch": 0.7932545478688089, "percentage": 39.66, "elapsed_time": "10:48:43", "remaining_time": "16:27:05"} +{"current_steps": 2988, "total_steps": 7532, "loss": 0.3362561762332916, "lr": 1.4121445370371922e-05, "epoch": 0.7935201168503518, "percentage": 39.67, "elapsed_time": "10:48:56", "remaining_time": "16:26:53"} +{"current_steps": 2989, "total_steps": 7532, "loss": 0.2986692488193512, "lr": 1.4117444465717209e-05, "epoch": 0.7937856858318948, "percentage": 39.68, "elapsed_time": "10:49:09", "remaining_time": "16:26:39"} +{"current_steps": 2990, "total_steps": 7532, "loss": 0.2725266218185425, "lr": 1.4113442767267766e-05, "epoch": 0.7940512548134377, "percentage": 39.7, "elapsed_time": "10:49:23", "remaining_time": "16:26:27"} +{"current_steps": 2991, "total_steps": 7532, "loss": 0.29827257990837097, "lr": 1.4109440275795071e-05, "epoch": 0.7943168237949807, "percentage": 39.71, "elapsed_time": "10:49:36", "remaining_time": "16:26:14"} +{"current_steps": 2992, "total_steps": 7532, "loss": 0.2506203055381775, "lr": 1.410543699207076e-05, "epoch": 0.7945823927765236, "percentage": 39.72, "elapsed_time": "10:49:49", "remaining_time": "16:26:01"} +{"current_steps": 2993, "total_steps": 7532, "loss": 0.2675034701824188, "lr": 1.410143291686661e-05, "epoch": 0.7948479617580667, "percentage": 39.74, "elapsed_time": "10:50:02", "remaining_time": "16:25:49"} +{"current_steps": 2994, "total_steps": 7532, "loss": 0.34528690576553345, "lr": 1.4097428050954571e-05, "epoch": 0.7951135307396097, "percentage": 39.75, "elapsed_time": "10:50:16", "remaining_time": 
"16:25:36"} +{"current_steps": 2995, "total_steps": 7532, "loss": 0.27551063895225525, "lr": 1.4093422395106726e-05, "epoch": 0.7953790997211526, "percentage": 39.76, "elapsed_time": "10:50:29", "remaining_time": "16:25:23"} +{"current_steps": 2996, "total_steps": 7532, "loss": 0.3176268935203552, "lr": 1.408941595009532e-05, "epoch": 0.7956446687026956, "percentage": 39.78, "elapsed_time": "10:50:42", "remaining_time": "16:25:11"} +{"current_steps": 2997, "total_steps": 7532, "loss": 0.30056723952293396, "lr": 1.408540871669275e-05, "epoch": 0.7959102376842385, "percentage": 39.79, "elapsed_time": "10:50:56", "remaining_time": "16:24:59"} +{"current_steps": 2998, "total_steps": 7532, "loss": 0.32109886407852173, "lr": 1.4081400695671562e-05, "epoch": 0.7961758066657815, "percentage": 39.8, "elapsed_time": "10:51:09", "remaining_time": "16:24:45"} +{"current_steps": 2999, "total_steps": 7532, "loss": 0.33622005581855774, "lr": 1.4077391887804457e-05, "epoch": 0.7964413756473244, "percentage": 39.82, "elapsed_time": "10:51:22", "remaining_time": "16:24:33"} +{"current_steps": 3000, "total_steps": 7532, "loss": 0.3054961860179901, "lr": 1.4073382293864283e-05, "epoch": 0.7967069446288674, "percentage": 39.83, "elapsed_time": "10:51:35", "remaining_time": "16:24:19"} +{"current_steps": 3001, "total_steps": 7532, "loss": 0.3022462725639343, "lr": 1.4069371914624044e-05, "epoch": 0.7969725136104103, "percentage": 39.84, "elapsed_time": "10:51:53", "remaining_time": "16:24:14"} +{"current_steps": 3002, "total_steps": 7532, "loss": 0.2500512897968292, "lr": 1.4065360750856891e-05, "epoch": 0.7972380825919533, "percentage": 39.86, "elapsed_time": "10:52:06", "remaining_time": "16:24:01"} +{"current_steps": 3003, "total_steps": 7532, "loss": 0.2960171699523926, "lr": 1.4061348803336135e-05, "epoch": 0.7975036515734962, "percentage": 39.87, "elapsed_time": "10:52:19", "remaining_time": "16:23:48"} +{"current_steps": 3004, "total_steps": 7532, "loss": 0.2941724359989166, 
"lr": 1.4057336072835228e-05, "epoch": 0.7977692205550392, "percentage": 39.88, "elapsed_time": "10:52:32", "remaining_time": "16:23:35"} +{"current_steps": 3005, "total_steps": 7532, "loss": 0.2827858328819275, "lr": 1.4053322560127779e-05, "epoch": 0.7980347895365821, "percentage": 39.9, "elapsed_time": "10:52:46", "remaining_time": "16:23:23"} +{"current_steps": 3006, "total_steps": 7532, "loss": 0.32525116205215454, "lr": 1.4049308265987544e-05, "epoch": 0.7983003585181251, "percentage": 39.91, "elapsed_time": "10:52:58", "remaining_time": "16:23:09"} +{"current_steps": 3007, "total_steps": 7532, "loss": 0.26509979367256165, "lr": 1.4045293191188431e-05, "epoch": 0.798565927499668, "percentage": 39.92, "elapsed_time": "10:53:11", "remaining_time": "16:22:56"} +{"current_steps": 3008, "total_steps": 7532, "loss": 0.3462742567062378, "lr": 1.4041277336504503e-05, "epoch": 0.798831496481211, "percentage": 39.94, "elapsed_time": "10:53:24", "remaining_time": "16:22:42"} +{"current_steps": 3009, "total_steps": 7532, "loss": 0.2971092164516449, "lr": 1.4037260702709967e-05, "epoch": 0.7990970654627539, "percentage": 39.95, "elapsed_time": "10:53:37", "remaining_time": "16:22:29"} +{"current_steps": 3010, "total_steps": 7532, "loss": 0.32359808683395386, "lr": 1.4033243290579182e-05, "epoch": 0.7993626344442969, "percentage": 39.96, "elapsed_time": "10:53:49", "remaining_time": "16:22:16"} +{"current_steps": 3011, "total_steps": 7532, "loss": 0.2949031591415405, "lr": 1.4029225100886657e-05, "epoch": 0.7996282034258398, "percentage": 39.98, "elapsed_time": "10:54:02", "remaining_time": "16:22:02"} +{"current_steps": 3012, "total_steps": 7532, "loss": 0.29888901114463806, "lr": 1.4025206134407051e-05, "epoch": 0.7998937724073828, "percentage": 39.99, "elapsed_time": "10:54:15", "remaining_time": "16:21:49"} +{"current_steps": 3013, "total_steps": 7532, "loss": 0.2999705672264099, "lr": 1.4021186391915181e-05, "epoch": 0.8001593413889257, "percentage": 40.0, 
"elapsed_time": "10:54:28", "remaining_time": "16:21:36"} +{"current_steps": 3014, "total_steps": 7532, "loss": 0.2725638449192047, "lr": 1.4017165874185996e-05, "epoch": 0.8004249103704687, "percentage": 40.02, "elapsed_time": "10:54:41", "remaining_time": "16:21:23"} +{"current_steps": 3015, "total_steps": 7532, "loss": 0.2809314727783203, "lr": 1.4013144581994609e-05, "epoch": 0.8006904793520117, "percentage": 40.03, "elapsed_time": "10:54:54", "remaining_time": "16:21:09"} +{"current_steps": 3016, "total_steps": 7532, "loss": 0.30335327982902527, "lr": 1.400912251611628e-05, "epoch": 0.8009560483335546, "percentage": 40.04, "elapsed_time": "10:55:07", "remaining_time": "16:20:56"} +{"current_steps": 3017, "total_steps": 7532, "loss": 0.27780598402023315, "lr": 1.400509967732641e-05, "epoch": 0.8012216173150976, "percentage": 40.06, "elapsed_time": "10:55:19", "remaining_time": "16:20:42"} +{"current_steps": 3018, "total_steps": 7532, "loss": 0.2865309715270996, "lr": 1.400107606640056e-05, "epoch": 0.8014871862966405, "percentage": 40.07, "elapsed_time": "10:55:32", "remaining_time": "16:20:29"} +{"current_steps": 3019, "total_steps": 7532, "loss": 0.2691546082496643, "lr": 1.3997051684114431e-05, "epoch": 0.8017527552781835, "percentage": 40.08, "elapsed_time": "10:55:45", "remaining_time": "16:20:15"} +{"current_steps": 3020, "total_steps": 7532, "loss": 0.30289226770401, "lr": 1.3993026531243876e-05, "epoch": 0.8020183242597264, "percentage": 40.1, "elapsed_time": "10:55:58", "remaining_time": "16:20:02"} +{"current_steps": 3021, "total_steps": 7532, "loss": 0.2767682671546936, "lr": 1.3989000608564905e-05, "epoch": 0.8022838932412695, "percentage": 40.11, "elapsed_time": "10:56:10", "remaining_time": "16:19:48"} +{"current_steps": 3022, "total_steps": 7532, "loss": 0.3423742353916168, "lr": 1.3984973916853657e-05, "epoch": 0.8025494622228124, "percentage": 40.12, "elapsed_time": "10:56:23", "remaining_time": "16:19:36"} +{"current_steps": 3023, 
"total_steps": 7532, "loss": 0.3000536561012268, "lr": 1.3980946456886439e-05, "epoch": 0.8028150312043554, "percentage": 40.14, "elapsed_time": "10:56:36", "remaining_time": "16:19:22"} +{"current_steps": 3024, "total_steps": 7532, "loss": 0.3071063756942749, "lr": 1.3976918229439698e-05, "epoch": 0.8030806001858983, "percentage": 40.15, "elapsed_time": "10:56:49", "remaining_time": "16:19:09"} +{"current_steps": 3025, "total_steps": 7532, "loss": 0.31261157989501953, "lr": 1.397288923529002e-05, "epoch": 0.8033461691674413, "percentage": 40.16, "elapsed_time": "10:57:01", "remaining_time": "16:18:55"} +{"current_steps": 3026, "total_steps": 7532, "loss": 0.2658939063549042, "lr": 1.3968859475214156e-05, "epoch": 0.8036117381489842, "percentage": 40.18, "elapsed_time": "10:57:15", "remaining_time": "16:18:42"} +{"current_steps": 3027, "total_steps": 7532, "loss": 0.2772905230522156, "lr": 1.3964828949988993e-05, "epoch": 0.8038773071305272, "percentage": 40.19, "elapsed_time": "10:57:27", "remaining_time": "16:18:28"} +{"current_steps": 3028, "total_steps": 7532, "loss": 0.2903479337692261, "lr": 1.396079766039157e-05, "epoch": 0.8041428761120701, "percentage": 40.2, "elapsed_time": "10:57:40", "remaining_time": "16:18:14"} +{"current_steps": 3029, "total_steps": 7532, "loss": 0.35709524154663086, "lr": 1.3956765607199069e-05, "epoch": 0.8044084450936131, "percentage": 40.22, "elapsed_time": "10:57:53", "remaining_time": "16:18:01"} +{"current_steps": 3030, "total_steps": 7532, "loss": 0.2929389774799347, "lr": 1.3952732791188828e-05, "epoch": 0.804674014075156, "percentage": 40.23, "elapsed_time": "10:58:05", "remaining_time": "16:17:48"} +{"current_steps": 3031, "total_steps": 7532, "loss": 0.2609884440898895, "lr": 1.3948699213138321e-05, "epoch": 0.804939583056699, "percentage": 40.24, "elapsed_time": "10:58:19", "remaining_time": "16:17:36"} +{"current_steps": 3032, "total_steps": 7532, "loss": 0.3026544749736786, "lr": 1.394466487382518e-05, "epoch": 
0.805205152038242, "percentage": 40.25, "elapsed_time": "10:58:32", "remaining_time": "16:17:23"} +{"current_steps": 3033, "total_steps": 7532, "loss": 0.28281137347221375, "lr": 1.394062977402717e-05, "epoch": 0.8054707210197849, "percentage": 40.27, "elapsed_time": "10:58:45", "remaining_time": "16:17:10"} +{"current_steps": 3034, "total_steps": 7532, "loss": 0.26189178228378296, "lr": 1.3936593914522214e-05, "epoch": 0.8057362900013278, "percentage": 40.28, "elapsed_time": "10:58:58", "remaining_time": "16:16:57"} +{"current_steps": 3035, "total_steps": 7532, "loss": 0.27987509965896606, "lr": 1.3932557296088383e-05, "epoch": 0.8060018589828708, "percentage": 40.29, "elapsed_time": "10:59:11", "remaining_time": "16:16:44"} +{"current_steps": 3036, "total_steps": 7532, "loss": 0.2857724130153656, "lr": 1.3928519919503884e-05, "epoch": 0.8062674279644138, "percentage": 40.31, "elapsed_time": "10:59:24", "remaining_time": "16:16:30"} +{"current_steps": 3037, "total_steps": 7532, "loss": 0.28102418780326843, "lr": 1.3924481785547076e-05, "epoch": 0.8065329969459567, "percentage": 40.32, "elapsed_time": "10:59:37", "remaining_time": "16:16:17"} +{"current_steps": 3038, "total_steps": 7532, "loss": 0.30250412225723267, "lr": 1.3920442894996464e-05, "epoch": 0.8067985659274997, "percentage": 40.33, "elapsed_time": "10:59:50", "remaining_time": "16:16:04"} +{"current_steps": 3039, "total_steps": 7532, "loss": 0.28951483964920044, "lr": 1.3916403248630703e-05, "epoch": 0.8070641349090426, "percentage": 40.35, "elapsed_time": "11:00:03", "remaining_time": "16:15:51"} +{"current_steps": 3040, "total_steps": 7532, "loss": 0.28455328941345215, "lr": 1.3912362847228585e-05, "epoch": 0.8073297038905856, "percentage": 40.36, "elapsed_time": "11:00:15", "remaining_time": "16:15:37"} +{"current_steps": 3041, "total_steps": 7532, "loss": 0.29541105031967163, "lr": 1.3908321691569048e-05, "epoch": 0.8075952728721285, "percentage": 40.37, "elapsed_time": "11:00:27", 
"remaining_time": "16:15:23"} +{"current_steps": 3042, "total_steps": 7532, "loss": 0.3057629466056824, "lr": 1.3904279782431187e-05, "epoch": 0.8078608418536715, "percentage": 40.39, "elapsed_time": "11:00:40", "remaining_time": "16:15:09"} +{"current_steps": 3043, "total_steps": 7532, "loss": 0.3204082250595093, "lr": 1.3900237120594226e-05, "epoch": 0.8081264108352144, "percentage": 40.4, "elapsed_time": "11:00:53", "remaining_time": "16:14:55"} +{"current_steps": 3044, "total_steps": 7532, "loss": 0.28629523515701294, "lr": 1.3896193706837551e-05, "epoch": 0.8083919798167574, "percentage": 40.41, "elapsed_time": "11:01:06", "remaining_time": "16:14:42"} +{"current_steps": 3045, "total_steps": 7532, "loss": 0.298164427280426, "lr": 1.389214954194068e-05, "epoch": 0.8086575487983003, "percentage": 40.43, "elapsed_time": "11:01:18", "remaining_time": "16:14:28"} +{"current_steps": 3046, "total_steps": 7532, "loss": 0.27309298515319824, "lr": 1.3888104626683282e-05, "epoch": 0.8089231177798433, "percentage": 40.44, "elapsed_time": "11:01:31", "remaining_time": "16:14:15"} +{"current_steps": 3047, "total_steps": 7532, "loss": 0.25635263323783875, "lr": 1.3884058961845166e-05, "epoch": 0.8091886867613862, "percentage": 40.45, "elapsed_time": "11:01:44", "remaining_time": "16:14:02"} +{"current_steps": 3048, "total_steps": 7532, "loss": 0.29926127195358276, "lr": 1.3880012548206292e-05, "epoch": 0.8094542557429292, "percentage": 40.47, "elapsed_time": "11:01:56", "remaining_time": "16:13:48"} +{"current_steps": 3049, "total_steps": 7532, "loss": 0.26633137464523315, "lr": 1.387596538654676e-05, "epoch": 0.8097198247244722, "percentage": 40.48, "elapsed_time": "11:02:09", "remaining_time": "16:13:34"} +{"current_steps": 3050, "total_steps": 7532, "loss": 0.28725534677505493, "lr": 1.387191747764681e-05, "epoch": 0.8099853937060152, "percentage": 40.49, "elapsed_time": "11:02:22", "remaining_time": "16:13:21"} +{"current_steps": 3051, "total_steps": 7532, "loss": 
0.3015314042568207, "lr": 1.3867868822286838e-05, "epoch": 0.8102509626875581, "percentage": 40.51, "elapsed_time": "11:02:34", "remaining_time": "16:13:07"} +{"current_steps": 3052, "total_steps": 7532, "loss": 0.3054691553115845, "lr": 1.3863819421247375e-05, "epoch": 0.8105165316691011, "percentage": 40.52, "elapsed_time": "11:02:47", "remaining_time": "16:12:54"} +{"current_steps": 3053, "total_steps": 7532, "loss": 0.26315444707870483, "lr": 1.3859769275309097e-05, "epoch": 0.810782100650644, "percentage": 40.53, "elapsed_time": "11:03:00", "remaining_time": "16:12:41"} +{"current_steps": 3054, "total_steps": 7532, "loss": 0.2973077595233917, "lr": 1.3855718385252824e-05, "epoch": 0.811047669632187, "percentage": 40.55, "elapsed_time": "11:03:13", "remaining_time": "16:12:27"} +{"current_steps": 3055, "total_steps": 7532, "loss": 0.32824432849884033, "lr": 1.385166675185952e-05, "epoch": 0.81131323861373, "percentage": 40.56, "elapsed_time": "11:03:25", "remaining_time": "16:12:14"} +{"current_steps": 3056, "total_steps": 7532, "loss": 0.3127811849117279, "lr": 1.3847614375910292e-05, "epoch": 0.8115788075952729, "percentage": 40.57, "elapsed_time": "11:03:39", "remaining_time": "16:12:01"} +{"current_steps": 3057, "total_steps": 7532, "loss": 0.2631932497024536, "lr": 1.384356125818639e-05, "epoch": 0.8118443765768159, "percentage": 40.59, "elapsed_time": "11:03:51", "remaining_time": "16:11:47"} +{"current_steps": 3058, "total_steps": 7532, "loss": 0.2856106162071228, "lr": 1.3839507399469213e-05, "epoch": 0.8121099455583588, "percentage": 40.6, "elapsed_time": "11:04:04", "remaining_time": "16:11:35"} +{"current_steps": 3059, "total_steps": 7532, "loss": 0.28986629843711853, "lr": 1.3835452800540288e-05, "epoch": 0.8123755145399018, "percentage": 40.61, "elapsed_time": "11:04:18", "remaining_time": "16:11:22"} +{"current_steps": 3060, "total_steps": 7532, "loss": 0.28411972522735596, "lr": 1.3831397462181298e-05, "epoch": 0.8126410835214447, "percentage": 
40.63, "elapsed_time": "11:04:31", "remaining_time": "16:11:09"} +{"current_steps": 3061, "total_steps": 7532, "loss": 0.3234354853630066, "lr": 1.3827341385174063e-05, "epoch": 0.8129066525029877, "percentage": 40.64, "elapsed_time": "11:04:45", "remaining_time": "16:10:57"} +{"current_steps": 3062, "total_steps": 7532, "loss": 0.24779736995697021, "lr": 1.3823284570300551e-05, "epoch": 0.8131722214845306, "percentage": 40.65, "elapsed_time": "11:04:58", "remaining_time": "16:10:44"} +{"current_steps": 3063, "total_steps": 7532, "loss": 0.3306904137134552, "lr": 1.3819227018342865e-05, "epoch": 0.8134377904660736, "percentage": 40.67, "elapsed_time": "11:05:11", "remaining_time": "16:10:32"} +{"current_steps": 3064, "total_steps": 7532, "loss": 0.31705451011657715, "lr": 1.3815168730083254e-05, "epoch": 0.8137033594476165, "percentage": 40.68, "elapsed_time": "11:05:26", "remaining_time": "16:10:21"} +{"current_steps": 3065, "total_steps": 7532, "loss": 0.29830047488212585, "lr": 1.3811109706304105e-05, "epoch": 0.8139689284291595, "percentage": 40.69, "elapsed_time": "11:05:40", "remaining_time": "16:10:09"} +{"current_steps": 3066, "total_steps": 7532, "loss": 0.30605942010879517, "lr": 1.3807049947787954e-05, "epoch": 0.8142344974107024, "percentage": 40.71, "elapsed_time": "11:05:53", "remaining_time": "16:09:56"} +{"current_steps": 3067, "total_steps": 7532, "loss": 0.3139193058013916, "lr": 1.3802989455317475e-05, "epoch": 0.8145000663922454, "percentage": 40.72, "elapsed_time": "11:06:06", "remaining_time": "16:09:44"} +{"current_steps": 3068, "total_steps": 7532, "loss": 0.3175879716873169, "lr": 1.3798928229675478e-05, "epoch": 0.8147656353737883, "percentage": 40.73, "elapsed_time": "11:06:19", "remaining_time": "16:09:30"} +{"current_steps": 3069, "total_steps": 7532, "loss": 0.26391106843948364, "lr": 1.3794866271644922e-05, "epoch": 0.8150312043553313, "percentage": 40.75, "elapsed_time": "11:06:32", "remaining_time": "16:09:18"} +{"current_steps": 
3070, "total_steps": 7532, "loss": 0.24128863215446472, "lr": 1.3790803582008906e-05, "epoch": 0.8152967733368742, "percentage": 40.76, "elapsed_time": "11:06:45", "remaining_time": "16:09:04"} +{"current_steps": 3071, "total_steps": 7532, "loss": 0.3249368965625763, "lr": 1.378674016155067e-05, "epoch": 0.8155623423184172, "percentage": 40.77, "elapsed_time": "11:06:58", "remaining_time": "16:08:51"} +{"current_steps": 3072, "total_steps": 7532, "loss": 0.2871986925601959, "lr": 1.3782676011053592e-05, "epoch": 0.8158279112999601, "percentage": 40.79, "elapsed_time": "11:07:11", "remaining_time": "16:08:38"} +{"current_steps": 3073, "total_steps": 7532, "loss": 0.29047372937202454, "lr": 1.377861113130119e-05, "epoch": 0.8160934802815031, "percentage": 40.8, "elapsed_time": "11:07:24", "remaining_time": "16:08:25"} +{"current_steps": 3074, "total_steps": 7532, "loss": 0.3055281341075897, "lr": 1.3774545523077122e-05, "epoch": 0.816359049263046, "percentage": 40.81, "elapsed_time": "11:07:37", "remaining_time": "16:08:12"} +{"current_steps": 3075, "total_steps": 7532, "loss": 0.2565494179725647, "lr": 1.37704791871652e-05, "epoch": 0.816624618244589, "percentage": 40.83, "elapsed_time": "11:07:50", "remaining_time": "16:07:59"} +{"current_steps": 3076, "total_steps": 7532, "loss": 0.3016049861907959, "lr": 1.3766412124349358e-05, "epoch": 0.8168901872261319, "percentage": 40.84, "elapsed_time": "11:08:03", "remaining_time": "16:07:46"} +{"current_steps": 3077, "total_steps": 7532, "loss": 0.3021200895309448, "lr": 1.3762344335413677e-05, "epoch": 0.8171557562076749, "percentage": 40.85, "elapsed_time": "11:08:15", "remaining_time": "16:07:32"} +{"current_steps": 3078, "total_steps": 7532, "loss": 0.3024774193763733, "lr": 1.3758275821142382e-05, "epoch": 0.817421325189218, "percentage": 40.87, "elapsed_time": "11:08:29", "remaining_time": "16:07:19"} +{"current_steps": 3079, "total_steps": 7532, "loss": 0.33114269375801086, "lr": 1.3754206582319836e-05, "epoch": 
0.8176868941707609, "percentage": 40.88, "elapsed_time": "11:08:42", "remaining_time": "16:07:06"} +{"current_steps": 3080, "total_steps": 7532, "loss": 0.27339494228363037, "lr": 1.3750136619730534e-05, "epoch": 0.8179524631523039, "percentage": 40.89, "elapsed_time": "11:08:55", "remaining_time": "16:06:53"} +{"current_steps": 3081, "total_steps": 7532, "loss": 0.2827128767967224, "lr": 1.3746065934159123e-05, "epoch": 0.8182180321338468, "percentage": 40.91, "elapsed_time": "11:09:07", "remaining_time": "16:06:40"} +{"current_steps": 3082, "total_steps": 7532, "loss": 0.2972746193408966, "lr": 1.3741994526390379e-05, "epoch": 0.8184836011153898, "percentage": 40.92, "elapsed_time": "11:09:20", "remaining_time": "16:06:26"} +{"current_steps": 3083, "total_steps": 7532, "loss": 0.29932117462158203, "lr": 1.3737922397209222e-05, "epoch": 0.8187491700969327, "percentage": 40.93, "elapsed_time": "11:09:33", "remaining_time": "16:06:13"} +{"current_steps": 3084, "total_steps": 7532, "loss": 0.28307998180389404, "lr": 1.3733849547400713e-05, "epoch": 0.8190147390784757, "percentage": 40.95, "elapsed_time": "11:09:46", "remaining_time": "16:06:00"} +{"current_steps": 3085, "total_steps": 7532, "loss": 0.2885883152484894, "lr": 1.3729775977750048e-05, "epoch": 0.8192803080600186, "percentage": 40.96, "elapsed_time": "11:09:59", "remaining_time": "16:05:47"} +{"current_steps": 3086, "total_steps": 7532, "loss": 0.28837913274765015, "lr": 1.3725701689042564e-05, "epoch": 0.8195458770415616, "percentage": 40.97, "elapsed_time": "11:10:12", "remaining_time": "16:05:33"} +{"current_steps": 3087, "total_steps": 7532, "loss": 0.2775058150291443, "lr": 1.3721626682063733e-05, "epoch": 0.8198114460231045, "percentage": 40.99, "elapsed_time": "11:10:25", "remaining_time": "16:05:20"} +{"current_steps": 3088, "total_steps": 7532, "loss": 0.2813493609428406, "lr": 1.3717550957599172e-05, "epoch": 0.8200770150046475, "percentage": 41.0, "elapsed_time": "11:10:37", "remaining_time": 
"16:05:06"} +{"current_steps": 3089, "total_steps": 7532, "loss": 0.2677592933177948, "lr": 1.371347451643463e-05, "epoch": 0.8203425839861904, "percentage": 41.01, "elapsed_time": "11:10:50", "remaining_time": "16:04:54"} +{"current_steps": 3090, "total_steps": 7532, "loss": 0.3104957938194275, "lr": 1.3709397359355998e-05, "epoch": 0.8206081529677334, "percentage": 41.02, "elapsed_time": "11:11:03", "remaining_time": "16:04:40"} +{"current_steps": 3091, "total_steps": 7532, "loss": 0.29315799474716187, "lr": 1.3705319487149303e-05, "epoch": 0.8208737219492763, "percentage": 41.04, "elapsed_time": "11:11:16", "remaining_time": "16:04:27"} +{"current_steps": 3092, "total_steps": 7532, "loss": 0.3044348657131195, "lr": 1.370124090060071e-05, "epoch": 0.8211392909308193, "percentage": 41.05, "elapsed_time": "11:11:29", "remaining_time": "16:04:13"} +{"current_steps": 3093, "total_steps": 7532, "loss": 0.2918691635131836, "lr": 1.3697161600496525e-05, "epoch": 0.8214048599123622, "percentage": 41.06, "elapsed_time": "11:11:42", "remaining_time": "16:04:01"} +{"current_steps": 3094, "total_steps": 7532, "loss": 0.2887750267982483, "lr": 1.3693081587623187e-05, "epoch": 0.8216704288939052, "percentage": 41.08, "elapsed_time": "11:11:55", "remaining_time": "16:03:47"} +{"current_steps": 3095, "total_steps": 7532, "loss": 0.3055661916732788, "lr": 1.3689000862767274e-05, "epoch": 0.8219359978754481, "percentage": 41.09, "elapsed_time": "11:12:08", "remaining_time": "16:03:34"} +{"current_steps": 3096, "total_steps": 7532, "loss": 0.271525114774704, "lr": 1.3684919426715504e-05, "epoch": 0.8222015668569911, "percentage": 41.1, "elapsed_time": "11:12:20", "remaining_time": "16:03:20"} +{"current_steps": 3097, "total_steps": 7532, "loss": 0.3220426142215729, "lr": 1.3680837280254726e-05, "epoch": 0.822467135838534, "percentage": 41.12, "elapsed_time": "11:12:33", "remaining_time": "16:03:07"} +{"current_steps": 3098, "total_steps": 7532, "loss": 0.29091203212738037, "lr": 
1.3676754424171935e-05, "epoch": 0.822732704820077, "percentage": 41.13, "elapsed_time": "11:12:46", "remaining_time": "16:02:54"} +{"current_steps": 3099, "total_steps": 7532, "loss": 0.2928692102432251, "lr": 1.3672670859254252e-05, "epoch": 0.8229982738016199, "percentage": 41.14, "elapsed_time": "11:12:58", "remaining_time": "16:02:40"} +{"current_steps": 3100, "total_steps": 7532, "loss": 0.28635919094085693, "lr": 1.3668586586288942e-05, "epoch": 0.8232638427831629, "percentage": 41.16, "elapsed_time": "11:13:11", "remaining_time": "16:02:27"} +{"current_steps": 3101, "total_steps": 7532, "loss": 0.2912571430206299, "lr": 1.3664501606063402e-05, "epoch": 0.8235294117647058, "percentage": 41.17, "elapsed_time": "11:13:30", "remaining_time": "16:02:21"} +{"current_steps": 3102, "total_steps": 7532, "loss": 0.2783615291118622, "lr": 1.3660415919365178e-05, "epoch": 0.8237949807462488, "percentage": 41.18, "elapsed_time": "11:13:43", "remaining_time": "16:02:09"} +{"current_steps": 3103, "total_steps": 7532, "loss": 0.3064395785331726, "lr": 1.365632952698193e-05, "epoch": 0.8240605497277917, "percentage": 41.2, "elapsed_time": "11:13:56", "remaining_time": "16:01:55"} +{"current_steps": 3104, "total_steps": 7532, "loss": 0.2528907358646393, "lr": 1.3652242429701477e-05, "epoch": 0.8243261187093347, "percentage": 41.21, "elapsed_time": "11:14:09", "remaining_time": "16:01:42"} +{"current_steps": 3105, "total_steps": 7532, "loss": 0.2648676633834839, "lr": 1.3648154628311754e-05, "epoch": 0.8245916876908777, "percentage": 41.22, "elapsed_time": "11:14:21", "remaining_time": "16:01:29"} +{"current_steps": 3106, "total_steps": 7532, "loss": 0.33425620198249817, "lr": 1.3644066123600846e-05, "epoch": 0.8248572566724207, "percentage": 41.24, "elapsed_time": "11:14:35", "remaining_time": "16:01:16"} +{"current_steps": 3107, "total_steps": 7532, "loss": 0.3108072280883789, "lr": 1.3639976916356965e-05, "epoch": 0.8251228256539637, "percentage": 41.25, "elapsed_time": 
"11:14:47", "remaining_time": "16:01:02"} +{"current_steps": 3108, "total_steps": 7532, "loss": 0.2860543131828308, "lr": 1.3635887007368467e-05, "epoch": 0.8253883946355066, "percentage": 41.26, "elapsed_time": "11:15:00", "remaining_time": "16:00:49"} +{"current_steps": 3109, "total_steps": 7532, "loss": 0.25440749526023865, "lr": 1.3631796397423833e-05, "epoch": 0.8256539636170496, "percentage": 41.28, "elapsed_time": "11:15:13", "remaining_time": "16:00:36"} +{"current_steps": 3110, "total_steps": 7532, "loss": 0.2676115334033966, "lr": 1.3627705087311687e-05, "epoch": 0.8259195325985925, "percentage": 41.29, "elapsed_time": "11:15:27", "remaining_time": "16:00:24"} +{"current_steps": 3111, "total_steps": 7532, "loss": 0.28977078199386597, "lr": 1.3623613077820788e-05, "epoch": 0.8261851015801355, "percentage": 41.3, "elapsed_time": "11:15:40", "remaining_time": "16:00:10"} +{"current_steps": 3112, "total_steps": 7532, "loss": 0.30161401629447937, "lr": 1.361952036974002e-05, "epoch": 0.8264506705616784, "percentage": 41.32, "elapsed_time": "11:15:53", "remaining_time": "15:59:58"} +{"current_steps": 3113, "total_steps": 7532, "loss": 0.28676310181617737, "lr": 1.3615426963858416e-05, "epoch": 0.8267162395432214, "percentage": 41.33, "elapsed_time": "11:16:05", "remaining_time": "15:59:44"} +{"current_steps": 3114, "total_steps": 7532, "loss": 0.2957243323326111, "lr": 1.361133286096513e-05, "epoch": 0.8269818085247643, "percentage": 41.34, "elapsed_time": "11:16:19", "remaining_time": "15:59:31"} +{"current_steps": 3115, "total_steps": 7532, "loss": 0.3036375343799591, "lr": 1.3607238061849461e-05, "epoch": 0.8272473775063073, "percentage": 41.36, "elapsed_time": "11:16:31", "remaining_time": "15:59:18"} +{"current_steps": 3116, "total_steps": 7532, "loss": 0.31175294518470764, "lr": 1.360314256730084e-05, "epoch": 0.8275129464878502, "percentage": 41.37, "elapsed_time": "11:16:45", "remaining_time": "15:59:06"} +{"current_steps": 3117, "total_steps": 7532, 
"loss": 0.30212485790252686, "lr": 1.3599046378108825e-05, "epoch": 0.8277785154693932, "percentage": 41.38, "elapsed_time": "11:16:58", "remaining_time": "15:58:52"} +{"current_steps": 3118, "total_steps": 7532, "loss": 0.3290692865848541, "lr": 1.3594949495063117e-05, "epoch": 0.8280440844509361, "percentage": 41.4, "elapsed_time": "11:17:10", "remaining_time": "15:58:38"} +{"current_steps": 3119, "total_steps": 7532, "loss": 0.25952839851379395, "lr": 1.3590851918953542e-05, "epoch": 0.8283096534324791, "percentage": 41.41, "elapsed_time": "11:17:23", "remaining_time": "15:58:26"} +{"current_steps": 3120, "total_steps": 7532, "loss": 0.27737247943878174, "lr": 1.3586753650570069e-05, "epoch": 0.828575222414022, "percentage": 41.42, "elapsed_time": "11:17:36", "remaining_time": "15:58:12"} +{"current_steps": 3121, "total_steps": 7532, "loss": 0.29415374994277954, "lr": 1.3582654690702795e-05, "epoch": 0.828840791395565, "percentage": 41.44, "elapsed_time": "11:17:49", "remaining_time": "15:57:59"} +{"current_steps": 3122, "total_steps": 7532, "loss": 0.29197627305984497, "lr": 1.3578555040141948e-05, "epoch": 0.8291063603771079, "percentage": 41.45, "elapsed_time": "11:18:02", "remaining_time": "15:57:46"} +{"current_steps": 3123, "total_steps": 7532, "loss": 0.30318522453308105, "lr": 1.3574454699677893e-05, "epoch": 0.8293719293586509, "percentage": 41.46, "elapsed_time": "11:18:15", "remaining_time": "15:57:33"} +{"current_steps": 3124, "total_steps": 7532, "loss": 0.3184241056442261, "lr": 1.357035367010113e-05, "epoch": 0.8296374983401938, "percentage": 41.48, "elapsed_time": "11:18:27", "remaining_time": "15:57:19"} +{"current_steps": 3125, "total_steps": 7532, "loss": 0.30330199003219604, "lr": 1.3566251952202288e-05, "epoch": 0.8299030673217368, "percentage": 41.49, "elapsed_time": "11:18:40", "remaining_time": "15:57:06"} +{"current_steps": 3126, "total_steps": 7532, "loss": 0.25366994738578796, "lr": 1.356214954677213e-05, "epoch": 0.8301686363032797, 
"percentage": 41.5, "elapsed_time": "11:18:53", "remaining_time": "15:56:52"} +{"current_steps": 3127, "total_steps": 7532, "loss": 0.3213343918323517, "lr": 1.3558046454601552e-05, "epoch": 0.8304342052848227, "percentage": 41.52, "elapsed_time": "11:19:06", "remaining_time": "15:56:39"} +{"current_steps": 3128, "total_steps": 7532, "loss": 0.3012468218803406, "lr": 1.355394267648158e-05, "epoch": 0.8306997742663657, "percentage": 41.53, "elapsed_time": "11:19:19", "remaining_time": "15:56:25"} +{"current_steps": 3129, "total_steps": 7532, "loss": 0.3272971510887146, "lr": 1.3549838213203374e-05, "epoch": 0.8309653432479086, "percentage": 41.54, "elapsed_time": "11:19:32", "remaining_time": "15:56:13"} +{"current_steps": 3130, "total_steps": 7532, "loss": 0.30032482743263245, "lr": 1.354573306555823e-05, "epoch": 0.8312309122294516, "percentage": 41.56, "elapsed_time": "11:19:44", "remaining_time": "15:55:59"} +{"current_steps": 3131, "total_steps": 7532, "loss": 0.2820669412612915, "lr": 1.3541627234337567e-05, "epoch": 0.8314964812109945, "percentage": 41.57, "elapsed_time": "11:19:57", "remaining_time": "15:55:46"} +{"current_steps": 3132, "total_steps": 7532, "loss": 0.2638673782348633, "lr": 1.3537520720332943e-05, "epoch": 0.8317620501925375, "percentage": 41.58, "elapsed_time": "11:20:10", "remaining_time": "15:55:32"} +{"current_steps": 3133, "total_steps": 7532, "loss": 0.2766842246055603, "lr": 1.3533413524336043e-05, "epoch": 0.8320276191740804, "percentage": 41.6, "elapsed_time": "11:20:23", "remaining_time": "15:55:19"} +{"current_steps": 3134, "total_steps": 7532, "loss": 0.330536425113678, "lr": 1.3529305647138689e-05, "epoch": 0.8322931881556235, "percentage": 41.61, "elapsed_time": "11:20:35", "remaining_time": "15:55:05"} +{"current_steps": 3135, "total_steps": 7532, "loss": 0.30375364422798157, "lr": 1.3525197089532833e-05, "epoch": 0.8325587571371664, "percentage": 41.62, "elapsed_time": "11:20:48", "remaining_time": "15:54:51"} 
+{"current_steps": 3136, "total_steps": 7532, "loss": 0.3092171549797058, "lr": 1.3521087852310555e-05, "epoch": 0.8328243261187094, "percentage": 41.64, "elapsed_time": "11:21:01", "remaining_time": "15:54:38"} +{"current_steps": 3137, "total_steps": 7532, "loss": 0.28651195764541626, "lr": 1.3516977936264062e-05, "epoch": 0.8330898951002523, "percentage": 41.65, "elapsed_time": "11:21:13", "remaining_time": "15:54:25"} +{"current_steps": 3138, "total_steps": 7532, "loss": 0.2882133722305298, "lr": 1.3512867342185705e-05, "epoch": 0.8333554640817953, "percentage": 41.66, "elapsed_time": "11:21:26", "remaining_time": "15:54:12"} +{"current_steps": 3139, "total_steps": 7532, "loss": 0.30633628368377686, "lr": 1.3508756070867955e-05, "epoch": 0.8336210330633382, "percentage": 41.68, "elapsed_time": "11:21:39", "remaining_time": "15:53:58"} +{"current_steps": 3140, "total_steps": 7532, "loss": 0.2819565236568451, "lr": 1.3504644123103415e-05, "epoch": 0.8338866020448812, "percentage": 41.69, "elapsed_time": "11:21:52", "remaining_time": "15:53:45"} +{"current_steps": 3141, "total_steps": 7532, "loss": 0.29544374346733093, "lr": 1.3500531499684819e-05, "epoch": 0.8341521710264241, "percentage": 41.7, "elapsed_time": "11:22:05", "remaining_time": "15:53:31"} +{"current_steps": 3142, "total_steps": 7532, "loss": 0.29383376240730286, "lr": 1.3496418201405037e-05, "epoch": 0.8344177400079671, "percentage": 41.72, "elapsed_time": "11:22:18", "remaining_time": "15:53:18"} +{"current_steps": 3143, "total_steps": 7532, "loss": 0.24945983290672302, "lr": 1.3492304229057062e-05, "epoch": 0.83468330898951, "percentage": 41.73, "elapsed_time": "11:22:30", "remaining_time": "15:53:05"} +{"current_steps": 3144, "total_steps": 7532, "loss": 0.338919997215271, "lr": 1.3488189583434023e-05, "epoch": 0.834948877971053, "percentage": 41.74, "elapsed_time": "11:22:43", "remaining_time": "15:52:52"} +{"current_steps": 3145, "total_steps": 7532, "loss": 0.29555821418762207, "lr": 
1.348407426532917e-05, "epoch": 0.835214446952596, "percentage": 41.76, "elapsed_time": "11:22:56", "remaining_time": "15:52:38"} +{"current_steps": 3146, "total_steps": 7532, "loss": 0.31038299202919006, "lr": 1.3479958275535887e-05, "epoch": 0.8354800159341389, "percentage": 41.77, "elapsed_time": "11:23:09", "remaining_time": "15:52:26"} +{"current_steps": 3147, "total_steps": 7532, "loss": 0.2595089077949524, "lr": 1.347584161484769e-05, "epoch": 0.8357455849156818, "percentage": 41.78, "elapsed_time": "11:23:22", "remaining_time": "15:52:12"} +{"current_steps": 3148, "total_steps": 7532, "loss": 0.3287338614463806, "lr": 1.3471724284058227e-05, "epoch": 0.8360111538972248, "percentage": 41.8, "elapsed_time": "11:23:34", "remaining_time": "15:51:58"} +{"current_steps": 3149, "total_steps": 7532, "loss": 0.3109680414199829, "lr": 1.3467606283961268e-05, "epoch": 0.8362767228787678, "percentage": 41.81, "elapsed_time": "11:23:48", "remaining_time": "15:51:45"} +{"current_steps": 3150, "total_steps": 7532, "loss": 0.2584227919578552, "lr": 1.346348761535071e-05, "epoch": 0.8365422918603107, "percentage": 41.82, "elapsed_time": "11:24:00", "remaining_time": "15:51:32"} +{"current_steps": 3151, "total_steps": 7532, "loss": 0.3038554787635803, "lr": 1.345936827902059e-05, "epoch": 0.8368078608418537, "percentage": 41.83, "elapsed_time": "11:24:13", "remaining_time": "15:51:19"} +{"current_steps": 3152, "total_steps": 7532, "loss": 0.28267812728881836, "lr": 1.3455248275765067e-05, "epoch": 0.8370734298233966, "percentage": 41.85, "elapsed_time": "11:24:26", "remaining_time": "15:51:05"} +{"current_steps": 3153, "total_steps": 7532, "loss": 0.3328094184398651, "lr": 1.3451127606378425e-05, "epoch": 0.8373389988049396, "percentage": 41.86, "elapsed_time": "11:24:39", "remaining_time": "15:50:52"} +{"current_steps": 3154, "total_steps": 7532, "loss": 0.3235865533351898, "lr": 1.3447006271655082e-05, "epoch": 0.8376045677864825, "percentage": 41.87, "elapsed_time": 
"11:24:52", "remaining_time": "15:50:38"} +{"current_steps": 3155, "total_steps": 7532, "loss": 0.25394493341445923, "lr": 1.3442884272389583e-05, "epoch": 0.8378701367680255, "percentage": 41.89, "elapsed_time": "11:25:05", "remaining_time": "15:50:26"} +{"current_steps": 3156, "total_steps": 7532, "loss": 0.29841768741607666, "lr": 1.3438761609376604e-05, "epoch": 0.8381357057495684, "percentage": 41.9, "elapsed_time": "11:25:17", "remaining_time": "15:50:12"} +{"current_steps": 3157, "total_steps": 7532, "loss": 0.3161924183368683, "lr": 1.3434638283410942e-05, "epoch": 0.8384012747311114, "percentage": 41.91, "elapsed_time": "11:25:30", "remaining_time": "15:49:59"} +{"current_steps": 3158, "total_steps": 7532, "loss": 0.22781039774417877, "lr": 1.3430514295287526e-05, "epoch": 0.8386668437126543, "percentage": 41.93, "elapsed_time": "11:25:43", "remaining_time": "15:49:45"} +{"current_steps": 3159, "total_steps": 7532, "loss": 0.2947984039783478, "lr": 1.3426389645801415e-05, "epoch": 0.8389324126941973, "percentage": 41.94, "elapsed_time": "11:25:56", "remaining_time": "15:49:32"} +{"current_steps": 3160, "total_steps": 7532, "loss": 0.2827467918395996, "lr": 1.342226433574779e-05, "epoch": 0.8391979816757402, "percentage": 41.95, "elapsed_time": "11:26:09", "remaining_time": "15:49:19"} +{"current_steps": 3161, "total_steps": 7532, "loss": 0.3149232268333435, "lr": 1.3418138365921962e-05, "epoch": 0.8394635506572832, "percentage": 41.97, "elapsed_time": "11:26:22", "remaining_time": "15:49:06"} +{"current_steps": 3162, "total_steps": 7532, "loss": 0.33154603838920593, "lr": 1.3414011737119373e-05, "epoch": 0.8397291196388262, "percentage": 41.98, "elapsed_time": "11:26:34", "remaining_time": "15:48:52"} +{"current_steps": 3163, "total_steps": 7532, "loss": 0.28532034158706665, "lr": 1.3409884450135581e-05, "epoch": 0.8399946886203692, "percentage": 41.99, "elapsed_time": "11:26:48", "remaining_time": "15:48:40"} +{"current_steps": 3164, "total_steps": 7532, 
"loss": 0.2539500892162323, "lr": 1.3405756505766286e-05, "epoch": 0.8402602576019121, "percentage": 42.01, "elapsed_time": "11:27:00", "remaining_time": "15:48:26"} +{"current_steps": 3165, "total_steps": 7532, "loss": 0.3023888170719147, "lr": 1.3401627904807302e-05, "epoch": 0.8405258265834551, "percentage": 42.02, "elapsed_time": "11:27:13", "remaining_time": "15:48:13"} +{"current_steps": 3166, "total_steps": 7532, "loss": 0.3088506758213043, "lr": 1.3397498648054579e-05, "epoch": 0.840791395564998, "percentage": 42.03, "elapsed_time": "11:27:26", "remaining_time": "15:47:59"} +{"current_steps": 3167, "total_steps": 7532, "loss": 0.3223467469215393, "lr": 1.3393368736304184e-05, "epoch": 0.841056964546541, "percentage": 42.05, "elapsed_time": "11:27:38", "remaining_time": "15:47:46"} +{"current_steps": 3168, "total_steps": 7532, "loss": 0.2541419565677643, "lr": 1.3389238170352318e-05, "epoch": 0.841322533528084, "percentage": 42.06, "elapsed_time": "11:27:52", "remaining_time": "15:47:33"} +{"current_steps": 3169, "total_steps": 7532, "loss": 0.2915497422218323, "lr": 1.3385106950995308e-05, "epoch": 0.8415881025096269, "percentage": 42.07, "elapsed_time": "11:28:04", "remaining_time": "15:47:19"} +{"current_steps": 3170, "total_steps": 7532, "loss": 0.2907465994358063, "lr": 1.3380975079029598e-05, "epoch": 0.8418536714911699, "percentage": 42.09, "elapsed_time": "11:28:17", "remaining_time": "15:47:06"} +{"current_steps": 3171, "total_steps": 7532, "loss": 0.2587417960166931, "lr": 1.337684255525177e-05, "epoch": 0.8421192404727128, "percentage": 42.1, "elapsed_time": "11:28:30", "remaining_time": "15:46:52"} +{"current_steps": 3172, "total_steps": 7532, "loss": 0.2932469844818115, "lr": 1.3372709380458522e-05, "epoch": 0.8423848094542558, "percentage": 42.11, "elapsed_time": "11:28:43", "remaining_time": "15:46:39"} +{"current_steps": 3173, "total_steps": 7532, "loss": 0.31451860070228577, "lr": 1.3368575555446681e-05, "epoch": 0.8426503784357987, 
"percentage": 42.13, "elapsed_time": "11:28:55", "remaining_time": "15:46:26"} +{"current_steps": 3174, "total_steps": 7532, "loss": 0.24513742327690125, "lr": 1.3364441081013205e-05, "epoch": 0.8429159474173417, "percentage": 42.14, "elapsed_time": "11:29:08", "remaining_time": "15:46:13"} +{"current_steps": 3175, "total_steps": 7532, "loss": 0.29781201481819153, "lr": 1.3360305957955166e-05, "epoch": 0.8431815163988846, "percentage": 42.15, "elapsed_time": "11:29:21", "remaining_time": "15:45:59"} +{"current_steps": 3176, "total_steps": 7532, "loss": 0.30925726890563965, "lr": 1.3356170187069775e-05, "epoch": 0.8434470853804276, "percentage": 42.17, "elapsed_time": "11:29:34", "remaining_time": "15:45:46"} +{"current_steps": 3177, "total_steps": 7532, "loss": 0.2822851538658142, "lr": 1.3352033769154347e-05, "epoch": 0.8437126543619705, "percentage": 42.18, "elapsed_time": "11:29:47", "remaining_time": "15:45:33"} +{"current_steps": 3178, "total_steps": 7532, "loss": 0.2511071264743805, "lr": 1.3347896705006344e-05, "epoch": 0.8439782233435135, "percentage": 42.19, "elapsed_time": "11:29:59", "remaining_time": "15:45:19"} +{"current_steps": 3179, "total_steps": 7532, "loss": 0.3002505302429199, "lr": 1.3343758995423344e-05, "epoch": 0.8442437923250564, "percentage": 42.21, "elapsed_time": "11:30:12", "remaining_time": "15:45:06"} +{"current_steps": 3180, "total_steps": 7532, "loss": 0.285504549741745, "lr": 1.3339620641203043e-05, "epoch": 0.8445093613065994, "percentage": 42.22, "elapsed_time": "11:30:25", "remaining_time": "15:44:52"} +{"current_steps": 3181, "total_steps": 7532, "loss": 0.31988856196403503, "lr": 1.3335481643143271e-05, "epoch": 0.8447749302881423, "percentage": 42.23, "elapsed_time": "11:30:38", "remaining_time": "15:44:39"} +{"current_steps": 3182, "total_steps": 7532, "loss": 0.29330819845199585, "lr": 1.3331342002041973e-05, "epoch": 0.8450404992696853, "percentage": 42.25, "elapsed_time": "11:30:51", "remaining_time": "15:44:26"} 
+{"current_steps": 3183, "total_steps": 7532, "loss": 0.28694427013397217, "lr": 1.3327201718697232e-05, "epoch": 0.8453060682512282, "percentage": 42.26, "elapsed_time": "11:31:04", "remaining_time": "15:44:13"} +{"current_steps": 3184, "total_steps": 7532, "loss": 0.24912211298942566, "lr": 1.3323060793907239e-05, "epoch": 0.8455716372327712, "percentage": 42.27, "elapsed_time": "11:31:17", "remaining_time": "15:44:00"} +{"current_steps": 3185, "total_steps": 7532, "loss": 0.28117647767066956, "lr": 1.3318919228470315e-05, "epoch": 0.8458372062143141, "percentage": 42.29, "elapsed_time": "11:31:30", "remaining_time": "15:43:47"} +{"current_steps": 3186, "total_steps": 7532, "loss": 0.3176446557044983, "lr": 1.3314777023184907e-05, "epoch": 0.8461027751958571, "percentage": 42.3, "elapsed_time": "11:31:42", "remaining_time": "15:43:33"} +{"current_steps": 3187, "total_steps": 7532, "loss": 0.31205689907073975, "lr": 1.3310634178849583e-05, "epoch": 0.8463683441774, "percentage": 42.31, "elapsed_time": "11:31:55", "remaining_time": "15:43:20"} +{"current_steps": 3188, "total_steps": 7532, "loss": 0.29942232370376587, "lr": 1.3306490696263034e-05, "epoch": 0.846633913158943, "percentage": 42.33, "elapsed_time": "11:32:08", "remaining_time": "15:43:07"} +{"current_steps": 3189, "total_steps": 7532, "loss": 0.3149508833885193, "lr": 1.3302346576224077e-05, "epoch": 0.8468994821404859, "percentage": 42.34, "elapsed_time": "11:32:21", "remaining_time": "15:42:53"} +{"current_steps": 3190, "total_steps": 7532, "loss": 0.2930619418621063, "lr": 1.3298201819531646e-05, "epoch": 0.847165051122029, "percentage": 42.35, "elapsed_time": "11:32:34", "remaining_time": "15:42:40"} +{"current_steps": 3191, "total_steps": 7532, "loss": 0.3089582920074463, "lr": 1.3294056426984804e-05, "epoch": 0.847430620103572, "percentage": 42.37, "elapsed_time": "11:32:46", "remaining_time": "15:42:27"} +{"current_steps": 3192, "total_steps": 7532, "loss": 0.3120991587638855, "lr": 
1.3289910399382733e-05, "epoch": 0.8476961890851149, "percentage": 42.38, "elapsed_time": "11:32:59", "remaining_time": "15:42:14"} +{"current_steps": 3193, "total_steps": 7532, "loss": 0.2728833258152008, "lr": 1.3285763737524738e-05, "epoch": 0.8479617580666579, "percentage": 42.39, "elapsed_time": "11:33:12", "remaining_time": "15:42:00"} +{"current_steps": 3194, "total_steps": 7532, "loss": 0.2833358347415924, "lr": 1.3281616442210246e-05, "epoch": 0.8482273270482008, "percentage": 42.41, "elapsed_time": "11:33:25", "remaining_time": "15:41:47"} +{"current_steps": 3195, "total_steps": 7532, "loss": 0.26218950748443604, "lr": 1.3277468514238803e-05, "epoch": 0.8484928960297438, "percentage": 42.42, "elapsed_time": "11:33:38", "remaining_time": "15:41:34"} +{"current_steps": 3196, "total_steps": 7532, "loss": 0.3120720386505127, "lr": 1.3273319954410088e-05, "epoch": 0.8487584650112867, "percentage": 42.43, "elapsed_time": "11:33:51", "remaining_time": "15:41:21"} +{"current_steps": 3197, "total_steps": 7532, "loss": 0.2748696208000183, "lr": 1.3269170763523892e-05, "epoch": 0.8490240339928297, "percentage": 42.45, "elapsed_time": "11:34:04", "remaining_time": "15:41:08"} +{"current_steps": 3198, "total_steps": 7532, "loss": 0.2892690598964691, "lr": 1.326502094238013e-05, "epoch": 0.8492896029743726, "percentage": 42.46, "elapsed_time": "11:34:17", "remaining_time": "15:40:55"} +{"current_steps": 3199, "total_steps": 7532, "loss": 0.26583510637283325, "lr": 1.3260870491778835e-05, "epoch": 0.8495551719559156, "percentage": 42.47, "elapsed_time": "11:34:30", "remaining_time": "15:40:41"} +{"current_steps": 3200, "total_steps": 7532, "loss": 0.31602388620376587, "lr": 1.325671941252017e-05, "epoch": 0.8498207409374585, "percentage": 42.49, "elapsed_time": "11:34:43", "remaining_time": "15:40:28"} +{"current_steps": 3201, "total_steps": 7532, "loss": 0.2980017364025116, "lr": 1.3252567705404409e-05, "epoch": 0.8500863099190015, "percentage": 42.5, "elapsed_time": 
"11:35:01", "remaining_time": "15:40:22"} +{"current_steps": 3202, "total_steps": 7532, "loss": 0.27081727981567383, "lr": 1.3248415371231957e-05, "epoch": 0.8503518789005444, "percentage": 42.51, "elapsed_time": "11:35:14", "remaining_time": "15:40:09"} +{"current_steps": 3203, "total_steps": 7532, "loss": 0.28895002603530884, "lr": 1.3244262410803333e-05, "epoch": 0.8506174478820874, "percentage": 42.53, "elapsed_time": "11:35:27", "remaining_time": "15:39:55"} +{"current_steps": 3204, "total_steps": 7532, "loss": 0.30804315209388733, "lr": 1.3240108824919176e-05, "epoch": 0.8508830168636303, "percentage": 42.54, "elapsed_time": "11:35:40", "remaining_time": "15:39:43"} +{"current_steps": 3205, "total_steps": 7532, "loss": 0.28173667192459106, "lr": 1.3235954614380253e-05, "epoch": 0.8511485858451733, "percentage": 42.55, "elapsed_time": "11:35:52", "remaining_time": "15:39:29"} +{"current_steps": 3206, "total_steps": 7532, "loss": 0.3113047778606415, "lr": 1.3231799779987445e-05, "epoch": 0.8514141548267162, "percentage": 42.57, "elapsed_time": "11:36:05", "remaining_time": "15:39:16"} +{"current_steps": 3207, "total_steps": 7532, "loss": 0.247248113155365, "lr": 1.3227644322541754e-05, "epoch": 0.8516797238082592, "percentage": 42.58, "elapsed_time": "11:36:18", "remaining_time": "15:39:02"} +{"current_steps": 3208, "total_steps": 7532, "loss": 0.27078187465667725, "lr": 1.3223488242844309e-05, "epoch": 0.8519452927898021, "percentage": 42.59, "elapsed_time": "11:36:31", "remaining_time": "15:38:50"} +{"current_steps": 3209, "total_steps": 7532, "loss": 0.2749357223510742, "lr": 1.321933154169634e-05, "epoch": 0.8522108617713451, "percentage": 42.6, "elapsed_time": "11:36:44", "remaining_time": "15:38:36"} +{"current_steps": 3210, "total_steps": 7532, "loss": 0.25382956862449646, "lr": 1.3215174219899224e-05, "epoch": 0.852476430752888, "percentage": 42.62, "elapsed_time": "11:36:56", "remaining_time": "15:38:22"} +{"current_steps": 3211, "total_steps": 7532, 
"loss": 0.3237685263156891, "lr": 1.3211016278254436e-05, "epoch": 0.852741999734431, "percentage": 42.63, "elapsed_time": "11:37:10", "remaining_time": "15:38:10"} +{"current_steps": 3212, "total_steps": 7532, "loss": 0.2899032235145569, "lr": 1.3206857717563581e-05, "epoch": 0.8530075687159739, "percentage": 42.64, "elapsed_time": "11:37:22", "remaining_time": "15:37:56"} +{"current_steps": 3213, "total_steps": 7532, "loss": 0.25997933745384216, "lr": 1.3202698538628376e-05, "epoch": 0.8532731376975169, "percentage": 42.66, "elapsed_time": "11:37:36", "remaining_time": "15:37:44"} +{"current_steps": 3214, "total_steps": 7532, "loss": 0.3228183090686798, "lr": 1.3198538742250668e-05, "epoch": 0.8535387066790598, "percentage": 42.67, "elapsed_time": "11:37:48", "remaining_time": "15:37:30"} +{"current_steps": 3215, "total_steps": 7532, "loss": 0.31993368268013, "lr": 1.3194378329232413e-05, "epoch": 0.8538042756606028, "percentage": 42.68, "elapsed_time": "11:38:01", "remaining_time": "15:37:17"} +{"current_steps": 3216, "total_steps": 7532, "loss": 0.29520007967948914, "lr": 1.3190217300375694e-05, "epoch": 0.8540698446421457, "percentage": 42.7, "elapsed_time": "11:38:14", "remaining_time": "15:37:03"} +{"current_steps": 3217, "total_steps": 7532, "loss": 0.31073522567749023, "lr": 1.3186055656482702e-05, "epoch": 0.8543354136236887, "percentage": 42.71, "elapsed_time": "11:38:27", "remaining_time": "15:36:51"} +{"current_steps": 3218, "total_steps": 7532, "loss": 0.34354183077812195, "lr": 1.3181893398355752e-05, "epoch": 0.8546009826052318, "percentage": 42.72, "elapsed_time": "11:38:40", "remaining_time": "15:36:37"} +{"current_steps": 3219, "total_steps": 7532, "loss": 0.27676698565483093, "lr": 1.3177730526797286e-05, "epoch": 0.8548665515867747, "percentage": 42.74, "elapsed_time": "11:38:53", "remaining_time": "15:36:24"} +{"current_steps": 3220, "total_steps": 7532, "loss": 0.27313530445098877, "lr": 1.3173567042609852e-05, "epoch": 0.8551321205683177, 
"percentage": 42.75, "elapsed_time": "11:39:06", "remaining_time": "15:36:11"} +{"current_steps": 3221, "total_steps": 7532, "loss": 0.2517555058002472, "lr": 1.3169402946596119e-05, "epoch": 0.8553976895498606, "percentage": 42.76, "elapsed_time": "11:39:19", "remaining_time": "15:35:58"} +{"current_steps": 3222, "total_steps": 7532, "loss": 0.29700207710266113, "lr": 1.3165238239558878e-05, "epoch": 0.8556632585314036, "percentage": 42.78, "elapsed_time": "11:39:31", "remaining_time": "15:35:44"} +{"current_steps": 3223, "total_steps": 7532, "loss": 0.3182620704174042, "lr": 1.3161072922301037e-05, "epoch": 0.8559288275129465, "percentage": 42.79, "elapsed_time": "11:39:45", "remaining_time": "15:35:32"} +{"current_steps": 3224, "total_steps": 7532, "loss": 0.3112961947917938, "lr": 1.3156906995625615e-05, "epoch": 0.8561943964944895, "percentage": 42.8, "elapsed_time": "11:39:57", "remaining_time": "15:35:18"} +{"current_steps": 3225, "total_steps": 7532, "loss": 0.3080563545227051, "lr": 1.3152740460335757e-05, "epoch": 0.8564599654760324, "percentage": 42.82, "elapsed_time": "11:40:10", "remaining_time": "15:35:05"} +{"current_steps": 3226, "total_steps": 7532, "loss": 0.31197935342788696, "lr": 1.3148573317234726e-05, "epoch": 0.8567255344575754, "percentage": 42.83, "elapsed_time": "11:40:23", "remaining_time": "15:34:51"} +{"current_steps": 3227, "total_steps": 7532, "loss": 0.27377086877822876, "lr": 1.3144405567125886e-05, "epoch": 0.8569911034391183, "percentage": 42.84, "elapsed_time": "11:40:35", "remaining_time": "15:34:38"} +{"current_steps": 3228, "total_steps": 7532, "loss": 0.25303182005882263, "lr": 1.3140237210812741e-05, "epoch": 0.8572566724206613, "percentage": 42.86, "elapsed_time": "11:40:48", "remaining_time": "15:34:25"} +{"current_steps": 3229, "total_steps": 7532, "loss": 0.27949726581573486, "lr": 1.3136068249098899e-05, "epoch": 0.8575222414022042, "percentage": 42.87, "elapsed_time": "11:41:01", "remaining_time": "15:34:11"} 
+{"current_steps": 3230, "total_steps": 7532, "loss": 0.278359055519104, "lr": 1.3131898682788082e-05, "epoch": 0.8577878103837472, "percentage": 42.88, "elapsed_time": "11:41:14", "remaining_time": "15:33:58"} +{"current_steps": 3231, "total_steps": 7532, "loss": 0.28507643938064575, "lr": 1.312772851268414e-05, "epoch": 0.8580533793652901, "percentage": 42.9, "elapsed_time": "11:41:27", "remaining_time": "15:33:44"} +{"current_steps": 3232, "total_steps": 7532, "loss": 0.2689790427684784, "lr": 1.3123557739591026e-05, "epoch": 0.8583189483468331, "percentage": 42.91, "elapsed_time": "11:41:40", "remaining_time": "15:33:31"} +{"current_steps": 3233, "total_steps": 7532, "loss": 0.31956973671913147, "lr": 1.3119386364312821e-05, "epoch": 0.858584517328376, "percentage": 42.92, "elapsed_time": "11:41:52", "remaining_time": "15:33:18"} +{"current_steps": 3234, "total_steps": 7532, "loss": 0.2837323546409607, "lr": 1.3115214387653711e-05, "epoch": 0.858850086309919, "percentage": 42.94, "elapsed_time": "11:42:05", "remaining_time": "15:33:05"} +{"current_steps": 3235, "total_steps": 7532, "loss": 0.2756272554397583, "lr": 1.3111041810418011e-05, "epoch": 0.8591156552914619, "percentage": 42.95, "elapsed_time": "11:42:18", "remaining_time": "15:32:51"} +{"current_steps": 3236, "total_steps": 7532, "loss": 0.2664923369884491, "lr": 1.3106868633410139e-05, "epoch": 0.8593812242730049, "percentage": 42.96, "elapsed_time": "11:42:31", "remaining_time": "15:32:38"} +{"current_steps": 3237, "total_steps": 7532, "loss": 0.2842246890068054, "lr": 1.3102694857434637e-05, "epoch": 0.8596467932545478, "percentage": 42.98, "elapsed_time": "11:42:43", "remaining_time": "15:32:24"} +{"current_steps": 3238, "total_steps": 7532, "loss": 0.3066467344760895, "lr": 1.3098520483296159e-05, "epoch": 0.8599123622360908, "percentage": 42.99, "elapsed_time": "11:42:56", "remaining_time": "15:32:11"} +{"current_steps": 3239, "total_steps": 7532, "loss": 0.3042510151863098, "lr": 
1.3094345511799478e-05, "epoch": 0.8601779312176338, "percentage": 43.0, "elapsed_time": "11:43:09", "remaining_time": "15:31:58"} +{"current_steps": 3240, "total_steps": 7532, "loss": 0.2753696143627167, "lr": 1.3090169943749475e-05, "epoch": 0.8604435001991767, "percentage": 43.02, "elapsed_time": "11:43:22", "remaining_time": "15:31:45"} +{"current_steps": 3241, "total_steps": 7532, "loss": 0.2561766803264618, "lr": 1.3085993779951154e-05, "epoch": 0.8607090691807197, "percentage": 43.03, "elapsed_time": "11:43:35", "remaining_time": "15:31:31"} +{"current_steps": 3242, "total_steps": 7532, "loss": 0.297982782125473, "lr": 1.3081817021209626e-05, "epoch": 0.8609746381622626, "percentage": 43.04, "elapsed_time": "11:43:48", "remaining_time": "15:31:18"} +{"current_steps": 3243, "total_steps": 7532, "loss": 0.2961920499801636, "lr": 1.3077639668330124e-05, "epoch": 0.8612402071438056, "percentage": 43.06, "elapsed_time": "11:44:00", "remaining_time": "15:31:05"} +{"current_steps": 3244, "total_steps": 7532, "loss": 0.2868857979774475, "lr": 1.3073461722117991e-05, "epoch": 0.8615057761253485, "percentage": 43.07, "elapsed_time": "11:44:13", "remaining_time": "15:30:52"} +{"current_steps": 3245, "total_steps": 7532, "loss": 0.22930951416492462, "lr": 1.3069283183378683e-05, "epoch": 0.8617713451068915, "percentage": 43.08, "elapsed_time": "11:44:27", "remaining_time": "15:30:39"} +{"current_steps": 3246, "total_steps": 7532, "loss": 0.29737964272499084, "lr": 1.306510405291778e-05, "epoch": 0.8620369140884345, "percentage": 43.1, "elapsed_time": "11:44:39", "remaining_time": "15:30:25"} +{"current_steps": 3247, "total_steps": 7532, "loss": 0.2764522433280945, "lr": 1.3060924331540964e-05, "epoch": 0.8623024830699775, "percentage": 43.11, "elapsed_time": "11:44:52", "remaining_time": "15:30:13"} +{"current_steps": 3248, "total_steps": 7532, "loss": 0.27608832716941833, "lr": 1.3056744020054039e-05, "epoch": 0.8625680520515204, "percentage": 43.12, "elapsed_time": 
"11:45:05", "remaining_time": "15:29:59"} +{"current_steps": 3249, "total_steps": 7532, "loss": 0.25667035579681396, "lr": 1.3052563119262915e-05, "epoch": 0.8628336210330634, "percentage": 43.14, "elapsed_time": "11:45:18", "remaining_time": "15:29:46"} +{"current_steps": 3250, "total_steps": 7532, "loss": 0.3015863597393036, "lr": 1.3048381629973622e-05, "epoch": 0.8630991900146063, "percentage": 43.15, "elapsed_time": "11:45:31", "remaining_time": "15:29:33"} +{"current_steps": 3251, "total_steps": 7532, "loss": 0.2798422873020172, "lr": 1.3044199552992307e-05, "epoch": 0.8633647589961493, "percentage": 43.16, "elapsed_time": "11:45:44", "remaining_time": "15:29:20"} +{"current_steps": 3252, "total_steps": 7532, "loss": 0.2856596112251282, "lr": 1.304001688912522e-05, "epoch": 0.8636303279776922, "percentage": 43.18, "elapsed_time": "11:45:57", "remaining_time": "15:29:07"} +{"current_steps": 3253, "total_steps": 7532, "loss": 0.30247554183006287, "lr": 1.303583363917873e-05, "epoch": 0.8638958969592352, "percentage": 43.19, "elapsed_time": "11:46:10", "remaining_time": "15:28:54"} +{"current_steps": 3254, "total_steps": 7532, "loss": 0.26817965507507324, "lr": 1.303164980395932e-05, "epoch": 0.8641614659407781, "percentage": 43.2, "elapsed_time": "11:46:23", "remaining_time": "15:28:40"} +{"current_steps": 3255, "total_steps": 7532, "loss": 0.26919034123420715, "lr": 1.3027465384273579e-05, "epoch": 0.8644270349223211, "percentage": 43.22, "elapsed_time": "11:46:35", "remaining_time": "15:28:27"} +{"current_steps": 3256, "total_steps": 7532, "loss": 0.29495447874069214, "lr": 1.3023280380928223e-05, "epoch": 0.864692603903864, "percentage": 43.23, "elapsed_time": "11:46:48", "remaining_time": "15:28:13"} +{"current_steps": 3257, "total_steps": 7532, "loss": 0.26766717433929443, "lr": 1.3019094794730063e-05, "epoch": 0.864958172885407, "percentage": 43.24, "elapsed_time": "11:47:01", "remaining_time": "15:28:00"} +{"current_steps": 3258, "total_steps": 7532, 
"loss": 0.2573341131210327, "lr": 1.3014908626486032e-05, "epoch": 0.86522374186695, "percentage": 43.26, "elapsed_time": "11:47:14", "remaining_time": "15:27:47"} +{"current_steps": 3259, "total_steps": 7532, "loss": 0.32776498794555664, "lr": 1.3010721877003177e-05, "epoch": 0.8654893108484929, "percentage": 43.27, "elapsed_time": "11:47:26", "remaining_time": "15:27:33"} +{"current_steps": 3260, "total_steps": 7532, "loss": 0.3107950687408447, "lr": 1.3006534547088651e-05, "epoch": 0.8657548798300359, "percentage": 43.28, "elapsed_time": "11:47:39", "remaining_time": "15:27:20"} +{"current_steps": 3261, "total_steps": 7532, "loss": 0.26143360137939453, "lr": 1.3002346637549726e-05, "epoch": 0.8660204488115788, "percentage": 43.3, "elapsed_time": "11:47:52", "remaining_time": "15:27:07"} +{"current_steps": 3262, "total_steps": 7532, "loss": 0.25666722655296326, "lr": 1.2998158149193773e-05, "epoch": 0.8662860177931218, "percentage": 43.31, "elapsed_time": "11:48:05", "remaining_time": "15:26:54"} +{"current_steps": 3263, "total_steps": 7532, "loss": 0.2982695698738098, "lr": 1.2993969082828296e-05, "epoch": 0.8665515867746647, "percentage": 43.32, "elapsed_time": "11:48:18", "remaining_time": "15:26:40"} +{"current_steps": 3264, "total_steps": 7532, "loss": 0.30144304037094116, "lr": 1.2989779439260888e-05, "epoch": 0.8668171557562077, "percentage": 43.34, "elapsed_time": "11:48:31", "remaining_time": "15:26:27"} +{"current_steps": 3265, "total_steps": 7532, "loss": 0.30421534180641174, "lr": 1.2985589219299264e-05, "epoch": 0.8670827247377506, "percentage": 43.35, "elapsed_time": "11:48:44", "remaining_time": "15:26:14"} +{"current_steps": 3266, "total_steps": 7532, "loss": 0.23653842508792877, "lr": 1.298139842375125e-05, "epoch": 0.8673482937192936, "percentage": 43.36, "elapsed_time": "11:48:57", "remaining_time": "15:26:01"} +{"current_steps": 3267, "total_steps": 7532, "loss": 0.284118115901947, "lr": 1.2977207053424781e-05, "epoch": 0.8676138627008365, 
"percentage": 43.37, "elapsed_time": "11:49:09", "remaining_time": "15:25:48"} +{"current_steps": 3268, "total_steps": 7532, "loss": 0.30857348442077637, "lr": 1.2973015109127907e-05, "epoch": 0.8678794316823795, "percentage": 43.39, "elapsed_time": "11:49:22", "remaining_time": "15:25:35"} +{"current_steps": 3269, "total_steps": 7532, "loss": 0.2826589047908783, "lr": 1.2968822591668784e-05, "epoch": 0.8681450006639224, "percentage": 43.4, "elapsed_time": "11:49:35", "remaining_time": "15:25:21"} +{"current_steps": 3270, "total_steps": 7532, "loss": 0.27634552121162415, "lr": 1.2964629501855678e-05, "epoch": 0.8684105696454654, "percentage": 43.41, "elapsed_time": "11:49:48", "remaining_time": "15:25:08"} +{"current_steps": 3271, "total_steps": 7532, "loss": 0.25823545455932617, "lr": 1.296043584049697e-05, "epoch": 0.8686761386270083, "percentage": 43.43, "elapsed_time": "11:50:01", "remaining_time": "15:24:55"} +{"current_steps": 3272, "total_steps": 7532, "loss": 0.28939294815063477, "lr": 1.2956241608401145e-05, "epoch": 0.8689417076085513, "percentage": 43.44, "elapsed_time": "11:50:14", "remaining_time": "15:24:42"} +{"current_steps": 3273, "total_steps": 7532, "loss": 0.3042459785938263, "lr": 1.2952046806376806e-05, "epoch": 0.8692072765900942, "percentage": 43.45, "elapsed_time": "11:50:26", "remaining_time": "15:24:28"} +{"current_steps": 3274, "total_steps": 7532, "loss": 0.2834415137767792, "lr": 1.2947851435232658e-05, "epoch": 0.8694728455716373, "percentage": 43.47, "elapsed_time": "11:50:39", "remaining_time": "15:24:15"} +{"current_steps": 3275, "total_steps": 7532, "loss": 0.28226330876350403, "lr": 1.2943655495777518e-05, "epoch": 0.8697384145531802, "percentage": 43.48, "elapsed_time": "11:50:52", "remaining_time": "15:24:01"} +{"current_steps": 3276, "total_steps": 7532, "loss": 0.2796105742454529, "lr": 1.2939458988820317e-05, "epoch": 0.8700039835347232, "percentage": 43.49, "elapsed_time": "11:51:04", "remaining_time": "15:23:47"} 
+{"current_steps": 3277, "total_steps": 7532, "loss": 0.24790553748607635, "lr": 1.2935261915170091e-05, "epoch": 0.8702695525162661, "percentage": 43.51, "elapsed_time": "11:51:17", "remaining_time": "15:23:34"} +{"current_steps": 3278, "total_steps": 7532, "loss": 0.25101587176322937, "lr": 1.2931064275635987e-05, "epoch": 0.8705351214978091, "percentage": 43.52, "elapsed_time": "11:51:30", "remaining_time": "15:23:21"} +{"current_steps": 3279, "total_steps": 7532, "loss": 0.3060816526412964, "lr": 1.2926866071027257e-05, "epoch": 0.870800690479352, "percentage": 43.53, "elapsed_time": "11:51:43", "remaining_time": "15:23:07"} +{"current_steps": 3280, "total_steps": 7532, "loss": 0.3137212097644806, "lr": 1.2922667302153268e-05, "epoch": 0.871066259460895, "percentage": 43.55, "elapsed_time": "11:51:55", "remaining_time": "15:22:54"} +{"current_steps": 3281, "total_steps": 7532, "loss": 0.2391548752784729, "lr": 1.2918467969823497e-05, "epoch": 0.871331828442438, "percentage": 43.56, "elapsed_time": "11:52:08", "remaining_time": "15:22:41"} +{"current_steps": 3282, "total_steps": 7532, "loss": 0.3219330608844757, "lr": 1.2914268074847516e-05, "epoch": 0.8715973974239809, "percentage": 43.57, "elapsed_time": "11:52:21", "remaining_time": "15:22:27"} +{"current_steps": 3283, "total_steps": 7532, "loss": 0.2934436798095703, "lr": 1.2910067618035025e-05, "epoch": 0.8718629664055239, "percentage": 43.59, "elapsed_time": "11:52:34", "remaining_time": "15:22:14"} +{"current_steps": 3284, "total_steps": 7532, "loss": 0.2919486165046692, "lr": 1.2905866600195815e-05, "epoch": 0.8721285353870668, "percentage": 43.6, "elapsed_time": "11:52:47", "remaining_time": "15:22:01"} +{"current_steps": 3285, "total_steps": 7532, "loss": 0.2840641438961029, "lr": 1.2901665022139796e-05, "epoch": 0.8723941043686098, "percentage": 43.61, "elapsed_time": "11:53:00", "remaining_time": "15:21:48"} +{"current_steps": 3286, "total_steps": 7532, "loss": 0.24151530861854553, "lr": 
1.2897462884676983e-05, "epoch": 0.8726596733501527, "percentage": 43.63, "elapsed_time": "11:53:13", "remaining_time": "15:21:34"} +{"current_steps": 3287, "total_steps": 7532, "loss": 0.24515505135059357, "lr": 1.28932601886175e-05, "epoch": 0.8729252423316957, "percentage": 43.64, "elapsed_time": "11:53:25", "remaining_time": "15:21:21"} +{"current_steps": 3288, "total_steps": 7532, "loss": 0.2561264634132385, "lr": 1.2889056934771577e-05, "epoch": 0.8731908113132386, "percentage": 43.65, "elapsed_time": "11:53:38", "remaining_time": "15:21:08"} +{"current_steps": 3289, "total_steps": 7532, "loss": 0.2798641622066498, "lr": 1.2884853123949547e-05, "epoch": 0.8734563802947816, "percentage": 43.67, "elapsed_time": "11:53:51", "remaining_time": "15:20:55"} +{"current_steps": 3290, "total_steps": 7532, "loss": 0.35207298398017883, "lr": 1.288064875696186e-05, "epoch": 0.8737219492763245, "percentage": 43.68, "elapsed_time": "11:54:04", "remaining_time": "15:20:42"} +{"current_steps": 3291, "total_steps": 7532, "loss": 0.2778821289539337, "lr": 1.2876443834619066e-05, "epoch": 0.8739875182578675, "percentage": 43.69, "elapsed_time": "11:54:17", "remaining_time": "15:20:29"} +{"current_steps": 3292, "total_steps": 7532, "loss": 0.2691737413406372, "lr": 1.2872238357731825e-05, "epoch": 0.8742530872394104, "percentage": 43.71, "elapsed_time": "11:54:30", "remaining_time": "15:20:16"} +{"current_steps": 3293, "total_steps": 7532, "loss": 0.25476595759391785, "lr": 1.2868032327110904e-05, "epoch": 0.8745186562209534, "percentage": 43.72, "elapsed_time": "11:54:43", "remaining_time": "15:20:03"} +{"current_steps": 3294, "total_steps": 7532, "loss": 0.258474737405777, "lr": 1.2863825743567174e-05, "epoch": 0.8747842252024963, "percentage": 43.73, "elapsed_time": "11:54:56", "remaining_time": "15:19:50"} +{"current_steps": 3295, "total_steps": 7532, "loss": 0.32421568036079407, "lr": 1.285961860791162e-05, "epoch": 0.8750497941840393, "percentage": 43.75, "elapsed_time": 
"11:55:09", "remaining_time": "15:19:36"} +{"current_steps": 3296, "total_steps": 7532, "loss": 0.3090333342552185, "lr": 1.2855410920955323e-05, "epoch": 0.8753153631655822, "percentage": 43.76, "elapsed_time": "11:55:22", "remaining_time": "15:19:24"} +{"current_steps": 3297, "total_steps": 7532, "loss": 0.26548707485198975, "lr": 1.2851202683509476e-05, "epoch": 0.8755809321471252, "percentage": 43.77, "elapsed_time": "11:55:35", "remaining_time": "15:19:10"} +{"current_steps": 3298, "total_steps": 7532, "loss": 0.3002355098724365, "lr": 1.2846993896385378e-05, "epoch": 0.8758465011286681, "percentage": 43.79, "elapsed_time": "11:55:48", "remaining_time": "15:18:57"} +{"current_steps": 3299, "total_steps": 7532, "loss": 0.2924933135509491, "lr": 1.2842784560394433e-05, "epoch": 0.8761120701102111, "percentage": 43.8, "elapsed_time": "11:56:01", "remaining_time": "15:18:44"} +{"current_steps": 3300, "total_steps": 7532, "loss": 0.2886514663696289, "lr": 1.2838574676348155e-05, "epoch": 0.876377639091754, "percentage": 43.81, "elapsed_time": "11:56:14", "remaining_time": "15:18:30"} +{"current_steps": 3301, "total_steps": 7532, "loss": 0.29821154475212097, "lr": 1.2834364245058155e-05, "epoch": 0.876643208073297, "percentage": 43.83, "elapsed_time": "11:56:32", "remaining_time": "15:18:25"} +{"current_steps": 3302, "total_steps": 7532, "loss": 0.2656530737876892, "lr": 1.2830153267336159e-05, "epoch": 0.87690877705484, "percentage": 43.84, "elapsed_time": "11:56:45", "remaining_time": "15:18:12"} +{"current_steps": 3303, "total_steps": 7532, "loss": 0.3437826633453369, "lr": 1.282594174399399e-05, "epoch": 0.877174346036383, "percentage": 43.85, "elapsed_time": "11:56:59", "remaining_time": "15:17:59"} +{"current_steps": 3304, "total_steps": 7532, "loss": 0.29773175716400146, "lr": 1.2821729675843581e-05, "epoch": 0.877439915017926, "percentage": 43.87, "elapsed_time": "11:57:11", "remaining_time": "15:17:46"} +{"current_steps": 3305, "total_steps": 7532, "loss": 
0.29772818088531494, "lr": 1.2817517063696973e-05, "epoch": 0.8777054839994689, "percentage": 43.88, "elapsed_time": "11:57:25", "remaining_time": "15:17:33"} +{"current_steps": 3306, "total_steps": 7532, "loss": 0.3266611099243164, "lr": 1.2813303908366303e-05, "epoch": 0.8779710529810119, "percentage": 43.89, "elapsed_time": "11:57:38", "remaining_time": "15:17:20"} +{"current_steps": 3307, "total_steps": 7532, "loss": 0.26599690318107605, "lr": 1.2809090210663818e-05, "epoch": 0.8782366219625548, "percentage": 43.91, "elapsed_time": "11:57:51", "remaining_time": "15:17:07"} +{"current_steps": 3308, "total_steps": 7532, "loss": 0.27988117933273315, "lr": 1.2804875971401872e-05, "epoch": 0.8785021909440978, "percentage": 43.92, "elapsed_time": "11:58:04", "remaining_time": "15:16:54"} +{"current_steps": 3309, "total_steps": 7532, "loss": 0.2630334496498108, "lr": 1.2800661191392916e-05, "epoch": 0.8787677599256407, "percentage": 43.93, "elapsed_time": "11:58:17", "remaining_time": "15:16:41"} +{"current_steps": 3310, "total_steps": 7532, "loss": 0.2628091871738434, "lr": 1.2796445871449517e-05, "epoch": 0.8790333289071837, "percentage": 43.95, "elapsed_time": "11:58:30", "remaining_time": "15:16:28"} +{"current_steps": 3311, "total_steps": 7532, "loss": 0.3443898558616638, "lr": 1.2792230012384333e-05, "epoch": 0.8792988978887266, "percentage": 43.96, "elapsed_time": "11:58:43", "remaining_time": "15:16:15"} +{"current_steps": 3312, "total_steps": 7532, "loss": 0.2966022491455078, "lr": 1.2788013615010136e-05, "epoch": 0.8795644668702696, "percentage": 43.97, "elapsed_time": "11:58:57", "remaining_time": "15:16:03"} +{"current_steps": 3313, "total_steps": 7532, "loss": 0.2995494604110718, "lr": 1.2783796680139793e-05, "epoch": 0.8798300358518125, "percentage": 43.99, "elapsed_time": "11:59:10", "remaining_time": "15:15:50"} +{"current_steps": 3314, "total_steps": 7532, "loss": 0.2652590870857239, "lr": 1.2779579208586283e-05, "epoch": 0.8800956048333555, 
"percentage": 44.0, "elapsed_time": "11:59:23", "remaining_time": "15:15:38"} +{"current_steps": 3315, "total_steps": 7532, "loss": 0.3145690858364105, "lr": 1.2775361201162684e-05, "epoch": 0.8803611738148984, "percentage": 44.01, "elapsed_time": "11:59:36", "remaining_time": "15:15:24"} +{"current_steps": 3316, "total_steps": 7532, "loss": 0.25744086503982544, "lr": 1.2771142658682175e-05, "epoch": 0.8806267427964414, "percentage": 44.03, "elapsed_time": "11:59:50", "remaining_time": "15:15:12"} +{"current_steps": 3317, "total_steps": 7532, "loss": 0.3129793405532837, "lr": 1.2766923581958046e-05, "epoch": 0.8808923117779843, "percentage": 44.04, "elapsed_time": "12:00:02", "remaining_time": "15:14:59"} +{"current_steps": 3318, "total_steps": 7532, "loss": 0.233384907245636, "lr": 1.2762703971803684e-05, "epoch": 0.8811578807595273, "percentage": 44.05, "elapsed_time": "12:00:16", "remaining_time": "15:14:46"} +{"current_steps": 3319, "total_steps": 7532, "loss": 0.2422962635755539, "lr": 1.2758483829032579e-05, "epoch": 0.8814234497410702, "percentage": 44.07, "elapsed_time": "12:00:29", "remaining_time": "15:14:33"} +{"current_steps": 3320, "total_steps": 7532, "loss": 0.2801973819732666, "lr": 1.2754263154458328e-05, "epoch": 0.8816890187226132, "percentage": 44.08, "elapsed_time": "12:00:42", "remaining_time": "15:14:20"} +{"current_steps": 3321, "total_steps": 7532, "loss": 0.30659937858581543, "lr": 1.2750041948894621e-05, "epoch": 0.8819545877041561, "percentage": 44.09, "elapsed_time": "12:00:55", "remaining_time": "15:14:07"} +{"current_steps": 3322, "total_steps": 7532, "loss": 0.28527066111564636, "lr": 1.274582021315526e-05, "epoch": 0.8822201566856991, "percentage": 44.11, "elapsed_time": "12:01:08", "remaining_time": "15:13:53"} +{"current_steps": 3323, "total_steps": 7532, "loss": 0.23065675795078278, "lr": 1.2741597948054146e-05, "epoch": 0.882485725667242, "percentage": 44.12, "elapsed_time": "12:01:21", "remaining_time": "15:13:41"} 
+{"current_steps": 3324, "total_steps": 7532, "loss": 0.2727832794189453, "lr": 1.2737375154405283e-05, "epoch": 0.882751294648785, "percentage": 44.13, "elapsed_time": "12:01:34", "remaining_time": "15:13:27"} +{"current_steps": 3325, "total_steps": 7532, "loss": 0.26809507608413696, "lr": 1.273315183302277e-05, "epoch": 0.8830168636303279, "percentage": 44.14, "elapsed_time": "12:01:47", "remaining_time": "15:13:15"} +{"current_steps": 3326, "total_steps": 7532, "loss": 0.3250407576560974, "lr": 1.2728927984720823e-05, "epoch": 0.8832824326118709, "percentage": 44.16, "elapsed_time": "12:01:59", "remaining_time": "15:13:01"} +{"current_steps": 3327, "total_steps": 7532, "loss": 0.2651330232620239, "lr": 1.2724703610313742e-05, "epoch": 0.8835480015934138, "percentage": 44.17, "elapsed_time": "12:02:13", "remaining_time": "15:12:49"} +{"current_steps": 3328, "total_steps": 7532, "loss": 0.27337920665740967, "lr": 1.2720478710615944e-05, "epoch": 0.8838135705749568, "percentage": 44.18, "elapsed_time": "12:02:25", "remaining_time": "15:12:35"} +{"current_steps": 3329, "total_steps": 7532, "loss": 0.2664092183113098, "lr": 1.2716253286441935e-05, "epoch": 0.8840791395564997, "percentage": 44.2, "elapsed_time": "12:02:39", "remaining_time": "15:12:22"} +{"current_steps": 3330, "total_steps": 7532, "loss": 0.27927765250205994, "lr": 1.2712027338606323e-05, "epoch": 0.8843447085380428, "percentage": 44.21, "elapsed_time": "12:02:52", "remaining_time": "15:12:09"} +{"current_steps": 3331, "total_steps": 7532, "loss": 0.27241113781929016, "lr": 1.270780086792383e-05, "epoch": 0.8846102775195858, "percentage": 44.22, "elapsed_time": "12:03:05", "remaining_time": "15:11:56"} +{"current_steps": 3332, "total_steps": 7532, "loss": 0.28279373049736023, "lr": 1.2703573875209264e-05, "epoch": 0.8848758465011287, "percentage": 44.24, "elapsed_time": "12:03:18", "remaining_time": "15:11:43"} +{"current_steps": 3333, "total_steps": 7532, "loss": 0.3011108934879303, "lr": 
1.2699346361277538e-05, "epoch": 0.8851414154826717, "percentage": 44.25, "elapsed_time": "12:03:31", "remaining_time": "15:11:31"} +{"current_steps": 3334, "total_steps": 7532, "loss": 0.3071288764476776, "lr": 1.2695118326943671e-05, "epoch": 0.8854069844642146, "percentage": 44.26, "elapsed_time": "12:03:44", "remaining_time": "15:11:17"} +{"current_steps": 3335, "total_steps": 7532, "loss": 0.2688761353492737, "lr": 1.2690889773022778e-05, "epoch": 0.8856725534457576, "percentage": 44.28, "elapsed_time": "12:03:57", "remaining_time": "15:11:04"} +{"current_steps": 3336, "total_steps": 7532, "loss": 0.2788669466972351, "lr": 1.2686660700330074e-05, "epoch": 0.8859381224273005, "percentage": 44.29, "elapsed_time": "12:04:10", "remaining_time": "15:10:51"} +{"current_steps": 3337, "total_steps": 7532, "loss": 0.2801516652107239, "lr": 1.268243110968087e-05, "epoch": 0.8862036914088435, "percentage": 44.3, "elapsed_time": "12:04:23", "remaining_time": "15:10:38"} +{"current_steps": 3338, "total_steps": 7532, "loss": 0.2876908779144287, "lr": 1.2678201001890587e-05, "epoch": 0.8864692603903864, "percentage": 44.32, "elapsed_time": "12:04:36", "remaining_time": "15:10:25"} +{"current_steps": 3339, "total_steps": 7532, "loss": 0.27709734439849854, "lr": 1.2673970377774733e-05, "epoch": 0.8867348293719294, "percentage": 44.33, "elapsed_time": "12:04:48", "remaining_time": "15:10:11"} +{"current_steps": 3340, "total_steps": 7532, "loss": 0.3191622793674469, "lr": 1.266973923814893e-05, "epoch": 0.8870003983534723, "percentage": 44.34, "elapsed_time": "12:05:02", "remaining_time": "15:09:59"} +{"current_steps": 3341, "total_steps": 7532, "loss": 0.2873385548591614, "lr": 1.2665507583828889e-05, "epoch": 0.8872659673350153, "percentage": 44.36, "elapsed_time": "12:05:15", "remaining_time": "15:09:46"} +{"current_steps": 3342, "total_steps": 7532, "loss": 0.2922922372817993, "lr": 1.2661275415630421e-05, "epoch": 0.8875315363165582, "percentage": 44.37, "elapsed_time": 
"12:05:28", "remaining_time": "15:09:33"} +{"current_steps": 3343, "total_steps": 7532, "loss": 0.305694043636322, "lr": 1.2657042734369443e-05, "epoch": 0.8877971052981012, "percentage": 44.38, "elapsed_time": "12:05:41", "remaining_time": "15:09:20"} +{"current_steps": 3344, "total_steps": 7532, "loss": 0.29108062386512756, "lr": 1.2652809540861958e-05, "epoch": 0.8880626742796441, "percentage": 44.4, "elapsed_time": "12:05:54", "remaining_time": "15:09:07"} +{"current_steps": 3345, "total_steps": 7532, "loss": 0.24170495569705963, "lr": 1.2648575835924084e-05, "epoch": 0.8883282432611871, "percentage": 44.41, "elapsed_time": "12:06:07", "remaining_time": "15:08:54"} +{"current_steps": 3346, "total_steps": 7532, "loss": 0.2987719476222992, "lr": 1.2644341620372025e-05, "epoch": 0.88859381224273, "percentage": 44.42, "elapsed_time": "12:06:21", "remaining_time": "15:08:42"} +{"current_steps": 3347, "total_steps": 7532, "loss": 0.21037599444389343, "lr": 1.2640106895022088e-05, "epoch": 0.888859381224273, "percentage": 44.44, "elapsed_time": "12:06:33", "remaining_time": "15:08:28"} +{"current_steps": 3348, "total_steps": 7532, "loss": 0.25263655185699463, "lr": 1.2635871660690677e-05, "epoch": 0.889124950205816, "percentage": 44.45, "elapsed_time": "12:06:47", "remaining_time": "15:08:15"} +{"current_steps": 3349, "total_steps": 7532, "loss": 0.30169543623924255, "lr": 1.2631635918194301e-05, "epoch": 0.8893905191873589, "percentage": 44.46, "elapsed_time": "12:06:59", "remaining_time": "15:08:02"} +{"current_steps": 3350, "total_steps": 7532, "loss": 0.26982420682907104, "lr": 1.2627399668349554e-05, "epoch": 0.8896560881689018, "percentage": 44.48, "elapsed_time": "12:07:13", "remaining_time": "15:07:49"} +{"current_steps": 3351, "total_steps": 7532, "loss": 0.3281899690628052, "lr": 1.262316291197314e-05, "epoch": 0.8899216571504448, "percentage": 44.49, "elapsed_time": "12:07:25", "remaining_time": "15:07:36"} +{"current_steps": 3352, "total_steps": 7532, 
"loss": 0.30140435695648193, "lr": 1.2618925649881852e-05, "epoch": 0.8901872261319878, "percentage": 44.5, "elapsed_time": "12:07:38", "remaining_time": "15:07:23"} +{"current_steps": 3353, "total_steps": 7532, "loss": 0.22343885898590088, "lr": 1.261468788289259e-05, "epoch": 0.8904527951135307, "percentage": 44.52, "elapsed_time": "12:07:52", "remaining_time": "15:07:10"} +{"current_steps": 3354, "total_steps": 7532, "loss": 0.2889901399612427, "lr": 1.261044961182234e-05, "epoch": 0.8907183640950737, "percentage": 44.53, "elapsed_time": "12:08:04", "remaining_time": "15:06:57"} +{"current_steps": 3355, "total_steps": 7532, "loss": 0.27896153926849365, "lr": 1.260621083748819e-05, "epoch": 0.8909839330766166, "percentage": 44.54, "elapsed_time": "12:08:17", "remaining_time": "15:06:44"} +{"current_steps": 3356, "total_steps": 7532, "loss": 0.29390811920166016, "lr": 1.2601971560707328e-05, "epoch": 0.8912495020581596, "percentage": 44.56, "elapsed_time": "12:08:30", "remaining_time": "15:06:30"} +{"current_steps": 3357, "total_steps": 7532, "loss": 0.2872384190559387, "lr": 1.2597731782297036e-05, "epoch": 0.8915150710397025, "percentage": 44.57, "elapsed_time": "12:08:43", "remaining_time": "15:06:18"} +{"current_steps": 3358, "total_steps": 7532, "loss": 0.29753726720809937, "lr": 1.2593491503074698e-05, "epoch": 0.8917806400212455, "percentage": 44.58, "elapsed_time": "12:08:56", "remaining_time": "15:06:04"} +{"current_steps": 3359, "total_steps": 7532, "loss": 0.31631946563720703, "lr": 1.2589250723857782e-05, "epoch": 0.8920462090027885, "percentage": 44.6, "elapsed_time": "12:09:10", "remaining_time": "15:05:52"} +{"current_steps": 3360, "total_steps": 7532, "loss": 0.2932048738002777, "lr": 1.2585009445463867e-05, "epoch": 0.8923117779843315, "percentage": 44.61, "elapsed_time": "12:09:22", "remaining_time": "15:05:38"} +{"current_steps": 3361, "total_steps": 7532, "loss": 0.2902034521102905, "lr": 1.2580767668710614e-05, "epoch": 0.8925773469658744, 
"percentage": 44.62, "elapsed_time": "12:09:36", "remaining_time": "15:05:26"} +{"current_steps": 3362, "total_steps": 7532, "loss": 0.2596299648284912, "lr": 1.2576525394415795e-05, "epoch": 0.8928429159474174, "percentage": 44.64, "elapsed_time": "12:09:49", "remaining_time": "15:05:13"} +{"current_steps": 3363, "total_steps": 7532, "loss": 0.29102641344070435, "lr": 1.2572282623397268e-05, "epoch": 0.8931084849289603, "percentage": 44.65, "elapsed_time": "12:10:02", "remaining_time": "15:05:00"} +{"current_steps": 3364, "total_steps": 7532, "loss": 0.2970406711101532, "lr": 1.2568039356472985e-05, "epoch": 0.8933740539105033, "percentage": 44.66, "elapsed_time": "12:10:15", "remaining_time": "15:04:46"} +{"current_steps": 3365, "total_steps": 7532, "loss": 0.2916618585586548, "lr": 1.2563795594461003e-05, "epoch": 0.8936396228920462, "percentage": 44.68, "elapsed_time": "12:10:28", "remaining_time": "15:04:34"} +{"current_steps": 3366, "total_steps": 7532, "loss": 0.3217374086380005, "lr": 1.2559551338179468e-05, "epoch": 0.8939051918735892, "percentage": 44.69, "elapsed_time": "12:10:40", "remaining_time": "15:04:20"} +{"current_steps": 3367, "total_steps": 7532, "loss": 0.3000059425830841, "lr": 1.255530658844662e-05, "epoch": 0.8941707608551321, "percentage": 44.7, "elapsed_time": "12:10:54", "remaining_time": "15:04:07"} +{"current_steps": 3368, "total_steps": 7532, "loss": 0.2848728895187378, "lr": 1.2551061346080804e-05, "epoch": 0.8944363298366751, "percentage": 44.72, "elapsed_time": "12:11:07", "remaining_time": "15:03:54"} +{"current_steps": 3369, "total_steps": 7532, "loss": 0.3328903317451477, "lr": 1.2546815611900442e-05, "epoch": 0.894701898818218, "percentage": 44.73, "elapsed_time": "12:11:20", "remaining_time": "15:03:42"} +{"current_steps": 3370, "total_steps": 7532, "loss": 0.2920045256614685, "lr": 1.2542569386724069e-05, "epoch": 0.894967467799761, "percentage": 44.74, "elapsed_time": "12:11:33", "remaining_time": "15:03:28"} 
+{"current_steps": 3371, "total_steps": 7532, "loss": 0.30370092391967773, "lr": 1.2538322671370305e-05, "epoch": 0.895233036781304, "percentage": 44.76, "elapsed_time": "12:11:46", "remaining_time": "15:03:15"} +{"current_steps": 3372, "total_steps": 7532, "loss": 0.24454624950885773, "lr": 1.2534075466657866e-05, "epoch": 0.8954986057628469, "percentage": 44.77, "elapsed_time": "12:11:59", "remaining_time": "15:03:03"} +{"current_steps": 3373, "total_steps": 7532, "loss": 0.30908581614494324, "lr": 1.2529827773405566e-05, "epoch": 0.8957641747443899, "percentage": 44.78, "elapsed_time": "12:12:12", "remaining_time": "15:02:49"} +{"current_steps": 3374, "total_steps": 7532, "loss": 0.2792360782623291, "lr": 1.2525579592432304e-05, "epoch": 0.8960297437259328, "percentage": 44.8, "elapsed_time": "12:12:26", "remaining_time": "15:02:37"} +{"current_steps": 3375, "total_steps": 7532, "loss": 0.285555362701416, "lr": 1.2521330924557087e-05, "epoch": 0.8962953127074758, "percentage": 44.81, "elapsed_time": "12:12:38", "remaining_time": "15:02:24"} +{"current_steps": 3376, "total_steps": 7532, "loss": 0.3159451484680176, "lr": 1.2517081770599002e-05, "epoch": 0.8965608816890187, "percentage": 44.82, "elapsed_time": "12:12:52", "remaining_time": "15:02:11"} +{"current_steps": 3377, "total_steps": 7532, "loss": 0.35929200053215027, "lr": 1.2512832131377237e-05, "epoch": 0.8968264506705617, "percentage": 44.84, "elapsed_time": "12:13:05", "remaining_time": "15:01:58"} +{"current_steps": 3378, "total_steps": 7532, "loss": 0.28624874353408813, "lr": 1.2508582007711074e-05, "epoch": 0.8970920196521046, "percentage": 44.85, "elapsed_time": "12:13:18", "remaining_time": "15:01:45"} +{"current_steps": 3379, "total_steps": 7532, "loss": 0.27670109272003174, "lr": 1.2504331400419884e-05, "epoch": 0.8973575886336476, "percentage": 44.86, "elapsed_time": "12:13:31", "remaining_time": "15:01:32"} +{"current_steps": 3380, "total_steps": 7532, "loss": 0.2894589304924011, "lr": 
1.2500080310323139e-05, "epoch": 0.8976231576151905, "percentage": 44.88, "elapsed_time": "12:13:44", "remaining_time": "15:01:19"} +{"current_steps": 3381, "total_steps": 7532, "loss": 0.31378716230392456, "lr": 1.2495828738240396e-05, "epoch": 0.8978887265967335, "percentage": 44.89, "elapsed_time": "12:13:57", "remaining_time": "15:01:06"} +{"current_steps": 3382, "total_steps": 7532, "loss": 0.33676713705062866, "lr": 1.2491576684991306e-05, "epoch": 0.8981542955782764, "percentage": 44.9, "elapsed_time": "12:14:10", "remaining_time": "15:00:53"} +{"current_steps": 3383, "total_steps": 7532, "loss": 0.2875351011753082, "lr": 1.2487324151395618e-05, "epoch": 0.8984198645598194, "percentage": 44.92, "elapsed_time": "12:14:23", "remaining_time": "15:00:40"} +{"current_steps": 3384, "total_steps": 7532, "loss": 0.29729989171028137, "lr": 1.2483071138273168e-05, "epoch": 0.8986854335413623, "percentage": 44.93, "elapsed_time": "12:14:36", "remaining_time": "15:00:27"} +{"current_steps": 3385, "total_steps": 7532, "loss": 0.3227398991584778, "lr": 1.2478817646443888e-05, "epoch": 0.8989510025229053, "percentage": 44.94, "elapsed_time": "12:14:49", "remaining_time": "15:00:15"} +{"current_steps": 3386, "total_steps": 7532, "loss": 0.2664690315723419, "lr": 1.2474563676727803e-05, "epoch": 0.8992165715044482, "percentage": 44.95, "elapsed_time": "12:15:02", "remaining_time": "15:00:01"} +{"current_steps": 3387, "total_steps": 7532, "loss": 0.29543352127075195, "lr": 1.2470309229945021e-05, "epoch": 0.8994821404859913, "percentage": 44.97, "elapsed_time": "12:15:15", "remaining_time": "14:59:48"} +{"current_steps": 3388, "total_steps": 7532, "loss": 0.26658856868743896, "lr": 1.2466054306915756e-05, "epoch": 0.8997477094675342, "percentage": 44.98, "elapsed_time": "12:15:28", "remaining_time": "14:59:35"} +{"current_steps": 3389, "total_steps": 7532, "loss": 0.2899627387523651, "lr": 1.2461798908460305e-05, "epoch": 0.9000132784490772, "percentage": 44.99, 
"elapsed_time": "12:15:42", "remaining_time": "14:59:23"} +{"current_steps": 3390, "total_steps": 7532, "loss": 0.2708336114883423, "lr": 1.245754303539906e-05, "epoch": 0.9002788474306201, "percentage": 45.01, "elapsed_time": "12:15:54", "remaining_time": "14:59:09"} +{"current_steps": 3391, "total_steps": 7532, "loss": 0.28124746680259705, "lr": 1.2453286688552502e-05, "epoch": 0.9005444164121631, "percentage": 45.02, "elapsed_time": "12:16:08", "remaining_time": "14:58:57"} +{"current_steps": 3392, "total_steps": 7532, "loss": 0.2599399983882904, "lr": 1.2449029868741202e-05, "epoch": 0.900809985393706, "percentage": 45.03, "elapsed_time": "12:16:21", "remaining_time": "14:58:44"} +{"current_steps": 3393, "total_steps": 7532, "loss": 0.25035667419433594, "lr": 1.2444772576785828e-05, "epoch": 0.901075554375249, "percentage": 45.05, "elapsed_time": "12:16:33", "remaining_time": "14:58:30"} +{"current_steps": 3394, "total_steps": 7532, "loss": 0.2772521376609802, "lr": 1.2440514813507136e-05, "epoch": 0.901341123356792, "percentage": 45.06, "elapsed_time": "12:16:47", "remaining_time": "14:58:17"} +{"current_steps": 3395, "total_steps": 7532, "loss": 0.3282839357852936, "lr": 1.2436256579725969e-05, "epoch": 0.9016066923383349, "percentage": 45.07, "elapsed_time": "12:16:59", "remaining_time": "14:58:04"} +{"current_steps": 3396, "total_steps": 7532, "loss": 0.2507914900779724, "lr": 1.2431997876263269e-05, "epoch": 0.9018722613198779, "percentage": 45.09, "elapsed_time": "12:17:13", "remaining_time": "14:57:51"} +{"current_steps": 3397, "total_steps": 7532, "loss": 0.2620914876461029, "lr": 1.2427738703940055e-05, "epoch": 0.9021378303014208, "percentage": 45.1, "elapsed_time": "12:17:25", "remaining_time": "14:57:38"} +{"current_steps": 3398, "total_steps": 7532, "loss": 0.26561641693115234, "lr": 1.2423479063577458e-05, "epoch": 0.9024033992829638, "percentage": 45.11, "elapsed_time": "12:17:39", "remaining_time": "14:57:25"} +{"current_steps": 3399, 
"total_steps": 7532, "loss": 0.2998678386211395, "lr": 1.2419218955996677e-05, "epoch": 0.9026689682645067, "percentage": 45.13, "elapsed_time": "12:17:52", "remaining_time": "14:57:12"} +{"current_steps": 3400, "total_steps": 7532, "loss": 0.2368398755788803, "lr": 1.2414958382019017e-05, "epoch": 0.9029345372460497, "percentage": 45.14, "elapsed_time": "12:18:05", "remaining_time": "14:56:59"} +{"current_steps": 3401, "total_steps": 7532, "loss": 0.2623558044433594, "lr": 1.241069734246586e-05, "epoch": 0.9032001062275926, "percentage": 45.15, "elapsed_time": "12:18:24", "remaining_time": "14:56:53"} +{"current_steps": 3402, "total_steps": 7532, "loss": 0.2693074941635132, "lr": 1.2406435838158686e-05, "epoch": 0.9034656752091356, "percentage": 45.17, "elapsed_time": "12:18:37", "remaining_time": "14:56:40"} +{"current_steps": 3403, "total_steps": 7532, "loss": 0.2933652698993683, "lr": 1.2402173869919063e-05, "epoch": 0.9037312441906785, "percentage": 45.18, "elapsed_time": "12:18:49", "remaining_time": "14:56:27"} +{"current_steps": 3404, "total_steps": 7532, "loss": 0.28515487909317017, "lr": 1.2397911438568651e-05, "epoch": 0.9039968131722215, "percentage": 45.19, "elapsed_time": "12:19:03", "remaining_time": "14:56:14"} +{"current_steps": 3405, "total_steps": 7532, "loss": 0.282942533493042, "lr": 1.2393648544929193e-05, "epoch": 0.9042623821537644, "percentage": 45.21, "elapsed_time": "12:19:15", "remaining_time": "14:56:00"} +{"current_steps": 3406, "total_steps": 7532, "loss": 0.28300392627716064, "lr": 1.2389385189822526e-05, "epoch": 0.9045279511353074, "percentage": 45.22, "elapsed_time": "12:19:28", "remaining_time": "14:55:48"} +{"current_steps": 3407, "total_steps": 7532, "loss": 0.25697019696235657, "lr": 1.2385121374070577e-05, "epoch": 0.9047935201168503, "percentage": 45.23, "elapsed_time": "12:19:41", "remaining_time": "14:55:34"} +{"current_steps": 3408, "total_steps": 7532, "loss": 0.31156057119369507, "lr": 1.2380857098495355e-05, "epoch": 
0.9050590890983933, "percentage": 45.25, "elapsed_time": "12:19:54", "remaining_time": "14:55:21"} +{"current_steps": 3409, "total_steps": 7532, "loss": 0.2943422794342041, "lr": 1.2376592363918967e-05, "epoch": 0.9053246580799362, "percentage": 45.26, "elapsed_time": "12:20:06", "remaining_time": "14:55:07"} +{"current_steps": 3410, "total_steps": 7532, "loss": 0.2792074680328369, "lr": 1.2372327171163596e-05, "epoch": 0.9055902270614792, "percentage": 45.27, "elapsed_time": "12:20:20", "remaining_time": "14:54:55"} +{"current_steps": 3411, "total_steps": 7532, "loss": 0.2547443211078644, "lr": 1.2368061521051526e-05, "epoch": 0.9058557960430221, "percentage": 45.29, "elapsed_time": "12:20:32", "remaining_time": "14:54:41"} +{"current_steps": 3412, "total_steps": 7532, "loss": 0.25637373328208923, "lr": 1.2363795414405125e-05, "epoch": 0.9061213650245651, "percentage": 45.3, "elapsed_time": "12:20:45", "remaining_time": "14:54:28"} +{"current_steps": 3413, "total_steps": 7532, "loss": 0.3269123435020447, "lr": 1.2359528852046844e-05, "epoch": 0.906386934006108, "percentage": 45.31, "elapsed_time": "12:20:58", "remaining_time": "14:54:14"} +{"current_steps": 3414, "total_steps": 7532, "loss": 0.28538423776626587, "lr": 1.2355261834799232e-05, "epoch": 0.906652502987651, "percentage": 45.33, "elapsed_time": "12:21:10", "remaining_time": "14:54:01"} +{"current_steps": 3415, "total_steps": 7532, "loss": 0.2961096167564392, "lr": 1.2350994363484915e-05, "epoch": 0.906918071969194, "percentage": 45.34, "elapsed_time": "12:21:23", "remaining_time": "14:53:48"} +{"current_steps": 3416, "total_steps": 7532, "loss": 0.3134537935256958, "lr": 1.2346726438926613e-05, "epoch": 0.907183640950737, "percentage": 45.35, "elapsed_time": "12:21:36", "remaining_time": "14:53:34"} +{"current_steps": 3417, "total_steps": 7532, "loss": 0.2614031434059143, "lr": 1.2342458061947129e-05, "epoch": 0.90744920993228, "percentage": 45.37, "elapsed_time": "12:21:49", "remaining_time": 
"14:53:21"} +{"current_steps": 3418, "total_steps": 7532, "loss": 0.27166056632995605, "lr": 1.2338189233369357e-05, "epoch": 0.9077147789138229, "percentage": 45.38, "elapsed_time": "12:22:01", "remaining_time": "14:53:07"} +{"current_steps": 3419, "total_steps": 7532, "loss": 0.26053497195243835, "lr": 1.2333919954016277e-05, "epoch": 0.9079803478953659, "percentage": 45.39, "elapsed_time": "12:22:14", "remaining_time": "14:52:54"} +{"current_steps": 3420, "total_steps": 7532, "loss": 0.3109636902809143, "lr": 1.2329650224710956e-05, "epoch": 0.9082459168769088, "percentage": 45.41, "elapsed_time": "12:22:26", "remaining_time": "14:52:40"} +{"current_steps": 3421, "total_steps": 7532, "loss": 0.2576507329940796, "lr": 1.232538004627655e-05, "epoch": 0.9085114858584518, "percentage": 45.42, "elapsed_time": "12:22:39", "remaining_time": "14:52:27"} +{"current_steps": 3422, "total_steps": 7532, "loss": 0.2525216341018677, "lr": 1.2321109419536292e-05, "epoch": 0.9087770548399947, "percentage": 45.43, "elapsed_time": "12:22:52", "remaining_time": "14:52:13"} +{"current_steps": 3423, "total_steps": 7532, "loss": 0.2483336180448532, "lr": 1.2316838345313517e-05, "epoch": 0.9090426238215377, "percentage": 45.45, "elapsed_time": "12:23:05", "remaining_time": "14:52:00"} +{"current_steps": 3424, "total_steps": 7532, "loss": 0.26372796297073364, "lr": 1.2312566824431631e-05, "epoch": 0.9093081928030806, "percentage": 45.46, "elapsed_time": "12:23:18", "remaining_time": "14:51:47"} +{"current_steps": 3425, "total_steps": 7532, "loss": 0.2933644950389862, "lr": 1.2308294857714138e-05, "epoch": 0.9095737617846236, "percentage": 45.47, "elapsed_time": "12:23:31", "remaining_time": "14:51:34"} +{"current_steps": 3426, "total_steps": 7532, "loss": 0.2543371915817261, "lr": 1.2304022445984618e-05, "epoch": 0.9098393307661665, "percentage": 45.49, "elapsed_time": "12:23:43", "remaining_time": "14:51:20"} +{"current_steps": 3427, "total_steps": 7532, "loss": 0.29246431589126587, 
"lr": 1.2299749590066745e-05, "epoch": 0.9101048997477095, "percentage": 45.5, "elapsed_time": "12:23:56", "remaining_time": "14:51:07"} +{"current_steps": 3428, "total_steps": 7532, "loss": 0.2475431263446808, "lr": 1.2295476290784273e-05, "epoch": 0.9103704687292524, "percentage": 45.51, "elapsed_time": "12:24:09", "remaining_time": "14:50:53"} +{"current_steps": 3429, "total_steps": 7532, "loss": 0.3312363624572754, "lr": 1.2291202548961042e-05, "epoch": 0.9106360377107954, "percentage": 45.53, "elapsed_time": "12:24:22", "remaining_time": "14:50:41"} +{"current_steps": 3430, "total_steps": 7532, "loss": 0.25192639231681824, "lr": 1.2286928365420987e-05, "epoch": 0.9109016066923383, "percentage": 45.54, "elapsed_time": "12:24:35", "remaining_time": "14:50:28"} +{"current_steps": 3431, "total_steps": 7532, "loss": 0.23189345002174377, "lr": 1.2282653740988114e-05, "epoch": 0.9111671756738813, "percentage": 45.55, "elapsed_time": "12:24:48", "remaining_time": "14:50:14"} +{"current_steps": 3432, "total_steps": 7532, "loss": 0.2888398766517639, "lr": 1.2278378676486522e-05, "epoch": 0.9114327446554242, "percentage": 45.57, "elapsed_time": "12:25:01", "remaining_time": "14:50:02"} +{"current_steps": 3433, "total_steps": 7532, "loss": 0.2857785224914551, "lr": 1.2274103172740387e-05, "epoch": 0.9116983136369672, "percentage": 45.58, "elapsed_time": "12:25:14", "remaining_time": "14:49:48"} +{"current_steps": 3434, "total_steps": 7532, "loss": 0.23961025476455688, "lr": 1.2269827230573986e-05, "epoch": 0.9119638826185101, "percentage": 45.59, "elapsed_time": "12:25:27", "remaining_time": "14:49:35"} +{"current_steps": 3435, "total_steps": 7532, "loss": 0.2791004478931427, "lr": 1.2265550850811663e-05, "epoch": 0.9122294516000531, "percentage": 45.61, "elapsed_time": "12:25:39", "remaining_time": "14:49:21"} +{"current_steps": 3436, "total_steps": 7532, "loss": 0.2875480651855469, "lr": 1.2261274034277858e-05, "epoch": 0.912495020581596, "percentage": 45.62, 
"elapsed_time": "12:25:52", "remaining_time": "14:49:08"} +{"current_steps": 3437, "total_steps": 7532, "loss": 0.29422929883003235, "lr": 1.2256996781797086e-05, "epoch": 0.912760589563139, "percentage": 45.63, "elapsed_time": "12:26:04", "remaining_time": "14:48:55"} +{"current_steps": 3438, "total_steps": 7532, "loss": 0.27114444971084595, "lr": 1.225271909419395e-05, "epoch": 0.9130261585446819, "percentage": 45.65, "elapsed_time": "12:26:17", "remaining_time": "14:48:41"} +{"current_steps": 3439, "total_steps": 7532, "loss": 0.3007166385650635, "lr": 1.2248440972293146e-05, "epoch": 0.9132917275262249, "percentage": 45.66, "elapsed_time": "12:26:30", "remaining_time": "14:48:28"} +{"current_steps": 3440, "total_steps": 7532, "loss": 0.28550055623054504, "lr": 1.224416241691944e-05, "epoch": 0.9135572965077678, "percentage": 45.67, "elapsed_time": "12:26:42", "remaining_time": "14:48:14"} +{"current_steps": 3441, "total_steps": 7532, "loss": 0.2861761450767517, "lr": 1.2239883428897687e-05, "epoch": 0.9138228654893108, "percentage": 45.69, "elapsed_time": "12:26:55", "remaining_time": "14:48:00"} +{"current_steps": 3442, "total_steps": 7532, "loss": 0.3288506865501404, "lr": 1.2235604009052823e-05, "epoch": 0.9140884344708538, "percentage": 45.7, "elapsed_time": "12:27:08", "remaining_time": "14:47:47"} +{"current_steps": 3443, "total_steps": 7532, "loss": 0.33189019560813904, "lr": 1.2231324158209876e-05, "epoch": 0.9143540034523968, "percentage": 45.71, "elapsed_time": "12:27:20", "remaining_time": "14:47:34"} +{"current_steps": 3444, "total_steps": 7532, "loss": 0.20846885442733765, "lr": 1.2227043877193947e-05, "epoch": 0.9146195724339398, "percentage": 45.72, "elapsed_time": "12:27:33", "remaining_time": "14:47:21"} +{"current_steps": 3445, "total_steps": 7532, "loss": 0.25184741616249084, "lr": 1.2222763166830223e-05, "epoch": 0.9148851414154827, "percentage": 45.74, "elapsed_time": "12:27:46", "remaining_time": "14:47:08"} +{"current_steps": 3446, 
"total_steps": 7532, "loss": 0.2954701781272888, "lr": 1.2218482027943977e-05, "epoch": 0.9151507103970257, "percentage": 45.75, "elapsed_time": "12:28:00", "remaining_time": "14:46:55"} +{"current_steps": 3447, "total_steps": 7532, "loss": 0.263336718082428, "lr": 1.221420046136056e-05, "epoch": 0.9154162793785686, "percentage": 45.76, "elapsed_time": "12:28:12", "remaining_time": "14:46:41"} +{"current_steps": 3448, "total_steps": 7532, "loss": 0.31178128719329834, "lr": 1.2209918467905405e-05, "epoch": 0.9156818483601116, "percentage": 45.78, "elapsed_time": "12:28:25", "remaining_time": "14:46:28"} +{"current_steps": 3449, "total_steps": 7532, "loss": 0.30373090505599976, "lr": 1.2205636048404037e-05, "epoch": 0.9159474173416545, "percentage": 45.79, "elapsed_time": "12:28:38", "remaining_time": "14:46:15"} +{"current_steps": 3450, "total_steps": 7532, "loss": 0.31057459115982056, "lr": 1.2201353203682052e-05, "epoch": 0.9162129863231975, "percentage": 45.8, "elapsed_time": "12:28:51", "remaining_time": "14:46:01"} +{"current_steps": 3451, "total_steps": 7532, "loss": 0.26834744215011597, "lr": 1.2197069934565126e-05, "epoch": 0.9164785553047404, "percentage": 45.82, "elapsed_time": "12:29:03", "remaining_time": "14:45:48"} +{"current_steps": 3452, "total_steps": 7532, "loss": 0.30224066972732544, "lr": 1.2192786241879033e-05, "epoch": 0.9167441242862834, "percentage": 45.83, "elapsed_time": "12:29:16", "remaining_time": "14:45:34"} +{"current_steps": 3453, "total_steps": 7532, "loss": 0.28249508142471313, "lr": 1.2188502126449616e-05, "epoch": 0.9170096932678263, "percentage": 45.84, "elapsed_time": "12:29:29", "remaining_time": "14:45:21"} +{"current_steps": 3454, "total_steps": 7532, "loss": 0.24823793768882751, "lr": 1.2184217589102798e-05, "epoch": 0.9172752622493693, "percentage": 45.86, "elapsed_time": "12:29:41", "remaining_time": "14:45:07"} +{"current_steps": 3455, "total_steps": 7532, "loss": 0.32556289434432983, "lr": 1.2179932630664589e-05, 
"epoch": 0.9175408312309122, "percentage": 45.87, "elapsed_time": "12:29:54", "remaining_time": "14:44:54"} +{"current_steps": 3456, "total_steps": 7532, "loss": 0.29420584440231323, "lr": 1.217564725196108e-05, "epoch": 0.9178064002124552, "percentage": 45.88, "elapsed_time": "12:30:06", "remaining_time": "14:44:40"} +{"current_steps": 3457, "total_steps": 7532, "loss": 0.29294469952583313, "lr": 1.2171361453818437e-05, "epoch": 0.9180719691939981, "percentage": 45.9, "elapsed_time": "12:30:19", "remaining_time": "14:44:27"} +{"current_steps": 3458, "total_steps": 7532, "loss": 0.3173823952674866, "lr": 1.2167075237062918e-05, "epoch": 0.9183375381755411, "percentage": 45.91, "elapsed_time": "12:30:31", "remaining_time": "14:44:13"} +{"current_steps": 3459, "total_steps": 7532, "loss": 0.32950159907341003, "lr": 1.2162788602520851e-05, "epoch": 0.918603107157084, "percentage": 45.92, "elapsed_time": "12:30:44", "remaining_time": "14:44:00"} +{"current_steps": 3460, "total_steps": 7532, "loss": 0.3011544942855835, "lr": 1.2158501551018647e-05, "epoch": 0.918868676138627, "percentage": 45.94, "elapsed_time": "12:30:57", "remaining_time": "14:43:46"} +{"current_steps": 3461, "total_steps": 7532, "loss": 0.25775954127311707, "lr": 1.2154214083382802e-05, "epoch": 0.91913424512017, "percentage": 45.95, "elapsed_time": "12:31:09", "remaining_time": "14:43:33"} +{"current_steps": 3462, "total_steps": 7532, "loss": 0.286748468875885, "lr": 1.214992620043989e-05, "epoch": 0.9193998141017129, "percentage": 45.96, "elapsed_time": "12:31:22", "remaining_time": "14:43:19"} +{"current_steps": 3463, "total_steps": 7532, "loss": 0.30588221549987793, "lr": 1.214563790301656e-05, "epoch": 0.9196653830832558, "percentage": 45.98, "elapsed_time": "12:31:34", "remaining_time": "14:43:06"} +{"current_steps": 3464, "total_steps": 7532, "loss": 0.23506608605384827, "lr": 1.214134919193955e-05, "epoch": 0.9199309520647988, "percentage": 45.99, "elapsed_time": "12:31:48", "remaining_time": 
"14:42:53"} +{"current_steps": 3465, "total_steps": 7532, "loss": 0.2612350285053253, "lr": 1.2137060068035672e-05, "epoch": 0.9201965210463418, "percentage": 46.0, "elapsed_time": "12:32:00", "remaining_time": "14:42:39"} +{"current_steps": 3466, "total_steps": 7532, "loss": 0.3268318772315979, "lr": 1.2132770532131815e-05, "epoch": 0.9204620900278847, "percentage": 46.02, "elapsed_time": "12:32:14", "remaining_time": "14:42:27"} +{"current_steps": 3467, "total_steps": 7532, "loss": 0.2970179319381714, "lr": 1.2128480585054951e-05, "epoch": 0.9207276590094277, "percentage": 46.03, "elapsed_time": "12:32:26", "remaining_time": "14:42:13"} +{"current_steps": 3468, "total_steps": 7532, "loss": 0.2910206615924835, "lr": 1.2124190227632138e-05, "epoch": 0.9209932279909706, "percentage": 46.04, "elapsed_time": "12:32:40", "remaining_time": "14:42:01"} +{"current_steps": 3469, "total_steps": 7532, "loss": 0.3000222444534302, "lr": 1.2119899460690496e-05, "epoch": 0.9212587969725136, "percentage": 46.06, "elapsed_time": "12:32:53", "remaining_time": "14:41:48"} +{"current_steps": 3470, "total_steps": 7532, "loss": 0.30304765701293945, "lr": 1.2115608285057242e-05, "epoch": 0.9215243659540565, "percentage": 46.07, "elapsed_time": "12:33:06", "remaining_time": "14:41:35"} +{"current_steps": 3471, "total_steps": 7532, "loss": 0.26393038034439087, "lr": 1.2111316701559663e-05, "epoch": 0.9217899349355996, "percentage": 46.08, "elapsed_time": "12:33:19", "remaining_time": "14:41:22"} +{"current_steps": 3472, "total_steps": 7532, "loss": 0.3111063838005066, "lr": 1.2107024711025128e-05, "epoch": 0.9220555039171425, "percentage": 46.1, "elapsed_time": "12:33:32", "remaining_time": "14:41:09"} +{"current_steps": 3473, "total_steps": 7532, "loss": 0.2897321581840515, "lr": 1.2102732314281073e-05, "epoch": 0.9223210728986855, "percentage": 46.11, "elapsed_time": "12:33:45", "remaining_time": "14:40:56"} +{"current_steps": 3474, "total_steps": 7532, "loss": 0.2835896611213684, "lr": 
1.2098439512155028e-05, "epoch": 0.9225866418802284, "percentage": 46.12, "elapsed_time": "12:33:58", "remaining_time": "14:40:43"} +{"current_steps": 3475, "total_steps": 7532, "loss": 0.27648821473121643, "lr": 1.2094146305474596e-05, "epoch": 0.9228522108617714, "percentage": 46.14, "elapsed_time": "12:34:11", "remaining_time": "14:40:30"} +{"current_steps": 3476, "total_steps": 7532, "loss": 0.2528097629547119, "lr": 1.2089852695067457e-05, "epoch": 0.9231177798433143, "percentage": 46.15, "elapsed_time": "12:34:24", "remaining_time": "14:40:17"} +{"current_steps": 3477, "total_steps": 7532, "loss": 0.2750067412853241, "lr": 1.2085558681761361e-05, "epoch": 0.9233833488248573, "percentage": 46.16, "elapsed_time": "12:34:37", "remaining_time": "14:40:04"} +{"current_steps": 3478, "total_steps": 7532, "loss": 0.3115938901901245, "lr": 1.2081264266384148e-05, "epoch": 0.9236489178064002, "percentage": 46.18, "elapsed_time": "12:34:50", "remaining_time": "14:39:51"} +{"current_steps": 3479, "total_steps": 7532, "loss": 0.2858419418334961, "lr": 1.2076969449763734e-05, "epoch": 0.9239144867879432, "percentage": 46.19, "elapsed_time": "12:35:03", "remaining_time": "14:39:37"} +{"current_steps": 3480, "total_steps": 7532, "loss": 0.24990032613277435, "lr": 1.2072674232728105e-05, "epoch": 0.9241800557694861, "percentage": 46.2, "elapsed_time": "12:35:15", "remaining_time": "14:39:24"} +{"current_steps": 3481, "total_steps": 7532, "loss": 0.23106999695301056, "lr": 1.206837861610533e-05, "epoch": 0.9244456247510291, "percentage": 46.22, "elapsed_time": "12:35:28", "remaining_time": "14:39:11"} +{"current_steps": 3482, "total_steps": 7532, "loss": 0.2737967371940613, "lr": 1.2064082600723546e-05, "epoch": 0.924711193732572, "percentage": 46.23, "elapsed_time": "12:35:41", "remaining_time": "14:38:58"} +{"current_steps": 3483, "total_steps": 7532, "loss": 0.2810317873954773, "lr": 1.2059786187410984e-05, "epoch": 0.924976762714115, "percentage": 46.24, "elapsed_time": 
"12:35:54", "remaining_time": "14:38:45"} +{"current_steps": 3484, "total_steps": 7532, "loss": 0.30852559208869934, "lr": 1.2055489376995938e-05, "epoch": 0.925242331695658, "percentage": 46.26, "elapsed_time": "12:36:07", "remaining_time": "14:38:31"} +{"current_steps": 3485, "total_steps": 7532, "loss": 0.2956348657608032, "lr": 1.2051192170306784e-05, "epoch": 0.9255079006772009, "percentage": 46.27, "elapsed_time": "12:36:20", "remaining_time": "14:38:18"} +{"current_steps": 3486, "total_steps": 7532, "loss": 0.2825953960418701, "lr": 1.204689456817197e-05, "epoch": 0.9257734696587439, "percentage": 46.28, "elapsed_time": "12:36:33", "remaining_time": "14:38:05"} +{"current_steps": 3487, "total_steps": 7532, "loss": 0.3351168632507324, "lr": 1.2042596571420025e-05, "epoch": 0.9260390386402868, "percentage": 46.3, "elapsed_time": "12:36:46", "remaining_time": "14:37:52"} +{"current_steps": 3488, "total_steps": 7532, "loss": 0.2718926668167114, "lr": 1.2038298180879548e-05, "epoch": 0.9263046076218298, "percentage": 46.31, "elapsed_time": "12:36:58", "remaining_time": "14:37:38"} +{"current_steps": 3489, "total_steps": 7532, "loss": 0.29036587476730347, "lr": 1.2033999397379223e-05, "epoch": 0.9265701766033727, "percentage": 46.32, "elapsed_time": "12:37:12", "remaining_time": "14:37:26"} +{"current_steps": 3490, "total_steps": 7532, "loss": 0.22917689383029938, "lr": 1.2029700221747804e-05, "epoch": 0.9268357455849157, "percentage": 46.34, "elapsed_time": "12:37:25", "remaining_time": "14:37:13"} +{"current_steps": 3491, "total_steps": 7532, "loss": 0.2963443398475647, "lr": 1.2025400654814119e-05, "epoch": 0.9271013145664586, "percentage": 46.35, "elapsed_time": "12:37:37", "remaining_time": "14:36:59"} +{"current_steps": 3492, "total_steps": 7532, "loss": 0.2866464853286743, "lr": 1.2021100697407075e-05, "epoch": 0.9273668835480016, "percentage": 46.36, "elapsed_time": "12:37:50", "remaining_time": "14:36:46"} +{"current_steps": 3493, "total_steps": 7532, 
"loss": 0.3069216012954712, "lr": 1.2016800350355654e-05, "epoch": 0.9276324525295445, "percentage": 46.38, "elapsed_time": "12:38:03", "remaining_time": "14:36:32"} +{"current_steps": 3494, "total_steps": 7532, "loss": 0.27206870913505554, "lr": 1.2012499614488913e-05, "epoch": 0.9278980215110875, "percentage": 46.39, "elapsed_time": "12:38:16", "remaining_time": "14:36:19"} +{"current_steps": 3495, "total_steps": 7532, "loss": 0.32130372524261475, "lr": 1.2008198490635978e-05, "epoch": 0.9281635904926304, "percentage": 46.4, "elapsed_time": "12:38:29", "remaining_time": "14:36:06"} +{"current_steps": 3496, "total_steps": 7532, "loss": 0.30631259083747864, "lr": 1.2003896979626061e-05, "epoch": 0.9284291594741734, "percentage": 46.42, "elapsed_time": "12:38:42", "remaining_time": "14:35:53"} +{"current_steps": 3497, "total_steps": 7532, "loss": 0.3005716800689697, "lr": 1.199959508228844e-05, "epoch": 0.9286947284557163, "percentage": 46.43, "elapsed_time": "12:38:54", "remaining_time": "14:35:39"} +{"current_steps": 3498, "total_steps": 7532, "loss": 0.2381039410829544, "lr": 1.1995292799452472e-05, "epoch": 0.9289602974372593, "percentage": 46.44, "elapsed_time": "12:39:07", "remaining_time": "14:35:26"} +{"current_steps": 3499, "total_steps": 7532, "loss": 0.31764286756515503, "lr": 1.1990990131947582e-05, "epoch": 0.9292258664188023, "percentage": 46.46, "elapsed_time": "12:39:19", "remaining_time": "14:35:13"} +{"current_steps": 3500, "total_steps": 7532, "loss": 0.3029370903968811, "lr": 1.1986687080603273e-05, "epoch": 0.9294914354003453, "percentage": 46.47, "elapsed_time": "12:39:32", "remaining_time": "14:34:59"} +{"current_steps": 3501, "total_steps": 7532, "loss": 0.30967646837234497, "lr": 1.198238364624913e-05, "epoch": 0.9297570043818882, "percentage": 46.48, "elapsed_time": "12:39:51", "remaining_time": "14:34:53"} +{"current_steps": 3502, "total_steps": 7532, "loss": 0.24687506258487701, "lr": 1.1978079829714799e-05, "epoch": 0.9300225733634312, 
"percentage": 46.49, "elapsed_time": "12:40:03", "remaining_time": "14:34:39"} +{"current_steps": 3503, "total_steps": 7532, "loss": 0.25909408926963806, "lr": 1.1973775631830007e-05, "epoch": 0.9302881423449741, "percentage": 46.51, "elapsed_time": "12:40:17", "remaining_time": "14:34:26"} +{"current_steps": 3504, "total_steps": 7532, "loss": 0.281025230884552, "lr": 1.196947105342455e-05, "epoch": 0.9305537113265171, "percentage": 46.52, "elapsed_time": "12:40:29", "remaining_time": "14:34:13"} +{"current_steps": 3505, "total_steps": 7532, "loss": 0.33401811122894287, "lr": 1.1965166095328302e-05, "epoch": 0.93081928030806, "percentage": 46.53, "elapsed_time": "12:40:42", "remaining_time": "14:34:00"} +{"current_steps": 3506, "total_steps": 7532, "loss": 0.25839388370513916, "lr": 1.1960860758371208e-05, "epoch": 0.931084849289603, "percentage": 46.55, "elapsed_time": "12:40:54", "remaining_time": "14:33:46"} +{"current_steps": 3507, "total_steps": 7532, "loss": 0.23343560099601746, "lr": 1.1956555043383286e-05, "epoch": 0.931350418271146, "percentage": 46.56, "elapsed_time": "12:41:08", "remaining_time": "14:33:33"} +{"current_steps": 3508, "total_steps": 7532, "loss": 0.31106436252593994, "lr": 1.1952248951194629e-05, "epoch": 0.9316159872526889, "percentage": 46.57, "elapsed_time": "12:41:20", "remaining_time": "14:33:19"} +{"current_steps": 3509, "total_steps": 7532, "loss": 0.29152095317840576, "lr": 1.1947942482635395e-05, "epoch": 0.9318815562342319, "percentage": 46.59, "elapsed_time": "12:41:33", "remaining_time": "14:33:06"} +{"current_steps": 3510, "total_steps": 7532, "loss": 0.31517675518989563, "lr": 1.1943635638535827e-05, "epoch": 0.9321471252157748, "percentage": 46.6, "elapsed_time": "12:41:46", "remaining_time": "14:32:53"} +{"current_steps": 3511, "total_steps": 7532, "loss": 0.33221137523651123, "lr": 1.1939328419726231e-05, "epoch": 0.9324126941973178, "percentage": 46.61, "elapsed_time": "12:41:58", "remaining_time": "14:32:39"} 
+{"current_steps": 3512, "total_steps": 7532, "loss": 0.314359575510025, "lr": 1.193502082703699e-05, "epoch": 0.9326782631788607, "percentage": 46.63, "elapsed_time": "12:42:11", "remaining_time": "14:32:26"} +{"current_steps": 3513, "total_steps": 7532, "loss": 0.2879924178123474, "lr": 1.1930712861298553e-05, "epoch": 0.9329438321604037, "percentage": 46.64, "elapsed_time": "12:42:24", "remaining_time": "14:32:13"} +{"current_steps": 3514, "total_steps": 7532, "loss": 0.2732955515384674, "lr": 1.1926404523341443e-05, "epoch": 0.9332094011419466, "percentage": 46.65, "elapsed_time": "12:42:37", "remaining_time": "14:31:59"} +{"current_steps": 3515, "total_steps": 7532, "loss": 0.32156097888946533, "lr": 1.1922095813996264e-05, "epoch": 0.9334749701234896, "percentage": 46.67, "elapsed_time": "12:42:49", "remaining_time": "14:31:45"} +{"current_steps": 3516, "total_steps": 7532, "loss": 0.2694319486618042, "lr": 1.1917786734093682e-05, "epoch": 0.9337405391050325, "percentage": 46.68, "elapsed_time": "12:43:02", "remaining_time": "14:31:33"} +{"current_steps": 3517, "total_steps": 7532, "loss": 0.3049655258655548, "lr": 1.1913477284464434e-05, "epoch": 0.9340061080865755, "percentage": 46.69, "elapsed_time": "12:43:15", "remaining_time": "14:31:20"} +{"current_steps": 3518, "total_steps": 7532, "loss": 0.30053725838661194, "lr": 1.1909167465939334e-05, "epoch": 0.9342716770681184, "percentage": 46.71, "elapsed_time": "12:43:29", "remaining_time": "14:31:07"} +{"current_steps": 3519, "total_steps": 7532, "loss": 0.2611788809299469, "lr": 1.1904857279349265e-05, "epoch": 0.9345372460496614, "percentage": 46.72, "elapsed_time": "12:43:42", "remaining_time": "14:30:54"} +{"current_steps": 3520, "total_steps": 7532, "loss": 0.28344646096229553, "lr": 1.1900546725525175e-05, "epoch": 0.9348028150312043, "percentage": 46.73, "elapsed_time": "12:43:55", "remaining_time": "14:30:41"} +{"current_steps": 3521, "total_steps": 7532, "loss": 0.2504042685031891, "lr": 
1.1896235805298093e-05, "epoch": 0.9350683840127473, "percentage": 46.75, "elapsed_time": "12:44:07", "remaining_time": "14:30:28"} +{"current_steps": 3522, "total_steps": 7532, "loss": 0.27877938747406006, "lr": 1.1891924519499113e-05, "epoch": 0.9353339529942902, "percentage": 46.76, "elapsed_time": "12:44:20", "remaining_time": "14:30:15"} +{"current_steps": 3523, "total_steps": 7532, "loss": 0.28176525235176086, "lr": 1.1887612868959394e-05, "epoch": 0.9355995219758332, "percentage": 46.77, "elapsed_time": "12:44:33", "remaining_time": "14:30:01"} +{"current_steps": 3524, "total_steps": 7532, "loss": 0.32376354932785034, "lr": 1.1883300854510178e-05, "epoch": 0.9358650909573761, "percentage": 46.79, "elapsed_time": "12:44:46", "remaining_time": "14:29:48"} +{"current_steps": 3525, "total_steps": 7532, "loss": 0.2846054434776306, "lr": 1.1878988476982772e-05, "epoch": 0.9361306599389191, "percentage": 46.8, "elapsed_time": "12:44:59", "remaining_time": "14:29:35"} +{"current_steps": 3526, "total_steps": 7532, "loss": 0.25711044669151306, "lr": 1.1874675737208546e-05, "epoch": 0.936396228920462, "percentage": 46.81, "elapsed_time": "12:45:12", "remaining_time": "14:29:22"} +{"current_steps": 3527, "total_steps": 7532, "loss": 0.2810837924480438, "lr": 1.1870362636018946e-05, "epoch": 0.9366617979020051, "percentage": 46.83, "elapsed_time": "12:45:25", "remaining_time": "14:29:09"} +{"current_steps": 3528, "total_steps": 7532, "loss": 0.3090322017669678, "lr": 1.186604917424549e-05, "epoch": 0.936927366883548, "percentage": 46.84, "elapsed_time": "12:45:38", "remaining_time": "14:28:56"} +{"current_steps": 3529, "total_steps": 7532, "loss": 0.2797972559928894, "lr": 1.1861735352719763e-05, "epoch": 0.937192935865091, "percentage": 46.85, "elapsed_time": "12:45:51", "remaining_time": "14:28:43"} +{"current_steps": 3530, "total_steps": 7532, "loss": 0.3124893605709076, "lr": 1.1857421172273415e-05, "epoch": 0.937458504846634, "percentage": 46.87, "elapsed_time": 
"12:46:03", "remaining_time": "14:28:29"} +{"current_steps": 3531, "total_steps": 7532, "loss": 0.28317195177078247, "lr": 1.1853106633738174e-05, "epoch": 0.9377240738281769, "percentage": 46.88, "elapsed_time": "12:46:16", "remaining_time": "14:28:16"} +{"current_steps": 3532, "total_steps": 7532, "loss": 0.27804574370384216, "lr": 1.1848791737945823e-05, "epoch": 0.9379896428097199, "percentage": 46.89, "elapsed_time": "12:46:29", "remaining_time": "14:28:03"} +{"current_steps": 3533, "total_steps": 7532, "loss": 0.24936731159687042, "lr": 1.1844476485728236e-05, "epoch": 0.9382552117912628, "percentage": 46.91, "elapsed_time": "12:46:42", "remaining_time": "14:27:50"} +{"current_steps": 3534, "total_steps": 7532, "loss": 0.296974778175354, "lr": 1.1840160877917335e-05, "epoch": 0.9385207807728058, "percentage": 46.92, "elapsed_time": "12:46:55", "remaining_time": "14:27:36"} +{"current_steps": 3535, "total_steps": 7532, "loss": 0.3048890233039856, "lr": 1.1835844915345117e-05, "epoch": 0.9387863497543487, "percentage": 46.93, "elapsed_time": "12:47:08", "remaining_time": "14:27:23"} +{"current_steps": 3536, "total_steps": 7532, "loss": 0.2703601121902466, "lr": 1.1831528598843654e-05, "epoch": 0.9390519187358917, "percentage": 46.95, "elapsed_time": "12:47:20", "remaining_time": "14:27:10"} +{"current_steps": 3537, "total_steps": 7532, "loss": 0.30738013982772827, "lr": 1.1827211929245075e-05, "epoch": 0.9393174877174346, "percentage": 46.96, "elapsed_time": "12:47:34", "remaining_time": "14:26:57"} +{"current_steps": 3538, "total_steps": 7532, "loss": 0.26538529992103577, "lr": 1.1822894907381589e-05, "epoch": 0.9395830566989776, "percentage": 46.97, "elapsed_time": "12:47:46", "remaining_time": "14:26:44"} +{"current_steps": 3539, "total_steps": 7532, "loss": 0.26795464754104614, "lr": 1.1818577534085462e-05, "epoch": 0.9398486256805205, "percentage": 46.99, "elapsed_time": "12:47:59", "remaining_time": "14:26:31"} +{"current_steps": 3540, "total_steps": 
7532, "loss": 0.30891868472099304, "lr": 1.1814259810189034e-05, "epoch": 0.9401141946620635, "percentage": 47.0, "elapsed_time": "12:48:12", "remaining_time": "14:26:17"} +{"current_steps": 3541, "total_steps": 7532, "loss": 0.29164037108421326, "lr": 1.1809941736524713e-05, "epoch": 0.9403797636436064, "percentage": 47.01, "elapsed_time": "12:48:25", "remaining_time": "14:26:04"} +{"current_steps": 3542, "total_steps": 7532, "loss": 0.30322739481925964, "lr": 1.180562331392497e-05, "epoch": 0.9406453326251494, "percentage": 47.03, "elapsed_time": "12:48:37", "remaining_time": "14:25:50"} +{"current_steps": 3543, "total_steps": 7532, "loss": 0.275432288646698, "lr": 1.1801304543222349e-05, "epoch": 0.9409109016066923, "percentage": 47.04, "elapsed_time": "12:48:50", "remaining_time": "14:25:37"} +{"current_steps": 3544, "total_steps": 7532, "loss": 0.2788141965866089, "lr": 1.1796985425249459e-05, "epoch": 0.9411764705882353, "percentage": 47.05, "elapsed_time": "12:49:03", "remaining_time": "14:25:24"} +{"current_steps": 3545, "total_steps": 7532, "loss": 0.24254676699638367, "lr": 1.1792665960838967e-05, "epoch": 0.9414420395697782, "percentage": 47.07, "elapsed_time": "12:49:15", "remaining_time": "14:25:10"} +{"current_steps": 3546, "total_steps": 7532, "loss": 0.2803058326244354, "lr": 1.1788346150823625e-05, "epoch": 0.9417076085513212, "percentage": 47.08, "elapsed_time": "12:49:29", "remaining_time": "14:24:58"} +{"current_steps": 3547, "total_steps": 7532, "loss": 0.3068317174911499, "lr": 1.1784025996036232e-05, "epoch": 0.9419731775328641, "percentage": 47.09, "elapsed_time": "12:49:41", "remaining_time": "14:24:44"} +{"current_steps": 3548, "total_steps": 7532, "loss": 0.23124024271965027, "lr": 1.1779705497309673e-05, "epoch": 0.9422387465144071, "percentage": 47.11, "elapsed_time": "12:49:54", "remaining_time": "14:24:31"} +{"current_steps": 3549, "total_steps": 7532, "loss": 0.2815462648868561, "lr": 1.177538465547688e-05, "epoch": 0.94250431549595, 
"percentage": 47.12, "elapsed_time": "12:50:07", "remaining_time": "14:24:18"} +{"current_steps": 3550, "total_steps": 7532, "loss": 0.29448196291923523, "lr": 1.1771063471370862e-05, "epoch": 0.942769884477493, "percentage": 47.13, "elapsed_time": "12:50:21", "remaining_time": "14:24:05"} +{"current_steps": 3551, "total_steps": 7532, "loss": 0.3176615834236145, "lr": 1.1766741945824698e-05, "epoch": 0.9430354534590359, "percentage": 47.15, "elapsed_time": "12:50:33", "remaining_time": "14:23:52"} +{"current_steps": 3552, "total_steps": 7532, "loss": 0.29126274585723877, "lr": 1.1762420079671527e-05, "epoch": 0.9433010224405789, "percentage": 47.16, "elapsed_time": "12:50:46", "remaining_time": "14:23:39"} +{"current_steps": 3553, "total_steps": 7532, "loss": 0.27074337005615234, "lr": 1.1758097873744547e-05, "epoch": 0.9435665914221218, "percentage": 47.17, "elapsed_time": "12:50:59", "remaining_time": "14:23:26"} +{"current_steps": 3554, "total_steps": 7532, "loss": 0.2756083011627197, "lr": 1.175377532887703e-05, "epoch": 0.9438321604036648, "percentage": 47.19, "elapsed_time": "12:51:12", "remaining_time": "14:23:13"} +{"current_steps": 3555, "total_steps": 7532, "loss": 0.26918384432792664, "lr": 1.1749452445902315e-05, "epoch": 0.9440977293852079, "percentage": 47.2, "elapsed_time": "12:51:25", "remaining_time": "14:22:59"} +{"current_steps": 3556, "total_steps": 7532, "loss": 0.2550349235534668, "lr": 1.17451292256538e-05, "epoch": 0.9443632983667508, "percentage": 47.21, "elapsed_time": "12:51:37", "remaining_time": "14:22:46"} +{"current_steps": 3557, "total_steps": 7532, "loss": 0.2601481080055237, "lr": 1.1740805668964954e-05, "epoch": 0.9446288673482938, "percentage": 47.23, "elapsed_time": "12:51:50", "remaining_time": "14:22:33"} +{"current_steps": 3558, "total_steps": 7532, "loss": 0.2848352789878845, "lr": 1.1736481776669307e-05, "epoch": 0.9448944363298367, "percentage": 47.24, "elapsed_time": "12:52:03", "remaining_time": "14:22:19"} 
+{"current_steps": 3559, "total_steps": 7532, "loss": 0.266584575176239, "lr": 1.173215754960045e-05, "epoch": 0.9451600053113797, "percentage": 47.25, "elapsed_time": "12:52:16", "remaining_time": "14:22:06"} +{"current_steps": 3560, "total_steps": 7532, "loss": 0.25037410855293274, "lr": 1.172783298859205e-05, "epoch": 0.9454255742929226, "percentage": 47.27, "elapsed_time": "12:52:29", "remaining_time": "14:21:53"} +{"current_steps": 3561, "total_steps": 7532, "loss": 0.30239278078079224, "lr": 1.1723508094477825e-05, "epoch": 0.9456911432744656, "percentage": 47.28, "elapsed_time": "12:52:42", "remaining_time": "14:21:40"} +{"current_steps": 3562, "total_steps": 7532, "loss": 0.2893553078174591, "lr": 1.1719182868091567e-05, "epoch": 0.9459567122560085, "percentage": 47.29, "elapsed_time": "12:52:54", "remaining_time": "14:21:26"} +{"current_steps": 3563, "total_steps": 7532, "loss": 0.2840202748775482, "lr": 1.1714857310267124e-05, "epoch": 0.9462222812375515, "percentage": 47.3, "elapsed_time": "12:53:07", "remaining_time": "14:21:13"} +{"current_steps": 3564, "total_steps": 7532, "loss": 0.2614031732082367, "lr": 1.1710531421838422e-05, "epoch": 0.9464878502190944, "percentage": 47.32, "elapsed_time": "12:53:20", "remaining_time": "14:21:00"} +{"current_steps": 3565, "total_steps": 7532, "loss": 0.267095148563385, "lr": 1.1706205203639433e-05, "epoch": 0.9467534192006374, "percentage": 47.33, "elapsed_time": "12:53:33", "remaining_time": "14:20:47"} +{"current_steps": 3566, "total_steps": 7532, "loss": 0.25835227966308594, "lr": 1.1701878656504206e-05, "epoch": 0.9470189881821803, "percentage": 47.34, "elapsed_time": "12:53:46", "remaining_time": "14:20:33"} +{"current_steps": 3567, "total_steps": 7532, "loss": 0.27547580003738403, "lr": 1.1697551781266845e-05, "epoch": 0.9472845571637233, "percentage": 47.36, "elapsed_time": "12:53:59", "remaining_time": "14:20:20"} +{"current_steps": 3568, "total_steps": 7532, "loss": 0.251165509223938, "lr": 
1.169322457876152e-05, "epoch": 0.9475501261452662, "percentage": 47.37, "elapsed_time": "12:54:11", "remaining_time": "14:20:07"} +{"current_steps": 3569, "total_steps": 7532, "loss": 0.2738516926765442, "lr": 1.1688897049822467e-05, "epoch": 0.9478156951268092, "percentage": 47.38, "elapsed_time": "12:54:24", "remaining_time": "14:19:54"} +{"current_steps": 3570, "total_steps": 7532, "loss": 0.2745274305343628, "lr": 1.1684569195283981e-05, "epoch": 0.9480812641083521, "percentage": 47.4, "elapsed_time": "12:54:37", "remaining_time": "14:19:40"} +{"current_steps": 3571, "total_steps": 7532, "loss": 0.28586819767951965, "lr": 1.1680241015980423e-05, "epoch": 0.9483468330898951, "percentage": 47.41, "elapsed_time": "12:54:50", "remaining_time": "14:19:28"} +{"current_steps": 3572, "total_steps": 7532, "loss": 0.2559577524662018, "lr": 1.167591251274621e-05, "epoch": 0.948612402071438, "percentage": 47.42, "elapsed_time": "12:55:03", "remaining_time": "14:19:14"} +{"current_steps": 3573, "total_steps": 7532, "loss": 0.26069143414497375, "lr": 1.1671583686415833e-05, "epoch": 0.948877971052981, "percentage": 47.44, "elapsed_time": "12:55:15", "remaining_time": "14:19:00"} +{"current_steps": 3574, "total_steps": 7532, "loss": 0.26866453886032104, "lr": 1.1667254537823838e-05, "epoch": 0.949143540034524, "percentage": 47.45, "elapsed_time": "12:55:28", "remaining_time": "14:18:47"} +{"current_steps": 3575, "total_steps": 7532, "loss": 0.25285348296165466, "lr": 1.166292506780483e-05, "epoch": 0.9494091090160669, "percentage": 47.46, "elapsed_time": "12:55:41", "remaining_time": "14:18:34"} +{"current_steps": 3576, "total_steps": 7532, "loss": 0.3330434262752533, "lr": 1.1658595277193479e-05, "epoch": 0.9496746779976099, "percentage": 47.48, "elapsed_time": "12:55:54", "remaining_time": "14:18:21"} +{"current_steps": 3577, "total_steps": 7532, "loss": 0.2789473533630371, "lr": 1.1654265166824522e-05, "epoch": 0.9499402469791528, "percentage": 47.49, "elapsed_time": 
"12:56:06", "remaining_time": "14:18:07"} +{"current_steps": 3578, "total_steps": 7532, "loss": 0.30984824895858765, "lr": 1.164993473753275e-05, "epoch": 0.9502058159606958, "percentage": 47.5, "elapsed_time": "12:56:19", "remaining_time": "14:17:54"} +{"current_steps": 3579, "total_steps": 7532, "loss": 0.23881833255290985, "lr": 1.164560399015302e-05, "epoch": 0.9504713849422387, "percentage": 47.52, "elapsed_time": "12:56:32", "remaining_time": "14:17:41"} +{"current_steps": 3580, "total_steps": 7532, "loss": 0.3027937114238739, "lr": 1.164127292552025e-05, "epoch": 0.9507369539237817, "percentage": 47.53, "elapsed_time": "12:56:45", "remaining_time": "14:17:28"} +{"current_steps": 3581, "total_steps": 7532, "loss": 0.2901906371116638, "lr": 1.1636941544469413e-05, "epoch": 0.9510025229053246, "percentage": 47.54, "elapsed_time": "12:56:57", "remaining_time": "14:17:14"} +{"current_steps": 3582, "total_steps": 7532, "loss": 0.28961148858070374, "lr": 1.1632609847835556e-05, "epoch": 0.9512680918868676, "percentage": 47.56, "elapsed_time": "12:57:10", "remaining_time": "14:17:01"} +{"current_steps": 3583, "total_steps": 7532, "loss": 0.2730783224105835, "lr": 1.1628277836453774e-05, "epoch": 0.9515336608684106, "percentage": 47.57, "elapsed_time": "12:57:23", "remaining_time": "14:16:47"} +{"current_steps": 3584, "total_steps": 7532, "loss": 0.3195485770702362, "lr": 1.1623945511159232e-05, "epoch": 0.9517992298499536, "percentage": 47.58, "elapsed_time": "12:57:36", "remaining_time": "14:16:34"} +{"current_steps": 3585, "total_steps": 7532, "loss": 0.3097516894340515, "lr": 1.1619612872787144e-05, "epoch": 0.9520647988314965, "percentage": 47.6, "elapsed_time": "12:57:48", "remaining_time": "14:16:20"} +{"current_steps": 3586, "total_steps": 7532, "loss": 0.2716284692287445, "lr": 1.1615279922172796e-05, "epoch": 0.9523303678130395, "percentage": 47.61, "elapsed_time": "12:58:00", "remaining_time": "14:16:07"} +{"current_steps": 3587, "total_steps": 7532, 
"loss": 0.2601209878921509, "lr": 1.1610946660151531e-05, "epoch": 0.9525959367945824, "percentage": 47.62, "elapsed_time": "12:58:13", "remaining_time": "14:15:54"} +{"current_steps": 3588, "total_steps": 7532, "loss": 0.28665289282798767, "lr": 1.1606613087558748e-05, "epoch": 0.9528615057761254, "percentage": 47.64, "elapsed_time": "12:58:26", "remaining_time": "14:15:40"} +{"current_steps": 3589, "total_steps": 7532, "loss": 0.3019893765449524, "lr": 1.1602279205229912e-05, "epoch": 0.9531270747576683, "percentage": 47.65, "elapsed_time": "12:58:39", "remaining_time": "14:15:27"} +{"current_steps": 3590, "total_steps": 7532, "loss": 0.2635146677494049, "lr": 1.1597945014000537e-05, "epoch": 0.9533926437392113, "percentage": 47.66, "elapsed_time": "12:58:52", "remaining_time": "14:15:14"} +{"current_steps": 3591, "total_steps": 7532, "loss": 0.2704858183860779, "lr": 1.1593610514706217e-05, "epoch": 0.9536582127207542, "percentage": 47.68, "elapsed_time": "12:59:05", "remaining_time": "14:15:01"} +{"current_steps": 3592, "total_steps": 7532, "loss": 0.31997931003570557, "lr": 1.1589275708182581e-05, "epoch": 0.9539237817022972, "percentage": 47.69, "elapsed_time": "12:59:18", "remaining_time": "14:14:48"} +{"current_steps": 3593, "total_steps": 7532, "loss": 0.2308788150548935, "lr": 1.1584940595265332e-05, "epoch": 0.9541893506838401, "percentage": 47.7, "elapsed_time": "12:59:31", "remaining_time": "14:14:35"} +{"current_steps": 3594, "total_steps": 7532, "loss": 0.28886470198631287, "lr": 1.1580605176790229e-05, "epoch": 0.9544549196653831, "percentage": 47.72, "elapsed_time": "12:59:43", "remaining_time": "14:14:21"} +{"current_steps": 3595, "total_steps": 7532, "loss": 0.30698686838150024, "lr": 1.157626945359309e-05, "epoch": 0.954720488646926, "percentage": 47.73, "elapsed_time": "12:59:56", "remaining_time": "14:14:08"} +{"current_steps": 3596, "total_steps": 7532, "loss": 0.27475905418395996, "lr": 1.1571933426509789e-05, "epoch": 0.954986057628469, 
"percentage": 47.74, "elapsed_time": "13:00:09", "remaining_time": "14:13:54"} +{"current_steps": 3597, "total_steps": 7532, "loss": 0.2568071484565735, "lr": 1.1567597096376264e-05, "epoch": 0.955251626610012, "percentage": 47.76, "elapsed_time": "13:00:22", "remaining_time": "14:13:42"} +{"current_steps": 3598, "total_steps": 7532, "loss": 0.2574060261249542, "lr": 1.1563260464028507e-05, "epoch": 0.9555171955915549, "percentage": 47.77, "elapsed_time": "13:00:34", "remaining_time": "14:13:28"} +{"current_steps": 3599, "total_steps": 7532, "loss": 0.2847997546195984, "lr": 1.1558923530302571e-05, "epoch": 0.9557827645730979, "percentage": 47.78, "elapsed_time": "13:00:47", "remaining_time": "14:13:15"} +{"current_steps": 3600, "total_steps": 7532, "loss": 0.2594734728336334, "lr": 1.155458629603456e-05, "epoch": 0.9560483335546408, "percentage": 47.8, "elapsed_time": "13:00:59", "remaining_time": "14:13:01"} +{"current_steps": 3601, "total_steps": 7532, "loss": 0.2300589680671692, "lr": 1.155024876206065e-05, "epoch": 0.9563139025361838, "percentage": 47.81, "elapsed_time": "13:01:17", "remaining_time": "14:12:53"} +{"current_steps": 3602, "total_steps": 7532, "loss": 0.29174795746803284, "lr": 1.1545910929217059e-05, "epoch": 0.9565794715177267, "percentage": 47.82, "elapsed_time": "13:01:30", "remaining_time": "14:12:40"} +{"current_steps": 3603, "total_steps": 7532, "loss": 0.2666400074958801, "lr": 1.1541572798340076e-05, "epoch": 0.9568450404992697, "percentage": 47.84, "elapsed_time": "13:01:42", "remaining_time": "14:12:26"} +{"current_steps": 3604, "total_steps": 7532, "loss": 0.24651308357715607, "lr": 1.1537234370266035e-05, "epoch": 0.9571106094808126, "percentage": 47.85, "elapsed_time": "13:01:56", "remaining_time": "14:12:13"} +{"current_steps": 3605, "total_steps": 7532, "loss": 0.29991376399993896, "lr": 1.1532895645831339e-05, "epoch": 0.9573761784623556, "percentage": 47.86, "elapsed_time": "13:02:08", "remaining_time": "14:12:00"} 
+{"current_steps": 3606, "total_steps": 7532, "loss": 0.27713578939437866, "lr": 1.1528556625872443e-05, "epoch": 0.9576417474438985, "percentage": 47.88, "elapsed_time": "13:02:21", "remaining_time": "14:11:47"} +{"current_steps": 3607, "total_steps": 7532, "loss": 0.26503294706344604, "lr": 1.1524217311225857e-05, "epoch": 0.9579073164254415, "percentage": 47.89, "elapsed_time": "13:02:34", "remaining_time": "14:11:34"} +{"current_steps": 3608, "total_steps": 7532, "loss": 0.28627675771713257, "lr": 1.1519877702728149e-05, "epoch": 0.9581728854069844, "percentage": 47.9, "elapsed_time": "13:02:47", "remaining_time": "14:11:21"} +{"current_steps": 3609, "total_steps": 7532, "loss": 0.26862916350364685, "lr": 1.1515537801215944e-05, "epoch": 0.9584384543885274, "percentage": 47.92, "elapsed_time": "13:03:00", "remaining_time": "14:11:08"} +{"current_steps": 3610, "total_steps": 7532, "loss": 0.29697147011756897, "lr": 1.1511197607525926e-05, "epoch": 0.9587040233700703, "percentage": 47.93, "elapsed_time": "13:03:13", "remaining_time": "14:10:54"} +{"current_steps": 3611, "total_steps": 7532, "loss": 0.2980155944824219, "lr": 1.1506857122494832e-05, "epoch": 0.9589695923516134, "percentage": 47.94, "elapsed_time": "13:03:26", "remaining_time": "14:10:41"} +{"current_steps": 3612, "total_steps": 7532, "loss": 0.2847440838813782, "lr": 1.1502516346959458e-05, "epoch": 0.9592351613331563, "percentage": 47.96, "elapsed_time": "13:03:39", "remaining_time": "14:10:28"} +{"current_steps": 3613, "total_steps": 7532, "loss": 0.2812016010284424, "lr": 1.149817528175665e-05, "epoch": 0.9595007303146993, "percentage": 47.97, "elapsed_time": "13:03:51", "remaining_time": "14:10:15"} +{"current_steps": 3614, "total_steps": 7532, "loss": 0.26856982707977295, "lr": 1.1493833927723319e-05, "epoch": 0.9597662992962422, "percentage": 47.98, "elapsed_time": "13:04:05", "remaining_time": "14:10:02"} +{"current_steps": 3615, "total_steps": 7532, "loss": 0.2651693820953369, "lr": 
1.1489492285696424e-05, "epoch": 0.9600318682777852, "percentage": 48.0, "elapsed_time": "13:04:17", "remaining_time": "14:09:49"} +{"current_steps": 3616, "total_steps": 7532, "loss": 0.29811644554138184, "lr": 1.1485150356512986e-05, "epoch": 0.9602974372593281, "percentage": 48.01, "elapsed_time": "13:04:30", "remaining_time": "14:09:35"} +{"current_steps": 3617, "total_steps": 7532, "loss": 0.2622855007648468, "lr": 1.1480808141010071e-05, "epoch": 0.9605630062408711, "percentage": 48.02, "elapsed_time": "13:04:43", "remaining_time": "14:09:22"} +{"current_steps": 3618, "total_steps": 7532, "loss": 0.3067246377468109, "lr": 1.1476465640024814e-05, "epoch": 0.960828575222414, "percentage": 48.04, "elapsed_time": "13:04:55", "remaining_time": "14:09:08"} +{"current_steps": 3619, "total_steps": 7532, "loss": 0.25928011536598206, "lr": 1.1472122854394394e-05, "epoch": 0.961094144203957, "percentage": 48.05, "elapsed_time": "13:05:08", "remaining_time": "14:08:55"} +{"current_steps": 3620, "total_steps": 7532, "loss": 0.2574170231819153, "lr": 1.146777978495605e-05, "epoch": 0.9613597131855, "percentage": 48.06, "elapsed_time": "13:05:21", "remaining_time": "14:08:42"} +{"current_steps": 3621, "total_steps": 7532, "loss": 0.2845388650894165, "lr": 1.1463436432547073e-05, "epoch": 0.9616252821670429, "percentage": 48.07, "elapsed_time": "13:05:34", "remaining_time": "14:08:29"} +{"current_steps": 3622, "total_steps": 7532, "loss": 0.28735876083374023, "lr": 1.145909279800481e-05, "epoch": 0.9618908511485859, "percentage": 48.09, "elapsed_time": "13:05:47", "remaining_time": "14:08:16"} +{"current_steps": 3623, "total_steps": 7532, "loss": 0.25739723443984985, "lr": 1.1454748882166666e-05, "epoch": 0.9621564201301288, "percentage": 48.1, "elapsed_time": "13:06:00", "remaining_time": "14:08:03"} +{"current_steps": 3624, "total_steps": 7532, "loss": 0.25144338607788086, "lr": 1.1450404685870098e-05, "epoch": 0.9624219891116718, "percentage": 48.11, "elapsed_time": 
"13:06:12", "remaining_time": "14:07:49"} +{"current_steps": 3625, "total_steps": 7532, "loss": 0.23981891572475433, "lr": 1.144606020995261e-05, "epoch": 0.9626875580932147, "percentage": 48.13, "elapsed_time": "13:06:26", "remaining_time": "14:07:36"} +{"current_steps": 3626, "total_steps": 7532, "loss": 0.30925339460372925, "lr": 1.1441715455251764e-05, "epoch": 0.9629531270747577, "percentage": 48.14, "elapsed_time": "13:06:38", "remaining_time": "14:07:23"} +{"current_steps": 3627, "total_steps": 7532, "loss": 0.2559184432029724, "lr": 1.1437370422605184e-05, "epoch": 0.9632186960563006, "percentage": 48.15, "elapsed_time": "13:06:51", "remaining_time": "14:07:09"} +{"current_steps": 3628, "total_steps": 7532, "loss": 0.3001229166984558, "lr": 1.1433025112850542e-05, "epoch": 0.9634842650378436, "percentage": 48.17, "elapsed_time": "13:07:04", "remaining_time": "14:06:56"} +{"current_steps": 3629, "total_steps": 7532, "loss": 0.24304218590259552, "lr": 1.1428679526825557e-05, "epoch": 0.9637498340193865, "percentage": 48.18, "elapsed_time": "13:07:16", "remaining_time": "14:06:43"} +{"current_steps": 3630, "total_steps": 7532, "loss": 0.25677186250686646, "lr": 1.1424333665368011e-05, "epoch": 0.9640154030009295, "percentage": 48.19, "elapsed_time": "13:07:29", "remaining_time": "14:06:30"} +{"current_steps": 3631, "total_steps": 7532, "loss": 0.2589085102081299, "lr": 1.141998752931573e-05, "epoch": 0.9642809719824724, "percentage": 48.21, "elapsed_time": "13:07:42", "remaining_time": "14:06:16"} +{"current_steps": 3632, "total_steps": 7532, "loss": 0.2588059604167938, "lr": 1.1415641119506601e-05, "epoch": 0.9645465409640154, "percentage": 48.22, "elapsed_time": "13:07:55", "remaining_time": "14:06:03"} +{"current_steps": 3633, "total_steps": 7532, "loss": 0.26097869873046875, "lr": 1.1411294436778562e-05, "epoch": 0.9648121099455583, "percentage": 48.23, "elapsed_time": "13:08:07", "remaining_time": "14:05:50"} +{"current_steps": 3634, "total_steps": 7532, 
"loss": 0.26022520661354065, "lr": 1.1406947481969598e-05, "epoch": 0.9650776789271013, "percentage": 48.25, "elapsed_time": "13:08:20", "remaining_time": "14:05:37"} +{"current_steps": 3635, "total_steps": 7532, "loss": 0.26242876052856445, "lr": 1.140260025591775e-05, "epoch": 0.9653432479086442, "percentage": 48.26, "elapsed_time": "13:08:33", "remaining_time": "14:05:23"} +{"current_steps": 3636, "total_steps": 7532, "loss": 0.30035555362701416, "lr": 1.1398252759461119e-05, "epoch": 0.9656088168901872, "percentage": 48.27, "elapsed_time": "13:08:46", "remaining_time": "14:05:10"} +{"current_steps": 3637, "total_steps": 7532, "loss": 0.26388341188430786, "lr": 1.1393904993437848e-05, "epoch": 0.9658743858717301, "percentage": 48.29, "elapsed_time": "13:08:59", "remaining_time": "14:04:57"} +{"current_steps": 3638, "total_steps": 7532, "loss": 0.28116434812545776, "lr": 1.1389556958686132e-05, "epoch": 0.9661399548532731, "percentage": 48.3, "elapsed_time": "13:09:12", "remaining_time": "14:04:44"} +{"current_steps": 3639, "total_steps": 7532, "loss": 0.25372493267059326, "lr": 1.1385208656044222e-05, "epoch": 0.966405523834816, "percentage": 48.31, "elapsed_time": "13:09:25", "remaining_time": "14:04:31"} +{"current_steps": 3640, "total_steps": 7532, "loss": 0.2648317813873291, "lr": 1.1380860086350422e-05, "epoch": 0.9666710928163591, "percentage": 48.33, "elapsed_time": "13:09:38", "remaining_time": "14:04:18"} +{"current_steps": 3641, "total_steps": 7532, "loss": 0.26981276273727417, "lr": 1.1376511250443082e-05, "epoch": 0.966936661797902, "percentage": 48.34, "elapsed_time": "13:09:50", "remaining_time": "14:04:04"} +{"current_steps": 3642, "total_steps": 7532, "loss": 0.2934207618236542, "lr": 1.1372162149160608e-05, "epoch": 0.967202230779445, "percentage": 48.35, "elapsed_time": "13:10:03", "remaining_time": "14:03:51"} +{"current_steps": 3643, "total_steps": 7532, "loss": 0.24250900745391846, "lr": 1.1367812783341454e-05, "epoch": 0.967467799760988, 
"percentage": 48.37, "elapsed_time": "13:10:16", "remaining_time": "14:03:38"} +{"current_steps": 3644, "total_steps": 7532, "loss": 0.2565772235393524, "lr": 1.1363463153824125e-05, "epoch": 0.9677333687425309, "percentage": 48.38, "elapsed_time": "13:10:29", "remaining_time": "14:03:25"} +{"current_steps": 3645, "total_steps": 7532, "loss": 0.28407829999923706, "lr": 1.1359113261447183e-05, "epoch": 0.9679989377240739, "percentage": 48.39, "elapsed_time": "13:10:42", "remaining_time": "14:03:12"} +{"current_steps": 3646, "total_steps": 7532, "loss": 0.2974489629268646, "lr": 1.1354763107049234e-05, "epoch": 0.9682645067056168, "percentage": 48.41, "elapsed_time": "13:10:54", "remaining_time": "14:02:58"} +{"current_steps": 3647, "total_steps": 7532, "loss": 0.27539899945259094, "lr": 1.1350412691468935e-05, "epoch": 0.9685300756871598, "percentage": 48.42, "elapsed_time": "13:11:07", "remaining_time": "14:02:45"} +{"current_steps": 3648, "total_steps": 7532, "loss": 0.28256523609161377, "lr": 1.1346062015544997e-05, "epoch": 0.9687956446687027, "percentage": 48.43, "elapsed_time": "13:11:20", "remaining_time": "14:02:32"} +{"current_steps": 3649, "total_steps": 7532, "loss": 0.27582883834838867, "lr": 1.1341711080116176e-05, "epoch": 0.9690612136502457, "percentage": 48.45, "elapsed_time": "13:11:33", "remaining_time": "14:02:19"} +{"current_steps": 3650, "total_steps": 7532, "loss": 0.3199389576911926, "lr": 1.1337359886021285e-05, "epoch": 0.9693267826317886, "percentage": 48.46, "elapsed_time": "13:11:45", "remaining_time": "14:02:05"} +{"current_steps": 3651, "total_steps": 7532, "loss": 0.2922326922416687, "lr": 1.1333008434099178e-05, "epoch": 0.9695923516133316, "percentage": 48.47, "elapsed_time": "13:11:59", "remaining_time": "14:01:52"} +{"current_steps": 3652, "total_steps": 7532, "loss": 0.285635381937027, "lr": 1.1328656725188767e-05, "epoch": 0.9698579205948745, "percentage": 48.49, "elapsed_time": "13:12:11", "remaining_time": "14:01:39"} 
+{"current_steps": 3653, "total_steps": 7532, "loss": 0.3347492814064026, "lr": 1.1324304760129009e-05, "epoch": 0.9701234895764175, "percentage": 48.5, "elapsed_time": "13:12:24", "remaining_time": "14:01:26"} +{"current_steps": 3654, "total_steps": 7532, "loss": 0.27379873394966125, "lr": 1.1319952539758912e-05, "epoch": 0.9703890585579604, "percentage": 48.51, "elapsed_time": "13:12:37", "remaining_time": "14:01:12"} +{"current_steps": 3655, "total_steps": 7532, "loss": 0.27911311388015747, "lr": 1.1315600064917534e-05, "epoch": 0.9706546275395034, "percentage": 48.53, "elapsed_time": "13:12:50", "remaining_time": "14:00:59"} +{"current_steps": 3656, "total_steps": 7532, "loss": 0.25750118494033813, "lr": 1.1311247336443982e-05, "epoch": 0.9709201965210463, "percentage": 48.54, "elapsed_time": "13:13:03", "remaining_time": "14:00:46"} +{"current_steps": 3657, "total_steps": 7532, "loss": 0.28723078966140747, "lr": 1.1306894355177405e-05, "epoch": 0.9711857655025893, "percentage": 48.55, "elapsed_time": "13:13:16", "remaining_time": "14:00:33"} +{"current_steps": 3658, "total_steps": 7532, "loss": 0.25269389152526855, "lr": 1.1302541121957008e-05, "epoch": 0.9714513344841322, "percentage": 48.57, "elapsed_time": "13:13:29", "remaining_time": "14:00:20"} +{"current_steps": 3659, "total_steps": 7532, "loss": 0.3041607439517975, "lr": 1.1298187637622046e-05, "epoch": 0.9717169034656752, "percentage": 48.58, "elapsed_time": "13:13:41", "remaining_time": "14:00:07"} +{"current_steps": 3660, "total_steps": 7532, "loss": 0.2826605439186096, "lr": 1.1293833903011819e-05, "epoch": 0.9719824724472181, "percentage": 48.59, "elapsed_time": "13:13:55", "remaining_time": "13:59:54"} +{"current_steps": 3661, "total_steps": 7532, "loss": 0.2830520570278168, "lr": 1.1289479918965675e-05, "epoch": 0.9722480414287611, "percentage": 48.61, "elapsed_time": "13:14:07", "remaining_time": "13:59:40"} +{"current_steps": 3662, "total_steps": 7532, "loss": 0.24295952916145325, "lr": 
1.1285125686323011e-05, "epoch": 0.972513610410304, "percentage": 48.62, "elapsed_time": "13:14:20", "remaining_time": "13:59:27"} +{"current_steps": 3663, "total_steps": 7532, "loss": 0.28775808215141296, "lr": 1.1280771205923269e-05, "epoch": 0.972779179391847, "percentage": 48.63, "elapsed_time": "13:14:33", "remaining_time": "13:59:14"} +{"current_steps": 3664, "total_steps": 7532, "loss": 0.24650296568870544, "lr": 1.127641647860595e-05, "epoch": 0.97304474837339, "percentage": 48.65, "elapsed_time": "13:14:46", "remaining_time": "13:59:01"} +{"current_steps": 3665, "total_steps": 7532, "loss": 0.22344040870666504, "lr": 1.1272061505210584e-05, "epoch": 0.9733103173549329, "percentage": 48.66, "elapsed_time": "13:14:59", "remaining_time": "13:58:48"} +{"current_steps": 3666, "total_steps": 7532, "loss": 0.26920852065086365, "lr": 1.1267706286576759e-05, "epoch": 0.9735758863364758, "percentage": 48.67, "elapsed_time": "13:15:12", "remaining_time": "13:58:35"} +{"current_steps": 3667, "total_steps": 7532, "loss": 0.27615875005722046, "lr": 1.1263350823544115e-05, "epoch": 0.9738414553180188, "percentage": 48.69, "elapsed_time": "13:15:25", "remaining_time": "13:58:22"} +{"current_steps": 3668, "total_steps": 7532, "loss": 0.2768712043762207, "lr": 1.1258995116952334e-05, "epoch": 0.9741070242995619, "percentage": 48.7, "elapsed_time": "13:15:38", "remaining_time": "13:58:08"} +{"current_steps": 3669, "total_steps": 7532, "loss": 0.27764153480529785, "lr": 1.1254639167641141e-05, "epoch": 0.9743725932811048, "percentage": 48.71, "elapsed_time": "13:15:50", "remaining_time": "13:57:55"} +{"current_steps": 3670, "total_steps": 7532, "loss": 0.27423611283302307, "lr": 1.1250282976450316e-05, "epoch": 0.9746381622626478, "percentage": 48.73, "elapsed_time": "13:16:03", "remaining_time": "13:57:42"} +{"current_steps": 3671, "total_steps": 7532, "loss": 0.2626228332519531, "lr": 1.1245926544219676e-05, "epoch": 0.9749037312441907, "percentage": 48.74, "elapsed_time": 
"13:16:16", "remaining_time": "13:57:29"} +{"current_steps": 3672, "total_steps": 7532, "loss": 0.25524014234542847, "lr": 1.1241569871789096e-05, "epoch": 0.9751693002257337, "percentage": 48.75, "elapsed_time": "13:16:29", "remaining_time": "13:57:15"} +{"current_steps": 3673, "total_steps": 7532, "loss": 0.30857735872268677, "lr": 1.1237212959998485e-05, "epoch": 0.9754348692072766, "percentage": 48.77, "elapsed_time": "13:16:42", "remaining_time": "13:57:02"} +{"current_steps": 3674, "total_steps": 7532, "loss": 0.25099021196365356, "lr": 1.1232855809687807e-05, "epoch": 0.9757004381888196, "percentage": 48.78, "elapsed_time": "13:16:54", "remaining_time": "13:56:49"} +{"current_steps": 3675, "total_steps": 7532, "loss": 0.22664576768875122, "lr": 1.1228498421697068e-05, "epoch": 0.9759660071703625, "percentage": 48.79, "elapsed_time": "13:17:08", "remaining_time": "13:56:36"} +{"current_steps": 3676, "total_steps": 7532, "loss": 0.24727366864681244, "lr": 1.1224140796866322e-05, "epoch": 0.9762315761519055, "percentage": 48.81, "elapsed_time": "13:17:20", "remaining_time": "13:56:23"} +{"current_steps": 3677, "total_steps": 7532, "loss": 0.2561935782432556, "lr": 1.121978293603567e-05, "epoch": 0.9764971451334484, "percentage": 48.82, "elapsed_time": "13:17:34", "remaining_time": "13:56:10"} +{"current_steps": 3678, "total_steps": 7532, "loss": 0.2594214677810669, "lr": 1.1215424840045254e-05, "epoch": 0.9767627141149914, "percentage": 48.83, "elapsed_time": "13:17:46", "remaining_time": "13:55:57"} +{"current_steps": 3679, "total_steps": 7532, "loss": 0.2383778691291809, "lr": 1.1211066509735265e-05, "epoch": 0.9770282830965343, "percentage": 48.84, "elapsed_time": "13:17:59", "remaining_time": "13:55:44"} +{"current_steps": 3680, "total_steps": 7532, "loss": 0.2864387035369873, "lr": 1.1206707945945934e-05, "epoch": 0.9772938520780773, "percentage": 48.86, "elapsed_time": "13:18:12", "remaining_time": "13:55:30"} +{"current_steps": 3681, "total_steps": 7532, 
"loss": 0.30415672063827515, "lr": 1.1202349149517541e-05, "epoch": 0.9775594210596202, "percentage": 48.87, "elapsed_time": "13:18:25", "remaining_time": "13:55:17"} +{"current_steps": 3682, "total_steps": 7532, "loss": 0.3030807375907898, "lr": 1.1197990121290415e-05, "epoch": 0.9778249900411632, "percentage": 48.88, "elapsed_time": "13:18:38", "remaining_time": "13:55:04"} +{"current_steps": 3683, "total_steps": 7532, "loss": 0.2518938481807709, "lr": 1.1193630862104922e-05, "epoch": 0.9780905590227061, "percentage": 48.9, "elapsed_time": "13:18:50", "remaining_time": "13:54:51"} +{"current_steps": 3684, "total_steps": 7532, "loss": 0.25353187322616577, "lr": 1.1189271372801474e-05, "epoch": 0.9783561280042491, "percentage": 48.91, "elapsed_time": "13:19:04", "remaining_time": "13:54:38"} +{"current_steps": 3685, "total_steps": 7532, "loss": 0.30639684200286865, "lr": 1.1184911654220534e-05, "epoch": 0.978621696985792, "percentage": 48.92, "elapsed_time": "13:19:16", "remaining_time": "13:54:24"} +{"current_steps": 3686, "total_steps": 7532, "loss": 0.295099139213562, "lr": 1.1180551707202602e-05, "epoch": 0.978887265967335, "percentage": 48.94, "elapsed_time": "13:19:29", "remaining_time": "13:54:11"} +{"current_steps": 3687, "total_steps": 7532, "loss": 0.2428167164325714, "lr": 1.1176191532588224e-05, "epoch": 0.979152834948878, "percentage": 48.95, "elapsed_time": "13:19:42", "remaining_time": "13:53:58"} +{"current_steps": 3688, "total_steps": 7532, "loss": 0.2716362774372101, "lr": 1.1171831131217989e-05, "epoch": 0.9794184039304209, "percentage": 48.96, "elapsed_time": "13:19:55", "remaining_time": "13:53:45"} +{"current_steps": 3689, "total_steps": 7532, "loss": 0.28350287675857544, "lr": 1.1167470503932534e-05, "epoch": 0.9796839729119639, "percentage": 48.98, "elapsed_time": "13:20:08", "remaining_time": "13:53:32"} +{"current_steps": 3690, "total_steps": 7532, "loss": 0.2776945233345032, "lr": 1.1163109651572535e-05, "epoch": 0.9799495418935068, 
"percentage": 48.99, "elapsed_time": "13:20:21", "remaining_time": "13:53:19"} +{"current_steps": 3691, "total_steps": 7532, "loss": 0.2712942063808441, "lr": 1.115874857497871e-05, "epoch": 0.9802151108750498, "percentage": 49.0, "elapsed_time": "13:20:34", "remaining_time": "13:53:06"} +{"current_steps": 3692, "total_steps": 7532, "loss": 0.2530008852481842, "lr": 1.1154387274991829e-05, "epoch": 0.9804806798565927, "percentage": 49.02, "elapsed_time": "13:20:47", "remaining_time": "13:52:53"} +{"current_steps": 3693, "total_steps": 7532, "loss": 0.24889500439167023, "lr": 1.1150025752452693e-05, "epoch": 0.9807462488381357, "percentage": 49.03, "elapsed_time": "13:20:59", "remaining_time": "13:52:39"} +{"current_steps": 3694, "total_steps": 7532, "loss": 0.3051255941390991, "lr": 1.1145664008202158e-05, "epoch": 0.9810118178196786, "percentage": 49.04, "elapsed_time": "13:21:12", "remaining_time": "13:52:26"} +{"current_steps": 3695, "total_steps": 7532, "loss": 0.24781765043735504, "lr": 1.1141302043081112e-05, "epoch": 0.9812773868012216, "percentage": 49.06, "elapsed_time": "13:21:25", "remaining_time": "13:52:13"} +{"current_steps": 3696, "total_steps": 7532, "loss": 0.3021858036518097, "lr": 1.1136939857930497e-05, "epoch": 0.9815429557827646, "percentage": 49.07, "elapsed_time": "13:21:38", "remaining_time": "13:52:00"} +{"current_steps": 3697, "total_steps": 7532, "loss": 0.3026372194290161, "lr": 1.1132577453591284e-05, "epoch": 0.9818085247643076, "percentage": 49.08, "elapsed_time": "13:21:51", "remaining_time": "13:51:47"} +{"current_steps": 3698, "total_steps": 7532, "loss": 0.31511861085891724, "lr": 1.1128214830904494e-05, "epoch": 0.9820740937458505, "percentage": 49.1, "elapsed_time": "13:22:04", "remaining_time": "13:51:33"} +{"current_steps": 3699, "total_steps": 7532, "loss": 0.27885258197784424, "lr": 1.112385199071119e-05, "epoch": 0.9823396627273935, "percentage": 49.11, "elapsed_time": "13:22:17", "remaining_time": "13:51:21"} 
+{"current_steps": 3700, "total_steps": 7532, "loss": 0.2724893391132355, "lr": 1.1119488933852477e-05, "epoch": 0.9826052317089364, "percentage": 49.12, "elapsed_time": "13:22:29", "remaining_time": "13:51:07"} +{"current_steps": 3701, "total_steps": 7532, "loss": 0.2836218774318695, "lr": 1.1115125661169503e-05, "epoch": 0.9828708006904794, "percentage": 49.14, "elapsed_time": "13:22:48", "remaining_time": "13:51:00"} +{"current_steps": 3702, "total_steps": 7532, "loss": 0.24220457673072815, "lr": 1.111076217350345e-05, "epoch": 0.9831363696720223, "percentage": 49.15, "elapsed_time": "13:23:01", "remaining_time": "13:50:47"} +{"current_steps": 3703, "total_steps": 7532, "loss": 0.28599557280540466, "lr": 1.1106398471695554e-05, "epoch": 0.9834019386535653, "percentage": 49.16, "elapsed_time": "13:23:14", "remaining_time": "13:50:33"} +{"current_steps": 3704, "total_steps": 7532, "loss": 0.30559849739074707, "lr": 1.110203455658708e-05, "epoch": 0.9836675076351082, "percentage": 49.18, "elapsed_time": "13:23:26", "remaining_time": "13:50:20"} +{"current_steps": 3705, "total_steps": 7532, "loss": 0.2763117551803589, "lr": 1.109767042901934e-05, "epoch": 0.9839330766166512, "percentage": 49.19, "elapsed_time": "13:23:39", "remaining_time": "13:50:07"} +{"current_steps": 3706, "total_steps": 7532, "loss": 0.2028101086616516, "lr": 1.109330608983369e-05, "epoch": 0.9841986455981941, "percentage": 49.2, "elapsed_time": "13:23:52", "remaining_time": "13:49:53"} +{"current_steps": 3707, "total_steps": 7532, "loss": 0.25386112928390503, "lr": 1.1088941539871515e-05, "epoch": 0.9844642145797371, "percentage": 49.22, "elapsed_time": "13:24:05", "remaining_time": "13:49:40"} +{"current_steps": 3708, "total_steps": 7532, "loss": 0.2588289976119995, "lr": 1.1084576779974257e-05, "epoch": 0.98472978356128, "percentage": 49.23, "elapsed_time": "13:24:17", "remaining_time": "13:49:27"} +{"current_steps": 3709, "total_steps": 7532, "loss": 0.3201071321964264, "lr": 
1.1080211810983385e-05, "epoch": 0.984995352542823, "percentage": 49.24, "elapsed_time": "13:24:30", "remaining_time": "13:49:14"} +{"current_steps": 3710, "total_steps": 7532, "loss": 0.28439003229141235, "lr": 1.107584663374042e-05, "epoch": 0.985260921524366, "percentage": 49.26, "elapsed_time": "13:24:43", "remaining_time": "13:49:00"} +{"current_steps": 3711, "total_steps": 7532, "loss": 0.2734091579914093, "lr": 1.1071481249086908e-05, "epoch": 0.9855264905059089, "percentage": 49.27, "elapsed_time": "13:24:55", "remaining_time": "13:48:47"} +{"current_steps": 3712, "total_steps": 7532, "loss": 0.2917581796646118, "lr": 1.1067115657864451e-05, "epoch": 0.9857920594874519, "percentage": 49.28, "elapsed_time": "13:25:08", "remaining_time": "13:48:34"} +{"current_steps": 3713, "total_steps": 7532, "loss": 0.3569914996623993, "lr": 1.1062749860914681e-05, "epoch": 0.9860576284689948, "percentage": 49.3, "elapsed_time": "13:25:21", "remaining_time": "13:48:21"} +{"current_steps": 3714, "total_steps": 7532, "loss": 0.2574514150619507, "lr": 1.1058383859079271e-05, "epoch": 0.9863231974505378, "percentage": 49.31, "elapsed_time": "13:25:34", "remaining_time": "13:48:08"} +{"current_steps": 3715, "total_steps": 7532, "loss": 0.3035826086997986, "lr": 1.1054017653199936e-05, "epoch": 0.9865887664320807, "percentage": 49.32, "elapsed_time": "13:25:47", "remaining_time": "13:47:54"} +{"current_steps": 3716, "total_steps": 7532, "loss": 0.28067824244499207, "lr": 1.1049651244118424e-05, "epoch": 0.9868543354136237, "percentage": 49.34, "elapsed_time": "13:25:59", "remaining_time": "13:47:41"} +{"current_steps": 3717, "total_steps": 7532, "loss": 0.2511579394340515, "lr": 1.1045284632676535e-05, "epoch": 0.9871199043951666, "percentage": 49.35, "elapsed_time": "13:26:12", "remaining_time": "13:47:27"} +{"current_steps": 3718, "total_steps": 7532, "loss": 0.3059889078140259, "lr": 1.1040917819716097e-05, "epoch": 0.9873854733767096, "percentage": 49.36, "elapsed_time": 
"13:26:25", "remaining_time": "13:47:14"} +{"current_steps": 3719, "total_steps": 7532, "loss": 0.2642200291156769, "lr": 1.103655080607898e-05, "epoch": 0.9876510423582525, "percentage": 49.38, "elapsed_time": "13:26:37", "remaining_time": "13:47:01"} +{"current_steps": 3720, "total_steps": 7532, "loss": 0.2743483781814575, "lr": 1.1032183592607094e-05, "epoch": 0.9879166113397955, "percentage": 49.39, "elapsed_time": "13:26:50", "remaining_time": "13:46:48"} +{"current_steps": 3721, "total_steps": 7532, "loss": 0.2597433030605316, "lr": 1.1027816180142383e-05, "epoch": 0.9881821803213384, "percentage": 49.4, "elapsed_time": "13:27:03", "remaining_time": "13:46:34"} +{"current_steps": 3722, "total_steps": 7532, "loss": 0.24439337849617004, "lr": 1.1023448569526834e-05, "epoch": 0.9884477493028814, "percentage": 49.42, "elapsed_time": "13:27:16", "remaining_time": "13:46:21"} +{"current_steps": 3723, "total_steps": 7532, "loss": 0.2520195245742798, "lr": 1.1019080761602473e-05, "epoch": 0.9887133182844243, "percentage": 49.43, "elapsed_time": "13:27:29", "remaining_time": "13:46:08"} +{"current_steps": 3724, "total_steps": 7532, "loss": 0.2904737889766693, "lr": 1.1014712757211359e-05, "epoch": 0.9889788872659674, "percentage": 49.44, "elapsed_time": "13:27:41", "remaining_time": "13:45:54"} +{"current_steps": 3725, "total_steps": 7532, "loss": 0.28096869587898254, "lr": 1.1010344557195588e-05, "epoch": 0.9892444562475103, "percentage": 49.46, "elapsed_time": "13:27:54", "remaining_time": "13:45:41"} +{"current_steps": 3726, "total_steps": 7532, "loss": 0.317839652299881, "lr": 1.1005976162397309e-05, "epoch": 0.9895100252290533, "percentage": 49.47, "elapsed_time": "13:28:06", "remaining_time": "13:45:28"} +{"current_steps": 3727, "total_steps": 7532, "loss": 0.29213201999664307, "lr": 1.100160757365869e-05, "epoch": 0.9897755942105962, "percentage": 49.48, "elapsed_time": "13:28:20", "remaining_time": "13:45:15"} +{"current_steps": 3728, "total_steps": 7532, 
"loss": 0.27034991979599, "lr": 1.0997238791821943e-05, "epoch": 0.9900411631921392, "percentage": 49.5, "elapsed_time": "13:28:32", "remaining_time": "13:45:01"} +{"current_steps": 3729, "total_steps": 7532, "loss": 0.30504971742630005, "lr": 1.0992869817729317e-05, "epoch": 0.9903067321736821, "percentage": 49.51, "elapsed_time": "13:28:45", "remaining_time": "13:44:48"} +{"current_steps": 3730, "total_steps": 7532, "loss": 0.30673110485076904, "lr": 1.09885006522231e-05, "epoch": 0.9905723011552251, "percentage": 49.52, "elapsed_time": "13:28:58", "remaining_time": "13:44:35"} +{"current_steps": 3731, "total_steps": 7532, "loss": 0.27990686893463135, "lr": 1.0984131296145616e-05, "epoch": 0.990837870136768, "percentage": 49.54, "elapsed_time": "13:29:11", "remaining_time": "13:44:22"} +{"current_steps": 3732, "total_steps": 7532, "loss": 0.24379019439220428, "lr": 1.0979761750339225e-05, "epoch": 0.991103439118311, "percentage": 49.55, "elapsed_time": "13:29:24", "remaining_time": "13:44:08"} +{"current_steps": 3733, "total_steps": 7532, "loss": 0.30554595589637756, "lr": 1.0975392015646323e-05, "epoch": 0.991369008099854, "percentage": 49.56, "elapsed_time": "13:29:36", "remaining_time": "13:43:55"} +{"current_steps": 3734, "total_steps": 7532, "loss": 0.245269775390625, "lr": 1.0971022092909342e-05, "epoch": 0.9916345770813969, "percentage": 49.58, "elapsed_time": "13:29:49", "remaining_time": "13:43:42"} +{"current_steps": 3735, "total_steps": 7532, "loss": 0.2732948064804077, "lr": 1.0966651982970757e-05, "epoch": 0.9919001460629399, "percentage": 49.59, "elapsed_time": "13:30:02", "remaining_time": "13:43:29"} +{"current_steps": 3736, "total_steps": 7532, "loss": 0.25989004969596863, "lr": 1.0962281686673071e-05, "epoch": 0.9921657150444828, "percentage": 49.6, "elapsed_time": "13:30:15", "remaining_time": "13:43:16"} +{"current_steps": 3737, "total_steps": 7532, "loss": 0.32891198992729187, "lr": 1.0957911204858824e-05, "epoch": 0.9924312840260258, 
"percentage": 49.61, "elapsed_time": "13:30:28", "remaining_time": "13:43:02"} +{"current_steps": 3738, "total_steps": 7532, "loss": 0.29184675216674805, "lr": 1.0953540538370591e-05, "epoch": 0.9926968530075687, "percentage": 49.63, "elapsed_time": "13:30:41", "remaining_time": "13:42:49"} +{"current_steps": 3739, "total_steps": 7532, "loss": 0.2784018814563751, "lr": 1.094916968805099e-05, "epoch": 0.9929624219891117, "percentage": 49.64, "elapsed_time": "13:30:54", "remaining_time": "13:42:36"} +{"current_steps": 3740, "total_steps": 7532, "loss": 0.26586195826530457, "lr": 1.094479865474267e-05, "epoch": 0.9932279909706546, "percentage": 49.65, "elapsed_time": "13:31:07", "remaining_time": "13:42:23"} +{"current_steps": 3741, "total_steps": 7532, "loss": 0.24593298137187958, "lr": 1.094042743928831e-05, "epoch": 0.9934935599521976, "percentage": 49.67, "elapsed_time": "13:31:19", "remaining_time": "13:42:10"} +{"current_steps": 3742, "total_steps": 7532, "loss": 0.2462792694568634, "lr": 1.0936056042530632e-05, "epoch": 0.9937591289337405, "percentage": 49.68, "elapsed_time": "13:31:32", "remaining_time": "13:41:56"} +{"current_steps": 3743, "total_steps": 7532, "loss": 0.2688900828361511, "lr": 1.0931684465312388e-05, "epoch": 0.9940246979152835, "percentage": 49.69, "elapsed_time": "13:31:44", "remaining_time": "13:41:43"} +{"current_steps": 3744, "total_steps": 7532, "loss": 0.2842782735824585, "lr": 1.0927312708476367e-05, "epoch": 0.9942902668968264, "percentage": 49.71, "elapsed_time": "13:31:57", "remaining_time": "13:41:30"} +{"current_steps": 3745, "total_steps": 7532, "loss": 0.249299556016922, "lr": 1.0922940772865393e-05, "epoch": 0.9945558358783694, "percentage": 49.72, "elapsed_time": "13:32:10", "remaining_time": "13:41:16"} +{"current_steps": 3746, "total_steps": 7532, "loss": 0.2765413522720337, "lr": 1.0918568659322325e-05, "epoch": 0.9948214048599123, "percentage": 49.73, "elapsed_time": "13:32:23", "remaining_time": "13:41:03"} 
+{"current_steps": 3747, "total_steps": 7532, "loss": 0.29750365018844604, "lr": 1.0914196368690049e-05, "epoch": 0.9950869738414553, "percentage": 49.75, "elapsed_time": "13:32:35", "remaining_time": "13:40:50"} +{"current_steps": 3748, "total_steps": 7532, "loss": 0.25272879004478455, "lr": 1.0909823901811496e-05, "epoch": 0.9953525428229982, "percentage": 49.76, "elapsed_time": "13:32:48", "remaining_time": "13:40:37"} +{"current_steps": 3749, "total_steps": 7532, "loss": 0.3056861460208893, "lr": 1.0905451259529626e-05, "epoch": 0.9956181118045412, "percentage": 49.77, "elapsed_time": "13:33:01", "remaining_time": "13:40:23"} +{"current_steps": 3750, "total_steps": 7532, "loss": 0.26723814010620117, "lr": 1.090107844268743e-05, "epoch": 0.9958836807860841, "percentage": 49.79, "elapsed_time": "13:33:14", "remaining_time": "13:40:10"} +{"current_steps": 3751, "total_steps": 7532, "loss": 0.29998716711997986, "lr": 1.0896705452127943e-05, "epoch": 0.9961492497676271, "percentage": 49.8, "elapsed_time": "13:33:27", "remaining_time": "13:39:57"} +{"current_steps": 3752, "total_steps": 7532, "loss": 0.2690891623497009, "lr": 1.0892332288694216e-05, "epoch": 0.9964148187491702, "percentage": 49.81, "elapsed_time": "13:33:39", "remaining_time": "13:39:43"} +{"current_steps": 3753, "total_steps": 7532, "loss": 0.25555333495140076, "lr": 1.0887958953229349e-05, "epoch": 0.9966803877307131, "percentage": 49.83, "elapsed_time": "13:33:52", "remaining_time": "13:39:30"} +{"current_steps": 3754, "total_steps": 7532, "loss": 0.27788421511650085, "lr": 1.088358544657647e-05, "epoch": 0.996945956712256, "percentage": 49.84, "elapsed_time": "13:34:05", "remaining_time": "13:39:17"} +{"current_steps": 3755, "total_steps": 7532, "loss": 0.2566586136817932, "lr": 1.0879211769578734e-05, "epoch": 0.997211525693799, "percentage": 49.85, "elapsed_time": "13:34:18", "remaining_time": "13:39:04"} +{"current_steps": 3756, "total_steps": 7532, "loss": 0.3028980493545532, "lr": 
1.0874837923079339e-05, "epoch": 0.997477094675342, "percentage": 49.87, "elapsed_time": "13:34:30", "remaining_time": "13:38:51"} +{"current_steps": 3757, "total_steps": 7532, "loss": 0.30244824290275574, "lr": 1.0870463907921512e-05, "epoch": 0.9977426636568849, "percentage": 49.88, "elapsed_time": "13:34:43", "remaining_time": "13:38:38"} +{"current_steps": 3758, "total_steps": 7532, "loss": 0.2610962390899658, "lr": 1.086608972494851e-05, "epoch": 0.9980082326384279, "percentage": 49.89, "elapsed_time": "13:34:56", "remaining_time": "13:38:24"} +{"current_steps": 3759, "total_steps": 7532, "loss": 0.2733536660671234, "lr": 1.0861715375003623e-05, "epoch": 0.9982738016199708, "percentage": 49.91, "elapsed_time": "13:35:09", "remaining_time": "13:38:11"} +{"current_steps": 3760, "total_steps": 7532, "loss": 0.2915020287036896, "lr": 1.0857340858930175e-05, "epoch": 0.9985393706015138, "percentage": 49.92, "elapsed_time": "13:35:22", "remaining_time": "13:37:58"} +{"current_steps": 3761, "total_steps": 7532, "loss": 0.2940186560153961, "lr": 1.085296617757152e-05, "epoch": 0.9988049395830567, "percentage": 49.93, "elapsed_time": "13:35:34", "remaining_time": "13:37:44"} +{"current_steps": 3762, "total_steps": 7532, "loss": 0.3002738952636719, "lr": 1.0848591331771045e-05, "epoch": 0.9990705085645997, "percentage": 49.95, "elapsed_time": "13:35:47", "remaining_time": "13:37:31"} +{"current_steps": 3763, "total_steps": 7532, "loss": 0.284588485956192, "lr": 1.0844216322372172e-05, "epoch": 0.9993360775461426, "percentage": 49.96, "elapsed_time": "13:36:00", "remaining_time": "13:37:18"} +{"current_steps": 3764, "total_steps": 7532, "loss": 0.29395923018455505, "lr": 1.0839841150218347e-05, "epoch": 0.9996016465276856, "percentage": 49.97, "elapsed_time": "13:36:13", "remaining_time": "13:37:05"} +{"current_steps": 3765, "total_steps": 7532, "loss": 0.2574613094329834, "lr": 1.083546581615305e-05, "epoch": 0.9998672155092285, "percentage": 49.99, "elapsed_time": 
"13:36:25", "remaining_time": "13:36:51"} +{"current_steps": 3766, "total_steps": 7532, "loss": 0.177712082862854, "lr": 1.0831090321019801e-05, "epoch": 1.0, "percentage": 50.0, "elapsed_time": "13:36:30", "remaining_time": "13:36:30"} +{"current_steps": 3767, "total_steps": 7532, "loss": 0.29758381843566895, "lr": 1.0826714665662139e-05, "epoch": 1.000265568981543, "percentage": 50.01, "elapsed_time": "13:36:43", "remaining_time": "13:36:17"} +{"current_steps": 3768, "total_steps": 7532, "loss": 0.23377545177936554, "lr": 1.0822338850923644e-05, "epoch": 1.000531137963086, "percentage": 50.03, "elapsed_time": "13:36:56", "remaining_time": "13:36:04"} +{"current_steps": 3769, "total_steps": 7532, "loss": 0.2505020797252655, "lr": 1.0817962877647911e-05, "epoch": 1.0007967069446289, "percentage": 50.04, "elapsed_time": "13:37:09", "remaining_time": "13:35:51"} +{"current_steps": 3770, "total_steps": 7532, "loss": 0.26122647523880005, "lr": 1.0813586746678584e-05, "epoch": 1.0010622759261718, "percentage": 50.05, "elapsed_time": "13:37:21", "remaining_time": "13:35:37"} +{"current_steps": 3771, "total_steps": 7532, "loss": 0.27962177991867065, "lr": 1.0809210458859327e-05, "epoch": 1.0013278449077148, "percentage": 50.07, "elapsed_time": "13:37:34", "remaining_time": "13:35:24"} +{"current_steps": 3772, "total_steps": 7532, "loss": 0.21921640634536743, "lr": 1.080483401503384e-05, "epoch": 1.0015934138892577, "percentage": 50.08, "elapsed_time": "13:37:47", "remaining_time": "13:35:10"} +{"current_steps": 3773, "total_steps": 7532, "loss": 0.24623796343803406, "lr": 1.0800457416045845e-05, "epoch": 1.0018589828708007, "percentage": 50.09, "elapsed_time": "13:38:00", "remaining_time": "13:34:57"} +{"current_steps": 3774, "total_steps": 7532, "loss": 0.3130728006362915, "lr": 1.0796080662739098e-05, "epoch": 1.0021245518523436, "percentage": 50.11, "elapsed_time": "13:38:12", "remaining_time": "13:34:44"} +{"current_steps": 3775, "total_steps": 7532, "loss": 
0.2548064589500427, "lr": 1.0791703755957392e-05, "epoch": 1.0023901208338866, "percentage": 50.12, "elapsed_time": "13:38:25", "remaining_time": "13:34:31"} +{"current_steps": 3776, "total_steps": 7532, "loss": 0.20517288148403168, "lr": 1.078732669654454e-05, "epoch": 1.0026556898154295, "percentage": 50.13, "elapsed_time": "13:38:38", "remaining_time": "13:34:17"} +{"current_steps": 3777, "total_steps": 7532, "loss": 0.2634897530078888, "lr": 1.0782949485344385e-05, "epoch": 1.0029212587969725, "percentage": 50.15, "elapsed_time": "13:38:51", "remaining_time": "13:34:04"} +{"current_steps": 3778, "total_steps": 7532, "loss": 0.2743223309516907, "lr": 1.0778572123200804e-05, "epoch": 1.0031868277785154, "percentage": 50.16, "elapsed_time": "13:39:03", "remaining_time": "13:33:51"} +{"current_steps": 3779, "total_steps": 7532, "loss": 0.24595436453819275, "lr": 1.0774194610957695e-05, "epoch": 1.0034523967600584, "percentage": 50.17, "elapsed_time": "13:39:16", "remaining_time": "13:33:38"} +{"current_steps": 3780, "total_steps": 7532, "loss": 0.2508128881454468, "lr": 1.0769816949459002e-05, "epoch": 1.0037179657416013, "percentage": 50.19, "elapsed_time": "13:39:29", "remaining_time": "13:33:25"} +{"current_steps": 3781, "total_steps": 7532, "loss": 0.2326367199420929, "lr": 1.0765439139548677e-05, "epoch": 1.0039835347231443, "percentage": 50.2, "elapsed_time": "13:39:41", "remaining_time": "13:33:11"} +{"current_steps": 3782, "total_steps": 7532, "loss": 0.2888404130935669, "lr": 1.0761061182070716e-05, "epoch": 1.0042491037046872, "percentage": 50.21, "elapsed_time": "13:39:54", "remaining_time": "13:32:58"} +{"current_steps": 3783, "total_steps": 7532, "loss": 0.2804296612739563, "lr": 1.0756683077869133e-05, "epoch": 1.0045146726862302, "percentage": 50.23, "elapsed_time": "13:40:07", "remaining_time": "13:32:44"} +{"current_steps": 3784, "total_steps": 7532, "loss": 0.2644953429698944, "lr": 1.0752304827787979e-05, "epoch": 1.0047802416677731, 
"percentage": 50.24, "elapsed_time": "13:40:20", "remaining_time": "13:32:32"} +{"current_steps": 3785, "total_steps": 7532, "loss": 0.297788143157959, "lr": 1.0747926432671323e-05, "epoch": 1.005045810649316, "percentage": 50.25, "elapsed_time": "13:40:32", "remaining_time": "13:32:18"} +{"current_steps": 3786, "total_steps": 7532, "loss": 0.2644156515598297, "lr": 1.0743547893363276e-05, "epoch": 1.005311379630859, "percentage": 50.27, "elapsed_time": "13:40:45", "remaining_time": "13:32:05"} +{"current_steps": 3787, "total_steps": 7532, "loss": 0.23818905651569366, "lr": 1.073916921070796e-05, "epoch": 1.005576948612402, "percentage": 50.28, "elapsed_time": "13:40:58", "remaining_time": "13:31:51"} +{"current_steps": 3788, "total_steps": 7532, "loss": 0.2544933259487152, "lr": 1.0734790385549538e-05, "epoch": 1.005842517593945, "percentage": 50.29, "elapsed_time": "13:41:11", "remaining_time": "13:31:38"} +{"current_steps": 3789, "total_steps": 7532, "loss": 0.2569275498390198, "lr": 1.0730411418732198e-05, "epoch": 1.006108086575488, "percentage": 50.31, "elapsed_time": "13:41:23", "remaining_time": "13:31:25"} +{"current_steps": 3790, "total_steps": 7532, "loss": 0.2248159945011139, "lr": 1.0726032311100153e-05, "epoch": 1.0063736555570308, "percentage": 50.32, "elapsed_time": "13:41:36", "remaining_time": "13:31:12"} +{"current_steps": 3791, "total_steps": 7532, "loss": 0.25541940331459045, "lr": 1.072165306349764e-05, "epoch": 1.0066392245385738, "percentage": 50.33, "elapsed_time": "13:41:49", "remaining_time": "13:30:59"} +{"current_steps": 3792, "total_steps": 7532, "loss": 0.24429568648338318, "lr": 1.0717273676768924e-05, "epoch": 1.0069047935201167, "percentage": 50.35, "elapsed_time": "13:42:01", "remaining_time": "13:30:45"} +{"current_steps": 3793, "total_steps": 7532, "loss": 0.2586621344089508, "lr": 1.0712894151758306e-05, "epoch": 1.0071703625016597, "percentage": 50.36, "elapsed_time": "13:42:14", "remaining_time": "13:30:32"} 
+{"current_steps": 3794, "total_steps": 7532, "loss": 0.28685104846954346, "lr": 1.0708514489310103e-05, "epoch": 1.0074359314832027, "percentage": 50.37, "elapsed_time": "13:42:27", "remaining_time": "13:30:19"} +{"current_steps": 3795, "total_steps": 7532, "loss": 0.2847924530506134, "lr": 1.0704134690268661e-05, "epoch": 1.0077015004647458, "percentage": 50.39, "elapsed_time": "13:42:40", "remaining_time": "13:30:05"} +{"current_steps": 3796, "total_steps": 7532, "loss": 0.24646440148353577, "lr": 1.0699754755478358e-05, "epoch": 1.0079670694462888, "percentage": 50.4, "elapsed_time": "13:42:52", "remaining_time": "13:29:52"} +{"current_steps": 3797, "total_steps": 7532, "loss": 0.22286385297775269, "lr": 1.0695374685783586e-05, "epoch": 1.0082326384278317, "percentage": 50.41, "elapsed_time": "13:43:05", "remaining_time": "13:29:39"} +{"current_steps": 3798, "total_steps": 7532, "loss": 0.2524179518222809, "lr": 1.069099448202878e-05, "epoch": 1.0084982074093747, "percentage": 50.42, "elapsed_time": "13:43:18", "remaining_time": "13:29:26"} +{"current_steps": 3799, "total_steps": 7532, "loss": 0.2625758647918701, "lr": 1.0686614145058387e-05, "epoch": 1.0087637763909176, "percentage": 50.44, "elapsed_time": "13:43:31", "remaining_time": "13:29:13"} +{"current_steps": 3800, "total_steps": 7532, "loss": 0.25318068265914917, "lr": 1.0682233675716884e-05, "epoch": 1.0090293453724606, "percentage": 50.45, "elapsed_time": "13:43:44", "remaining_time": "13:28:59"} +{"current_steps": 3801, "total_steps": 7532, "loss": 0.24224570393562317, "lr": 1.0677853074848774e-05, "epoch": 1.0092949143540035, "percentage": 50.46, "elapsed_time": "13:44:03", "remaining_time": "13:28:52"} +{"current_steps": 3802, "total_steps": 7532, "loss": 0.28595417737960815, "lr": 1.0673472343298588e-05, "epoch": 1.0095604833355465, "percentage": 50.48, "elapsed_time": "13:44:15", "remaining_time": "13:28:38"} +{"current_steps": 3803, "total_steps": 7532, "loss": 0.26894015073776245, "lr": 
1.0669091481910874e-05, "epoch": 1.0098260523170894, "percentage": 50.49, "elapsed_time": "13:44:28", "remaining_time": "13:28:25"} +{"current_steps": 3804, "total_steps": 7532, "loss": 0.2605208158493042, "lr": 1.0664710491530214e-05, "epoch": 1.0100916212986324, "percentage": 50.5, "elapsed_time": "13:44:40", "remaining_time": "13:28:12"} +{"current_steps": 3805, "total_steps": 7532, "loss": 0.2595113515853882, "lr": 1.0660329373001212e-05, "epoch": 1.0103571902801753, "percentage": 50.52, "elapsed_time": "13:44:53", "remaining_time": "13:27:59"} +{"current_steps": 3806, "total_steps": 7532, "loss": 0.27478674054145813, "lr": 1.0655948127168494e-05, "epoch": 1.0106227592617183, "percentage": 50.53, "elapsed_time": "13:45:06", "remaining_time": "13:27:45"} +{"current_steps": 3807, "total_steps": 7532, "loss": 0.2587064504623413, "lr": 1.0651566754876715e-05, "epoch": 1.0108883282432612, "percentage": 50.54, "elapsed_time": "13:45:19", "remaining_time": "13:27:32"} +{"current_steps": 3808, "total_steps": 7532, "loss": 0.2420537769794464, "lr": 1.064718525697055e-05, "epoch": 1.0111538972248042, "percentage": 50.56, "elapsed_time": "13:45:31", "remaining_time": "13:27:19"} +{"current_steps": 3809, "total_steps": 7532, "loss": 0.29424652457237244, "lr": 1.0642803634294699e-05, "epoch": 1.0114194662063472, "percentage": 50.57, "elapsed_time": "13:45:44", "remaining_time": "13:27:05"} +{"current_steps": 3810, "total_steps": 7532, "loss": 0.25162142515182495, "lr": 1.0638421887693887e-05, "epoch": 1.01168503518789, "percentage": 50.58, "elapsed_time": "13:45:57", "remaining_time": "13:26:52"} +{"current_steps": 3811, "total_steps": 7532, "loss": 0.25661247968673706, "lr": 1.0634040018012865e-05, "epoch": 1.011950604169433, "percentage": 50.6, "elapsed_time": "13:46:09", "remaining_time": "13:26:39"} +{"current_steps": 3812, "total_steps": 7532, "loss": 0.2042091339826584, "lr": 1.0629658026096408e-05, "epoch": 1.012216173150976, "percentage": 50.61, "elapsed_time": 
"13:46:22", "remaining_time": "13:26:26"} +{"current_steps": 3813, "total_steps": 7532, "loss": 0.22496266663074493, "lr": 1.0625275912789307e-05, "epoch": 1.012481742132519, "percentage": 50.62, "elapsed_time": "13:46:35", "remaining_time": "13:26:12"} +{"current_steps": 3814, "total_steps": 7532, "loss": 0.23609521985054016, "lr": 1.0620893678936385e-05, "epoch": 1.012747311114062, "percentage": 50.64, "elapsed_time": "13:46:48", "remaining_time": "13:25:59"} +{"current_steps": 3815, "total_steps": 7532, "loss": 0.2561722993850708, "lr": 1.0616511325382486e-05, "epoch": 1.0130128800956049, "percentage": 50.65, "elapsed_time": "13:47:01", "remaining_time": "13:25:46"} +{"current_steps": 3816, "total_steps": 7532, "loss": 0.2617529630661011, "lr": 1.0612128852972474e-05, "epoch": 1.0132784490771478, "percentage": 50.66, "elapsed_time": "13:47:13", "remaining_time": "13:25:33"} +{"current_steps": 3817, "total_steps": 7532, "loss": 0.2633543014526367, "lr": 1.060774626255124e-05, "epoch": 1.0135440180586908, "percentage": 50.68, "elapsed_time": "13:47:26", "remaining_time": "13:25:19"} +{"current_steps": 3818, "total_steps": 7532, "loss": 0.19401729106903076, "lr": 1.0603363554963693e-05, "epoch": 1.0138095870402337, "percentage": 50.69, "elapsed_time": "13:47:39", "remaining_time": "13:25:06"} +{"current_steps": 3819, "total_steps": 7532, "loss": 0.2583369016647339, "lr": 1.0598980731054765e-05, "epoch": 1.0140751560217767, "percentage": 50.7, "elapsed_time": "13:47:51", "remaining_time": "13:24:52"} +{"current_steps": 3820, "total_steps": 7532, "loss": 0.26138922572135925, "lr": 1.0594597791669419e-05, "epoch": 1.0143407250033196, "percentage": 50.72, "elapsed_time": "13:48:04", "remaining_time": "13:24:39"} +{"current_steps": 3821, "total_steps": 7532, "loss": 0.2506800591945648, "lr": 1.0590214737652632e-05, "epoch": 1.0146062939848626, "percentage": 50.73, "elapsed_time": "13:48:16", "remaining_time": "13:24:26"} +{"current_steps": 3822, "total_steps": 7532, 
"loss": 0.21569974720478058, "lr": 1.0585831569849405e-05, "epoch": 1.0148718629664055, "percentage": 50.74, "elapsed_time": "13:48:29", "remaining_time": "13:24:12"} +{"current_steps": 3823, "total_steps": 7532, "loss": 0.2765602767467499, "lr": 1.0581448289104759e-05, "epoch": 1.0151374319479485, "percentage": 50.76, "elapsed_time": "13:48:41", "remaining_time": "13:23:59"} +{"current_steps": 3824, "total_steps": 7532, "loss": 0.25180384516716003, "lr": 1.0577064896263743e-05, "epoch": 1.0154030009294914, "percentage": 50.77, "elapsed_time": "13:48:54", "remaining_time": "13:23:45"} +{"current_steps": 3825, "total_steps": 7532, "loss": 0.24164071679115295, "lr": 1.0572681392171417e-05, "epoch": 1.0156685699110344, "percentage": 50.78, "elapsed_time": "13:49:06", "remaining_time": "13:23:32"} +{"current_steps": 3826, "total_steps": 7532, "loss": 0.24206972122192383, "lr": 1.0568297777672875e-05, "epoch": 1.0159341388925773, "percentage": 50.8, "elapsed_time": "13:49:19", "remaining_time": "13:23:18"} +{"current_steps": 3827, "total_steps": 7532, "loss": 0.24563468992710114, "lr": 1.0563914053613227e-05, "epoch": 1.0161997078741203, "percentage": 50.81, "elapsed_time": "13:49:32", "remaining_time": "13:23:05"} +{"current_steps": 3828, "total_steps": 7532, "loss": 0.23226243257522583, "lr": 1.0559530220837593e-05, "epoch": 1.0164652768556632, "percentage": 50.82, "elapsed_time": "13:49:44", "remaining_time": "13:22:51"} +{"current_steps": 3829, "total_steps": 7532, "loss": 0.2245083749294281, "lr": 1.0555146280191137e-05, "epoch": 1.0167308458372062, "percentage": 50.84, "elapsed_time": "13:49:57", "remaining_time": "13:22:38"} +{"current_steps": 3830, "total_steps": 7532, "loss": 0.24455049633979797, "lr": 1.0550762232519023e-05, "epoch": 1.0169964148187491, "percentage": 50.85, "elapsed_time": "13:50:09", "remaining_time": "13:22:25"} +{"current_steps": 3831, "total_steps": 7532, "loss": 0.2540651857852936, "lr": 1.0546378078666448e-05, "epoch": 1.017261983800292, 
"percentage": 50.86, "elapsed_time": "13:50:22", "remaining_time": "13:22:12"} +{"current_steps": 3832, "total_steps": 7532, "loss": 0.23392565548419952, "lr": 1.0541993819478622e-05, "epoch": 1.017527552781835, "percentage": 50.88, "elapsed_time": "13:50:35", "remaining_time": "13:21:58"} +{"current_steps": 3833, "total_steps": 7532, "loss": 0.21601927280426025, "lr": 1.053760945580078e-05, "epoch": 1.017793121763378, "percentage": 50.89, "elapsed_time": "13:50:47", "remaining_time": "13:21:45"} +{"current_steps": 3834, "total_steps": 7532, "loss": 0.24622616171836853, "lr": 1.0533224988478176e-05, "epoch": 1.018058690744921, "percentage": 50.9, "elapsed_time": "13:51:01", "remaining_time": "13:21:32"} +{"current_steps": 3835, "total_steps": 7532, "loss": 0.2774650752544403, "lr": 1.0528840418356086e-05, "epoch": 1.018324259726464, "percentage": 50.92, "elapsed_time": "13:51:13", "remaining_time": "13:21:18"} +{"current_steps": 3836, "total_steps": 7532, "loss": 0.22323890030384064, "lr": 1.0524455746279795e-05, "epoch": 1.0185898287080069, "percentage": 50.93, "elapsed_time": "13:51:26", "remaining_time": "13:21:05"} +{"current_steps": 3837, "total_steps": 7532, "loss": 0.21901552379131317, "lr": 1.0520070973094622e-05, "epoch": 1.0188553976895498, "percentage": 50.94, "elapsed_time": "13:51:39", "remaining_time": "13:20:52"} +{"current_steps": 3838, "total_steps": 7532, "loss": 0.3037784695625305, "lr": 1.0515686099645901e-05, "epoch": 1.0191209666710928, "percentage": 50.96, "elapsed_time": "13:51:52", "remaining_time": "13:20:39"} +{"current_steps": 3839, "total_steps": 7532, "loss": 0.22658365964889526, "lr": 1.0511301126778984e-05, "epoch": 1.0193865356526357, "percentage": 50.97, "elapsed_time": "13:52:04", "remaining_time": "13:20:25"} +{"current_steps": 3840, "total_steps": 7532, "loss": 0.23144160211086273, "lr": 1.0506916055339237e-05, "epoch": 1.0196521046341787, "percentage": 50.98, "elapsed_time": "13:52:17", "remaining_time": "13:20:12"} 
+{"current_steps": 3841, "total_steps": 7532, "loss": 0.25658899545669556, "lr": 1.0502530886172055e-05, "epoch": 1.0199176736157216, "percentage": 51.0, "elapsed_time": "13:52:29", "remaining_time": "13:19:59"} +{"current_steps": 3842, "total_steps": 7532, "loss": 0.19658756256103516, "lr": 1.0498145620122845e-05, "epoch": 1.0201832425972646, "percentage": 51.01, "elapsed_time": "13:52:42", "remaining_time": "13:19:46"} +{"current_steps": 3843, "total_steps": 7532, "loss": 0.19045208394527435, "lr": 1.049376025803703e-05, "epoch": 1.0204488115788075, "percentage": 51.02, "elapsed_time": "13:52:55", "remaining_time": "13:19:32"} +{"current_steps": 3844, "total_steps": 7532, "loss": 0.2577810287475586, "lr": 1.0489374800760066e-05, "epoch": 1.0207143805603505, "percentage": 51.04, "elapsed_time": "13:53:08", "remaining_time": "13:19:19"} +{"current_steps": 3845, "total_steps": 7532, "loss": 0.2807403802871704, "lr": 1.048498924913741e-05, "epoch": 1.0209799495418934, "percentage": 51.05, "elapsed_time": "13:53:21", "remaining_time": "13:19:06"} +{"current_steps": 3846, "total_steps": 7532, "loss": 0.2710269093513489, "lr": 1.0480603604014545e-05, "epoch": 1.0212455185234364, "percentage": 51.06, "elapsed_time": "13:53:33", "remaining_time": "13:18:53"} +{"current_steps": 3847, "total_steps": 7532, "loss": 0.2560620903968811, "lr": 1.0476217866236974e-05, "epoch": 1.0215110875049793, "percentage": 51.08, "elapsed_time": "13:53:46", "remaining_time": "13:18:39"} +{"current_steps": 3848, "total_steps": 7532, "loss": 0.2599894404411316, "lr": 1.0471832036650217e-05, "epoch": 1.0217766564865223, "percentage": 51.09, "elapsed_time": "13:53:59", "remaining_time": "13:18:26"} +{"current_steps": 3849, "total_steps": 7532, "loss": 0.2411944717168808, "lr": 1.046744611609981e-05, "epoch": 1.0220422254680652, "percentage": 51.1, "elapsed_time": "13:54:12", "remaining_time": "13:18:13"} +{"current_steps": 3850, "total_steps": 7532, "loss": 0.25216251611709595, "lr": 
1.0463060105431303e-05, "epoch": 1.0223077944496084, "percentage": 51.12, "elapsed_time": "13:54:24", "remaining_time": "13:18:00"} +{"current_steps": 3851, "total_steps": 7532, "loss": 0.255629301071167, "lr": 1.0458674005490263e-05, "epoch": 1.0225733634311513, "percentage": 51.13, "elapsed_time": "13:54:37", "remaining_time": "13:17:47"} +{"current_steps": 3852, "total_steps": 7532, "loss": 0.24032849073410034, "lr": 1.0454287817122291e-05, "epoch": 1.0228389324126943, "percentage": 51.14, "elapsed_time": "13:54:49", "remaining_time": "13:17:33"} +{"current_steps": 3853, "total_steps": 7532, "loss": 0.23188306391239166, "lr": 1.0449901541172983e-05, "epoch": 1.0231045013942373, "percentage": 51.16, "elapsed_time": "13:55:03", "remaining_time": "13:17:20"} +{"current_steps": 3854, "total_steps": 7532, "loss": 0.2718146741390228, "lr": 1.0445515178487965e-05, "epoch": 1.0233700703757802, "percentage": 51.17, "elapsed_time": "13:55:15", "remaining_time": "13:17:06"} +{"current_steps": 3855, "total_steps": 7532, "loss": 0.30279839038848877, "lr": 1.0441128729912876e-05, "epoch": 1.0236356393573232, "percentage": 51.18, "elapsed_time": "13:55:28", "remaining_time": "13:16:53"} +{"current_steps": 3856, "total_steps": 7532, "loss": 0.2185024917125702, "lr": 1.0436742196293368e-05, "epoch": 1.023901208338866, "percentage": 51.19, "elapsed_time": "13:55:40", "remaining_time": "13:16:40"} +{"current_steps": 3857, "total_steps": 7532, "loss": 0.2956481873989105, "lr": 1.0432355578475118e-05, "epoch": 1.024166777320409, "percentage": 51.21, "elapsed_time": "13:55:53", "remaining_time": "13:16:27"} +{"current_steps": 3858, "total_steps": 7532, "loss": 0.28460678458213806, "lr": 1.0427968877303809e-05, "epoch": 1.024432346301952, "percentage": 51.22, "elapsed_time": "13:56:06", "remaining_time": "13:16:13"} +{"current_steps": 3859, "total_steps": 7532, "loss": 0.24597057700157166, "lr": 1.0423582093625146e-05, "epoch": 1.024697915283495, "percentage": 51.23, "elapsed_time": 
"13:56:19", "remaining_time": "13:16:00"} +{"current_steps": 3860, "total_steps": 7532, "loss": 0.23986583948135376, "lr": 1.0419195228284856e-05, "epoch": 1.024963484265038, "percentage": 51.25, "elapsed_time": "13:56:31", "remaining_time": "13:15:47"} +{"current_steps": 3861, "total_steps": 7532, "loss": 0.2489446997642517, "lr": 1.0414808282128668e-05, "epoch": 1.0252290532465809, "percentage": 51.26, "elapsed_time": "13:56:44", "remaining_time": "13:15:33"} +{"current_steps": 3862, "total_steps": 7532, "loss": 0.26777884364128113, "lr": 1.0410421256002334e-05, "epoch": 1.0254946222281238, "percentage": 51.27, "elapsed_time": "13:56:57", "remaining_time": "13:15:20"} +{"current_steps": 3863, "total_steps": 7532, "loss": 0.23506489396095276, "lr": 1.0406034150751625e-05, "epoch": 1.0257601912096668, "percentage": 51.29, "elapsed_time": "13:57:09", "remaining_time": "13:15:07"} +{"current_steps": 3864, "total_steps": 7532, "loss": 0.2526484429836273, "lr": 1.040164696722232e-05, "epoch": 1.0260257601912097, "percentage": 51.3, "elapsed_time": "13:57:22", "remaining_time": "13:14:53"} +{"current_steps": 3865, "total_steps": 7532, "loss": 0.2179267853498459, "lr": 1.0397259706260216e-05, "epoch": 1.0262913291727527, "percentage": 51.31, "elapsed_time": "13:57:35", "remaining_time": "13:14:40"} +{"current_steps": 3866, "total_steps": 7532, "loss": 0.2431088387966156, "lr": 1.0392872368711126e-05, "epoch": 1.0265568981542956, "percentage": 51.33, "elapsed_time": "13:57:48", "remaining_time": "13:14:27"} +{"current_steps": 3867, "total_steps": 7532, "loss": 0.26101407408714294, "lr": 1.0388484955420877e-05, "epoch": 1.0268224671358386, "percentage": 51.34, "elapsed_time": "13:58:00", "remaining_time": "13:14:14"} +{"current_steps": 3868, "total_steps": 7532, "loss": 0.23780573904514313, "lr": 1.0384097467235308e-05, "epoch": 1.0270880361173815, "percentage": 51.35, "elapsed_time": "13:58:13", "remaining_time": "13:14:01"} +{"current_steps": 3869, "total_steps": 7532, 
"loss": 0.2469894289970398, "lr": 1.0379709905000278e-05, "epoch": 1.0273536050989245, "percentage": 51.37, "elapsed_time": "13:58:26", "remaining_time": "13:13:47"} +{"current_steps": 3870, "total_steps": 7532, "loss": 0.21271926164627075, "lr": 1.0375322269561658e-05, "epoch": 1.0276191740804674, "percentage": 51.38, "elapsed_time": "13:58:39", "remaining_time": "13:13:35"} +{"current_steps": 3871, "total_steps": 7532, "loss": 0.22995726764202118, "lr": 1.0370934561765331e-05, "epoch": 1.0278847430620104, "percentage": 51.39, "elapsed_time": "13:58:52", "remaining_time": "13:13:21"} +{"current_steps": 3872, "total_steps": 7532, "loss": 0.27448171377182007, "lr": 1.0366546782457196e-05, "epoch": 1.0281503120435533, "percentage": 51.41, "elapsed_time": "13:59:05", "remaining_time": "13:13:08"} +{"current_steps": 3873, "total_steps": 7532, "loss": 0.25459539890289307, "lr": 1.0362158932483165e-05, "epoch": 1.0284158810250963, "percentage": 51.42, "elapsed_time": "13:59:17", "remaining_time": "13:12:55"} +{"current_steps": 3874, "total_steps": 7532, "loss": 0.23213380575180054, "lr": 1.0357771012689162e-05, "epoch": 1.0286814500066392, "percentage": 51.43, "elapsed_time": "13:59:30", "remaining_time": "13:12:41"} +{"current_steps": 3875, "total_steps": 7532, "loss": 0.2219776064157486, "lr": 1.0353383023921127e-05, "epoch": 1.0289470189881822, "percentage": 51.45, "elapsed_time": "13:59:43", "remaining_time": "13:12:29"} +{"current_steps": 3876, "total_steps": 7532, "loss": 0.27059125900268555, "lr": 1.0348994967025012e-05, "epoch": 1.0292125879697251, "percentage": 51.46, "elapsed_time": "13:59:55", "remaining_time": "13:12:15"} +{"current_steps": 3877, "total_steps": 7532, "loss": 0.26921501755714417, "lr": 1.034460684284678e-05, "epoch": 1.029478156951268, "percentage": 51.47, "elapsed_time": "14:00:08", "remaining_time": "13:12:02"} +{"current_steps": 3878, "total_steps": 7532, "loss": 0.24727991223335266, "lr": 1.0340218652232419e-05, "epoch": 1.029743725932811, 
"percentage": 51.49, "elapsed_time": "14:00:21", "remaining_time": "13:11:48"} +{"current_steps": 3879, "total_steps": 7532, "loss": 0.26276054978370667, "lr": 1.0335830396027912e-05, "epoch": 1.030009294914354, "percentage": 51.5, "elapsed_time": "14:00:34", "remaining_time": "13:11:35"} +{"current_steps": 3880, "total_steps": 7532, "loss": 0.25906458497047424, "lr": 1.0331442075079268e-05, "epoch": 1.030274863895897, "percentage": 51.51, "elapsed_time": "14:00:46", "remaining_time": "13:11:22"} +{"current_steps": 3881, "total_steps": 7532, "loss": 0.2708794176578522, "lr": 1.0327053690232498e-05, "epoch": 1.03054043287744, "percentage": 51.53, "elapsed_time": "14:01:00", "remaining_time": "13:11:09"} +{"current_steps": 3882, "total_steps": 7532, "loss": 0.24968653917312622, "lr": 1.0322665242333634e-05, "epoch": 1.0308060018589829, "percentage": 51.54, "elapsed_time": "14:01:13", "remaining_time": "13:10:56"} +{"current_steps": 3883, "total_steps": 7532, "loss": 0.2669135332107544, "lr": 1.0318276732228716e-05, "epoch": 1.0310715708405258, "percentage": 51.55, "elapsed_time": "14:01:26", "remaining_time": "13:10:43"} +{"current_steps": 3884, "total_steps": 7532, "loss": 0.24173730611801147, "lr": 1.0313888160763799e-05, "epoch": 1.0313371398220688, "percentage": 51.57, "elapsed_time": "14:01:38", "remaining_time": "13:10:30"} +{"current_steps": 3885, "total_steps": 7532, "loss": 0.27513059973716736, "lr": 1.0309499528784948e-05, "epoch": 1.0316027088036117, "percentage": 51.58, "elapsed_time": "14:01:51", "remaining_time": "13:10:17"} +{"current_steps": 3886, "total_steps": 7532, "loss": 0.2512688934803009, "lr": 1.0305110837138235e-05, "epoch": 1.0318682777851547, "percentage": 51.59, "elapsed_time": "14:02:04", "remaining_time": "13:10:03"} +{"current_steps": 3887, "total_steps": 7532, "loss": 0.2584962844848633, "lr": 1.0300722086669753e-05, "epoch": 1.0321338467666976, "percentage": 51.61, "elapsed_time": "14:02:17", "remaining_time": "13:09:50"} 
+{"current_steps": 3888, "total_steps": 7532, "loss": 0.23692303895950317, "lr": 1.0296333278225599e-05, "epoch": 1.0323994157482406, "percentage": 51.62, "elapsed_time": "14:02:29", "remaining_time": "13:09:37"} +{"current_steps": 3889, "total_steps": 7532, "loss": 0.2570871114730835, "lr": 1.0291944412651884e-05, "epoch": 1.0326649847297835, "percentage": 51.63, "elapsed_time": "14:02:42", "remaining_time": "13:09:24"} +{"current_steps": 3890, "total_steps": 7532, "loss": 0.2896367609500885, "lr": 1.028755549079473e-05, "epoch": 1.0329305537113265, "percentage": 51.65, "elapsed_time": "14:02:55", "remaining_time": "13:09:10"} +{"current_steps": 3891, "total_steps": 7532, "loss": 0.19990365207195282, "lr": 1.0283166513500267e-05, "epoch": 1.0331961226928694, "percentage": 51.66, "elapsed_time": "14:03:08", "remaining_time": "13:08:58"} +{"current_steps": 3892, "total_steps": 7532, "loss": 0.25235646963119507, "lr": 1.0278777481614639e-05, "epoch": 1.0334616916744124, "percentage": 51.67, "elapsed_time": "14:03:20", "remaining_time": "13:08:44"} +{"current_steps": 3893, "total_steps": 7532, "loss": 0.23675012588500977, "lr": 1.0274388395984003e-05, "epoch": 1.0337272606559553, "percentage": 51.69, "elapsed_time": "14:03:33", "remaining_time": "13:08:31"} +{"current_steps": 3894, "total_steps": 7532, "loss": 0.250516414642334, "lr": 1.026999925745452e-05, "epoch": 1.0339928296374983, "percentage": 51.7, "elapsed_time": "14:03:46", "remaining_time": "13:08:18"} +{"current_steps": 3895, "total_steps": 7532, "loss": 0.24573490023612976, "lr": 1.0265610066872365e-05, "epoch": 1.0342583986190412, "percentage": 51.71, "elapsed_time": "14:03:59", "remaining_time": "13:08:05"} +{"current_steps": 3896, "total_steps": 7532, "loss": 0.2473086714744568, "lr": 1.026122082508372e-05, "epoch": 1.0345239676005842, "percentage": 51.73, "elapsed_time": "14:04:12", "remaining_time": "13:07:52"} +{"current_steps": 3897, "total_steps": 7532, "loss": 0.26546406745910645, "lr": 
1.0256831532934783e-05, "epoch": 1.0347895365821271, "percentage": 51.74, "elapsed_time": "14:04:25", "remaining_time": "13:07:38"} +{"current_steps": 3898, "total_steps": 7532, "loss": 0.2565246522426605, "lr": 1.0252442191271754e-05, "epoch": 1.03505510556367, "percentage": 51.75, "elapsed_time": "14:04:38", "remaining_time": "13:07:25"} +{"current_steps": 3899, "total_steps": 7532, "loss": 0.24923476576805115, "lr": 1.0248052800940846e-05, "epoch": 1.035320674545213, "percentage": 51.77, "elapsed_time": "14:04:50", "remaining_time": "13:07:12"} +{"current_steps": 3900, "total_steps": 7532, "loss": 0.3079240322113037, "lr": 1.0243663362788286e-05, "epoch": 1.035586243526756, "percentage": 51.78, "elapsed_time": "14:05:03", "remaining_time": "13:06:59"} +{"current_steps": 3901, "total_steps": 7532, "loss": 0.2482951581478119, "lr": 1.0239273877660302e-05, "epoch": 1.035851812508299, "percentage": 51.79, "elapsed_time": "14:05:22", "remaining_time": "13:06:51"} +{"current_steps": 3902, "total_steps": 7532, "loss": 0.2626204192638397, "lr": 1.0234884346403138e-05, "epoch": 1.036117381489842, "percentage": 51.81, "elapsed_time": "14:05:35", "remaining_time": "13:06:38"} +{"current_steps": 3903, "total_steps": 7532, "loss": 0.23181654512882233, "lr": 1.023049476986304e-05, "epoch": 1.0363829504713848, "percentage": 51.82, "elapsed_time": "14:05:47", "remaining_time": "13:06:25"} +{"current_steps": 3904, "total_steps": 7532, "loss": 0.29164040088653564, "lr": 1.0226105148886272e-05, "epoch": 1.0366485194529278, "percentage": 51.83, "elapsed_time": "14:06:00", "remaining_time": "13:06:11"} +{"current_steps": 3905, "total_steps": 7532, "loss": 0.22025801241397858, "lr": 1.0221715484319094e-05, "epoch": 1.0369140884344707, "percentage": 51.85, "elapsed_time": "14:06:13", "remaining_time": "13:05:58"} +{"current_steps": 3906, "total_steps": 7532, "loss": 0.2819385826587677, "lr": 1.021732577700779e-05, "epoch": 1.0371796574160137, "percentage": 51.86, "elapsed_time": 
"14:06:26", "remaining_time": "13:05:45"} +{"current_steps": 3907, "total_steps": 7532, "loss": 0.24709002673625946, "lr": 1.0212936027798637e-05, "epoch": 1.0374452263975567, "percentage": 51.87, "elapsed_time": "14:06:38", "remaining_time": "13:05:31"} +{"current_steps": 3908, "total_steps": 7532, "loss": 0.22570034861564636, "lr": 1.0208546237537928e-05, "epoch": 1.0377107953790998, "percentage": 51.89, "elapsed_time": "14:06:50", "remaining_time": "13:05:18"} +{"current_steps": 3909, "total_steps": 7532, "loss": 0.25642865896224976, "lr": 1.0204156407071964e-05, "epoch": 1.0379763643606428, "percentage": 51.9, "elapsed_time": "14:07:03", "remaining_time": "13:05:05"} +{"current_steps": 3910, "total_steps": 7532, "loss": 0.25970256328582764, "lr": 1.0199766537247053e-05, "epoch": 1.0382419333421857, "percentage": 51.91, "elapsed_time": "14:07:15", "remaining_time": "13:04:51"} +{"current_steps": 3911, "total_steps": 7532, "loss": 0.2560003101825714, "lr": 1.019537662890951e-05, "epoch": 1.0385075023237287, "percentage": 51.93, "elapsed_time": "14:07:28", "remaining_time": "13:04:38"} +{"current_steps": 3912, "total_steps": 7532, "loss": 0.28138649463653564, "lr": 1.0190986682905656e-05, "epoch": 1.0387730713052716, "percentage": 51.94, "elapsed_time": "14:07:40", "remaining_time": "13:04:24"} +{"current_steps": 3913, "total_steps": 7532, "loss": 0.23531222343444824, "lr": 1.0186596700081825e-05, "epoch": 1.0390386402868146, "percentage": 51.95, "elapsed_time": "14:07:53", "remaining_time": "13:04:11"} +{"current_steps": 3914, "total_steps": 7532, "loss": 0.24912862479686737, "lr": 1.018220668128435e-05, "epoch": 1.0393042092683575, "percentage": 51.96, "elapsed_time": "14:08:05", "remaining_time": "13:03:57"} +{"current_steps": 3915, "total_steps": 7532, "loss": 0.24188724160194397, "lr": 1.0177816627359575e-05, "epoch": 1.0395697782499005, "percentage": 51.98, "elapsed_time": "14:08:18", "remaining_time": "13:03:44"} +{"current_steps": 3916, "total_steps": 
7532, "loss": 0.2709474563598633, "lr": 1.0173426539153853e-05, "epoch": 1.0398353472314434, "percentage": 51.99, "elapsed_time": "14:08:30", "remaining_time": "13:03:30"} +{"current_steps": 3917, "total_steps": 7532, "loss": 0.2400204837322235, "lr": 1.0169036417513538e-05, "epoch": 1.0401009162129864, "percentage": 52.0, "elapsed_time": "14:08:43", "remaining_time": "13:03:17"} +{"current_steps": 3918, "total_steps": 7532, "loss": 0.2687132954597473, "lr": 1.0164646263284993e-05, "epoch": 1.0403664851945293, "percentage": 52.02, "elapsed_time": "14:08:55", "remaining_time": "13:03:03"} +{"current_steps": 3919, "total_steps": 7532, "loss": 0.25139346718788147, "lr": 1.0160256077314592e-05, "epoch": 1.0406320541760723, "percentage": 52.03, "elapsed_time": "14:09:08", "remaining_time": "13:02:50"} +{"current_steps": 3920, "total_steps": 7532, "loss": 0.25873464345932007, "lr": 1.0155865860448712e-05, "epoch": 1.0408976231576152, "percentage": 52.04, "elapsed_time": "14:09:21", "remaining_time": "13:02:37"} +{"current_steps": 3921, "total_steps": 7532, "loss": 0.2510434687137604, "lr": 1.0151475613533732e-05, "epoch": 1.0411631921391582, "percentage": 52.06, "elapsed_time": "14:09:33", "remaining_time": "13:02:23"} +{"current_steps": 3922, "total_steps": 7532, "loss": 0.24567106366157532, "lr": 1.0147085337416036e-05, "epoch": 1.0414287611207012, "percentage": 52.07, "elapsed_time": "14:09:46", "remaining_time": "13:02:10"} +{"current_steps": 3923, "total_steps": 7532, "loss": 0.25028282403945923, "lr": 1.0142695032942024e-05, "epoch": 1.041694330102244, "percentage": 52.08, "elapsed_time": "14:09:59", "remaining_time": "13:01:57"} +{"current_steps": 3924, "total_steps": 7532, "loss": 0.23542484641075134, "lr": 1.0138304700958096e-05, "epoch": 1.041959899083787, "percentage": 52.1, "elapsed_time": "14:10:11", "remaining_time": "13:01:43"} +{"current_steps": 3925, "total_steps": 7532, "loss": 0.28974449634552, "lr": 1.0133914342310649e-05, "epoch": 1.04222546806533, 
"percentage": 52.11, "elapsed_time": "14:10:23", "remaining_time": "13:01:29"} +{"current_steps": 3926, "total_steps": 7532, "loss": 0.23417247831821442, "lr": 1.0129523957846097e-05, "epoch": 1.042491037046873, "percentage": 52.12, "elapsed_time": "14:10:36", "remaining_time": "13:01:16"} +{"current_steps": 3927, "total_steps": 7532, "loss": 0.23247018456459045, "lr": 1.0125133548410852e-05, "epoch": 1.042756606028416, "percentage": 52.14, "elapsed_time": "14:10:48", "remaining_time": "13:01:02"} +{"current_steps": 3928, "total_steps": 7532, "loss": 0.23860129714012146, "lr": 1.0120743114851337e-05, "epoch": 1.0430221750099589, "percentage": 52.15, "elapsed_time": "14:11:01", "remaining_time": "13:00:49"} +{"current_steps": 3929, "total_steps": 7532, "loss": 0.2609105706214905, "lr": 1.0116352658013973e-05, "epoch": 1.0432877439915018, "percentage": 52.16, "elapsed_time": "14:11:13", "remaining_time": "13:00:35"} +{"current_steps": 3930, "total_steps": 7532, "loss": 0.2559507489204407, "lr": 1.0111962178745187e-05, "epoch": 1.0435533129730448, "percentage": 52.18, "elapsed_time": "14:11:26", "remaining_time": "13:00:22"} +{"current_steps": 3931, "total_steps": 7532, "loss": 0.2708527147769928, "lr": 1.0107571677891415e-05, "epoch": 1.0438188819545877, "percentage": 52.19, "elapsed_time": "14:11:38", "remaining_time": "13:00:09"} +{"current_steps": 3932, "total_steps": 7532, "loss": 0.25884875655174255, "lr": 1.0103181156299091e-05, "epoch": 1.0440844509361307, "percentage": 52.2, "elapsed_time": "14:11:51", "remaining_time": "12:59:55"} +{"current_steps": 3933, "total_steps": 7532, "loss": 0.2631877660751343, "lr": 1.0098790614814658e-05, "epoch": 1.0443500199176736, "percentage": 52.22, "elapsed_time": "14:12:03", "remaining_time": "12:59:42"} +{"current_steps": 3934, "total_steps": 7532, "loss": 0.27179086208343506, "lr": 1.0094400054284559e-05, "epoch": 1.0446155888992166, "percentage": 52.23, "elapsed_time": "14:12:16", "remaining_time": "12:59:28"} 
+{"current_steps": 3935, "total_steps": 7532, "loss": 0.21690386533737183, "lr": 1.0090009475555245e-05, "epoch": 1.0448811578807595, "percentage": 52.24, "elapsed_time": "14:12:28", "remaining_time": "12:59:15"} +{"current_steps": 3936, "total_steps": 7532, "loss": 0.20192815363407135, "lr": 1.0085618879473162e-05, "epoch": 1.0451467268623025, "percentage": 52.26, "elapsed_time": "14:12:41", "remaining_time": "12:59:02"} +{"current_steps": 3937, "total_steps": 7532, "loss": 0.2680777907371521, "lr": 1.0081228266884773e-05, "epoch": 1.0454122958438454, "percentage": 52.27, "elapsed_time": "14:12:53", "remaining_time": "12:58:48"} +{"current_steps": 3938, "total_steps": 7532, "loss": 0.2566579580307007, "lr": 1.007683763863653e-05, "epoch": 1.0456778648253884, "percentage": 52.28, "elapsed_time": "14:13:05", "remaining_time": "12:58:34"} +{"current_steps": 3939, "total_steps": 7532, "loss": 0.2508152723312378, "lr": 1.0072446995574895e-05, "epoch": 1.0459434338069313, "percentage": 52.3, "elapsed_time": "14:13:18", "remaining_time": "12:58:21"} +{"current_steps": 3940, "total_steps": 7532, "loss": 0.2880190908908844, "lr": 1.0068056338546335e-05, "epoch": 1.0462090027884743, "percentage": 52.31, "elapsed_time": "14:13:31", "remaining_time": "12:58:08"} +{"current_steps": 3941, "total_steps": 7532, "loss": 0.2646787464618683, "lr": 1.0063665668397316e-05, "epoch": 1.0464745717700172, "percentage": 52.32, "elapsed_time": "14:13:44", "remaining_time": "12:57:54"} +{"current_steps": 3942, "total_steps": 7532, "loss": 0.2327616810798645, "lr": 1.0059274985974305e-05, "epoch": 1.0467401407515602, "percentage": 52.34, "elapsed_time": "14:13:56", "remaining_time": "12:57:41"} +{"current_steps": 3943, "total_steps": 7532, "loss": 0.24756258726119995, "lr": 1.0054884292123778e-05, "epoch": 1.0470057097331031, "percentage": 52.35, "elapsed_time": "14:14:09", "remaining_time": "12:57:27"} +{"current_steps": 3944, "total_steps": 7532, "loss": 0.23657771944999695, "lr": 
1.0050493587692207e-05, "epoch": 1.047271278714646, "percentage": 52.36, "elapsed_time": "14:14:21", "remaining_time": "12:57:14"} +{"current_steps": 3945, "total_steps": 7532, "loss": 0.2541351616382599, "lr": 1.0046102873526068e-05, "epoch": 1.047536847696189, "percentage": 52.38, "elapsed_time": "14:14:34", "remaining_time": "12:57:01"} +{"current_steps": 3946, "total_steps": 7532, "loss": 0.2330317348241806, "lr": 1.0041712150471839e-05, "epoch": 1.047802416677732, "percentage": 52.39, "elapsed_time": "14:14:46", "remaining_time": "12:56:47"} +{"current_steps": 3947, "total_steps": 7532, "loss": 0.23411181569099426, "lr": 1.0037321419375997e-05, "epoch": 1.048067985659275, "percentage": 52.4, "elapsed_time": "14:14:59", "remaining_time": "12:56:34"} +{"current_steps": 3948, "total_steps": 7532, "loss": 0.2605017125606537, "lr": 1.0032930681085028e-05, "epoch": 1.048333554640818, "percentage": 52.42, "elapsed_time": "14:15:11", "remaining_time": "12:56:20"} +{"current_steps": 3949, "total_steps": 7532, "loss": 0.28651514649391174, "lr": 1.0028539936445407e-05, "epoch": 1.0485991236223609, "percentage": 52.43, "elapsed_time": "14:15:24", "remaining_time": "12:56:07"} +{"current_steps": 3950, "total_steps": 7532, "loss": 0.22912876307964325, "lr": 1.0024149186303628e-05, "epoch": 1.0488646926039038, "percentage": 52.44, "elapsed_time": "14:15:37", "remaining_time": "12:55:54"} +{"current_steps": 3951, "total_steps": 7532, "loss": 0.24032847583293915, "lr": 1.001975843150617e-05, "epoch": 1.0491302615854468, "percentage": 52.46, "elapsed_time": "14:15:49", "remaining_time": "12:55:41"} +{"current_steps": 3952, "total_steps": 7532, "loss": 0.17826229333877563, "lr": 1.0015367672899521e-05, "epoch": 1.0493958305669897, "percentage": 52.47, "elapsed_time": "14:16:02", "remaining_time": "12:55:27"} +{"current_steps": 3953, "total_steps": 7532, "loss": 0.2619745433330536, "lr": 1.0010976911330163e-05, "epoch": 1.0496613995485327, "percentage": 52.48, "elapsed_time": 
"14:16:14", "remaining_time": "12:55:14"} +{"current_steps": 3954, "total_steps": 7532, "loss": 0.24104374647140503, "lr": 1.0006586147644585e-05, "epoch": 1.0499269685300756, "percentage": 52.5, "elapsed_time": "14:16:26", "remaining_time": "12:55:00"} +{"current_steps": 3955, "total_steps": 7532, "loss": 0.22913998365402222, "lr": 1.0002195382689277e-05, "epoch": 1.0501925375116186, "percentage": 52.51, "elapsed_time": "14:16:39", "remaining_time": "12:54:47"} +{"current_steps": 3956, "total_steps": 7532, "loss": 0.2625126838684082, "lr": 9.997804617310724e-06, "epoch": 1.0504581064931615, "percentage": 52.52, "elapsed_time": "14:16:51", "remaining_time": "12:54:33"} +{"current_steps": 3957, "total_steps": 7532, "loss": 0.23098430037498474, "lr": 9.993413852355416e-06, "epoch": 1.0507236754747045, "percentage": 52.54, "elapsed_time": "14:17:04", "remaining_time": "12:54:19"} +{"current_steps": 3958, "total_steps": 7532, "loss": 0.2866731882095337, "lr": 9.98902308866984e-06, "epoch": 1.0509892444562474, "percentage": 52.55, "elapsed_time": "14:17:16", "remaining_time": "12:54:06"} +{"current_steps": 3959, "total_steps": 7532, "loss": 0.2520306706428528, "lr": 9.984632327100482e-06, "epoch": 1.0512548134377904, "percentage": 52.56, "elapsed_time": "14:17:29", "remaining_time": "12:53:52"} +{"current_steps": 3960, "total_steps": 7532, "loss": 0.29688766598701477, "lr": 9.980241568493834e-06, "epoch": 1.0515203824193333, "percentage": 52.58, "elapsed_time": "14:17:41", "remaining_time": "12:53:39"} +{"current_steps": 3961, "total_steps": 7532, "loss": 0.2876695990562439, "lr": 9.975850813696375e-06, "epoch": 1.0517859514008763, "percentage": 52.59, "elapsed_time": "14:17:53", "remaining_time": "12:53:25"} +{"current_steps": 3962, "total_steps": 7532, "loss": 0.2402629554271698, "lr": 9.971460063554595e-06, "epoch": 1.0520515203824194, "percentage": 52.6, "elapsed_time": "14:18:06", "remaining_time": "12:53:12"} +{"current_steps": 3963, "total_steps": 7532, "loss": 
0.32080164551734924, "lr": 9.967069318914977e-06, "epoch": 1.0523170893639624, "percentage": 52.62, "elapsed_time": "14:18:19", "remaining_time": "12:52:58"} +{"current_steps": 3964, "total_steps": 7532, "loss": 0.2642936110496521, "lr": 9.962678580624008e-06, "epoch": 1.0525826583455054, "percentage": 52.63, "elapsed_time": "14:18:32", "remaining_time": "12:52:46"} +{"current_steps": 3965, "total_steps": 7532, "loss": 0.255870521068573, "lr": 9.958287849528163e-06, "epoch": 1.0528482273270483, "percentage": 52.64, "elapsed_time": "14:18:44", "remaining_time": "12:52:32"} +{"current_steps": 3966, "total_steps": 7532, "loss": 0.2695184350013733, "lr": 9.953897126473933e-06, "epoch": 1.0531137963085913, "percentage": 52.66, "elapsed_time": "14:18:57", "remaining_time": "12:52:19"} +{"current_steps": 3967, "total_steps": 7532, "loss": 0.24576464295387268, "lr": 9.949506412307795e-06, "epoch": 1.0533793652901342, "percentage": 52.67, "elapsed_time": "14:19:09", "remaining_time": "12:52:05"} +{"current_steps": 3968, "total_steps": 7532, "loss": 0.26517459750175476, "lr": 9.945115707876224e-06, "epoch": 1.0536449342716772, "percentage": 52.68, "elapsed_time": "14:19:21", "remaining_time": "12:51:52"} +{"current_steps": 3969, "total_steps": 7532, "loss": 0.30468082427978516, "lr": 9.940725014025696e-06, "epoch": 1.05391050325322, "percentage": 52.7, "elapsed_time": "14:19:34", "remaining_time": "12:51:38"} +{"current_steps": 3970, "total_steps": 7532, "loss": 0.25299298763275146, "lr": 9.936334331602687e-06, "epoch": 1.054176072234763, "percentage": 52.71, "elapsed_time": "14:19:47", "remaining_time": "12:51:25"} +{"current_steps": 3971, "total_steps": 7532, "loss": 0.2659488320350647, "lr": 9.931943661453668e-06, "epoch": 1.054441641216306, "percentage": 52.72, "elapsed_time": "14:20:00", "remaining_time": "12:51:12"} +{"current_steps": 3972, "total_steps": 7532, "loss": 0.25957295298576355, "lr": 9.92755300442511e-06, "epoch": 1.054707210197849, "percentage": 52.73, 
"elapsed_time": "14:20:12", "remaining_time": "12:50:59"} +{"current_steps": 3973, "total_steps": 7532, "loss": 0.2416645884513855, "lr": 9.923162361363476e-06, "epoch": 1.054972779179392, "percentage": 52.75, "elapsed_time": "14:20:25", "remaining_time": "12:50:45"} +{"current_steps": 3974, "total_steps": 7532, "loss": 0.2627662122249603, "lr": 9.91877173311523e-06, "epoch": 1.0552383481609349, "percentage": 52.76, "elapsed_time": "14:20:37", "remaining_time": "12:50:32"} +{"current_steps": 3975, "total_steps": 7532, "loss": 0.2876631021499634, "lr": 9.91438112052684e-06, "epoch": 1.0555039171424778, "percentage": 52.77, "elapsed_time": "14:20:50", "remaining_time": "12:50:18"} +{"current_steps": 3976, "total_steps": 7532, "loss": 0.28336596488952637, "lr": 9.90999052444476e-06, "epoch": 1.0557694861240208, "percentage": 52.79, "elapsed_time": "14:21:02", "remaining_time": "12:50:05"} +{"current_steps": 3977, "total_steps": 7532, "loss": 0.2970484495162964, "lr": 9.905599945715443e-06, "epoch": 1.0560350551055637, "percentage": 52.8, "elapsed_time": "14:21:15", "remaining_time": "12:49:51"} +{"current_steps": 3978, "total_steps": 7532, "loss": 0.27202755212783813, "lr": 9.901209385185345e-06, "epoch": 1.0563006240871067, "percentage": 52.81, "elapsed_time": "14:21:27", "remaining_time": "12:49:38"} +{"current_steps": 3979, "total_steps": 7532, "loss": 0.2702459990978241, "lr": 9.896818843700912e-06, "epoch": 1.0565661930686496, "percentage": 52.83, "elapsed_time": "14:21:39", "remaining_time": "12:49:24"} +{"current_steps": 3980, "total_steps": 7532, "loss": 0.26057881116867065, "lr": 9.89242832210859e-06, "epoch": 1.0568317620501926, "percentage": 52.84, "elapsed_time": "14:21:52", "remaining_time": "12:49:11"} +{"current_steps": 3981, "total_steps": 7532, "loss": 0.24006876349449158, "lr": 9.888037821254816e-06, "epoch": 1.0570973310317355, "percentage": 52.85, "elapsed_time": "14:22:04", "remaining_time": "12:48:57"} +{"current_steps": 3982, "total_steps": 
7532, "loss": 0.2437625676393509, "lr": 9.883647341986032e-06, "epoch": 1.0573629000132785, "percentage": 52.87, "elapsed_time": "14:22:17", "remaining_time": "12:48:44"} +{"current_steps": 3983, "total_steps": 7532, "loss": 0.24256819486618042, "lr": 9.879256885148666e-06, "epoch": 1.0576284689948214, "percentage": 52.88, "elapsed_time": "14:22:29", "remaining_time": "12:48:31"} +{"current_steps": 3984, "total_steps": 7532, "loss": 0.2714581787586212, "lr": 9.874866451589151e-06, "epoch": 1.0578940379763644, "percentage": 52.89, "elapsed_time": "14:22:42", "remaining_time": "12:48:17"} +{"current_steps": 3985, "total_steps": 7532, "loss": 0.30309075117111206, "lr": 9.870476042153907e-06, "epoch": 1.0581596069579073, "percentage": 52.91, "elapsed_time": "14:22:54", "remaining_time": "12:48:04"} +{"current_steps": 3986, "total_steps": 7532, "loss": 0.2938288450241089, "lr": 9.866085657689355e-06, "epoch": 1.0584251759394503, "percentage": 52.92, "elapsed_time": "14:23:07", "remaining_time": "12:47:50"} +{"current_steps": 3987, "total_steps": 7532, "loss": 0.23748518526554108, "lr": 9.86169529904191e-06, "epoch": 1.0586907449209932, "percentage": 52.93, "elapsed_time": "14:23:19", "remaining_time": "12:47:37"} +{"current_steps": 3988, "total_steps": 7532, "loss": 0.2883969247341156, "lr": 9.857304967057977e-06, "epoch": 1.0589563139025362, "percentage": 52.95, "elapsed_time": "14:23:32", "remaining_time": "12:47:23"} +{"current_steps": 3989, "total_steps": 7532, "loss": 0.28301289677619934, "lr": 9.852914662583966e-06, "epoch": 1.0592218828840791, "percentage": 52.96, "elapsed_time": "14:23:44", "remaining_time": "12:47:10"} +{"current_steps": 3990, "total_steps": 7532, "loss": 0.22616548836231232, "lr": 9.848524386466273e-06, "epoch": 1.059487451865622, "percentage": 52.97, "elapsed_time": "14:23:57", "remaining_time": "12:46:56"} +{"current_steps": 3991, "total_steps": 7532, "loss": 0.2282804250717163, "lr": 9.844134139551291e-06, "epoch": 1.059753020847165, 
"percentage": 52.99, "elapsed_time": "14:24:09", "remaining_time": "12:46:43"} +{"current_steps": 3992, "total_steps": 7532, "loss": 0.2407834678888321, "lr": 9.839743922685408e-06, "epoch": 1.060018589828708, "percentage": 53.0, "elapsed_time": "14:24:22", "remaining_time": "12:46:29"} +{"current_steps": 3993, "total_steps": 7532, "loss": 0.22690361738204956, "lr": 9.835353736715007e-06, "epoch": 1.060284158810251, "percentage": 53.01, "elapsed_time": "14:24:34", "remaining_time": "12:46:16"} +{"current_steps": 3994, "total_steps": 7532, "loss": 0.23291411995887756, "lr": 9.830963582486465e-06, "epoch": 1.060549727791794, "percentage": 53.03, "elapsed_time": "14:24:47", "remaining_time": "12:46:03"} +{"current_steps": 3995, "total_steps": 7532, "loss": 0.24524198472499847, "lr": 9.82657346084615e-06, "epoch": 1.0608152967733369, "percentage": 53.04, "elapsed_time": "14:25:00", "remaining_time": "12:45:50"} +{"current_steps": 3996, "total_steps": 7532, "loss": 0.22087743878364563, "lr": 9.822183372640426e-06, "epoch": 1.0610808657548798, "percentage": 53.05, "elapsed_time": "14:25:12", "remaining_time": "12:45:36"} +{"current_steps": 3997, "total_steps": 7532, "loss": 0.2459079772233963, "lr": 9.817793318715652e-06, "epoch": 1.0613464347364228, "percentage": 53.07, "elapsed_time": "14:25:25", "remaining_time": "12:45:23"} +{"current_steps": 3998, "total_steps": 7532, "loss": 0.24429920315742493, "lr": 9.813403299918178e-06, "epoch": 1.0616120037179657, "percentage": 53.08, "elapsed_time": "14:25:38", "remaining_time": "12:45:10"} +{"current_steps": 3999, "total_steps": 7532, "loss": 0.2332335114479065, "lr": 9.809013317094345e-06, "epoch": 1.0618775726995087, "percentage": 53.09, "elapsed_time": "14:25:51", "remaining_time": "12:44:57"} +{"current_steps": 4000, "total_steps": 7532, "loss": 0.2861659526824951, "lr": 9.804623371090493e-06, "epoch": 1.0621431416810516, "percentage": 53.11, "elapsed_time": "14:26:03", "remaining_time": "12:44:43"} +{"current_steps": 
4001, "total_steps": 7532, "loss": 0.22731532156467438, "lr": 9.800233462752949e-06, "epoch": 1.0624087106625946, "percentage": 53.12, "elapsed_time": "14:26:21", "remaining_time": "12:44:35"} +{"current_steps": 4002, "total_steps": 7532, "loss": 0.245025634765625, "lr": 9.795843592928036e-06, "epoch": 1.0626742796441375, "percentage": 53.13, "elapsed_time": "14:26:34", "remaining_time": "12:44:22"} +{"current_steps": 4003, "total_steps": 7532, "loss": 0.2826273441314697, "lr": 9.791453762462075e-06, "epoch": 1.0629398486256805, "percentage": 53.15, "elapsed_time": "14:26:47", "remaining_time": "12:44:09"} +{"current_steps": 4004, "total_steps": 7532, "loss": 0.24737229943275452, "lr": 9.787063972201368e-06, "epoch": 1.0632054176072234, "percentage": 53.16, "elapsed_time": "14:26:59", "remaining_time": "12:43:55"} +{"current_steps": 4005, "total_steps": 7532, "loss": 0.23368477821350098, "lr": 9.782674222992214e-06, "epoch": 1.0634709865887664, "percentage": 53.17, "elapsed_time": "14:27:12", "remaining_time": "12:43:42"} +{"current_steps": 4006, "total_steps": 7532, "loss": 0.2754492461681366, "lr": 9.778284515680908e-06, "epoch": 1.0637365555703093, "percentage": 53.19, "elapsed_time": "14:27:25", "remaining_time": "12:43:29"} +{"current_steps": 4007, "total_steps": 7532, "loss": 0.2814168334007263, "lr": 9.773894851113732e-06, "epoch": 1.0640021245518523, "percentage": 53.2, "elapsed_time": "14:27:38", "remaining_time": "12:43:16"} +{"current_steps": 4008, "total_steps": 7532, "loss": 0.25388047099113464, "lr": 9.769505230136962e-06, "epoch": 1.0642676935333952, "percentage": 53.21, "elapsed_time": "14:27:50", "remaining_time": "12:43:02"} +{"current_steps": 4009, "total_steps": 7532, "loss": 0.25435230135917664, "lr": 9.765115653596867e-06, "epoch": 1.0645332625149382, "percentage": 53.23, "elapsed_time": "14:28:03", "remaining_time": "12:42:49"} +{"current_steps": 4010, "total_steps": 7532, "loss": 0.265840083360672, "lr": 9.760726122339698e-06, "epoch": 
1.0647988314964811, "percentage": 53.24, "elapsed_time": "14:28:16", "remaining_time": "12:42:36"} +{"current_steps": 4011, "total_steps": 7532, "loss": 0.2533451020717621, "lr": 9.756336637211716e-06, "epoch": 1.065064400478024, "percentage": 53.25, "elapsed_time": "14:28:29", "remaining_time": "12:42:23"} +{"current_steps": 4012, "total_steps": 7532, "loss": 0.25214290618896484, "lr": 9.751947199059155e-06, "epoch": 1.065329969459567, "percentage": 53.27, "elapsed_time": "14:28:42", "remaining_time": "12:42:10"} +{"current_steps": 4013, "total_steps": 7532, "loss": 0.25039419531822205, "lr": 9.74755780872825e-06, "epoch": 1.06559553844111, "percentage": 53.28, "elapsed_time": "14:28:55", "remaining_time": "12:41:57"} +{"current_steps": 4014, "total_steps": 7532, "loss": 0.21251091361045837, "lr": 9.74316846706522e-06, "epoch": 1.065861107422653, "percentage": 53.29, "elapsed_time": "14:29:07", "remaining_time": "12:41:43"} +{"current_steps": 4015, "total_steps": 7532, "loss": 0.25898969173431396, "lr": 9.738779174916281e-06, "epoch": 1.066126676404196, "percentage": 53.31, "elapsed_time": "14:29:20", "remaining_time": "12:41:30"} +{"current_steps": 4016, "total_steps": 7532, "loss": 0.2655499577522278, "lr": 9.734389933127639e-06, "epoch": 1.0663922453857388, "percentage": 53.32, "elapsed_time": "14:29:33", "remaining_time": "12:41:17"} +{"current_steps": 4017, "total_steps": 7532, "loss": 0.2221338450908661, "lr": 9.730000742545485e-06, "epoch": 1.0666578143672818, "percentage": 53.33, "elapsed_time": "14:29:45", "remaining_time": "12:41:04"} +{"current_steps": 4018, "total_steps": 7532, "loss": 0.2567589581012726, "lr": 9.725611604016002e-06, "epoch": 1.0669233833488247, "percentage": 53.35, "elapsed_time": "14:29:59", "remaining_time": "12:40:51"} +{"current_steps": 4019, "total_steps": 7532, "loss": 0.24440976977348328, "lr": 9.721222518385361e-06, "epoch": 1.0671889523303677, "percentage": 53.36, "elapsed_time": "14:30:11", "remaining_time": "12:40:38"} 
+{"current_steps": 4020, "total_steps": 7532, "loss": 0.2229192852973938, "lr": 9.716833486499735e-06, "epoch": 1.0674545213119107, "percentage": 53.37, "elapsed_time": "14:30:25", "remaining_time": "12:40:25"} +{"current_steps": 4021, "total_steps": 7532, "loss": 0.26231470704078674, "lr": 9.712444509205273e-06, "epoch": 1.0677200902934538, "percentage": 53.39, "elapsed_time": "14:30:37", "remaining_time": "12:40:12"} +{"current_steps": 4022, "total_steps": 7532, "loss": 0.25099092721939087, "lr": 9.708055587348119e-06, "epoch": 1.0679856592749968, "percentage": 53.4, "elapsed_time": "14:30:50", "remaining_time": "12:39:59"} +{"current_steps": 4023, "total_steps": 7532, "loss": 0.22979633510112762, "lr": 9.703666721774403e-06, "epoch": 1.0682512282565397, "percentage": 53.41, "elapsed_time": "14:31:03", "remaining_time": "12:39:46"} +{"current_steps": 4024, "total_steps": 7532, "loss": 0.2361093908548355, "lr": 9.699277913330252e-06, "epoch": 1.0685167972380827, "percentage": 53.43, "elapsed_time": "14:31:16", "remaining_time": "12:39:33"} +{"current_steps": 4025, "total_steps": 7532, "loss": 0.2390863001346588, "lr": 9.694889162861768e-06, "epoch": 1.0687823662196256, "percentage": 53.44, "elapsed_time": "14:31:29", "remaining_time": "12:39:20"} +{"current_steps": 4026, "total_steps": 7532, "loss": 0.24917885661125183, "lr": 9.690500471215057e-06, "epoch": 1.0690479352011686, "percentage": 53.45, "elapsed_time": "14:31:42", "remaining_time": "12:39:07"} +{"current_steps": 4027, "total_steps": 7532, "loss": 0.24215272068977356, "lr": 9.686111839236206e-06, "epoch": 1.0693135041827115, "percentage": 53.47, "elapsed_time": "14:31:55", "remaining_time": "12:38:54"} +{"current_steps": 4028, "total_steps": 7532, "loss": 0.27874231338500977, "lr": 9.681723267771284e-06, "epoch": 1.0695790731642545, "percentage": 53.48, "elapsed_time": "14:32:08", "remaining_time": "12:38:41"} +{"current_steps": 4029, "total_steps": 7532, "loss": 0.24076086282730103, "lr": 
9.677334757666368e-06, "epoch": 1.0698446421457974, "percentage": 53.49, "elapsed_time": "14:32:21", "remaining_time": "12:38:27"} +{"current_steps": 4030, "total_steps": 7532, "loss": 0.2444242238998413, "lr": 9.672946309767504e-06, "epoch": 1.0701102111273404, "percentage": 53.51, "elapsed_time": "14:32:33", "remaining_time": "12:38:14"} +{"current_steps": 4031, "total_steps": 7532, "loss": 0.2737279236316681, "lr": 9.668557924920735e-06, "epoch": 1.0703757801088833, "percentage": 53.52, "elapsed_time": "14:32:46", "remaining_time": "12:38:01"} +{"current_steps": 4032, "total_steps": 7532, "loss": 0.24105575680732727, "lr": 9.664169603972091e-06, "epoch": 1.0706413490904263, "percentage": 53.53, "elapsed_time": "14:32:58", "remaining_time": "12:37:47"} +{"current_steps": 4033, "total_steps": 7532, "loss": 0.27791836857795715, "lr": 9.659781347767584e-06, "epoch": 1.0709069180719692, "percentage": 53.54, "elapsed_time": "14:33:11", "remaining_time": "12:37:34"} +{"current_steps": 4034, "total_steps": 7532, "loss": 0.255472868680954, "lr": 9.655393157153221e-06, "epoch": 1.0711724870535122, "percentage": 53.56, "elapsed_time": "14:33:23", "remaining_time": "12:37:21"} +{"current_steps": 4035, "total_steps": 7532, "loss": 0.2523707151412964, "lr": 9.651005032974994e-06, "epoch": 1.0714380560350552, "percentage": 53.57, "elapsed_time": "14:33:36", "remaining_time": "12:37:07"} +{"current_steps": 4036, "total_steps": 7532, "loss": 0.24584606289863586, "lr": 9.64661697607888e-06, "epoch": 1.071703625016598, "percentage": 53.58, "elapsed_time": "14:33:49", "remaining_time": "12:36:54"} +{"current_steps": 4037, "total_steps": 7532, "loss": 0.25182732939720154, "lr": 9.64222898731084e-06, "epoch": 1.071969193998141, "percentage": 53.6, "elapsed_time": "14:34:02", "remaining_time": "12:36:41"} +{"current_steps": 4038, "total_steps": 7532, "loss": 0.254008412361145, "lr": 9.637841067516837e-06, "epoch": 1.072234762979684, "percentage": 53.61, "elapsed_time": "14:34:14", 
"remaining_time": "12:36:27"} +{"current_steps": 4039, "total_steps": 7532, "loss": 0.2314324826002121, "lr": 9.633453217542806e-06, "epoch": 1.072500331961227, "percentage": 53.62, "elapsed_time": "14:34:27", "remaining_time": "12:36:14"} +{"current_steps": 4040, "total_steps": 7532, "loss": 0.2256058305501938, "lr": 9.62906543823467e-06, "epoch": 1.07276590094277, "percentage": 53.64, "elapsed_time": "14:34:39", "remaining_time": "12:36:01"} +{"current_steps": 4041, "total_steps": 7532, "loss": 0.2577894330024719, "lr": 9.624677730438344e-06, "epoch": 1.0730314699243129, "percentage": 53.65, "elapsed_time": "14:34:53", "remaining_time": "12:35:48"} +{"current_steps": 4042, "total_steps": 7532, "loss": 0.23520560562610626, "lr": 9.620290094999723e-06, "epoch": 1.0732970389058558, "percentage": 53.66, "elapsed_time": "14:35:05", "remaining_time": "12:35:35"} +{"current_steps": 4043, "total_steps": 7532, "loss": 0.2472849190235138, "lr": 9.615902532764695e-06, "epoch": 1.0735626078873988, "percentage": 53.68, "elapsed_time": "14:35:18", "remaining_time": "12:35:22"} +{"current_steps": 4044, "total_steps": 7532, "loss": 0.25053414702415466, "lr": 9.611515044579128e-06, "epoch": 1.0738281768689417, "percentage": 53.69, "elapsed_time": "14:35:31", "remaining_time": "12:35:09"} +{"current_steps": 4045, "total_steps": 7532, "loss": 0.24229007959365845, "lr": 9.607127631288879e-06, "epoch": 1.0740937458504847, "percentage": 53.7, "elapsed_time": "14:35:44", "remaining_time": "12:34:56"} +{"current_steps": 4046, "total_steps": 7532, "loss": 0.2793073058128357, "lr": 9.602740293739786e-06, "epoch": 1.0743593148320276, "percentage": 53.72, "elapsed_time": "14:35:57", "remaining_time": "12:34:43"} +{"current_steps": 4047, "total_steps": 7532, "loss": 0.24547399580478668, "lr": 9.598353032777682e-06, "epoch": 1.0746248838135706, "percentage": 53.73, "elapsed_time": "14:36:10", "remaining_time": "12:34:30"} +{"current_steps": 4048, "total_steps": 7532, "loss": 
0.2776937186717987, "lr": 9.593965849248378e-06, "epoch": 1.0748904527951135, "percentage": 53.74, "elapsed_time": "14:36:23", "remaining_time": "12:34:16"} +{"current_steps": 4049, "total_steps": 7532, "loss": 0.22677727043628693, "lr": 9.589578743997668e-06, "epoch": 1.0751560217766565, "percentage": 53.76, "elapsed_time": "14:36:36", "remaining_time": "12:34:03"} +{"current_steps": 4050, "total_steps": 7532, "loss": 0.23254704475402832, "lr": 9.585191717871336e-06, "epoch": 1.0754215907581994, "percentage": 53.77, "elapsed_time": "14:36:49", "remaining_time": "12:33:51"} +{"current_steps": 4051, "total_steps": 7532, "loss": 0.2899828255176544, "lr": 9.580804771715148e-06, "epoch": 1.0756871597397424, "percentage": 53.78, "elapsed_time": "14:37:02", "remaining_time": "12:33:37"} +{"current_steps": 4052, "total_steps": 7532, "loss": 0.24632850289344788, "lr": 9.576417906374856e-06, "epoch": 1.0759527287212853, "percentage": 53.8, "elapsed_time": "14:37:15", "remaining_time": "12:33:25"} +{"current_steps": 4053, "total_steps": 7532, "loss": 0.2661561369895935, "lr": 9.572031122696196e-06, "epoch": 1.0762182977028283, "percentage": 53.81, "elapsed_time": "14:37:28", "remaining_time": "12:33:12"} +{"current_steps": 4054, "total_steps": 7532, "loss": 0.22364279627799988, "lr": 9.567644421524889e-06, "epoch": 1.0764838666843712, "percentage": 53.82, "elapsed_time": "14:37:41", "remaining_time": "12:32:59"} +{"current_steps": 4055, "total_steps": 7532, "loss": 0.26748427748680115, "lr": 9.563257803706635e-06, "epoch": 1.0767494356659142, "percentage": 53.84, "elapsed_time": "14:37:54", "remaining_time": "12:32:46"} +{"current_steps": 4056, "total_steps": 7532, "loss": 0.22851283848285675, "lr": 9.55887127008713e-06, "epoch": 1.0770150046474571, "percentage": 53.85, "elapsed_time": "14:38:07", "remaining_time": "12:32:33"} +{"current_steps": 4057, "total_steps": 7532, "loss": 0.2456260323524475, "lr": 9.554484821512037e-06, "epoch": 1.077280573629, "percentage": 53.86, 
"elapsed_time": "14:38:20", "remaining_time": "12:32:20"} +{"current_steps": 4058, "total_steps": 7532, "loss": 0.2556169629096985, "lr": 9.55009845882702e-06, "epoch": 1.077546142610543, "percentage": 53.88, "elapsed_time": "14:38:33", "remaining_time": "12:32:07"} +{"current_steps": 4059, "total_steps": 7532, "loss": 0.280727744102478, "lr": 9.545712182877714e-06, "epoch": 1.077811711592086, "percentage": 53.89, "elapsed_time": "14:38:45", "remaining_time": "12:31:53"} +{"current_steps": 4060, "total_steps": 7532, "loss": 0.25315386056900024, "lr": 9.54132599450974e-06, "epoch": 1.078077280573629, "percentage": 53.9, "elapsed_time": "14:38:58", "remaining_time": "12:31:40"} +{"current_steps": 4061, "total_steps": 7532, "loss": 0.21985477209091187, "lr": 9.536939894568704e-06, "epoch": 1.078342849555172, "percentage": 53.92, "elapsed_time": "14:39:11", "remaining_time": "12:31:27"} +{"current_steps": 4062, "total_steps": 7532, "loss": 0.24329043924808502, "lr": 9.532553883900196e-06, "epoch": 1.0786084185367149, "percentage": 53.93, "elapsed_time": "14:39:24", "remaining_time": "12:31:14"} +{"current_steps": 4063, "total_steps": 7532, "loss": 0.2362256497144699, "lr": 9.528167963349786e-06, "epoch": 1.0788739875182578, "percentage": 53.94, "elapsed_time": "14:39:36", "remaining_time": "12:31:00"} +{"current_steps": 4064, "total_steps": 7532, "loss": 0.23685476183891296, "lr": 9.523782133763027e-06, "epoch": 1.0791395564998008, "percentage": 53.96, "elapsed_time": "14:39:49", "remaining_time": "12:30:47"} +{"current_steps": 4065, "total_steps": 7532, "loss": 0.26232481002807617, "lr": 9.519396395985456e-06, "epoch": 1.0794051254813437, "percentage": 53.97, "elapsed_time": "14:40:01", "remaining_time": "12:30:34"} +{"current_steps": 4066, "total_steps": 7532, "loss": 0.25196313858032227, "lr": 9.515010750862594e-06, "epoch": 1.0796706944628867, "percentage": 53.98, "elapsed_time": "14:40:14", "remaining_time": "12:30:20"} +{"current_steps": 4067, "total_steps": 
7532, "loss": 0.22520464658737183, "lr": 9.510625199239939e-06, "epoch": 1.0799362634444296, "percentage": 54.0, "elapsed_time": "14:40:27", "remaining_time": "12:30:07"} +{"current_steps": 4068, "total_steps": 7532, "loss": 0.27422505617141724, "lr": 9.506239741962971e-06, "epoch": 1.0802018324259726, "percentage": 54.01, "elapsed_time": "14:40:39", "remaining_time": "12:29:53"} +{"current_steps": 4069, "total_steps": 7532, "loss": 0.2646682560443878, "lr": 9.50185437987716e-06, "epoch": 1.0804674014075155, "percentage": 54.02, "elapsed_time": "14:40:52", "remaining_time": "12:29:40"} +{"current_steps": 4070, "total_steps": 7532, "loss": 0.2661365866661072, "lr": 9.497469113827949e-06, "epoch": 1.0807329703890585, "percentage": 54.04, "elapsed_time": "14:41:04", "remaining_time": "12:29:27"} +{"current_steps": 4071, "total_steps": 7532, "loss": 0.23156839609146118, "lr": 9.493083944660766e-06, "epoch": 1.0809985393706014, "percentage": 54.05, "elapsed_time": "14:41:17", "remaining_time": "12:29:14"} +{"current_steps": 4072, "total_steps": 7532, "loss": 0.25353243947029114, "lr": 9.488698873221021e-06, "epoch": 1.0812641083521444, "percentage": 54.06, "elapsed_time": "14:41:29", "remaining_time": "12:29:00"} +{"current_steps": 4073, "total_steps": 7532, "loss": 0.27488404512405396, "lr": 9.484313900354099e-06, "epoch": 1.0815296773336873, "percentage": 54.08, "elapsed_time": "14:41:42", "remaining_time": "12:28:47"} +{"current_steps": 4074, "total_steps": 7532, "loss": 0.2580753564834595, "lr": 9.479929026905378e-06, "epoch": 1.0817952463152305, "percentage": 54.09, "elapsed_time": "14:41:54", "remaining_time": "12:28:33"} +{"current_steps": 4075, "total_steps": 7532, "loss": 0.2425471544265747, "lr": 9.475544253720206e-06, "epoch": 1.0820608152967734, "percentage": 54.1, "elapsed_time": "14:42:07", "remaining_time": "12:28:20"} +{"current_steps": 4076, "total_steps": 7532, "loss": 0.25268295407295227, "lr": 9.471159581643918e-06, "epoch": 1.0823263842783164, 
"percentage": 54.12, "elapsed_time": "14:42:19", "remaining_time": "12:28:07"} +{"current_steps": 4077, "total_steps": 7532, "loss": 0.2683602571487427, "lr": 9.466775011521825e-06, "epoch": 1.0825919532598594, "percentage": 54.13, "elapsed_time": "14:42:32", "remaining_time": "12:27:54"} +{"current_steps": 4078, "total_steps": 7532, "loss": 0.24945034086704254, "lr": 9.462390544199221e-06, "epoch": 1.0828575222414023, "percentage": 54.14, "elapsed_time": "14:42:45", "remaining_time": "12:27:41"} +{"current_steps": 4079, "total_steps": 7532, "loss": 0.21784156560897827, "lr": 9.458006180521379e-06, "epoch": 1.0831230912229453, "percentage": 54.16, "elapsed_time": "14:42:58", "remaining_time": "12:27:28"} +{"current_steps": 4080, "total_steps": 7532, "loss": 0.22704020142555237, "lr": 9.453621921333554e-06, "epoch": 1.0833886602044882, "percentage": 54.17, "elapsed_time": "14:43:11", "remaining_time": "12:27:15"} +{"current_steps": 4081, "total_steps": 7532, "loss": 0.2532106637954712, "lr": 9.449237767480979e-06, "epoch": 1.0836542291860312, "percentage": 54.18, "elapsed_time": "14:43:24", "remaining_time": "12:27:01"} +{"current_steps": 4082, "total_steps": 7532, "loss": 0.27809134125709534, "lr": 9.444853719808864e-06, "epoch": 1.0839197981675741, "percentage": 54.2, "elapsed_time": "14:43:37", "remaining_time": "12:26:48"} +{"current_steps": 4083, "total_steps": 7532, "loss": 0.25704264640808105, "lr": 9.440469779162407e-06, "epoch": 1.084185367149117, "percentage": 54.21, "elapsed_time": "14:43:50", "remaining_time": "12:26:35"} +{"current_steps": 4084, "total_steps": 7532, "loss": 0.2656276226043701, "lr": 9.436085946386778e-06, "epoch": 1.08445093613066, "percentage": 54.22, "elapsed_time": "14:44:02", "remaining_time": "12:26:22"} +{"current_steps": 4085, "total_steps": 7532, "loss": 0.277826726436615, "lr": 9.431702222327126e-06, "epoch": 1.084716505112203, "percentage": 54.24, "elapsed_time": "14:44:15", "remaining_time": "12:26:08"} +{"current_steps": 
4086, "total_steps": 7532, "loss": 0.24656976759433746, "lr": 9.427318607828584e-06, "epoch": 1.084982074093746, "percentage": 54.25, "elapsed_time": "14:44:28", "remaining_time": "12:25:55"} +{"current_steps": 4087, "total_steps": 7532, "loss": 0.2498110830783844, "lr": 9.42293510373626e-06, "epoch": 1.0852476430752889, "percentage": 54.26, "elapsed_time": "14:44:40", "remaining_time": "12:25:42"} +{"current_steps": 4088, "total_steps": 7532, "loss": 0.24574093520641327, "lr": 9.418551710895243e-06, "epoch": 1.0855132120568318, "percentage": 54.28, "elapsed_time": "14:44:53", "remaining_time": "12:25:29"} +{"current_steps": 4089, "total_steps": 7532, "loss": 0.25271761417388916, "lr": 9.414168430150601e-06, "epoch": 1.0857787810383748, "percentage": 54.29, "elapsed_time": "14:45:05", "remaining_time": "12:25:15"} +{"current_steps": 4090, "total_steps": 7532, "loss": 0.29269370436668396, "lr": 9.409785262347373e-06, "epoch": 1.0860443500199177, "percentage": 54.3, "elapsed_time": "14:45:18", "remaining_time": "12:25:02"} +{"current_steps": 4091, "total_steps": 7532, "loss": 0.244449645280838, "lr": 9.405402208330581e-06, "epoch": 1.0863099190014607, "percentage": 54.31, "elapsed_time": "14:45:31", "remaining_time": "12:24:49"} +{"current_steps": 4092, "total_steps": 7532, "loss": 0.23785406351089478, "lr": 9.401019268945237e-06, "epoch": 1.0865754879830036, "percentage": 54.33, "elapsed_time": "14:45:44", "remaining_time": "12:24:36"} +{"current_steps": 4093, "total_steps": 7532, "loss": 0.2493479996919632, "lr": 9.39663644503631e-06, "epoch": 1.0868410569645466, "percentage": 54.34, "elapsed_time": "14:45:56", "remaining_time": "12:24:23"} +{"current_steps": 4094, "total_steps": 7532, "loss": 0.23758000135421753, "lr": 9.392253737448764e-06, "epoch": 1.0871066259460895, "percentage": 54.35, "elapsed_time": "14:46:10", "remaining_time": "12:24:10"} +{"current_steps": 4095, "total_steps": 7532, "loss": 0.22560475766658783, "lr": 9.387871147027528e-06, "epoch": 
1.0873721949276325, "percentage": 54.37, "elapsed_time": "14:46:22", "remaining_time": "12:23:57"} +{"current_steps": 4096, "total_steps": 7532, "loss": 0.2558273673057556, "lr": 9.383488674617515e-06, "epoch": 1.0876377639091754, "percentage": 54.38, "elapsed_time": "14:46:35", "remaining_time": "12:23:43"} +{"current_steps": 4097, "total_steps": 7532, "loss": 0.2822023034095764, "lr": 9.379106321063618e-06, "epoch": 1.0879033328907184, "percentage": 54.39, "elapsed_time": "14:46:47", "remaining_time": "12:23:30"} +{"current_steps": 4098, "total_steps": 7532, "loss": 0.2596978545188904, "lr": 9.374724087210698e-06, "epoch": 1.0881689018722613, "percentage": 54.41, "elapsed_time": "14:47:00", "remaining_time": "12:23:16"} +{"current_steps": 4099, "total_steps": 7532, "loss": 0.25353628396987915, "lr": 9.370341973903597e-06, "epoch": 1.0884344708538043, "percentage": 54.42, "elapsed_time": "14:47:13", "remaining_time": "12:23:04"} +{"current_steps": 4100, "total_steps": 7532, "loss": 0.2547294497489929, "lr": 9.365959981987135e-06, "epoch": 1.0887000398353472, "percentage": 54.43, "elapsed_time": "14:47:26", "remaining_time": "12:22:50"} +{"current_steps": 4101, "total_steps": 7532, "loss": 0.2688470780849457, "lr": 9.361578112306115e-06, "epoch": 1.0889656088168902, "percentage": 54.45, "elapsed_time": "14:47:44", "remaining_time": "12:22:42"} +{"current_steps": 4102, "total_steps": 7532, "loss": 0.25772029161453247, "lr": 9.357196365705303e-06, "epoch": 1.0892311777984331, "percentage": 54.46, "elapsed_time": "14:47:57", "remaining_time": "12:22:29"} +{"current_steps": 4103, "total_steps": 7532, "loss": 0.2875550091266632, "lr": 9.352814743029454e-06, "epoch": 1.089496746779976, "percentage": 54.47, "elapsed_time": "14:48:10", "remaining_time": "12:22:16"} +{"current_steps": 4104, "total_steps": 7532, "loss": 0.23085735738277435, "lr": 9.34843324512329e-06, "epoch": 1.089762315761519, "percentage": 54.49, "elapsed_time": "14:48:23", "remaining_time": "12:22:03"} 
+{"current_steps": 4105, "total_steps": 7532, "loss": 0.2607901096343994, "lr": 9.34405187283151e-06, "epoch": 1.090027884743062, "percentage": 54.5, "elapsed_time": "14:48:37", "remaining_time": "12:21:50"} +{"current_steps": 4106, "total_steps": 7532, "loss": 0.26165345311164856, "lr": 9.339670626998791e-06, "epoch": 1.090293453724605, "percentage": 54.51, "elapsed_time": "14:48:49", "remaining_time": "12:21:37"} +{"current_steps": 4107, "total_steps": 7532, "loss": 0.27884238958358765, "lr": 9.335289508469789e-06, "epoch": 1.090559022706148, "percentage": 54.53, "elapsed_time": "14:49:03", "remaining_time": "12:21:25"} +{"current_steps": 4108, "total_steps": 7532, "loss": 0.2689289152622223, "lr": 9.33090851808913e-06, "epoch": 1.0908245916876909, "percentage": 54.54, "elapsed_time": "14:49:16", "remaining_time": "12:21:12"} +{"current_steps": 4109, "total_steps": 7532, "loss": 0.2633207440376282, "lr": 9.326527656701414e-06, "epoch": 1.0910901606692338, "percentage": 54.55, "elapsed_time": "14:49:29", "remaining_time": "12:20:59"} +{"current_steps": 4110, "total_steps": 7532, "loss": 0.26001888513565063, "lr": 9.322146925151226e-06, "epoch": 1.0913557296507768, "percentage": 54.57, "elapsed_time": "14:49:41", "remaining_time": "12:20:46"} +{"current_steps": 4111, "total_steps": 7532, "loss": 0.2739099860191345, "lr": 9.31776632428312e-06, "epoch": 1.0916212986323197, "percentage": 54.58, "elapsed_time": "14:49:55", "remaining_time": "12:20:33"} +{"current_steps": 4112, "total_steps": 7532, "loss": 0.24885550141334534, "lr": 9.313385854941616e-06, "epoch": 1.0918868676138627, "percentage": 54.59, "elapsed_time": "14:50:07", "remaining_time": "12:20:19"} +{"current_steps": 4113, "total_steps": 7532, "loss": 0.2609873414039612, "lr": 9.309005517971222e-06, "epoch": 1.0921524365954056, "percentage": 54.61, "elapsed_time": "14:50:20", "remaining_time": "12:20:06"} +{"current_steps": 4114, "total_steps": 7532, "loss": 0.28853538632392883, "lr": 9.304625314216415e-06, 
"epoch": 1.0924180055769486, "percentage": 54.62, "elapsed_time": "14:50:33", "remaining_time": "12:19:53"} +{"current_steps": 4115, "total_steps": 7532, "loss": 0.2629924714565277, "lr": 9.300245244521647e-06, "epoch": 1.0926835745584915, "percentage": 54.63, "elapsed_time": "14:50:46", "remaining_time": "12:19:40"} +{"current_steps": 4116, "total_steps": 7532, "loss": 0.2687820494174957, "lr": 9.295865309731342e-06, "epoch": 1.0929491435400345, "percentage": 54.65, "elapsed_time": "14:50:59", "remaining_time": "12:19:28"} +{"current_steps": 4117, "total_steps": 7532, "loss": 0.2293676733970642, "lr": 9.2914855106899e-06, "epoch": 1.0932147125215774, "percentage": 54.66, "elapsed_time": "14:51:12", "remaining_time": "12:19:14"} +{"current_steps": 4118, "total_steps": 7532, "loss": 0.25261443853378296, "lr": 9.287105848241694e-06, "epoch": 1.0934802815031204, "percentage": 54.67, "elapsed_time": "14:51:26", "remaining_time": "12:19:02"} +{"current_steps": 4119, "total_steps": 7532, "loss": 0.26238197088241577, "lr": 9.282726323231077e-06, "epoch": 1.0937458504846633, "percentage": 54.69, "elapsed_time": "14:51:38", "remaining_time": "12:18:49"} +{"current_steps": 4120, "total_steps": 7532, "loss": 0.25718310475349426, "lr": 9.278346936502364e-06, "epoch": 1.0940114194662063, "percentage": 54.7, "elapsed_time": "14:51:52", "remaining_time": "12:18:36"} +{"current_steps": 4121, "total_steps": 7532, "loss": 0.23810459673404694, "lr": 9.273967688899849e-06, "epoch": 1.0942769884477492, "percentage": 54.71, "elapsed_time": "14:52:04", "remaining_time": "12:18:23"} +{"current_steps": 4122, "total_steps": 7532, "loss": 0.2197081446647644, "lr": 9.269588581267804e-06, "epoch": 1.0945425574292922, "percentage": 54.73, "elapsed_time": "14:52:18", "remaining_time": "12:18:10"} +{"current_steps": 4123, "total_steps": 7532, "loss": 0.2429335117340088, "lr": 9.265209614450463e-06, "epoch": 1.0948081264108351, "percentage": 54.74, "elapsed_time": "14:52:31", "remaining_time": 
"12:17:57"} +{"current_steps": 4124, "total_steps": 7532, "loss": 0.23028087615966797, "lr": 9.260830789292043e-06, "epoch": 1.095073695392378, "percentage": 54.75, "elapsed_time": "14:52:44", "remaining_time": "12:17:44"} +{"current_steps": 4125, "total_steps": 7532, "loss": 0.2783699035644531, "lr": 9.25645210663673e-06, "epoch": 1.095339264373921, "percentage": 54.77, "elapsed_time": "14:52:57", "remaining_time": "12:17:31"} +{"current_steps": 4126, "total_steps": 7532, "loss": 0.25055867433547974, "lr": 9.25207356732868e-06, "epoch": 1.095604833355464, "percentage": 54.78, "elapsed_time": "14:53:10", "remaining_time": "12:17:18"} +{"current_steps": 4127, "total_steps": 7532, "loss": 0.28629350662231445, "lr": 9.247695172212026e-06, "epoch": 1.095870402337007, "percentage": 54.79, "elapsed_time": "14:53:23", "remaining_time": "12:17:05"} +{"current_steps": 4128, "total_steps": 7532, "loss": 0.2626604735851288, "lr": 9.24331692213087e-06, "epoch": 1.09613597131855, "percentage": 54.81, "elapsed_time": "14:53:36", "remaining_time": "12:16:53"} +{"current_steps": 4129, "total_steps": 7532, "loss": 0.237881600856781, "lr": 9.238938817929288e-06, "epoch": 1.0964015403000928, "percentage": 54.82, "elapsed_time": "14:53:49", "remaining_time": "12:16:40"} +{"current_steps": 4130, "total_steps": 7532, "loss": 0.2602109909057617, "lr": 9.234560860451325e-06, "epoch": 1.0966671092816358, "percentage": 54.83, "elapsed_time": "14:54:02", "remaining_time": "12:16:26"} +{"current_steps": 4131, "total_steps": 7532, "loss": 0.2721475064754486, "lr": 9.230183050541001e-06, "epoch": 1.0969326782631788, "percentage": 54.85, "elapsed_time": "14:54:15", "remaining_time": "12:16:14"} +{"current_steps": 4132, "total_steps": 7532, "loss": 0.25844910740852356, "lr": 9.225805389042307e-06, "epoch": 1.0971982472447217, "percentage": 54.86, "elapsed_time": "14:54:28", "remaining_time": "12:16:00"} +{"current_steps": 4133, "total_steps": 7532, "loss": 0.26671040058135986, "lr": 
9.221427876799201e-06, "epoch": 1.0974638162262647, "percentage": 54.87, "elapsed_time": "14:54:41", "remaining_time": "12:15:48"} +{"current_steps": 4134, "total_steps": 7532, "loss": 0.2610115706920624, "lr": 9.21705051465562e-06, "epoch": 1.0977293852078078, "percentage": 54.89, "elapsed_time": "14:54:54", "remaining_time": "12:15:35"} +{"current_steps": 4135, "total_steps": 7532, "loss": 0.2518802881240845, "lr": 9.212673303455464e-06, "epoch": 1.0979949541893508, "percentage": 54.9, "elapsed_time": "14:55:07", "remaining_time": "12:15:22"} +{"current_steps": 4136, "total_steps": 7532, "loss": 0.28600364923477173, "lr": 9.20829624404261e-06, "epoch": 1.0982605231708937, "percentage": 54.91, "elapsed_time": "14:55:20", "remaining_time": "12:15:09"} +{"current_steps": 4137, "total_steps": 7532, "loss": 0.2649504840373993, "lr": 9.203919337260903e-06, "epoch": 1.0985260921524367, "percentage": 54.93, "elapsed_time": "14:55:34", "remaining_time": "12:14:56"} +{"current_steps": 4138, "total_steps": 7532, "loss": 0.22613298892974854, "lr": 9.199542583954159e-06, "epoch": 1.0987916611339796, "percentage": 54.94, "elapsed_time": "14:55:47", "remaining_time": "12:14:43"} +{"current_steps": 4139, "total_steps": 7532, "loss": 0.22546961903572083, "lr": 9.195165984966163e-06, "epoch": 1.0990572301155226, "percentage": 54.95, "elapsed_time": "14:56:00", "remaining_time": "12:14:31"} +{"current_steps": 4140, "total_steps": 7532, "loss": 0.20618169009685516, "lr": 9.190789541140675e-06, "epoch": 1.0993227990970655, "percentage": 54.97, "elapsed_time": "14:56:14", "remaining_time": "12:14:18"} +{"current_steps": 4141, "total_steps": 7532, "loss": 0.2434382289648056, "lr": 9.18641325332142e-06, "epoch": 1.0995883680786085, "percentage": 54.98, "elapsed_time": "14:56:27", "remaining_time": "12:14:05"} +{"current_steps": 4142, "total_steps": 7532, "loss": 0.19114840030670166, "lr": 9.182037122352092e-06, "epoch": 1.0998539370601514, "percentage": 54.99, "elapsed_time": 
"14:56:40", "remaining_time": "12:13:53"} +{"current_steps": 4143, "total_steps": 7532, "loss": 0.2793614864349365, "lr": 9.17766114907636e-06, "epoch": 1.1001195060416944, "percentage": 55.01, "elapsed_time": "14:56:53", "remaining_time": "12:13:39"} +{"current_steps": 4144, "total_steps": 7532, "loss": 0.2908466160297394, "lr": 9.173285334337863e-06, "epoch": 1.1003850750232373, "percentage": 55.02, "elapsed_time": "14:57:07", "remaining_time": "12:13:27"} +{"current_steps": 4145, "total_steps": 7532, "loss": 0.260933518409729, "lr": 9.168909678980199e-06, "epoch": 1.1006506440047803, "percentage": 55.03, "elapsed_time": "14:57:19", "remaining_time": "12:13:14"} +{"current_steps": 4146, "total_steps": 7532, "loss": 0.2819761037826538, "lr": 9.16453418384695e-06, "epoch": 1.1009162129863233, "percentage": 55.05, "elapsed_time": "14:57:33", "remaining_time": "12:13:01"} +{"current_steps": 4147, "total_steps": 7532, "loss": 0.25290411710739136, "lr": 9.160158849781657e-06, "epoch": 1.1011817819678662, "percentage": 55.06, "elapsed_time": "14:57:46", "remaining_time": "12:12:48"} +{"current_steps": 4148, "total_steps": 7532, "loss": 0.21255841851234436, "lr": 9.155783677627831e-06, "epoch": 1.1014473509494092, "percentage": 55.07, "elapsed_time": "14:57:59", "remaining_time": "12:12:35"} +{"current_steps": 4149, "total_steps": 7532, "loss": 0.20631751418113708, "lr": 9.151408668228958e-06, "epoch": 1.101712919930952, "percentage": 55.08, "elapsed_time": "14:58:12", "remaining_time": "12:12:23"} +{"current_steps": 4150, "total_steps": 7532, "loss": 0.20976273715496063, "lr": 9.147033822428484e-06, "epoch": 1.101978488912495, "percentage": 55.1, "elapsed_time": "14:58:26", "remaining_time": "12:12:10"} +{"current_steps": 4151, "total_steps": 7532, "loss": 0.21464477479457855, "lr": 9.142659141069828e-06, "epoch": 1.102244057894038, "percentage": 55.11, "elapsed_time": "14:58:39", "remaining_time": "12:11:57"} +{"current_steps": 4152, "total_steps": 7532, "loss": 
0.2262338101863861, "lr": 9.13828462499638e-06, "epoch": 1.102509626875581, "percentage": 55.12, "elapsed_time": "14:58:52", "remaining_time": "12:11:44"} +{"current_steps": 4153, "total_steps": 7532, "loss": 0.26331469416618347, "lr": 9.133910275051493e-06, "epoch": 1.102775195857124, "percentage": 55.14, "elapsed_time": "14:59:05", "remaining_time": "12:11:31"} +{"current_steps": 4154, "total_steps": 7532, "loss": 0.26280921697616577, "lr": 9.129536092078488e-06, "epoch": 1.1030407648386669, "percentage": 55.15, "elapsed_time": "14:59:18", "remaining_time": "12:11:18"} +{"current_steps": 4155, "total_steps": 7532, "loss": 0.2527182698249817, "lr": 9.12516207692066e-06, "epoch": 1.1033063338202098, "percentage": 55.16, "elapsed_time": "14:59:31", "remaining_time": "12:11:05"} +{"current_steps": 4156, "total_steps": 7532, "loss": 0.21416455507278442, "lr": 9.120788230421267e-06, "epoch": 1.1035719028017528, "percentage": 55.18, "elapsed_time": "14:59:45", "remaining_time": "12:10:53"} +{"current_steps": 4157, "total_steps": 7532, "loss": 0.25882014632225037, "lr": 9.116414553423535e-06, "epoch": 1.1038374717832957, "percentage": 55.19, "elapsed_time": "14:59:58", "remaining_time": "12:10:40"} +{"current_steps": 4158, "total_steps": 7532, "loss": 0.20510248839855194, "lr": 9.112041046770653e-06, "epoch": 1.1041030407648387, "percentage": 55.2, "elapsed_time": "15:00:11", "remaining_time": "12:10:27"} +{"current_steps": 4159, "total_steps": 7532, "loss": 0.2348058819770813, "lr": 9.107667711305786e-06, "epoch": 1.1043686097463816, "percentage": 55.22, "elapsed_time": "15:00:24", "remaining_time": "12:10:14"} +{"current_steps": 4160, "total_steps": 7532, "loss": 0.24561384320259094, "lr": 9.10329454787206e-06, "epoch": 1.1046341787279246, "percentage": 55.23, "elapsed_time": "15:00:37", "remaining_time": "12:10:01"} +{"current_steps": 4161, "total_steps": 7532, "loss": 0.23025226593017578, "lr": 9.098921557312573e-06, "epoch": 1.1048997477094675, "percentage": 55.24, 
"elapsed_time": "15:00:50", "remaining_time": "12:09:48"} +{"current_steps": 4162, "total_steps": 7532, "loss": 0.2724589705467224, "lr": 9.094548740470375e-06, "epoch": 1.1051653166910105, "percentage": 55.26, "elapsed_time": "15:01:03", "remaining_time": "12:09:35"} +{"current_steps": 4163, "total_steps": 7532, "loss": 0.25196704268455505, "lr": 9.090176098188504e-06, "epoch": 1.1054308856725534, "percentage": 55.27, "elapsed_time": "15:01:17", "remaining_time": "12:09:23"} +{"current_steps": 4164, "total_steps": 7532, "loss": 0.22673696279525757, "lr": 9.085803631309953e-06, "epoch": 1.1056964546540964, "percentage": 55.28, "elapsed_time": "15:01:30", "remaining_time": "12:09:10"} +{"current_steps": 4165, "total_steps": 7532, "loss": 0.23913519084453583, "lr": 9.081431340677679e-06, "epoch": 1.1059620236356393, "percentage": 55.3, "elapsed_time": "15:01:43", "remaining_time": "12:08:57"} +{"current_steps": 4166, "total_steps": 7532, "loss": 0.2723861336708069, "lr": 9.07705922713461e-06, "epoch": 1.1062275926171823, "percentage": 55.31, "elapsed_time": "15:01:56", "remaining_time": "12:08:44"} +{"current_steps": 4167, "total_steps": 7532, "loss": 0.262167364358902, "lr": 9.072687291523636e-06, "epoch": 1.1064931615987252, "percentage": 55.32, "elapsed_time": "15:02:09", "remaining_time": "12:08:31"} +{"current_steps": 4168, "total_steps": 7532, "loss": 0.2394658625125885, "lr": 9.068315534687615e-06, "epoch": 1.1067587305802682, "percentage": 55.34, "elapsed_time": "15:02:22", "remaining_time": "12:08:18"} +{"current_steps": 4169, "total_steps": 7532, "loss": 0.2547619938850403, "lr": 9.063943957469373e-06, "epoch": 1.1070242995618111, "percentage": 55.35, "elapsed_time": "15:02:36", "remaining_time": "12:08:06"} +{"current_steps": 4170, "total_steps": 7532, "loss": 0.24057570099830627, "lr": 9.059572560711697e-06, "epoch": 1.107289868543354, "percentage": 55.36, "elapsed_time": "15:02:49", "remaining_time": "12:07:53"} +{"current_steps": 4171, "total_steps": 
7532, "loss": 0.21729445457458496, "lr": 9.055201345257331e-06, "epoch": 1.107555437524897, "percentage": 55.38, "elapsed_time": "15:03:02", "remaining_time": "12:07:40"} +{"current_steps": 4172, "total_steps": 7532, "loss": 0.26590001583099365, "lr": 9.05083031194901e-06, "epoch": 1.10782100650644, "percentage": 55.39, "elapsed_time": "15:03:15", "remaining_time": "12:07:27"} +{"current_steps": 4173, "total_steps": 7532, "loss": 0.26114848256111145, "lr": 9.04645946162941e-06, "epoch": 1.108086575487983, "percentage": 55.4, "elapsed_time": "15:03:28", "remaining_time": "12:07:14"} +{"current_steps": 4174, "total_steps": 7532, "loss": 0.2255469262599945, "lr": 9.04208879514118e-06, "epoch": 1.108352144469526, "percentage": 55.42, "elapsed_time": "15:03:41", "remaining_time": "12:07:01"} +{"current_steps": 4175, "total_steps": 7532, "loss": 0.2597671151161194, "lr": 9.037718313326932e-06, "epoch": 1.1086177134510689, "percentage": 55.43, "elapsed_time": "15:03:54", "remaining_time": "12:06:48"} +{"current_steps": 4176, "total_steps": 7532, "loss": 0.24820469319820404, "lr": 9.033348017029247e-06, "epoch": 1.1088832824326118, "percentage": 55.44, "elapsed_time": "15:04:07", "remaining_time": "12:06:35"} +{"current_steps": 4177, "total_steps": 7532, "loss": 0.23886600136756897, "lr": 9.028977907090661e-06, "epoch": 1.1091488514141548, "percentage": 55.46, "elapsed_time": "15:04:21", "remaining_time": "12:06:22"} +{"current_steps": 4178, "total_steps": 7532, "loss": 0.24204152822494507, "lr": 9.024607984353682e-06, "epoch": 1.1094144203956977, "percentage": 55.47, "elapsed_time": "15:04:34", "remaining_time": "12:06:10"} +{"current_steps": 4179, "total_steps": 7532, "loss": 0.23246638476848602, "lr": 9.02023824966078e-06, "epoch": 1.1096799893772407, "percentage": 55.48, "elapsed_time": "15:04:47", "remaining_time": "12:05:57"} +{"current_steps": 4180, "total_steps": 7532, "loss": 0.25057342648506165, "lr": 9.015868703854386e-06, "epoch": 1.1099455583587836, 
"percentage": 55.5, "elapsed_time": "15:05:00", "remaining_time": "12:05:44"} +{"current_steps": 4181, "total_steps": 7532, "loss": 0.2316257357597351, "lr": 9.011499347776902e-06, "epoch": 1.1102111273403266, "percentage": 55.51, "elapsed_time": "15:05:13", "remaining_time": "12:05:31"} +{"current_steps": 4182, "total_steps": 7532, "loss": 0.24824783205986023, "lr": 9.007130182270685e-06, "epoch": 1.1104766963218695, "percentage": 55.52, "elapsed_time": "15:05:26", "remaining_time": "12:05:18"} +{"current_steps": 4183, "total_steps": 7532, "loss": 0.25174480676651, "lr": 9.002761208178059e-06, "epoch": 1.1107422653034125, "percentage": 55.54, "elapsed_time": "15:05:39", "remaining_time": "12:05:05"} +{"current_steps": 4184, "total_steps": 7532, "loss": 0.22364717721939087, "lr": 8.998392426341313e-06, "epoch": 1.1110078342849554, "percentage": 55.55, "elapsed_time": "15:05:52", "remaining_time": "12:04:52"} +{"current_steps": 4185, "total_steps": 7532, "loss": 0.2205432504415512, "lr": 8.994023837602694e-06, "epoch": 1.1112734032664984, "percentage": 55.56, "elapsed_time": "15:06:05", "remaining_time": "12:04:39"} +{"current_steps": 4186, "total_steps": 7532, "loss": 0.23303675651550293, "lr": 8.989655442804413e-06, "epoch": 1.1115389722480415, "percentage": 55.58, "elapsed_time": "15:06:18", "remaining_time": "12:04:26"} +{"current_steps": 4187, "total_steps": 7532, "loss": 0.3003222644329071, "lr": 8.985287242788646e-06, "epoch": 1.1118045412295845, "percentage": 55.59, "elapsed_time": "15:06:31", "remaining_time": "12:04:13"} +{"current_steps": 4188, "total_steps": 7532, "loss": 0.2734413146972656, "lr": 8.980919238397532e-06, "epoch": 1.1120701102111274, "percentage": 55.6, "elapsed_time": "15:06:44", "remaining_time": "12:04:00"} +{"current_steps": 4189, "total_steps": 7532, "loss": 0.24086692929267883, "lr": 8.976551430473166e-06, "epoch": 1.1123356791926704, "percentage": 55.62, "elapsed_time": "15:06:57", "remaining_time": "12:03:47"} +{"current_steps": 
4190, "total_steps": 7532, "loss": 0.2531188130378723, "lr": 8.972183819857618e-06, "epoch": 1.1126012481742134, "percentage": 55.63, "elapsed_time": "15:07:10", "remaining_time": "12:03:34"} +{"current_steps": 4191, "total_steps": 7532, "loss": 0.25059640407562256, "lr": 8.96781640739291e-06, "epoch": 1.1128668171557563, "percentage": 55.64, "elapsed_time": "15:07:24", "remaining_time": "12:03:22"} +{"current_steps": 4192, "total_steps": 7532, "loss": 0.22427335381507874, "lr": 8.963449193921023e-06, "epoch": 1.1131323861372993, "percentage": 55.66, "elapsed_time": "15:07:37", "remaining_time": "12:03:09"} +{"current_steps": 4193, "total_steps": 7532, "loss": 0.28835898637771606, "lr": 8.959082180283906e-06, "epoch": 1.1133979551188422, "percentage": 55.67, "elapsed_time": "15:07:50", "remaining_time": "12:02:56"} +{"current_steps": 4194, "total_steps": 7532, "loss": 0.23919034004211426, "lr": 8.954715367323468e-06, "epoch": 1.1136635241003852, "percentage": 55.68, "elapsed_time": "15:08:03", "remaining_time": "12:02:43"} +{"current_steps": 4195, "total_steps": 7532, "loss": 0.24583986401557922, "lr": 8.950348755881578e-06, "epoch": 1.1139290930819281, "percentage": 55.7, "elapsed_time": "15:08:17", "remaining_time": "12:02:30"} +{"current_steps": 4196, "total_steps": 7532, "loss": 0.23869696259498596, "lr": 8.94598234680007e-06, "epoch": 1.114194662063471, "percentage": 55.71, "elapsed_time": "15:08:29", "remaining_time": "12:02:17"} +{"current_steps": 4197, "total_steps": 7532, "loss": 0.2672434449195862, "lr": 8.941616140920734e-06, "epoch": 1.114460231045014, "percentage": 55.72, "elapsed_time": "15:08:43", "remaining_time": "12:02:05"} +{"current_steps": 4198, "total_steps": 7532, "loss": 0.2660336494445801, "lr": 8.937250139085322e-06, "epoch": 1.114725800026557, "percentage": 55.74, "elapsed_time": "15:08:55", "remaining_time": "12:01:51"} +{"current_steps": 4199, "total_steps": 7532, "loss": 0.26461780071258545, "lr": 8.932884342135552e-06, "epoch": 
1.1149913690081, "percentage": 55.75, "elapsed_time": "15:09:09", "remaining_time": "12:01:38"} +{"current_steps": 4200, "total_steps": 7532, "loss": 0.22947481274604797, "lr": 8.928518750913094e-06, "epoch": 1.1152569379896429, "percentage": 55.76, "elapsed_time": "15:09:21", "remaining_time": "12:01:25"} +{"current_steps": 4201, "total_steps": 7532, "loss": 0.22715970873832703, "lr": 8.924153366259584e-06, "epoch": 1.1155225069711858, "percentage": 55.78, "elapsed_time": "15:09:40", "remaining_time": "12:01:17"} +{"current_steps": 4202, "total_steps": 7532, "loss": 0.2994215190410614, "lr": 8.919788189016618e-06, "epoch": 1.1157880759527288, "percentage": 55.79, "elapsed_time": "15:09:53", "remaining_time": "12:01:03"} +{"current_steps": 4203, "total_steps": 7532, "loss": 0.2290656566619873, "lr": 8.915423220025747e-06, "epoch": 1.1160536449342717, "percentage": 55.8, "elapsed_time": "15:10:05", "remaining_time": "12:00:50"} +{"current_steps": 4204, "total_steps": 7532, "loss": 0.22284844517707825, "lr": 8.911058460128489e-06, "epoch": 1.1163192139158147, "percentage": 55.82, "elapsed_time": "15:10:18", "remaining_time": "12:00:37"} +{"current_steps": 4205, "total_steps": 7532, "loss": 0.2095392495393753, "lr": 8.906693910166316e-06, "epoch": 1.1165847828973576, "percentage": 55.83, "elapsed_time": "15:10:31", "remaining_time": "12:00:24"} +{"current_steps": 4206, "total_steps": 7532, "loss": 0.25098133087158203, "lr": 8.902329570980665e-06, "epoch": 1.1168503518789006, "percentage": 55.84, "elapsed_time": "15:10:44", "remaining_time": "12:00:11"} +{"current_steps": 4207, "total_steps": 7532, "loss": 0.24768148362636566, "lr": 8.897965443412923e-06, "epoch": 1.1171159208604435, "percentage": 55.86, "elapsed_time": "15:10:56", "remaining_time": "11:59:58"} +{"current_steps": 4208, "total_steps": 7532, "loss": 0.22255480289459229, "lr": 8.89360152830445e-06, "epoch": 1.1173814898419865, "percentage": 55.87, "elapsed_time": "15:11:10", "remaining_time": "11:59:45"} 
+{"current_steps": 4209, "total_steps": 7532, "loss": 0.23721200227737427, "lr": 8.889237826496551e-06, "epoch": 1.1176470588235294, "percentage": 55.88, "elapsed_time": "15:11:22", "remaining_time": "11:59:31"} +{"current_steps": 4210, "total_steps": 7532, "loss": 0.25002530217170715, "lr": 8.8848743388305e-06, "epoch": 1.1179126278050724, "percentage": 55.89, "elapsed_time": "15:11:35", "remaining_time": "11:59:19"} +{"current_steps": 4211, "total_steps": 7532, "loss": 0.27188029885292053, "lr": 8.880511066147524e-06, "epoch": 1.1181781967866153, "percentage": 55.91, "elapsed_time": "15:11:48", "remaining_time": "11:59:05"} +{"current_steps": 4212, "total_steps": 7532, "loss": 0.23056066036224365, "lr": 8.876148009288813e-06, "epoch": 1.1184437657681583, "percentage": 55.92, "elapsed_time": "15:12:01", "remaining_time": "11:58:52"} +{"current_steps": 4213, "total_steps": 7532, "loss": 0.2336079478263855, "lr": 8.87178516909551e-06, "epoch": 1.1187093347497012, "percentage": 55.93, "elapsed_time": "15:12:14", "remaining_time": "11:58:39"} +{"current_steps": 4214, "total_steps": 7532, "loss": 0.27449533343315125, "lr": 8.86742254640872e-06, "epoch": 1.1189749037312442, "percentage": 55.95, "elapsed_time": "15:12:27", "remaining_time": "11:58:27"} +{"current_steps": 4215, "total_steps": 7532, "loss": 0.24714893102645874, "lr": 8.863060142069508e-06, "epoch": 1.1192404727127871, "percentage": 55.96, "elapsed_time": "15:12:40", "remaining_time": "11:58:13"} +{"current_steps": 4216, "total_steps": 7532, "loss": 0.2155439257621765, "lr": 8.858697956918886e-06, "epoch": 1.11950604169433, "percentage": 55.97, "elapsed_time": "15:12:53", "remaining_time": "11:58:00"} +{"current_steps": 4217, "total_steps": 7532, "loss": 0.23189155757427216, "lr": 8.854335991797842e-06, "epoch": 1.119771610675873, "percentage": 55.99, "elapsed_time": "15:13:06", "remaining_time": "11:57:47"} +{"current_steps": 4218, "total_steps": 7532, "loss": 0.23413527011871338, "lr": 
8.849974247547307e-06, "epoch": 1.120037179657416, "percentage": 56.0, "elapsed_time": "15:13:19", "remaining_time": "11:57:34"} +{"current_steps": 4219, "total_steps": 7532, "loss": 0.2569039463996887, "lr": 8.845612725008173e-06, "epoch": 1.120302748638959, "percentage": 56.01, "elapsed_time": "15:13:32", "remaining_time": "11:57:22"} +{"current_steps": 4220, "total_steps": 7532, "loss": 0.2699541449546814, "lr": 8.84125142502129e-06, "epoch": 1.120568317620502, "percentage": 56.03, "elapsed_time": "15:13:45", "remaining_time": "11:57:08"} +{"current_steps": 4221, "total_steps": 7532, "loss": 0.27172449231147766, "lr": 8.836890348427468e-06, "epoch": 1.1208338866020449, "percentage": 56.04, "elapsed_time": "15:13:58", "remaining_time": "11:56:56"} +{"current_steps": 4222, "total_steps": 7532, "loss": 0.2839444875717163, "lr": 8.83252949606747e-06, "epoch": 1.1210994555835878, "percentage": 56.05, "elapsed_time": "15:14:11", "remaining_time": "11:56:43"} +{"current_steps": 4223, "total_steps": 7532, "loss": 0.22782178223133087, "lr": 8.828168868782013e-06, "epoch": 1.1213650245651308, "percentage": 56.07, "elapsed_time": "15:14:24", "remaining_time": "11:56:30"} +{"current_steps": 4224, "total_steps": 7532, "loss": 0.2567726671695709, "lr": 8.82380846741178e-06, "epoch": 1.1216305935466737, "percentage": 56.08, "elapsed_time": "15:14:37", "remaining_time": "11:56:17"} +{"current_steps": 4225, "total_steps": 7532, "loss": 0.25879523158073425, "lr": 8.8194482927974e-06, "epoch": 1.1218961625282167, "percentage": 56.09, "elapsed_time": "15:14:50", "remaining_time": "11:56:04"} +{"current_steps": 4226, "total_steps": 7532, "loss": 0.22109058499336243, "lr": 8.815088345779466e-06, "epoch": 1.1221617315097596, "percentage": 56.11, "elapsed_time": "15:15:03", "remaining_time": "11:55:51"} +{"current_steps": 4227, "total_steps": 7532, "loss": 0.23615925014019012, "lr": 8.810728627198526e-06, "epoch": 1.1224273004913026, "percentage": 56.12, "elapsed_time": "15:15:16", 
"remaining_time": "11:55:37"} +{"current_steps": 4228, "total_steps": 7532, "loss": 0.2751353085041046, "lr": 8.806369137895081e-06, "epoch": 1.1226928694728455, "percentage": 56.13, "elapsed_time": "15:15:29", "remaining_time": "11:55:25"} +{"current_steps": 4229, "total_steps": 7532, "loss": 0.2361963391304016, "lr": 8.802009878709587e-06, "epoch": 1.1229584384543885, "percentage": 56.15, "elapsed_time": "15:15:42", "remaining_time": "11:55:11"} +{"current_steps": 4230, "total_steps": 7532, "loss": 0.22435930371284485, "lr": 8.79765085048246e-06, "epoch": 1.1232240074359314, "percentage": 56.16, "elapsed_time": "15:15:55", "remaining_time": "11:54:59"} +{"current_steps": 4231, "total_steps": 7532, "loss": 0.2355855256319046, "lr": 8.79329205405407e-06, "epoch": 1.1234895764174744, "percentage": 56.17, "elapsed_time": "15:16:08", "remaining_time": "11:54:46"} +{"current_steps": 4232, "total_steps": 7532, "loss": 0.24127572774887085, "lr": 8.78893349026474e-06, "epoch": 1.1237551453990173, "percentage": 56.19, "elapsed_time": "15:16:21", "remaining_time": "11:54:33"} +{"current_steps": 4233, "total_steps": 7532, "loss": 0.2677989602088928, "lr": 8.784575159954748e-06, "epoch": 1.1240207143805603, "percentage": 56.2, "elapsed_time": "15:16:34", "remaining_time": "11:54:20"} +{"current_steps": 4234, "total_steps": 7532, "loss": 0.2283135950565338, "lr": 8.78021706396433e-06, "epoch": 1.1242862833621032, "percentage": 56.21, "elapsed_time": "15:16:47", "remaining_time": "11:54:07"} +{"current_steps": 4235, "total_steps": 7532, "loss": 0.2686103582382202, "lr": 8.775859203133678e-06, "epoch": 1.1245518523436462, "percentage": 56.23, "elapsed_time": "15:17:00", "remaining_time": "11:53:54"} +{"current_steps": 4236, "total_steps": 7532, "loss": 0.2638726234436035, "lr": 8.771501578302934e-06, "epoch": 1.1248174213251891, "percentage": 56.24, "elapsed_time": "15:17:13", "remaining_time": "11:53:41"} +{"current_steps": 4237, "total_steps": 7532, "loss": 0.2517441511154175, 
"lr": 8.767144190312196e-06, "epoch": 1.125082990306732, "percentage": 56.25, "elapsed_time": "15:17:26", "remaining_time": "11:53:28"} +{"current_steps": 4238, "total_steps": 7532, "loss": 0.2593642771244049, "lr": 8.762787040001518e-06, "epoch": 1.125348559288275, "percentage": 56.27, "elapsed_time": "15:17:39", "remaining_time": "11:53:14"} +{"current_steps": 4239, "total_steps": 7532, "loss": 0.23758336901664734, "lr": 8.758430128210908e-06, "epoch": 1.125614128269818, "percentage": 56.28, "elapsed_time": "15:17:52", "remaining_time": "11:53:02"} +{"current_steps": 4240, "total_steps": 7532, "loss": 0.2557980716228485, "lr": 8.754073455780327e-06, "epoch": 1.125879697251361, "percentage": 56.29, "elapsed_time": "15:18:05", "remaining_time": "11:52:49"} +{"current_steps": 4241, "total_steps": 7532, "loss": 0.2484067678451538, "lr": 8.74971702354969e-06, "epoch": 1.126145266232904, "percentage": 56.31, "elapsed_time": "15:18:18", "remaining_time": "11:52:36"} +{"current_steps": 4242, "total_steps": 7532, "loss": 0.23103098571300507, "lr": 8.745360832358864e-06, "epoch": 1.1264108352144468, "percentage": 56.32, "elapsed_time": "15:18:31", "remaining_time": "11:52:23"} +{"current_steps": 4243, "total_steps": 7532, "loss": 0.2630731463432312, "lr": 8.741004883047667e-06, "epoch": 1.1266764041959898, "percentage": 56.33, "elapsed_time": "15:18:44", "remaining_time": "11:52:10"} +{"current_steps": 4244, "total_steps": 7532, "loss": 0.2413114309310913, "lr": 8.736649176455885e-06, "epoch": 1.1269419731775328, "percentage": 56.35, "elapsed_time": "15:18:57", "remaining_time": "11:51:57"} +{"current_steps": 4245, "total_steps": 7532, "loss": 0.22463169693946838, "lr": 8.732293713423243e-06, "epoch": 1.1272075421590757, "percentage": 56.36, "elapsed_time": "15:19:11", "remaining_time": "11:51:44"} +{"current_steps": 4246, "total_steps": 7532, "loss": 0.23641429841518402, "lr": 8.727938494789421e-06, "epoch": 1.1274731111406187, "percentage": 56.37, "elapsed_time": 
"15:19:24", "remaining_time": "11:51:31"} +{"current_steps": 4247, "total_steps": 7532, "loss": 0.2547767162322998, "lr": 8.723583521394054e-06, "epoch": 1.1277386801221616, "percentage": 56.39, "elapsed_time": "15:19:37", "remaining_time": "11:51:19"} +{"current_steps": 4248, "total_steps": 7532, "loss": 0.25753074884414673, "lr": 8.719228794076733e-06, "epoch": 1.1280042491037048, "percentage": 56.4, "elapsed_time": "15:19:50", "remaining_time": "11:51:05"} +{"current_steps": 4249, "total_steps": 7532, "loss": 0.30602240562438965, "lr": 8.714874313676992e-06, "epoch": 1.1282698180852477, "percentage": 56.41, "elapsed_time": "15:20:03", "remaining_time": "11:50:53"} +{"current_steps": 4250, "total_steps": 7532, "loss": 0.28336623311042786, "lr": 8.710520081034328e-06, "epoch": 1.1285353870667907, "percentage": 56.43, "elapsed_time": "15:20:16", "remaining_time": "11:50:40"} +{"current_steps": 4251, "total_steps": 7532, "loss": 0.24065867066383362, "lr": 8.706166096988185e-06, "epoch": 1.1288009560483336, "percentage": 56.44, "elapsed_time": "15:20:30", "remaining_time": "11:50:27"} +{"current_steps": 4252, "total_steps": 7532, "loss": 0.25674968957901, "lr": 8.701812362377954e-06, "epoch": 1.1290665250298766, "percentage": 56.45, "elapsed_time": "15:20:42", "remaining_time": "11:50:14"} +{"current_steps": 4253, "total_steps": 7532, "loss": 0.21502923965454102, "lr": 8.697458878042992e-06, "epoch": 1.1293320940114195, "percentage": 56.47, "elapsed_time": "15:20:55", "remaining_time": "11:50:01"} +{"current_steps": 4254, "total_steps": 7532, "loss": 0.26848286390304565, "lr": 8.693105644822598e-06, "epoch": 1.1295976629929625, "percentage": 56.48, "elapsed_time": "15:21:08", "remaining_time": "11:49:48"} +{"current_steps": 4255, "total_steps": 7532, "loss": 0.24283824861049652, "lr": 8.688752663556022e-06, "epoch": 1.1298632319745054, "percentage": 56.49, "elapsed_time": "15:21:21", "remaining_time": "11:49:35"} +{"current_steps": 4256, "total_steps": 7532, "loss": 
0.2511506974697113, "lr": 8.684399935082468e-06, "epoch": 1.1301288009560484, "percentage": 56.51, "elapsed_time": "15:21:34", "remaining_time": "11:49:22"} +{"current_steps": 4257, "total_steps": 7532, "loss": 0.23932483792304993, "lr": 8.68004746024109e-06, "epoch": 1.1303943699375913, "percentage": 56.52, "elapsed_time": "15:21:47", "remaining_time": "11:49:09"} +{"current_steps": 4258, "total_steps": 7532, "loss": 0.30030694603919983, "lr": 8.675695239870993e-06, "epoch": 1.1306599389191343, "percentage": 56.53, "elapsed_time": "15:22:00", "remaining_time": "11:48:56"} +{"current_steps": 4259, "total_steps": 7532, "loss": 0.24699059128761292, "lr": 8.671343274811238e-06, "epoch": 1.1309255079006773, "percentage": 56.55, "elapsed_time": "15:22:13", "remaining_time": "11:48:43"} +{"current_steps": 4260, "total_steps": 7532, "loss": 0.26828041672706604, "lr": 8.666991565900827e-06, "epoch": 1.1311910768822202, "percentage": 56.56, "elapsed_time": "15:22:26", "remaining_time": "11:48:30"} +{"current_steps": 4261, "total_steps": 7532, "loss": 0.2372082769870758, "lr": 8.662640113978717e-06, "epoch": 1.1314566458637632, "percentage": 56.57, "elapsed_time": "15:22:39", "remaining_time": "11:48:17"} +{"current_steps": 4262, "total_steps": 7532, "loss": 0.26367881894111633, "lr": 8.658288919883824e-06, "epoch": 1.131722214845306, "percentage": 56.59, "elapsed_time": "15:22:53", "remaining_time": "11:48:04"} +{"current_steps": 4263, "total_steps": 7532, "loss": 0.2287222146987915, "lr": 8.653937984455007e-06, "epoch": 1.131987783826849, "percentage": 56.6, "elapsed_time": "15:23:05", "remaining_time": "11:47:51"} +{"current_steps": 4264, "total_steps": 7532, "loss": 0.244521826505661, "lr": 8.649587308531067e-06, "epoch": 1.132253352808392, "percentage": 56.61, "elapsed_time": "15:23:18", "remaining_time": "11:47:38"} +{"current_steps": 4265, "total_steps": 7532, "loss": 0.26912257075309753, "lr": 8.64523689295077e-06, "epoch": 1.132518921789935, "percentage": 56.63, 
"elapsed_time": "15:23:31", "remaining_time": "11:47:25"} +{"current_steps": 4266, "total_steps": 7532, "loss": 0.23002780973911285, "lr": 8.64088673855282e-06, "epoch": 1.132784490771478, "percentage": 56.64, "elapsed_time": "15:23:44", "remaining_time": "11:47:12"} +{"current_steps": 4267, "total_steps": 7532, "loss": 0.2561958432197571, "lr": 8.636536846175878e-06, "epoch": 1.1330500597530209, "percentage": 56.65, "elapsed_time": "15:23:57", "remaining_time": "11:46:59"} +{"current_steps": 4268, "total_steps": 7532, "loss": 0.25553008913993835, "lr": 8.63218721665855e-06, "epoch": 1.1333156287345638, "percentage": 56.66, "elapsed_time": "15:24:10", "remaining_time": "11:46:46"} +{"current_steps": 4269, "total_steps": 7532, "loss": 0.1992083340883255, "lr": 8.627837850839398e-06, "epoch": 1.1335811977161068, "percentage": 56.68, "elapsed_time": "15:24:23", "remaining_time": "11:46:33"} +{"current_steps": 4270, "total_steps": 7532, "loss": 0.23075388371944427, "lr": 8.62348874955692e-06, "epoch": 1.1338467666976497, "percentage": 56.69, "elapsed_time": "15:24:37", "remaining_time": "11:46:20"} +{"current_steps": 4271, "total_steps": 7532, "loss": 0.23691913485527039, "lr": 8.619139913649582e-06, "epoch": 1.1341123356791927, "percentage": 56.7, "elapsed_time": "15:24:50", "remaining_time": "11:46:08"} +{"current_steps": 4272, "total_steps": 7532, "loss": 0.2536017894744873, "lr": 8.61479134395578e-06, "epoch": 1.1343779046607356, "percentage": 56.72, "elapsed_time": "15:25:03", "remaining_time": "11:45:54"} +{"current_steps": 4273, "total_steps": 7532, "loss": 0.3014161288738251, "lr": 8.61044304131387e-06, "epoch": 1.1346434736422786, "percentage": 56.73, "elapsed_time": "15:25:16", "remaining_time": "11:45:42"} +{"current_steps": 4274, "total_steps": 7532, "loss": 0.26333582401275635, "lr": 8.606095006562156e-06, "epoch": 1.1349090426238215, "percentage": 56.74, "elapsed_time": "15:25:29", "remaining_time": "11:45:29"} +{"current_steps": 4275, "total_steps": 
7532, "loss": 0.23796264827251434, "lr": 8.601747240538883e-06, "epoch": 1.1351746116053645, "percentage": 56.76, "elapsed_time": "15:25:42", "remaining_time": "11:45:16"} +{"current_steps": 4276, "total_steps": 7532, "loss": 0.23737141489982605, "lr": 8.597399744082251e-06, "epoch": 1.1354401805869074, "percentage": 56.77, "elapsed_time": "15:25:55", "remaining_time": "11:45:03"} +{"current_steps": 4277, "total_steps": 7532, "loss": 0.21073032915592194, "lr": 8.593052518030407e-06, "epoch": 1.1357057495684504, "percentage": 56.78, "elapsed_time": "15:26:08", "remaining_time": "11:44:50"} +{"current_steps": 4278, "total_steps": 7532, "loss": 0.2597163915634155, "lr": 8.588705563221444e-06, "epoch": 1.1359713185499933, "percentage": 56.8, "elapsed_time": "15:26:21", "remaining_time": "11:44:37"} +{"current_steps": 4279, "total_steps": 7532, "loss": 0.24541154503822327, "lr": 8.584358880493402e-06, "epoch": 1.1362368875315363, "percentage": 56.81, "elapsed_time": "15:26:34", "remaining_time": "11:44:24"} +{"current_steps": 4280, "total_steps": 7532, "loss": 0.19294027984142303, "lr": 8.580012470684273e-06, "epoch": 1.1365024565130792, "percentage": 56.82, "elapsed_time": "15:26:47", "remaining_time": "11:44:11"} +{"current_steps": 4281, "total_steps": 7532, "loss": 0.26909738779067993, "lr": 8.575666334631994e-06, "epoch": 1.1367680254946222, "percentage": 56.84, "elapsed_time": "15:27:00", "remaining_time": "11:43:58"} +{"current_steps": 4282, "total_steps": 7532, "loss": 0.2550502121448517, "lr": 8.571320473174444e-06, "epoch": 1.1370335944761651, "percentage": 56.85, "elapsed_time": "15:27:13", "remaining_time": "11:43:45"} +{"current_steps": 4283, "total_steps": 7532, "loss": 0.2256634682416916, "lr": 8.566974887149461e-06, "epoch": 1.137299163457708, "percentage": 56.86, "elapsed_time": "15:27:26", "remaining_time": "11:43:32"} +{"current_steps": 4284, "total_steps": 7532, "loss": 0.26154983043670654, "lr": 8.562629577394817e-06, "epoch": 1.137564732439251, 
"percentage": 56.88, "elapsed_time": "15:27:39", "remaining_time": "11:43:19"} +{"current_steps": 4285, "total_steps": 7532, "loss": 0.24685145914554596, "lr": 8.558284544748239e-06, "epoch": 1.137830301420794, "percentage": 56.89, "elapsed_time": "15:27:52", "remaining_time": "11:43:06"} +{"current_steps": 4286, "total_steps": 7532, "loss": 0.2584421932697296, "lr": 8.553939790047396e-06, "epoch": 1.138095870402337, "percentage": 56.9, "elapsed_time": "15:28:05", "remaining_time": "11:42:53"} +{"current_steps": 4287, "total_steps": 7532, "loss": 0.24582788348197937, "lr": 8.549595314129907e-06, "epoch": 1.13836143938388, "percentage": 56.92, "elapsed_time": "15:28:18", "remaining_time": "11:42:40"} +{"current_steps": 4288, "total_steps": 7532, "loss": 0.26023977994918823, "lr": 8.545251117833334e-06, "epoch": 1.1386270083654229, "percentage": 56.93, "elapsed_time": "15:28:32", "remaining_time": "11:42:27"} +{"current_steps": 4289, "total_steps": 7532, "loss": 0.25575515627861023, "lr": 8.54090720199519e-06, "epoch": 1.1388925773469658, "percentage": 56.94, "elapsed_time": "15:28:44", "remaining_time": "11:42:14"} +{"current_steps": 4290, "total_steps": 7532, "loss": 0.2784460783004761, "lr": 8.53656356745293e-06, "epoch": 1.1391581463285088, "percentage": 56.96, "elapsed_time": "15:28:58", "remaining_time": "11:42:01"} +{"current_steps": 4291, "total_steps": 7532, "loss": 0.24723297357559204, "lr": 8.532220215043953e-06, "epoch": 1.1394237153100517, "percentage": 56.97, "elapsed_time": "15:29:10", "remaining_time": "11:41:48"} +{"current_steps": 4292, "total_steps": 7532, "loss": 0.24694418907165527, "lr": 8.52787714560561e-06, "epoch": 1.1396892842915947, "percentage": 56.98, "elapsed_time": "15:29:24", "remaining_time": "11:41:35"} +{"current_steps": 4293, "total_steps": 7532, "loss": 0.19976040720939636, "lr": 8.52353435997519e-06, "epoch": 1.1399548532731376, "percentage": 57.0, "elapsed_time": "15:29:36", "remaining_time": "11:41:22"} +{"current_steps": 4294, 
"total_steps": 7532, "loss": 0.21742458641529083, "lr": 8.519191858989932e-06, "epoch": 1.1402204222546806, "percentage": 57.01, "elapsed_time": "15:29:50", "remaining_time": "11:41:09"} +{"current_steps": 4295, "total_steps": 7532, "loss": 0.26382917165756226, "lr": 8.514849643487018e-06, "epoch": 1.1404859912362235, "percentage": 57.02, "elapsed_time": "15:30:02", "remaining_time": "11:40:56"} +{"current_steps": 4296, "total_steps": 7532, "loss": 0.30778488516807556, "lr": 8.510507714303577e-06, "epoch": 1.1407515602177665, "percentage": 57.04, "elapsed_time": "15:30:16", "remaining_time": "11:40:44"} +{"current_steps": 4297, "total_steps": 7532, "loss": 0.20894449949264526, "lr": 8.506166072276681e-06, "epoch": 1.1410171291993096, "percentage": 57.05, "elapsed_time": "15:30:28", "remaining_time": "11:40:30"} +{"current_steps": 4298, "total_steps": 7532, "loss": 0.2389567494392395, "lr": 8.50182471824335e-06, "epoch": 1.1412826981808526, "percentage": 57.06, "elapsed_time": "15:30:41", "remaining_time": "11:40:17"} +{"current_steps": 4299, "total_steps": 7532, "loss": 0.20531126856803894, "lr": 8.497483653040545e-06, "epoch": 1.1415482671623955, "percentage": 57.08, "elapsed_time": "15:30:54", "remaining_time": "11:40:04"} +{"current_steps": 4300, "total_steps": 7532, "loss": 0.2577363848686218, "lr": 8.49314287750517e-06, "epoch": 1.1418138361439385, "percentage": 57.09, "elapsed_time": "15:31:07", "remaining_time": "11:39:51"} +{"current_steps": 4301, "total_steps": 7532, "loss": 0.24225997924804688, "lr": 8.488802392474076e-06, "epoch": 1.1420794051254815, "percentage": 57.1, "elapsed_time": "15:31:27", "remaining_time": "11:39:43"} +{"current_steps": 4302, "total_steps": 7532, "loss": 0.26494917273521423, "lr": 8.484462198784058e-06, "epoch": 1.1423449741070244, "percentage": 57.12, "elapsed_time": "15:31:39", "remaining_time": "11:39:30"} +{"current_steps": 4303, "total_steps": 7532, "loss": 0.24903994798660278, "lr": 8.480122297271855e-06, "epoch": 
1.1426105430885674, "percentage": 57.13, "elapsed_time": "15:31:53", "remaining_time": "11:39:17"} +{"current_steps": 4304, "total_steps": 7532, "loss": 0.25291907787323, "lr": 8.475782688774147e-06, "epoch": 1.1428761120701103, "percentage": 57.14, "elapsed_time": "15:32:05", "remaining_time": "11:39:04"} +{"current_steps": 4305, "total_steps": 7532, "loss": 0.22958475351333618, "lr": 8.47144337412756e-06, "epoch": 1.1431416810516533, "percentage": 57.16, "elapsed_time": "15:32:18", "remaining_time": "11:38:51"} +{"current_steps": 4306, "total_steps": 7532, "loss": 0.2305452972650528, "lr": 8.46710435416866e-06, "epoch": 1.1434072500331962, "percentage": 57.17, "elapsed_time": "15:32:31", "remaining_time": "11:38:38"} +{"current_steps": 4307, "total_steps": 7532, "loss": 0.25028055906295776, "lr": 8.462765629733965e-06, "epoch": 1.1436728190147392, "percentage": 57.18, "elapsed_time": "15:32:44", "remaining_time": "11:38:25"} +{"current_steps": 4308, "total_steps": 7532, "loss": 0.24873222410678864, "lr": 8.458427201659926e-06, "epoch": 1.1439383879962821, "percentage": 57.2, "elapsed_time": "15:32:57", "remaining_time": "11:38:12"} +{"current_steps": 4309, "total_steps": 7532, "loss": 0.23396535217761993, "lr": 8.454089070782943e-06, "epoch": 1.144203956977825, "percentage": 57.21, "elapsed_time": "15:33:10", "remaining_time": "11:37:58"} +{"current_steps": 4310, "total_steps": 7532, "loss": 0.27120494842529297, "lr": 8.449751237939354e-06, "epoch": 1.144469525959368, "percentage": 57.22, "elapsed_time": "15:33:23", "remaining_time": "11:37:46"} +{"current_steps": 4311, "total_steps": 7532, "loss": 0.2734759449958801, "lr": 8.445413703965441e-06, "epoch": 1.144735094940911, "percentage": 57.24, "elapsed_time": "15:33:36", "remaining_time": "11:37:32"} +{"current_steps": 4312, "total_steps": 7532, "loss": 0.25353512167930603, "lr": 8.441076469697434e-06, "epoch": 1.145000663922454, "percentage": 57.25, "elapsed_time": "15:33:49", "remaining_time": "11:37:19"} 
+{"current_steps": 4313, "total_steps": 7532, "loss": 0.23851020634174347, "lr": 8.436739535971497e-06, "epoch": 1.1452662329039969, "percentage": 57.26, "elapsed_time": "15:34:01", "remaining_time": "11:37:06"} +{"current_steps": 4314, "total_steps": 7532, "loss": 0.26320093870162964, "lr": 8.432402903623741e-06, "epoch": 1.1455318018855398, "percentage": 57.28, "elapsed_time": "15:34:15", "remaining_time": "11:36:53"} +{"current_steps": 4315, "total_steps": 7532, "loss": 0.23859955370426178, "lr": 8.428066573490211e-06, "epoch": 1.1457973708670828, "percentage": 57.29, "elapsed_time": "15:34:27", "remaining_time": "11:36:40"} +{"current_steps": 4316, "total_steps": 7532, "loss": 0.2636772096157074, "lr": 8.423730546406911e-06, "epoch": 1.1460629398486257, "percentage": 57.3, "elapsed_time": "15:34:41", "remaining_time": "11:36:27"} +{"current_steps": 4317, "total_steps": 7532, "loss": 0.2656415104866028, "lr": 8.419394823209773e-06, "epoch": 1.1463285088301687, "percentage": 57.32, "elapsed_time": "15:34:53", "remaining_time": "11:36:14"} +{"current_steps": 4318, "total_steps": 7532, "loss": 0.2872830033302307, "lr": 8.41505940473467e-06, "epoch": 1.1465940778117116, "percentage": 57.33, "elapsed_time": "15:35:07", "remaining_time": "11:36:02"} +{"current_steps": 4319, "total_steps": 7532, "loss": 0.229783833026886, "lr": 8.410724291817422e-06, "epoch": 1.1468596467932546, "percentage": 57.34, "elapsed_time": "15:35:19", "remaining_time": "11:35:48"} +{"current_steps": 4320, "total_steps": 7532, "loss": 0.24418675899505615, "lr": 8.406389485293786e-06, "epoch": 1.1471252157747975, "percentage": 57.36, "elapsed_time": "15:35:33", "remaining_time": "11:35:36"} +{"current_steps": 4321, "total_steps": 7532, "loss": 0.2535584270954132, "lr": 8.402054985999464e-06, "epoch": 1.1473907847563405, "percentage": 57.37, "elapsed_time": "15:35:46", "remaining_time": "11:35:22"} +{"current_steps": 4322, "total_steps": 7532, "loss": 0.23207828402519226, "lr": 
8.397720794770093e-06, "epoch": 1.1476563537378834, "percentage": 57.38, "elapsed_time": "15:35:59", "remaining_time": "11:35:10"} +{"current_steps": 4323, "total_steps": 7532, "loss": 0.27990391850471497, "lr": 8.393386912441257e-06, "epoch": 1.1479219227194264, "percentage": 57.4, "elapsed_time": "15:36:11", "remaining_time": "11:34:56"} +{"current_steps": 4324, "total_steps": 7532, "loss": 0.2098318189382553, "lr": 8.38905333984847e-06, "epoch": 1.1481874917009693, "percentage": 57.41, "elapsed_time": "15:36:25", "remaining_time": "11:34:44"} +{"current_steps": 4325, "total_steps": 7532, "loss": 0.25303804874420166, "lr": 8.384720077827204e-06, "epoch": 1.1484530606825123, "percentage": 57.42, "elapsed_time": "15:36:37", "remaining_time": "11:34:30"} +{"current_steps": 4326, "total_steps": 7532, "loss": 0.23481838405132294, "lr": 8.380387127212858e-06, "epoch": 1.1487186296640552, "percentage": 57.43, "elapsed_time": "15:36:50", "remaining_time": "11:34:17"} +{"current_steps": 4327, "total_steps": 7532, "loss": 0.24842356145381927, "lr": 8.376054488840771e-06, "epoch": 1.1489841986455982, "percentage": 57.45, "elapsed_time": "15:37:03", "remaining_time": "11:34:04"} +{"current_steps": 4328, "total_steps": 7532, "loss": 0.23927366733551025, "lr": 8.37172216354623e-06, "epoch": 1.1492497676271411, "percentage": 57.46, "elapsed_time": "15:37:16", "remaining_time": "11:33:51"} +{"current_steps": 4329, "total_steps": 7532, "loss": 0.23836453258991241, "lr": 8.367390152164448e-06, "epoch": 1.149515336608684, "percentage": 57.47, "elapsed_time": "15:37:30", "remaining_time": "11:33:39"} +{"current_steps": 4330, "total_steps": 7532, "loss": 0.2562161982059479, "lr": 8.36305845553059e-06, "epoch": 1.149780905590227, "percentage": 57.49, "elapsed_time": "15:37:43", "remaining_time": "11:33:26"} +{"current_steps": 4331, "total_steps": 7532, "loss": 0.21255920827388763, "lr": 8.358727074479755e-06, "epoch": 1.15004647457177, "percentage": 57.5, "elapsed_time": "15:37:56", 
"remaining_time": "11:33:13"} +{"current_steps": 4332, "total_steps": 7532, "loss": 0.24200043082237244, "lr": 8.354396009846985e-06, "epoch": 1.150312043553313, "percentage": 57.51, "elapsed_time": "15:38:09", "remaining_time": "11:33:00"} +{"current_steps": 4333, "total_steps": 7532, "loss": 0.23582379519939423, "lr": 8.35006526246725e-06, "epoch": 1.150577612534856, "percentage": 57.53, "elapsed_time": "15:38:22", "remaining_time": "11:32:47"} +{"current_steps": 4334, "total_steps": 7532, "loss": 0.21554499864578247, "lr": 8.34573483317548e-06, "epoch": 1.1508431815163989, "percentage": 57.54, "elapsed_time": "15:38:35", "remaining_time": "11:32:34"} +{"current_steps": 4335, "total_steps": 7532, "loss": 0.2789759039878845, "lr": 8.341404722806525e-06, "epoch": 1.1511087504979418, "percentage": 57.55, "elapsed_time": "15:38:48", "remaining_time": "11:32:21"} +{"current_steps": 4336, "total_steps": 7532, "loss": 0.24677832424640656, "lr": 8.337074932195175e-06, "epoch": 1.1513743194794848, "percentage": 57.57, "elapsed_time": "15:39:01", "remaining_time": "11:32:08"} +{"current_steps": 4337, "total_steps": 7532, "loss": 0.28122392296791077, "lr": 8.332745462176166e-06, "epoch": 1.1516398884610277, "percentage": 57.58, "elapsed_time": "15:39:14", "remaining_time": "11:31:55"} +{"current_steps": 4338, "total_steps": 7532, "loss": 0.23219403624534607, "lr": 8.328416313584169e-06, "epoch": 1.1519054574425707, "percentage": 57.59, "elapsed_time": "15:39:27", "remaining_time": "11:31:42"} +{"current_steps": 4339, "total_steps": 7532, "loss": 0.19928379356861115, "lr": 8.324087487253792e-06, "epoch": 1.1521710264241136, "percentage": 57.61, "elapsed_time": "15:39:40", "remaining_time": "11:31:29"} +{"current_steps": 4340, "total_steps": 7532, "loss": 0.27730467915534973, "lr": 8.31975898401958e-06, "epoch": 1.1524365954056566, "percentage": 57.62, "elapsed_time": "15:39:53", "remaining_time": "11:31:16"} +{"current_steps": 4341, "total_steps": 7532, "loss": 
0.25462737679481506, "lr": 8.315430804716022e-06, "epoch": 1.1527021643871995, "percentage": 57.63, "elapsed_time": "15:40:05", "remaining_time": "11:31:03"} +{"current_steps": 4342, "total_steps": 7532, "loss": 0.2363007366657257, "lr": 8.311102950177533e-06, "epoch": 1.1529677333687425, "percentage": 57.65, "elapsed_time": "15:40:19", "remaining_time": "11:30:50"} +{"current_steps": 4343, "total_steps": 7532, "loss": 0.2648352384567261, "lr": 8.306775421238482e-06, "epoch": 1.1532333023502854, "percentage": 57.66, "elapsed_time": "15:40:32", "remaining_time": "11:30:37"} +{"current_steps": 4344, "total_steps": 7532, "loss": 0.25645309686660767, "lr": 8.302448218733158e-06, "epoch": 1.1534988713318284, "percentage": 57.67, "elapsed_time": "15:40:45", "remaining_time": "11:30:24"} +{"current_steps": 4345, "total_steps": 7532, "loss": 0.22962522506713867, "lr": 8.298121343495797e-06, "epoch": 1.1537644403133713, "percentage": 57.69, "elapsed_time": "15:40:57", "remaining_time": "11:30:11"} +{"current_steps": 4346, "total_steps": 7532, "loss": 0.21269623935222626, "lr": 8.293794796360569e-06, "epoch": 1.1540300092949143, "percentage": 57.7, "elapsed_time": "15:41:11", "remaining_time": "11:29:58"} +{"current_steps": 4347, "total_steps": 7532, "loss": 0.2518436014652252, "lr": 8.289468578161581e-06, "epoch": 1.1542955782764572, "percentage": 57.71, "elapsed_time": "15:41:24", "remaining_time": "11:29:45"} +{"current_steps": 4348, "total_steps": 7532, "loss": 0.2318439483642578, "lr": 8.285142689732877e-06, "epoch": 1.1545611472580002, "percentage": 57.73, "elapsed_time": "15:41:37", "remaining_time": "11:29:32"} +{"current_steps": 4349, "total_steps": 7532, "loss": 0.2278512567281723, "lr": 8.280817131908438e-06, "epoch": 1.1548267162395431, "percentage": 57.74, "elapsed_time": "15:41:50", "remaining_time": "11:29:19"} +{"current_steps": 4350, "total_steps": 7532, "loss": 0.2521114945411682, "lr": 8.27649190552218e-06, "epoch": 1.155092285221086, "percentage": 57.75, 
"elapsed_time": "15:42:03", "remaining_time": "11:29:06"} +{"current_steps": 4351, "total_steps": 7532, "loss": 0.2565760016441345, "lr": 8.272167011407955e-06, "epoch": 1.155357854202629, "percentage": 57.77, "elapsed_time": "15:42:16", "remaining_time": "11:28:53"} +{"current_steps": 4352, "total_steps": 7532, "loss": 0.2603546679019928, "lr": 8.267842450399552e-06, "epoch": 1.155623423184172, "percentage": 57.78, "elapsed_time": "15:42:29", "remaining_time": "11:28:40"} +{"current_steps": 4353, "total_steps": 7532, "loss": 0.2175855189561844, "lr": 8.263518223330698e-06, "epoch": 1.155888992165715, "percentage": 57.79, "elapsed_time": "15:42:42", "remaining_time": "11:28:27"} +{"current_steps": 4354, "total_steps": 7532, "loss": 0.24521774053573608, "lr": 8.25919433103505e-06, "epoch": 1.156154561147258, "percentage": 57.81, "elapsed_time": "15:42:55", "remaining_time": "11:28:14"} +{"current_steps": 4355, "total_steps": 7532, "loss": 0.29673823714256287, "lr": 8.254870774346203e-06, "epoch": 1.1564201301288008, "percentage": 57.82, "elapsed_time": "15:43:08", "remaining_time": "11:28:01"} +{"current_steps": 4356, "total_steps": 7532, "loss": 0.26994144916534424, "lr": 8.25054755409769e-06, "epoch": 1.1566856991103438, "percentage": 57.83, "elapsed_time": "15:43:21", "remaining_time": "11:27:48"} +{"current_steps": 4357, "total_steps": 7532, "loss": 0.2545935809612274, "lr": 8.246224671122974e-06, "epoch": 1.1569512680918868, "percentage": 57.85, "elapsed_time": "15:43:34", "remaining_time": "11:27:36"} +{"current_steps": 4358, "total_steps": 7532, "loss": 0.23589034378528595, "lr": 8.241902126255458e-06, "epoch": 1.1572168370734297, "percentage": 57.86, "elapsed_time": "15:43:47", "remaining_time": "11:27:22"} +{"current_steps": 4359, "total_steps": 7532, "loss": 0.2617190480232239, "lr": 8.237579920328478e-06, "epoch": 1.1574824060549727, "percentage": 57.87, "elapsed_time": "15:44:01", "remaining_time": "11:27:10"} +{"current_steps": 4360, "total_steps": 
7532, "loss": 0.3092418313026428, "lr": 8.233258054175302e-06, "epoch": 1.1577479750365158, "percentage": 57.89, "elapsed_time": "15:44:13", "remaining_time": "11:26:56"} +{"current_steps": 4361, "total_steps": 7532, "loss": 0.22873908281326294, "lr": 8.228936528629138e-06, "epoch": 1.1580135440180588, "percentage": 57.9, "elapsed_time": "15:44:26", "remaining_time": "11:26:43"} +{"current_steps": 4362, "total_steps": 7532, "loss": 0.22549089789390564, "lr": 8.224615344523123e-06, "epoch": 1.1582791129996017, "percentage": 57.91, "elapsed_time": "15:44:39", "remaining_time": "11:26:31"} +{"current_steps": 4363, "total_steps": 7532, "loss": 0.19141459465026855, "lr": 8.22029450269033e-06, "epoch": 1.1585446819811447, "percentage": 57.93, "elapsed_time": "15:44:52", "remaining_time": "11:26:18"} +{"current_steps": 4364, "total_steps": 7532, "loss": 0.24277547001838684, "lr": 8.21597400396377e-06, "epoch": 1.1588102509626876, "percentage": 57.94, "elapsed_time": "15:45:06", "remaining_time": "11:26:05"} +{"current_steps": 4365, "total_steps": 7532, "loss": 0.2429513931274414, "lr": 8.21165384917638e-06, "epoch": 1.1590758199442306, "percentage": 57.95, "elapsed_time": "15:45:18", "remaining_time": "11:25:52"} +{"current_steps": 4366, "total_steps": 7532, "loss": 0.24710172414779663, "lr": 8.207334039161035e-06, "epoch": 1.1593413889257735, "percentage": 57.97, "elapsed_time": "15:45:32", "remaining_time": "11:25:39"} +{"current_steps": 4367, "total_steps": 7532, "loss": 0.2553783357143402, "lr": 8.203014574750546e-06, "epoch": 1.1596069579073165, "percentage": 57.98, "elapsed_time": "15:45:45", "remaining_time": "11:25:26"} +{"current_steps": 4368, "total_steps": 7532, "loss": 0.2558436095714569, "lr": 8.198695456777653e-06, "epoch": 1.1598725268888594, "percentage": 57.99, "elapsed_time": "15:45:59", "remaining_time": "11:25:13"} +{"current_steps": 4369, "total_steps": 7532, "loss": 0.20780377089977264, "lr": 8.19437668607503e-06, "epoch": 1.1601380958704024, 
"percentage": 58.01, "elapsed_time": "15:46:11", "remaining_time": "11:25:00"} +{"current_steps": 4370, "total_steps": 7532, "loss": 0.22397254407405853, "lr": 8.190058263475288e-06, "epoch": 1.1604036648519453, "percentage": 58.02, "elapsed_time": "15:46:25", "remaining_time": "11:24:48"} +{"current_steps": 4371, "total_steps": 7532, "loss": 0.2763773798942566, "lr": 8.185740189810967e-06, "epoch": 1.1606692338334883, "percentage": 58.03, "elapsed_time": "15:46:38", "remaining_time": "11:24:35"} +{"current_steps": 4372, "total_steps": 7532, "loss": 0.2801940441131592, "lr": 8.181422465914541e-06, "epoch": 1.1609348028150313, "percentage": 58.05, "elapsed_time": "15:46:51", "remaining_time": "11:24:22"} +{"current_steps": 4373, "total_steps": 7532, "loss": 0.20949441194534302, "lr": 8.177105092618413e-06, "epoch": 1.1612003717965742, "percentage": 58.06, "elapsed_time": "15:47:04", "remaining_time": "11:24:09"} +{"current_steps": 4374, "total_steps": 7532, "loss": 0.24503354728221893, "lr": 8.172788070754927e-06, "epoch": 1.1614659407781172, "percentage": 58.07, "elapsed_time": "15:47:17", "remaining_time": "11:23:56"} +{"current_steps": 4375, "total_steps": 7532, "loss": 0.256147563457489, "lr": 8.16847140115635e-06, "epoch": 1.16173150975966, "percentage": 58.09, "elapsed_time": "15:47:30", "remaining_time": "11:23:43"} +{"current_steps": 4376, "total_steps": 7532, "loss": 0.2178848683834076, "lr": 8.164155084654886e-06, "epoch": 1.161997078741203, "percentage": 58.1, "elapsed_time": "15:47:43", "remaining_time": "11:23:30"} +{"current_steps": 4377, "total_steps": 7532, "loss": 0.22624582052230835, "lr": 8.159839122082668e-06, "epoch": 1.162262647722746, "percentage": 58.11, "elapsed_time": "15:47:56", "remaining_time": "11:23:17"} +{"current_steps": 4378, "total_steps": 7532, "loss": 0.2184191346168518, "lr": 8.155523514271764e-06, "epoch": 1.162528216704289, "percentage": 58.13, "elapsed_time": "15:48:09", "remaining_time": "11:23:04"} +{"current_steps": 4379, 
"total_steps": 7532, "loss": 0.2623840868473053, "lr": 8.151208262054175e-06, "epoch": 1.162793785685832, "percentage": 58.14, "elapsed_time": "15:48:22", "remaining_time": "11:22:51"} +{"current_steps": 4380, "total_steps": 7532, "loss": 0.27181199193000793, "lr": 8.14689336626183e-06, "epoch": 1.1630593546673749, "percentage": 58.15, "elapsed_time": "15:48:35", "remaining_time": "11:22:38"} +{"current_steps": 4381, "total_steps": 7532, "loss": 0.2791554629802704, "lr": 8.142578827726587e-06, "epoch": 1.1633249236489178, "percentage": 58.17, "elapsed_time": "15:48:48", "remaining_time": "11:22:25"} +{"current_steps": 4382, "total_steps": 7532, "loss": 0.2466641068458557, "lr": 8.13826464728024e-06, "epoch": 1.1635904926304608, "percentage": 58.18, "elapsed_time": "15:49:01", "remaining_time": "11:22:12"} +{"current_steps": 4383, "total_steps": 7532, "loss": 0.1951724737882614, "lr": 8.133950825754511e-06, "epoch": 1.1638560616120037, "percentage": 58.19, "elapsed_time": "15:49:14", "remaining_time": "11:21:59"} +{"current_steps": 4384, "total_steps": 7532, "loss": 0.2520062029361725, "lr": 8.129637363981056e-06, "epoch": 1.1641216305935467, "percentage": 58.2, "elapsed_time": "15:49:27", "remaining_time": "11:21:46"} +{"current_steps": 4385, "total_steps": 7532, "loss": 0.24101334810256958, "lr": 8.12532426279146e-06, "epoch": 1.1643871995750896, "percentage": 58.22, "elapsed_time": "15:49:40", "remaining_time": "11:21:33"} +{"current_steps": 4386, "total_steps": 7532, "loss": 0.2741190791130066, "lr": 8.121011523017235e-06, "epoch": 1.1646527685566326, "percentage": 58.23, "elapsed_time": "15:49:53", "remaining_time": "11:21:20"} +{"current_steps": 4387, "total_steps": 7532, "loss": 0.2575281858444214, "lr": 8.116699145489822e-06, "epoch": 1.1649183375381755, "percentage": 58.24, "elapsed_time": "15:50:06", "remaining_time": "11:21:07"} +{"current_steps": 4388, "total_steps": 7532, "loss": 0.2557298243045807, "lr": 8.112387131040608e-06, "epoch": 
1.1651839065197185, "percentage": 58.26, "elapsed_time": "15:50:19", "remaining_time": "11:20:54"} +{"current_steps": 4389, "total_steps": 7532, "loss": 0.27485036849975586, "lr": 8.108075480500892e-06, "epoch": 1.1654494755012614, "percentage": 58.27, "elapsed_time": "15:50:32", "remaining_time": "11:20:41"} +{"current_steps": 4390, "total_steps": 7532, "loss": 0.26458340883255005, "lr": 8.103764194701909e-06, "epoch": 1.1657150444828044, "percentage": 58.28, "elapsed_time": "15:50:45", "remaining_time": "11:20:28"} +{"current_steps": 4391, "total_steps": 7532, "loss": 0.2281840592622757, "lr": 8.099453274474827e-06, "epoch": 1.1659806134643473, "percentage": 58.3, "elapsed_time": "15:50:58", "remaining_time": "11:20:15"} +{"current_steps": 4392, "total_steps": 7532, "loss": 0.24956555664539337, "lr": 8.095142720650739e-06, "epoch": 1.1662461824458903, "percentage": 58.31, "elapsed_time": "15:51:11", "remaining_time": "11:20:02"} +{"current_steps": 4393, "total_steps": 7532, "loss": 0.22619420289993286, "lr": 8.090832534060671e-06, "epoch": 1.1665117514274332, "percentage": 58.32, "elapsed_time": "15:51:24", "remaining_time": "11:19:49"} +{"current_steps": 4394, "total_steps": 7532, "loss": 0.2780688405036926, "lr": 8.086522715535571e-06, "epoch": 1.1667773204089762, "percentage": 58.34, "elapsed_time": "15:51:37", "remaining_time": "11:19:36"} +{"current_steps": 4395, "total_steps": 7532, "loss": 0.2600886821746826, "lr": 8.082213265906323e-06, "epoch": 1.1670428893905191, "percentage": 58.35, "elapsed_time": "15:51:50", "remaining_time": "11:19:23"} +{"current_steps": 4396, "total_steps": 7532, "loss": 0.25049078464508057, "lr": 8.077904186003736e-06, "epoch": 1.167308458372062, "percentage": 58.36, "elapsed_time": "15:52:03", "remaining_time": "11:19:10"} +{"current_steps": 4397, "total_steps": 7532, "loss": 0.27745798230171204, "lr": 8.073595476658558e-06, "epoch": 1.167574027353605, "percentage": 58.38, "elapsed_time": "15:52:16", "remaining_time": 
"11:18:57"} +{"current_steps": 4398, "total_steps": 7532, "loss": 0.2191929668188095, "lr": 8.069287138701452e-06, "epoch": 1.167839596335148, "percentage": 58.39, "elapsed_time": "15:52:29", "remaining_time": "11:18:44"} +{"current_steps": 4399, "total_steps": 7532, "loss": 0.24307313561439514, "lr": 8.064979172963014e-06, "epoch": 1.168105165316691, "percentage": 58.4, "elapsed_time": "15:52:42", "remaining_time": "11:18:31"} +{"current_steps": 4400, "total_steps": 7532, "loss": 0.23036238551139832, "lr": 8.060671580273772e-06, "epoch": 1.168370734298234, "percentage": 58.42, "elapsed_time": "15:52:55", "remaining_time": "11:18:18"} +{"current_steps": 4401, "total_steps": 7532, "loss": 0.2394433617591858, "lr": 8.056364361464176e-06, "epoch": 1.1686363032797769, "percentage": 58.43, "elapsed_time": "15:53:19", "remaining_time": "11:18:13"} +{"current_steps": 4402, "total_steps": 7532, "loss": 0.24099211394786835, "lr": 8.052057517364608e-06, "epoch": 1.1689018722613198, "percentage": 58.44, "elapsed_time": "15:53:32", "remaining_time": "11:18:00"} +{"current_steps": 4403, "total_steps": 7532, "loss": 0.22036173939704895, "lr": 8.047751048805376e-06, "epoch": 1.1691674412428628, "percentage": 58.46, "elapsed_time": "15:53:45", "remaining_time": "11:17:47"} +{"current_steps": 4404, "total_steps": 7532, "loss": 0.22400429844856262, "lr": 8.043444956616717e-06, "epoch": 1.1694330102244057, "percentage": 58.47, "elapsed_time": "15:53:58", "remaining_time": "11:17:34"} +{"current_steps": 4405, "total_steps": 7532, "loss": 0.21649131178855896, "lr": 8.039139241628792e-06, "epoch": 1.1696985792059487, "percentage": 58.48, "elapsed_time": "15:54:11", "remaining_time": "11:17:21"} +{"current_steps": 4406, "total_steps": 7532, "loss": 0.23412205278873444, "lr": 8.034833904671698e-06, "epoch": 1.1699641481874916, "percentage": 58.5, "elapsed_time": "15:54:24", "remaining_time": "11:17:08"} +{"current_steps": 4407, "total_steps": 7532, "loss": 0.23822304606437683, "lr": 
8.030528946575453e-06, "epoch": 1.1702297171690346, "percentage": 58.51, "elapsed_time": "15:54:36", "remaining_time": "11:16:54"} +{"current_steps": 4408, "total_steps": 7532, "loss": 0.29250186681747437, "lr": 8.026224368169998e-06, "epoch": 1.1704952861505775, "percentage": 58.52, "elapsed_time": "15:54:49", "remaining_time": "11:16:41"} +{"current_steps": 4409, "total_steps": 7532, "loss": 0.26794207096099854, "lr": 8.021920170285205e-06, "epoch": 1.1707608551321207, "percentage": 58.54, "elapsed_time": "15:55:02", "remaining_time": "11:16:28"} +{"current_steps": 4410, "total_steps": 7532, "loss": 0.2573787271976471, "lr": 8.017616353750874e-06, "epoch": 1.1710264241136636, "percentage": 58.55, "elapsed_time": "15:55:15", "remaining_time": "11:16:15"} +{"current_steps": 4411, "total_steps": 7532, "loss": 0.2744356691837311, "lr": 8.01331291939673e-06, "epoch": 1.1712919930952066, "percentage": 58.56, "elapsed_time": "15:55:27", "remaining_time": "11:16:02"} +{"current_steps": 4412, "total_steps": 7532, "loss": 0.2582886815071106, "lr": 8.009009868052424e-06, "epoch": 1.1715575620767495, "percentage": 58.58, "elapsed_time": "15:55:40", "remaining_time": "11:15:48"} +{"current_steps": 4413, "total_steps": 7532, "loss": 0.2553568482398987, "lr": 8.004707200547534e-06, "epoch": 1.1718231310582925, "percentage": 58.59, "elapsed_time": "15:55:53", "remaining_time": "11:15:36"} +{"current_steps": 4414, "total_steps": 7532, "loss": 0.2670289874076843, "lr": 8.00040491771156e-06, "epoch": 1.1720887000398355, "percentage": 58.6, "elapsed_time": "15:56:06", "remaining_time": "11:15:22"} +{"current_steps": 4415, "total_steps": 7532, "loss": 0.215460866689682, "lr": 7.99610302037394e-06, "epoch": 1.1723542690213784, "percentage": 58.62, "elapsed_time": "15:56:19", "remaining_time": "11:15:10"} +{"current_steps": 4416, "total_steps": 7532, "loss": 0.26481571793556213, "lr": 7.991801509364023e-06, "epoch": 1.1726198380029214, "percentage": 58.63, "elapsed_time": "15:56:32", 
"remaining_time": "11:14:56"} +{"current_steps": 4417, "total_steps": 7532, "loss": 0.2060776650905609, "lr": 7.98750038551109e-06, "epoch": 1.1728854069844643, "percentage": 58.64, "elapsed_time": "15:56:45", "remaining_time": "11:14:43"} +{"current_steps": 4418, "total_steps": 7532, "loss": 0.2401561588048935, "lr": 7.983199649644349e-06, "epoch": 1.1731509759660073, "percentage": 58.66, "elapsed_time": "15:56:57", "remaining_time": "11:14:30"} +{"current_steps": 4419, "total_steps": 7532, "loss": 0.2545842230319977, "lr": 7.978899302592927e-06, "epoch": 1.1734165449475502, "percentage": 58.67, "elapsed_time": "15:57:11", "remaining_time": "11:14:17"} +{"current_steps": 4420, "total_steps": 7532, "loss": 0.29925093054771423, "lr": 7.974599345185884e-06, "epoch": 1.1736821139290932, "percentage": 58.68, "elapsed_time": "15:57:24", "remaining_time": "11:14:04"} +{"current_steps": 4421, "total_steps": 7532, "loss": 0.23944757878780365, "lr": 7.9702997782522e-06, "epoch": 1.1739476829106361, "percentage": 58.7, "elapsed_time": "15:57:37", "remaining_time": "11:13:52"} +{"current_steps": 4422, "total_steps": 7532, "loss": 0.23745761811733246, "lr": 7.96600060262078e-06, "epoch": 1.174213251892179, "percentage": 58.71, "elapsed_time": "15:57:50", "remaining_time": "11:13:38"} +{"current_steps": 4423, "total_steps": 7532, "loss": 0.22170330584049225, "lr": 7.961701819120453e-06, "epoch": 1.174478820873722, "percentage": 58.72, "elapsed_time": "15:58:03", "remaining_time": "11:13:26"} +{"current_steps": 4424, "total_steps": 7532, "loss": 0.2645890712738037, "lr": 7.95740342857998e-06, "epoch": 1.174744389855265, "percentage": 58.74, "elapsed_time": "15:58:15", "remaining_time": "11:13:12"} +{"current_steps": 4425, "total_steps": 7532, "loss": 0.25232207775115967, "lr": 7.953105431828032e-06, "epoch": 1.175009958836808, "percentage": 58.75, "elapsed_time": "15:58:28", "remaining_time": "11:12:59"} +{"current_steps": 4426, "total_steps": 7532, "loss": 0.2656644880771637, 
"lr": 7.948807829693219e-06, "epoch": 1.1752755278183509, "percentage": 58.76, "elapsed_time": "15:58:41", "remaining_time": "11:12:46"} +{"current_steps": 4427, "total_steps": 7532, "loss": 0.25290653109550476, "lr": 7.944510623004063e-06, "epoch": 1.1755410967998938, "percentage": 58.78, "elapsed_time": "15:58:54", "remaining_time": "11:12:33"} +{"current_steps": 4428, "total_steps": 7532, "loss": 0.27464741468429565, "lr": 7.940213812589018e-06, "epoch": 1.1758066657814368, "percentage": 58.79, "elapsed_time": "15:59:07", "remaining_time": "11:12:20"} +{"current_steps": 4429, "total_steps": 7532, "loss": 0.2562064528465271, "lr": 7.935917399276455e-06, "epoch": 1.1760722347629797, "percentage": 58.8, "elapsed_time": "15:59:20", "remaining_time": "11:12:07"} +{"current_steps": 4430, "total_steps": 7532, "loss": 0.267793208360672, "lr": 7.931621383894676e-06, "epoch": 1.1763378037445227, "percentage": 58.82, "elapsed_time": "15:59:33", "remaining_time": "11:11:54"} +{"current_steps": 4431, "total_steps": 7532, "loss": 0.23651085793972015, "lr": 7.9273257672719e-06, "epoch": 1.1766033727260656, "percentage": 58.83, "elapsed_time": "15:59:45", "remaining_time": "11:11:41"} +{"current_steps": 4432, "total_steps": 7532, "loss": 0.23691008985042572, "lr": 7.923030550236267e-06, "epoch": 1.1768689417076086, "percentage": 58.84, "elapsed_time": "15:59:59", "remaining_time": "11:11:28"} +{"current_steps": 4433, "total_steps": 7532, "loss": 0.24495704472064972, "lr": 7.918735733615852e-06, "epoch": 1.1771345106891515, "percentage": 58.86, "elapsed_time": "16:00:12", "remaining_time": "11:11:15"} +{"current_steps": 4434, "total_steps": 7532, "loss": 0.25423017144203186, "lr": 7.91444131823864e-06, "epoch": 1.1774000796706945, "percentage": 58.87, "elapsed_time": "16:00:25", "remaining_time": "11:11:02"} +{"current_steps": 4435, "total_steps": 7532, "loss": 0.22870390117168427, "lr": 7.910147304932548e-06, "epoch": 1.1776656486522374, "percentage": 58.88, "elapsed_time": 
"16:00:37", "remaining_time": "11:10:49"} +{"current_steps": 4436, "total_steps": 7532, "loss": 0.23037508130073547, "lr": 7.905853694525405e-06, "epoch": 1.1779312176337804, "percentage": 58.9, "elapsed_time": "16:00:51", "remaining_time": "11:10:36"} +{"current_steps": 4437, "total_steps": 7532, "loss": 0.31184864044189453, "lr": 7.901560487844973e-06, "epoch": 1.1781967866153233, "percentage": 58.91, "elapsed_time": "16:01:03", "remaining_time": "11:10:22"} +{"current_steps": 4438, "total_steps": 7532, "loss": 0.24140426516532898, "lr": 7.89726768571893e-06, "epoch": 1.1784623555968663, "percentage": 58.92, "elapsed_time": "16:01:16", "remaining_time": "11:10:09"} +{"current_steps": 4439, "total_steps": 7532, "loss": 0.25602301955223083, "lr": 7.892975288974877e-06, "epoch": 1.1787279245784092, "percentage": 58.94, "elapsed_time": "16:01:29", "remaining_time": "11:09:56"} +{"current_steps": 4440, "total_steps": 7532, "loss": 0.2717514932155609, "lr": 7.888683298440339e-06, "epoch": 1.1789934935599522, "percentage": 58.95, "elapsed_time": "16:01:42", "remaining_time": "11:09:43"} +{"current_steps": 4441, "total_steps": 7532, "loss": 0.252475380897522, "lr": 7.884391714942757e-06, "epoch": 1.1792590625414952, "percentage": 58.96, "elapsed_time": "16:01:54", "remaining_time": "11:09:30"} +{"current_steps": 4442, "total_steps": 7532, "loss": 0.24777942895889282, "lr": 7.880100539309506e-06, "epoch": 1.179524631523038, "percentage": 58.98, "elapsed_time": "16:02:07", "remaining_time": "11:09:17"} +{"current_steps": 4443, "total_steps": 7532, "loss": 0.25111010670661926, "lr": 7.875809772367867e-06, "epoch": 1.179790200504581, "percentage": 58.99, "elapsed_time": "16:02:20", "remaining_time": "11:09:04"} +{"current_steps": 4444, "total_steps": 7532, "loss": 0.26183217763900757, "lr": 7.87151941494505e-06, "epoch": 1.180055769486124, "percentage": 59.0, "elapsed_time": "16:02:33", "remaining_time": "11:08:51"} +{"current_steps": 4445, "total_steps": 7532, "loss": 
0.27538490295410156, "lr": 7.867229467868189e-06, "epoch": 1.180321338467667, "percentage": 59.01, "elapsed_time": "16:02:45", "remaining_time": "11:08:37"} +{"current_steps": 4446, "total_steps": 7532, "loss": 0.2192106693983078, "lr": 7.862939931964333e-06, "epoch": 1.18058690744921, "percentage": 59.03, "elapsed_time": "16:02:58", "remaining_time": "11:08:24"} +{"current_steps": 4447, "total_steps": 7532, "loss": 0.26506057381629944, "lr": 7.858650808060453e-06, "epoch": 1.1808524764307529, "percentage": 59.04, "elapsed_time": "16:03:11", "remaining_time": "11:08:11"} +{"current_steps": 4448, "total_steps": 7532, "loss": 0.2345719337463379, "lr": 7.854362096983443e-06, "epoch": 1.1811180454122958, "percentage": 59.05, "elapsed_time": "16:03:23", "remaining_time": "11:07:57"} +{"current_steps": 4449, "total_steps": 7532, "loss": 0.21404311060905457, "lr": 7.850073799560114e-06, "epoch": 1.1813836143938388, "percentage": 59.07, "elapsed_time": "16:03:37", "remaining_time": "11:07:45"} +{"current_steps": 4450, "total_steps": 7532, "loss": 0.24332138895988464, "lr": 7.8457859166172e-06, "epoch": 1.1816491833753817, "percentage": 59.08, "elapsed_time": "16:03:49", "remaining_time": "11:07:31"} +{"current_steps": 4451, "total_steps": 7532, "loss": 0.25025150179862976, "lr": 7.841498448981354e-06, "epoch": 1.1819147523569247, "percentage": 59.09, "elapsed_time": "16:04:02", "remaining_time": "11:07:18"} +{"current_steps": 4452, "total_steps": 7532, "loss": 0.21918940544128418, "lr": 7.837211397479152e-06, "epoch": 1.1821803213384676, "percentage": 59.11, "elapsed_time": "16:04:15", "remaining_time": "11:07:05"} +{"current_steps": 4453, "total_steps": 7532, "loss": 0.24976079165935516, "lr": 7.832924762937083e-06, "epoch": 1.1824458903200106, "percentage": 59.12, "elapsed_time": "16:04:28", "remaining_time": "11:06:52"} +{"current_steps": 4454, "total_steps": 7532, "loss": 0.21146243810653687, "lr": 7.828638546181565e-06, "epoch": 1.1827114593015535, "percentage": 
59.13, "elapsed_time": "16:04:40", "remaining_time": "11:06:39"} +{"current_steps": 4455, "total_steps": 7532, "loss": 0.22921445965766907, "lr": 7.824352748038924e-06, "epoch": 1.1829770282830965, "percentage": 59.15, "elapsed_time": "16:04:53", "remaining_time": "11:06:26"} +{"current_steps": 4456, "total_steps": 7532, "loss": 0.24401478469371796, "lr": 7.820067369335413e-06, "epoch": 1.1832425972646394, "percentage": 59.16, "elapsed_time": "16:05:06", "remaining_time": "11:06:13"} +{"current_steps": 4457, "total_steps": 7532, "loss": 0.2717207074165344, "lr": 7.815782410897209e-06, "epoch": 1.1835081662461824, "percentage": 59.17, "elapsed_time": "16:05:19", "remaining_time": "11:06:00"} +{"current_steps": 4458, "total_steps": 7532, "loss": 0.20752058923244476, "lr": 7.81149787355039e-06, "epoch": 1.1837737352277253, "percentage": 59.19, "elapsed_time": "16:05:31", "remaining_time": "11:05:46"} +{"current_steps": 4459, "total_steps": 7532, "loss": 0.31095850467681885, "lr": 7.807213758120965e-06, "epoch": 1.1840393042092683, "percentage": 59.2, "elapsed_time": "16:05:44", "remaining_time": "11:05:33"} +{"current_steps": 4460, "total_steps": 7532, "loss": 0.23761102557182312, "lr": 7.802930065434874e-06, "epoch": 1.1843048731908112, "percentage": 59.21, "elapsed_time": "16:05:57", "remaining_time": "11:05:20"} +{"current_steps": 4461, "total_steps": 7532, "loss": 0.2509460151195526, "lr": 7.798646796317952e-06, "epoch": 1.1845704421723542, "percentage": 59.23, "elapsed_time": "16:06:10", "remaining_time": "11:05:07"} +{"current_steps": 4462, "total_steps": 7532, "loss": 0.25903213024139404, "lr": 7.794363951595966e-06, "epoch": 1.1848360111538971, "percentage": 59.24, "elapsed_time": "16:06:23", "remaining_time": "11:04:54"} +{"current_steps": 4463, "total_steps": 7532, "loss": 0.23304736614227295, "lr": 7.790081532094596e-06, "epoch": 1.18510158013544, "percentage": 59.25, "elapsed_time": "16:06:35", "remaining_time": "11:04:40"} +{"current_steps": 4464, 
"total_steps": 7532, "loss": 0.28707265853881836, "lr": 7.785799538639445e-06, "epoch": 1.185367149116983, "percentage": 59.27, "elapsed_time": "16:06:48", "remaining_time": "11:04:27"} +{"current_steps": 4465, "total_steps": 7532, "loss": 0.20282745361328125, "lr": 7.781517972056028e-06, "epoch": 1.185632718098526, "percentage": 59.28, "elapsed_time": "16:07:01", "remaining_time": "11:04:14"} +{"current_steps": 4466, "total_steps": 7532, "loss": 0.24056631326675415, "lr": 7.777236833169782e-06, "epoch": 1.185898287080069, "percentage": 59.29, "elapsed_time": "16:07:14", "remaining_time": "11:04:01"} +{"current_steps": 4467, "total_steps": 7532, "loss": 0.2677255868911743, "lr": 7.772956122806058e-06, "epoch": 1.186163856061612, "percentage": 59.31, "elapsed_time": "16:07:26", "remaining_time": "11:03:48"} +{"current_steps": 4468, "total_steps": 7532, "loss": 0.22032876312732697, "lr": 7.768675841790124e-06, "epoch": 1.1864294250431549, "percentage": 59.32, "elapsed_time": "16:07:39", "remaining_time": "11:03:35"} +{"current_steps": 4469, "total_steps": 7532, "loss": 0.2980336546897888, "lr": 7.764395990947177e-06, "epoch": 1.1866949940246978, "percentage": 59.33, "elapsed_time": "16:07:52", "remaining_time": "11:03:22"} +{"current_steps": 4470, "total_steps": 7532, "loss": 0.2562638521194458, "lr": 7.760116571102314e-06, "epoch": 1.1869605630062408, "percentage": 59.35, "elapsed_time": "16:08:05", "remaining_time": "11:03:09"} +{"current_steps": 4471, "total_steps": 7532, "loss": 0.262576699256897, "lr": 7.755837583080561e-06, "epoch": 1.1872261319877837, "percentage": 59.36, "elapsed_time": "16:08:17", "remaining_time": "11:02:55"} +{"current_steps": 4472, "total_steps": 7532, "loss": 0.2654029130935669, "lr": 7.751559027706858e-06, "epoch": 1.1874917009693267, "percentage": 59.37, "elapsed_time": "16:08:30", "remaining_time": "11:02:42"} +{"current_steps": 4473, "total_steps": 7532, "loss": 0.2946662902832031, "lr": 7.747280905806051e-06, "epoch": 
1.1877572699508698, "percentage": 59.39, "elapsed_time": "16:08:43", "remaining_time": "11:02:29"} +{"current_steps": 4474, "total_steps": 7532, "loss": 0.25140905380249023, "lr": 7.743003218202921e-06, "epoch": 1.1880228389324128, "percentage": 59.4, "elapsed_time": "16:08:56", "remaining_time": "11:02:16"} +{"current_steps": 4475, "total_steps": 7532, "loss": 0.2601654529571533, "lr": 7.738725965722149e-06, "epoch": 1.1882884079139557, "percentage": 59.41, "elapsed_time": "16:09:09", "remaining_time": "11:02:03"} +{"current_steps": 4476, "total_steps": 7532, "loss": 0.2639954090118408, "lr": 7.73444914918834e-06, "epoch": 1.1885539768954987, "percentage": 59.43, "elapsed_time": "16:09:22", "remaining_time": "11:01:50"} +{"current_steps": 4477, "total_steps": 7532, "loss": 0.23391291499137878, "lr": 7.730172769426014e-06, "epoch": 1.1888195458770416, "percentage": 59.44, "elapsed_time": "16:09:34", "remaining_time": "11:01:37"} +{"current_steps": 4478, "total_steps": 7532, "loss": 0.2912144958972931, "lr": 7.725896827259613e-06, "epoch": 1.1890851148585846, "percentage": 59.45, "elapsed_time": "16:09:47", "remaining_time": "11:01:23"} +{"current_steps": 4479, "total_steps": 7532, "loss": 0.23867549002170563, "lr": 7.72162132351348e-06, "epoch": 1.1893506838401275, "percentage": 59.47, "elapsed_time": "16:10:00", "remaining_time": "11:01:10"} +{"current_steps": 4480, "total_steps": 7532, "loss": 0.22434742748737335, "lr": 7.717346259011888e-06, "epoch": 1.1896162528216705, "percentage": 59.48, "elapsed_time": "16:10:12", "remaining_time": "11:00:57"} +{"current_steps": 4481, "total_steps": 7532, "loss": 0.2504398822784424, "lr": 7.713071634579017e-06, "epoch": 1.1898818218032134, "percentage": 59.49, "elapsed_time": "16:10:25", "remaining_time": "11:00:44"} +{"current_steps": 4482, "total_steps": 7532, "loss": 0.24887195229530334, "lr": 7.70879745103896e-06, "epoch": 1.1901473907847564, "percentage": 59.51, "elapsed_time": "16:10:38", "remaining_time": "11:00:31"} 
+{"current_steps": 4483, "total_steps": 7532, "loss": 0.2730141580104828, "lr": 7.704523709215732e-06, "epoch": 1.1904129597662994, "percentage": 59.52, "elapsed_time": "16:10:51", "remaining_time": "11:00:18"} +{"current_steps": 4484, "total_steps": 7532, "loss": 0.22197315096855164, "lr": 7.70025040993326e-06, "epoch": 1.1906785287478423, "percentage": 59.53, "elapsed_time": "16:11:04", "remaining_time": "11:00:05"} +{"current_steps": 4485, "total_steps": 7532, "loss": 0.2852731943130493, "lr": 7.695977554015387e-06, "epoch": 1.1909440977293853, "percentage": 59.55, "elapsed_time": "16:11:17", "remaining_time": "10:59:52"} +{"current_steps": 4486, "total_steps": 7532, "loss": 0.2577238976955414, "lr": 7.691705142285863e-06, "epoch": 1.1912096667109282, "percentage": 59.56, "elapsed_time": "16:11:29", "remaining_time": "10:59:38"} +{"current_steps": 4487, "total_steps": 7532, "loss": 0.23510503768920898, "lr": 7.68743317556837e-06, "epoch": 1.1914752356924712, "percentage": 59.57, "elapsed_time": "16:11:42", "remaining_time": "10:59:25"} +{"current_steps": 4488, "total_steps": 7532, "loss": 0.2553985118865967, "lr": 7.683161654686486e-06, "epoch": 1.191740804674014, "percentage": 59.59, "elapsed_time": "16:11:55", "remaining_time": "10:59:12"} +{"current_steps": 4489, "total_steps": 7532, "loss": 0.2778642475605011, "lr": 7.67889058046371e-06, "epoch": 1.192006373655557, "percentage": 59.6, "elapsed_time": "16:12:07", "remaining_time": "10:58:59"} +{"current_steps": 4490, "total_steps": 7532, "loss": 0.24740618467330933, "lr": 7.674619953723455e-06, "epoch": 1.1922719426371, "percentage": 59.61, "elapsed_time": "16:12:20", "remaining_time": "10:58:46"} +{"current_steps": 4491, "total_steps": 7532, "loss": 0.2453901171684265, "lr": 7.670349775289047e-06, "epoch": 1.192537511618643, "percentage": 59.63, "elapsed_time": "16:12:33", "remaining_time": "10:58:32"} +{"current_steps": 4492, "total_steps": 7532, "loss": 0.2336064875125885, "lr": 7.666080045983726e-06, 
"epoch": 1.192803080600186, "percentage": 59.64, "elapsed_time": "16:12:46", "remaining_time": "10:58:20"} +{"current_steps": 4493, "total_steps": 7532, "loss": 0.2375800907611847, "lr": 7.661810766630648e-06, "epoch": 1.1930686495817289, "percentage": 59.65, "elapsed_time": "16:12:59", "remaining_time": "10:58:06"} +{"current_steps": 4494, "total_steps": 7532, "loss": 0.21272733807563782, "lr": 7.657541938052876e-06, "epoch": 1.1933342185632718, "percentage": 59.67, "elapsed_time": "16:13:12", "remaining_time": "10:57:54"} +{"current_steps": 4495, "total_steps": 7532, "loss": 0.26597708463668823, "lr": 7.65327356107339e-06, "epoch": 1.1935997875448148, "percentage": 59.68, "elapsed_time": "16:13:25", "remaining_time": "10:57:40"} +{"current_steps": 4496, "total_steps": 7532, "loss": 0.267806738615036, "lr": 7.649005636515088e-06, "epoch": 1.1938653565263577, "percentage": 59.69, "elapsed_time": "16:13:38", "remaining_time": "10:57:27"} +{"current_steps": 4497, "total_steps": 7532, "loss": 0.2260194569826126, "lr": 7.64473816520077e-06, "epoch": 1.1941309255079007, "percentage": 59.71, "elapsed_time": "16:13:50", "remaining_time": "10:57:14"} +{"current_steps": 4498, "total_steps": 7532, "loss": 0.24523532390594482, "lr": 7.640471147953157e-06, "epoch": 1.1943964944894436, "percentage": 59.72, "elapsed_time": "16:14:04", "remaining_time": "10:57:01"} +{"current_steps": 4499, "total_steps": 7532, "loss": 0.23230910301208496, "lr": 7.636204585594879e-06, "epoch": 1.1946620634709866, "percentage": 59.73, "elapsed_time": "16:14:16", "remaining_time": "10:56:48"} +{"current_steps": 4500, "total_steps": 7532, "loss": 0.23322705924510956, "lr": 7.631938478948478e-06, "epoch": 1.1949276324525295, "percentage": 59.75, "elapsed_time": "16:14:29", "remaining_time": "10:56:35"} +{"current_steps": 4501, "total_steps": 7532, "loss": 0.25614386796951294, "lr": 7.6276728288364086e-06, "epoch": 1.1951932014340725, "percentage": 59.76, "elapsed_time": "16:14:48", "remaining_time": 
"10:56:26"} +{"current_steps": 4502, "total_steps": 7532, "loss": 0.22921821475028992, "lr": 7.62340763608104e-06, "epoch": 1.1954587704156154, "percentage": 59.77, "elapsed_time": "16:15:01", "remaining_time": "10:56:13"} +{"current_steps": 4503, "total_steps": 7532, "loss": 0.25528913736343384, "lr": 7.619142901504649e-06, "epoch": 1.1957243393971584, "percentage": 59.78, "elapsed_time": "16:15:14", "remaining_time": "10:56:00"} +{"current_steps": 4504, "total_steps": 7532, "loss": 0.2528502643108368, "lr": 7.614878625929425e-06, "epoch": 1.1959899083787013, "percentage": 59.8, "elapsed_time": "16:15:27", "remaining_time": "10:55:47"} +{"current_steps": 4505, "total_steps": 7532, "loss": 0.2519027590751648, "lr": 7.610614810177474e-06, "epoch": 1.1962554773602443, "percentage": 59.81, "elapsed_time": "16:15:40", "remaining_time": "10:55:34"} +{"current_steps": 4506, "total_steps": 7532, "loss": 0.2895655333995819, "lr": 7.606351455070808e-06, "epoch": 1.1965210463417872, "percentage": 59.82, "elapsed_time": "16:15:53", "remaining_time": "10:55:21"} +{"current_steps": 4507, "total_steps": 7532, "loss": 0.24588793516159058, "lr": 7.6020885614313515e-06, "epoch": 1.1967866153233302, "percentage": 59.84, "elapsed_time": "16:16:06", "remaining_time": "10:55:08"} +{"current_steps": 4508, "total_steps": 7532, "loss": 0.2996830940246582, "lr": 7.597826130080938e-06, "epoch": 1.1970521843048731, "percentage": 59.85, "elapsed_time": "16:16:18", "remaining_time": "10:54:55"} +{"current_steps": 4509, "total_steps": 7532, "loss": 0.2654343247413635, "lr": 7.593564161841318e-06, "epoch": 1.197317753286416, "percentage": 59.86, "elapsed_time": "16:16:31", "remaining_time": "10:54:42"} +{"current_steps": 4510, "total_steps": 7532, "loss": 0.24949109554290771, "lr": 7.589302657534144e-06, "epoch": 1.197583322267959, "percentage": 59.88, "elapsed_time": "16:16:44", "remaining_time": "10:54:29"} +{"current_steps": 4511, "total_steps": 7532, "loss": 0.23205731809139252, "lr": 
7.5850416179809886e-06, "epoch": 1.197848891249502, "percentage": 59.89, "elapsed_time": "16:16:57", "remaining_time": "10:54:16"} +{"current_steps": 4512, "total_steps": 7532, "loss": 0.232904314994812, "lr": 7.580781044003324e-06, "epoch": 1.198114460231045, "percentage": 59.9, "elapsed_time": "16:17:10", "remaining_time": "10:54:02"} +{"current_steps": 4513, "total_steps": 7532, "loss": 0.25071364641189575, "lr": 7.576520936422542e-06, "epoch": 1.198380029212588, "percentage": 59.92, "elapsed_time": "16:17:22", "remaining_time": "10:53:49"} +{"current_steps": 4514, "total_steps": 7532, "loss": 0.2574467658996582, "lr": 7.572261296059944e-06, "epoch": 1.1986455981941309, "percentage": 59.93, "elapsed_time": "16:17:36", "remaining_time": "10:53:36"} +{"current_steps": 4515, "total_steps": 7532, "loss": 0.23134055733680725, "lr": 7.568002123736735e-06, "epoch": 1.1989111671756738, "percentage": 59.94, "elapsed_time": "16:17:48", "remaining_time": "10:53:23"} +{"current_steps": 4516, "total_steps": 7532, "loss": 0.22163332998752594, "lr": 7.5637434202740334e-06, "epoch": 1.1991767361572168, "percentage": 59.96, "elapsed_time": "16:18:01", "remaining_time": "10:53:10"} +{"current_steps": 4517, "total_steps": 7532, "loss": 0.2665749788284302, "lr": 7.559485186492868e-06, "epoch": 1.1994423051387597, "percentage": 59.97, "elapsed_time": "16:18:14", "remaining_time": "10:52:57"} +{"current_steps": 4518, "total_steps": 7532, "loss": 0.2237103432416916, "lr": 7.555227423214174e-06, "epoch": 1.1997078741203027, "percentage": 59.98, "elapsed_time": "16:18:27", "remaining_time": "10:52:44"} +{"current_steps": 4519, "total_steps": 7532, "loss": 0.23287461698055267, "lr": 7.550970131258801e-06, "epoch": 1.1999734431018456, "percentage": 60.0, "elapsed_time": "16:18:40", "remaining_time": "10:52:31"} +{"current_steps": 4520, "total_steps": 7532, "loss": 0.2296323925256729, "lr": 7.5467133114475025e-06, "epoch": 1.2002390120833886, "percentage": 60.01, "elapsed_time": 
"16:18:53", "remaining_time": "10:52:18"} +{"current_steps": 4521, "total_steps": 7532, "loss": 0.21358339488506317, "lr": 7.542456964600944e-06, "epoch": 1.2005045810649315, "percentage": 60.02, "elapsed_time": "16:19:06", "remaining_time": "10:52:05"} +{"current_steps": 4522, "total_steps": 7532, "loss": 0.2355872094631195, "lr": 7.5382010915396954e-06, "epoch": 1.2007701500464747, "percentage": 60.04, "elapsed_time": "16:19:19", "remaining_time": "10:51:52"} +{"current_steps": 4523, "total_steps": 7532, "loss": 0.25397661328315735, "lr": 7.5339456930842455e-06, "epoch": 1.2010357190280176, "percentage": 60.05, "elapsed_time": "16:19:32", "remaining_time": "10:51:39"} +{"current_steps": 4524, "total_steps": 7532, "loss": 0.26658257842063904, "lr": 7.52969077005498e-06, "epoch": 1.2013012880095606, "percentage": 60.06, "elapsed_time": "16:19:44", "remaining_time": "10:51:26"} +{"current_steps": 4525, "total_steps": 7532, "loss": 0.27207136154174805, "lr": 7.525436323272201e-06, "epoch": 1.2015668569911035, "percentage": 60.08, "elapsed_time": "16:19:58", "remaining_time": "10:51:13"} +{"current_steps": 4526, "total_steps": 7532, "loss": 0.25889313220977783, "lr": 7.521182353556114e-06, "epoch": 1.2018324259726465, "percentage": 60.09, "elapsed_time": "16:20:10", "remaining_time": "10:50:59"} +{"current_steps": 4527, "total_steps": 7532, "loss": 0.272185742855072, "lr": 7.516928861726834e-06, "epoch": 1.2020979949541895, "percentage": 60.1, "elapsed_time": "16:20:23", "remaining_time": "10:50:47"} +{"current_steps": 4528, "total_steps": 7532, "loss": 0.25371503829956055, "lr": 7.512675848604385e-06, "epoch": 1.2023635639357324, "percentage": 60.12, "elapsed_time": "16:20:36", "remaining_time": "10:50:33"} +{"current_steps": 4529, "total_steps": 7532, "loss": 0.2554902732372284, "lr": 7.5084233150086964e-06, "epoch": 1.2026291329172754, "percentage": 60.13, "elapsed_time": "16:20:50", "remaining_time": "10:50:21"} +{"current_steps": 4530, "total_steps": 7532, 
"loss": 0.22007369995117188, "lr": 7.50417126175961e-06, "epoch": 1.2028947018988183, "percentage": 60.14, "elapsed_time": "16:21:02", "remaining_time": "10:50:08"} +{"current_steps": 4531, "total_steps": 7532, "loss": 0.27492445707321167, "lr": 7.499919689676861e-06, "epoch": 1.2031602708803613, "percentage": 60.16, "elapsed_time": "16:21:15", "remaining_time": "10:49:55"} +{"current_steps": 4532, "total_steps": 7532, "loss": 0.2321021854877472, "lr": 7.4956685995801144e-06, "epoch": 1.2034258398619042, "percentage": 60.17, "elapsed_time": "16:21:28", "remaining_time": "10:49:41"} +{"current_steps": 4533, "total_steps": 7532, "loss": 0.25410759449005127, "lr": 7.491417992288927e-06, "epoch": 1.2036914088434472, "percentage": 60.18, "elapsed_time": "16:21:41", "remaining_time": "10:49:29"} +{"current_steps": 4534, "total_steps": 7532, "loss": 0.2080576866865158, "lr": 7.487167868622765e-06, "epoch": 1.2039569778249901, "percentage": 60.2, "elapsed_time": "16:21:54", "remaining_time": "10:49:15"} +{"current_steps": 4535, "total_steps": 7532, "loss": 0.2333327978849411, "lr": 7.482918229401001e-06, "epoch": 1.204222546806533, "percentage": 60.21, "elapsed_time": "16:22:07", "remaining_time": "10:49:03"} +{"current_steps": 4536, "total_steps": 7532, "loss": 0.23160479962825775, "lr": 7.478669075442917e-06, "epoch": 1.204488115788076, "percentage": 60.22, "elapsed_time": "16:22:20", "remaining_time": "10:48:49"} +{"current_steps": 4537, "total_steps": 7532, "loss": 0.2627696394920349, "lr": 7.474420407567699e-06, "epoch": 1.204753684769619, "percentage": 60.24, "elapsed_time": "16:22:33", "remaining_time": "10:48:36"} +{"current_steps": 4538, "total_steps": 7532, "loss": 0.18656940758228302, "lr": 7.470172226594441e-06, "epoch": 1.205019253751162, "percentage": 60.25, "elapsed_time": "16:22:46", "remaining_time": "10:48:23"} +{"current_steps": 4539, "total_steps": 7532, "loss": 0.2749083340167999, "lr": 7.465924533342139e-06, "epoch": 1.2052848227327049, "percentage": 
60.26, "elapsed_time": "16:22:59", "remaining_time": "10:48:10"} +{"current_steps": 4540, "total_steps": 7532, "loss": 0.27484387159347534, "lr": 7.461677328629696e-06, "epoch": 1.2055503917142478, "percentage": 60.28, "elapsed_time": "16:23:12", "remaining_time": "10:47:57"} +{"current_steps": 4541, "total_steps": 7532, "loss": 0.26357588171958923, "lr": 7.457430613275934e-06, "epoch": 1.2058159606957908, "percentage": 60.29, "elapsed_time": "16:23:25", "remaining_time": "10:47:44"} +{"current_steps": 4542, "total_steps": 7532, "loss": 0.23495343327522278, "lr": 7.453184388099559e-06, "epoch": 1.2060815296773337, "percentage": 60.3, "elapsed_time": "16:23:38", "remaining_time": "10:47:31"} +{"current_steps": 4543, "total_steps": 7532, "loss": 0.253970205783844, "lr": 7.4489386539192e-06, "epoch": 1.2063470986588767, "percentage": 60.32, "elapsed_time": "16:23:51", "remaining_time": "10:47:18"} +{"current_steps": 4544, "total_steps": 7532, "loss": 0.24919062852859497, "lr": 7.444693411553383e-06, "epoch": 1.2066126676404196, "percentage": 60.33, "elapsed_time": "16:24:04", "remaining_time": "10:47:05"} +{"current_steps": 4545, "total_steps": 7532, "loss": 0.24373450875282288, "lr": 7.440448661820536e-06, "epoch": 1.2068782366219626, "percentage": 60.34, "elapsed_time": "16:24:16", "remaining_time": "10:46:52"} +{"current_steps": 4546, "total_steps": 7532, "loss": 0.24739482998847961, "lr": 7.436204405539002e-06, "epoch": 1.2071438056035055, "percentage": 60.36, "elapsed_time": "16:24:30", "remaining_time": "10:46:39"} +{"current_steps": 4547, "total_steps": 7532, "loss": 0.27041494846343994, "lr": 7.4319606435270195e-06, "epoch": 1.2074093745850485, "percentage": 60.37, "elapsed_time": "16:24:43", "remaining_time": "10:46:26"} +{"current_steps": 4548, "total_steps": 7532, "loss": 0.23243938386440277, "lr": 7.427717376602739e-06, "epoch": 1.2076749435665914, "percentage": 60.38, "elapsed_time": "16:24:56", "remaining_time": "10:46:13"} +{"current_steps": 4549, 
"total_steps": 7532, "loss": 0.2346343696117401, "lr": 7.423474605584206e-06, "epoch": 1.2079405125481344, "percentage": 60.4, "elapsed_time": "16:25:09", "remaining_time": "10:46:00"} +{"current_steps": 4550, "total_steps": 7532, "loss": 0.2587367296218872, "lr": 7.419232331289385e-06, "epoch": 1.2082060815296773, "percentage": 60.41, "elapsed_time": "16:25:22", "remaining_time": "10:45:47"} +{"current_steps": 4551, "total_steps": 7532, "loss": 0.2552938461303711, "lr": 7.414990554536134e-06, "epoch": 1.2084716505112203, "percentage": 60.42, "elapsed_time": "16:25:34", "remaining_time": "10:45:34"} +{"current_steps": 4552, "total_steps": 7532, "loss": 0.2693648040294647, "lr": 7.410749276142221e-06, "epoch": 1.2087372194927632, "percentage": 60.44, "elapsed_time": "16:25:47", "remaining_time": "10:45:21"} +{"current_steps": 4553, "total_steps": 7532, "loss": 0.21543294191360474, "lr": 7.406508496925307e-06, "epoch": 1.2090027884743062, "percentage": 60.45, "elapsed_time": "16:26:00", "remaining_time": "10:45:08"} +{"current_steps": 4554, "total_steps": 7532, "loss": 0.2913009524345398, "lr": 7.402268217702966e-06, "epoch": 1.2092683574558492, "percentage": 60.46, "elapsed_time": "16:26:13", "remaining_time": "10:44:55"} +{"current_steps": 4555, "total_steps": 7532, "loss": 0.23279520869255066, "lr": 7.398028439292675e-06, "epoch": 1.209533926437392, "percentage": 60.48, "elapsed_time": "16:26:26", "remaining_time": "10:44:42"} +{"current_steps": 4556, "total_steps": 7532, "loss": 0.25086939334869385, "lr": 7.393789162511815e-06, "epoch": 1.209799495418935, "percentage": 60.49, "elapsed_time": "16:26:39", "remaining_time": "10:44:29"} +{"current_steps": 4557, "total_steps": 7532, "loss": 0.21704714000225067, "lr": 7.389550388177662e-06, "epoch": 1.210065064400478, "percentage": 60.5, "elapsed_time": "16:26:52", "remaining_time": "10:44:16"} +{"current_steps": 4558, "total_steps": 7532, "loss": 0.230219304561615, "lr": 7.3853121171074115e-06, "epoch": 
1.210330633382021, "percentage": 60.52, "elapsed_time": "16:27:05", "remaining_time": "10:44:03"} +{"current_steps": 4559, "total_steps": 7532, "loss": 0.26073017716407776, "lr": 7.381074350118149e-06, "epoch": 1.210596202363564, "percentage": 60.53, "elapsed_time": "16:27:18", "remaining_time": "10:43:50"} +{"current_steps": 4560, "total_steps": 7532, "loss": 0.25186216831207275, "lr": 7.376837088026863e-06, "epoch": 1.2108617713451069, "percentage": 60.54, "elapsed_time": "16:27:31", "remaining_time": "10:43:37"} +{"current_steps": 4561, "total_steps": 7532, "loss": 0.28719040751457214, "lr": 7.372600331650449e-06, "epoch": 1.2111273403266498, "percentage": 60.55, "elapsed_time": "16:27:44", "remaining_time": "10:43:24"} +{"current_steps": 4562, "total_steps": 7532, "loss": 0.23972755670547485, "lr": 7.368364081805704e-06, "epoch": 1.2113929093081928, "percentage": 60.57, "elapsed_time": "16:27:57", "remaining_time": "10:43:11"} +{"current_steps": 4563, "total_steps": 7532, "loss": 0.23053769767284393, "lr": 7.364128339309326e-06, "epoch": 1.2116584782897357, "percentage": 60.58, "elapsed_time": "16:28:10", "remaining_time": "10:42:58"} +{"current_steps": 4564, "total_steps": 7532, "loss": 0.25124189257621765, "lr": 7.359893104977917e-06, "epoch": 1.2119240472712787, "percentage": 60.59, "elapsed_time": "16:28:23", "remaining_time": "10:42:45"} +{"current_steps": 4565, "total_steps": 7532, "loss": 0.2243686318397522, "lr": 7.355658379627981e-06, "epoch": 1.2121896162528216, "percentage": 60.61, "elapsed_time": "16:28:36", "remaining_time": "10:42:32"} +{"current_steps": 4566, "total_steps": 7532, "loss": 0.26047343015670776, "lr": 7.3514241640759175e-06, "epoch": 1.2124551852343646, "percentage": 60.62, "elapsed_time": "16:28:49", "remaining_time": "10:42:19"} +{"current_steps": 4567, "total_steps": 7532, "loss": 0.23603469133377075, "lr": 7.3471904591380434e-06, "epoch": 1.2127207542159075, "percentage": 60.63, "elapsed_time": "16:29:02", "remaining_time": 
"10:42:06"} +{"current_steps": 4568, "total_steps": 7532, "loss": 0.31320711970329285, "lr": 7.342957265630561e-06, "epoch": 1.2129863231974505, "percentage": 60.65, "elapsed_time": "16:29:15", "remaining_time": "10:41:53"} +{"current_steps": 4569, "total_steps": 7532, "loss": 0.22159788012504578, "lr": 7.338724584369581e-06, "epoch": 1.2132518921789934, "percentage": 60.66, "elapsed_time": "16:29:28", "remaining_time": "10:41:40"} +{"current_steps": 4570, "total_steps": 7532, "loss": 0.21992239356040955, "lr": 7.334492416171114e-06, "epoch": 1.2135174611605364, "percentage": 60.67, "elapsed_time": "16:29:41", "remaining_time": "10:41:27"} +{"current_steps": 4571, "total_steps": 7532, "loss": 0.20708827674388885, "lr": 7.330260761851071e-06, "epoch": 1.2137830301420793, "percentage": 60.69, "elapsed_time": "16:29:54", "remaining_time": "10:41:14"} +{"current_steps": 4572, "total_steps": 7532, "loss": 0.2846507132053375, "lr": 7.326029622225269e-06, "epoch": 1.2140485991236223, "percentage": 60.7, "elapsed_time": "16:30:07", "remaining_time": "10:41:01"} +{"current_steps": 4573, "total_steps": 7532, "loss": 0.24903801083564758, "lr": 7.321798998109417e-06, "epoch": 1.2143141681051652, "percentage": 60.71, "elapsed_time": "16:30:20", "remaining_time": "10:40:48"} +{"current_steps": 4574, "total_steps": 7532, "loss": 0.23426681756973267, "lr": 7.317568890319134e-06, "epoch": 1.2145797370867082, "percentage": 60.73, "elapsed_time": "16:30:33", "remaining_time": "10:40:35"} +{"current_steps": 4575, "total_steps": 7532, "loss": 0.2374490350484848, "lr": 7.31333929966993e-06, "epoch": 1.2148453060682511, "percentage": 60.74, "elapsed_time": "16:30:46", "remaining_time": "10:40:22"} +{"current_steps": 4576, "total_steps": 7532, "loss": 0.24035832285881042, "lr": 7.309110226977223e-06, "epoch": 1.215110875049794, "percentage": 60.75, "elapsed_time": "16:30:59", "remaining_time": "10:40:09"} +{"current_steps": 4577, "total_steps": 7532, "loss": 0.21872258186340332, "lr": 
7.30488167305633e-06, "epoch": 1.215376444031337, "percentage": 60.77, "elapsed_time": "16:31:12", "remaining_time": "10:39:56"} +{"current_steps": 4578, "total_steps": 7532, "loss": 0.2940255403518677, "lr": 7.300653638722463e-06, "epoch": 1.21564201301288, "percentage": 60.78, "elapsed_time": "16:31:25", "remaining_time": "10:39:43"} +{"current_steps": 4579, "total_steps": 7532, "loss": 0.20970892906188965, "lr": 7.29642612479074e-06, "epoch": 1.215907581994423, "percentage": 60.79, "elapsed_time": "16:31:39", "remaining_time": "10:39:30"} +{"current_steps": 4580, "total_steps": 7532, "loss": 0.21217449009418488, "lr": 7.292199132076175e-06, "epoch": 1.216173150975966, "percentage": 60.81, "elapsed_time": "16:31:51", "remaining_time": "10:39:17"} +{"current_steps": 4581, "total_steps": 7532, "loss": 0.2463359832763672, "lr": 7.28797266139368e-06, "epoch": 1.2164387199575089, "percentage": 60.82, "elapsed_time": "16:32:05", "remaining_time": "10:39:05"} +{"current_steps": 4582, "total_steps": 7532, "loss": 0.21921415627002716, "lr": 7.283746713558071e-06, "epoch": 1.2167042889390518, "percentage": 60.83, "elapsed_time": "16:32:18", "remaining_time": "10:38:52"} +{"current_steps": 4583, "total_steps": 7532, "loss": 0.2412380576133728, "lr": 7.279521289384059e-06, "epoch": 1.2169698579205948, "percentage": 60.85, "elapsed_time": "16:32:31", "remaining_time": "10:38:39"} +{"current_steps": 4584, "total_steps": 7532, "loss": 0.2558564245700836, "lr": 7.275296389686258e-06, "epoch": 1.2172354269021377, "percentage": 60.86, "elapsed_time": "16:32:44", "remaining_time": "10:38:26"} +{"current_steps": 4585, "total_steps": 7532, "loss": 0.2548869848251343, "lr": 7.271072015279179e-06, "epoch": 1.2175009958836809, "percentage": 60.87, "elapsed_time": "16:32:57", "remaining_time": "10:38:13"} +{"current_steps": 4586, "total_steps": 7532, "loss": 0.22183407843112946, "lr": 7.2668481669772304e-06, "epoch": 1.2177665648652238, "percentage": 60.89, "elapsed_time": "16:33:10", 
"remaining_time": "10:38:00"} +{"current_steps": 4587, "total_steps": 7532, "loss": 0.24722473323345184, "lr": 7.262624845594721e-06, "epoch": 1.2180321338467668, "percentage": 60.9, "elapsed_time": "16:33:24", "remaining_time": "10:37:47"} +{"current_steps": 4588, "total_steps": 7532, "loss": 0.2678988575935364, "lr": 7.258402051945858e-06, "epoch": 1.2182977028283097, "percentage": 60.91, "elapsed_time": "16:33:36", "remaining_time": "10:37:34"} +{"current_steps": 4589, "total_steps": 7532, "loss": 0.2116469144821167, "lr": 7.2541797868447435e-06, "epoch": 1.2185632718098527, "percentage": 60.93, "elapsed_time": "16:33:50", "remaining_time": "10:37:21"} +{"current_steps": 4590, "total_steps": 7532, "loss": 0.23897933959960938, "lr": 7.249958051105383e-06, "epoch": 1.2188288407913956, "percentage": 60.94, "elapsed_time": "16:34:02", "remaining_time": "10:37:08"} +{"current_steps": 4591, "total_steps": 7532, "loss": 0.25434061884880066, "lr": 7.245736845541676e-06, "epoch": 1.2190944097729386, "percentage": 60.95, "elapsed_time": "16:34:15", "remaining_time": "10:36:55"} +{"current_steps": 4592, "total_steps": 7532, "loss": 0.2602628469467163, "lr": 7.2415161709674235e-06, "epoch": 1.2193599787544815, "percentage": 60.97, "elapsed_time": "16:34:28", "remaining_time": "10:36:42"} +{"current_steps": 4593, "total_steps": 7532, "loss": 0.2519065737724304, "lr": 7.2372960281963165e-06, "epoch": 1.2196255477360245, "percentage": 60.98, "elapsed_time": "16:34:41", "remaining_time": "10:36:29"} +{"current_steps": 4594, "total_steps": 7532, "loss": 0.24404102563858032, "lr": 7.233076418041954e-06, "epoch": 1.2198911167175674, "percentage": 60.99, "elapsed_time": "16:34:54", "remaining_time": "10:36:16"} +{"current_steps": 4595, "total_steps": 7532, "loss": 0.23633979260921478, "lr": 7.228857341317825e-06, "epoch": 1.2201566856991104, "percentage": 61.01, "elapsed_time": "16:35:06", "remaining_time": "10:36:03"} +{"current_steps": 4596, "total_steps": 7532, "loss": 
0.2513781189918518, "lr": 7.224638798837319e-06, "epoch": 1.2204222546806534, "percentage": 61.02, "elapsed_time": "16:35:19", "remaining_time": "10:35:50"} +{"current_steps": 4597, "total_steps": 7532, "loss": 0.23270189762115479, "lr": 7.220420791413721e-06, "epoch": 1.2206878236621963, "percentage": 61.03, "elapsed_time": "16:35:32", "remaining_time": "10:35:36"} +{"current_steps": 4598, "total_steps": 7532, "loss": 0.2770010530948639, "lr": 7.21620331986021e-06, "epoch": 1.2209533926437393, "percentage": 61.05, "elapsed_time": "16:35:46", "remaining_time": "10:35:24"} +{"current_steps": 4599, "total_steps": 7532, "loss": 0.2312745451927185, "lr": 7.2119863849898684e-06, "epoch": 1.2212189616252822, "percentage": 61.06, "elapsed_time": "16:35:58", "remaining_time": "10:35:10"} +{"current_steps": 4600, "total_steps": 7532, "loss": 0.231276735663414, "lr": 7.20776998761567e-06, "epoch": 1.2214845306068252, "percentage": 61.07, "elapsed_time": "16:36:11", "remaining_time": "10:34:58"} +{"current_steps": 4601, "total_steps": 7532, "loss": 0.24927708506584167, "lr": 7.203554128550486e-06, "epoch": 1.221750099588368, "percentage": 61.09, "elapsed_time": "16:36:30", "remaining_time": "10:34:48"} +{"current_steps": 4602, "total_steps": 7532, "loss": 0.23033373057842255, "lr": 7.199338808607084e-06, "epoch": 1.222015668569911, "percentage": 61.1, "elapsed_time": "16:36:43", "remaining_time": "10:34:35"} +{"current_steps": 4603, "total_steps": 7532, "loss": 0.24003425240516663, "lr": 7.195124028598131e-06, "epoch": 1.222281237551454, "percentage": 61.11, "elapsed_time": "16:36:56", "remaining_time": "10:34:22"} +{"current_steps": 4604, "total_steps": 7532, "loss": 0.22648809850215912, "lr": 7.190909789336185e-06, "epoch": 1.222546806532997, "percentage": 61.13, "elapsed_time": "16:37:09", "remaining_time": "10:34:09"} +{"current_steps": 4605, "total_steps": 7532, "loss": 0.2605816125869751, "lr": 7.1866960916337006e-06, "epoch": 1.22281237551454, "percentage": 61.14, 
"elapsed_time": "16:37:22", "remaining_time": "10:33:56"} +{"current_steps": 4606, "total_steps": 7532, "loss": 0.21549202501773834, "lr": 7.1824829363030305e-06, "epoch": 1.2230779444960829, "percentage": 61.15, "elapsed_time": "16:37:35", "remaining_time": "10:33:43"} +{"current_steps": 4607, "total_steps": 7532, "loss": 0.23113220930099487, "lr": 7.17827032415642e-06, "epoch": 1.2233435134776258, "percentage": 61.17, "elapsed_time": "16:37:47", "remaining_time": "10:33:30"} +{"current_steps": 4608, "total_steps": 7532, "loss": 0.22736643254756927, "lr": 7.174058256006012e-06, "epoch": 1.2236090824591688, "percentage": 61.18, "elapsed_time": "16:38:00", "remaining_time": "10:33:17"} +{"current_steps": 4609, "total_steps": 7532, "loss": 0.2686663866043091, "lr": 7.169846732663845e-06, "epoch": 1.2238746514407117, "percentage": 61.19, "elapsed_time": "16:38:13", "remaining_time": "10:33:04"} +{"current_steps": 4610, "total_steps": 7532, "loss": 0.1980462670326233, "lr": 7.1656357549418485e-06, "epoch": 1.2241402204222547, "percentage": 61.21, "elapsed_time": "16:38:26", "remaining_time": "10:32:51"} +{"current_steps": 4611, "total_steps": 7532, "loss": 0.22997641563415527, "lr": 7.161425323651846e-06, "epoch": 1.2244057894037976, "percentage": 61.22, "elapsed_time": "16:38:39", "remaining_time": "10:32:37"} +{"current_steps": 4612, "total_steps": 7532, "loss": 0.28781357407569885, "lr": 7.157215439605567e-06, "epoch": 1.2246713583853406, "percentage": 61.23, "elapsed_time": "16:38:51", "remaining_time": "10:32:24"} +{"current_steps": 4613, "total_steps": 7532, "loss": 0.22558270394802094, "lr": 7.153006103614624e-06, "epoch": 1.2249369273668835, "percentage": 61.25, "elapsed_time": "16:39:04", "remaining_time": "10:32:11"} +{"current_steps": 4614, "total_steps": 7532, "loss": 0.2435922622680664, "lr": 7.148797316490527e-06, "epoch": 1.2252024963484265, "percentage": 61.26, "elapsed_time": "16:39:17", "remaining_time": "10:31:58"} +{"current_steps": 4615, 
"total_steps": 7532, "loss": 0.27840936183929443, "lr": 7.14458907904468e-06, "epoch": 1.2254680653299694, "percentage": 61.27, "elapsed_time": "16:39:30", "remaining_time": "10:31:45"} +{"current_steps": 4616, "total_steps": 7532, "loss": 0.2775651812553406, "lr": 7.1403813920883825e-06, "epoch": 1.2257336343115124, "percentage": 61.29, "elapsed_time": "16:39:43", "remaining_time": "10:31:32"} +{"current_steps": 4617, "total_steps": 7532, "loss": 0.2430988848209381, "lr": 7.136174256432828e-06, "epoch": 1.2259992032930553, "percentage": 61.3, "elapsed_time": "16:39:56", "remaining_time": "10:31:19"} +{"current_steps": 4618, "total_steps": 7532, "loss": 0.2018759697675705, "lr": 7.131967672889101e-06, "epoch": 1.2262647722745983, "percentage": 61.31, "elapsed_time": "16:40:08", "remaining_time": "10:31:06"} +{"current_steps": 4619, "total_steps": 7532, "loss": 0.25314825773239136, "lr": 7.127761642268179e-06, "epoch": 1.2265303412561412, "percentage": 61.33, "elapsed_time": "16:40:21", "remaining_time": "10:30:52"} +{"current_steps": 4620, "total_steps": 7532, "loss": 0.2542746365070343, "lr": 7.123556165380935e-06, "epoch": 1.2267959102376842, "percentage": 61.34, "elapsed_time": "16:40:34", "remaining_time": "10:30:39"} +{"current_steps": 4621, "total_steps": 7532, "loss": 0.2912300229072571, "lr": 7.119351243038142e-06, "epoch": 1.2270614792192271, "percentage": 61.35, "elapsed_time": "16:40:46", "remaining_time": "10:30:26"} +{"current_steps": 4622, "total_steps": 7532, "loss": 0.26893284916877747, "lr": 7.115146876050454e-06, "epoch": 1.22732704820077, "percentage": 61.36, "elapsed_time": "16:40:59", "remaining_time": "10:30:13"} +{"current_steps": 4623, "total_steps": 7532, "loss": 0.2711215317249298, "lr": 7.110943065228425e-06, "epoch": 1.227592617182313, "percentage": 61.38, "elapsed_time": "16:41:12", "remaining_time": "10:30:00"} +{"current_steps": 4624, "total_steps": 7532, "loss": 0.25530266761779785, "lr": 7.106739811382501e-06, "epoch": 
1.227858186163856, "percentage": 61.39, "elapsed_time": "16:41:25", "remaining_time": "10:29:47"} +{"current_steps": 4625, "total_steps": 7532, "loss": 0.2547178864479065, "lr": 7.102537115323018e-06, "epoch": 1.228123755145399, "percentage": 61.4, "elapsed_time": "16:41:37", "remaining_time": "10:29:33"} +{"current_steps": 4626, "total_steps": 7532, "loss": 0.27973634004592896, "lr": 7.0983349778602064e-06, "epoch": 1.228389324126942, "percentage": 61.42, "elapsed_time": "16:41:50", "remaining_time": "10:29:20"} +{"current_steps": 4627, "total_steps": 7532, "loss": 0.24066339433193207, "lr": 7.0941333998041884e-06, "epoch": 1.2286548931084849, "percentage": 61.43, "elapsed_time": "16:42:03", "remaining_time": "10:29:07"} +{"current_steps": 4628, "total_steps": 7532, "loss": 0.24305742979049683, "lr": 7.0899323819649816e-06, "epoch": 1.2289204620900278, "percentage": 61.44, "elapsed_time": "16:42:16", "remaining_time": "10:28:54"} +{"current_steps": 4629, "total_steps": 7532, "loss": 0.22478783130645752, "lr": 7.085731925152484e-06, "epoch": 1.2291860310715708, "percentage": 61.46, "elapsed_time": "16:42:28", "remaining_time": "10:28:41"} +{"current_steps": 4630, "total_steps": 7532, "loss": 0.24995659291744232, "lr": 7.081532030176506e-06, "epoch": 1.2294516000531137, "percentage": 61.47, "elapsed_time": "16:42:41", "remaining_time": "10:28:28"} +{"current_steps": 4631, "total_steps": 7532, "loss": 0.2579454183578491, "lr": 7.077332697846733e-06, "epoch": 1.2297171690346567, "percentage": 61.48, "elapsed_time": "16:42:54", "remaining_time": "10:28:15"} +{"current_steps": 4632, "total_steps": 7532, "loss": 0.2513299286365509, "lr": 7.073133928972745e-06, "epoch": 1.2299827380161996, "percentage": 61.5, "elapsed_time": "16:43:06", "remaining_time": "10:28:01"} +{"current_steps": 4633, "total_steps": 7532, "loss": 0.23344315588474274, "lr": 7.068935724364016e-06, "epoch": 1.2302483069977426, "percentage": 61.51, "elapsed_time": "16:43:20", "remaining_time": 
"10:27:48"} +{"current_steps": 4634, "total_steps": 7532, "loss": 0.26750341057777405, "lr": 7.064738084829912e-06, "epoch": 1.2305138759792857, "percentage": 61.52, "elapsed_time": "16:43:32", "remaining_time": "10:27:35"} +{"current_steps": 4635, "total_steps": 7532, "loss": 0.22424373030662537, "lr": 7.0605410111796855e-06, "epoch": 1.2307794449608287, "percentage": 61.54, "elapsed_time": "16:43:45", "remaining_time": "10:27:22"} +{"current_steps": 4636, "total_steps": 7532, "loss": 0.24261844158172607, "lr": 7.056344504222485e-06, "epoch": 1.2310450139423716, "percentage": 61.55, "elapsed_time": "16:43:58", "remaining_time": "10:27:09"} +{"current_steps": 4637, "total_steps": 7532, "loss": 0.22273704409599304, "lr": 7.052148564767347e-06, "epoch": 1.2313105829239146, "percentage": 61.56, "elapsed_time": "16:44:11", "remaining_time": "10:26:56"} +{"current_steps": 4638, "total_steps": 7532, "loss": 0.23726603388786316, "lr": 7.047953193623195e-06, "epoch": 1.2315761519054576, "percentage": 61.58, "elapsed_time": "16:44:23", "remaining_time": "10:26:43"} +{"current_steps": 4639, "total_steps": 7532, "loss": 0.2612340748310089, "lr": 7.043758391598856e-06, "epoch": 1.2318417208870005, "percentage": 61.59, "elapsed_time": "16:44:36", "remaining_time": "10:26:30"} +{"current_steps": 4640, "total_steps": 7532, "loss": 0.25722867250442505, "lr": 7.039564159503034e-06, "epoch": 1.2321072898685435, "percentage": 61.6, "elapsed_time": "16:44:49", "remaining_time": "10:26:16"} +{"current_steps": 4641, "total_steps": 7532, "loss": 0.25940731167793274, "lr": 7.035370498144325e-06, "epoch": 1.2323728588500864, "percentage": 61.62, "elapsed_time": "16:45:02", "remaining_time": "10:26:03"} +{"current_steps": 4642, "total_steps": 7532, "loss": 0.2328685224056244, "lr": 7.03117740833122e-06, "epoch": 1.2326384278316294, "percentage": 61.63, "elapsed_time": "16:45:15", "remaining_time": "10:25:50"} +{"current_steps": 4643, "total_steps": 7532, "loss": 0.3019352853298187, "lr": 
7.0269848908720965e-06, "epoch": 1.2329039968131723, "percentage": 61.64, "elapsed_time": "16:45:28", "remaining_time": "10:25:37"} +{"current_steps": 4644, "total_steps": 7532, "loss": 0.2665002942085266, "lr": 7.022792946575222e-06, "epoch": 1.2331695657947153, "percentage": 61.66, "elapsed_time": "16:45:40", "remaining_time": "10:25:24"} +{"current_steps": 4645, "total_steps": 7532, "loss": 0.2425101399421692, "lr": 7.018601576248755e-06, "epoch": 1.2334351347762582, "percentage": 61.67, "elapsed_time": "16:45:53", "remaining_time": "10:25:11"} +{"current_steps": 4646, "total_steps": 7532, "loss": 0.23319771885871887, "lr": 7.014410780700743e-06, "epoch": 1.2337007037578012, "percentage": 61.68, "elapsed_time": "16:46:05", "remaining_time": "10:24:58"} +{"current_steps": 4647, "total_steps": 7532, "loss": 0.23033195734024048, "lr": 7.010220560739116e-06, "epoch": 1.2339662727393441, "percentage": 61.7, "elapsed_time": "16:46:18", "remaining_time": "10:24:44"} +{"current_steps": 4648, "total_steps": 7532, "loss": 0.24682006239891052, "lr": 7.006030917171707e-06, "epoch": 1.234231841720887, "percentage": 61.71, "elapsed_time": "16:46:31", "remaining_time": "10:24:32"} +{"current_steps": 4649, "total_steps": 7532, "loss": 0.25566285848617554, "lr": 7.001841850806228e-06, "epoch": 1.23449741070243, "percentage": 61.72, "elapsed_time": "16:46:44", "remaining_time": "10:24:18"} +{"current_steps": 4650, "total_steps": 7532, "loss": 0.2791779339313507, "lr": 6.9976533624502784e-06, "epoch": 1.234762979683973, "percentage": 61.74, "elapsed_time": "16:46:57", "remaining_time": "10:24:05"} +{"current_steps": 4651, "total_steps": 7532, "loss": 0.25597846508026123, "lr": 6.993465452911352e-06, "epoch": 1.235028548665516, "percentage": 61.75, "elapsed_time": "16:47:09", "remaining_time": "10:23:52"} +{"current_steps": 4652, "total_steps": 7532, "loss": 0.24034728109836578, "lr": 6.9892781229968275e-06, "epoch": 1.2352941176470589, "percentage": 61.76, "elapsed_time": 
"16:47:22", "remaining_time": "10:23:39"} +{"current_steps": 4653, "total_steps": 7532, "loss": 0.2209509015083313, "lr": 6.985091373513972e-06, "epoch": 1.2355596866286018, "percentage": 61.78, "elapsed_time": "16:47:35", "remaining_time": "10:23:26"} +{"current_steps": 4654, "total_steps": 7532, "loss": 0.29106947779655457, "lr": 6.980905205269942e-06, "epoch": 1.2358252556101448, "percentage": 61.79, "elapsed_time": "16:47:48", "remaining_time": "10:23:13"} +{"current_steps": 4655, "total_steps": 7532, "loss": 0.24014753103256226, "lr": 6.976719619071782e-06, "epoch": 1.2360908245916877, "percentage": 61.8, "elapsed_time": "16:48:00", "remaining_time": "10:22:59"} +{"current_steps": 4656, "total_steps": 7532, "loss": 0.27135470509529114, "lr": 6.972534615726422e-06, "epoch": 1.2363563935732307, "percentage": 61.82, "elapsed_time": "16:48:14", "remaining_time": "10:22:47"} +{"current_steps": 4657, "total_steps": 7532, "loss": 0.23386257886886597, "lr": 6.968350196040683e-06, "epoch": 1.2366219625547736, "percentage": 61.83, "elapsed_time": "16:48:26", "remaining_time": "10:22:33"} +{"current_steps": 4658, "total_steps": 7532, "loss": 0.23119661211967468, "lr": 6.964166360821271e-06, "epoch": 1.2368875315363166, "percentage": 61.84, "elapsed_time": "16:48:39", "remaining_time": "10:22:20"} +{"current_steps": 4659, "total_steps": 7532, "loss": 0.2399922013282776, "lr": 6.959983110874782e-06, "epoch": 1.2371531005178595, "percentage": 61.86, "elapsed_time": "16:48:52", "remaining_time": "10:22:07"} +{"current_steps": 4660, "total_steps": 7532, "loss": 0.18323534727096558, "lr": 6.9558004470076944e-06, "epoch": 1.2374186694994025, "percentage": 61.87, "elapsed_time": "16:49:04", "remaining_time": "10:21:54"} +{"current_steps": 4661, "total_steps": 7532, "loss": 0.25683268904685974, "lr": 6.951618370026378e-06, "epoch": 1.2376842384809454, "percentage": 61.88, "elapsed_time": "16:49:17", "remaining_time": "10:21:41"} +{"current_steps": 4662, "total_steps": 7532, 
"loss": 0.2861499786376953, "lr": 6.947436880737089e-06, "epoch": 1.2379498074624884, "percentage": 61.9, "elapsed_time": "16:49:30", "remaining_time": "10:21:28"} +{"current_steps": 4663, "total_steps": 7532, "loss": 0.28021398186683655, "lr": 6.943255979945965e-06, "epoch": 1.2382153764440313, "percentage": 61.91, "elapsed_time": "16:49:43", "remaining_time": "10:21:15"} +{"current_steps": 4664, "total_steps": 7532, "loss": 0.2739776074886322, "lr": 6.939075668459039e-06, "epoch": 1.2384809454255743, "percentage": 61.92, "elapsed_time": "16:49:56", "remaining_time": "10:21:01"} +{"current_steps": 4665, "total_steps": 7532, "loss": 0.26015231013298035, "lr": 6.934895947082221e-06, "epoch": 1.2387465144071172, "percentage": 61.94, "elapsed_time": "16:50:09", "remaining_time": "10:20:48"} +{"current_steps": 4666, "total_steps": 7532, "loss": 0.2572113871574402, "lr": 6.930716816621317e-06, "epoch": 1.2390120833886602, "percentage": 61.95, "elapsed_time": "16:50:21", "remaining_time": "10:20:35"} +{"current_steps": 4667, "total_steps": 7532, "loss": 0.24094708263874054, "lr": 6.926538277882012e-06, "epoch": 1.2392776523702032, "percentage": 61.96, "elapsed_time": "16:50:34", "remaining_time": "10:20:22"} +{"current_steps": 4668, "total_steps": 7532, "loss": 0.22803835570812225, "lr": 6.92236033166988e-06, "epoch": 1.239543221351746, "percentage": 61.98, "elapsed_time": "16:50:47", "remaining_time": "10:20:09"} +{"current_steps": 4669, "total_steps": 7532, "loss": 0.23672322928905487, "lr": 6.9181829787903774e-06, "epoch": 1.239808790333289, "percentage": 61.99, "elapsed_time": "16:51:00", "remaining_time": "10:19:56"} +{"current_steps": 4670, "total_steps": 7532, "loss": 0.2568579912185669, "lr": 6.91400622004885e-06, "epoch": 1.240074359314832, "percentage": 62.0, "elapsed_time": "16:51:12", "remaining_time": "10:19:43"} +{"current_steps": 4671, "total_steps": 7532, "loss": 0.25267845392227173, "lr": 6.909830056250527e-06, "epoch": 1.240339928296375, "percentage": 
62.02, "elapsed_time": "16:51:26", "remaining_time": "10:19:30"} +{"current_steps": 4672, "total_steps": 7532, "loss": 0.30336999893188477, "lr": 6.905654488200524e-06, "epoch": 1.240605497277918, "percentage": 62.03, "elapsed_time": "16:51:38", "remaining_time": "10:19:17"} +{"current_steps": 4673, "total_steps": 7532, "loss": 0.2741299867630005, "lr": 6.901479516703842e-06, "epoch": 1.2408710662594609, "percentage": 62.04, "elapsed_time": "16:51:51", "remaining_time": "10:19:04"} +{"current_steps": 4674, "total_steps": 7532, "loss": 0.2896823585033417, "lr": 6.897305142565363e-06, "epoch": 1.2411366352410038, "percentage": 62.06, "elapsed_time": "16:52:04", "remaining_time": "10:18:51"} +{"current_steps": 4675, "total_steps": 7532, "loss": 0.23102329671382904, "lr": 6.8931313665898625e-06, "epoch": 1.2414022042225468, "percentage": 62.07, "elapsed_time": "16:52:17", "remaining_time": "10:18:38"} +{"current_steps": 4676, "total_steps": 7532, "loss": 0.2600775361061096, "lr": 6.8889581895819915e-06, "epoch": 1.2416677732040897, "percentage": 62.08, "elapsed_time": "16:52:30", "remaining_time": "10:18:24"} +{"current_steps": 4677, "total_steps": 7532, "loss": 0.23589132726192474, "lr": 6.884785612346291e-06, "epoch": 1.2419333421856327, "percentage": 62.1, "elapsed_time": "16:52:43", "remaining_time": "10:18:11"} +{"current_steps": 4678, "total_steps": 7532, "loss": 0.24419361352920532, "lr": 6.880613635687184e-06, "epoch": 1.2421989111671756, "percentage": 62.11, "elapsed_time": "16:52:55", "remaining_time": "10:17:58"} +{"current_steps": 4679, "total_steps": 7532, "loss": 0.23267227411270142, "lr": 6.876442260408977e-06, "epoch": 1.2424644801487186, "percentage": 62.12, "elapsed_time": "16:53:08", "remaining_time": "10:17:45"} +{"current_steps": 4680, "total_steps": 7532, "loss": 0.2507064938545227, "lr": 6.8722714873158635e-06, "epoch": 1.2427300491302615, "percentage": 62.13, "elapsed_time": "16:53:21", "remaining_time": "10:17:32"} +{"current_steps": 4681, 
"total_steps": 7532, "loss": 0.2529929280281067, "lr": 6.868101317211922e-06, "epoch": 1.2429956181118045, "percentage": 62.15, "elapsed_time": "16:53:33", "remaining_time": "10:17:19"} +{"current_steps": 4682, "total_steps": 7532, "loss": 0.23255379498004913, "lr": 6.863931750901107e-06, "epoch": 1.2432611870933474, "percentage": 62.16, "elapsed_time": "16:53:47", "remaining_time": "10:17:06"} +{"current_steps": 4683, "total_steps": 7532, "loss": 0.22757332026958466, "lr": 6.859762789187259e-06, "epoch": 1.2435267560748904, "percentage": 62.17, "elapsed_time": "16:53:59", "remaining_time": "10:16:52"} +{"current_steps": 4684, "total_steps": 7532, "loss": 0.2578364312648773, "lr": 6.8555944328741145e-06, "epoch": 1.2437923250564333, "percentage": 62.19, "elapsed_time": "16:54:12", "remaining_time": "10:16:39"} +{"current_steps": 4685, "total_steps": 7532, "loss": 0.27568408846855164, "lr": 6.851426682765278e-06, "epoch": 1.2440578940379763, "percentage": 62.2, "elapsed_time": "16:54:25", "remaining_time": "10:16:26"} +{"current_steps": 4686, "total_steps": 7532, "loss": 0.25595831871032715, "lr": 6.847259539664244e-06, "epoch": 1.2443234630195192, "percentage": 62.21, "elapsed_time": "16:54:38", "remaining_time": "10:16:13"} +{"current_steps": 4687, "total_steps": 7532, "loss": 0.2195426970720291, "lr": 6.843093004374386e-06, "epoch": 1.2445890320010622, "percentage": 62.23, "elapsed_time": "16:54:50", "remaining_time": "10:16:00"} +{"current_steps": 4688, "total_steps": 7532, "loss": 0.23247741162776947, "lr": 6.838927077698967e-06, "epoch": 1.2448546009826051, "percentage": 62.24, "elapsed_time": "16:55:03", "remaining_time": "10:15:47"} +{"current_steps": 4689, "total_steps": 7532, "loss": 0.26149916648864746, "lr": 6.834761760441127e-06, "epoch": 1.245120169964148, "percentage": 62.25, "elapsed_time": "16:55:15", "remaining_time": "10:15:34"} +{"current_steps": 4690, "total_steps": 7532, "loss": 0.2521447241306305, "lr": 6.830597053403885e-06, "epoch": 
1.245385738945691, "percentage": 62.27, "elapsed_time": "16:55:28", "remaining_time": "10:15:20"} +{"current_steps": 4691, "total_steps": 7532, "loss": 0.23401981592178345, "lr": 6.826432957390155e-06, "epoch": 1.245651307927234, "percentage": 62.28, "elapsed_time": "16:55:41", "remaining_time": "10:15:07"} +{"current_steps": 4692, "total_steps": 7532, "loss": 0.25341230630874634, "lr": 6.822269473202714e-06, "epoch": 1.245916876908777, "percentage": 62.29, "elapsed_time": "16:55:54", "remaining_time": "10:14:54"} +{"current_steps": 4693, "total_steps": 7532, "loss": 0.2513907551765442, "lr": 6.818106601644248e-06, "epoch": 1.24618244589032, "percentage": 62.31, "elapsed_time": "16:56:07", "remaining_time": "10:14:41"} +{"current_steps": 4694, "total_steps": 7532, "loss": 0.2682073414325714, "lr": 6.8139443435173005e-06, "epoch": 1.2464480148718629, "percentage": 62.32, "elapsed_time": "16:56:19", "remaining_time": "10:14:28"} +{"current_steps": 4695, "total_steps": 7532, "loss": 0.22726872563362122, "lr": 6.809782699624308e-06, "epoch": 1.2467135838534058, "percentage": 62.33, "elapsed_time": "16:56:33", "remaining_time": "10:14:15"} +{"current_steps": 4696, "total_steps": 7532, "loss": 0.24184030294418335, "lr": 6.805621670767588e-06, "epoch": 1.2469791528349488, "percentage": 62.35, "elapsed_time": "16:56:45", "remaining_time": "10:14:02"} +{"current_steps": 4697, "total_steps": 7532, "loss": 0.203639417886734, "lr": 6.801461257749334e-06, "epoch": 1.247244721816492, "percentage": 62.36, "elapsed_time": "16:56:58", "remaining_time": "10:13:49"} +{"current_steps": 4698, "total_steps": 7532, "loss": 0.2170606106519699, "lr": 6.797301461371626e-06, "epoch": 1.2475102907980349, "percentage": 62.37, "elapsed_time": "16:57:11", "remaining_time": "10:13:36"} +{"current_steps": 4699, "total_steps": 7532, "loss": 0.2225056290626526, "lr": 6.7931422824364245e-06, "epoch": 1.2477758597795778, "percentage": 62.39, "elapsed_time": "16:57:24", "remaining_time": "10:13:23"} 
+{"current_steps": 4700, "total_steps": 7532, "loss": 0.2388974130153656, "lr": 6.788983721745569e-06, "epoch": 1.2480414287611208, "percentage": 62.4, "elapsed_time": "16:57:36", "remaining_time": "10:13:10"} +{"current_steps": 4701, "total_steps": 7532, "loss": 0.2291644811630249, "lr": 6.784825780100776e-06, "epoch": 1.2483069977426637, "percentage": 62.41, "elapsed_time": "16:57:54", "remaining_time": "10:12:59"} +{"current_steps": 4702, "total_steps": 7532, "loss": 0.23793739080429077, "lr": 6.7806684583036595e-06, "epoch": 1.2485725667242067, "percentage": 62.43, "elapsed_time": "16:58:07", "remaining_time": "10:12:46"} +{"current_steps": 4703, "total_steps": 7532, "loss": 0.2756902277469635, "lr": 6.776511757155695e-06, "epoch": 1.2488381357057496, "percentage": 62.44, "elapsed_time": "16:58:19", "remaining_time": "10:12:33"} +{"current_steps": 4704, "total_steps": 7532, "loss": 0.25046268105506897, "lr": 6.772355677458249e-06, "epoch": 1.2491037046872926, "percentage": 62.45, "elapsed_time": "16:58:32", "remaining_time": "10:12:20"} +{"current_steps": 4705, "total_steps": 7532, "loss": 0.238486647605896, "lr": 6.7682002200125575e-06, "epoch": 1.2493692736688355, "percentage": 62.47, "elapsed_time": "16:58:45", "remaining_time": "10:12:07"} +{"current_steps": 4706, "total_steps": 7532, "loss": 0.2366628348827362, "lr": 6.764045385619751e-06, "epoch": 1.2496348426503785, "percentage": 62.48, "elapsed_time": "16:58:58", "remaining_time": "10:11:54"} +{"current_steps": 4707, "total_steps": 7532, "loss": 0.24825221300125122, "lr": 6.759891175080827e-06, "epoch": 1.2499004116319214, "percentage": 62.49, "elapsed_time": "16:59:11", "remaining_time": "10:11:41"} +{"current_steps": 4708, "total_steps": 7532, "loss": 0.2304186224937439, "lr": 6.755737589196673e-06, "epoch": 1.2501659806134644, "percentage": 62.51, "elapsed_time": "16:59:24", "remaining_time": "10:11:28"} +{"current_steps": 4709, "total_steps": 7532, "loss": 0.2824471592903137, "lr": 
6.7515846287680476e-06, "epoch": 1.2504315495950074, "percentage": 62.52, "elapsed_time": "16:59:37", "remaining_time": "10:11:15"} +{"current_steps": 4710, "total_steps": 7532, "loss": 0.23130697011947632, "lr": 6.747432294595591e-06, "epoch": 1.2506971185765503, "percentage": 62.53, "elapsed_time": "16:59:50", "remaining_time": "10:11:02"} +{"current_steps": 4711, "total_steps": 7532, "loss": 0.28371602296829224, "lr": 6.7432805874798334e-06, "epoch": 1.2509626875580933, "percentage": 62.55, "elapsed_time": "17:00:03", "remaining_time": "10:10:49"} +{"current_steps": 4712, "total_steps": 7532, "loss": 0.23452092707157135, "lr": 6.739129508221167e-06, "epoch": 1.2512282565396362, "percentage": 62.56, "elapsed_time": "17:00:16", "remaining_time": "10:10:36"} +{"current_steps": 4713, "total_steps": 7532, "loss": 0.22486859560012817, "lr": 6.734979057619873e-06, "epoch": 1.2514938255211792, "percentage": 62.57, "elapsed_time": "17:00:30", "remaining_time": "10:10:23"} +{"current_steps": 4714, "total_steps": 7532, "loss": 0.2818532884120941, "lr": 6.730829236476111e-06, "epoch": 1.2517593945027221, "percentage": 62.59, "elapsed_time": "17:00:43", "remaining_time": "10:10:10"} +{"current_steps": 4715, "total_steps": 7532, "loss": 0.2060810923576355, "lr": 6.7266800455899125e-06, "epoch": 1.252024963484265, "percentage": 62.6, "elapsed_time": "17:00:56", "remaining_time": "10:09:57"} +{"current_steps": 4716, "total_steps": 7532, "loss": 0.2183244377374649, "lr": 6.722531485761199e-06, "epoch": 1.252290532465808, "percentage": 62.61, "elapsed_time": "17:01:09", "remaining_time": "10:09:44"} +{"current_steps": 4717, "total_steps": 7532, "loss": 0.24757327139377594, "lr": 6.71838355778976e-06, "epoch": 1.252556101447351, "percentage": 62.63, "elapsed_time": "17:01:22", "remaining_time": "10:09:31"} +{"current_steps": 4718, "total_steps": 7532, "loss": 0.3058333396911621, "lr": 6.714236262475268e-06, "epoch": 1.252821670428894, "percentage": 62.64, "elapsed_time": 
"17:01:35", "remaining_time": "10:09:18"} +{"current_steps": 4719, "total_steps": 7532, "loss": 0.24095620214939117, "lr": 6.71008960061727e-06, "epoch": 1.2530872394104369, "percentage": 62.65, "elapsed_time": "17:01:48", "remaining_time": "10:09:06"} +{"current_steps": 4720, "total_steps": 7532, "loss": 0.25614839792251587, "lr": 6.705943573015199e-06, "epoch": 1.2533528083919798, "percentage": 62.67, "elapsed_time": "17:02:01", "remaining_time": "10:08:52"} +{"current_steps": 4721, "total_steps": 7532, "loss": 0.22295254468917847, "lr": 6.701798180468356e-06, "epoch": 1.2536183773735228, "percentage": 62.68, "elapsed_time": "17:02:14", "remaining_time": "10:08:40"} +{"current_steps": 4722, "total_steps": 7532, "loss": 0.24783796072006226, "lr": 6.697653423775926e-06, "epoch": 1.2538839463550657, "percentage": 62.69, "elapsed_time": "17:02:28", "remaining_time": "10:08:27"} +{"current_steps": 4723, "total_steps": 7532, "loss": 0.19702200591564178, "lr": 6.693509303736969e-06, "epoch": 1.2541495153366087, "percentage": 62.71, "elapsed_time": "17:02:41", "remaining_time": "10:08:14"} +{"current_steps": 4724, "total_steps": 7532, "loss": 0.2539074122905731, "lr": 6.689365821150421e-06, "epoch": 1.2544150843181516, "percentage": 62.72, "elapsed_time": "17:02:54", "remaining_time": "10:08:01"} +{"current_steps": 4725, "total_steps": 7532, "loss": 0.2480372041463852, "lr": 6.6852229768150976e-06, "epoch": 1.2546806532996946, "percentage": 62.73, "elapsed_time": "17:03:07", "remaining_time": "10:07:48"} +{"current_steps": 4726, "total_steps": 7532, "loss": 0.2231048047542572, "lr": 6.68108077152969e-06, "epoch": 1.2549462222812375, "percentage": 62.75, "elapsed_time": "17:03:20", "remaining_time": "10:07:35"} +{"current_steps": 4727, "total_steps": 7532, "loss": 0.260783851146698, "lr": 6.676939206092766e-06, "epoch": 1.2552117912627805, "percentage": 62.76, "elapsed_time": "17:03:33", "remaining_time": "10:07:22"} +{"current_steps": 4728, "total_steps": 7532, "loss": 
0.24069254100322723, "lr": 6.67279828130277e-06, "epoch": 1.2554773602443234, "percentage": 62.77, "elapsed_time": "17:03:46", "remaining_time": "10:07:09"} +{"current_steps": 4729, "total_steps": 7532, "loss": 0.2578867971897125, "lr": 6.668657997958027e-06, "epoch": 1.2557429292258664, "percentage": 62.79, "elapsed_time": "17:03:59", "remaining_time": "10:06:56"} +{"current_steps": 4730, "total_steps": 7532, "loss": 0.20724457502365112, "lr": 6.664518356856732e-06, "epoch": 1.2560084982074093, "percentage": 62.8, "elapsed_time": "17:04:12", "remaining_time": "10:06:43"} +{"current_steps": 4731, "total_steps": 7532, "loss": 0.23107580840587616, "lr": 6.6603793587969586e-06, "epoch": 1.2562740671889523, "percentage": 62.81, "elapsed_time": "17:04:25", "remaining_time": "10:06:30"} +{"current_steps": 4732, "total_steps": 7532, "loss": 0.2481832504272461, "lr": 6.656241004576659e-06, "epoch": 1.2565396361704952, "percentage": 62.83, "elapsed_time": "17:04:38", "remaining_time": "10:06:17"} +{"current_steps": 4733, "total_steps": 7532, "loss": 0.2219698578119278, "lr": 6.652103294993657e-06, "epoch": 1.2568052051520382, "percentage": 62.84, "elapsed_time": "17:04:51", "remaining_time": "10:06:05"} +{"current_steps": 4734, "total_steps": 7532, "loss": 0.2245863974094391, "lr": 6.647966230845655e-06, "epoch": 1.2570707741335811, "percentage": 62.85, "elapsed_time": "17:05:04", "remaining_time": "10:05:51"} +{"current_steps": 4735, "total_steps": 7532, "loss": 0.2086387574672699, "lr": 6.643829812930231e-06, "epoch": 1.257336343115124, "percentage": 62.87, "elapsed_time": "17:05:17", "remaining_time": "10:05:38"} +{"current_steps": 4736, "total_steps": 7532, "loss": 0.23484499752521515, "lr": 6.6396940420448355e-06, "epoch": 1.257601912096667, "percentage": 62.88, "elapsed_time": "17:05:30", "remaining_time": "10:05:25"} +{"current_steps": 4737, "total_steps": 7532, "loss": 0.22011062502861023, "lr": 6.635558918986797e-06, "epoch": 1.25786748107821, "percentage": 62.89, 
"elapsed_time": "17:05:43", "remaining_time": "10:05:13"} +{"current_steps": 4738, "total_steps": 7532, "loss": 0.2426830381155014, "lr": 6.631424444553319e-06, "epoch": 1.258133050059753, "percentage": 62.9, "elapsed_time": "17:05:57", "remaining_time": "10:05:00"} +{"current_steps": 4739, "total_steps": 7532, "loss": 0.2702174484729767, "lr": 6.627290619541481e-06, "epoch": 1.258398619041296, "percentage": 62.92, "elapsed_time": "17:06:10", "remaining_time": "10:04:47"} +{"current_steps": 4740, "total_steps": 7532, "loss": 0.26594820618629456, "lr": 6.623157444748234e-06, "epoch": 1.2586641880228389, "percentage": 62.93, "elapsed_time": "17:06:23", "remaining_time": "10:04:34"} +{"current_steps": 4741, "total_steps": 7532, "loss": 0.2546013593673706, "lr": 6.619024920970405e-06, "epoch": 1.2589297570043818, "percentage": 62.94, "elapsed_time": "17:06:36", "remaining_time": "10:04:21"} +{"current_steps": 4742, "total_steps": 7532, "loss": 0.27207985520362854, "lr": 6.614893049004696e-06, "epoch": 1.2591953259859248, "percentage": 62.96, "elapsed_time": "17:06:49", "remaining_time": "10:04:08"} +{"current_steps": 4743, "total_steps": 7532, "loss": 0.2640937566757202, "lr": 6.610761829647685e-06, "epoch": 1.259460894967468, "percentage": 62.97, "elapsed_time": "17:07:02", "remaining_time": "10:03:55"} +{"current_steps": 4744, "total_steps": 7532, "loss": 0.2890278697013855, "lr": 6.60663126369582e-06, "epoch": 1.2597264639490109, "percentage": 62.98, "elapsed_time": "17:07:15", "remaining_time": "10:03:42"} +{"current_steps": 4745, "total_steps": 7532, "loss": 0.24610492587089539, "lr": 6.602501351945425e-06, "epoch": 1.2599920329305538, "percentage": 63.0, "elapsed_time": "17:07:28", "remaining_time": "10:03:29"} +{"current_steps": 4746, "total_steps": 7532, "loss": 0.24946746230125427, "lr": 6.598372095192699e-06, "epoch": 1.2602576019120968, "percentage": 63.01, "elapsed_time": "17:07:41", "remaining_time": "10:03:16"} +{"current_steps": 4747, "total_steps": 
7532, "loss": 0.2369944453239441, "lr": 6.594243494233717e-06, "epoch": 1.2605231708936397, "percentage": 63.02, "elapsed_time": "17:07:54", "remaining_time": "10:03:03"} +{"current_steps": 4748, "total_steps": 7532, "loss": 0.20980143547058105, "lr": 6.590115549864421e-06, "epoch": 1.2607887398751827, "percentage": 63.04, "elapsed_time": "17:08:07", "remaining_time": "10:02:50"} +{"current_steps": 4749, "total_steps": 7532, "loss": 0.22930344939231873, "lr": 6.5859882628806315e-06, "epoch": 1.2610543088567256, "percentage": 63.05, "elapsed_time": "17:08:20", "remaining_time": "10:02:37"} +{"current_steps": 4750, "total_steps": 7532, "loss": 0.22352416813373566, "lr": 6.5818616340780405e-06, "epoch": 1.2613198778382686, "percentage": 63.06, "elapsed_time": "17:08:33", "remaining_time": "10:02:24"} +{"current_steps": 4751, "total_steps": 7532, "loss": 0.2049327939748764, "lr": 6.577735664252214e-06, "epoch": 1.2615854468198116, "percentage": 63.08, "elapsed_time": "17:08:46", "remaining_time": "10:02:11"} +{"current_steps": 4752, "total_steps": 7532, "loss": 0.21858355402946472, "lr": 6.573610354198587e-06, "epoch": 1.2618510158013545, "percentage": 63.09, "elapsed_time": "17:08:59", "remaining_time": "10:01:58"} +{"current_steps": 4753, "total_steps": 7532, "loss": 0.225118950009346, "lr": 6.5694857047124786e-06, "epoch": 1.2621165847828975, "percentage": 63.1, "elapsed_time": "17:09:13", "remaining_time": "10:01:45"} +{"current_steps": 4754, "total_steps": 7532, "loss": 0.25780409574508667, "lr": 6.565361716589063e-06, "epoch": 1.2623821537644404, "percentage": 63.12, "elapsed_time": "17:09:25", "remaining_time": "10:01:32"} +{"current_steps": 4755, "total_steps": 7532, "loss": 0.23507939279079437, "lr": 6.5612383906233964e-06, "epoch": 1.2626477227459834, "percentage": 63.13, "elapsed_time": "17:09:39", "remaining_time": "10:01:20"} +{"current_steps": 4756, "total_steps": 7532, "loss": 0.27884477376937866, "lr": 6.557115727610417e-06, "epoch": 1.2629132917275263, 
"percentage": 63.14, "elapsed_time": "17:09:52", "remaining_time": "10:01:07"} +{"current_steps": 4757, "total_steps": 7532, "loss": 0.2564120888710022, "lr": 6.552993728344921e-06, "epoch": 1.2631788607090693, "percentage": 63.16, "elapsed_time": "17:10:04", "remaining_time": "10:00:53"} +{"current_steps": 4758, "total_steps": 7532, "loss": 0.259651243686676, "lr": 6.548872393621578e-06, "epoch": 1.2634444296906122, "percentage": 63.17, "elapsed_time": "17:10:18", "remaining_time": "10:00:41"} +{"current_steps": 4759, "total_steps": 7532, "loss": 0.23473814129829407, "lr": 6.544751724234937e-06, "epoch": 1.2637099986721552, "percentage": 63.18, "elapsed_time": "17:10:31", "remaining_time": "10:00:28"} +{"current_steps": 4760, "total_steps": 7532, "loss": 0.2447129189968109, "lr": 6.540631720979411e-06, "epoch": 1.2639755676536981, "percentage": 63.2, "elapsed_time": "17:10:44", "remaining_time": "10:00:15"} +{"current_steps": 4761, "total_steps": 7532, "loss": 0.22695237398147583, "lr": 6.536512384649294e-06, "epoch": 1.264241136635241, "percentage": 63.21, "elapsed_time": "17:10:57", "remaining_time": "10:00:02"} +{"current_steps": 4762, "total_steps": 7532, "loss": 0.24303656816482544, "lr": 6.532393716038738e-06, "epoch": 1.264506705616784, "percentage": 63.22, "elapsed_time": "17:11:10", "remaining_time": "9:59:49"} +{"current_steps": 4763, "total_steps": 7532, "loss": 0.23911908268928528, "lr": 6.528275715941776e-06, "epoch": 1.264772274598327, "percentage": 63.24, "elapsed_time": "17:11:23", "remaining_time": "9:59:36"} +{"current_steps": 4764, "total_steps": 7532, "loss": 0.19766747951507568, "lr": 6.524158385152309e-06, "epoch": 1.26503784357987, "percentage": 63.25, "elapsed_time": "17:11:36", "remaining_time": "9:59:23"} +{"current_steps": 4765, "total_steps": 7532, "loss": 0.24074134230613708, "lr": 6.520041724464114e-06, "epoch": 1.2653034125614129, "percentage": 63.26, "elapsed_time": "17:11:49", "remaining_time": "9:59:10"} +{"current_steps": 4766, 
"total_steps": 7532, "loss": 0.27557867765426636, "lr": 6.515925734670834e-06, "epoch": 1.2655689815429558, "percentage": 63.28, "elapsed_time": "17:12:03", "remaining_time": "9:58:57"} +{"current_steps": 4767, "total_steps": 7532, "loss": 0.24387787282466888, "lr": 6.511810416565979e-06, "epoch": 1.2658345505244988, "percentage": 63.29, "elapsed_time": "17:12:15", "remaining_time": "9:58:44"} +{"current_steps": 4768, "total_steps": 7532, "loss": 0.27863091230392456, "lr": 6.507695770942939e-06, "epoch": 1.2661001195060417, "percentage": 63.3, "elapsed_time": "17:12:29", "remaining_time": "9:58:31"} +{"current_steps": 4769, "total_steps": 7532, "loss": 0.23589591681957245, "lr": 6.503581798594965e-06, "epoch": 1.2663656884875847, "percentage": 63.32, "elapsed_time": "17:12:41", "remaining_time": "9:58:18"} +{"current_steps": 4770, "total_steps": 7532, "loss": 0.22869807481765747, "lr": 6.499468500315185e-06, "epoch": 1.2666312574691276, "percentage": 63.33, "elapsed_time": "17:12:55", "remaining_time": "9:58:05"} +{"current_steps": 4771, "total_steps": 7532, "loss": 0.2351568192243576, "lr": 6.495355876896592e-06, "epoch": 1.2668968264506706, "percentage": 63.34, "elapsed_time": "17:13:07", "remaining_time": "9:57:52"} +{"current_steps": 4772, "total_steps": 7532, "loss": 0.2291228175163269, "lr": 6.491243929132052e-06, "epoch": 1.2671623954322135, "percentage": 63.36, "elapsed_time": "17:13:21", "remaining_time": "9:57:39"} +{"current_steps": 4773, "total_steps": 7532, "loss": 0.23203743994235992, "lr": 6.487132657814297e-06, "epoch": 1.2674279644137565, "percentage": 63.37, "elapsed_time": "17:13:34", "remaining_time": "9:57:26"} +{"current_steps": 4774, "total_steps": 7532, "loss": 0.22035656869411469, "lr": 6.483022063735938e-06, "epoch": 1.2676935333952994, "percentage": 63.38, "elapsed_time": "17:13:46", "remaining_time": "9:57:13"} +{"current_steps": 4775, "total_steps": 7532, "loss": 0.21576716005802155, "lr": 6.478912147689448e-06, "epoch": 
1.2679591023768424, "percentage": 63.4, "elapsed_time": "17:14:00", "remaining_time": "9:57:00"} +{"current_steps": 4776, "total_steps": 7532, "loss": 0.27764660120010376, "lr": 6.474802910467171e-06, "epoch": 1.2682246713583853, "percentage": 63.41, "elapsed_time": "17:14:12", "remaining_time": "9:56:47"} +{"current_steps": 4777, "total_steps": 7532, "loss": 0.23715822398662567, "lr": 6.4706943528613135e-06, "epoch": 1.2684902403399283, "percentage": 63.42, "elapsed_time": "17:14:26", "remaining_time": "9:56:34"} +{"current_steps": 4778, "total_steps": 7532, "loss": 0.27764302492141724, "lr": 6.4665864756639606e-06, "epoch": 1.2687558093214713, "percentage": 63.44, "elapsed_time": "17:14:39", "remaining_time": "9:56:21"} +{"current_steps": 4779, "total_steps": 7532, "loss": 0.21634885668754578, "lr": 6.4624792796670624e-06, "epoch": 1.2690213783030142, "percentage": 63.45, "elapsed_time": "17:14:52", "remaining_time": "9:56:09"} +{"current_steps": 4780, "total_steps": 7532, "loss": 0.27262234687805176, "lr": 6.458372765662438e-06, "epoch": 1.2692869472845572, "percentage": 63.46, "elapsed_time": "17:15:05", "remaining_time": "9:55:56"} +{"current_steps": 4781, "total_steps": 7532, "loss": 0.2219458371400833, "lr": 6.454266934441775e-06, "epoch": 1.2695525162661, "percentage": 63.48, "elapsed_time": "17:15:18", "remaining_time": "9:55:43"} +{"current_steps": 4782, "total_steps": 7532, "loss": 0.22181497514247894, "lr": 6.450161786796625e-06, "epoch": 1.269818085247643, "percentage": 63.49, "elapsed_time": "17:15:31", "remaining_time": "9:55:30"} +{"current_steps": 4783, "total_steps": 7532, "loss": 0.22642338275909424, "lr": 6.446057323518422e-06, "epoch": 1.270083654229186, "percentage": 63.5, "elapsed_time": "17:15:44", "remaining_time": "9:55:17"} +{"current_steps": 4784, "total_steps": 7532, "loss": 0.239711195230484, "lr": 6.441953545398451e-06, "epoch": 1.270349223210729, "percentage": 63.52, "elapsed_time": "17:15:57", "remaining_time": "9:55:04"} 
+{"current_steps": 4785, "total_steps": 7532, "loss": 0.2422255128622055, "lr": 6.437850453227872e-06, "epoch": 1.270614792192272, "percentage": 63.53, "elapsed_time": "17:16:10", "remaining_time": "9:54:51"} +{"current_steps": 4786, "total_steps": 7532, "loss": 0.23184439539909363, "lr": 6.433748047797715e-06, "epoch": 1.2708803611738149, "percentage": 63.54, "elapsed_time": "17:16:23", "remaining_time": "9:54:38"} +{"current_steps": 4787, "total_steps": 7532, "loss": 0.2737428843975067, "lr": 6.429646329898873e-06, "epoch": 1.2711459301553578, "percentage": 63.56, "elapsed_time": "17:16:36", "remaining_time": "9:54:25"} +{"current_steps": 4788, "total_steps": 7532, "loss": 0.23565897345542908, "lr": 6.4255453003221115e-06, "epoch": 1.2714114991369008, "percentage": 63.57, "elapsed_time": "17:16:50", "remaining_time": "9:54:12"} +{"current_steps": 4789, "total_steps": 7532, "loss": 0.24349254369735718, "lr": 6.421444959858059e-06, "epoch": 1.2716770681184437, "percentage": 63.58, "elapsed_time": "17:17:02", "remaining_time": "9:53:59"} +{"current_steps": 4790, "total_steps": 7532, "loss": 0.2637769281864166, "lr": 6.4173453092972115e-06, "epoch": 1.2719426370999867, "percentage": 63.6, "elapsed_time": "17:17:15", "remaining_time": "9:53:46"} +{"current_steps": 4791, "total_steps": 7532, "loss": 0.21420228481292725, "lr": 6.413246349429934e-06, "epoch": 1.2722082060815296, "percentage": 63.61, "elapsed_time": "17:17:28", "remaining_time": "9:53:33"} +{"current_steps": 4792, "total_steps": 7532, "loss": 0.25270405411720276, "lr": 6.409148081046461e-06, "epoch": 1.2724737750630726, "percentage": 63.62, "elapsed_time": "17:17:41", "remaining_time": "9:53:20"} +{"current_steps": 4793, "total_steps": 7532, "loss": 0.2710546851158142, "lr": 6.405050504936887e-06, "epoch": 1.2727393440446155, "percentage": 63.64, "elapsed_time": "17:17:54", "remaining_time": "9:53:07"} +{"current_steps": 4794, "total_steps": 7532, "loss": 0.2388489842414856, "lr": 6.400953621891178e-06, 
"epoch": 1.2730049130261585, "percentage": 63.65, "elapsed_time": "17:18:07", "remaining_time": "9:52:54"} +{"current_steps": 4795, "total_steps": 7532, "loss": 0.24581485986709595, "lr": 6.396857432699164e-06, "epoch": 1.2732704820077014, "percentage": 63.66, "elapsed_time": "17:18:20", "remaining_time": "9:52:41"} +{"current_steps": 4796, "total_steps": 7532, "loss": 0.24219104647636414, "lr": 6.3927619381505404e-06, "epoch": 1.2735360509892444, "percentage": 63.67, "elapsed_time": "17:18:34", "remaining_time": "9:52:28"} +{"current_steps": 4797, "total_steps": 7532, "loss": 0.22722014784812927, "lr": 6.388667139034873e-06, "epoch": 1.2738016199707873, "percentage": 63.69, "elapsed_time": "17:18:47", "remaining_time": "9:52:15"} +{"current_steps": 4798, "total_steps": 7532, "loss": 0.25177234411239624, "lr": 6.384573036141589e-06, "epoch": 1.2740671889523303, "percentage": 63.7, "elapsed_time": "17:19:00", "remaining_time": "9:52:02"} +{"current_steps": 4799, "total_steps": 7532, "loss": 0.2291412651538849, "lr": 6.380479630259983e-06, "epoch": 1.2743327579338732, "percentage": 63.71, "elapsed_time": "17:19:12", "remaining_time": "9:51:49"} +{"current_steps": 4800, "total_steps": 7532, "loss": 0.2528606951236725, "lr": 6.376386922179216e-06, "epoch": 1.2745983269154162, "percentage": 63.73, "elapsed_time": "17:19:26", "remaining_time": "9:51:36"} +{"current_steps": 4801, "total_steps": 7532, "loss": 0.21383032202720642, "lr": 6.372294912688315e-06, "epoch": 1.2748638958969591, "percentage": 63.74, "elapsed_time": "17:19:44", "remaining_time": "9:51:26"} +{"current_steps": 4802, "total_steps": 7532, "loss": 0.2538087069988251, "lr": 6.368203602576168e-06, "epoch": 1.275129464878502, "percentage": 63.75, "elapsed_time": "17:19:57", "remaining_time": "9:51:13"} +{"current_steps": 4803, "total_steps": 7532, "loss": 0.24437417089939117, "lr": 6.364112992631537e-06, "epoch": 1.275395033860045, "percentage": 63.77, "elapsed_time": "17:20:10", "remaining_time": 
"9:51:00"} +{"current_steps": 4804, "total_steps": 7532, "loss": 0.2347753942012787, "lr": 6.360023083643036e-06, "epoch": 1.275660602841588, "percentage": 63.78, "elapsed_time": "17:20:23", "remaining_time": "9:50:48"} +{"current_steps": 4805, "total_steps": 7532, "loss": 0.271645188331604, "lr": 6.3559338763991576e-06, "epoch": 1.275926171823131, "percentage": 63.79, "elapsed_time": "17:20:36", "remaining_time": "9:50:35"} +{"current_steps": 4806, "total_steps": 7532, "loss": 0.2465275228023529, "lr": 6.35184537168825e-06, "epoch": 1.276191740804674, "percentage": 63.81, "elapsed_time": "17:20:49", "remaining_time": "9:50:21"} +{"current_steps": 4807, "total_steps": 7532, "loss": 0.26494044065475464, "lr": 6.347757570298527e-06, "epoch": 1.2764573097862169, "percentage": 63.82, "elapsed_time": "17:21:03", "remaining_time": "9:50:09"} +{"current_steps": 4808, "total_steps": 7532, "loss": 0.28292080760002136, "lr": 6.343670473018071e-06, "epoch": 1.2767228787677598, "percentage": 63.83, "elapsed_time": "17:21:16", "remaining_time": "9:49:56"} +{"current_steps": 4809, "total_steps": 7532, "loss": 0.2525850534439087, "lr": 6.339584080634824e-06, "epoch": 1.2769884477493028, "percentage": 63.85, "elapsed_time": "17:21:29", "remaining_time": "9:49:43"} +{"current_steps": 4810, "total_steps": 7532, "loss": 0.22056345641613007, "lr": 6.335498393936597e-06, "epoch": 1.2772540167308457, "percentage": 63.86, "elapsed_time": "17:21:42", "remaining_time": "9:49:30"} +{"current_steps": 4811, "total_steps": 7532, "loss": 0.23081058263778687, "lr": 6.331413413711061e-06, "epoch": 1.2775195857123887, "percentage": 63.87, "elapsed_time": "17:21:56", "remaining_time": "9:49:17"} +{"current_steps": 4812, "total_steps": 7532, "loss": 0.2722470760345459, "lr": 6.327329140745751e-06, "epoch": 1.2777851546939316, "percentage": 63.89, "elapsed_time": "17:22:08", "remaining_time": "9:49:04"} +{"current_steps": 4813, "total_steps": 7532, "loss": 0.24454641342163086, "lr": 
6.32324557582807e-06, "epoch": 1.2780507236754748, "percentage": 63.9, "elapsed_time": "17:22:21", "remaining_time": "9:48:51"} +{"current_steps": 4814, "total_steps": 7532, "loss": 0.21884413063526154, "lr": 6.319162719745277e-06, "epoch": 1.2783162926570177, "percentage": 63.91, "elapsed_time": "17:22:34", "remaining_time": "9:48:38"} +{"current_steps": 4815, "total_steps": 7532, "loss": 0.2737545669078827, "lr": 6.3150805732845e-06, "epoch": 1.2785818616385607, "percentage": 63.93, "elapsed_time": "17:22:47", "remaining_time": "9:48:25"} +{"current_steps": 4816, "total_steps": 7532, "loss": 0.2478230595588684, "lr": 6.31099913723273e-06, "epoch": 1.2788474306201036, "percentage": 63.94, "elapsed_time": "17:22:59", "remaining_time": "9:48:12"} +{"current_steps": 4817, "total_steps": 7532, "loss": 0.2508094310760498, "lr": 6.306918412376817e-06, "epoch": 1.2791129996016466, "percentage": 63.95, "elapsed_time": "17:23:13", "remaining_time": "9:47:59"} +{"current_steps": 4818, "total_steps": 7532, "loss": 0.24666383862495422, "lr": 6.302838399503477e-06, "epoch": 1.2793785685831895, "percentage": 63.97, "elapsed_time": "17:23:25", "remaining_time": "9:47:46"} +{"current_steps": 4819, "total_steps": 7532, "loss": 0.27833491563796997, "lr": 6.298759099399292e-06, "epoch": 1.2796441375647325, "percentage": 63.98, "elapsed_time": "17:23:38", "remaining_time": "9:47:32"} +{"current_steps": 4820, "total_steps": 7532, "loss": 0.23092475533485413, "lr": 6.294680512850699e-06, "epoch": 1.2799097065462754, "percentage": 63.99, "elapsed_time": "17:23:52", "remaining_time": "9:47:20"} +{"current_steps": 4821, "total_steps": 7532, "loss": 0.2714667022228241, "lr": 6.290602640644005e-06, "epoch": 1.2801752755278184, "percentage": 64.01, "elapsed_time": "17:24:04", "remaining_time": "9:47:07"} +{"current_steps": 4822, "total_steps": 7532, "loss": 0.23292411863803864, "lr": 6.286525483565373e-06, "epoch": 1.2804408445093614, "percentage": 64.02, "elapsed_time": "17:24:17", 
"remaining_time": "9:46:54"} +{"current_steps": 4823, "total_steps": 7532, "loss": 0.23809143900871277, "lr": 6.282449042400831e-06, "epoch": 1.2807064134909043, "percentage": 64.03, "elapsed_time": "17:24:30", "remaining_time": "9:46:40"} +{"current_steps": 4824, "total_steps": 7532, "loss": 0.22593267261981964, "lr": 6.278373317936269e-06, "epoch": 1.2809719824724473, "percentage": 64.05, "elapsed_time": "17:24:43", "remaining_time": "9:46:28"} +{"current_steps": 4825, "total_steps": 7532, "loss": 0.26024624705314636, "lr": 6.274298310957439e-06, "epoch": 1.2812375514539902, "percentage": 64.06, "elapsed_time": "17:24:56", "remaining_time": "9:46:14"} +{"current_steps": 4826, "total_steps": 7532, "loss": 0.22418126463890076, "lr": 6.270224022249957e-06, "epoch": 1.2815031204355332, "percentage": 64.07, "elapsed_time": "17:25:09", "remaining_time": "9:46:02"} +{"current_steps": 4827, "total_steps": 7532, "loss": 0.26452577114105225, "lr": 6.266150452599288e-06, "epoch": 1.2817686894170761, "percentage": 64.09, "elapsed_time": "17:25:22", "remaining_time": "9:45:49"} +{"current_steps": 4828, "total_steps": 7532, "loss": 0.24412381649017334, "lr": 6.262077602790779e-06, "epoch": 1.282034258398619, "percentage": 64.1, "elapsed_time": "17:25:35", "remaining_time": "9:45:36"} +{"current_steps": 4829, "total_steps": 7532, "loss": 0.22476118803024292, "lr": 6.258005473609623e-06, "epoch": 1.282299827380162, "percentage": 64.11, "elapsed_time": "17:25:48", "remaining_time": "9:45:23"} +{"current_steps": 4830, "total_steps": 7532, "loss": 0.2208547294139862, "lr": 6.25393406584088e-06, "epoch": 1.282565396361705, "percentage": 64.13, "elapsed_time": "17:26:01", "remaining_time": "9:45:10"} +{"current_steps": 4831, "total_steps": 7532, "loss": 0.2903650999069214, "lr": 6.249863380269467e-06, "epoch": 1.282830965343248, "percentage": 64.14, "elapsed_time": "17:26:14", "remaining_time": "9:44:57"} +{"current_steps": 4832, "total_steps": 7532, "loss": 0.24413639307022095, 
"lr": 6.245793417680168e-06, "epoch": 1.2830965343247909, "percentage": 64.15, "elapsed_time": "17:26:27", "remaining_time": "9:44:44"} +{"current_steps": 4833, "total_steps": 7532, "loss": 0.2193944752216339, "lr": 6.241724178857621e-06, "epoch": 1.2833621033063338, "percentage": 64.17, "elapsed_time": "17:26:40", "remaining_time": "9:44:31"} +{"current_steps": 4834, "total_steps": 7532, "loss": 0.22847513854503632, "lr": 6.237655664586326e-06, "epoch": 1.2836276722878768, "percentage": 64.18, "elapsed_time": "17:26:53", "remaining_time": "9:44:18"} +{"current_steps": 4835, "total_steps": 7532, "loss": 0.269639253616333, "lr": 6.233587875650648e-06, "epoch": 1.2838932412694197, "percentage": 64.19, "elapsed_time": "17:27:06", "remaining_time": "9:44:05"} +{"current_steps": 4836, "total_steps": 7532, "loss": 0.26329392194747925, "lr": 6.229520812834801e-06, "epoch": 1.2841588102509627, "percentage": 64.21, "elapsed_time": "17:27:19", "remaining_time": "9:43:52"} +{"current_steps": 4837, "total_steps": 7532, "loss": 0.18800514936447144, "lr": 6.225454476922877e-06, "epoch": 1.2844243792325056, "percentage": 64.22, "elapsed_time": "17:27:32", "remaining_time": "9:43:39"} +{"current_steps": 4838, "total_steps": 7532, "loss": 0.2617965340614319, "lr": 6.2213888686988125e-06, "epoch": 1.2846899482140486, "percentage": 64.23, "elapsed_time": "17:27:45", "remaining_time": "9:43:26"} +{"current_steps": 4839, "total_steps": 7532, "loss": 0.22468717396259308, "lr": 6.217323988946411e-06, "epoch": 1.2849555171955915, "percentage": 64.25, "elapsed_time": "17:27:58", "remaining_time": "9:43:13"} +{"current_steps": 4840, "total_steps": 7532, "loss": 0.22465646266937256, "lr": 6.213259838449333e-06, "epoch": 1.2852210861771345, "percentage": 64.26, "elapsed_time": "17:28:10", "remaining_time": "9:42:59"} +{"current_steps": 4841, "total_steps": 7532, "loss": 0.2655075490474701, "lr": 6.209196417991096e-06, "epoch": 1.2854866551586774, "percentage": 64.27, "elapsed_time": 
"17:28:23", "remaining_time": "9:42:46"} +{"current_steps": 4842, "total_steps": 7532, "loss": 0.25313282012939453, "lr": 6.205133728355081e-06, "epoch": 1.2857522241402204, "percentage": 64.29, "elapsed_time": "17:28:36", "remaining_time": "9:42:33"} +{"current_steps": 4843, "total_steps": 7532, "loss": 0.23176322877407074, "lr": 6.201071770324527e-06, "epoch": 1.2860177931217633, "percentage": 64.3, "elapsed_time": "17:28:49", "remaining_time": "9:42:20"} +{"current_steps": 4844, "total_steps": 7532, "loss": 0.27396953105926514, "lr": 6.197010544682531e-06, "epoch": 1.2862833621033063, "percentage": 64.31, "elapsed_time": "17:29:02", "remaining_time": "9:42:07"} +{"current_steps": 4845, "total_steps": 7532, "loss": 0.24966171383857727, "lr": 6.192950052212046e-06, "epoch": 1.2865489310848492, "percentage": 64.33, "elapsed_time": "17:29:14", "remaining_time": "9:41:54"} +{"current_steps": 4846, "total_steps": 7532, "loss": 0.23290866613388062, "lr": 6.188890293695895e-06, "epoch": 1.2868145000663922, "percentage": 64.34, "elapsed_time": "17:29:27", "remaining_time": "9:41:41"} +{"current_steps": 4847, "total_steps": 7532, "loss": 0.2368975132703781, "lr": 6.184831269916749e-06, "epoch": 1.2870800690479351, "percentage": 64.35, "elapsed_time": "17:29:40", "remaining_time": "9:41:28"} +{"current_steps": 4848, "total_steps": 7532, "loss": 0.25305312871932983, "lr": 6.180772981657139e-06, "epoch": 1.287345638029478, "percentage": 64.37, "elapsed_time": "17:29:53", "remaining_time": "9:41:15"} +{"current_steps": 4849, "total_steps": 7532, "loss": 0.22752982378005981, "lr": 6.176715429699452e-06, "epoch": 1.287611207011021, "percentage": 64.38, "elapsed_time": "17:30:07", "remaining_time": "9:41:02"} +{"current_steps": 4850, "total_steps": 7532, "loss": 0.22426503896713257, "lr": 6.1726586148259395e-06, "epoch": 1.287876775992564, "percentage": 64.39, "elapsed_time": "17:30:20", "remaining_time": "9:40:49"} +{"current_steps": 4851, "total_steps": 7532, "loss": 
0.21261993050575256, "lr": 6.168602537818706e-06, "epoch": 1.288142344974107, "percentage": 64.41, "elapsed_time": "17:30:33", "remaining_time": "9:40:36"} +{"current_steps": 4852, "total_steps": 7532, "loss": 0.237461656332016, "lr": 6.1645471994597185e-06, "epoch": 1.28840791395565, "percentage": 64.42, "elapsed_time": "17:30:46", "remaining_time": "9:40:23"} +{"current_steps": 4853, "total_steps": 7532, "loss": 0.1926390826702118, "lr": 6.160492600530794e-06, "epoch": 1.2886734829371929, "percentage": 64.43, "elapsed_time": "17:31:00", "remaining_time": "9:40:11"} +{"current_steps": 4854, "total_steps": 7532, "loss": 0.22673740983009338, "lr": 6.156438741813608e-06, "epoch": 1.2889390519187358, "percentage": 64.45, "elapsed_time": "17:31:13", "remaining_time": "9:39:58"} +{"current_steps": 4855, "total_steps": 7532, "loss": 0.22148582339286804, "lr": 6.15238562408971e-06, "epoch": 1.289204620900279, "percentage": 64.46, "elapsed_time": "17:31:26", "remaining_time": "9:39:45"} +{"current_steps": 4856, "total_steps": 7532, "loss": 0.28319716453552246, "lr": 6.148333248140483e-06, "epoch": 1.289470189881822, "percentage": 64.47, "elapsed_time": "17:31:40", "remaining_time": "9:39:32"} +{"current_steps": 4857, "total_steps": 7532, "loss": 0.23505647480487823, "lr": 6.14428161474718e-06, "epoch": 1.289735758863365, "percentage": 64.48, "elapsed_time": "17:31:52", "remaining_time": "9:39:19"} +{"current_steps": 4858, "total_steps": 7532, "loss": 0.24323523044586182, "lr": 6.140230724690908e-06, "epoch": 1.2900013278449078, "percentage": 64.5, "elapsed_time": "17:32:05", "remaining_time": "9:39:06"} +{"current_steps": 4859, "total_steps": 7532, "loss": 0.22818386554718018, "lr": 6.136180578752629e-06, "epoch": 1.2902668968264508, "percentage": 64.51, "elapsed_time": "17:32:19", "remaining_time": "9:38:53"} +{"current_steps": 4860, "total_steps": 7532, "loss": 0.24285198748111725, "lr": 6.132131177713165e-06, "epoch": 1.2905324658079937, "percentage": 64.52, 
"elapsed_time": "17:32:31", "remaining_time": "9:38:40"} +{"current_steps": 4861, "total_steps": 7532, "loss": 0.24115213751792908, "lr": 6.128082522353194e-06, "epoch": 1.2907980347895367, "percentage": 64.54, "elapsed_time": "17:32:45", "remaining_time": "9:38:27"} +{"current_steps": 4862, "total_steps": 7532, "loss": 0.21564510464668274, "lr": 6.124034613453247e-06, "epoch": 1.2910636037710796, "percentage": 64.55, "elapsed_time": "17:32:58", "remaining_time": "9:38:14"} +{"current_steps": 4863, "total_steps": 7532, "loss": 0.2329743504524231, "lr": 6.119987451793711e-06, "epoch": 1.2913291727526226, "percentage": 64.56, "elapsed_time": "17:33:11", "remaining_time": "9:38:02"} +{"current_steps": 4864, "total_steps": 7532, "loss": 0.2161208689212799, "lr": 6.115941038154835e-06, "epoch": 1.2915947417341656, "percentage": 64.58, "elapsed_time": "17:33:24", "remaining_time": "9:37:49"} +{"current_steps": 4865, "total_steps": 7532, "loss": 0.22765520215034485, "lr": 6.111895373316721e-06, "epoch": 1.2918603107157085, "percentage": 64.59, "elapsed_time": "17:33:38", "remaining_time": "9:37:36"} +{"current_steps": 4866, "total_steps": 7532, "loss": 0.25506818294525146, "lr": 6.107850458059322e-06, "epoch": 1.2921258796972515, "percentage": 64.6, "elapsed_time": "17:33:51", "remaining_time": "9:37:23"} +{"current_steps": 4867, "total_steps": 7532, "loss": 0.22543852031230927, "lr": 6.1038062931624505e-06, "epoch": 1.2923914486787944, "percentage": 64.62, "elapsed_time": "17:34:04", "remaining_time": "9:37:10"} +{"current_steps": 4868, "total_steps": 7532, "loss": 0.24295030534267426, "lr": 6.099762879405776e-06, "epoch": 1.2926570176603374, "percentage": 64.63, "elapsed_time": "17:34:17", "remaining_time": "9:36:57"} +{"current_steps": 4869, "total_steps": 7532, "loss": 0.2385009229183197, "lr": 6.095720217568819e-06, "epoch": 1.2929225866418803, "percentage": 64.64, "elapsed_time": "17:34:30", "remaining_time": "9:36:44"} +{"current_steps": 4870, "total_steps": 7532, 
"loss": 0.21410472691059113, "lr": 6.091678308430956e-06, "epoch": 1.2931881556234233, "percentage": 64.66, "elapsed_time": "17:34:43", "remaining_time": "9:36:31"} +{"current_steps": 4871, "total_steps": 7532, "loss": 0.25934773683547974, "lr": 6.087637152771422e-06, "epoch": 1.2934537246049662, "percentage": 64.67, "elapsed_time": "17:34:56", "remaining_time": "9:36:18"} +{"current_steps": 4872, "total_steps": 7532, "loss": 0.24584373831748962, "lr": 6.0835967513693e-06, "epoch": 1.2937192935865092, "percentage": 64.68, "elapsed_time": "17:35:09", "remaining_time": "9:36:05"} +{"current_steps": 4873, "total_steps": 7532, "loss": 0.2403055876493454, "lr": 6.079557105003537e-06, "epoch": 1.2939848625680521, "percentage": 64.7, "elapsed_time": "17:35:22", "remaining_time": "9:35:52"} +{"current_steps": 4874, "total_steps": 7532, "loss": 0.23861736059188843, "lr": 6.075518214452927e-06, "epoch": 1.294250431549595, "percentage": 64.71, "elapsed_time": "17:35:36", "remaining_time": "9:35:39"} +{"current_steps": 4875, "total_steps": 7532, "loss": 0.21356427669525146, "lr": 6.071480080496119e-06, "epoch": 1.294516000531138, "percentage": 64.72, "elapsed_time": "17:35:49", "remaining_time": "9:35:26"} +{"current_steps": 4876, "total_steps": 7532, "loss": 0.2835869789123535, "lr": 6.067442703911621e-06, "epoch": 1.294781569512681, "percentage": 64.74, "elapsed_time": "17:36:02", "remaining_time": "9:35:14"} +{"current_steps": 4877, "total_steps": 7532, "loss": 0.24233242869377136, "lr": 6.063406085477788e-06, "epoch": 1.295047138494224, "percentage": 64.75, "elapsed_time": "17:36:15", "remaining_time": "9:35:00"} +{"current_steps": 4878, "total_steps": 7532, "loss": 0.24986369907855988, "lr": 6.059370225972834e-06, "epoch": 1.2953127074757669, "percentage": 64.76, "elapsed_time": "17:36:28", "remaining_time": "9:34:48"} +{"current_steps": 4879, "total_steps": 7532, "loss": 0.2445756494998932, "lr": 6.055335126174826e-06, "epoch": 1.2955782764573098, "percentage": 64.78, 
"elapsed_time": "17:36:41", "remaining_time": "9:34:35"} +{"current_steps": 4880, "total_steps": 7532, "loss": 0.21331898868083954, "lr": 6.0513007868616825e-06, "epoch": 1.2958438454388528, "percentage": 64.79, "elapsed_time": "17:36:54", "remaining_time": "9:34:22"} +{"current_steps": 4881, "total_steps": 7532, "loss": 0.2782329320907593, "lr": 6.047267208811174e-06, "epoch": 1.2961094144203957, "percentage": 64.8, "elapsed_time": "17:37:07", "remaining_time": "9:34:09"} +{"current_steps": 4882, "total_steps": 7532, "loss": 0.20866765081882477, "lr": 6.043234392800932e-06, "epoch": 1.2963749834019387, "percentage": 64.82, "elapsed_time": "17:37:21", "remaining_time": "9:33:56"} +{"current_steps": 4883, "total_steps": 7532, "loss": 0.2517815828323364, "lr": 6.039202339608432e-06, "epoch": 1.2966405523834816, "percentage": 64.83, "elapsed_time": "17:37:34", "remaining_time": "9:33:43"} +{"current_steps": 4884, "total_steps": 7532, "loss": 0.2617926597595215, "lr": 6.03517105001101e-06, "epoch": 1.2969061213650246, "percentage": 64.84, "elapsed_time": "17:37:47", "remaining_time": "9:33:30"} +{"current_steps": 4885, "total_steps": 7532, "loss": 0.24753305315971375, "lr": 6.0311405247858465e-06, "epoch": 1.2971716903465675, "percentage": 64.86, "elapsed_time": "17:38:00", "remaining_time": "9:33:17"} +{"current_steps": 4886, "total_steps": 7532, "loss": 0.19791719317436218, "lr": 6.027110764709982e-06, "epoch": 1.2974372593281105, "percentage": 64.87, "elapsed_time": "17:38:13", "remaining_time": "9:33:04"} +{"current_steps": 4887, "total_steps": 7532, "loss": 0.243608757853508, "lr": 6.023081770560307e-06, "epoch": 1.2977028283096534, "percentage": 64.88, "elapsed_time": "17:38:26", "remaining_time": "9:32:51"} +{"current_steps": 4888, "total_steps": 7532, "loss": 0.20469853281974792, "lr": 6.019053543113564e-06, "epoch": 1.2979683972911964, "percentage": 64.9, "elapsed_time": "17:38:39", "remaining_time": "9:32:38"} +{"current_steps": 4889, "total_steps": 7532, 
"loss": 0.25613903999328613, "lr": 6.015026083146345e-06, "epoch": 1.2982339662727393, "percentage": 64.91, "elapsed_time": "17:38:52", "remaining_time": "9:32:25"} +{"current_steps": 4890, "total_steps": 7532, "loss": 0.23349006474018097, "lr": 6.010999391435097e-06, "epoch": 1.2984995352542823, "percentage": 64.92, "elapsed_time": "17:39:05", "remaining_time": "9:32:12"} +{"current_steps": 4891, "total_steps": 7532, "loss": 0.23646268248558044, "lr": 6.006973468756124e-06, "epoch": 1.2987651042358253, "percentage": 64.94, "elapsed_time": "17:39:18", "remaining_time": "9:32:00"} +{"current_steps": 4892, "total_steps": 7532, "loss": 0.2371794581413269, "lr": 6.002948315885572e-06, "epoch": 1.2990306732173682, "percentage": 64.95, "elapsed_time": "17:39:32", "remaining_time": "9:31:47"} +{"current_steps": 4893, "total_steps": 7532, "loss": 0.23791949450969696, "lr": 5.998923933599443e-06, "epoch": 1.2992962421989112, "percentage": 64.96, "elapsed_time": "17:39:45", "remaining_time": "9:31:34"} +{"current_steps": 4894, "total_steps": 7532, "loss": 0.26923009753227234, "lr": 5.994900322673593e-06, "epoch": 1.299561811180454, "percentage": 64.98, "elapsed_time": "17:39:58", "remaining_time": "9:31:21"} +{"current_steps": 4895, "total_steps": 7532, "loss": 0.20164884626865387, "lr": 5.990877483883723e-06, "epoch": 1.299827380161997, "percentage": 64.99, "elapsed_time": "17:40:11", "remaining_time": "9:31:08"} +{"current_steps": 4896, "total_steps": 7532, "loss": 0.22345462441444397, "lr": 5.986855418005393e-06, "epoch": 1.30009294914354, "percentage": 65.0, "elapsed_time": "17:40:24", "remaining_time": "9:30:55"} +{"current_steps": 4897, "total_steps": 7532, "loss": 0.26678675413131714, "lr": 5.982834125814007e-06, "epoch": 1.300358518125083, "percentage": 65.02, "elapsed_time": "17:40:38", "remaining_time": "9:30:42"} +{"current_steps": 4898, "total_steps": 7532, "loss": 0.24674496054649353, "lr": 5.978813608084825e-06, "epoch": 1.300624087106626, "percentage": 65.03, 
"elapsed_time": "17:40:50", "remaining_time": "9:30:29"} +{"current_steps": 4899, "total_steps": 7532, "loss": 0.2804900109767914, "lr": 5.974793865592947e-06, "epoch": 1.3008896560881689, "percentage": 65.04, "elapsed_time": "17:41:04", "remaining_time": "9:30:16"} +{"current_steps": 4900, "total_steps": 7532, "loss": 0.2413155734539032, "lr": 5.970774899113345e-06, "epoch": 1.3011552250697118, "percentage": 65.06, "elapsed_time": "17:41:17", "remaining_time": "9:30:03"} +{"current_steps": 4901, "total_steps": 7532, "loss": 0.21217301487922668, "lr": 5.96675670942082e-06, "epoch": 1.3014207940512548, "percentage": 65.07, "elapsed_time": "17:41:36", "remaining_time": "9:29:53"} +{"current_steps": 4902, "total_steps": 7532, "loss": 0.23362940549850464, "lr": 5.962739297290035e-06, "epoch": 1.3016863630327977, "percentage": 65.08, "elapsed_time": "17:41:49", "remaining_time": "9:29:41"} +{"current_steps": 4903, "total_steps": 7532, "loss": 0.2669242322444916, "lr": 5.958722663495499e-06, "epoch": 1.3019519320143407, "percentage": 65.1, "elapsed_time": "17:42:01", "remaining_time": "9:29:27"} +{"current_steps": 4904, "total_steps": 7532, "loss": 0.2234608232975006, "lr": 5.95470680881157e-06, "epoch": 1.3022175009958836, "percentage": 65.11, "elapsed_time": "17:42:15", "remaining_time": "9:29:14"} +{"current_steps": 4905, "total_steps": 7532, "loss": 0.25150394439697266, "lr": 5.95069173401246e-06, "epoch": 1.3024830699774266, "percentage": 65.12, "elapsed_time": "17:42:27", "remaining_time": "9:29:01"} +{"current_steps": 4906, "total_steps": 7532, "loss": 0.2408430427312851, "lr": 5.9466774398722264e-06, "epoch": 1.3027486389589695, "percentage": 65.14, "elapsed_time": "17:42:40", "remaining_time": "9:28:48"} +{"current_steps": 4907, "total_steps": 7532, "loss": 0.2197013795375824, "lr": 5.942663927164776e-06, "epoch": 1.3030142079405125, "percentage": 65.15, "elapsed_time": "17:42:53", "remaining_time": "9:28:35"} +{"current_steps": 4908, "total_steps": 7532, 
"loss": 0.2224964201450348, "lr": 5.938651196663865e-06, "epoch": 1.3032797769220554, "percentage": 65.16, "elapsed_time": "17:43:06", "remaining_time": "9:28:22"} +{"current_steps": 4909, "total_steps": 7532, "loss": 0.26466232538223267, "lr": 5.934639249143108e-06, "epoch": 1.3035453459035984, "percentage": 65.18, "elapsed_time": "17:43:19", "remaining_time": "9:28:09"} +{"current_steps": 4910, "total_steps": 7532, "loss": 0.257996141910553, "lr": 5.930628085375958e-06, "epoch": 1.3038109148851413, "percentage": 65.19, "elapsed_time": "17:43:32", "remaining_time": "9:27:56"} +{"current_steps": 4911, "total_steps": 7532, "loss": 0.21995162963867188, "lr": 5.92661770613572e-06, "epoch": 1.3040764838666843, "percentage": 65.2, "elapsed_time": "17:43:45", "remaining_time": "9:27:43"} +{"current_steps": 4912, "total_steps": 7532, "loss": 0.26007258892059326, "lr": 5.922608112195546e-06, "epoch": 1.3043420528482272, "percentage": 65.22, "elapsed_time": "17:43:58", "remaining_time": "9:27:30"} +{"current_steps": 4913, "total_steps": 7532, "loss": 0.25168827176094055, "lr": 5.918599304328442e-06, "epoch": 1.3046076218297702, "percentage": 65.23, "elapsed_time": "17:44:11", "remaining_time": "9:27:17"} +{"current_steps": 4914, "total_steps": 7532, "loss": 0.24686852097511292, "lr": 5.9145912833072535e-06, "epoch": 1.3048731908113131, "percentage": 65.24, "elapsed_time": "17:44:24", "remaining_time": "9:27:04"} +{"current_steps": 4915, "total_steps": 7532, "loss": 0.247032031416893, "lr": 5.910584049904684e-06, "epoch": 1.305138759792856, "percentage": 65.25, "elapsed_time": "17:44:37", "remaining_time": "9:26:51"} +{"current_steps": 4916, "total_steps": 7532, "loss": 0.21644674241542816, "lr": 5.906577604893278e-06, "epoch": 1.305404328774399, "percentage": 65.27, "elapsed_time": "17:44:50", "remaining_time": "9:26:38"} +{"current_steps": 4917, "total_steps": 7532, "loss": 0.28093478083610535, "lr": 5.9025719490454304e-06, "epoch": 1.305669897755942, "percentage": 65.28, 
"elapsed_time": "17:45:02", "remaining_time": "9:26:25"} +{"current_steps": 4918, "total_steps": 7532, "loss": 0.23731757700443268, "lr": 5.898567083133389e-06, "epoch": 1.305935466737485, "percentage": 65.29, "elapsed_time": "17:45:15", "remaining_time": "9:26:12"} +{"current_steps": 4919, "total_steps": 7532, "loss": 0.20725491642951965, "lr": 5.894563007929243e-06, "epoch": 1.306201035719028, "percentage": 65.31, "elapsed_time": "17:45:28", "remaining_time": "9:25:59"} +{"current_steps": 4920, "total_steps": 7532, "loss": 0.2509433329105377, "lr": 5.89055972420493e-06, "epoch": 1.3064666047005709, "percentage": 65.32, "elapsed_time": "17:45:41", "remaining_time": "9:25:46"} +{"current_steps": 4921, "total_steps": 7532, "loss": 0.2611580491065979, "lr": 5.886557232732235e-06, "epoch": 1.3067321736821138, "percentage": 65.33, "elapsed_time": "17:45:54", "remaining_time": "9:25:33"} +{"current_steps": 4922, "total_steps": 7532, "loss": 0.20567595958709717, "lr": 5.882555534282792e-06, "epoch": 1.3069977426636568, "percentage": 65.35, "elapsed_time": "17:46:07", "remaining_time": "9:25:20"} +{"current_steps": 4923, "total_steps": 7532, "loss": 0.22851137816905975, "lr": 5.878554629628081e-06, "epoch": 1.3072633116451997, "percentage": 65.36, "elapsed_time": "17:46:20", "remaining_time": "9:25:07"} +{"current_steps": 4924, "total_steps": 7532, "loss": 0.24295902252197266, "lr": 5.874554519539431e-06, "epoch": 1.3075288806267427, "percentage": 65.37, "elapsed_time": "17:46:33", "remaining_time": "9:24:54"} +{"current_steps": 4925, "total_steps": 7532, "loss": 0.29564642906188965, "lr": 5.870555204788013e-06, "epoch": 1.3077944496082856, "percentage": 65.39, "elapsed_time": "17:46:46", "remaining_time": "9:24:41"} +{"current_steps": 4926, "total_steps": 7532, "loss": 0.2399739921092987, "lr": 5.8665566861448465e-06, "epoch": 1.3080600185898288, "percentage": 65.4, "elapsed_time": "17:46:59", "remaining_time": "9:24:28"} +{"current_steps": 4927, "total_steps": 7532, 
"loss": 0.23882555961608887, "lr": 5.862558964380806e-06, "epoch": 1.3083255875713717, "percentage": 65.41, "elapsed_time": "17:47:12", "remaining_time": "9:24:15"} +{"current_steps": 4928, "total_steps": 7532, "loss": 0.2510842978954315, "lr": 5.858562040266599e-06, "epoch": 1.3085911565529147, "percentage": 65.43, "elapsed_time": "17:47:25", "remaining_time": "9:24:02"} +{"current_steps": 4929, "total_steps": 7532, "loss": 0.257358193397522, "lr": 5.854565914572787e-06, "epoch": 1.3088567255344576, "percentage": 65.44, "elapsed_time": "17:47:38", "remaining_time": "9:23:49"} +{"current_steps": 4930, "total_steps": 7532, "loss": 0.23228219151496887, "lr": 5.850570588069775e-06, "epoch": 1.3091222945160006, "percentage": 65.45, "elapsed_time": "17:47:50", "remaining_time": "9:23:35"} +{"current_steps": 4931, "total_steps": 7532, "loss": 0.2234456092119217, "lr": 5.846576061527818e-06, "epoch": 1.3093878634975435, "percentage": 65.47, "elapsed_time": "17:48:03", "remaining_time": "9:23:22"} +{"current_steps": 4932, "total_steps": 7532, "loss": 0.2273438423871994, "lr": 5.842582335717009e-06, "epoch": 1.3096534324790865, "percentage": 65.48, "elapsed_time": "17:48:16", "remaining_time": "9:23:09"} +{"current_steps": 4933, "total_steps": 7532, "loss": 0.2423306405544281, "lr": 5.838589411407294e-06, "epoch": 1.3099190014606295, "percentage": 65.49, "elapsed_time": "17:48:29", "remaining_time": "9:22:56"} +{"current_steps": 4934, "total_steps": 7532, "loss": 0.266438364982605, "lr": 5.834597289368463e-06, "epoch": 1.3101845704421724, "percentage": 65.51, "elapsed_time": "17:48:42", "remaining_time": "9:22:43"} +{"current_steps": 4935, "total_steps": 7532, "loss": 0.2469342052936554, "lr": 5.830605970370142e-06, "epoch": 1.3104501394237154, "percentage": 65.52, "elapsed_time": "17:48:54", "remaining_time": "9:22:30"} +{"current_steps": 4936, "total_steps": 7532, "loss": 0.2834509611129761, "lr": 5.8266154551818225e-06, "epoch": 1.3107157084052583, "percentage": 65.53, 
"elapsed_time": "17:49:08", "remaining_time": "9:22:17"} +{"current_steps": 4937, "total_steps": 7532, "loss": 0.2615162134170532, "lr": 5.822625744572821e-06, "epoch": 1.3109812773868013, "percentage": 65.55, "elapsed_time": "17:49:20", "remaining_time": "9:22:04"} +{"current_steps": 4938, "total_steps": 7532, "loss": 0.2247931957244873, "lr": 5.818636839312309e-06, "epoch": 1.3112468463683442, "percentage": 65.56, "elapsed_time": "17:49:33", "remaining_time": "9:21:51"} +{"current_steps": 4939, "total_steps": 7532, "loss": 0.23759335279464722, "lr": 5.814648740169299e-06, "epoch": 1.3115124153498872, "percentage": 65.57, "elapsed_time": "17:49:46", "remaining_time": "9:21:38"} +{"current_steps": 4940, "total_steps": 7532, "loss": 0.23381784558296204, "lr": 5.8106614479126515e-06, "epoch": 1.3117779843314301, "percentage": 65.59, "elapsed_time": "17:49:59", "remaining_time": "9:21:25"} +{"current_steps": 4941, "total_steps": 7532, "loss": 0.2671264410018921, "lr": 5.8066749633110675e-06, "epoch": 1.312043553312973, "percentage": 65.6, "elapsed_time": "17:50:11", "remaining_time": "9:21:11"} +{"current_steps": 4942, "total_steps": 7532, "loss": 0.226065531373024, "lr": 5.8026892871330944e-06, "epoch": 1.312309122294516, "percentage": 65.61, "elapsed_time": "17:50:24", "remaining_time": "9:20:58"} +{"current_steps": 4943, "total_steps": 7532, "loss": 0.2654735743999481, "lr": 5.798704420147124e-06, "epoch": 1.312574691276059, "percentage": 65.63, "elapsed_time": "17:50:37", "remaining_time": "9:20:45"} +{"current_steps": 4944, "total_steps": 7532, "loss": 0.23757833242416382, "lr": 5.794720363121389e-06, "epoch": 1.312840260257602, "percentage": 65.64, "elapsed_time": "17:50:50", "remaining_time": "9:20:32"} +{"current_steps": 4945, "total_steps": 7532, "loss": 0.2561591565608978, "lr": 5.790737116823975e-06, "epoch": 1.3131058292391449, "percentage": 65.65, "elapsed_time": "17:51:02", "remaining_time": "9:20:19"} +{"current_steps": 4946, "total_steps": 7532, 
"loss": 0.22105304896831512, "lr": 5.7867546820227995e-06, "epoch": 1.3133713982206878, "percentage": 65.67, "elapsed_time": "17:51:15", "remaining_time": "9:20:06"} +{"current_steps": 4947, "total_steps": 7532, "loss": 0.2485857605934143, "lr": 5.7827730594856325e-06, "epoch": 1.3136369672022308, "percentage": 65.68, "elapsed_time": "17:51:28", "remaining_time": "9:19:53"} +{"current_steps": 4948, "total_steps": 7532, "loss": 0.21256676316261292, "lr": 5.7787922499800804e-06, "epoch": 1.3139025361837737, "percentage": 65.69, "elapsed_time": "17:51:41", "remaining_time": "9:19:40"} +{"current_steps": 4949, "total_steps": 7532, "loss": 0.2700715661048889, "lr": 5.774812254273604e-06, "epoch": 1.3141681051653167, "percentage": 65.71, "elapsed_time": "17:51:53", "remaining_time": "9:19:26"} +{"current_steps": 4950, "total_steps": 7532, "loss": 0.22239381074905396, "lr": 5.770833073133488e-06, "epoch": 1.3144336741468596, "percentage": 65.72, "elapsed_time": "17:52:06", "remaining_time": "9:19:13"} +{"current_steps": 4951, "total_steps": 7532, "loss": 0.22973249852657318, "lr": 5.766854707326878e-06, "epoch": 1.3146992431284026, "percentage": 65.73, "elapsed_time": "17:52:19", "remaining_time": "9:19:00"} +{"current_steps": 4952, "total_steps": 7532, "loss": 0.27923673391342163, "lr": 5.762877157620751e-06, "epoch": 1.3149648121099455, "percentage": 65.75, "elapsed_time": "17:52:31", "remaining_time": "9:18:47"} +{"current_steps": 4953, "total_steps": 7532, "loss": 0.23142218589782715, "lr": 5.758900424781939e-06, "epoch": 1.3152303810914885, "percentage": 65.76, "elapsed_time": "17:52:44", "remaining_time": "9:18:34"} +{"current_steps": 4954, "total_steps": 7532, "loss": 0.23697996139526367, "lr": 5.754924509577107e-06, "epoch": 1.3154959500730314, "percentage": 65.77, "elapsed_time": "17:52:56", "remaining_time": "9:18:20"} +{"current_steps": 4955, "total_steps": 7532, "loss": 0.27600961923599243, "lr": 5.750949412772764e-06, "epoch": 1.3157615190545744, 
"percentage": 65.79, "elapsed_time": "17:53:09", "remaining_time": "9:18:07"} +{"current_steps": 4956, "total_steps": 7532, "loss": 0.2300705760717392, "lr": 5.74697513513526e-06, "epoch": 1.3160270880361173, "percentage": 65.8, "elapsed_time": "17:53:22", "remaining_time": "9:17:54"} +{"current_steps": 4957, "total_steps": 7532, "loss": 0.2771111726760864, "lr": 5.743001677430791e-06, "epoch": 1.3162926570176603, "percentage": 65.81, "elapsed_time": "17:53:35", "remaining_time": "9:17:41"} +{"current_steps": 4958, "total_steps": 7532, "loss": 0.2195657342672348, "lr": 5.739029040425391e-06, "epoch": 1.3165582259992032, "percentage": 65.83, "elapsed_time": "17:53:48", "remaining_time": "9:17:28"} +{"current_steps": 4959, "total_steps": 7532, "loss": 0.2877159118652344, "lr": 5.735057224884939e-06, "epoch": 1.3168237949807462, "percentage": 65.84, "elapsed_time": "17:54:00", "remaining_time": "9:17:15"} +{"current_steps": 4960, "total_steps": 7532, "loss": 0.264115571975708, "lr": 5.731086231575154e-06, "epoch": 1.3170893639622892, "percentage": 65.85, "elapsed_time": "17:54:13", "remaining_time": "9:17:02"} +{"current_steps": 4961, "total_steps": 7532, "loss": 0.22574637830257416, "lr": 5.727116061261593e-06, "epoch": 1.317354932943832, "percentage": 65.87, "elapsed_time": "17:54:26", "remaining_time": "9:16:49"} +{"current_steps": 4962, "total_steps": 7532, "loss": 0.26063698530197144, "lr": 5.723146714709664e-06, "epoch": 1.317620501925375, "percentage": 65.88, "elapsed_time": "17:54:38", "remaining_time": "9:16:35"} +{"current_steps": 4963, "total_steps": 7532, "loss": 0.26272428035736084, "lr": 5.719178192684611e-06, "epoch": 1.317886070906918, "percentage": 65.89, "elapsed_time": "17:54:50", "remaining_time": "9:16:22"} +{"current_steps": 4964, "total_steps": 7532, "loss": 0.27188578248023987, "lr": 5.715210495951513e-06, "epoch": 1.318151639888461, "percentage": 65.91, "elapsed_time": "17:55:03", "remaining_time": "9:16:09"} +{"current_steps": 4965, 
"total_steps": 7532, "loss": 0.26374363899230957, "lr": 5.711243625275296e-06, "epoch": 1.318417208870004, "percentage": 65.92, "elapsed_time": "17:55:15", "remaining_time": "9:15:55"} +{"current_steps": 4966, "total_steps": 7532, "loss": 0.24819093942642212, "lr": 5.7072775814207275e-06, "epoch": 1.3186827778515469, "percentage": 65.93, "elapsed_time": "17:55:28", "remaining_time": "9:15:42"} +{"current_steps": 4967, "total_steps": 7532, "loss": 0.24387019872665405, "lr": 5.703312365152412e-06, "epoch": 1.3189483468330898, "percentage": 65.95, "elapsed_time": "17:55:41", "remaining_time": "9:15:29"} +{"current_steps": 4968, "total_steps": 7532, "loss": 0.2198091745376587, "lr": 5.699347977234799e-06, "epoch": 1.319213915814633, "percentage": 65.96, "elapsed_time": "17:55:53", "remaining_time": "9:15:16"} +{"current_steps": 4969, "total_steps": 7532, "loss": 0.24349649250507355, "lr": 5.695384418432174e-06, "epoch": 1.319479484796176, "percentage": 65.97, "elapsed_time": "17:56:06", "remaining_time": "9:15:03"} +{"current_steps": 4970, "total_steps": 7532, "loss": 0.2330506294965744, "lr": 5.691421689508661e-06, "epoch": 1.319745053777719, "percentage": 65.99, "elapsed_time": "17:56:19", "remaining_time": "9:14:50"} +{"current_steps": 4971, "total_steps": 7532, "loss": 0.22821848094463348, "lr": 5.687459791228234e-06, "epoch": 1.3200106227592618, "percentage": 66.0, "elapsed_time": "17:56:32", "remaining_time": "9:14:37"} +{"current_steps": 4972, "total_steps": 7532, "loss": 0.2342798113822937, "lr": 5.683498724354699e-06, "epoch": 1.3202761917408048, "percentage": 66.01, "elapsed_time": "17:56:44", "remaining_time": "9:14:24"} +{"current_steps": 4973, "total_steps": 7532, "loss": 0.19689922034740448, "lr": 5.679538489651702e-06, "epoch": 1.3205417607223477, "percentage": 66.02, "elapsed_time": "17:56:57", "remaining_time": "9:14:11"} +{"current_steps": 4974, "total_steps": 7532, "loss": 0.23910056054592133, "lr": 5.675579087882727e-06, "epoch": 1.3208073297038907, 
"percentage": 66.04, "elapsed_time": "17:57:11", "remaining_time": "9:13:58"} +{"current_steps": 4975, "total_steps": 7532, "loss": 0.25725993514060974, "lr": 5.671620519811105e-06, "epoch": 1.3210728986854336, "percentage": 66.05, "elapsed_time": "17:57:23", "remaining_time": "9:13:45"} +{"current_steps": 4976, "total_steps": 7532, "loss": 0.3030434250831604, "lr": 5.667662786199997e-06, "epoch": 1.3213384676669766, "percentage": 66.06, "elapsed_time": "17:57:37", "remaining_time": "9:13:32"} +{"current_steps": 4977, "total_steps": 7532, "loss": 0.223737433552742, "lr": 5.6637058878124075e-06, "epoch": 1.3216040366485196, "percentage": 66.08, "elapsed_time": "17:57:49", "remaining_time": "9:13:19"} +{"current_steps": 4978, "total_steps": 7532, "loss": 0.21480265259742737, "lr": 5.659749825411183e-06, "epoch": 1.3218696056300625, "percentage": 66.09, "elapsed_time": "17:58:03", "remaining_time": "9:13:06"} +{"current_steps": 4979, "total_steps": 7532, "loss": 0.23288744688034058, "lr": 5.655794599759001e-06, "epoch": 1.3221351746116055, "percentage": 66.1, "elapsed_time": "17:58:15", "remaining_time": "9:12:53"} +{"current_steps": 4980, "total_steps": 7532, "loss": 0.23701068758964539, "lr": 5.651840211618387e-06, "epoch": 1.3224007435931484, "percentage": 66.12, "elapsed_time": "17:58:29", "remaining_time": "9:12:40"} +{"current_steps": 4981, "total_steps": 7532, "loss": 0.22164157032966614, "lr": 5.647886661751698e-06, "epoch": 1.3226663125746914, "percentage": 66.13, "elapsed_time": "17:58:41", "remaining_time": "9:12:27"} +{"current_steps": 4982, "total_steps": 7532, "loss": 0.23426607251167297, "lr": 5.643933950921132e-06, "epoch": 1.3229318815562343, "percentage": 66.14, "elapsed_time": "17:58:54", "remaining_time": "9:12:14"} +{"current_steps": 4983, "total_steps": 7532, "loss": 0.2567834258079529, "lr": 5.6399820798887266e-06, "epoch": 1.3231974505377773, "percentage": 66.16, "elapsed_time": "17:59:07", "remaining_time": "9:12:00"} +{"current_steps": 4984, 
"total_steps": 7532, "loss": 0.2713038921356201, "lr": 5.6360310494163525e-06, "epoch": 1.3234630195193202, "percentage": 66.17, "elapsed_time": "17:59:20", "remaining_time": "9:11:47"} +{"current_steps": 4985, "total_steps": 7532, "loss": 0.2548249661922455, "lr": 5.632080860265725e-06, "epoch": 1.3237285885008632, "percentage": 66.18, "elapsed_time": "17:59:33", "remaining_time": "9:11:34"} +{"current_steps": 4986, "total_steps": 7532, "loss": 0.2442832589149475, "lr": 5.628131513198392e-06, "epoch": 1.3239941574824061, "percentage": 66.2, "elapsed_time": "17:59:46", "remaining_time": "9:11:21"} +{"current_steps": 4987, "total_steps": 7532, "loss": 0.24654853343963623, "lr": 5.6241830089757435e-06, "epoch": 1.324259726463949, "percentage": 66.21, "elapsed_time": "17:59:59", "remaining_time": "9:11:08"} +{"current_steps": 4988, "total_steps": 7532, "loss": 0.2802797853946686, "lr": 5.620235348358997e-06, "epoch": 1.324525295445492, "percentage": 66.22, "elapsed_time": "18:00:12", "remaining_time": "9:10:55"} +{"current_steps": 4989, "total_steps": 7532, "loss": 0.18801404535770416, "lr": 5.616288532109225e-06, "epoch": 1.324790864427035, "percentage": 66.24, "elapsed_time": "18:00:25", "remaining_time": "9:10:43"} +{"current_steps": 4990, "total_steps": 7532, "loss": 0.2685382068157196, "lr": 5.6123425609873235e-06, "epoch": 1.325056433408578, "percentage": 66.25, "elapsed_time": "18:00:38", "remaining_time": "9:10:29"} +{"current_steps": 4991, "total_steps": 7532, "loss": 0.23479774594306946, "lr": 5.608397435754029e-06, "epoch": 1.3253220023901209, "percentage": 66.26, "elapsed_time": "18:00:51", "remaining_time": "9:10:17"} +{"current_steps": 4992, "total_steps": 7532, "loss": 0.24198031425476074, "lr": 5.604453157169914e-06, "epoch": 1.3255875713716638, "percentage": 66.28, "elapsed_time": "18:01:04", "remaining_time": "9:10:03"} +{"current_steps": 4993, "total_steps": 7532, "loss": 0.25523462891578674, "lr": 5.60050972599539e-06, "epoch": 1.3258531403532068, 
"percentage": 66.29, "elapsed_time": "18:01:17", "remaining_time": "9:09:50"} +{"current_steps": 4994, "total_steps": 7532, "loss": 0.23196743428707123, "lr": 5.596567142990703e-06, "epoch": 1.3261187093347497, "percentage": 66.3, "elapsed_time": "18:01:30", "remaining_time": "9:09:37"} +{"current_steps": 4995, "total_steps": 7532, "loss": 0.29365748167037964, "lr": 5.592625408915939e-06, "epoch": 1.3263842783162927, "percentage": 66.32, "elapsed_time": "18:01:42", "remaining_time": "9:09:24"} +{"current_steps": 4996, "total_steps": 7532, "loss": 0.24509185552597046, "lr": 5.588684524531014e-06, "epoch": 1.3266498472978356, "percentage": 66.33, "elapsed_time": "18:01:55", "remaining_time": "9:09:11"} +{"current_steps": 4997, "total_steps": 7532, "loss": 0.27032390236854553, "lr": 5.584744490595687e-06, "epoch": 1.3269154162793786, "percentage": 66.34, "elapsed_time": "18:02:08", "remaining_time": "9:08:58"} +{"current_steps": 4998, "total_steps": 7532, "loss": 0.24401508271694183, "lr": 5.580805307869549e-06, "epoch": 1.3271809852609215, "percentage": 66.36, "elapsed_time": "18:02:21", "remaining_time": "9:08:45"} +{"current_steps": 4999, "total_steps": 7532, "loss": 0.2216658741235733, "lr": 5.576866977112028e-06, "epoch": 1.3274465542424645, "percentage": 66.37, "elapsed_time": "18:02:33", "remaining_time": "9:08:32"} +{"current_steps": 5000, "total_steps": 7532, "loss": 0.24545373022556305, "lr": 5.5729294990823875e-06, "epoch": 1.3277121232240074, "percentage": 66.38, "elapsed_time": "18:02:46", "remaining_time": "9:08:18"} +{"current_steps": 5001, "total_steps": 7532, "loss": 0.260816752910614, "lr": 5.568992874539728e-06, "epoch": 1.3279776922055504, "percentage": 66.4, "elapsed_time": "18:03:04", "remaining_time": "9:08:08"} +{"current_steps": 5002, "total_steps": 7532, "loss": 0.1850551962852478, "lr": 5.565057104242984e-06, "epoch": 1.3282432611870933, "percentage": 66.41, "elapsed_time": "18:03:17", "remaining_time": "9:07:55"} +{"current_steps": 5003, 
"total_steps": 7532, "loss": 0.26854407787323, "lr": 5.561122188950923e-06, "epoch": 1.3285088301686363, "percentage": 66.42, "elapsed_time": "18:03:30", "remaining_time": "9:07:42"} +{"current_steps": 5004, "total_steps": 7532, "loss": 0.24294906854629517, "lr": 5.557188129422153e-06, "epoch": 1.3287743991501793, "percentage": 66.44, "elapsed_time": "18:03:43", "remaining_time": "9:07:29"} +{"current_steps": 5005, "total_steps": 7532, "loss": 0.2533603310585022, "lr": 5.553254926415114e-06, "epoch": 1.3290399681317222, "percentage": 66.45, "elapsed_time": "18:03:56", "remaining_time": "9:07:16"} +{"current_steps": 5006, "total_steps": 7532, "loss": 0.2082313448190689, "lr": 5.549322580688077e-06, "epoch": 1.3293055371132652, "percentage": 66.46, "elapsed_time": "18:04:09", "remaining_time": "9:07:03"} +{"current_steps": 5007, "total_steps": 7532, "loss": 0.24265842139720917, "lr": 5.545391092999158e-06, "epoch": 1.329571106094808, "percentage": 66.48, "elapsed_time": "18:04:22", "remaining_time": "9:06:50"} +{"current_steps": 5008, "total_steps": 7532, "loss": 0.2483578324317932, "lr": 5.541460464106301e-06, "epoch": 1.329836675076351, "percentage": 66.49, "elapsed_time": "18:04:35", "remaining_time": "9:06:37"} +{"current_steps": 5009, "total_steps": 7532, "loss": 0.2769540548324585, "lr": 5.537530694767281e-06, "epoch": 1.330102244057894, "percentage": 66.5, "elapsed_time": "18:04:48", "remaining_time": "9:06:24"} +{"current_steps": 5010, "total_steps": 7532, "loss": 0.2132025957107544, "lr": 5.533601785739714e-06, "epoch": 1.330367813039437, "percentage": 66.52, "elapsed_time": "18:05:01", "remaining_time": "9:06:11"} +{"current_steps": 5011, "total_steps": 7532, "loss": 0.25223806500434875, "lr": 5.529673737781047e-06, "epoch": 1.33063338202098, "percentage": 66.53, "elapsed_time": "18:05:14", "remaining_time": "9:05:58"} +{"current_steps": 5012, "total_steps": 7532, "loss": 0.22631296515464783, "lr": 5.52574655164856e-06, "epoch": 1.3308989510025229, 
"percentage": 66.54, "elapsed_time": "18:05:27", "remaining_time": "9:05:45"} +{"current_steps": 5013, "total_steps": 7532, "loss": 0.23756693303585052, "lr": 5.5218202280993725e-06, "epoch": 1.3311645199840658, "percentage": 66.56, "elapsed_time": "18:05:40", "remaining_time": "9:05:32"} +{"current_steps": 5014, "total_steps": 7532, "loss": 0.24746376276016235, "lr": 5.517894767890427e-06, "epoch": 1.3314300889656088, "percentage": 66.57, "elapsed_time": "18:05:53", "remaining_time": "9:05:19"} +{"current_steps": 5015, "total_steps": 7532, "loss": 0.21463070809841156, "lr": 5.513970171778504e-06, "epoch": 1.3316956579471517, "percentage": 66.58, "elapsed_time": "18:06:06", "remaining_time": "9:05:06"} +{"current_steps": 5016, "total_steps": 7532, "loss": 0.21256107091903687, "lr": 5.510046440520228e-06, "epoch": 1.3319612269286947, "percentage": 66.6, "elapsed_time": "18:06:19", "remaining_time": "9:04:53"} +{"current_steps": 5017, "total_steps": 7532, "loss": 0.25800254940986633, "lr": 5.506123574872044e-06, "epoch": 1.3322267959102376, "percentage": 66.61, "elapsed_time": "18:06:32", "remaining_time": "9:04:40"} +{"current_steps": 5018, "total_steps": 7532, "loss": 0.2421891689300537, "lr": 5.502201575590236e-06, "epoch": 1.3324923648917806, "percentage": 66.62, "elapsed_time": "18:06:45", "remaining_time": "9:04:27"} +{"current_steps": 5019, "total_steps": 7532, "loss": 0.24375903606414795, "lr": 5.498280443430917e-06, "epoch": 1.3327579338733235, "percentage": 66.64, "elapsed_time": "18:06:58", "remaining_time": "9:04:14"} +{"current_steps": 5020, "total_steps": 7532, "loss": 0.22173303365707397, "lr": 5.494360179150033e-06, "epoch": 1.3330235028548665, "percentage": 66.65, "elapsed_time": "18:07:11", "remaining_time": "9:04:01"} +{"current_steps": 5021, "total_steps": 7532, "loss": 0.24005022644996643, "lr": 5.49044078350337e-06, "epoch": 1.3332890718364094, "percentage": 66.66, "elapsed_time": "18:07:23", "remaining_time": "9:03:48"} +{"current_steps": 5022, 
"total_steps": 7532, "loss": 0.2600201964378357, "lr": 5.486522257246538e-06, "epoch": 1.3335546408179524, "percentage": 66.68, "elapsed_time": "18:07:36", "remaining_time": "9:03:35"} +{"current_steps": 5023, "total_steps": 7532, "loss": 0.22889836132526398, "lr": 5.482604601134984e-06, "epoch": 1.3338202097994953, "percentage": 66.69, "elapsed_time": "18:07:49", "remaining_time": "9:03:22"} +{"current_steps": 5024, "total_steps": 7532, "loss": 0.25045812129974365, "lr": 5.478687815923981e-06, "epoch": 1.3340857787810383, "percentage": 66.7, "elapsed_time": "18:08:02", "remaining_time": "9:03:09"} +{"current_steps": 5025, "total_steps": 7532, "loss": 0.24649837613105774, "lr": 5.474771902368646e-06, "epoch": 1.3343513477625812, "percentage": 66.72, "elapsed_time": "18:08:14", "remaining_time": "9:02:55"} +{"current_steps": 5026, "total_steps": 7532, "loss": 0.23994389176368713, "lr": 5.470856861223919e-06, "epoch": 1.3346169167441242, "percentage": 66.73, "elapsed_time": "18:08:28", "remaining_time": "9:02:43"} +{"current_steps": 5027, "total_steps": 7532, "loss": 0.24381600320339203, "lr": 5.466942693244572e-06, "epoch": 1.3348824857256671, "percentage": 66.74, "elapsed_time": "18:08:40", "remaining_time": "9:02:29"} +{"current_steps": 5028, "total_steps": 7532, "loss": 0.22110486030578613, "lr": 5.463029399185217e-06, "epoch": 1.33514805470721, "percentage": 66.76, "elapsed_time": "18:08:53", "remaining_time": "9:02:16"} +{"current_steps": 5029, "total_steps": 7532, "loss": 0.25733259320259094, "lr": 5.459116979800281e-06, "epoch": 1.335413623688753, "percentage": 66.77, "elapsed_time": "18:09:06", "remaining_time": "9:02:03"} +{"current_steps": 5030, "total_steps": 7532, "loss": 0.22853803634643555, "lr": 5.4552054358440355e-06, "epoch": 1.335679192670296, "percentage": 66.78, "elapsed_time": "18:09:19", "remaining_time": "9:01:50"} +{"current_steps": 5031, "total_steps": 7532, "loss": 0.27503639459609985, "lr": 5.451294768070581e-06, "epoch": 
1.335944761651839, "percentage": 66.8, "elapsed_time": "18:09:31", "remaining_time": "9:01:37"} +{"current_steps": 5032, "total_steps": 7532, "loss": 0.27931997179985046, "lr": 5.447384977233849e-06, "epoch": 1.336210330633382, "percentage": 66.81, "elapsed_time": "18:09:44", "remaining_time": "9:01:24"} +{"current_steps": 5033, "total_steps": 7532, "loss": 0.2477954626083374, "lr": 5.443476064087596e-06, "epoch": 1.3364758996149249, "percentage": 66.82, "elapsed_time": "18:09:57", "remaining_time": "9:01:11"} +{"current_steps": 5034, "total_steps": 7532, "loss": 0.2195623219013214, "lr": 5.439568029385422e-06, "epoch": 1.3367414685964678, "percentage": 66.83, "elapsed_time": "18:10:10", "remaining_time": "9:00:58"} +{"current_steps": 5035, "total_steps": 7532, "loss": 0.22160238027572632, "lr": 5.435660873880747e-06, "epoch": 1.3370070375780108, "percentage": 66.85, "elapsed_time": "18:10:23", "remaining_time": "9:00:45"} +{"current_steps": 5036, "total_steps": 7532, "loss": 0.24107405543327332, "lr": 5.4317545983268235e-06, "epoch": 1.3372726065595537, "percentage": 66.86, "elapsed_time": "18:10:36", "remaining_time": "9:00:32"} +{"current_steps": 5037, "total_steps": 7532, "loss": 0.2480086386203766, "lr": 5.427849203476738e-06, "epoch": 1.3375381755410967, "percentage": 66.87, "elapsed_time": "18:10:49", "remaining_time": "9:00:19"} +{"current_steps": 5038, "total_steps": 7532, "loss": 0.22476691007614136, "lr": 5.4239446900834005e-06, "epoch": 1.3378037445226398, "percentage": 66.89, "elapsed_time": "18:11:02", "remaining_time": "9:00:06"} +{"current_steps": 5039, "total_steps": 7532, "loss": 0.23685473203659058, "lr": 5.420041058899559e-06, "epoch": 1.3380693135041828, "percentage": 66.9, "elapsed_time": "18:11:15", "remaining_time": "8:59:53"} +{"current_steps": 5040, "total_steps": 7532, "loss": 0.27753746509552, "lr": 5.416138310677784e-06, "epoch": 1.3383348824857257, "percentage": 66.91, "elapsed_time": "18:11:27", "remaining_time": "8:59:40"} 
+{"current_steps": 5041, "total_steps": 7532, "loss": 0.22446027398109436, "lr": 5.412236446170482e-06, "epoch": 1.3386004514672687, "percentage": 66.93, "elapsed_time": "18:11:41", "remaining_time": "8:59:27"} +{"current_steps": 5042, "total_steps": 7532, "loss": 0.2535285949707031, "lr": 5.4083354661298816e-06, "epoch": 1.3388660204488116, "percentage": 66.94, "elapsed_time": "18:11:53", "remaining_time": "8:59:13"} +{"current_steps": 5043, "total_steps": 7532, "loss": 0.2412964254617691, "lr": 5.4044353713080565e-06, "epoch": 1.3391315894303546, "percentage": 66.95, "elapsed_time": "18:12:06", "remaining_time": "8:59:01"} +{"current_steps": 5044, "total_steps": 7532, "loss": 0.23863038420677185, "lr": 5.4005361624568895e-06, "epoch": 1.3393971584118975, "percentage": 66.97, "elapsed_time": "18:12:19", "remaining_time": "8:58:47"} +{"current_steps": 5045, "total_steps": 7532, "loss": 0.22741727530956268, "lr": 5.396637840328105e-06, "epoch": 1.3396627273934405, "percentage": 66.98, "elapsed_time": "18:12:32", "remaining_time": "8:58:34"} +{"current_steps": 5046, "total_steps": 7532, "loss": 0.2497379630804062, "lr": 5.392740405673251e-06, "epoch": 1.3399282963749835, "percentage": 66.99, "elapsed_time": "18:12:45", "remaining_time": "8:58:21"} +{"current_steps": 5047, "total_steps": 7532, "loss": 0.19558298587799072, "lr": 5.388843859243712e-06, "epoch": 1.3401938653565264, "percentage": 67.01, "elapsed_time": "18:12:58", "remaining_time": "8:58:08"} +{"current_steps": 5048, "total_steps": 7532, "loss": 0.2266748994588852, "lr": 5.3849482017906914e-06, "epoch": 1.3404594343380694, "percentage": 67.02, "elapsed_time": "18:13:10", "remaining_time": "8:57:55"} +{"current_steps": 5049, "total_steps": 7532, "loss": 0.2410028576850891, "lr": 5.381053434065229e-06, "epoch": 1.3407250033196123, "percentage": 67.03, "elapsed_time": "18:13:23", "remaining_time": "8:57:42"} +{"current_steps": 5050, "total_steps": 7532, "loss": 0.23965512216091156, "lr": 
5.37715955681819e-06, "epoch": 1.3409905723011553, "percentage": 67.05, "elapsed_time": "18:13:36", "remaining_time": "8:57:29"} +{"current_steps": 5051, "total_steps": 7532, "loss": 0.22440138459205627, "lr": 5.373266570800262e-06, "epoch": 1.3412561412826982, "percentage": 67.06, "elapsed_time": "18:13:48", "remaining_time": "8:57:16"} +{"current_steps": 5052, "total_steps": 7532, "loss": 0.2509710192680359, "lr": 5.369374476761975e-06, "epoch": 1.3415217102642412, "percentage": 67.07, "elapsed_time": "18:14:02", "remaining_time": "8:57:03"} +{"current_steps": 5053, "total_steps": 7532, "loss": 0.26555800437927246, "lr": 5.365483275453677e-06, "epoch": 1.3417872792457841, "percentage": 67.09, "elapsed_time": "18:14:14", "remaining_time": "8:56:50"} +{"current_steps": 5054, "total_steps": 7532, "loss": 0.23089733719825745, "lr": 5.361592967625544e-06, "epoch": 1.342052848227327, "percentage": 67.1, "elapsed_time": "18:14:27", "remaining_time": "8:56:37"} +{"current_steps": 5055, "total_steps": 7532, "loss": 0.2040700763463974, "lr": 5.357703554027582e-06, "epoch": 1.34231841720887, "percentage": 67.11, "elapsed_time": "18:14:40", "remaining_time": "8:56:24"} +{"current_steps": 5056, "total_steps": 7532, "loss": 0.23539039492607117, "lr": 5.353815035409624e-06, "epoch": 1.342583986190413, "percentage": 67.13, "elapsed_time": "18:14:53", "remaining_time": "8:56:11"} +{"current_steps": 5057, "total_steps": 7532, "loss": 0.2190464437007904, "lr": 5.3499274125213294e-06, "epoch": 1.342849555171956, "percentage": 67.14, "elapsed_time": "18:15:06", "remaining_time": "8:55:57"} +{"current_steps": 5058, "total_steps": 7532, "loss": 0.21557429432868958, "lr": 5.346040686112189e-06, "epoch": 1.3431151241534989, "percentage": 67.15, "elapsed_time": "18:15:19", "remaining_time": "8:55:45"} +{"current_steps": 5059, "total_steps": 7532, "loss": 0.24398267269134521, "lr": 5.342154856931515e-06, "epoch": 1.3433806931350418, "percentage": 67.17, "elapsed_time": "18:15:32", 
"remaining_time": "8:55:32"} +{"current_steps": 5060, "total_steps": 7532, "loss": 0.21652038395404816, "lr": 5.338269925728451e-06, "epoch": 1.3436462621165848, "percentage": 67.18, "elapsed_time": "18:15:45", "remaining_time": "8:55:19"} +{"current_steps": 5061, "total_steps": 7532, "loss": 0.2031325101852417, "lr": 5.334385893251966e-06, "epoch": 1.3439118310981277, "percentage": 67.19, "elapsed_time": "18:15:58", "remaining_time": "8:55:06"} +{"current_steps": 5062, "total_steps": 7532, "loss": 0.2484835982322693, "lr": 5.330502760250853e-06, "epoch": 1.3441774000796707, "percentage": 67.21, "elapsed_time": "18:16:11", "remaining_time": "8:54:53"} +{"current_steps": 5063, "total_steps": 7532, "loss": 0.23698699474334717, "lr": 5.326620527473737e-06, "epoch": 1.3444429690612136, "percentage": 67.22, "elapsed_time": "18:16:24", "remaining_time": "8:54:40"} +{"current_steps": 5064, "total_steps": 7532, "loss": 0.23928484320640564, "lr": 5.322739195669065e-06, "epoch": 1.3447085380427566, "percentage": 67.23, "elapsed_time": "18:16:37", "remaining_time": "8:54:27"} +{"current_steps": 5065, "total_steps": 7532, "loss": 0.22679512202739716, "lr": 5.318858765585115e-06, "epoch": 1.3449741070242995, "percentage": 67.25, "elapsed_time": "18:16:50", "remaining_time": "8:54:14"} +{"current_steps": 5066, "total_steps": 7532, "loss": 0.2115025818347931, "lr": 5.314979237969984e-06, "epoch": 1.3452396760058425, "percentage": 67.26, "elapsed_time": "18:17:02", "remaining_time": "8:54:00"} +{"current_steps": 5067, "total_steps": 7532, "loss": 0.2441834807395935, "lr": 5.311100613571603e-06, "epoch": 1.3455052449873854, "percentage": 67.27, "elapsed_time": "18:17:16", "remaining_time": "8:53:48"} +{"current_steps": 5068, "total_steps": 7532, "loss": 0.2549205720424652, "lr": 5.307222893137722e-06, "epoch": 1.3457708139689284, "percentage": 67.29, "elapsed_time": "18:17:28", "remaining_time": "8:53:34"} +{"current_steps": 5069, "total_steps": 7532, "loss": 0.24652990698814392, 
"lr": 5.3033460774159185e-06, "epoch": 1.3460363829504713, "percentage": 67.3, "elapsed_time": "18:17:41", "remaining_time": "8:53:21"} +{"current_steps": 5070, "total_steps": 7532, "loss": 0.2403775006532669, "lr": 5.299470167153602e-06, "epoch": 1.3463019519320143, "percentage": 67.31, "elapsed_time": "18:17:54", "remaining_time": "8:53:08"} +{"current_steps": 5071, "total_steps": 7532, "loss": 0.2215663194656372, "lr": 5.295595163097999e-06, "epoch": 1.3465675209135572, "percentage": 67.33, "elapsed_time": "18:18:08", "remaining_time": "8:52:56"} +{"current_steps": 5072, "total_steps": 7532, "loss": 0.2567424774169922, "lr": 5.291721065996167e-06, "epoch": 1.3468330898951002, "percentage": 67.34, "elapsed_time": "18:18:20", "remaining_time": "8:52:42"} +{"current_steps": 5073, "total_steps": 7532, "loss": 0.21162359416484833, "lr": 5.287847876594984e-06, "epoch": 1.3470986588766432, "percentage": 67.35, "elapsed_time": "18:18:33", "remaining_time": "8:52:29"} +{"current_steps": 5074, "total_steps": 7532, "loss": 0.21851085126399994, "lr": 5.283975595641155e-06, "epoch": 1.347364227858186, "percentage": 67.37, "elapsed_time": "18:18:46", "remaining_time": "8:52:16"} +{"current_steps": 5075, "total_steps": 7532, "loss": 0.2491171509027481, "lr": 5.280104223881212e-06, "epoch": 1.347629796839729, "percentage": 67.38, "elapsed_time": "18:18:59", "remaining_time": "8:52:03"} +{"current_steps": 5076, "total_steps": 7532, "loss": 0.22467780113220215, "lr": 5.276233762061507e-06, "epoch": 1.347895365821272, "percentage": 67.39, "elapsed_time": "18:19:12", "remaining_time": "8:51:50"} +{"current_steps": 5077, "total_steps": 7532, "loss": 0.24531611800193787, "lr": 5.272364210928223e-06, "epoch": 1.348160934802815, "percentage": 67.41, "elapsed_time": "18:19:25", "remaining_time": "8:51:37"} +{"current_steps": 5078, "total_steps": 7532, "loss": 0.2582520544528961, "lr": 5.268495571227361e-06, "epoch": 1.348426503784358, "percentage": 67.42, "elapsed_time": "18:19:37", 
"remaining_time": "8:51:24"} +{"current_steps": 5079, "total_steps": 7532, "loss": 0.21180811524391174, "lr": 5.264627843704749e-06, "epoch": 1.3486920727659009, "percentage": 67.43, "elapsed_time": "18:19:50", "remaining_time": "8:51:11"} +{"current_steps": 5080, "total_steps": 7532, "loss": 0.27026671171188354, "lr": 5.2607610291060406e-06, "epoch": 1.348957641747444, "percentage": 67.45, "elapsed_time": "18:20:03", "remaining_time": "8:50:58"} +{"current_steps": 5081, "total_steps": 7532, "loss": 0.22954419255256653, "lr": 5.256895128176712e-06, "epoch": 1.349223210728987, "percentage": 67.46, "elapsed_time": "18:20:16", "remaining_time": "8:50:45"} +{"current_steps": 5082, "total_steps": 7532, "loss": 0.24064484238624573, "lr": 5.253030141662063e-06, "epoch": 1.34948877971053, "percentage": 67.47, "elapsed_time": "18:20:29", "remaining_time": "8:50:32"} +{"current_steps": 5083, "total_steps": 7532, "loss": 0.1981196105480194, "lr": 5.249166070307218e-06, "epoch": 1.349754348692073, "percentage": 67.49, "elapsed_time": "18:20:41", "remaining_time": "8:50:19"} +{"current_steps": 5084, "total_steps": 7532, "loss": 0.19882233440876007, "lr": 5.2453029148571226e-06, "epoch": 1.3500199176736158, "percentage": 67.5, "elapsed_time": "18:20:55", "remaining_time": "8:50:06"} +{"current_steps": 5085, "total_steps": 7532, "loss": 0.2409907579421997, "lr": 5.24144067605655e-06, "epoch": 1.3502854866551588, "percentage": 67.51, "elapsed_time": "18:21:07", "remaining_time": "8:49:53"} +{"current_steps": 5086, "total_steps": 7532, "loss": 0.2205093652009964, "lr": 5.237579354650092e-06, "epoch": 1.3505510556367017, "percentage": 67.53, "elapsed_time": "18:21:21", "remaining_time": "8:49:40"} +{"current_steps": 5087, "total_steps": 7532, "loss": 0.2283058911561966, "lr": 5.233718951382163e-06, "epoch": 1.3508166246182447, "percentage": 67.54, "elapsed_time": "18:21:33", "remaining_time": "8:49:27"} +{"current_steps": 5088, "total_steps": 7532, "loss": 0.25584474205970764, "lr": 
5.229859466997012e-06, "epoch": 1.3510821935997877, "percentage": 67.55, "elapsed_time": "18:21:46", "remaining_time": "8:49:14"} +{"current_steps": 5089, "total_steps": 7532, "loss": 0.22516845166683197, "lr": 5.226000902238696e-06, "epoch": 1.3513477625813306, "percentage": 67.57, "elapsed_time": "18:21:59", "remaining_time": "8:49:00"} +{"current_steps": 5090, "total_steps": 7532, "loss": 0.23440764844417572, "lr": 5.222143257851102e-06, "epoch": 1.3516133315628736, "percentage": 67.58, "elapsed_time": "18:22:12", "remaining_time": "8:48:47"} +{"current_steps": 5091, "total_steps": 7532, "loss": 0.25858962535858154, "lr": 5.218286534577938e-06, "epoch": 1.3518789005444165, "percentage": 67.59, "elapsed_time": "18:22:24", "remaining_time": "8:48:34"} +{"current_steps": 5092, "total_steps": 7532, "loss": 0.20676326751708984, "lr": 5.214430733162736e-06, "epoch": 1.3521444695259595, "percentage": 67.6, "elapsed_time": "18:22:37", "remaining_time": "8:48:21"} +{"current_steps": 5093, "total_steps": 7532, "loss": 0.21892425417900085, "lr": 5.210575854348853e-06, "epoch": 1.3524100385075024, "percentage": 67.62, "elapsed_time": "18:22:50", "remaining_time": "8:48:08"} +{"current_steps": 5094, "total_steps": 7532, "loss": 0.2538335919380188, "lr": 5.206721898879454e-06, "epoch": 1.3526756074890454, "percentage": 67.63, "elapsed_time": "18:23:03", "remaining_time": "8:47:55"} +{"current_steps": 5095, "total_steps": 7532, "loss": 0.24750448763370514, "lr": 5.202868867497542e-06, "epoch": 1.3529411764705883, "percentage": 67.64, "elapsed_time": "18:23:16", "remaining_time": "8:47:42"} +{"current_steps": 5096, "total_steps": 7532, "loss": 0.2569364011287689, "lr": 5.199016760945931e-06, "epoch": 1.3532067454521313, "percentage": 67.66, "elapsed_time": "18:23:28", "remaining_time": "8:47:29"} +{"current_steps": 5097, "total_steps": 7532, "loss": 0.16788914799690247, "lr": 5.19516557996727e-06, "epoch": 1.3534723144336742, "percentage": 67.67, "elapsed_time": "18:23:42", 
"remaining_time": "8:47:16"} +{"current_steps": 5098, "total_steps": 7532, "loss": 0.19006651639938354, "lr": 5.191315325304018e-06, "epoch": 1.3537378834152172, "percentage": 67.68, "elapsed_time": "18:23:54", "remaining_time": "8:47:03"} +{"current_steps": 5099, "total_steps": 7532, "loss": 0.23474551737308502, "lr": 5.1874659976984575e-06, "epoch": 1.3540034523967601, "percentage": 67.7, "elapsed_time": "18:24:08", "remaining_time": "8:46:50"} +{"current_steps": 5100, "total_steps": 7532, "loss": 0.26601099967956543, "lr": 5.183617597892694e-06, "epoch": 1.354269021378303, "percentage": 67.71, "elapsed_time": "18:24:20", "remaining_time": "8:46:37"} +{"current_steps": 5101, "total_steps": 7532, "loss": 0.24207550287246704, "lr": 5.179770126628654e-06, "epoch": 1.354534590359846, "percentage": 67.72, "elapsed_time": "18:24:40", "remaining_time": "8:46:27"} +{"current_steps": 5102, "total_steps": 7532, "loss": 0.2538307309150696, "lr": 5.175923584648083e-06, "epoch": 1.354800159341389, "percentage": 67.74, "elapsed_time": "18:24:52", "remaining_time": "8:46:14"} +{"current_steps": 5103, "total_steps": 7532, "loss": 0.23073242604732513, "lr": 5.172077972692553e-06, "epoch": 1.355065728322932, "percentage": 67.75, "elapsed_time": "18:25:06", "remaining_time": "8:46:01"} +{"current_steps": 5104, "total_steps": 7532, "loss": 0.2634595036506653, "lr": 5.168233291503448e-06, "epoch": 1.3553312973044749, "percentage": 67.76, "elapsed_time": "18:25:18", "remaining_time": "8:45:48"} +{"current_steps": 5105, "total_steps": 7532, "loss": 0.23282350599765778, "lr": 5.1643895418219744e-06, "epoch": 1.3555968662860178, "percentage": 67.78, "elapsed_time": "18:25:32", "remaining_time": "8:45:35"} +{"current_steps": 5106, "total_steps": 7532, "loss": 0.2543700933456421, "lr": 5.160546724389172e-06, "epoch": 1.3558624352675608, "percentage": 67.79, "elapsed_time": "18:25:44", "remaining_time": "8:45:22"} +{"current_steps": 5107, "total_steps": 7532, "loss": 0.2005772739648819, 
"lr": 5.1567048399458855e-06, "epoch": 1.3561280042491037, "percentage": 67.8, "elapsed_time": "18:25:57", "remaining_time": "8:45:09"} +{"current_steps": 5108, "total_steps": 7532, "loss": 0.2367073893547058, "lr": 5.152863889232787e-06, "epoch": 1.3563935732306467, "percentage": 67.82, "elapsed_time": "18:26:10", "remaining_time": "8:44:55"} +{"current_steps": 5109, "total_steps": 7532, "loss": 0.25600770115852356, "lr": 5.14902387299036e-06, "epoch": 1.3566591422121896, "percentage": 67.83, "elapsed_time": "18:26:22", "remaining_time": "8:44:42"} +{"current_steps": 5110, "total_steps": 7532, "loss": 0.21678754687309265, "lr": 5.145184791958918e-06, "epoch": 1.3569247111937326, "percentage": 67.84, "elapsed_time": "18:26:36", "remaining_time": "8:44:29"} +{"current_steps": 5111, "total_steps": 7532, "loss": 0.265438973903656, "lr": 5.141346646878591e-06, "epoch": 1.3571902801752755, "percentage": 67.86, "elapsed_time": "18:26:48", "remaining_time": "8:44:16"} +{"current_steps": 5112, "total_steps": 7532, "loss": 0.24246999621391296, "lr": 5.13750943848933e-06, "epoch": 1.3574558491568185, "percentage": 67.87, "elapsed_time": "18:27:02", "remaining_time": "8:44:03"} +{"current_steps": 5113, "total_steps": 7532, "loss": 0.25401771068573, "lr": 5.133673167530899e-06, "epoch": 1.3577214181383614, "percentage": 67.88, "elapsed_time": "18:27:14", "remaining_time": "8:43:50"} +{"current_steps": 5114, "total_steps": 7532, "loss": 0.2698017656803131, "lr": 5.129837834742885e-06, "epoch": 1.3579869871199044, "percentage": 67.9, "elapsed_time": "18:27:28", "remaining_time": "8:43:38"} +{"current_steps": 5115, "total_steps": 7532, "loss": 0.27006995677948, "lr": 5.126003440864703e-06, "epoch": 1.3582525561014474, "percentage": 67.91, "elapsed_time": "18:27:40", "remaining_time": "8:43:24"} +{"current_steps": 5116, "total_steps": 7532, "loss": 0.2370866984128952, "lr": 5.122169986635575e-06, "epoch": 1.3585181250829903, "percentage": 67.92, "elapsed_time": "18:27:54", 
"remaining_time": "8:43:12"} +{"current_steps": 5117, "total_steps": 7532, "loss": 0.24017807841300964, "lr": 5.1183374727945425e-06, "epoch": 1.3587836940645333, "percentage": 67.94, "elapsed_time": "18:28:07", "remaining_time": "8:42:58"} +{"current_steps": 5118, "total_steps": 7532, "loss": 0.21664533019065857, "lr": 5.114505900080473e-06, "epoch": 1.3590492630460762, "percentage": 67.95, "elapsed_time": "18:28:20", "remaining_time": "8:42:46"} +{"current_steps": 5119, "total_steps": 7532, "loss": 0.24561598896980286, "lr": 5.110675269232046e-06, "epoch": 1.3593148320276192, "percentage": 67.96, "elapsed_time": "18:28:33", "remaining_time": "8:42:33"} +{"current_steps": 5120, "total_steps": 7532, "loss": 0.26678937673568726, "lr": 5.106845580987763e-06, "epoch": 1.359580401009162, "percentage": 67.98, "elapsed_time": "18:28:46", "remaining_time": "8:42:20"} +{"current_steps": 5121, "total_steps": 7532, "loss": 0.21919070184230804, "lr": 5.103016836085943e-06, "epoch": 1.359845969990705, "percentage": 67.99, "elapsed_time": "18:28:59", "remaining_time": "8:42:07"} +{"current_steps": 5122, "total_steps": 7532, "loss": 0.24887943267822266, "lr": 5.099189035264722e-06, "epoch": 1.360111538972248, "percentage": 68.0, "elapsed_time": "18:29:12", "remaining_time": "8:41:54"} +{"current_steps": 5123, "total_steps": 7532, "loss": 0.23597784340381622, "lr": 5.0953621792620556e-06, "epoch": 1.360377107953791, "percentage": 68.02, "elapsed_time": "18:29:25", "remaining_time": "8:41:41"} +{"current_steps": 5124, "total_steps": 7532, "loss": 0.21265193819999695, "lr": 5.091536268815717e-06, "epoch": 1.360642676935334, "percentage": 68.03, "elapsed_time": "18:29:38", "remaining_time": "8:41:28"} +{"current_steps": 5125, "total_steps": 7532, "loss": 0.29837465286254883, "lr": 5.0877113046632945e-06, "epoch": 1.3609082459168769, "percentage": 68.04, "elapsed_time": "18:29:51", "remaining_time": "8:41:15"} +{"current_steps": 5126, "total_steps": 7532, "loss": 0.2324269413948059, 
"lr": 5.0838872875421975e-06, "epoch": 1.3611738148984198, "percentage": 68.06, "elapsed_time": "18:30:04", "remaining_time": "8:41:02"} +{"current_steps": 5127, "total_steps": 7532, "loss": 0.19149541854858398, "lr": 5.080064218189652e-06, "epoch": 1.3614393838799628, "percentage": 68.07, "elapsed_time": "18:30:17", "remaining_time": "8:40:49"} +{"current_steps": 5128, "total_steps": 7532, "loss": 0.247644305229187, "lr": 5.0762420973427e-06, "epoch": 1.3617049528615057, "percentage": 68.08, "elapsed_time": "18:30:30", "remaining_time": "8:40:36"} +{"current_steps": 5129, "total_steps": 7532, "loss": 0.2272202968597412, "lr": 5.0724209257382006e-06, "epoch": 1.3619705218430487, "percentage": 68.1, "elapsed_time": "18:30:44", "remaining_time": "8:40:23"} +{"current_steps": 5130, "total_steps": 7532, "loss": 0.25735989212989807, "lr": 5.068600704112832e-06, "epoch": 1.3622360908245916, "percentage": 68.11, "elapsed_time": "18:30:56", "remaining_time": "8:40:10"} +{"current_steps": 5131, "total_steps": 7532, "loss": 0.19970473647117615, "lr": 5.064781433203086e-06, "epoch": 1.3625016598061346, "percentage": 68.12, "elapsed_time": "18:31:10", "remaining_time": "8:39:57"} +{"current_steps": 5132, "total_steps": 7532, "loss": 0.24289372563362122, "lr": 5.060963113745272e-06, "epoch": 1.3627672287876775, "percentage": 68.14, "elapsed_time": "18:31:23", "remaining_time": "8:39:44"} +{"current_steps": 5133, "total_steps": 7532, "loss": 0.2757350504398346, "lr": 5.0571457464755226e-06, "epoch": 1.3630327977692205, "percentage": 68.15, "elapsed_time": "18:31:36", "remaining_time": "8:39:31"} +{"current_steps": 5134, "total_steps": 7532, "loss": 0.24552851915359497, "lr": 5.053329332129777e-06, "epoch": 1.3632983667507634, "percentage": 68.16, "elapsed_time": "18:31:49", "remaining_time": "8:39:18"} +{"current_steps": 5135, "total_steps": 7532, "loss": 0.22152797877788544, "lr": 5.049513871443797e-06, "epoch": 1.3635639357323064, "percentage": 68.18, "elapsed_time": 
"18:32:02", "remaining_time": "8:39:05"} +{"current_steps": 5136, "total_steps": 7532, "loss": 0.27098602056503296, "lr": 5.045699365153155e-06, "epoch": 1.3638295047138493, "percentage": 68.19, "elapsed_time": "18:32:15", "remaining_time": "8:38:52"} +{"current_steps": 5137, "total_steps": 7532, "loss": 0.21275216341018677, "lr": 5.041885813993246e-06, "epoch": 1.3640950736953923, "percentage": 68.2, "elapsed_time": "18:32:28", "remaining_time": "8:38:39"} +{"current_steps": 5138, "total_steps": 7532, "loss": 0.2510162591934204, "lr": 5.038073218699275e-06, "epoch": 1.3643606426769352, "percentage": 68.22, "elapsed_time": "18:32:41", "remaining_time": "8:38:26"} +{"current_steps": 5139, "total_steps": 7532, "loss": 0.23203429579734802, "lr": 5.034261580006269e-06, "epoch": 1.3646262116584782, "percentage": 68.23, "elapsed_time": "18:32:54", "remaining_time": "8:38:14"} +{"current_steps": 5140, "total_steps": 7532, "loss": 0.22178995609283447, "lr": 5.030450898649064e-06, "epoch": 1.3648917806400211, "percentage": 68.24, "elapsed_time": "18:33:07", "remaining_time": "8:38:00"} +{"current_steps": 5141, "total_steps": 7532, "loss": 0.2567412257194519, "lr": 5.026641175362316e-06, "epoch": 1.365157349621564, "percentage": 68.26, "elapsed_time": "18:33:20", "remaining_time": "8:37:47"} +{"current_steps": 5142, "total_steps": 7532, "loss": 0.21939827501773834, "lr": 5.022832410880494e-06, "epoch": 1.365422918603107, "percentage": 68.27, "elapsed_time": "18:33:33", "remaining_time": "8:37:34"} +{"current_steps": 5143, "total_steps": 7532, "loss": 0.2325637936592102, "lr": 5.019024605937882e-06, "epoch": 1.36568848758465, "percentage": 68.28, "elapsed_time": "18:33:46", "remaining_time": "8:37:21"} +{"current_steps": 5144, "total_steps": 7532, "loss": 0.2416393756866455, "lr": 5.015217761268582e-06, "epoch": 1.365954056566193, "percentage": 68.3, "elapsed_time": "18:33:59", "remaining_time": "8:37:08"} +{"current_steps": 5145, "total_steps": 7532, "loss": 
0.2439568042755127, "lr": 5.011411877606507e-06, "epoch": 1.366219625547736, "percentage": 68.31, "elapsed_time": "18:34:12", "remaining_time": "8:36:55"} +{"current_steps": 5146, "total_steps": 7532, "loss": 0.2495957612991333, "lr": 5.007606955685387e-06, "epoch": 1.3664851945292789, "percentage": 68.32, "elapsed_time": "18:34:26", "remaining_time": "8:36:43"} +{"current_steps": 5147, "total_steps": 7532, "loss": 0.23415328562259674, "lr": 5.003802996238766e-06, "epoch": 1.3667507635108218, "percentage": 68.34, "elapsed_time": "18:34:38", "remaining_time": "8:36:30"} +{"current_steps": 5148, "total_steps": 7532, "loss": 0.2637922465801239, "lr": 5.000000000000003e-06, "epoch": 1.3670163324923648, "percentage": 68.35, "elapsed_time": "18:34:52", "remaining_time": "8:36:17"} +{"current_steps": 5149, "total_steps": 7532, "loss": 0.2319526970386505, "lr": 4.9961979677022696e-06, "epoch": 1.3672819014739077, "percentage": 68.36, "elapsed_time": "18:35:04", "remaining_time": "8:36:04"} +{"current_steps": 5150, "total_steps": 7532, "loss": 0.2338445484638214, "lr": 4.992396900078551e-06, "epoch": 1.3675474704554509, "percentage": 68.37, "elapsed_time": "18:35:18", "remaining_time": "8:35:51"} +{"current_steps": 5151, "total_steps": 7532, "loss": 0.19041961431503296, "lr": 4.988596797861654e-06, "epoch": 1.3678130394369938, "percentage": 68.39, "elapsed_time": "18:35:31", "remaining_time": "8:35:38"} +{"current_steps": 5152, "total_steps": 7532, "loss": 0.2698138952255249, "lr": 4.984797661784191e-06, "epoch": 1.3680786084185368, "percentage": 68.4, "elapsed_time": "18:35:44", "remaining_time": "8:35:25"} +{"current_steps": 5153, "total_steps": 7532, "loss": 0.2208167165517807, "lr": 4.980999492578588e-06, "epoch": 1.3683441774000797, "percentage": 68.41, "elapsed_time": "18:35:57", "remaining_time": "8:35:12"} +{"current_steps": 5154, "total_steps": 7532, "loss": 0.2515152096748352, "lr": 4.9772022909770915e-06, "epoch": 1.3686097463816227, "percentage": 68.43, 
"elapsed_time": "18:36:10", "remaining_time": "8:34:59"} +{"current_steps": 5155, "total_steps": 7532, "loss": 0.2393365204334259, "lr": 4.973406057711755e-06, "epoch": 1.3688753153631656, "percentage": 68.44, "elapsed_time": "18:36:23", "remaining_time": "8:34:46"} +{"current_steps": 5156, "total_steps": 7532, "loss": 0.24546492099761963, "lr": 4.969610793514446e-06, "epoch": 1.3691408843447086, "percentage": 68.45, "elapsed_time": "18:36:37", "remaining_time": "8:34:33"} +{"current_steps": 5157, "total_steps": 7532, "loss": 0.252412348985672, "lr": 4.965816499116849e-06, "epoch": 1.3694064533262515, "percentage": 68.47, "elapsed_time": "18:36:49", "remaining_time": "8:34:20"} +{"current_steps": 5158, "total_steps": 7532, "loss": 0.22654281556606293, "lr": 4.962023175250461e-06, "epoch": 1.3696720223077945, "percentage": 68.48, "elapsed_time": "18:37:02", "remaining_time": "8:34:07"} +{"current_steps": 5159, "total_steps": 7532, "loss": 0.2542813718318939, "lr": 4.958230822646581e-06, "epoch": 1.3699375912893375, "percentage": 68.49, "elapsed_time": "18:37:16", "remaining_time": "8:33:54"} +{"current_steps": 5160, "total_steps": 7532, "loss": 0.25376224517822266, "lr": 4.9544394420363395e-06, "epoch": 1.3702031602708804, "percentage": 68.51, "elapsed_time": "18:37:28", "remaining_time": "8:33:41"} +{"current_steps": 5161, "total_steps": 7532, "loss": 0.21911674737930298, "lr": 4.950649034150666e-06, "epoch": 1.3704687292524234, "percentage": 68.52, "elapsed_time": "18:37:42", "remaining_time": "8:33:28"} +{"current_steps": 5162, "total_steps": 7532, "loss": 0.2805126905441284, "lr": 4.946859599720308e-06, "epoch": 1.3707342982339663, "percentage": 68.53, "elapsed_time": "18:37:55", "remaining_time": "8:33:15"} +{"current_steps": 5163, "total_steps": 7532, "loss": 0.2189590483903885, "lr": 4.943071139475824e-06, "epoch": 1.3709998672155093, "percentage": 68.55, "elapsed_time": "18:38:08", "remaining_time": "8:33:02"} +{"current_steps": 5164, "total_steps": 7532, 
"loss": 0.21837599575519562, "lr": 4.939283654147582e-06, "epoch": 1.3712654361970522, "percentage": 68.56, "elapsed_time": "18:38:21", "remaining_time": "8:32:49"} +{"current_steps": 5165, "total_steps": 7532, "loss": 0.25090983510017395, "lr": 4.935497144465766e-06, "epoch": 1.3715310051785952, "percentage": 68.57, "elapsed_time": "18:38:34", "remaining_time": "8:32:37"} +{"current_steps": 5166, "total_steps": 7532, "loss": 0.22028754651546478, "lr": 4.93171161116037e-06, "epoch": 1.3717965741601381, "percentage": 68.59, "elapsed_time": "18:38:47", "remaining_time": "8:32:24"} +{"current_steps": 5167, "total_steps": 7532, "loss": 0.20097196102142334, "lr": 4.927927054961201e-06, "epoch": 1.372062143141681, "percentage": 68.6, "elapsed_time": "18:39:00", "remaining_time": "8:32:11"} +{"current_steps": 5168, "total_steps": 7532, "loss": 0.230082705616951, "lr": 4.924143476597872e-06, "epoch": 1.372327712123224, "percentage": 68.61, "elapsed_time": "18:39:13", "remaining_time": "8:31:58"} +{"current_steps": 5169, "total_steps": 7532, "loss": 0.23701804876327515, "lr": 4.920360876799821e-06, "epoch": 1.372593281104767, "percentage": 68.63, "elapsed_time": "18:39:27", "remaining_time": "8:31:45"} +{"current_steps": 5170, "total_steps": 7532, "loss": 0.22357231378555298, "lr": 4.9165792562962834e-06, "epoch": 1.37285885008631, "percentage": 68.64, "elapsed_time": "18:39:40", "remaining_time": "8:31:32"} +{"current_steps": 5171, "total_steps": 7532, "loss": 0.2533026337623596, "lr": 4.912798615816312e-06, "epoch": 1.3731244190678529, "percentage": 68.65, "elapsed_time": "18:39:53", "remaining_time": "8:31:19"} +{"current_steps": 5172, "total_steps": 7532, "loss": 0.24878138303756714, "lr": 4.90901895608877e-06, "epoch": 1.3733899880493958, "percentage": 68.67, "elapsed_time": "18:40:06", "remaining_time": "8:31:06"} +{"current_steps": 5173, "total_steps": 7532, "loss": 0.22641420364379883, "lr": 4.905240277842335e-06, "epoch": 1.3736555570309388, "percentage": 68.68, 
"elapsed_time": "18:40:19", "remaining_time": "8:30:53"} +{"current_steps": 5174, "total_steps": 7532, "loss": 0.24495793879032135, "lr": 4.901462581805483e-06, "epoch": 1.3739211260124817, "percentage": 68.69, "elapsed_time": "18:40:32", "remaining_time": "8:30:40"} +{"current_steps": 5175, "total_steps": 7532, "loss": 0.2688868045806885, "lr": 4.897685868706512e-06, "epoch": 1.3741866949940247, "percentage": 68.71, "elapsed_time": "18:40:45", "remaining_time": "8:30:27"} +{"current_steps": 5176, "total_steps": 7532, "loss": 0.25796642899513245, "lr": 4.893910139273531e-06, "epoch": 1.3744522639755676, "percentage": 68.72, "elapsed_time": "18:40:58", "remaining_time": "8:30:14"} +{"current_steps": 5177, "total_steps": 7532, "loss": 0.27557405829429626, "lr": 4.890135394234451e-06, "epoch": 1.3747178329571106, "percentage": 68.73, "elapsed_time": "18:41:11", "remaining_time": "8:30:01"} +{"current_steps": 5178, "total_steps": 7532, "loss": 0.23553809523582458, "lr": 4.886361634317004e-06, "epoch": 1.3749834019386535, "percentage": 68.75, "elapsed_time": "18:41:24", "remaining_time": "8:29:48"} +{"current_steps": 5179, "total_steps": 7532, "loss": 0.2454400360584259, "lr": 4.882588860248725e-06, "epoch": 1.3752489709201965, "percentage": 68.76, "elapsed_time": "18:41:38", "remaining_time": "8:29:35"} +{"current_steps": 5180, "total_steps": 7532, "loss": 0.19460657238960266, "lr": 4.878817072756959e-06, "epoch": 1.3755145399017394, "percentage": 68.77, "elapsed_time": "18:41:51", "remaining_time": "8:29:23"} +{"current_steps": 5181, "total_steps": 7532, "loss": 0.24833449721336365, "lr": 4.875046272568863e-06, "epoch": 1.3757801088832824, "percentage": 68.79, "elapsed_time": "18:42:04", "remaining_time": "8:29:09"} +{"current_steps": 5182, "total_steps": 7532, "loss": 0.2774161994457245, "lr": 4.871276460411403e-06, "epoch": 1.3760456778648253, "percentage": 68.8, "elapsed_time": "18:42:17", "remaining_time": "8:28:57"} +{"current_steps": 5183, "total_steps": 7532, 
"loss": 0.2277964949607849, "lr": 4.867507637011353e-06, "epoch": 1.3763112468463683, "percentage": 68.81, "elapsed_time": "18:42:30", "remaining_time": "8:28:44"} +{"current_steps": 5184, "total_steps": 7532, "loss": 0.2176733911037445, "lr": 4.863739803095299e-06, "epoch": 1.3765768158279112, "percentage": 68.83, "elapsed_time": "18:42:44", "remaining_time": "8:28:31"} +{"current_steps": 5185, "total_steps": 7532, "loss": 0.23529113829135895, "lr": 4.859972959389634e-06, "epoch": 1.3768423848094542, "percentage": 68.84, "elapsed_time": "18:42:56", "remaining_time": "8:28:18"} +{"current_steps": 5186, "total_steps": 7532, "loss": 0.2646695077419281, "lr": 4.856207106620557e-06, "epoch": 1.3771079537909972, "percentage": 68.85, "elapsed_time": "18:43:10", "remaining_time": "8:28:05"} +{"current_steps": 5187, "total_steps": 7532, "loss": 0.23179873824119568, "lr": 4.852442245514093e-06, "epoch": 1.37737352277254, "percentage": 68.87, "elapsed_time": "18:43:23", "remaining_time": "8:27:52"} +{"current_steps": 5188, "total_steps": 7532, "loss": 0.2127494066953659, "lr": 4.84867837679605e-06, "epoch": 1.377639091754083, "percentage": 68.88, "elapsed_time": "18:43:35", "remaining_time": "8:27:39"} +{"current_steps": 5189, "total_steps": 7532, "loss": 0.2204679548740387, "lr": 4.844915501192062e-06, "epoch": 1.377904660735626, "percentage": 68.89, "elapsed_time": "18:43:49", "remaining_time": "8:27:26"} +{"current_steps": 5190, "total_steps": 7532, "loss": 0.20271794497966766, "lr": 4.841153619427567e-06, "epoch": 1.378170229717169, "percentage": 68.91, "elapsed_time": "18:44:02", "remaining_time": "8:27:13"} +{"current_steps": 5191, "total_steps": 7532, "loss": 0.2785792052745819, "lr": 4.837392732227811e-06, "epoch": 1.378435798698712, "percentage": 68.92, "elapsed_time": "18:44:15", "remaining_time": "8:27:00"} +{"current_steps": 5192, "total_steps": 7532, "loss": 0.24904468655586243, "lr": 4.8336328403178486e-06, "epoch": 1.3787013676802549, "percentage": 68.93, 
"elapsed_time": "18:44:28", "remaining_time": "8:26:47"} +{"current_steps": 5193, "total_steps": 7532, "loss": 0.20045346021652222, "lr": 4.829873944422544e-06, "epoch": 1.378966936661798, "percentage": 68.95, "elapsed_time": "18:44:41", "remaining_time": "8:26:34"} +{"current_steps": 5194, "total_steps": 7532, "loss": 0.21814313530921936, "lr": 4.826116045266565e-06, "epoch": 1.379232505643341, "percentage": 68.96, "elapsed_time": "18:44:54", "remaining_time": "8:26:21"} +{"current_steps": 5195, "total_steps": 7532, "loss": 0.2408592253923416, "lr": 4.82235914357439e-06, "epoch": 1.379498074624884, "percentage": 68.97, "elapsed_time": "18:45:08", "remaining_time": "8:26:08"} +{"current_steps": 5196, "total_steps": 7532, "loss": 0.21453416347503662, "lr": 4.818603240070311e-06, "epoch": 1.379763643606427, "percentage": 68.99, "elapsed_time": "18:45:21", "remaining_time": "8:25:55"} +{"current_steps": 5197, "total_steps": 7532, "loss": 0.2578599154949188, "lr": 4.814848335478418e-06, "epoch": 1.3800292125879698, "percentage": 69.0, "elapsed_time": "18:45:34", "remaining_time": "8:25:43"} +{"current_steps": 5198, "total_steps": 7532, "loss": 0.1980094015598297, "lr": 4.811094430522613e-06, "epoch": 1.3802947815695128, "percentage": 69.01, "elapsed_time": "18:45:47", "remaining_time": "8:25:30"} +{"current_steps": 5199, "total_steps": 7532, "loss": 0.24620960652828217, "lr": 4.807341525926604e-06, "epoch": 1.3805603505510557, "percentage": 69.03, "elapsed_time": "18:46:00", "remaining_time": "8:25:17"} +{"current_steps": 5200, "total_steps": 7532, "loss": 0.23525282740592957, "lr": 4.803589622413908e-06, "epoch": 1.3808259195325987, "percentage": 69.04, "elapsed_time": "18:46:13", "remaining_time": "8:25:04"} +{"current_steps": 5201, "total_steps": 7532, "loss": 0.2277744859457016, "lr": 4.799838720707847e-06, "epoch": 1.3810914885141417, "percentage": 69.05, "elapsed_time": "18:46:33", "remaining_time": "8:24:54"} +{"current_steps": 5202, "total_steps": 7532, "loss": 
0.2727074921131134, "lr": 4.796088821531549e-06, "epoch": 1.3813570574956846, "percentage": 69.07, "elapsed_time": "18:46:46", "remaining_time": "8:24:41"} +{"current_steps": 5203, "total_steps": 7532, "loss": 0.21686753630638123, "lr": 4.7923399256079525e-06, "epoch": 1.3816226264772276, "percentage": 69.08, "elapsed_time": "18:46:59", "remaining_time": "8:24:28"} +{"current_steps": 5204, "total_steps": 7532, "loss": 0.2841380834579468, "lr": 4.788592033659799e-06, "epoch": 1.3818881954587705, "percentage": 69.09, "elapsed_time": "18:47:12", "remaining_time": "8:24:15"} +{"current_steps": 5205, "total_steps": 7532, "loss": 0.24577853083610535, "lr": 4.78484514640964e-06, "epoch": 1.3821537644403135, "percentage": 69.11, "elapsed_time": "18:47:25", "remaining_time": "8:24:02"} +{"current_steps": 5206, "total_steps": 7532, "loss": 0.22289782762527466, "lr": 4.7810992645798285e-06, "epoch": 1.3824193334218564, "percentage": 69.12, "elapsed_time": "18:47:38", "remaining_time": "8:23:49"} +{"current_steps": 5207, "total_steps": 7532, "loss": 0.2223999947309494, "lr": 4.7773543888925274e-06, "epoch": 1.3826849024033994, "percentage": 69.13, "elapsed_time": "18:47:51", "remaining_time": "8:23:36"} +{"current_steps": 5208, "total_steps": 7532, "loss": 0.23938870429992676, "lr": 4.773610520069706e-06, "epoch": 1.3829504713849423, "percentage": 69.14, "elapsed_time": "18:48:05", "remaining_time": "8:23:23"} +{"current_steps": 5209, "total_steps": 7532, "loss": 0.260856568813324, "lr": 4.769867658833136e-06, "epoch": 1.3832160403664853, "percentage": 69.16, "elapsed_time": "18:48:18", "remaining_time": "8:23:10"} +{"current_steps": 5210, "total_steps": 7532, "loss": 0.23602089285850525, "lr": 4.766125805904398e-06, "epoch": 1.3834816093480282, "percentage": 69.17, "elapsed_time": "18:48:31", "remaining_time": "8:22:57"} +{"current_steps": 5211, "total_steps": 7532, "loss": 0.22543978691101074, "lr": 4.762384962004877e-06, "epoch": 1.3837471783295712, "percentage": 69.18, 
"elapsed_time": "18:48:44", "remaining_time": "8:22:44"} +{"current_steps": 5212, "total_steps": 7532, "loss": 0.2432224452495575, "lr": 4.758645127855763e-06, "epoch": 1.3840127473111141, "percentage": 69.2, "elapsed_time": "18:48:57", "remaining_time": "8:22:31"} +{"current_steps": 5213, "total_steps": 7532, "loss": 0.22764597833156586, "lr": 4.754906304178049e-06, "epoch": 1.384278316292657, "percentage": 69.21, "elapsed_time": "18:49:10", "remaining_time": "8:22:18"} +{"current_steps": 5214, "total_steps": 7532, "loss": 0.22503387928009033, "lr": 4.751168491692541e-06, "epoch": 1.3845438852742, "percentage": 69.22, "elapsed_time": "18:49:24", "remaining_time": "8:22:06"} +{"current_steps": 5215, "total_steps": 7532, "loss": 0.21889932453632355, "lr": 4.747431691119846e-06, "epoch": 1.384809454255743, "percentage": 69.24, "elapsed_time": "18:49:37", "remaining_time": "8:21:53"} +{"current_steps": 5216, "total_steps": 7532, "loss": 0.2695825695991516, "lr": 4.743695903180372e-06, "epoch": 1.385075023237286, "percentage": 69.25, "elapsed_time": "18:49:50", "remaining_time": "8:21:40"} +{"current_steps": 5217, "total_steps": 7532, "loss": 0.265118271112442, "lr": 4.739961128594336e-06, "epoch": 1.3853405922188289, "percentage": 69.26, "elapsed_time": "18:50:03", "remaining_time": "8:21:27"} +{"current_steps": 5218, "total_steps": 7532, "loss": 0.2050788253545761, "lr": 4.736227368081757e-06, "epoch": 1.3856061612003718, "percentage": 69.28, "elapsed_time": "18:50:16", "remaining_time": "8:21:14"} +{"current_steps": 5219, "total_steps": 7532, "loss": 0.274588406085968, "lr": 4.7324946223624625e-06, "epoch": 1.3858717301819148, "percentage": 69.29, "elapsed_time": "18:50:29", "remaining_time": "8:21:01"} +{"current_steps": 5220, "total_steps": 7532, "loss": 0.2242514044046402, "lr": 4.728762892156079e-06, "epoch": 1.3861372991634577, "percentage": 69.3, "elapsed_time": "18:50:42", "remaining_time": "8:20:48"} +{"current_steps": 5221, "total_steps": 7532, "loss": 
0.19989261031150818, "lr": 4.725032178182042e-06, "epoch": 1.3864028681450007, "percentage": 69.32, "elapsed_time": "18:50:55", "remaining_time": "8:20:35"} +{"current_steps": 5222, "total_steps": 7532, "loss": 0.24409207701683044, "lr": 4.721302481159588e-06, "epoch": 1.3866684371265436, "percentage": 69.33, "elapsed_time": "18:51:08", "remaining_time": "8:20:22"} +{"current_steps": 5223, "total_steps": 7532, "loss": 0.25146353244781494, "lr": 4.71757380180776e-06, "epoch": 1.3869340061080866, "percentage": 69.34, "elapsed_time": "18:51:22", "remaining_time": "8:20:09"} +{"current_steps": 5224, "total_steps": 7532, "loss": 0.23076622188091278, "lr": 4.713846140845401e-06, "epoch": 1.3871995750896295, "percentage": 69.36, "elapsed_time": "18:51:34", "remaining_time": "8:19:56"} +{"current_steps": 5225, "total_steps": 7532, "loss": 0.2159188687801361, "lr": 4.7101194989911635e-06, "epoch": 1.3874651440711725, "percentage": 69.37, "elapsed_time": "18:51:48", "remaining_time": "8:19:43"} +{"current_steps": 5226, "total_steps": 7532, "loss": 0.24891307950019836, "lr": 4.706393876963497e-06, "epoch": 1.3877307130527154, "percentage": 69.38, "elapsed_time": "18:52:00", "remaining_time": "8:19:30"} +{"current_steps": 5227, "total_steps": 7532, "loss": 0.26254773139953613, "lr": 4.702669275480659e-06, "epoch": 1.3879962820342584, "percentage": 69.4, "elapsed_time": "18:52:14", "remaining_time": "8:19:17"} +{"current_steps": 5228, "total_steps": 7532, "loss": 0.19589121639728546, "lr": 4.698945695260709e-06, "epoch": 1.3882618510158014, "percentage": 69.41, "elapsed_time": "18:52:27", "remaining_time": "8:19:04"} +{"current_steps": 5229, "total_steps": 7532, "loss": 0.23796147108078003, "lr": 4.695223137021509e-06, "epoch": 1.3885274199973443, "percentage": 69.42, "elapsed_time": "18:52:40", "remaining_time": "8:18:51"} +{"current_steps": 5230, "total_steps": 7532, "loss": 0.21211156249046326, "lr": 4.6915016014807235e-06, "epoch": 1.3887929889788873, "percentage": 69.44, 
"elapsed_time": "18:52:53", "remaining_time": "8:18:38"} +{"current_steps": 5231, "total_steps": 7532, "loss": 0.22418555617332458, "lr": 4.687781089355817e-06, "epoch": 1.3890585579604302, "percentage": 69.45, "elapsed_time": "18:53:06", "remaining_time": "8:18:25"} +{"current_steps": 5232, "total_steps": 7532, "loss": 0.24140511453151703, "lr": 4.68406160136407e-06, "epoch": 1.3893241269419732, "percentage": 69.46, "elapsed_time": "18:53:19", "remaining_time": "8:18:12"} +{"current_steps": 5233, "total_steps": 7532, "loss": 0.2863473892211914, "lr": 4.68034313822255e-06, "epoch": 1.389589695923516, "percentage": 69.48, "elapsed_time": "18:53:32", "remaining_time": "8:17:59"} +{"current_steps": 5234, "total_steps": 7532, "loss": 0.21283546090126038, "lr": 4.676625700648133e-06, "epoch": 1.389855264905059, "percentage": 69.49, "elapsed_time": "18:53:45", "remaining_time": "8:17:46"} +{"current_steps": 5235, "total_steps": 7532, "loss": 0.2701990008354187, "lr": 4.672909289357498e-06, "epoch": 1.390120833886602, "percentage": 69.5, "elapsed_time": "18:53:58", "remaining_time": "8:17:33"} +{"current_steps": 5236, "total_steps": 7532, "loss": 0.23807264864444733, "lr": 4.669193905067124e-06, "epoch": 1.390386402868145, "percentage": 69.52, "elapsed_time": "18:54:11", "remaining_time": "8:17:20"} +{"current_steps": 5237, "total_steps": 7532, "loss": 0.22204206883907318, "lr": 4.665479548493298e-06, "epoch": 1.390651971849688, "percentage": 69.53, "elapsed_time": "18:54:24", "remaining_time": "8:17:07"} +{"current_steps": 5238, "total_steps": 7532, "loss": 0.22389569878578186, "lr": 4.661766220352098e-06, "epoch": 1.3909175408312309, "percentage": 69.54, "elapsed_time": "18:54:37", "remaining_time": "8:16:54"} +{"current_steps": 5239, "total_steps": 7532, "loss": 0.23752997815608978, "lr": 4.65805392135941e-06, "epoch": 1.3911831098127738, "percentage": 69.56, "elapsed_time": "18:54:50", "remaining_time": "8:16:41"} +{"current_steps": 5240, "total_steps": 7532, "loss": 
0.24055880308151245, "lr": 4.654342652230921e-06, "epoch": 1.3914486787943168, "percentage": 69.57, "elapsed_time": "18:55:03", "remaining_time": "8:16:28"} +{"current_steps": 5241, "total_steps": 7532, "loss": 0.22136151790618896, "lr": 4.6506324136821255e-06, "epoch": 1.3917142477758597, "percentage": 69.58, "elapsed_time": "18:55:16", "remaining_time": "8:16:15"} +{"current_steps": 5242, "total_steps": 7532, "loss": 0.2616429924964905, "lr": 4.646923206428311e-06, "epoch": 1.3919798167574027, "percentage": 69.6, "elapsed_time": "18:55:29", "remaining_time": "8:16:02"} +{"current_steps": 5243, "total_steps": 7532, "loss": 0.24827662110328674, "lr": 4.643215031184569e-06, "epoch": 1.3922453857389456, "percentage": 69.61, "elapsed_time": "18:55:42", "remaining_time": "8:15:49"} +{"current_steps": 5244, "total_steps": 7532, "loss": 0.21999669075012207, "lr": 4.639507888665792e-06, "epoch": 1.3925109547204886, "percentage": 69.62, "elapsed_time": "18:55:55", "remaining_time": "8:15:36"} +{"current_steps": 5245, "total_steps": 7532, "loss": 0.24511300027370453, "lr": 4.6358017795866715e-06, "epoch": 1.3927765237020315, "percentage": 69.64, "elapsed_time": "18:56:08", "remaining_time": "8:15:24"} +{"current_steps": 5246, "total_steps": 7532, "loss": 0.2410753220319748, "lr": 4.632096704661704e-06, "epoch": 1.3930420926835745, "percentage": 69.65, "elapsed_time": "18:56:21", "remaining_time": "8:15:10"} +{"current_steps": 5247, "total_steps": 7532, "loss": 0.2160021960735321, "lr": 4.628392664605184e-06, "epoch": 1.3933076616651174, "percentage": 69.66, "elapsed_time": "18:56:35", "remaining_time": "8:14:58"} +{"current_steps": 5248, "total_steps": 7532, "loss": 0.22672782838344574, "lr": 4.624689660131204e-06, "epoch": 1.3935732306466604, "percentage": 69.68, "elapsed_time": "18:56:47", "remaining_time": "8:14:45"} +{"current_steps": 5249, "total_steps": 7532, "loss": 0.25474926829338074, "lr": 4.620987691953659e-06, "epoch": 1.3938387996282033, "percentage": 69.69, 
"elapsed_time": "18:57:01", "remaining_time": "8:14:32"} +{"current_steps": 5250, "total_steps": 7532, "loss": 0.2449323832988739, "lr": 4.617286760786252e-06, "epoch": 1.3941043686097463, "percentage": 69.7, "elapsed_time": "18:57:13", "remaining_time": "8:14:19"} +{"current_steps": 5251, "total_steps": 7532, "loss": 0.23727643489837646, "lr": 4.613586867342473e-06, "epoch": 1.3943699375912892, "percentage": 69.72, "elapsed_time": "18:57:27", "remaining_time": "8:14:06"} +{"current_steps": 5252, "total_steps": 7532, "loss": 0.23727962374687195, "lr": 4.609888012335624e-06, "epoch": 1.3946355065728322, "percentage": 69.73, "elapsed_time": "18:57:40", "remaining_time": "8:13:53"} +{"current_steps": 5253, "total_steps": 7532, "loss": 0.21957805752754211, "lr": 4.60619019647879e-06, "epoch": 1.3949010755543751, "percentage": 69.74, "elapsed_time": "18:57:53", "remaining_time": "8:13:40"} +{"current_steps": 5254, "total_steps": 7532, "loss": 0.24184471368789673, "lr": 4.6024934204848745e-06, "epoch": 1.395166644535918, "percentage": 69.76, "elapsed_time": "18:58:06", "remaining_time": "8:13:27"} +{"current_steps": 5255, "total_steps": 7532, "loss": 0.239216148853302, "lr": 4.598797685066568e-06, "epoch": 1.395432213517461, "percentage": 69.77, "elapsed_time": "18:58:19", "remaining_time": "8:13:14"} +{"current_steps": 5256, "total_steps": 7532, "loss": 0.17741018533706665, "lr": 4.595102990936367e-06, "epoch": 1.395697782499004, "percentage": 69.78, "elapsed_time": "18:58:32", "remaining_time": "8:13:01"} +{"current_steps": 5257, "total_steps": 7532, "loss": 0.26139867305755615, "lr": 4.591409338806566e-06, "epoch": 1.395963351480547, "percentage": 69.8, "elapsed_time": "18:58:46", "remaining_time": "8:12:48"} +{"current_steps": 5258, "total_steps": 7532, "loss": 0.23689255118370056, "lr": 4.587716729389251e-06, "epoch": 1.39622892046209, "percentage": 69.81, "elapsed_time": "18:58:59", "remaining_time": "8:12:35"} +{"current_steps": 5259, "total_steps": 7532, "loss": 
0.22679267823696136, "lr": 4.584025163396323e-06, "epoch": 1.3964944894436329, "percentage": 69.82, "elapsed_time": "18:59:12", "remaining_time": "8:12:22"} +{"current_steps": 5260, "total_steps": 7532, "loss": 0.2743435204029083, "lr": 4.580334641539467e-06, "epoch": 1.3967600584251758, "percentage": 69.84, "elapsed_time": "18:59:25", "remaining_time": "8:12:09"} +{"current_steps": 5261, "total_steps": 7532, "loss": 0.22738990187644958, "lr": 4.5766451645301735e-06, "epoch": 1.3970256274067188, "percentage": 69.85, "elapsed_time": "18:59:38", "remaining_time": "8:11:56"} +{"current_steps": 5262, "total_steps": 7532, "loss": 0.24826082587242126, "lr": 4.57295673307973e-06, "epoch": 1.3972911963882617, "percentage": 69.86, "elapsed_time": "18:59:51", "remaining_time": "8:11:43"} +{"current_steps": 5263, "total_steps": 7532, "loss": 0.23121042549610138, "lr": 4.569269347899222e-06, "epoch": 1.3975567653698049, "percentage": 69.88, "elapsed_time": "19:00:04", "remaining_time": "8:11:30"} +{"current_steps": 5264, "total_steps": 7532, "loss": 0.21382957696914673, "lr": 4.5655830096995345e-06, "epoch": 1.3978223343513478, "percentage": 69.89, "elapsed_time": "19:00:17", "remaining_time": "8:11:17"} +{"current_steps": 5265, "total_steps": 7532, "loss": 0.24439184367656708, "lr": 4.561897719191349e-06, "epoch": 1.3980879033328908, "percentage": 69.9, "elapsed_time": "19:00:30", "remaining_time": "8:11:04"} +{"current_steps": 5266, "total_steps": 7532, "loss": 0.2106003314256668, "lr": 4.558213477085148e-06, "epoch": 1.3983534723144337, "percentage": 69.92, "elapsed_time": "19:00:44", "remaining_time": "8:10:52"} +{"current_steps": 5267, "total_steps": 7532, "loss": 0.3073291480541229, "lr": 4.554530284091209e-06, "epoch": 1.3986190412959767, "percentage": 69.93, "elapsed_time": "19:00:56", "remaining_time": "8:10:38"} +{"current_steps": 5268, "total_steps": 7532, "loss": 0.2448226660490036, "lr": 4.550848140919606e-06, "epoch": 1.3988846102775196, "percentage": 69.94, 
"elapsed_time": "19:01:10", "remaining_time": "8:10:26"} +{"current_steps": 5269, "total_steps": 7532, "loss": 0.25378671288490295, "lr": 4.5471670482802165e-06, "epoch": 1.3991501792590626, "percentage": 69.95, "elapsed_time": "19:01:22", "remaining_time": "8:10:12"} +{"current_steps": 5270, "total_steps": 7532, "loss": 0.2735089659690857, "lr": 4.5434870068827086e-06, "epoch": 1.3994157482406056, "percentage": 69.97, "elapsed_time": "19:01:36", "remaining_time": "8:10:00"} +{"current_steps": 5271, "total_steps": 7532, "loss": 0.2530548870563507, "lr": 4.539808017436552e-06, "epoch": 1.3996813172221485, "percentage": 69.98, "elapsed_time": "19:01:48", "remaining_time": "8:09:46"} +{"current_steps": 5272, "total_steps": 7532, "loss": 0.23692254722118378, "lr": 4.536130080651015e-06, "epoch": 1.3999468862036915, "percentage": 69.99, "elapsed_time": "19:02:01", "remaining_time": "8:09:33"} +{"current_steps": 5273, "total_steps": 7532, "loss": 0.24554882943630219, "lr": 4.532453197235155e-06, "epoch": 1.4002124551852344, "percentage": 70.01, "elapsed_time": "19:02:15", "remaining_time": "8:09:21"} +{"current_steps": 5274, "total_steps": 7532, "loss": 0.20152084529399872, "lr": 4.528777367897837e-06, "epoch": 1.4004780241667774, "percentage": 70.02, "elapsed_time": "19:02:28", "remaining_time": "8:09:08"} +{"current_steps": 5275, "total_steps": 7532, "loss": 0.20908965170383453, "lr": 4.525102593347714e-06, "epoch": 1.4007435931483203, "percentage": 70.03, "elapsed_time": "19:02:41", "remaining_time": "8:08:55"} +{"current_steps": 5276, "total_steps": 7532, "loss": 0.23158209025859833, "lr": 4.521428874293238e-06, "epoch": 1.4010091621298633, "percentage": 70.05, "elapsed_time": "19:02:54", "remaining_time": "8:08:42"} +{"current_steps": 5277, "total_steps": 7532, "loss": 0.2483675330877304, "lr": 4.517756211442664e-06, "epoch": 1.4012747311114062, "percentage": 70.06, "elapsed_time": "19:03:07", "remaining_time": "8:08:29"} +{"current_steps": 5278, "total_steps": 
7532, "loss": 0.23435397446155548, "lr": 4.514084605504035e-06, "epoch": 1.4015403000929492, "percentage": 70.07, "elapsed_time": "19:03:20", "remaining_time": "8:08:16"} +{"current_steps": 5279, "total_steps": 7532, "loss": 0.2605316936969757, "lr": 4.510414057185195e-06, "epoch": 1.4018058690744921, "percentage": 70.09, "elapsed_time": "19:03:33", "remaining_time": "8:08:03"} +{"current_steps": 5280, "total_steps": 7532, "loss": 0.2279929518699646, "lr": 4.506744567193782e-06, "epoch": 1.402071438056035, "percentage": 70.1, "elapsed_time": "19:03:46", "remaining_time": "8:07:50"} +{"current_steps": 5281, "total_steps": 7532, "loss": 0.23011639714241028, "lr": 4.503076136237228e-06, "epoch": 1.402337007037578, "percentage": 70.11, "elapsed_time": "19:03:59", "remaining_time": "8:07:37"} +{"current_steps": 5282, "total_steps": 7532, "loss": 0.213611900806427, "lr": 4.499408765022765e-06, "epoch": 1.402602576019121, "percentage": 70.13, "elapsed_time": "19:04:12", "remaining_time": "8:07:24"} +{"current_steps": 5283, "total_steps": 7532, "loss": 0.25555503368377686, "lr": 4.495742454257418e-06, "epoch": 1.402868145000664, "percentage": 70.14, "elapsed_time": "19:04:26", "remaining_time": "8:07:11"} +{"current_steps": 5284, "total_steps": 7532, "loss": 0.2694614827632904, "lr": 4.4920772046480095e-06, "epoch": 1.4031337139822069, "percentage": 70.15, "elapsed_time": "19:04:38", "remaining_time": "8:06:58"} +{"current_steps": 5285, "total_steps": 7532, "loss": 0.2160607874393463, "lr": 4.4884130169011565e-06, "epoch": 1.4033992829637498, "percentage": 70.17, "elapsed_time": "19:04:51", "remaining_time": "8:06:45"} +{"current_steps": 5286, "total_steps": 7532, "loss": 0.2556128203868866, "lr": 4.48474989172327e-06, "epoch": 1.4036648519452928, "percentage": 70.18, "elapsed_time": "19:05:04", "remaining_time": "8:06:32"} +{"current_steps": 5287, "total_steps": 7532, "loss": 0.2251313328742981, "lr": 4.481087829820558e-06, "epoch": 1.4039304209268357, "percentage": 
70.19, "elapsed_time": "19:05:17", "remaining_time": "8:06:19"} +{"current_steps": 5288, "total_steps": 7532, "loss": 0.26856666803359985, "lr": 4.477426831899024e-06, "epoch": 1.4041959899083787, "percentage": 70.21, "elapsed_time": "19:05:30", "remaining_time": "8:06:06"} +{"current_steps": 5289, "total_steps": 7532, "loss": 0.25573840737342834, "lr": 4.473766898664464e-06, "epoch": 1.4044615588899216, "percentage": 70.22, "elapsed_time": "19:05:44", "remaining_time": "8:05:53"} +{"current_steps": 5290, "total_steps": 7532, "loss": 0.26519301533699036, "lr": 4.4701080308224685e-06, "epoch": 1.4047271278714646, "percentage": 70.23, "elapsed_time": "19:05:56", "remaining_time": "8:05:40"} +{"current_steps": 5291, "total_steps": 7532, "loss": 0.2329619824886322, "lr": 4.466450229078427e-06, "epoch": 1.4049926968530075, "percentage": 70.25, "elapsed_time": "19:06:09", "remaining_time": "8:05:27"} +{"current_steps": 5292, "total_steps": 7532, "loss": 0.2243901491165161, "lr": 4.4627934941375185e-06, "epoch": 1.4052582658345505, "percentage": 70.26, "elapsed_time": "19:06:23", "remaining_time": "8:05:14"} +{"current_steps": 5293, "total_steps": 7532, "loss": 0.22516998648643494, "lr": 4.45913782670472e-06, "epoch": 1.4055238348160934, "percentage": 70.27, "elapsed_time": "19:06:35", "remaining_time": "8:05:01"} +{"current_steps": 5294, "total_steps": 7532, "loss": 0.25573113560676575, "lr": 4.455483227484796e-06, "epoch": 1.4057894037976364, "percentage": 70.29, "elapsed_time": "19:06:49", "remaining_time": "8:04:48"} +{"current_steps": 5295, "total_steps": 7532, "loss": 0.2568536698818207, "lr": 4.451829697182317e-06, "epoch": 1.4060549727791793, "percentage": 70.3, "elapsed_time": "19:07:02", "remaining_time": "8:04:35"} +{"current_steps": 5296, "total_steps": 7532, "loss": 0.24510663747787476, "lr": 4.448177236501638e-06, "epoch": 1.4063205417607223, "percentage": 70.31, "elapsed_time": "19:07:15", "remaining_time": "8:04:22"} +{"current_steps": 5297, "total_steps": 
7532, "loss": 0.24890470504760742, "lr": 4.444525846146911e-06, "epoch": 1.4065861107422652, "percentage": 70.33, "elapsed_time": "19:07:28", "remaining_time": "8:04:09"} +{"current_steps": 5298, "total_steps": 7532, "loss": 0.21442994475364685, "lr": 4.440875526822081e-06, "epoch": 1.4068516797238082, "percentage": 70.34, "elapsed_time": "19:07:41", "remaining_time": "8:03:56"} +{"current_steps": 5299, "total_steps": 7532, "loss": 0.24281370639801025, "lr": 4.437226279230884e-06, "epoch": 1.4071172487053512, "percentage": 70.35, "elapsed_time": "19:07:54", "remaining_time": "8:03:43"} +{"current_steps": 5300, "total_steps": 7532, "loss": 0.19542500376701355, "lr": 4.433578104076853e-06, "epoch": 1.407382817686894, "percentage": 70.37, "elapsed_time": "19:08:07", "remaining_time": "8:03:30"} +{"current_steps": 5301, "total_steps": 7532, "loss": 0.22688990831375122, "lr": 4.429931002063315e-06, "epoch": 1.407648386668437, "percentage": 70.38, "elapsed_time": "19:08:25", "remaining_time": "8:03:20"} +{"current_steps": 5302, "total_steps": 7532, "loss": 0.2520858347415924, "lr": 4.42628497389339e-06, "epoch": 1.40791395564998, "percentage": 70.39, "elapsed_time": "19:08:38", "remaining_time": "8:03:06"} +{"current_steps": 5303, "total_steps": 7532, "loss": 0.237991064786911, "lr": 4.42264002026998e-06, "epoch": 1.408179524631523, "percentage": 70.41, "elapsed_time": "19:08:51", "remaining_time": "8:02:54"} +{"current_steps": 5304, "total_steps": 7532, "loss": 0.20164436101913452, "lr": 4.418996141895797e-06, "epoch": 1.408445093613066, "percentage": 70.42, "elapsed_time": "19:09:04", "remaining_time": "8:02:40"} +{"current_steps": 5305, "total_steps": 7532, "loss": 0.24009189009666443, "lr": 4.415353339473338e-06, "epoch": 1.408710662594609, "percentage": 70.43, "elapsed_time": "19:09:18", "remaining_time": "8:02:28"} +{"current_steps": 5306, "total_steps": 7532, "loss": 0.23170322179794312, "lr": 4.411711613704889e-06, "epoch": 1.408976231576152, "percentage": 70.45, 
"elapsed_time": "19:09:31", "remaining_time": "8:02:15"} +{"current_steps": 5307, "total_steps": 7532, "loss": 0.2280617356300354, "lr": 4.408070965292534e-06, "epoch": 1.409241800557695, "percentage": 70.46, "elapsed_time": "19:09:44", "remaining_time": "8:02:02"} +{"current_steps": 5308, "total_steps": 7532, "loss": 0.21982887387275696, "lr": 4.404431394938145e-06, "epoch": 1.409507369539238, "percentage": 70.47, "elapsed_time": "19:09:56", "remaining_time": "8:01:48"} +{"current_steps": 5309, "total_steps": 7532, "loss": 0.25295430421829224, "lr": 4.40079290334339e-06, "epoch": 1.409772938520781, "percentage": 70.49, "elapsed_time": "19:10:10", "remaining_time": "8:01:36"} +{"current_steps": 5310, "total_steps": 7532, "loss": 0.20109041035175323, "lr": 4.397155491209727e-06, "epoch": 1.4100385075023238, "percentage": 70.5, "elapsed_time": "19:10:23", "remaining_time": "8:01:23"} +{"current_steps": 5311, "total_steps": 7532, "loss": 0.2487715482711792, "lr": 4.393519159238405e-06, "epoch": 1.4103040764838668, "percentage": 70.51, "elapsed_time": "19:10:36", "remaining_time": "8:01:10"} +{"current_steps": 5312, "total_steps": 7532, "loss": 0.2031790167093277, "lr": 4.389883908130465e-06, "epoch": 1.4105696454654097, "percentage": 70.53, "elapsed_time": "19:10:49", "remaining_time": "8:00:57"} +{"current_steps": 5313, "total_steps": 7532, "loss": 0.23029211163520813, "lr": 4.386249738586744e-06, "epoch": 1.4108352144469527, "percentage": 70.54, "elapsed_time": "19:11:02", "remaining_time": "8:00:44"} +{"current_steps": 5314, "total_steps": 7532, "loss": 0.23080995678901672, "lr": 4.382616651307866e-06, "epoch": 1.4111007834284957, "percentage": 70.55, "elapsed_time": "19:11:15", "remaining_time": "8:00:31"} +{"current_steps": 5315, "total_steps": 7532, "loss": 0.2450534999370575, "lr": 4.378984646994248e-06, "epoch": 1.4113663524100386, "percentage": 70.57, "elapsed_time": "19:11:27", "remaining_time": "8:00:18"} +{"current_steps": 5316, "total_steps": 7532, 
"loss": 0.24349799752235413, "lr": 4.375353726346094e-06, "epoch": 1.4116319213915816, "percentage": 70.58, "elapsed_time": "19:11:41", "remaining_time": "8:00:05"} +{"current_steps": 5317, "total_steps": 7532, "loss": 0.2431599199771881, "lr": 4.371723890063411e-06, "epoch": 1.4118974903731245, "percentage": 70.59, "elapsed_time": "19:11:53", "remaining_time": "7:59:52"} +{"current_steps": 5318, "total_steps": 7532, "loss": 0.2051251232624054, "lr": 4.368095138845978e-06, "epoch": 1.4121630593546675, "percentage": 70.61, "elapsed_time": "19:12:07", "remaining_time": "7:59:39"} +{"current_steps": 5319, "total_steps": 7532, "loss": 0.21346575021743774, "lr": 4.36446747339338e-06, "epoch": 1.4124286283362104, "percentage": 70.62, "elapsed_time": "19:12:20", "remaining_time": "7:59:26"} +{"current_steps": 5320, "total_steps": 7532, "loss": 0.22193217277526855, "lr": 4.360840894404989e-06, "epoch": 1.4126941973177534, "percentage": 70.63, "elapsed_time": "19:12:33", "remaining_time": "7:59:13"} +{"current_steps": 5321, "total_steps": 7532, "loss": 0.2112501859664917, "lr": 4.357215402579961e-06, "epoch": 1.4129597662992963, "percentage": 70.65, "elapsed_time": "19:12:46", "remaining_time": "7:59:00"} +{"current_steps": 5322, "total_steps": 7532, "loss": 0.2648766040802002, "lr": 4.3535909986172565e-06, "epoch": 1.4132253352808393, "percentage": 70.66, "elapsed_time": "19:12:59", "remaining_time": "7:58:47"} +{"current_steps": 5323, "total_steps": 7532, "loss": 0.22139690816402435, "lr": 4.349967683215614e-06, "epoch": 1.4134909042623822, "percentage": 70.67, "elapsed_time": "19:13:12", "remaining_time": "7:58:34"} +{"current_steps": 5324, "total_steps": 7532, "loss": 0.21558481454849243, "lr": 4.346345457073568e-06, "epoch": 1.4137564732439252, "percentage": 70.69, "elapsed_time": "19:13:26", "remaining_time": "7:58:21"} +{"current_steps": 5325, "total_steps": 7532, "loss": 0.2013886272907257, "lr": 4.342724320889438e-06, "epoch": 1.4140220422254681, "percentage": 
70.7, "elapsed_time": "19:13:38", "remaining_time": "7:58:08"} +{"current_steps": 5326, "total_steps": 7532, "loss": 0.2428729385137558, "lr": 4.3391042753613375e-06, "epoch": 1.414287611207011, "percentage": 70.71, "elapsed_time": "19:13:51", "remaining_time": "7:57:55"} +{"current_steps": 5327, "total_steps": 7532, "loss": 0.20930354297161102, "lr": 4.3354853211871696e-06, "epoch": 1.414553180188554, "percentage": 70.72, "elapsed_time": "19:14:05", "remaining_time": "7:57:42"} +{"current_steps": 5328, "total_steps": 7532, "loss": 0.18988853693008423, "lr": 4.331867459064623e-06, "epoch": 1.414818749170097, "percentage": 70.74, "elapsed_time": "19:14:18", "remaining_time": "7:57:29"} +{"current_steps": 5329, "total_steps": 7532, "loss": 0.24618801474571228, "lr": 4.328250689691182e-06, "epoch": 1.41508431815164, "percentage": 70.75, "elapsed_time": "19:14:31", "remaining_time": "7:57:16"} +{"current_steps": 5330, "total_steps": 7532, "loss": 0.23857265710830688, "lr": 4.324635013764113e-06, "epoch": 1.4153498871331829, "percentage": 70.76, "elapsed_time": "19:14:44", "remaining_time": "7:57:03"} +{"current_steps": 5331, "total_steps": 7532, "loss": 0.21869014203548431, "lr": 4.321020431980483e-06, "epoch": 1.4156154561147258, "percentage": 70.78, "elapsed_time": "19:14:58", "remaining_time": "7:56:51"} +{"current_steps": 5332, "total_steps": 7532, "loss": 0.2508969008922577, "lr": 4.317406945037138e-06, "epoch": 1.4158810250962688, "percentage": 70.79, "elapsed_time": "19:15:11", "remaining_time": "7:56:37"} +{"current_steps": 5333, "total_steps": 7532, "loss": 0.2406233549118042, "lr": 4.313794553630711e-06, "epoch": 1.4161465940778117, "percentage": 70.8, "elapsed_time": "19:15:24", "remaining_time": "7:56:25"} +{"current_steps": 5334, "total_steps": 7532, "loss": 0.2376224398612976, "lr": 4.310183258457632e-06, "epoch": 1.4164121630593547, "percentage": 70.82, "elapsed_time": "19:15:37", "remaining_time": "7:56:12"} +{"current_steps": 5335, "total_steps": 7532, 
"loss": 0.2818688750267029, "lr": 4.306573060214115e-06, "epoch": 1.4166777320408976, "percentage": 70.83, "elapsed_time": "19:15:50", "remaining_time": "7:55:59"} +{"current_steps": 5336, "total_steps": 7532, "loss": 0.2279777228832245, "lr": 4.302963959596165e-06, "epoch": 1.4169433010224406, "percentage": 70.84, "elapsed_time": "19:16:03", "remaining_time": "7:55:46"} +{"current_steps": 5337, "total_steps": 7532, "loss": 0.2652052640914917, "lr": 4.299355957299573e-06, "epoch": 1.4172088700039835, "percentage": 70.86, "elapsed_time": "19:16:17", "remaining_time": "7:55:33"} +{"current_steps": 5338, "total_steps": 7532, "loss": 0.24415750801563263, "lr": 4.2957490540199185e-06, "epoch": 1.4174744389855265, "percentage": 70.87, "elapsed_time": "19:16:30", "remaining_time": "7:55:20"} +{"current_steps": 5339, "total_steps": 7532, "loss": 0.2318287044763565, "lr": 4.292143250452569e-06, "epoch": 1.4177400079670694, "percentage": 70.88, "elapsed_time": "19:16:43", "remaining_time": "7:55:07"} +{"current_steps": 5340, "total_steps": 7532, "loss": 0.19914361834526062, "lr": 4.288538547292685e-06, "epoch": 1.4180055769486124, "percentage": 70.9, "elapsed_time": "19:16:56", "remaining_time": "7:54:54"} +{"current_steps": 5341, "total_steps": 7532, "loss": 0.22550678253173828, "lr": 4.2849349452352095e-06, "epoch": 1.4182711459301554, "percentage": 70.91, "elapsed_time": "19:17:10", "remaining_time": "7:54:41"} +{"current_steps": 5342, "total_steps": 7532, "loss": 0.25001436471939087, "lr": 4.281332444974874e-06, "epoch": 1.4185367149116983, "percentage": 70.92, "elapsed_time": "19:17:22", "remaining_time": "7:54:28"} +{"current_steps": 5343, "total_steps": 7532, "loss": 0.24873407185077667, "lr": 4.277731047206197e-06, "epoch": 1.4188022838932413, "percentage": 70.94, "elapsed_time": "19:17:35", "remaining_time": "7:54:15"} +{"current_steps": 5344, "total_steps": 7532, "loss": 0.25732600688934326, "lr": 4.274130752623487e-06, "epoch": 1.4190678528747842, "percentage": 
70.95, "elapsed_time": "19:17:49", "remaining_time": "7:54:02"} +{"current_steps": 5345, "total_steps": 7532, "loss": 0.1894054263830185, "lr": 4.270531561920836e-06, "epoch": 1.4193334218563272, "percentage": 70.96, "elapsed_time": "19:18:02", "remaining_time": "7:53:49"} +{"current_steps": 5346, "total_steps": 7532, "loss": 0.2632025480270386, "lr": 4.2669334757921284e-06, "epoch": 1.4195989908378701, "percentage": 70.98, "elapsed_time": "19:18:15", "remaining_time": "7:53:36"} +{"current_steps": 5347, "total_steps": 7532, "loss": 0.22106415033340454, "lr": 4.2633364949310315e-06, "epoch": 1.419864559819413, "percentage": 70.99, "elapsed_time": "19:18:28", "remaining_time": "7:53:23"} +{"current_steps": 5348, "total_steps": 7532, "loss": 0.2246699184179306, "lr": 4.259740620031e-06, "epoch": 1.420130128800956, "percentage": 71.0, "elapsed_time": "19:18:41", "remaining_time": "7:53:10"} +{"current_steps": 5349, "total_steps": 7532, "loss": 0.2335890382528305, "lr": 4.256145851785277e-06, "epoch": 1.420395697782499, "percentage": 71.02, "elapsed_time": "19:18:54", "remaining_time": "7:52:57"} +{"current_steps": 5350, "total_steps": 7532, "loss": 0.25485220551490784, "lr": 4.252552190886892e-06, "epoch": 1.420661266764042, "percentage": 71.03, "elapsed_time": "19:19:07", "remaining_time": "7:52:45"} +{"current_steps": 5351, "total_steps": 7532, "loss": 0.26234719157218933, "lr": 4.248959638028659e-06, "epoch": 1.4209268357455849, "percentage": 71.04, "elapsed_time": "19:19:20", "remaining_time": "7:52:32"} +{"current_steps": 5352, "total_steps": 7532, "loss": 0.22083795070648193, "lr": 4.245368193903181e-06, "epoch": 1.4211924047271278, "percentage": 71.06, "elapsed_time": "19:19:34", "remaining_time": "7:52:19"} +{"current_steps": 5353, "total_steps": 7532, "loss": 0.1886332929134369, "lr": 4.241777859202846e-06, "epoch": 1.4214579737086708, "percentage": 71.07, "elapsed_time": "19:19:47", "remaining_time": "7:52:06"} +{"current_steps": 5354, "total_steps": 7532, 
"loss": 0.26154160499572754, "lr": 4.238188634619826e-06, "epoch": 1.4217235426902137, "percentage": 71.08, "elapsed_time": "19:20:00", "remaining_time": "7:51:53"} +{"current_steps": 5355, "total_steps": 7532, "loss": 0.24761158227920532, "lr": 4.234600520846085e-06, "epoch": 1.4219891116717567, "percentage": 71.1, "elapsed_time": "19:20:13", "remaining_time": "7:51:40"} +{"current_steps": 5356, "total_steps": 7532, "loss": 0.20936736464500427, "lr": 4.2310135185733625e-06, "epoch": 1.4222546806532996, "percentage": 71.11, "elapsed_time": "19:20:25", "remaining_time": "7:51:27"} +{"current_steps": 5357, "total_steps": 7532, "loss": 0.2173127979040146, "lr": 4.227427628493198e-06, "epoch": 1.4225202496348426, "percentage": 71.12, "elapsed_time": "19:20:39", "remaining_time": "7:51:14"} +{"current_steps": 5358, "total_steps": 7532, "loss": 0.2598559260368347, "lr": 4.223842851296907e-06, "epoch": 1.4227858186163855, "percentage": 71.14, "elapsed_time": "19:20:51", "remaining_time": "7:51:01"} +{"current_steps": 5359, "total_steps": 7532, "loss": 0.23701196908950806, "lr": 4.22025918767559e-06, "epoch": 1.4230513875979285, "percentage": 71.15, "elapsed_time": "19:21:04", "remaining_time": "7:50:48"} +{"current_steps": 5360, "total_steps": 7532, "loss": 0.26052403450012207, "lr": 4.216676638320135e-06, "epoch": 1.4233169565794714, "percentage": 71.16, "elapsed_time": "19:21:17", "remaining_time": "7:50:34"} +{"current_steps": 5361, "total_steps": 7532, "loss": 0.2464584857225418, "lr": 4.213095203921217e-06, "epoch": 1.4235825255610144, "percentage": 71.18, "elapsed_time": "19:21:30", "remaining_time": "7:50:21"} +{"current_steps": 5362, "total_steps": 7532, "loss": 0.25889426469802856, "lr": 4.209514885169294e-06, "epoch": 1.4238480945425573, "percentage": 71.19, "elapsed_time": "19:21:42", "remaining_time": "7:50:08"} +{"current_steps": 5363, "total_steps": 7532, "loss": 0.26529380679130554, "lr": 4.2059356827546076e-06, "epoch": 1.4241136635241003, "percentage": 
71.2, "elapsed_time": "19:21:55", "remaining_time": "7:49:55"} +{"current_steps": 5364, "total_steps": 7532, "loss": 0.2284630388021469, "lr": 4.202357597367187e-06, "epoch": 1.4243792325056432, "percentage": 71.22, "elapsed_time": "19:22:08", "remaining_time": "7:49:42"} +{"current_steps": 5365, "total_steps": 7532, "loss": 0.2361873984336853, "lr": 4.198780629696845e-06, "epoch": 1.4246448014871862, "percentage": 71.23, "elapsed_time": "19:22:21", "remaining_time": "7:49:29"} +{"current_steps": 5366, "total_steps": 7532, "loss": 0.2473624348640442, "lr": 4.195204780433179e-06, "epoch": 1.4249103704687291, "percentage": 71.24, "elapsed_time": "19:22:34", "remaining_time": "7:49:16"} +{"current_steps": 5367, "total_steps": 7532, "loss": 0.24852773547172546, "lr": 4.19163005026557e-06, "epoch": 1.425175939450272, "percentage": 71.26, "elapsed_time": "19:22:47", "remaining_time": "7:49:03"} +{"current_steps": 5368, "total_steps": 7532, "loss": 0.28409647941589355, "lr": 4.188056439883183e-06, "epoch": 1.425441508431815, "percentage": 71.27, "elapsed_time": "19:22:59", "remaining_time": "7:48:50"} +{"current_steps": 5369, "total_steps": 7532, "loss": 0.2500985562801361, "lr": 4.18448394997497e-06, "epoch": 1.425707077413358, "percentage": 71.28, "elapsed_time": "19:23:13", "remaining_time": "7:48:37"} +{"current_steps": 5370, "total_steps": 7532, "loss": 0.23475977778434753, "lr": 4.1809125812296635e-06, "epoch": 1.425972646394901, "percentage": 71.3, "elapsed_time": "19:23:26", "remaining_time": "7:48:24"} +{"current_steps": 5371, "total_steps": 7532, "loss": 0.22925345599651337, "lr": 4.177342334335782e-06, "epoch": 1.426238215376444, "percentage": 71.31, "elapsed_time": "19:23:39", "remaining_time": "7:48:11"} +{"current_steps": 5372, "total_steps": 7532, "loss": 0.24463894963264465, "lr": 4.173773209981627e-06, "epoch": 1.4265037843579869, "percentage": 71.32, "elapsed_time": "19:23:52", "remaining_time": "7:47:58"} +{"current_steps": 5373, "total_steps": 7532, 
"loss": 0.2451590746641159, "lr": 4.170205208855281e-06, "epoch": 1.4267693533395298, "percentage": 71.34, "elapsed_time": "19:24:05", "remaining_time": "7:47:45"} +{"current_steps": 5374, "total_steps": 7532, "loss": 0.21078437566757202, "lr": 4.166638331644613e-06, "epoch": 1.4270349223210728, "percentage": 71.35, "elapsed_time": "19:24:18", "remaining_time": "7:47:32"} +{"current_steps": 5375, "total_steps": 7532, "loss": 0.21466529369354248, "lr": 4.163072579037279e-06, "epoch": 1.427300491302616, "percentage": 71.36, "elapsed_time": "19:24:30", "remaining_time": "7:47:19"} +{"current_steps": 5376, "total_steps": 7532, "loss": 0.20103147625923157, "lr": 4.159507951720713e-06, "epoch": 1.4275660602841589, "percentage": 71.38, "elapsed_time": "19:24:43", "remaining_time": "7:47:06"} +{"current_steps": 5377, "total_steps": 7532, "loss": 0.2618871331214905, "lr": 4.15594445038213e-06, "epoch": 1.4278316292657018, "percentage": 71.39, "elapsed_time": "19:24:56", "remaining_time": "7:46:53"} +{"current_steps": 5378, "total_steps": 7532, "loss": 0.2496388852596283, "lr": 4.152382075708534e-06, "epoch": 1.4280971982472448, "percentage": 71.4, "elapsed_time": "19:25:09", "remaining_time": "7:46:40"} +{"current_steps": 5379, "total_steps": 7532, "loss": 0.2663899064064026, "lr": 4.148820828386707e-06, "epoch": 1.4283627672287877, "percentage": 71.42, "elapsed_time": "19:25:22", "remaining_time": "7:46:27"} +{"current_steps": 5380, "total_steps": 7532, "loss": 0.23617541790008545, "lr": 4.145260709103216e-06, "epoch": 1.4286283362103307, "percentage": 71.43, "elapsed_time": "19:25:36", "remaining_time": "7:46:14"} +{"current_steps": 5381, "total_steps": 7532, "loss": 0.200006365776062, "lr": 4.141701718544411e-06, "epoch": 1.4288939051918736, "percentage": 71.44, "elapsed_time": "19:25:48", "remaining_time": "7:46:01"} +{"current_steps": 5382, "total_steps": 7532, "loss": 0.22707203030586243, "lr": 4.138143857396425e-06, "epoch": 1.4291594741734166, "percentage": 71.46, 
"elapsed_time": "19:26:01", "remaining_time": "7:45:48"} +{"current_steps": 5383, "total_steps": 7532, "loss": 0.23903624713420868, "lr": 4.134587126345162e-06, "epoch": 1.4294250431549596, "percentage": 71.47, "elapsed_time": "19:26:14", "remaining_time": "7:45:35"} +{"current_steps": 5384, "total_steps": 7532, "loss": 0.2308908998966217, "lr": 4.131031526076329e-06, "epoch": 1.4296906121365025, "percentage": 71.48, "elapsed_time": "19:26:27", "remaining_time": "7:45:22"} +{"current_steps": 5385, "total_steps": 7532, "loss": 0.18762601912021637, "lr": 4.127477057275398e-06, "epoch": 1.4299561811180455, "percentage": 71.49, "elapsed_time": "19:26:40", "remaining_time": "7:45:09"} +{"current_steps": 5386, "total_steps": 7532, "loss": 0.281406044960022, "lr": 4.123923720627633e-06, "epoch": 1.4302217500995884, "percentage": 71.51, "elapsed_time": "19:26:53", "remaining_time": "7:44:56"} +{"current_steps": 5387, "total_steps": 7532, "loss": 0.24858589470386505, "lr": 4.120371516818071e-06, "epoch": 1.4304873190811314, "percentage": 71.52, "elapsed_time": "19:27:06", "remaining_time": "7:44:43"} +{"current_steps": 5388, "total_steps": 7532, "loss": 0.22179371118545532, "lr": 4.116820446531538e-06, "epoch": 1.4307528880626743, "percentage": 71.53, "elapsed_time": "19:27:19", "remaining_time": "7:44:30"} +{"current_steps": 5389, "total_steps": 7532, "loss": 0.22086869180202484, "lr": 4.113270510452636e-06, "epoch": 1.4310184570442173, "percentage": 71.55, "elapsed_time": "19:27:32", "remaining_time": "7:44:17"} +{"current_steps": 5390, "total_steps": 7532, "loss": 0.231503427028656, "lr": 4.109721709265753e-06, "epoch": 1.4312840260257602, "percentage": 71.56, "elapsed_time": "19:27:45", "remaining_time": "7:44:04"} +{"current_steps": 5391, "total_steps": 7532, "loss": 0.255252867937088, "lr": 4.106174043655054e-06, "epoch": 1.4315495950073032, "percentage": 71.57, "elapsed_time": "19:27:58", "remaining_time": "7:43:51"} +{"current_steps": 5392, "total_steps": 7532, 
"loss": 0.23336587846279144, "lr": 4.1026275143044854e-06, "epoch": 1.4318151639888461, "percentage": 71.59, "elapsed_time": "19:28:10", "remaining_time": "7:43:37"} +{"current_steps": 5393, "total_steps": 7532, "loss": 0.2468583881855011, "lr": 4.099082121897783e-06, "epoch": 1.432080732970389, "percentage": 71.6, "elapsed_time": "19:28:23", "remaining_time": "7:43:24"} +{"current_steps": 5394, "total_steps": 7532, "loss": 0.21211153268814087, "lr": 4.095537867118452e-06, "epoch": 1.432346301951932, "percentage": 71.61, "elapsed_time": "19:28:36", "remaining_time": "7:43:11"} +{"current_steps": 5395, "total_steps": 7532, "loss": 0.23173204064369202, "lr": 4.091994750649783e-06, "epoch": 1.432611870933475, "percentage": 71.63, "elapsed_time": "19:28:49", "remaining_time": "7:42:58"} +{"current_steps": 5396, "total_steps": 7532, "loss": 0.2606658935546875, "lr": 4.088452773174853e-06, "epoch": 1.432877439915018, "percentage": 71.64, "elapsed_time": "19:29:02", "remaining_time": "7:42:45"} +{"current_steps": 5397, "total_steps": 7532, "loss": 0.21198314428329468, "lr": 4.084911935376502e-06, "epoch": 1.4331430088965609, "percentage": 71.65, "elapsed_time": "19:29:15", "remaining_time": "7:42:32"} +{"current_steps": 5398, "total_steps": 7532, "loss": 0.216193288564682, "lr": 4.08137223793737e-06, "epoch": 1.4334085778781038, "percentage": 71.67, "elapsed_time": "19:29:28", "remaining_time": "7:42:19"} +{"current_steps": 5399, "total_steps": 7532, "loss": 0.27767330408096313, "lr": 4.077833681539866e-06, "epoch": 1.4336741468596468, "percentage": 71.68, "elapsed_time": "19:29:41", "remaining_time": "7:42:06"} +{"current_steps": 5400, "total_steps": 7532, "loss": 0.21584349870681763, "lr": 4.0742962668661826e-06, "epoch": 1.4339397158411897, "percentage": 71.69, "elapsed_time": "19:29:54", "remaining_time": "7:41:53"} +{"current_steps": 5401, "total_steps": 7532, "loss": 0.220070481300354, "lr": 4.070759994598288e-06, "epoch": 1.4342052848227327, "percentage": 71.71, 
"elapsed_time": "19:30:12", "remaining_time": "7:41:42"} +{"current_steps": 5402, "total_steps": 7532, "loss": 0.26035353541374207, "lr": 4.067224865417941e-06, "epoch": 1.4344708538042756, "percentage": 71.72, "elapsed_time": "19:30:26", "remaining_time": "7:41:30"} +{"current_steps": 5403, "total_steps": 7532, "loss": 0.23704876005649567, "lr": 4.063690880006671e-06, "epoch": 1.4347364227858186, "percentage": 71.73, "elapsed_time": "19:30:39", "remaining_time": "7:41:17"} +{"current_steps": 5404, "total_steps": 7532, "loss": 0.2345760464668274, "lr": 4.060158039045785e-06, "epoch": 1.4350019917673615, "percentage": 71.75, "elapsed_time": "19:30:52", "remaining_time": "7:41:04"} +{"current_steps": 5405, "total_steps": 7532, "loss": 0.21307331323623657, "lr": 4.056626343216377e-06, "epoch": 1.4352675607489045, "percentage": 71.76, "elapsed_time": "19:31:05", "remaining_time": "7:40:51"} +{"current_steps": 5406, "total_steps": 7532, "loss": 0.22029465436935425, "lr": 4.053095793199313e-06, "epoch": 1.4355331297304474, "percentage": 71.77, "elapsed_time": "19:31:18", "remaining_time": "7:40:38"} +{"current_steps": 5407, "total_steps": 7532, "loss": 0.23419252038002014, "lr": 4.049566389675244e-06, "epoch": 1.4357986987119904, "percentage": 71.79, "elapsed_time": "19:31:31", "remaining_time": "7:40:25"} +{"current_steps": 5408, "total_steps": 7532, "loss": 0.21648669242858887, "lr": 4.046038133324595e-06, "epoch": 1.4360642676935333, "percentage": 71.8, "elapsed_time": "19:31:45", "remaining_time": "7:40:12"} +{"current_steps": 5409, "total_steps": 7532, "loss": 0.2343464195728302, "lr": 4.042511024827573e-06, "epoch": 1.4363298366750763, "percentage": 71.81, "elapsed_time": "19:31:58", "remaining_time": "7:39:59"} +{"current_steps": 5410, "total_steps": 7532, "loss": 0.20108605921268463, "lr": 4.0389850648641615e-06, "epoch": 1.4365954056566193, "percentage": 71.83, "elapsed_time": "19:32:11", "remaining_time": "7:39:46"} +{"current_steps": 5411, "total_steps": 7532, 
"loss": 0.21885806322097778, "lr": 4.0354602541141315e-06, "epoch": 1.4368609746381622, "percentage": 71.84, "elapsed_time": "19:32:24", "remaining_time": "7:39:33"} +{"current_steps": 5412, "total_steps": 7532, "loss": 0.2382376492023468, "lr": 4.031936593257017e-06, "epoch": 1.4371265436197052, "percentage": 71.85, "elapsed_time": "19:32:37", "remaining_time": "7:39:20"} +{"current_steps": 5413, "total_steps": 7532, "loss": 0.21434128284454346, "lr": 4.028414082972141e-06, "epoch": 1.437392112601248, "percentage": 71.87, "elapsed_time": "19:32:50", "remaining_time": "7:39:07"} +{"current_steps": 5414, "total_steps": 7532, "loss": 0.2345191240310669, "lr": 4.024892723938601e-06, "epoch": 1.437657681582791, "percentage": 71.88, "elapsed_time": "19:33:03", "remaining_time": "7:38:54"} +{"current_steps": 5415, "total_steps": 7532, "loss": 0.2478899210691452, "lr": 4.021372516835273e-06, "epoch": 1.437923250564334, "percentage": 71.89, "elapsed_time": "19:33:17", "remaining_time": "7:38:41"} +{"current_steps": 5416, "total_steps": 7532, "loss": 0.21356827020645142, "lr": 4.017853462340813e-06, "epoch": 1.438188819545877, "percentage": 71.91, "elapsed_time": "19:33:29", "remaining_time": "7:38:28"} +{"current_steps": 5417, "total_steps": 7532, "loss": 0.26329827308654785, "lr": 4.014335561133652e-06, "epoch": 1.4384543885274201, "percentage": 71.92, "elapsed_time": "19:33:43", "remaining_time": "7:38:15"} +{"current_steps": 5418, "total_steps": 7532, "loss": 0.25880998373031616, "lr": 4.010818813892e-06, "epoch": 1.438719957508963, "percentage": 71.93, "elapsed_time": "19:33:56", "remaining_time": "7:38:02"} +{"current_steps": 5419, "total_steps": 7532, "loss": 0.22749441862106323, "lr": 4.007303221293844e-06, "epoch": 1.438985526490506, "percentage": 71.95, "elapsed_time": "19:34:09", "remaining_time": "7:37:49"} +{"current_steps": 5420, "total_steps": 7532, "loss": 0.2242615520954132, "lr": 4.00378878401695e-06, "epoch": 1.439251095472049, "percentage": 71.96, 
"elapsed_time": "19:34:22", "remaining_time": "7:37:36"} +{"current_steps": 5421, "total_steps": 7532, "loss": 0.19751839339733124, "lr": 4.000275502738862e-06, "epoch": 1.439516664453592, "percentage": 71.97, "elapsed_time": "19:34:34", "remaining_time": "7:37:23"} +{"current_steps": 5422, "total_steps": 7532, "loss": 0.27319905161857605, "lr": 3.996763378136895e-06, "epoch": 1.439782233435135, "percentage": 71.99, "elapsed_time": "19:34:47", "remaining_time": "7:37:10"} +{"current_steps": 5423, "total_steps": 7532, "loss": 0.21676769852638245, "lr": 3.993252410888149e-06, "epoch": 1.4400478024166778, "percentage": 72.0, "elapsed_time": "19:35:00", "remaining_time": "7:36:57"} +{"current_steps": 5424, "total_steps": 7532, "loss": 0.22788718342781067, "lr": 3.989742601669494e-06, "epoch": 1.4403133713982208, "percentage": 72.01, "elapsed_time": "19:35:13", "remaining_time": "7:36:44"} +{"current_steps": 5425, "total_steps": 7532, "loss": 0.23224875330924988, "lr": 3.986233951157581e-06, "epoch": 1.4405789403797638, "percentage": 72.03, "elapsed_time": "19:35:26", "remaining_time": "7:36:31"} +{"current_steps": 5426, "total_steps": 7532, "loss": 0.23625247180461884, "lr": 3.982726460028836e-06, "epoch": 1.4408445093613067, "percentage": 72.04, "elapsed_time": "19:35:39", "remaining_time": "7:36:18"} +{"current_steps": 5427, "total_steps": 7532, "loss": 0.2092093527317047, "lr": 3.979220128959463e-06, "epoch": 1.4411100783428497, "percentage": 72.05, "elapsed_time": "19:35:52", "remaining_time": "7:36:05"} +{"current_steps": 5428, "total_steps": 7532, "loss": 0.22196070849895477, "lr": 3.975714958625442e-06, "epoch": 1.4413756473243926, "percentage": 72.07, "elapsed_time": "19:36:05", "remaining_time": "7:35:52"} +{"current_steps": 5429, "total_steps": 7532, "loss": 0.21276375651359558, "lr": 3.972210949702525e-06, "epoch": 1.4416412163059356, "percentage": 72.08, "elapsed_time": "19:36:18", "remaining_time": "7:35:39"} +{"current_steps": 5430, "total_steps": 7532, 
"loss": 0.22150103747844696, "lr": 3.968708102866247e-06, "epoch": 1.4419067852874785, "percentage": 72.09, "elapsed_time": "19:36:32", "remaining_time": "7:35:26"} +{"current_steps": 5431, "total_steps": 7532, "loss": 0.24529573321342468, "lr": 3.965206418791914e-06, "epoch": 1.4421723542690215, "percentage": 72.11, "elapsed_time": "19:36:44", "remaining_time": "7:35:13"} +{"current_steps": 5432, "total_steps": 7532, "loss": 0.24349135160446167, "lr": 3.961705898154609e-06, "epoch": 1.4424379232505644, "percentage": 72.12, "elapsed_time": "19:36:58", "remaining_time": "7:35:00"} +{"current_steps": 5433, "total_steps": 7532, "loss": 0.23481428623199463, "lr": 3.9582065416291926e-06, "epoch": 1.4427034922321074, "percentage": 72.13, "elapsed_time": "19:37:11", "remaining_time": "7:34:47"} +{"current_steps": 5434, "total_steps": 7532, "loss": 0.2366936057806015, "lr": 3.954708349890299e-06, "epoch": 1.4429690612136503, "percentage": 72.15, "elapsed_time": "19:37:24", "remaining_time": "7:34:34"} +{"current_steps": 5435, "total_steps": 7532, "loss": 0.24792322516441345, "lr": 3.951211323612336e-06, "epoch": 1.4432346301951933, "percentage": 72.16, "elapsed_time": "19:37:37", "remaining_time": "7:34:21"} +{"current_steps": 5436, "total_steps": 7532, "loss": 0.22601652145385742, "lr": 3.947715463469493e-06, "epoch": 1.4435001991767362, "percentage": 72.17, "elapsed_time": "19:37:50", "remaining_time": "7:34:09"} +{"current_steps": 5437, "total_steps": 7532, "loss": 0.19603165984153748, "lr": 3.9442207701357235e-06, "epoch": 1.4437657681582792, "percentage": 72.19, "elapsed_time": "19:38:03", "remaining_time": "7:33:56"} +{"current_steps": 5438, "total_steps": 7532, "loss": 0.22619353234767914, "lr": 3.940727244284772e-06, "epoch": 1.4440313371398221, "percentage": 72.2, "elapsed_time": "19:38:16", "remaining_time": "7:33:43"} +{"current_steps": 5439, "total_steps": 7532, "loss": 0.24836638569831848, "lr": 3.937234886590146e-06, "epoch": 1.444296906121365, "percentage": 
72.21, "elapsed_time": "19:38:29", "remaining_time": "7:33:30"} +{"current_steps": 5440, "total_steps": 7532, "loss": 0.21585768461227417, "lr": 3.933743697725129e-06, "epoch": 1.444562475102908, "percentage": 72.23, "elapsed_time": "19:38:42", "remaining_time": "7:33:17"} +{"current_steps": 5441, "total_steps": 7532, "loss": 0.20876167714595795, "lr": 3.930253678362784e-06, "epoch": 1.444828044084451, "percentage": 72.24, "elapsed_time": "19:38:56", "remaining_time": "7:33:04"} +{"current_steps": 5442, "total_steps": 7532, "loss": 0.24337999522686005, "lr": 3.926764829175943e-06, "epoch": 1.445093613065994, "percentage": 72.25, "elapsed_time": "19:39:08", "remaining_time": "7:32:51"} +{"current_steps": 5443, "total_steps": 7532, "loss": 0.2511219084262848, "lr": 3.9232771508372155e-06, "epoch": 1.4453591820475369, "percentage": 72.27, "elapsed_time": "19:39:22", "remaining_time": "7:32:38"} +{"current_steps": 5444, "total_steps": 7532, "loss": 0.26257213950157166, "lr": 3.919790644018986e-06, "epoch": 1.4456247510290798, "percentage": 72.28, "elapsed_time": "19:39:35", "remaining_time": "7:32:25"} +{"current_steps": 5445, "total_steps": 7532, "loss": 0.2720959782600403, "lr": 3.91630530939341e-06, "epoch": 1.4458903200106228, "percentage": 72.29, "elapsed_time": "19:39:48", "remaining_time": "7:32:12"} +{"current_steps": 5446, "total_steps": 7532, "loss": 0.23849177360534668, "lr": 3.912821147632421e-06, "epoch": 1.4461558889921657, "percentage": 72.3, "elapsed_time": "19:40:01", "remaining_time": "7:31:59"} +{"current_steps": 5447, "total_steps": 7532, "loss": 0.2366214245557785, "lr": 3.909338159407722e-06, "epoch": 1.4464214579737087, "percentage": 72.32, "elapsed_time": "19:40:14", "remaining_time": "7:31:46"} +{"current_steps": 5448, "total_steps": 7532, "loss": 0.21905584633350372, "lr": 3.905856345390793e-06, "epoch": 1.4466870269552516, "percentage": 72.33, "elapsed_time": "19:40:27", "remaining_time": "7:31:33"} +{"current_steps": 5449, "total_steps": 
7532, "loss": 0.23964065313339233, "lr": 3.902375706252887e-06, "epoch": 1.4469525959367946, "percentage": 72.34, "elapsed_time": "19:40:41", "remaining_time": "7:31:20"} +{"current_steps": 5450, "total_steps": 7532, "loss": 0.22246500849723816, "lr": 3.89889624266503e-06, "epoch": 1.4472181649183375, "percentage": 72.36, "elapsed_time": "19:40:54", "remaining_time": "7:31:07"} +{"current_steps": 5451, "total_steps": 7532, "loss": 0.22980710864067078, "lr": 3.895417955298022e-06, "epoch": 1.4474837338998805, "percentage": 72.37, "elapsed_time": "19:41:07", "remaining_time": "7:30:54"} +{"current_steps": 5452, "total_steps": 7532, "loss": 0.21276253461837769, "lr": 3.8919408448224346e-06, "epoch": 1.4477493028814234, "percentage": 72.38, "elapsed_time": "19:41:20", "remaining_time": "7:30:41"} +{"current_steps": 5453, "total_steps": 7532, "loss": 0.23925542831420898, "lr": 3.888464911908616e-06, "epoch": 1.4480148718629664, "percentage": 72.4, "elapsed_time": "19:41:33", "remaining_time": "7:30:28"} +{"current_steps": 5454, "total_steps": 7532, "loss": 0.21528369188308716, "lr": 3.884990157226683e-06, "epoch": 1.4482804408445094, "percentage": 72.41, "elapsed_time": "19:41:47", "remaining_time": "7:30:16"} +{"current_steps": 5455, "total_steps": 7532, "loss": 0.24563542008399963, "lr": 3.8815165814465235e-06, "epoch": 1.4485460098260523, "percentage": 72.42, "elapsed_time": "19:42:00", "remaining_time": "7:30:03"} +{"current_steps": 5456, "total_steps": 7532, "loss": 0.2721150517463684, "lr": 3.87804418523781e-06, "epoch": 1.4488115788075953, "percentage": 72.44, "elapsed_time": "19:42:13", "remaining_time": "7:29:50"} +{"current_steps": 5457, "total_steps": 7532, "loss": 0.23716527223587036, "lr": 3.874572969269976e-06, "epoch": 1.4490771477891382, "percentage": 72.45, "elapsed_time": "19:42:26", "remaining_time": "7:29:37"} +{"current_steps": 5458, "total_steps": 7532, "loss": 0.2182254046201706, "lr": 3.871102934212231e-06, "epoch": 1.4493427167706812, 
"percentage": 72.46, "elapsed_time": "19:42:39", "remaining_time": "7:29:24"} +{"current_steps": 5459, "total_steps": 7532, "loss": 0.2179020643234253, "lr": 3.867634080733557e-06, "epoch": 1.4496082857522241, "percentage": 72.48, "elapsed_time": "19:42:52", "remaining_time": "7:29:11"} +{"current_steps": 5460, "total_steps": 7532, "loss": 0.22901684045791626, "lr": 3.864166409502706e-06, "epoch": 1.449873854733767, "percentage": 72.49, "elapsed_time": "19:43:06", "remaining_time": "7:28:58"} +{"current_steps": 5461, "total_steps": 7532, "loss": 0.2287352979183197, "lr": 3.860699921188211e-06, "epoch": 1.45013942371531, "percentage": 72.5, "elapsed_time": "19:43:18", "remaining_time": "7:28:45"} +{"current_steps": 5462, "total_steps": 7532, "loss": 0.2448873668909073, "lr": 3.85723461645836e-06, "epoch": 1.450404992696853, "percentage": 72.52, "elapsed_time": "19:43:32", "remaining_time": "7:28:32"} +{"current_steps": 5463, "total_steps": 7532, "loss": 0.2693510055541992, "lr": 3.85377049598123e-06, "epoch": 1.450670561678396, "percentage": 72.53, "elapsed_time": "19:43:44", "remaining_time": "7:28:19"} +{"current_steps": 5464, "total_steps": 7532, "loss": 0.25414884090423584, "lr": 3.8503075604246554e-06, "epoch": 1.4509361306599389, "percentage": 72.54, "elapsed_time": "19:43:57", "remaining_time": "7:28:06"} +{"current_steps": 5465, "total_steps": 7532, "loss": 0.27798837423324585, "lr": 3.846845810456258e-06, "epoch": 1.4512016996414818, "percentage": 72.56, "elapsed_time": "19:44:10", "remaining_time": "7:27:53"} +{"current_steps": 5466, "total_steps": 7532, "loss": 0.23348593711853027, "lr": 3.8433852467434175e-06, "epoch": 1.4514672686230248, "percentage": 72.57, "elapsed_time": "19:44:23", "remaining_time": "7:27:40"} +{"current_steps": 5467, "total_steps": 7532, "loss": 0.20993635058403015, "lr": 3.839925869953292e-06, "epoch": 1.4517328376045677, "percentage": 72.58, "elapsed_time": "19:44:36", "remaining_time": "7:27:27"} +{"current_steps": 5468, 
"total_steps": 7532, "loss": 0.225263774394989, "lr": 3.836467680752808e-06, "epoch": 1.4519984065861107, "percentage": 72.6, "elapsed_time": "19:44:49", "remaining_time": "7:27:14"} +{"current_steps": 5469, "total_steps": 7532, "loss": 0.2481595277786255, "lr": 3.833010679808662e-06, "epoch": 1.4522639755676536, "percentage": 72.61, "elapsed_time": "19:45:03", "remaining_time": "7:27:01"} +{"current_steps": 5470, "total_steps": 7532, "loss": 0.20755310356616974, "lr": 3.829554867787324e-06, "epoch": 1.4525295445491966, "percentage": 72.62, "elapsed_time": "19:45:15", "remaining_time": "7:26:48"} +{"current_steps": 5471, "total_steps": 7532, "loss": 0.22124455869197845, "lr": 3.826100245355034e-06, "epoch": 1.4527951135307395, "percentage": 72.64, "elapsed_time": "19:45:29", "remaining_time": "7:26:35"} +{"current_steps": 5472, "total_steps": 7532, "loss": 0.23461398482322693, "lr": 3.822646813177803e-06, "epoch": 1.4530606825122825, "percentage": 72.65, "elapsed_time": "19:45:42", "remaining_time": "7:26:22"} +{"current_steps": 5473, "total_steps": 7532, "loss": 0.22890526056289673, "lr": 3.819194571921407e-06, "epoch": 1.4533262514938254, "percentage": 72.66, "elapsed_time": "19:45:55", "remaining_time": "7:26:09"} +{"current_steps": 5474, "total_steps": 7532, "loss": 0.23236533999443054, "lr": 3.815743522251406e-06, "epoch": 1.4535918204753684, "percentage": 72.68, "elapsed_time": "19:46:08", "remaining_time": "7:25:56"} +{"current_steps": 5475, "total_steps": 7532, "loss": 0.2192365825176239, "lr": 3.8122936648331164e-06, "epoch": 1.4538573894569113, "percentage": 72.69, "elapsed_time": "19:46:20", "remaining_time": "7:25:43"} +{"current_steps": 5476, "total_steps": 7532, "loss": 0.23970162868499756, "lr": 3.8088450003316346e-06, "epoch": 1.4541229584384543, "percentage": 72.7, "elapsed_time": "19:46:34", "remaining_time": "7:25:30"} +{"current_steps": 5477, "total_steps": 7532, "loss": 0.24270984530448914, "lr": 3.8053975294118163e-06, "epoch": 
1.4543885274199972, "percentage": 72.72, "elapsed_time": "19:46:47", "remaining_time": "7:25:17"} +{"current_steps": 5478, "total_steps": 7532, "loss": 0.22228944301605225, "lr": 3.801951252738295e-06, "epoch": 1.4546540964015402, "percentage": 72.73, "elapsed_time": "19:47:00", "remaining_time": "7:25:04"} +{"current_steps": 5479, "total_steps": 7532, "loss": 0.25029584765434265, "lr": 3.7985061709754735e-06, "epoch": 1.4549196653830831, "percentage": 72.74, "elapsed_time": "19:47:13", "remaining_time": "7:24:51"} +{"current_steps": 5480, "total_steps": 7532, "loss": 0.23831725120544434, "lr": 3.795062284787522e-06, "epoch": 1.455185234364626, "percentage": 72.76, "elapsed_time": "19:47:26", "remaining_time": "7:24:38"} +{"current_steps": 5481, "total_steps": 7532, "loss": 0.2571605145931244, "lr": 3.7916195948383817e-06, "epoch": 1.455450803346169, "percentage": 72.77, "elapsed_time": "19:47:39", "remaining_time": "7:24:25"} +{"current_steps": 5482, "total_steps": 7532, "loss": 0.2660857141017914, "lr": 3.7881781017917586e-06, "epoch": 1.455716372327712, "percentage": 72.78, "elapsed_time": "19:47:52", "remaining_time": "7:24:12"} +{"current_steps": 5483, "total_steps": 7532, "loss": 0.2468302845954895, "lr": 3.7847378063111394e-06, "epoch": 1.455981941309255, "percentage": 72.8, "elapsed_time": "19:48:05", "remaining_time": "7:23:59"} +{"current_steps": 5484, "total_steps": 7532, "loss": 0.2559482753276825, "lr": 3.7812987090597696e-06, "epoch": 1.456247510290798, "percentage": 72.81, "elapsed_time": "19:48:18", "remaining_time": "7:23:46"} +{"current_steps": 5485, "total_steps": 7532, "loss": 0.24484393000602722, "lr": 3.7778608107006654e-06, "epoch": 1.4565130792723409, "percentage": 72.82, "elapsed_time": "19:48:31", "remaining_time": "7:23:33"} +{"current_steps": 5486, "total_steps": 7532, "loss": 0.2376541644334793, "lr": 3.774424111896614e-06, "epoch": 1.4567786482538838, "percentage": 72.84, "elapsed_time": "19:48:44", "remaining_time": "7:23:20"} 
+{"current_steps": 5487, "total_steps": 7532, "loss": 0.22265875339508057, "lr": 3.770988613310169e-06, "epoch": 1.4570442172354268, "percentage": 72.85, "elapsed_time": "19:48:57", "remaining_time": "7:23:07"} +{"current_steps": 5488, "total_steps": 7532, "loss": 0.2511552572250366, "lr": 3.7675543156036555e-06, "epoch": 1.45730978621697, "percentage": 72.86, "elapsed_time": "19:49:10", "remaining_time": "7:22:54"} +{"current_steps": 5489, "total_steps": 7532, "loss": 0.2412843108177185, "lr": 3.764121219439165e-06, "epoch": 1.457575355198513, "percentage": 72.88, "elapsed_time": "19:49:23", "remaining_time": "7:22:41"} +{"current_steps": 5490, "total_steps": 7532, "loss": 0.26342809200286865, "lr": 3.760689325478559e-06, "epoch": 1.4578409241800558, "percentage": 72.89, "elapsed_time": "19:49:37", "remaining_time": "7:22:28"} +{"current_steps": 5491, "total_steps": 7532, "loss": 0.23315641283988953, "lr": 3.7572586343834638e-06, "epoch": 1.4581064931615988, "percentage": 72.9, "elapsed_time": "19:49:49", "remaining_time": "7:22:15"} +{"current_steps": 5492, "total_steps": 7532, "loss": 0.24148929119110107, "lr": 3.753829146815279e-06, "epoch": 1.4583720621431417, "percentage": 72.92, "elapsed_time": "19:50:02", "remaining_time": "7:22:02"} +{"current_steps": 5493, "total_steps": 7532, "loss": 0.22838115692138672, "lr": 3.750400863435166e-06, "epoch": 1.4586376311246847, "percentage": 72.93, "elapsed_time": "19:50:15", "remaining_time": "7:21:49"} +{"current_steps": 5494, "total_steps": 7532, "loss": 0.21669608354568481, "lr": 3.746973784904061e-06, "epoch": 1.4589032001062276, "percentage": 72.94, "elapsed_time": "19:50:28", "remaining_time": "7:21:36"} +{"current_steps": 5495, "total_steps": 7532, "loss": 0.25619322061538696, "lr": 3.743547911882662e-06, "epoch": 1.4591687690877706, "percentage": 72.96, "elapsed_time": "19:50:42", "remaining_time": "7:21:23"} +{"current_steps": 5496, "total_steps": 7532, "loss": 0.23629480600357056, "lr": 3.7401232450314384e-06, 
"epoch": 1.4594343380693136, "percentage": 72.97, "elapsed_time": "19:50:54", "remaining_time": "7:21:10"} +{"current_steps": 5497, "total_steps": 7532, "loss": 0.21799582242965698, "lr": 3.7366997850106245e-06, "epoch": 1.4596999070508565, "percentage": 72.98, "elapsed_time": "19:51:08", "remaining_time": "7:20:57"} +{"current_steps": 5498, "total_steps": 7532, "loss": 0.2582590579986572, "lr": 3.733277532480223e-06, "epoch": 1.4599654760323995, "percentage": 73.0, "elapsed_time": "19:51:20", "remaining_time": "7:20:44"} +{"current_steps": 5499, "total_steps": 7532, "loss": 0.23641736805438995, "lr": 3.729856488100003e-06, "epoch": 1.4602310450139424, "percentage": 73.01, "elapsed_time": "19:51:33", "remaining_time": "7:20:31"} +{"current_steps": 5500, "total_steps": 7532, "loss": 0.24150417745113373, "lr": 3.7264366525295e-06, "epoch": 1.4604966139954854, "percentage": 73.02, "elapsed_time": "19:51:46", "remaining_time": "7:20:18"} +{"current_steps": 5501, "total_steps": 7532, "loss": 0.2474009394645691, "lr": 3.7230180264280245e-06, "epoch": 1.4607621829770283, "percentage": 73.04, "elapsed_time": "19:52:05", "remaining_time": "7:20:07"} +{"current_steps": 5502, "total_steps": 7532, "loss": 0.269604355096817, "lr": 3.7196006104546435e-06, "epoch": 1.4610277519585713, "percentage": 73.05, "elapsed_time": "19:52:18", "remaining_time": "7:19:54"} +{"current_steps": 5503, "total_steps": 7532, "loss": 0.24324679374694824, "lr": 3.716184405268194e-06, "epoch": 1.4612933209401142, "percentage": 73.06, "elapsed_time": "19:52:31", "remaining_time": "7:19:41"} +{"current_steps": 5504, "total_steps": 7532, "loss": 0.2249709963798523, "lr": 3.7127694115272805e-06, "epoch": 1.4615588899216572, "percentage": 73.07, "elapsed_time": "19:52:44", "remaining_time": "7:19:28"} +{"current_steps": 5505, "total_steps": 7532, "loss": 0.2560918629169464, "lr": 3.7093556298902734e-06, "epoch": 1.4618244589032001, "percentage": 73.09, "elapsed_time": "19:52:57", "remaining_time": 
"7:19:15"} +{"current_steps": 5506, "total_steps": 7532, "loss": 0.22693020105361938, "lr": 3.705943061015309e-06, "epoch": 1.462090027884743, "percentage": 73.1, "elapsed_time": "19:53:09", "remaining_time": "7:19:02"} +{"current_steps": 5507, "total_steps": 7532, "loss": 0.2617371678352356, "lr": 3.702531705560292e-06, "epoch": 1.462355596866286, "percentage": 73.11, "elapsed_time": "19:53:22", "remaining_time": "7:18:49"} +{"current_steps": 5508, "total_steps": 7532, "loss": 0.2314397394657135, "lr": 3.6991215641828903e-06, "epoch": 1.462621165847829, "percentage": 73.13, "elapsed_time": "19:53:35", "remaining_time": "7:18:36"} +{"current_steps": 5509, "total_steps": 7532, "loss": 0.23186162114143372, "lr": 3.6957126375405383e-06, "epoch": 1.462886734829372, "percentage": 73.14, "elapsed_time": "19:53:48", "remaining_time": "7:18:23"} +{"current_steps": 5510, "total_steps": 7532, "loss": 0.21775083243846893, "lr": 3.6923049262904375e-06, "epoch": 1.4631523038109149, "percentage": 73.15, "elapsed_time": "19:54:01", "remaining_time": "7:18:10"} +{"current_steps": 5511, "total_steps": 7532, "loss": 0.24707889556884766, "lr": 3.688898431089556e-06, "epoch": 1.4634178727924578, "percentage": 73.17, "elapsed_time": "19:54:14", "remaining_time": "7:17:57"} +{"current_steps": 5512, "total_steps": 7532, "loss": 0.1941150575876236, "lr": 3.6854931525946237e-06, "epoch": 1.4636834417740008, "percentage": 73.18, "elapsed_time": "19:54:27", "remaining_time": "7:17:44"} +{"current_steps": 5513, "total_steps": 7532, "loss": 0.17808857560157776, "lr": 3.6820890914621376e-06, "epoch": 1.4639490107555437, "percentage": 73.19, "elapsed_time": "19:54:39", "remaining_time": "7:17:30"} +{"current_steps": 5514, "total_steps": 7532, "loss": 0.2150077074766159, "lr": 3.678686248348363e-06, "epoch": 1.4642145797370867, "percentage": 73.21, "elapsed_time": "19:54:52", "remaining_time": "7:17:17"} +{"current_steps": 5515, "total_steps": 7532, "loss": 0.2605292797088623, "lr": 
3.6752846239093276e-06, "epoch": 1.4644801487186296, "percentage": 73.22, "elapsed_time": "19:55:05", "remaining_time": "7:17:04"} +{"current_steps": 5516, "total_steps": 7532, "loss": 0.22481867671012878, "lr": 3.671884218800822e-06, "epoch": 1.4647457177001726, "percentage": 73.23, "elapsed_time": "19:55:18", "remaining_time": "7:16:51"} +{"current_steps": 5517, "total_steps": 7532, "loss": 0.24453294277191162, "lr": 3.668485033678406e-06, "epoch": 1.4650112866817155, "percentage": 73.25, "elapsed_time": "19:55:30", "remaining_time": "7:16:38"} +{"current_steps": 5518, "total_steps": 7532, "loss": 0.2672286033630371, "lr": 3.6650870691973996e-06, "epoch": 1.4652768556632585, "percentage": 73.26, "elapsed_time": "19:55:43", "remaining_time": "7:16:25"} +{"current_steps": 5519, "total_steps": 7532, "loss": 0.2514987587928772, "lr": 3.661690326012897e-06, "epoch": 1.4655424246448014, "percentage": 73.27, "elapsed_time": "19:55:56", "remaining_time": "7:16:12"} +{"current_steps": 5520, "total_steps": 7532, "loss": 0.25671514868736267, "lr": 3.6582948047797438e-06, "epoch": 1.4658079936263444, "percentage": 73.29, "elapsed_time": "19:56:08", "remaining_time": "7:15:59"} +{"current_steps": 5521, "total_steps": 7532, "loss": 0.25485602021217346, "lr": 3.654900506152561e-06, "epoch": 1.4660735626078873, "percentage": 73.3, "elapsed_time": "19:56:22", "remaining_time": "7:15:46"} +{"current_steps": 5522, "total_steps": 7532, "loss": 0.23556292057037354, "lr": 3.6515074307857257e-06, "epoch": 1.4663391315894303, "percentage": 73.31, "elapsed_time": "19:56:34", "remaining_time": "7:15:33"} +{"current_steps": 5523, "total_steps": 7532, "loss": 0.23347696661949158, "lr": 3.6481155793333855e-06, "epoch": 1.4666047005709733, "percentage": 73.33, "elapsed_time": "19:56:47", "remaining_time": "7:15:20"} +{"current_steps": 5524, "total_steps": 7532, "loss": 0.2405884712934494, "lr": 3.6447249524494466e-06, "epoch": 1.4668702695525162, "percentage": 73.34, "elapsed_time": 
"19:57:00", "remaining_time": "7:15:07"} +{"current_steps": 5525, "total_steps": 7532, "loss": 0.23668336868286133, "lr": 3.6413355507875845e-06, "epoch": 1.4671358385340592, "percentage": 73.35, "elapsed_time": "19:57:13", "remaining_time": "7:14:54"} +{"current_steps": 5526, "total_steps": 7532, "loss": 0.25534945726394653, "lr": 3.6379473750012375e-06, "epoch": 1.467401407515602, "percentage": 73.37, "elapsed_time": "19:57:25", "remaining_time": "7:14:40"} +{"current_steps": 5527, "total_steps": 7532, "loss": 0.22227410972118378, "lr": 3.634560425743596e-06, "epoch": 1.467666976497145, "percentage": 73.38, "elapsed_time": "19:57:39", "remaining_time": "7:14:27"} +{"current_steps": 5528, "total_steps": 7532, "loss": 0.23395927250385284, "lr": 3.631174703667636e-06, "epoch": 1.467932545478688, "percentage": 73.39, "elapsed_time": "19:57:51", "remaining_time": "7:14:14"} +{"current_steps": 5529, "total_steps": 7532, "loss": 0.23419208824634552, "lr": 3.6277902094260785e-06, "epoch": 1.468198114460231, "percentage": 73.41, "elapsed_time": "19:58:04", "remaining_time": "7:14:01"} +{"current_steps": 5530, "total_steps": 7532, "loss": 0.22185654938220978, "lr": 3.6244069436714158e-06, "epoch": 1.4684636834417741, "percentage": 73.42, "elapsed_time": "19:58:17", "remaining_time": "7:13:48"} +{"current_steps": 5531, "total_steps": 7532, "loss": 0.2705134153366089, "lr": 3.621024907055901e-06, "epoch": 1.468729252423317, "percentage": 73.43, "elapsed_time": "19:58:30", "remaining_time": "7:13:35"} +{"current_steps": 5532, "total_steps": 7532, "loss": 0.23426109552383423, "lr": 3.617644100231551e-06, "epoch": 1.46899482140486, "percentage": 73.45, "elapsed_time": "19:58:43", "remaining_time": "7:13:22"} +{"current_steps": 5533, "total_steps": 7532, "loss": 0.25527146458625793, "lr": 3.6142645238501462e-06, "epoch": 1.469260390386403, "percentage": 73.46, "elapsed_time": "19:58:56", "remaining_time": "7:13:09"} +{"current_steps": 5534, "total_steps": 7532, "loss": 
0.1882668435573578, "lr": 3.610886178563228e-06, "epoch": 1.469525959367946, "percentage": 73.47, "elapsed_time": "19:59:10", "remaining_time": "7:12:56"} +{"current_steps": 5535, "total_steps": 7532, "loss": 0.24060532450675964, "lr": 3.607509065022101e-06, "epoch": 1.469791528349489, "percentage": 73.49, "elapsed_time": "19:59:22", "remaining_time": "7:12:43"} +{"current_steps": 5536, "total_steps": 7532, "loss": 0.23555803298950195, "lr": 3.6041331838778325e-06, "epoch": 1.4700570973310318, "percentage": 73.5, "elapsed_time": "19:59:35", "remaining_time": "7:12:30"} +{"current_steps": 5537, "total_steps": 7532, "loss": 0.23126551508903503, "lr": 3.6007585357812557e-06, "epoch": 1.4703226663125748, "percentage": 73.51, "elapsed_time": "19:59:48", "remaining_time": "7:12:17"} +{"current_steps": 5538, "total_steps": 7532, "loss": 0.24203836917877197, "lr": 3.597385121382961e-06, "epoch": 1.4705882352941178, "percentage": 73.53, "elapsed_time": "20:00:01", "remaining_time": "7:12:04"} +{"current_steps": 5539, "total_steps": 7532, "loss": 0.239767923951149, "lr": 3.5940129413333046e-06, "epoch": 1.4708538042756607, "percentage": 73.54, "elapsed_time": "20:00:14", "remaining_time": "7:11:51"} +{"current_steps": 5540, "total_steps": 7532, "loss": 0.24732957780361176, "lr": 3.5906419962824002e-06, "epoch": 1.4711193732572037, "percentage": 73.55, "elapsed_time": "20:00:27", "remaining_time": "7:11:38"} +{"current_steps": 5541, "total_steps": 7532, "loss": 0.2296421229839325, "lr": 3.587272286880131e-06, "epoch": 1.4713849422387466, "percentage": 73.57, "elapsed_time": "20:00:40", "remaining_time": "7:11:25"} +{"current_steps": 5542, "total_steps": 7532, "loss": 0.2339775711297989, "lr": 3.583903813776132e-06, "epoch": 1.4716505112202896, "percentage": 73.58, "elapsed_time": "20:00:53", "remaining_time": "7:11:12"} +{"current_steps": 5543, "total_steps": 7532, "loss": 0.230351984500885, "lr": 3.5805365776198052e-06, "epoch": 1.4719160802018325, "percentage": 73.59, 
"elapsed_time": "20:01:06", "remaining_time": "7:10:59"} +{"current_steps": 5544, "total_steps": 7532, "loss": 0.2501414716243744, "lr": 3.5771705790603163e-06, "epoch": 1.4721816491833755, "percentage": 73.61, "elapsed_time": "20:01:19", "remaining_time": "7:10:46"} +{"current_steps": 5545, "total_steps": 7532, "loss": 0.23387153446674347, "lr": 3.5738058187465864e-06, "epoch": 1.4724472181649184, "percentage": 73.62, "elapsed_time": "20:01:32", "remaining_time": "7:10:33"} +{"current_steps": 5546, "total_steps": 7532, "loss": 0.23874594271183014, "lr": 3.570442297327307e-06, "epoch": 1.4727127871464614, "percentage": 73.63, "elapsed_time": "20:01:45", "remaining_time": "7:10:20"} +{"current_steps": 5547, "total_steps": 7532, "loss": 0.21867451071739197, "lr": 3.5670800154509245e-06, "epoch": 1.4729783561280043, "percentage": 73.65, "elapsed_time": "20:01:58", "remaining_time": "7:10:07"} +{"current_steps": 5548, "total_steps": 7532, "loss": 0.24124100804328918, "lr": 3.563718973765644e-06, "epoch": 1.4732439251095473, "percentage": 73.66, "elapsed_time": "20:02:11", "remaining_time": "7:09:54"} +{"current_steps": 5549, "total_steps": 7532, "loss": 0.22185327112674713, "lr": 3.5603591729194377e-06, "epoch": 1.4735094940910902, "percentage": 73.67, "elapsed_time": "20:02:24", "remaining_time": "7:09:41"} +{"current_steps": 5550, "total_steps": 7532, "loss": 0.21193793416023254, "lr": 3.5570006135600345e-06, "epoch": 1.4737750630726332, "percentage": 73.69, "elapsed_time": "20:02:37", "remaining_time": "7:09:28"} +{"current_steps": 5551, "total_steps": 7532, "loss": 0.2615143656730652, "lr": 3.553643296334924e-06, "epoch": 1.4740406320541761, "percentage": 73.7, "elapsed_time": "20:02:50", "remaining_time": "7:09:15"} +{"current_steps": 5552, "total_steps": 7532, "loss": 0.24937541782855988, "lr": 3.5502872218913597e-06, "epoch": 1.474306201035719, "percentage": 73.71, "elapsed_time": "20:03:03", "remaining_time": "7:09:02"} +{"current_steps": 5553, "total_steps": 
7532, "loss": 0.22849224507808685, "lr": 3.5469323908763507e-06, "epoch": 1.474571770017262, "percentage": 73.73, "elapsed_time": "20:03:16", "remaining_time": "7:08:49"} +{"current_steps": 5554, "total_steps": 7532, "loss": 0.2209717333316803, "lr": 3.5435788039366657e-06, "epoch": 1.474837338998805, "percentage": 73.74, "elapsed_time": "20:03:28", "remaining_time": "7:08:36"} +{"current_steps": 5555, "total_steps": 7532, "loss": 0.2529235780239105, "lr": 3.5402264617188453e-06, "epoch": 1.475102907980348, "percentage": 73.75, "elapsed_time": "20:03:41", "remaining_time": "7:08:23"} +{"current_steps": 5556, "total_steps": 7532, "loss": 0.2045450657606125, "lr": 3.536875364869181e-06, "epoch": 1.4753684769618909, "percentage": 73.77, "elapsed_time": "20:03:54", "remaining_time": "7:08:10"} +{"current_steps": 5557, "total_steps": 7532, "loss": 0.1973644196987152, "lr": 3.5335255140337167e-06, "epoch": 1.4756340459434338, "percentage": 73.78, "elapsed_time": "20:04:07", "remaining_time": "7:07:57"} +{"current_steps": 5558, "total_steps": 7532, "loss": 0.27417299151420593, "lr": 3.5301769098582685e-06, "epoch": 1.4758996149249768, "percentage": 73.79, "elapsed_time": "20:04:20", "remaining_time": "7:07:44"} +{"current_steps": 5559, "total_steps": 7532, "loss": 0.24541756510734558, "lr": 3.5268295529884077e-06, "epoch": 1.4761651839065197, "percentage": 73.81, "elapsed_time": "20:04:32", "remaining_time": "7:07:31"} +{"current_steps": 5560, "total_steps": 7532, "loss": 0.25785958766937256, "lr": 3.5234834440694655e-06, "epoch": 1.4764307528880627, "percentage": 73.82, "elapsed_time": "20:04:45", "remaining_time": "7:07:18"} +{"current_steps": 5561, "total_steps": 7532, "loss": 0.21099212765693665, "lr": 3.5201385837465307e-06, "epoch": 1.4766963218696056, "percentage": 73.83, "elapsed_time": "20:04:58", "remaining_time": "7:07:04"} +{"current_steps": 5562, "total_steps": 7532, "loss": 0.26023173332214355, "lr": 3.5167949726644545e-06, "epoch": 1.4769618908511486, 
"percentage": 73.84, "elapsed_time": "20:05:11", "remaining_time": "7:06:51"} +{"current_steps": 5563, "total_steps": 7532, "loss": 0.22882963716983795, "lr": 3.5134526114678426e-06, "epoch": 1.4772274598326915, "percentage": 73.86, "elapsed_time": "20:05:24", "remaining_time": "7:06:38"} +{"current_steps": 5564, "total_steps": 7532, "loss": 0.21987251937389374, "lr": 3.5101115008010677e-06, "epoch": 1.4774930288142345, "percentage": 73.87, "elapsed_time": "20:05:37", "remaining_time": "7:06:25"} +{"current_steps": 5565, "total_steps": 7532, "loss": 0.2169610857963562, "lr": 3.506771641308255e-06, "epoch": 1.4777585977957775, "percentage": 73.88, "elapsed_time": "20:05:49", "remaining_time": "7:06:12"} +{"current_steps": 5566, "total_steps": 7532, "loss": 0.22723034024238586, "lr": 3.50343303363329e-06, "epoch": 1.4780241667773204, "percentage": 73.9, "elapsed_time": "20:06:02", "remaining_time": "7:05:59"} +{"current_steps": 5567, "total_steps": 7532, "loss": 0.23738276958465576, "lr": 3.5000956784198157e-06, "epoch": 1.4782897357588634, "percentage": 73.91, "elapsed_time": "20:06:15", "remaining_time": "7:05:46"} +{"current_steps": 5568, "total_steps": 7532, "loss": 0.19922251999378204, "lr": 3.496759576311235e-06, "epoch": 1.4785553047404063, "percentage": 73.92, "elapsed_time": "20:06:28", "remaining_time": "7:05:33"} +{"current_steps": 5569, "total_steps": 7532, "loss": 0.22529268264770508, "lr": 3.4934247279507092e-06, "epoch": 1.4788208737219493, "percentage": 73.94, "elapsed_time": "20:06:41", "remaining_time": "7:05:20"} +{"current_steps": 5570, "total_steps": 7532, "loss": 0.26758015155792236, "lr": 3.4900911339811583e-06, "epoch": 1.4790864427034922, "percentage": 73.95, "elapsed_time": "20:06:54", "remaining_time": "7:05:07"} +{"current_steps": 5571, "total_steps": 7532, "loss": 0.24752648174762726, "lr": 3.48675879504526e-06, "epoch": 1.4793520116850352, "percentage": 73.96, "elapsed_time": "20:07:06", "remaining_time": "7:04:54"} +{"current_steps": 
5572, "total_steps": 7532, "loss": 0.25337618589401245, "lr": 3.483427711785449e-06, "epoch": 1.4796175806665781, "percentage": 73.98, "elapsed_time": "20:07:19", "remaining_time": "7:04:41"} +{"current_steps": 5573, "total_steps": 7532, "loss": 0.24504786729812622, "lr": 3.480097884843919e-06, "epoch": 1.479883149648121, "percentage": 73.99, "elapsed_time": "20:07:32", "remaining_time": "7:04:28"} +{"current_steps": 5574, "total_steps": 7532, "loss": 0.21255145967006683, "lr": 3.4767693148626223e-06, "epoch": 1.480148718629664, "percentage": 74.0, "elapsed_time": "20:07:45", "remaining_time": "7:04:15"} +{"current_steps": 5575, "total_steps": 7532, "loss": 0.2501891553401947, "lr": 3.473442002483267e-06, "epoch": 1.480414287611207, "percentage": 74.02, "elapsed_time": "20:07:58", "remaining_time": "7:04:02"} +{"current_steps": 5576, "total_steps": 7532, "loss": 0.25276634097099304, "lr": 3.4701159483473202e-06, "epoch": 1.48067985659275, "percentage": 74.03, "elapsed_time": "20:08:11", "remaining_time": "7:03:49"} +{"current_steps": 5577, "total_steps": 7532, "loss": 0.2760567367076874, "lr": 3.4667911530960052e-06, "epoch": 1.4809454255742929, "percentage": 74.04, "elapsed_time": "20:08:24", "remaining_time": "7:03:36"} +{"current_steps": 5578, "total_steps": 7532, "loss": 0.22686481475830078, "lr": 3.463467617370305e-06, "epoch": 1.4812109945558358, "percentage": 74.06, "elapsed_time": "20:08:37", "remaining_time": "7:03:23"} +{"current_steps": 5579, "total_steps": 7532, "loss": 0.23262599110603333, "lr": 3.4601453418109554e-06, "epoch": 1.4814765635373788, "percentage": 74.07, "elapsed_time": "20:08:50", "remaining_time": "7:03:10"} +{"current_steps": 5580, "total_steps": 7532, "loss": 0.22231365740299225, "lr": 3.4568243270584545e-06, "epoch": 1.4817421325189217, "percentage": 74.08, "elapsed_time": "20:09:03", "remaining_time": "7:02:57"} +{"current_steps": 5581, "total_steps": 7532, "loss": 0.22237855195999146, "lr": 3.4535045737530504e-06, "epoch": 
1.4820077015004647, "percentage": 74.1, "elapsed_time": "20:09:16", "remaining_time": "7:02:44"} +{"current_steps": 5582, "total_steps": 7532, "loss": 0.2260412871837616, "lr": 3.4501860825347587e-06, "epoch": 1.4822732704820076, "percentage": 74.11, "elapsed_time": "20:09:29", "remaining_time": "7:02:31"} +{"current_steps": 5583, "total_steps": 7532, "loss": 0.2133496105670929, "lr": 3.4468688540433425e-06, "epoch": 1.4825388394635506, "percentage": 74.12, "elapsed_time": "20:09:42", "remaining_time": "7:02:18"} +{"current_steps": 5584, "total_steps": 7532, "loss": 0.24750375747680664, "lr": 3.4435528889183245e-06, "epoch": 1.4828044084450935, "percentage": 74.14, "elapsed_time": "20:09:54", "remaining_time": "7:02:04"} +{"current_steps": 5585, "total_steps": 7532, "loss": 0.23673412203788757, "lr": 3.440238187798983e-06, "epoch": 1.4830699774266365, "percentage": 74.15, "elapsed_time": "20:10:07", "remaining_time": "7:01:51"} +{"current_steps": 5586, "total_steps": 7532, "loss": 0.2505243420600891, "lr": 3.436924751324354e-06, "epoch": 1.4833355464081794, "percentage": 74.16, "elapsed_time": "20:10:20", "remaining_time": "7:01:38"} +{"current_steps": 5587, "total_steps": 7532, "loss": 0.276151180267334, "lr": 3.433612580133229e-06, "epoch": 1.4836011153897224, "percentage": 74.18, "elapsed_time": "20:10:33", "remaining_time": "7:01:25"} +{"current_steps": 5588, "total_steps": 7532, "loss": 0.1756816953420639, "lr": 3.430301674864154e-06, "epoch": 1.4838666843712653, "percentage": 74.19, "elapsed_time": "20:10:45", "remaining_time": "7:01:12"} +{"current_steps": 5589, "total_steps": 7532, "loss": 0.25901898741722107, "lr": 3.4269920361554342e-06, "epoch": 1.4841322533528083, "percentage": 74.2, "elapsed_time": "20:10:58", "remaining_time": "7:00:59"} +{"current_steps": 5590, "total_steps": 7532, "loss": 0.21196085214614868, "lr": 3.4236836646451286e-06, "epoch": 1.4843978223343512, "percentage": 74.22, "elapsed_time": "20:11:11", "remaining_time": "7:00:46"} 
+{"current_steps": 5591, "total_steps": 7532, "loss": 0.24153128266334534, "lr": 3.4203765609710525e-06, "epoch": 1.4846633913158942, "percentage": 74.23, "elapsed_time": "20:11:24", "remaining_time": "7:00:33"} +{"current_steps": 5592, "total_steps": 7532, "loss": 0.25715887546539307, "lr": 3.4170707257707757e-06, "epoch": 1.4849289602974372, "percentage": 74.24, "elapsed_time": "20:11:37", "remaining_time": "7:00:20"} +{"current_steps": 5593, "total_steps": 7532, "loss": 0.2920379042625427, "lr": 3.413766159681624e-06, "epoch": 1.48519452927898, "percentage": 74.26, "elapsed_time": "20:11:50", "remaining_time": "7:00:07"} +{"current_steps": 5594, "total_steps": 7532, "loss": 0.22127456963062286, "lr": 3.41046286334068e-06, "epoch": 1.485460098260523, "percentage": 74.27, "elapsed_time": "20:12:03", "remaining_time": "6:59:54"} +{"current_steps": 5595, "total_steps": 7532, "loss": 0.23103584349155426, "lr": 3.4071608373847786e-06, "epoch": 1.485725667242066, "percentage": 74.28, "elapsed_time": "20:12:15", "remaining_time": "6:59:41"} +{"current_steps": 5596, "total_steps": 7532, "loss": 0.29068222641944885, "lr": 3.403860082450513e-06, "epoch": 1.485991236223609, "percentage": 74.3, "elapsed_time": "20:12:29", "remaining_time": "6:59:28"} +{"current_steps": 5597, "total_steps": 7532, "loss": 0.23703888058662415, "lr": 3.4005605991742296e-06, "epoch": 1.486256805205152, "percentage": 74.31, "elapsed_time": "20:12:42", "remaining_time": "6:59:15"} +{"current_steps": 5598, "total_steps": 7532, "loss": 0.23348261415958405, "lr": 3.3972623881920296e-06, "epoch": 1.4865223741866949, "percentage": 74.32, "elapsed_time": "20:12:55", "remaining_time": "6:59:02"} +{"current_steps": 5599, "total_steps": 7532, "loss": 0.24733223021030426, "lr": 3.3939654501397645e-06, "epoch": 1.4867879431682378, "percentage": 74.34, "elapsed_time": "20:13:08", "remaining_time": "6:58:49"} +{"current_steps": 5600, "total_steps": 7532, "loss": 0.22576835751533508, "lr": 
3.3906697856530548e-06, "epoch": 1.487053512149781, "percentage": 74.35, "elapsed_time": "20:13:21", "remaining_time": "6:58:36"} +{"current_steps": 5601, "total_steps": 7532, "loss": 0.20863527059555054, "lr": 3.3873753953672593e-06, "epoch": 1.487319081131324, "percentage": 74.36, "elapsed_time": "20:13:40", "remaining_time": "6:58:25"} +{"current_steps": 5602, "total_steps": 7532, "loss": 0.2299712598323822, "lr": 3.384082279917499e-06, "epoch": 1.487584650112867, "percentage": 74.38, "elapsed_time": "20:13:54", "remaining_time": "6:58:12"} +{"current_steps": 5603, "total_steps": 7532, "loss": 0.23058944940567017, "lr": 3.380790439938648e-06, "epoch": 1.4878502190944098, "percentage": 74.39, "elapsed_time": "20:14:07", "remaining_time": "6:57:59"} +{"current_steps": 5604, "total_steps": 7532, "loss": 0.20307201147079468, "lr": 3.3774998760653344e-06, "epoch": 1.4881157880759528, "percentage": 74.4, "elapsed_time": "20:14:20", "remaining_time": "6:57:46"} +{"current_steps": 5605, "total_steps": 7532, "loss": 0.2296266108751297, "lr": 3.3742105889319388e-06, "epoch": 1.4883813570574957, "percentage": 74.42, "elapsed_time": "20:14:33", "remaining_time": "6:57:34"} +{"current_steps": 5606, "total_steps": 7532, "loss": 0.22702309489250183, "lr": 3.370922579172601e-06, "epoch": 1.4886469260390387, "percentage": 74.43, "elapsed_time": "20:14:46", "remaining_time": "6:57:21"} +{"current_steps": 5607, "total_steps": 7532, "loss": 0.30432331562042236, "lr": 3.3676358474212035e-06, "epoch": 1.4889124950205816, "percentage": 74.44, "elapsed_time": "20:14:59", "remaining_time": "6:57:08"} +{"current_steps": 5608, "total_steps": 7532, "loss": 0.2488052248954773, "lr": 3.3643503943113907e-06, "epoch": 1.4891780640021246, "percentage": 74.46, "elapsed_time": "20:15:12", "remaining_time": "6:56:54"} +{"current_steps": 5609, "total_steps": 7532, "loss": 0.2221754938364029, "lr": 3.361066220476564e-06, "epoch": 1.4894436329836676, "percentage": 74.47, "elapsed_time": "20:15:26", 
"remaining_time": "6:56:42"} +{"current_steps": 5610, "total_steps": 7532, "loss": 0.2547761797904968, "lr": 3.3577833265498728e-06, "epoch": 1.4897092019652105, "percentage": 74.48, "elapsed_time": "20:15:38", "remaining_time": "6:56:29"} +{"current_steps": 5611, "total_steps": 7532, "loss": 0.21811938285827637, "lr": 3.3545017131642164e-06, "epoch": 1.4899747709467535, "percentage": 74.5, "elapsed_time": "20:15:51", "remaining_time": "6:56:15"} +{"current_steps": 5612, "total_steps": 7532, "loss": 0.30436158180236816, "lr": 3.3512213809522554e-06, "epoch": 1.4902403399282964, "percentage": 74.51, "elapsed_time": "20:16:04", "remaining_time": "6:56:02"} +{"current_steps": 5613, "total_steps": 7532, "loss": 0.2053622156381607, "lr": 3.3479423305463953e-06, "epoch": 1.4905059089098394, "percentage": 74.52, "elapsed_time": "20:16:17", "remaining_time": "6:55:49"} +{"current_steps": 5614, "total_steps": 7532, "loss": 0.2017601728439331, "lr": 3.344664562578801e-06, "epoch": 1.4907714778913823, "percentage": 74.54, "elapsed_time": "20:16:30", "remaining_time": "6:55:36"} +{"current_steps": 5615, "total_steps": 7532, "loss": 0.23668046295642853, "lr": 3.341388077681387e-06, "epoch": 1.4910370468729253, "percentage": 74.55, "elapsed_time": "20:16:43", "remaining_time": "6:55:23"} +{"current_steps": 5616, "total_steps": 7532, "loss": 0.20016951858997345, "lr": 3.338112876485821e-06, "epoch": 1.4913026158544682, "percentage": 74.56, "elapsed_time": "20:16:56", "remaining_time": "6:55:10"} +{"current_steps": 5617, "total_steps": 7532, "loss": 0.25477850437164307, "lr": 3.3348389596235177e-06, "epoch": 1.4915681848360112, "percentage": 74.58, "elapsed_time": "20:17:09", "remaining_time": "6:54:57"} +{"current_steps": 5618, "total_steps": 7532, "loss": 0.24063366651535034, "lr": 3.3315663277256594e-06, "epoch": 1.4918337538175541, "percentage": 74.59, "elapsed_time": "20:17:22", "remaining_time": "6:54:44"} +{"current_steps": 5619, "total_steps": 7532, "loss": 
0.23443251848220825, "lr": 3.328294981423165e-06, "epoch": 1.492099322799097, "percentage": 74.6, "elapsed_time": "20:17:35", "remaining_time": "6:54:31"} +{"current_steps": 5620, "total_steps": 7532, "loss": 0.21191264688968658, "lr": 3.325024921346717e-06, "epoch": 1.49236489178064, "percentage": 74.61, "elapsed_time": "20:17:48", "remaining_time": "6:54:19"} +{"current_steps": 5621, "total_steps": 7532, "loss": 0.22062326967716217, "lr": 3.3217561481267367e-06, "epoch": 1.492630460762183, "percentage": 74.63, "elapsed_time": "20:18:01", "remaining_time": "6:54:06"} +{"current_steps": 5622, "total_steps": 7532, "loss": 0.2235480695962906, "lr": 3.318488662393409e-06, "epoch": 1.492896029743726, "percentage": 74.64, "elapsed_time": "20:18:15", "remaining_time": "6:53:53"} +{"current_steps": 5623, "total_steps": 7532, "loss": 0.26665517687797546, "lr": 3.315222464776665e-06, "epoch": 1.4931615987252689, "percentage": 74.65, "elapsed_time": "20:18:27", "remaining_time": "6:53:40"} +{"current_steps": 5624, "total_steps": 7532, "loss": 0.24300602078437805, "lr": 3.3119575559061902e-06, "epoch": 1.4934271677068118, "percentage": 74.67, "elapsed_time": "20:18:41", "remaining_time": "6:53:27"} +{"current_steps": 5625, "total_steps": 7532, "loss": 0.25441884994506836, "lr": 3.308693936411421e-06, "epoch": 1.4936927366883548, "percentage": 74.68, "elapsed_time": "20:18:54", "remaining_time": "6:53:14"} +{"current_steps": 5626, "total_steps": 7532, "loss": 0.23236152529716492, "lr": 3.3054316069215407e-06, "epoch": 1.4939583056698977, "percentage": 74.69, "elapsed_time": "20:19:07", "remaining_time": "6:53:01"} +{"current_steps": 5627, "total_steps": 7532, "loss": 0.24535568058490753, "lr": 3.3021705680654946e-06, "epoch": 1.4942238746514407, "percentage": 74.71, "elapsed_time": "20:19:20", "remaining_time": "6:52:48"} +{"current_steps": 5628, "total_steps": 7532, "loss": 0.2542986273765564, "lr": 3.29891082047197e-06, "epoch": 1.4944894436329836, "percentage": 74.72, 
"elapsed_time": "20:19:33", "remaining_time": "6:52:35"} +{"current_steps": 5629, "total_steps": 7532, "loss": 0.26490268111228943, "lr": 3.295652364769407e-06, "epoch": 1.4947550126145266, "percentage": 74.73, "elapsed_time": "20:19:46", "remaining_time": "6:52:22"} +{"current_steps": 5630, "total_steps": 7532, "loss": 0.25576913356781006, "lr": 3.292395201585997e-06, "epoch": 1.4950205815960695, "percentage": 74.75, "elapsed_time": "20:19:59", "remaining_time": "6:52:09"} +{"current_steps": 5631, "total_steps": 7532, "loss": 0.2930823266506195, "lr": 3.2891393315496846e-06, "epoch": 1.4952861505776125, "percentage": 74.76, "elapsed_time": "20:20:12", "remaining_time": "6:51:56"} +{"current_steps": 5632, "total_steps": 7532, "loss": 0.2426074892282486, "lr": 3.285884755288161e-06, "epoch": 1.4955517195591554, "percentage": 74.77, "elapsed_time": "20:20:25", "remaining_time": "6:51:43"} +{"current_steps": 5633, "total_steps": 7532, "loss": 0.24090878665447235, "lr": 3.2826314734288713e-06, "epoch": 1.4958172885406984, "percentage": 74.79, "elapsed_time": "20:20:39", "remaining_time": "6:51:30"} +{"current_steps": 5634, "total_steps": 7532, "loss": 0.26155173778533936, "lr": 3.2793794865990092e-06, "epoch": 1.4960828575222413, "percentage": 74.8, "elapsed_time": "20:20:51", "remaining_time": "6:51:17"} +{"current_steps": 5635, "total_steps": 7532, "loss": 0.2594009041786194, "lr": 3.2761287954255195e-06, "epoch": 1.4963484265037843, "percentage": 74.81, "elapsed_time": "20:21:04", "remaining_time": "6:51:04"} +{"current_steps": 5636, "total_steps": 7532, "loss": 0.24434763193130493, "lr": 3.2728794005350972e-06, "epoch": 1.4966139954853273, "percentage": 74.83, "elapsed_time": "20:21:17", "remaining_time": "6:50:51"} +{"current_steps": 5637, "total_steps": 7532, "loss": 0.2622208297252655, "lr": 3.269631302554188e-06, "epoch": 1.4968795644668702, "percentage": 74.84, "elapsed_time": "20:21:30", "remaining_time": "6:50:38"} +{"current_steps": 5638, "total_steps": 
7532, "loss": 0.18913154304027557, "lr": 3.266384502108987e-06, "epoch": 1.4971451334484132, "percentage": 74.85, "elapsed_time": "20:21:43", "remaining_time": "6:50:25"} +{"current_steps": 5639, "total_steps": 7532, "loss": 0.2610907554626465, "lr": 3.263138999825437e-06, "epoch": 1.497410702429956, "percentage": 74.87, "elapsed_time": "20:21:56", "remaining_time": "6:50:12"} +{"current_steps": 5640, "total_steps": 7532, "loss": 0.25841569900512695, "lr": 3.2598947963292337e-06, "epoch": 1.497676271411499, "percentage": 74.88, "elapsed_time": "20:22:09", "remaining_time": "6:49:59"} +{"current_steps": 5641, "total_steps": 7532, "loss": 0.2066381573677063, "lr": 3.256651892245822e-06, "epoch": 1.497941840393042, "percentage": 74.89, "elapsed_time": "20:22:22", "remaining_time": "6:49:46"} +{"current_steps": 5642, "total_steps": 7532, "loss": 0.23956719040870667, "lr": 3.253410288200396e-06, "epoch": 1.4982074093745852, "percentage": 74.91, "elapsed_time": "20:22:35", "remaining_time": "6:49:33"} +{"current_steps": 5643, "total_steps": 7532, "loss": 0.23999394476413727, "lr": 3.250169984817897e-06, "epoch": 1.4984729783561281, "percentage": 74.92, "elapsed_time": "20:22:48", "remaining_time": "6:49:20"} +{"current_steps": 5644, "total_steps": 7532, "loss": 0.24273940920829773, "lr": 3.2469309827230156e-06, "epoch": 1.498738547337671, "percentage": 74.93, "elapsed_time": "20:23:01", "remaining_time": "6:49:07"} +{"current_steps": 5645, "total_steps": 7532, "loss": 0.2212621569633484, "lr": 3.2436932825401977e-06, "epoch": 1.499004116319214, "percentage": 74.95, "elapsed_time": "20:23:14", "remaining_time": "6:48:54"} +{"current_steps": 5646, "total_steps": 7532, "loss": 0.2487148940563202, "lr": 3.2404568848936325e-06, "epoch": 1.499269685300757, "percentage": 74.96, "elapsed_time": "20:23:28", "remaining_time": "6:48:41"} +{"current_steps": 5647, "total_steps": 7532, "loss": 0.29314422607421875, "lr": 3.237221790407259e-06, "epoch": 1.4995352542823, "percentage": 
74.97, "elapsed_time": "20:23:40", "remaining_time": "6:48:28"} +{"current_steps": 5648, "total_steps": 7532, "loss": 0.22727417945861816, "lr": 3.233987999704763e-06, "epoch": 1.499800823263843, "percentage": 74.99, "elapsed_time": "20:23:54", "remaining_time": "6:48:15"} +{"current_steps": 5649, "total_steps": 7532, "loss": 0.18877442181110382, "lr": 3.230755513409585e-06, "epoch": 1.5000663922453858, "percentage": 75.0, "elapsed_time": "20:24:07", "remaining_time": "6:48:02"} +{"current_steps": 5650, "total_steps": 7532, "loss": 0.2504552900791168, "lr": 3.2275243321449068e-06, "epoch": 1.5003319612269288, "percentage": 75.01, "elapsed_time": "20:24:20", "remaining_time": "6:47:49"} +{"current_steps": 5651, "total_steps": 7532, "loss": 0.23579174280166626, "lr": 3.224294456533663e-06, "epoch": 1.5005975302084718, "percentage": 75.03, "elapsed_time": "20:24:33", "remaining_time": "6:47:36"} +{"current_steps": 5652, "total_steps": 7532, "loss": 0.29236793518066406, "lr": 3.221065887198537e-06, "epoch": 1.5008630991900147, "percentage": 75.04, "elapsed_time": "20:24:47", "remaining_time": "6:47:23"} +{"current_steps": 5653, "total_steps": 7532, "loss": 0.2735568881034851, "lr": 3.2178386247619577e-06, "epoch": 1.5011286681715577, "percentage": 75.05, "elapsed_time": "20:24:59", "remaining_time": "6:47:10"} +{"current_steps": 5654, "total_steps": 7532, "loss": 0.2391616702079773, "lr": 3.214612669846103e-06, "epoch": 1.5013942371531006, "percentage": 75.07, "elapsed_time": "20:25:13", "remaining_time": "6:46:57"} +{"current_steps": 5655, "total_steps": 7532, "loss": 0.24532485008239746, "lr": 3.2113880230729e-06, "epoch": 1.5016598061346436, "percentage": 75.08, "elapsed_time": "20:25:25", "remaining_time": "6:46:44"} +{"current_steps": 5656, "total_steps": 7532, "loss": 0.2605767250061035, "lr": 3.2081646850640215e-06, "epoch": 1.5019253751161865, "percentage": 75.09, "elapsed_time": "20:25:38", "remaining_time": "6:46:31"} +{"current_steps": 5657, "total_steps": 
7532, "loss": 0.2651350200176239, "lr": 3.2049426564408893e-06, "epoch": 1.5021909440977295, "percentage": 75.11, "elapsed_time": "20:25:51", "remaining_time": "6:46:18"} +{"current_steps": 5658, "total_steps": 7532, "loss": 0.2719389498233795, "lr": 3.2017219378246734e-06, "epoch": 1.5024565130792724, "percentage": 75.12, "elapsed_time": "20:26:04", "remaining_time": "6:46:05"} +{"current_steps": 5659, "total_steps": 7532, "loss": 0.23077815771102905, "lr": 3.198502529836288e-06, "epoch": 1.5027220820608154, "percentage": 75.13, "elapsed_time": "20:26:17", "remaining_time": "6:45:52"} +{"current_steps": 5660, "total_steps": 7532, "loss": 0.21954959630966187, "lr": 3.1952844330964007e-06, "epoch": 1.5029876510423583, "percentage": 75.15, "elapsed_time": "20:26:30", "remaining_time": "6:45:39"} +{"current_steps": 5661, "total_steps": 7532, "loss": 0.28229185938835144, "lr": 3.1920676482254186e-06, "epoch": 1.5032532200239013, "percentage": 75.16, "elapsed_time": "20:26:43", "remaining_time": "6:45:26"} +{"current_steps": 5662, "total_steps": 7532, "loss": 0.24612295627593994, "lr": 3.1888521758435e-06, "epoch": 1.5035187890054442, "percentage": 75.17, "elapsed_time": "20:26:56", "remaining_time": "6:45:13"} +{"current_steps": 5663, "total_steps": 7532, "loss": 0.24191413819789886, "lr": 3.185638016570555e-06, "epoch": 1.5037843579869872, "percentage": 75.19, "elapsed_time": "20:27:09", "remaining_time": "6:45:00"} +{"current_steps": 5664, "total_steps": 7532, "loss": 0.2427935004234314, "lr": 3.1824251710262323e-06, "epoch": 1.5040499269685301, "percentage": 75.2, "elapsed_time": "20:27:22", "remaining_time": "6:44:47"} +{"current_steps": 5665, "total_steps": 7532, "loss": 0.2600318193435669, "lr": 3.17921363982993e-06, "epoch": 1.504315495950073, "percentage": 75.21, "elapsed_time": "20:27:35", "remaining_time": "6:44:34"} +{"current_steps": 5666, "total_steps": 7532, "loss": 0.25215205550193787, "lr": 3.1760034236007954e-06, "epoch": 1.504581064931616, 
"percentage": 75.23, "elapsed_time": "20:27:48", "remaining_time": "6:44:21"} +{"current_steps": 5667, "total_steps": 7532, "loss": 0.24460548162460327, "lr": 3.1727945229577183e-06, "epoch": 1.504846633913159, "percentage": 75.24, "elapsed_time": "20:28:01", "remaining_time": "6:44:08"} +{"current_steps": 5668, "total_steps": 7532, "loss": 0.2812577486038208, "lr": 3.169586938519338e-06, "epoch": 1.505112202894702, "percentage": 75.25, "elapsed_time": "20:28:14", "remaining_time": "6:43:55"} +{"current_steps": 5669, "total_steps": 7532, "loss": 0.23297616839408875, "lr": 3.166380670904039e-06, "epoch": 1.5053777718762449, "percentage": 75.27, "elapsed_time": "20:28:27", "remaining_time": "6:43:42"} +{"current_steps": 5670, "total_steps": 7532, "loss": 0.21659572422504425, "lr": 3.163175720729954e-06, "epoch": 1.5056433408577878, "percentage": 75.28, "elapsed_time": "20:28:40", "remaining_time": "6:43:29"} +{"current_steps": 5671, "total_steps": 7532, "loss": 0.22246181964874268, "lr": 3.1599720886149508e-06, "epoch": 1.5059089098393308, "percentage": 75.29, "elapsed_time": "20:28:53", "remaining_time": "6:43:16"} +{"current_steps": 5672, "total_steps": 7532, "loss": 0.20020918548107147, "lr": 3.1567697751766624e-06, "epoch": 1.5061744788208737, "percentage": 75.31, "elapsed_time": "20:29:07", "remaining_time": "6:43:03"} +{"current_steps": 5673, "total_steps": 7532, "loss": 0.25693628191947937, "lr": 3.1535687810324523e-06, "epoch": 1.5064400478024167, "percentage": 75.32, "elapsed_time": "20:29:19", "remaining_time": "6:42:50"} +{"current_steps": 5674, "total_steps": 7532, "loss": 0.21841923892498016, "lr": 3.150369106799436e-06, "epoch": 1.5067056167839596, "percentage": 75.33, "elapsed_time": "20:29:32", "remaining_time": "6:42:37"} +{"current_steps": 5675, "total_steps": 7532, "loss": 0.18131780624389648, "lr": 3.1471707530944707e-06, "epoch": 1.5069711857655026, "percentage": 75.35, "elapsed_time": "20:29:46", "remaining_time": "6:42:24"} +{"current_steps": 
5676, "total_steps": 7532, "loss": 0.22510449588298798, "lr": 3.143973720534164e-06, "epoch": 1.5072367547470455, "percentage": 75.36, "elapsed_time": "20:29:59", "remaining_time": "6:42:11"} +{"current_steps": 5677, "total_steps": 7532, "loss": 0.23721462488174438, "lr": 3.1407780097348627e-06, "epoch": 1.5075023237285885, "percentage": 75.37, "elapsed_time": "20:30:11", "remaining_time": "6:41:58"} +{"current_steps": 5678, "total_steps": 7532, "loss": 0.24281899631023407, "lr": 3.1375836213126653e-06, "epoch": 1.5077678927101315, "percentage": 75.39, "elapsed_time": "20:30:25", "remaining_time": "6:41:45"} +{"current_steps": 5679, "total_steps": 7532, "loss": 0.23910081386566162, "lr": 3.134390555883412e-06, "epoch": 1.5080334616916744, "percentage": 75.4, "elapsed_time": "20:30:38", "remaining_time": "6:41:32"} +{"current_steps": 5680, "total_steps": 7532, "loss": 0.2635132670402527, "lr": 3.1311988140626825e-06, "epoch": 1.5082990306732174, "percentage": 75.41, "elapsed_time": "20:30:51", "remaining_time": "6:41:19"} +{"current_steps": 5681, "total_steps": 7532, "loss": 0.24802634119987488, "lr": 3.1280083964658147e-06, "epoch": 1.5085645996547603, "percentage": 75.42, "elapsed_time": "20:31:04", "remaining_time": "6:41:06"} +{"current_steps": 5682, "total_steps": 7532, "loss": 0.24081437289714813, "lr": 3.1248193037078823e-06, "epoch": 1.5088301686363033, "percentage": 75.44, "elapsed_time": "20:31:16", "remaining_time": "6:40:53"} +{"current_steps": 5683, "total_steps": 7532, "loss": 0.19550001621246338, "lr": 3.121631536403701e-06, "epoch": 1.5090957376178462, "percentage": 75.45, "elapsed_time": "20:31:30", "remaining_time": "6:40:40"} +{"current_steps": 5684, "total_steps": 7532, "loss": 0.2397807538509369, "lr": 3.118445095167837e-06, "epoch": 1.5093613065993892, "percentage": 75.46, "elapsed_time": "20:31:42", "remaining_time": "6:40:27"} +{"current_steps": 5685, "total_steps": 7532, "loss": 0.2185651659965515, "lr": 3.115259980614602e-06, "epoch": 
1.5096268755809321, "percentage": 75.48, "elapsed_time": "20:31:56", "remaining_time": "6:40:14"} +{"current_steps": 5686, "total_steps": 7532, "loss": 0.22214055061340332, "lr": 3.1120761933580414e-06, "epoch": 1.509892444562475, "percentage": 75.49, "elapsed_time": "20:32:08", "remaining_time": "6:40:01"} +{"current_steps": 5687, "total_steps": 7532, "loss": 0.23971091210842133, "lr": 3.108893734011955e-06, "epoch": 1.510158013544018, "percentage": 75.5, "elapsed_time": "20:32:22", "remaining_time": "6:39:48"} +{"current_steps": 5688, "total_steps": 7532, "loss": 0.26458197832107544, "lr": 3.1057126031898843e-06, "epoch": 1.510423582525561, "percentage": 75.52, "elapsed_time": "20:32:35", "remaining_time": "6:39:35"} +{"current_steps": 5689, "total_steps": 7532, "loss": 0.23730339109897614, "lr": 3.1025328015051093e-06, "epoch": 1.510689151507104, "percentage": 75.53, "elapsed_time": "20:32:48", "remaining_time": "6:39:22"} +{"current_steps": 5690, "total_steps": 7532, "loss": 0.21981677412986755, "lr": 3.0993543295706653e-06, "epoch": 1.5109547204886469, "percentage": 75.54, "elapsed_time": "20:33:01", "remaining_time": "6:39:09"} +{"current_steps": 5691, "total_steps": 7532, "loss": 0.21984878182411194, "lr": 3.0961771879993206e-06, "epoch": 1.5112202894701898, "percentage": 75.56, "elapsed_time": "20:33:14", "remaining_time": "6:38:56"} +{"current_steps": 5692, "total_steps": 7532, "loss": 0.23086440563201904, "lr": 3.093001377403592e-06, "epoch": 1.5114858584517328, "percentage": 75.57, "elapsed_time": "20:33:27", "remaining_time": "6:38:43"} +{"current_steps": 5693, "total_steps": 7532, "loss": 0.2355024814605713, "lr": 3.0898268983957368e-06, "epoch": 1.5117514274332757, "percentage": 75.58, "elapsed_time": "20:33:40", "remaining_time": "6:38:30"} +{"current_steps": 5694, "total_steps": 7532, "loss": 0.21210229396820068, "lr": 3.0866537515877584e-06, "epoch": 1.5120169964148187, "percentage": 75.6, "elapsed_time": "20:33:53", "remaining_time": "6:38:17"} 
+{"current_steps": 5695, "total_steps": 7532, "loss": 0.2387622594833374, "lr": 3.0834819375914003e-06, "epoch": 1.5122825653963616, "percentage": 75.61, "elapsed_time": "20:34:06", "remaining_time": "6:38:04"} +{"current_steps": 5696, "total_steps": 7532, "loss": 0.23822402954101562, "lr": 3.0803114570181527e-06, "epoch": 1.5125481343779046, "percentage": 75.62, "elapsed_time": "20:34:19", "remaining_time": "6:37:51"} +{"current_steps": 5697, "total_steps": 7532, "loss": 0.26844173669815063, "lr": 3.0771423104792454e-06, "epoch": 1.5128137033594475, "percentage": 75.64, "elapsed_time": "20:34:32", "remaining_time": "6:37:38"} +{"current_steps": 5698, "total_steps": 7532, "loss": 0.23288767039775848, "lr": 3.07397449858565e-06, "epoch": 1.5130792723409905, "percentage": 75.65, "elapsed_time": "20:34:45", "remaining_time": "6:37:25"} +{"current_steps": 5699, "total_steps": 7532, "loss": 0.23273086547851562, "lr": 3.0708080219480896e-06, "epoch": 1.5133448413225334, "percentage": 75.66, "elapsed_time": "20:34:58", "remaining_time": "6:37:12"} +{"current_steps": 5700, "total_steps": 7532, "loss": 0.2505509555339813, "lr": 3.067642881177023e-06, "epoch": 1.5136104103040764, "percentage": 75.68, "elapsed_time": "20:35:11", "remaining_time": "6:36:59"} +{"current_steps": 5701, "total_steps": 7532, "loss": 0.22801508009433746, "lr": 3.0644790768826473e-06, "epoch": 1.5138759792856193, "percentage": 75.69, "elapsed_time": "20:35:30", "remaining_time": "6:36:48"} +{"current_steps": 5702, "total_steps": 7532, "loss": 0.2110593169927597, "lr": 3.061316609674908e-06, "epoch": 1.5141415482671623, "percentage": 75.7, "elapsed_time": "20:35:43", "remaining_time": "6:36:35"} +{"current_steps": 5703, "total_steps": 7532, "loss": 0.22201795876026154, "lr": 3.0581554801634927e-06, "epoch": 1.5144071172487052, "percentage": 75.72, "elapsed_time": "20:35:56", "remaining_time": "6:36:22"} +{"current_steps": 5704, "total_steps": 7532, "loss": 0.23104460537433624, "lr": 
3.054995688957829e-06, "epoch": 1.5146726862302482, "percentage": 75.73, "elapsed_time": "20:36:09", "remaining_time": "6:36:09"} +{"current_steps": 5705, "total_steps": 7532, "loss": 0.23373261094093323, "lr": 3.0518372366670877e-06, "epoch": 1.5149382552117912, "percentage": 75.74, "elapsed_time": "20:36:22", "remaining_time": "6:35:56"} +{"current_steps": 5706, "total_steps": 7532, "loss": 0.2404957264661789, "lr": 3.0486801239001806e-06, "epoch": 1.515203824193334, "percentage": 75.76, "elapsed_time": "20:36:36", "remaining_time": "6:35:43"} +{"current_steps": 5707, "total_steps": 7532, "loss": 0.23209382593631744, "lr": 3.0455243512657606e-06, "epoch": 1.515469393174877, "percentage": 75.77, "elapsed_time": "20:36:48", "remaining_time": "6:35:30"} +{"current_steps": 5708, "total_steps": 7532, "loss": 0.218237042427063, "lr": 3.042369919372228e-06, "epoch": 1.51573496215642, "percentage": 75.78, "elapsed_time": "20:37:02", "remaining_time": "6:35:17"} +{"current_steps": 5709, "total_steps": 7532, "loss": 0.25025027990341187, "lr": 3.039216828827717e-06, "epoch": 1.516000531137963, "percentage": 75.8, "elapsed_time": "20:37:15", "remaining_time": "6:35:04"} +{"current_steps": 5710, "total_steps": 7532, "loss": 0.24729448556900024, "lr": 3.036065080240106e-06, "epoch": 1.516266100119506, "percentage": 75.81, "elapsed_time": "20:37:28", "remaining_time": "6:34:51"} +{"current_steps": 5711, "total_steps": 7532, "loss": 0.23614796996116638, "lr": 3.032914674217017e-06, "epoch": 1.5165316691010489, "percentage": 75.82, "elapsed_time": "20:37:41", "remaining_time": "6:34:38"} +{"current_steps": 5712, "total_steps": 7532, "loss": 0.2313452661037445, "lr": 3.029765611365808e-06, "epoch": 1.5167972380825918, "percentage": 75.84, "elapsed_time": "20:37:54", "remaining_time": "6:34:25"} +{"current_steps": 5713, "total_steps": 7532, "loss": 0.22152003645896912, "lr": 3.0266178922935842e-06, "epoch": 1.5170628070641348, "percentage": 75.85, "elapsed_time": "20:38:08", 
"remaining_time": "6:34:13"} +{"current_steps": 5714, "total_steps": 7532, "loss": 0.25942179560661316, "lr": 3.0234715176071874e-06, "epoch": 1.5173283760456777, "percentage": 75.86, "elapsed_time": "20:38:21", "remaining_time": "6:34:00"} +{"current_steps": 5715, "total_steps": 7532, "loss": 0.25030237436294556, "lr": 3.0203264879132e-06, "epoch": 1.5175939450272207, "percentage": 75.88, "elapsed_time": "20:38:34", "remaining_time": "6:33:47"} +{"current_steps": 5716, "total_steps": 7532, "loss": 0.2025807797908783, "lr": 3.0171828038179497e-06, "epoch": 1.5178595140087636, "percentage": 75.89, "elapsed_time": "20:38:46", "remaining_time": "6:33:34"} +{"current_steps": 5717, "total_steps": 7532, "loss": 0.20455190539360046, "lr": 3.014040465927499e-06, "epoch": 1.5181250829903066, "percentage": 75.9, "elapsed_time": "20:39:00", "remaining_time": "6:33:21"} +{"current_steps": 5718, "total_steps": 7532, "loss": 0.24197113513946533, "lr": 3.010899474847655e-06, "epoch": 1.5183906519718495, "percentage": 75.92, "elapsed_time": "20:39:12", "remaining_time": "6:33:07"} +{"current_steps": 5719, "total_steps": 7532, "loss": 0.22290384769439697, "lr": 3.007759831183964e-06, "epoch": 1.5186562209533925, "percentage": 75.93, "elapsed_time": "20:39:26", "remaining_time": "6:32:55"} +{"current_steps": 5720, "total_steps": 7532, "loss": 0.23087520897388458, "lr": 3.0046215355417117e-06, "epoch": 1.5189217899349357, "percentage": 75.94, "elapsed_time": "20:39:39", "remaining_time": "6:32:42"} +{"current_steps": 5721, "total_steps": 7532, "loss": 0.24425405263900757, "lr": 3.0014845885259236e-06, "epoch": 1.5191873589164786, "percentage": 75.96, "elapsed_time": "20:39:52", "remaining_time": "6:32:29"} +{"current_steps": 5722, "total_steps": 7532, "loss": 0.24888862669467926, "lr": 2.9983489907413675e-06, "epoch": 1.5194529278980216, "percentage": 75.97, "elapsed_time": "20:40:05", "remaining_time": "6:32:16"} +{"current_steps": 5723, "total_steps": 7532, "loss": 
0.23556756973266602, "lr": 2.9952147427925493e-06, "epoch": 1.5197184968795645, "percentage": 75.98, "elapsed_time": "20:40:18", "remaining_time": "6:32:03"} +{"current_steps": 5724, "total_steps": 7532, "loss": 0.2532619833946228, "lr": 2.992081845283715e-06, "epoch": 1.5199840658611075, "percentage": 76.0, "elapsed_time": "20:40:31", "remaining_time": "6:31:50"} +{"current_steps": 5725, "total_steps": 7532, "loss": 0.2574974000453949, "lr": 2.988950298818848e-06, "epoch": 1.5202496348426504, "percentage": 76.01, "elapsed_time": "20:40:43", "remaining_time": "6:31:36"} +{"current_steps": 5726, "total_steps": 7532, "loss": 0.21997734904289246, "lr": 2.9858201040016775e-06, "epoch": 1.5205152038241934, "percentage": 76.02, "elapsed_time": "20:40:57", "remaining_time": "6:31:23"} +{"current_steps": 5727, "total_steps": 7532, "loss": 0.2174127697944641, "lr": 2.982691261435666e-06, "epoch": 1.5207807728057363, "percentage": 76.04, "elapsed_time": "20:41:09", "remaining_time": "6:31:10"} +{"current_steps": 5728, "total_steps": 7532, "loss": 0.22455093264579773, "lr": 2.979563771724019e-06, "epoch": 1.5210463417872793, "percentage": 76.05, "elapsed_time": "20:41:23", "remaining_time": "6:30:58"} +{"current_steps": 5729, "total_steps": 7532, "loss": 0.270727276802063, "lr": 2.976437635469678e-06, "epoch": 1.5213119107688222, "percentage": 76.06, "elapsed_time": "20:41:36", "remaining_time": "6:30:44"} +{"current_steps": 5730, "total_steps": 7532, "loss": 0.2233714610338211, "lr": 2.9733128532753254e-06, "epoch": 1.5215774797503652, "percentage": 76.08, "elapsed_time": "20:41:49", "remaining_time": "6:30:32"} +{"current_steps": 5731, "total_steps": 7532, "loss": 0.23599566519260406, "lr": 2.970189425743383e-06, "epoch": 1.5218430487319081, "percentage": 76.09, "elapsed_time": "20:42:01", "remaining_time": "6:30:18"} +{"current_steps": 5732, "total_steps": 7532, "loss": 0.23598654568195343, "lr": 2.967067353476011e-06, "epoch": 1.522108617713451, "percentage": 76.1, 
"elapsed_time": "20:42:15", "remaining_time": "6:30:06"} +{"current_steps": 5733, "total_steps": 7532, "loss": 0.205197274684906, "lr": 2.963946637075107e-06, "epoch": 1.522374186694994, "percentage": 76.12, "elapsed_time": "20:42:27", "remaining_time": "6:29:52"} +{"current_steps": 5734, "total_steps": 7532, "loss": 0.23581506311893463, "lr": 2.9608272771423073e-06, "epoch": 1.522639755676537, "percentage": 76.13, "elapsed_time": "20:42:40", "remaining_time": "6:29:39"} +{"current_steps": 5735, "total_steps": 7532, "loss": 0.2088197022676468, "lr": 2.9577092742789915e-06, "epoch": 1.52290532465808, "percentage": 76.14, "elapsed_time": "20:42:53", "remaining_time": "6:29:26"} +{"current_steps": 5736, "total_steps": 7532, "loss": 0.22607067227363586, "lr": 2.95459262908627e-06, "epoch": 1.5231708936396229, "percentage": 76.16, "elapsed_time": "20:43:06", "remaining_time": "6:29:13"} +{"current_steps": 5737, "total_steps": 7532, "loss": 0.22242344915866852, "lr": 2.951477342164998e-06, "epoch": 1.5234364626211658, "percentage": 76.17, "elapsed_time": "20:43:19", "remaining_time": "6:29:00"} +{"current_steps": 5738, "total_steps": 7532, "loss": 0.25626271963119507, "lr": 2.9483634141157636e-06, "epoch": 1.5237020316027088, "percentage": 76.18, "elapsed_time": "20:43:32", "remaining_time": "6:28:47"} +{"current_steps": 5739, "total_steps": 7532, "loss": 0.2241421341896057, "lr": 2.9452508455388975e-06, "epoch": 1.5239676005842517, "percentage": 76.19, "elapsed_time": "20:43:45", "remaining_time": "6:28:34"} +{"current_steps": 5740, "total_steps": 7532, "loss": 0.2191103994846344, "lr": 2.9421396370344648e-06, "epoch": 1.5242331695657947, "percentage": 76.21, "elapsed_time": "20:43:58", "remaining_time": "6:28:21"} +{"current_steps": 5741, "total_steps": 7532, "loss": 0.26252660155296326, "lr": 2.9390297892022703e-06, "epoch": 1.5244987385473376, "percentage": 76.22, "elapsed_time": "20:44:11", "remaining_time": "6:28:08"} +{"current_steps": 5742, "total_steps": 7532, 
"loss": 0.21522507071495056, "lr": 2.9359213026418567e-06, "epoch": 1.5247643075288806, "percentage": 76.23, "elapsed_time": "20:44:24", "remaining_time": "6:27:55"} +{"current_steps": 5743, "total_steps": 7532, "loss": 0.20159044861793518, "lr": 2.932814177952499e-06, "epoch": 1.5250298765104235, "percentage": 76.25, "elapsed_time": "20:44:37", "remaining_time": "6:27:42"} +{"current_steps": 5744, "total_steps": 7532, "loss": 0.22679558396339417, "lr": 2.929708415733221e-06, "epoch": 1.5252954454919665, "percentage": 76.26, "elapsed_time": "20:44:50", "remaining_time": "6:27:29"} +{"current_steps": 5745, "total_steps": 7532, "loss": 0.2315664291381836, "lr": 2.926604016582776e-06, "epoch": 1.5255610144735094, "percentage": 76.27, "elapsed_time": "20:45:03", "remaining_time": "6:27:16"} +{"current_steps": 5746, "total_steps": 7532, "loss": 0.229634091258049, "lr": 2.923500981099652e-06, "epoch": 1.5258265834550524, "percentage": 76.29, "elapsed_time": "20:45:16", "remaining_time": "6:27:03"} +{"current_steps": 5747, "total_steps": 7532, "loss": 0.20657674968242645, "lr": 2.9203993098820793e-06, "epoch": 1.5260921524365954, "percentage": 76.3, "elapsed_time": "20:45:30", "remaining_time": "6:26:50"} +{"current_steps": 5748, "total_steps": 7532, "loss": 0.2306358814239502, "lr": 2.9172990035280237e-06, "epoch": 1.5263577214181385, "percentage": 76.31, "elapsed_time": "20:45:42", "remaining_time": "6:26:37"} +{"current_steps": 5749, "total_steps": 7532, "loss": 0.2608031928539276, "lr": 2.9142000626351875e-06, "epoch": 1.5266232903996815, "percentage": 76.33, "elapsed_time": "20:45:56", "remaining_time": "6:26:24"} +{"current_steps": 5750, "total_steps": 7532, "loss": 0.24675670266151428, "lr": 2.911102487801013e-06, "epoch": 1.5268888593812244, "percentage": 76.34, "elapsed_time": "20:46:09", "remaining_time": "6:26:11"} +{"current_steps": 5751, "total_steps": 7532, "loss": 0.22544966638088226, "lr": 2.908006279622667e-06, "epoch": 1.5271544283627674, "percentage": 
76.35, "elapsed_time": "20:46:22", "remaining_time": "6:25:59"} +{"current_steps": 5752, "total_steps": 7532, "loss": 0.2328556478023529, "lr": 2.904911438697071e-06, "epoch": 1.5274199973443103, "percentage": 76.37, "elapsed_time": "20:46:35", "remaining_time": "6:25:45"} +{"current_steps": 5753, "total_steps": 7532, "loss": 0.2316005825996399, "lr": 2.901817965620871e-06, "epoch": 1.5276855663258533, "percentage": 76.38, "elapsed_time": "20:46:48", "remaining_time": "6:25:33"} +{"current_steps": 5754, "total_steps": 7532, "loss": 0.2332756370306015, "lr": 2.8987258609904522e-06, "epoch": 1.5279511353073962, "percentage": 76.39, "elapsed_time": "20:47:01", "remaining_time": "6:25:20"} +{"current_steps": 5755, "total_steps": 7532, "loss": 0.24855142831802368, "lr": 2.8956351254019355e-06, "epoch": 1.5282167042889392, "percentage": 76.41, "elapsed_time": "20:47:14", "remaining_time": "6:25:06"} +{"current_steps": 5756, "total_steps": 7532, "loss": 0.18745368719100952, "lr": 2.8925457594511775e-06, "epoch": 1.5284822732704821, "percentage": 76.42, "elapsed_time": "20:47:27", "remaining_time": "6:24:54"} +{"current_steps": 5757, "total_steps": 7532, "loss": 0.22402942180633545, "lr": 2.889457763733774e-06, "epoch": 1.528747842252025, "percentage": 76.43, "elapsed_time": "20:47:40", "remaining_time": "6:24:41"} +{"current_steps": 5758, "total_steps": 7532, "loss": 0.2156108319759369, "lr": 2.886371138845051e-06, "epoch": 1.529013411233568, "percentage": 76.45, "elapsed_time": "20:47:53", "remaining_time": "6:24:28"} +{"current_steps": 5759, "total_steps": 7532, "loss": 0.22866520285606384, "lr": 2.883285885380076e-06, "epoch": 1.529278980215111, "percentage": 76.46, "elapsed_time": "20:48:06", "remaining_time": "6:24:15"} +{"current_steps": 5760, "total_steps": 7532, "loss": 0.2486938238143921, "lr": 2.880202003933645e-06, "epoch": 1.529544549196654, "percentage": 76.47, "elapsed_time": "20:48:20", "remaining_time": "6:24:02"} +{"current_steps": 5761, "total_steps": 
7532, "loss": 0.2565295696258545, "lr": 2.877119495100301e-06, "epoch": 1.529810118178197, "percentage": 76.49, "elapsed_time": "20:48:32", "remaining_time": "6:23:49"} +{"current_steps": 5762, "total_steps": 7532, "loss": 0.21510455012321472, "lr": 2.8740383594743116e-06, "epoch": 1.5300756871597399, "percentage": 76.5, "elapsed_time": "20:48:46", "remaining_time": "6:23:36"} +{"current_steps": 5763, "total_steps": 7532, "loss": 0.2122025489807129, "lr": 2.8709585976496825e-06, "epoch": 1.5303412561412828, "percentage": 76.51, "elapsed_time": "20:48:59", "remaining_time": "6:23:23"} +{"current_steps": 5764, "total_steps": 7532, "loss": 0.24274399876594543, "lr": 2.8678802102201575e-06, "epoch": 1.5306068251228258, "percentage": 76.53, "elapsed_time": "20:49:12", "remaining_time": "6:23:10"} +{"current_steps": 5765, "total_steps": 7532, "loss": 0.22325341403484344, "lr": 2.864803197779216e-06, "epoch": 1.5308723941043687, "percentage": 76.54, "elapsed_time": "20:49:25", "remaining_time": "6:22:57"} +{"current_steps": 5766, "total_steps": 7532, "loss": 0.25205284357070923, "lr": 2.8617275609200625e-06, "epoch": 1.5311379630859117, "percentage": 76.55, "elapsed_time": "20:49:38", "remaining_time": "6:22:44"} +{"current_steps": 5767, "total_steps": 7532, "loss": 0.2047557830810547, "lr": 2.8586533002356465e-06, "epoch": 1.5314035320674546, "percentage": 76.57, "elapsed_time": "20:49:51", "remaining_time": "6:22:31"} +{"current_steps": 5768, "total_steps": 7532, "loss": 0.2166992425918579, "lr": 2.8555804163186508e-06, "epoch": 1.5316691010489976, "percentage": 76.58, "elapsed_time": "20:50:04", "remaining_time": "6:22:18"} +{"current_steps": 5769, "total_steps": 7532, "loss": 0.26253193616867065, "lr": 2.8525089097614867e-06, "epoch": 1.5319346700305405, "percentage": 76.59, "elapsed_time": "20:50:17", "remaining_time": "6:22:05"} +{"current_steps": 5770, "total_steps": 7532, "loss": 0.23307687044143677, "lr": 2.8494387811563108e-06, "epoch": 1.5322002390120835, 
"percentage": 76.61, "elapsed_time": "20:50:30", "remaining_time": "6:21:52"} +{"current_steps": 5771, "total_steps": 7532, "loss": 0.22128549218177795, "lr": 2.8463700310950047e-06, "epoch": 1.5324658079936264, "percentage": 76.62, "elapsed_time": "20:50:43", "remaining_time": "6:21:39"} +{"current_steps": 5772, "total_steps": 7532, "loss": 0.21966281533241272, "lr": 2.8433026601691883e-06, "epoch": 1.5327313769751694, "percentage": 76.63, "elapsed_time": "20:50:55", "remaining_time": "6:21:26"} +{"current_steps": 5773, "total_steps": 7532, "loss": 0.22869305312633514, "lr": 2.840236668970213e-06, "epoch": 1.5329969459567123, "percentage": 76.65, "elapsed_time": "20:51:08", "remaining_time": "6:21:13"} +{"current_steps": 5774, "total_steps": 7532, "loss": 0.21431279182434082, "lr": 2.837172058089167e-06, "epoch": 1.5332625149382553, "percentage": 76.66, "elapsed_time": "20:51:21", "remaining_time": "6:21:00"} +{"current_steps": 5775, "total_steps": 7532, "loss": 0.24610282480716705, "lr": 2.8341088281168693e-06, "epoch": 1.5335280839197982, "percentage": 76.67, "elapsed_time": "20:51:34", "remaining_time": "6:20:47"} +{"current_steps": 5776, "total_steps": 7532, "loss": 0.24414925277233124, "lr": 2.8310469796438767e-06, "epoch": 1.5337936529013412, "percentage": 76.69, "elapsed_time": "20:51:47", "remaining_time": "6:20:33"} +{"current_steps": 5777, "total_steps": 7532, "loss": 0.2330513596534729, "lr": 2.8279865132604766e-06, "epoch": 1.5340592218828841, "percentage": 76.7, "elapsed_time": "20:52:00", "remaining_time": "6:20:21"} +{"current_steps": 5778, "total_steps": 7532, "loss": 0.23048308491706848, "lr": 2.8249274295566863e-06, "epoch": 1.534324790864427, "percentage": 76.71, "elapsed_time": "20:52:13", "remaining_time": "6:20:07"} +{"current_steps": 5779, "total_steps": 7532, "loss": 0.2411375492811203, "lr": 2.821869729122273e-06, "epoch": 1.53459035984597, "percentage": 76.73, "elapsed_time": "20:52:27", "remaining_time": "6:19:55"} +{"current_steps": 
5780, "total_steps": 7532, "loss": 0.22985543310642242, "lr": 2.818813412546715e-06, "epoch": 1.534855928827513, "percentage": 76.74, "elapsed_time": "20:52:40", "remaining_time": "6:19:42"} +{"current_steps": 5781, "total_steps": 7532, "loss": 0.20867247879505157, "lr": 2.815758480419235e-06, "epoch": 1.535121497809056, "percentage": 76.75, "elapsed_time": "20:52:53", "remaining_time": "6:19:29"} +{"current_steps": 5782, "total_steps": 7532, "loss": 0.26378586888313293, "lr": 2.8127049333287913e-06, "epoch": 1.5353870667905989, "percentage": 76.77, "elapsed_time": "20:53:06", "remaining_time": "6:19:16"} +{"current_steps": 5783, "total_steps": 7532, "loss": 0.2690306305885315, "lr": 2.8096527718640687e-06, "epoch": 1.5356526357721418, "percentage": 76.78, "elapsed_time": "20:53:19", "remaining_time": "6:19:03"} +{"current_steps": 5784, "total_steps": 7532, "loss": 0.22226165235042572, "lr": 2.8066019966134907e-06, "epoch": 1.5359182047536848, "percentage": 76.79, "elapsed_time": "20:53:32", "remaining_time": "6:18:50"} +{"current_steps": 5785, "total_steps": 7532, "loss": 0.23370322585105896, "lr": 2.803552608165209e-06, "epoch": 1.5361837737352277, "percentage": 76.81, "elapsed_time": "20:53:45", "remaining_time": "6:18:37"} +{"current_steps": 5786, "total_steps": 7532, "loss": 0.26137909293174744, "lr": 2.8005046071071107e-06, "epoch": 1.5364493427167707, "percentage": 76.82, "elapsed_time": "20:53:59", "remaining_time": "6:18:24"} +{"current_steps": 5787, "total_steps": 7532, "loss": 0.22630617022514343, "lr": 2.7974579940268096e-06, "epoch": 1.5367149116983136, "percentage": 76.83, "elapsed_time": "20:54:11", "remaining_time": "6:18:11"} +{"current_steps": 5788, "total_steps": 7532, "loss": 0.22641140222549438, "lr": 2.7944127695116663e-06, "epoch": 1.5369804806798566, "percentage": 76.85, "elapsed_time": "20:54:25", "remaining_time": "6:17:58"} +{"current_steps": 5789, "total_steps": 7532, "loss": 0.19647541642189026, "lr": 2.791368934148757e-06, "epoch": 
1.5372460496613995, "percentage": 76.86, "elapsed_time": "20:54:37", "remaining_time": "6:17:45"} +{"current_steps": 5790, "total_steps": 7532, "loss": 0.22399532794952393, "lr": 2.788326488524901e-06, "epoch": 1.5375116186429425, "percentage": 76.87, "elapsed_time": "20:54:50", "remaining_time": "6:17:32"} +{"current_steps": 5791, "total_steps": 7532, "loss": 0.22549685835838318, "lr": 2.7852854332266434e-06, "epoch": 1.5377771876244855, "percentage": 76.89, "elapsed_time": "20:55:03", "remaining_time": "6:17:19"} +{"current_steps": 5792, "total_steps": 7532, "loss": 0.2129821628332138, "lr": 2.7822457688402637e-06, "epoch": 1.5380427566060284, "percentage": 76.9, "elapsed_time": "20:55:17", "remaining_time": "6:17:06"} +{"current_steps": 5793, "total_steps": 7532, "loss": 0.25330638885498047, "lr": 2.7792074959517755e-06, "epoch": 1.5383083255875714, "percentage": 76.91, "elapsed_time": "20:55:30", "remaining_time": "6:16:53"} +{"current_steps": 5794, "total_steps": 7532, "loss": 0.2413945198059082, "lr": 2.7761706151469204e-06, "epoch": 1.5385738945691143, "percentage": 76.93, "elapsed_time": "20:55:43", "remaining_time": "6:16:40"} +{"current_steps": 5795, "total_steps": 7532, "loss": 0.21930523216724396, "lr": 2.773135127011174e-06, "epoch": 1.5388394635506573, "percentage": 76.94, "elapsed_time": "20:55:56", "remaining_time": "6:16:27"} +{"current_steps": 5796, "total_steps": 7532, "loss": 0.25499141216278076, "lr": 2.7701010321297416e-06, "epoch": 1.5391050325322002, "percentage": 76.95, "elapsed_time": "20:56:09", "remaining_time": "6:16:14"} +{"current_steps": 5797, "total_steps": 7532, "loss": 0.19475680589675903, "lr": 2.7670683310875613e-06, "epoch": 1.5393706015137432, "percentage": 76.96, "elapsed_time": "20:56:22", "remaining_time": "6:16:01"} +{"current_steps": 5798, "total_steps": 7532, "loss": 0.22155825793743134, "lr": 2.7640370244693026e-06, "epoch": 1.5396361704952861, "percentage": 76.98, "elapsed_time": "20:56:35", "remaining_time": 
"6:15:48"} +{"current_steps": 5799, "total_steps": 7532, "loss": 0.2146138846874237, "lr": 2.761007112859365e-06, "epoch": 1.539901739476829, "percentage": 76.99, "elapsed_time": "20:56:47", "remaining_time": "6:15:35"} +{"current_steps": 5800, "total_steps": 7532, "loss": 0.22698411345481873, "lr": 2.7579785968418804e-06, "epoch": 1.540167308458372, "percentage": 77.0, "elapsed_time": "20:57:00", "remaining_time": "6:15:22"} +{"current_steps": 5801, "total_steps": 7532, "loss": 0.23889532685279846, "lr": 2.75495147700071e-06, "epoch": 1.540432877439915, "percentage": 77.02, "elapsed_time": "20:57:19", "remaining_time": "6:15:10"} +{"current_steps": 5802, "total_steps": 7532, "loss": 0.2514609694480896, "lr": 2.7519257539194488e-06, "epoch": 1.540698446421458, "percentage": 77.03, "elapsed_time": "20:57:32", "remaining_time": "6:14:57"} +{"current_steps": 5803, "total_steps": 7532, "loss": 0.22332100570201874, "lr": 2.7489014281814185e-06, "epoch": 1.5409640154030009, "percentage": 77.04, "elapsed_time": "20:57:45", "remaining_time": "6:14:44"} +{"current_steps": 5804, "total_steps": 7532, "loss": 0.21316683292388916, "lr": 2.745878500369673e-06, "epoch": 1.5412295843845438, "percentage": 77.06, "elapsed_time": "20:57:58", "remaining_time": "6:14:31"} +{"current_steps": 5805, "total_steps": 7532, "loss": 0.2228018194437027, "lr": 2.742856971066996e-06, "epoch": 1.5414951533660868, "percentage": 77.07, "elapsed_time": "20:58:11", "remaining_time": "6:14:18"} +{"current_steps": 5806, "total_steps": 7532, "loss": 0.22217239439487457, "lr": 2.7398368408559084e-06, "epoch": 1.5417607223476297, "percentage": 77.08, "elapsed_time": "20:58:24", "remaining_time": "6:14:05"} +{"current_steps": 5807, "total_steps": 7532, "loss": 0.21147233247756958, "lr": 2.736818110318652e-06, "epoch": 1.5420262913291727, "percentage": 77.1, "elapsed_time": "20:58:37", "remaining_time": "6:13:52"} +{"current_steps": 5808, "total_steps": 7532, "loss": 0.23844698071479797, "lr": 
2.7338007800372024e-06, "epoch": 1.5422918603107156, "percentage": 77.11, "elapsed_time": "20:58:50", "remaining_time": "6:13:39"} +{"current_steps": 5809, "total_steps": 7532, "loss": 0.2361423820257187, "lr": 2.7307848505932653e-06, "epoch": 1.5425574292922586, "percentage": 77.12, "elapsed_time": "20:59:03", "remaining_time": "6:13:26"} +{"current_steps": 5810, "total_steps": 7532, "loss": 0.21585656702518463, "lr": 2.727770322568277e-06, "epoch": 1.5428229982738015, "percentage": 77.14, "elapsed_time": "20:59:16", "remaining_time": "6:13:13"} +{"current_steps": 5811, "total_steps": 7532, "loss": 0.233969584107399, "lr": 2.724757196543403e-06, "epoch": 1.5430885672553445, "percentage": 77.15, "elapsed_time": "20:59:30", "remaining_time": "6:13:01"} +{"current_steps": 5812, "total_steps": 7532, "loss": 0.25040164589881897, "lr": 2.7217454730995363e-06, "epoch": 1.5433541362368874, "percentage": 77.16, "elapsed_time": "20:59:43", "remaining_time": "6:12:48"} +{"current_steps": 5813, "total_steps": 7532, "loss": 0.25848713517189026, "lr": 2.7187351528173046e-06, "epoch": 1.5436197052184304, "percentage": 77.18, "elapsed_time": "20:59:56", "remaining_time": "6:12:35"} +{"current_steps": 5814, "total_steps": 7532, "loss": 0.22255051136016846, "lr": 2.715726236277061e-06, "epoch": 1.5438852741999733, "percentage": 77.19, "elapsed_time": "21:00:09", "remaining_time": "6:12:22"} +{"current_steps": 5815, "total_steps": 7532, "loss": 0.1882694661617279, "lr": 2.7127187240588883e-06, "epoch": 1.5441508431815163, "percentage": 77.2, "elapsed_time": "21:00:21", "remaining_time": "6:12:08"} +{"current_steps": 5816, "total_steps": 7532, "loss": 0.20070400834083557, "lr": 2.7097126167426002e-06, "epoch": 1.5444164121630592, "percentage": 77.22, "elapsed_time": "21:00:35", "remaining_time": "6:11:56"} +{"current_steps": 5817, "total_steps": 7532, "loss": 0.25316092371940613, "lr": 2.706707914907739e-06, "epoch": 1.5446819811446022, "percentage": 77.23, "elapsed_time": 
"21:00:48", "remaining_time": "6:11:43"} +{"current_steps": 5818, "total_steps": 7532, "loss": 0.24665585160255432, "lr": 2.703704619133576e-06, "epoch": 1.5449475501261452, "percentage": 77.24, "elapsed_time": "21:01:01", "remaining_time": "6:11:30"} +{"current_steps": 5819, "total_steps": 7532, "loss": 0.24172846972942352, "lr": 2.7007027299991095e-06, "epoch": 1.545213119107688, "percentage": 77.26, "elapsed_time": "21:01:14", "remaining_time": "6:11:17"} +{"current_steps": 5820, "total_steps": 7532, "loss": 0.2405129075050354, "lr": 2.6977022480830708e-06, "epoch": 1.545478688089231, "percentage": 77.27, "elapsed_time": "21:01:27", "remaining_time": "6:11:04"} +{"current_steps": 5821, "total_steps": 7532, "loss": 0.19716276228427887, "lr": 2.694703173963914e-06, "epoch": 1.545744257070774, "percentage": 77.28, "elapsed_time": "21:01:40", "remaining_time": "6:10:51"} +{"current_steps": 5822, "total_steps": 7532, "loss": 0.20343703031539917, "lr": 2.6917055082198284e-06, "epoch": 1.546009826052317, "percentage": 77.3, "elapsed_time": "21:01:53", "remaining_time": "6:10:38"} +{"current_steps": 5823, "total_steps": 7532, "loss": 0.24382619559764862, "lr": 2.688709251428725e-06, "epoch": 1.54627539503386, "percentage": 77.31, "elapsed_time": "21:02:06", "remaining_time": "6:10:25"} +{"current_steps": 5824, "total_steps": 7532, "loss": 0.2962399423122406, "lr": 2.6857144041682514e-06, "epoch": 1.5465409640154029, "percentage": 77.32, "elapsed_time": "21:02:19", "remaining_time": "6:10:12"} +{"current_steps": 5825, "total_steps": 7532, "loss": 0.24034687876701355, "lr": 2.6827209670157774e-06, "epoch": 1.5468065329969458, "percentage": 77.34, "elapsed_time": "21:02:32", "remaining_time": "6:09:59"} +{"current_steps": 5826, "total_steps": 7532, "loss": 0.2575085163116455, "lr": 2.6797289405484016e-06, "epoch": 1.5470721019784888, "percentage": 77.35, "elapsed_time": "21:02:45", "remaining_time": "6:09:46"} +{"current_steps": 5827, "total_steps": 7532, "loss": 
0.2586629092693329, "lr": 2.6767383253429515e-06, "epoch": 1.5473376709600317, "percentage": 77.36, "elapsed_time": "21:02:58", "remaining_time": "6:09:33"} +{"current_steps": 5828, "total_steps": 7532, "loss": 0.18447624146938324, "lr": 2.6737491219759815e-06, "epoch": 1.5476032399415747, "percentage": 77.38, "elapsed_time": "21:03:11", "remaining_time": "6:09:19"} +{"current_steps": 5829, "total_steps": 7532, "loss": 0.244853213429451, "lr": 2.670761331023779e-06, "epoch": 1.5478688089231176, "percentage": 77.39, "elapsed_time": "21:03:24", "remaining_time": "6:09:07"} +{"current_steps": 5830, "total_steps": 7532, "loss": 0.24641919136047363, "lr": 2.66777495306235e-06, "epoch": 1.5481343779046606, "percentage": 77.4, "elapsed_time": "21:03:37", "remaining_time": "6:08:54"} +{"current_steps": 5831, "total_steps": 7532, "loss": 0.2364550232887268, "lr": 2.6647899886674323e-06, "epoch": 1.5483999468862035, "percentage": 77.42, "elapsed_time": "21:03:50", "remaining_time": "6:08:41"} +{"current_steps": 5832, "total_steps": 7532, "loss": 0.17760278284549713, "lr": 2.6618064384144925e-06, "epoch": 1.5486655158677467, "percentage": 77.43, "elapsed_time": "21:04:03", "remaining_time": "6:08:28"} +{"current_steps": 5833, "total_steps": 7532, "loss": 0.18571510910987854, "lr": 2.6588243028787274e-06, "epoch": 1.5489310848492897, "percentage": 77.44, "elapsed_time": "21:04:17", "remaining_time": "6:08:15"} +{"current_steps": 5834, "total_steps": 7532, "loss": 0.23693162202835083, "lr": 2.655843582635057e-06, "epoch": 1.5491966538308326, "percentage": 77.46, "elapsed_time": "21:04:29", "remaining_time": "6:08:02"} +{"current_steps": 5835, "total_steps": 7532, "loss": 0.26481011509895325, "lr": 2.652864278258126e-06, "epoch": 1.5494622228123756, "percentage": 77.47, "elapsed_time": "21:04:43", "remaining_time": "6:07:49"} +{"current_steps": 5836, "total_steps": 7532, "loss": 0.23405003547668457, "lr": 2.6498863903223115e-06, "epoch": 1.5497277917939185, "percentage": 77.48, 
"elapsed_time": "21:04:55", "remaining_time": "6:07:36"} +{"current_steps": 5837, "total_steps": 7532, "loss": 0.20662814378738403, "lr": 2.6469099194017144e-06, "epoch": 1.5499933607754615, "percentage": 77.5, "elapsed_time": "21:05:08", "remaining_time": "6:07:23"} +{"current_steps": 5838, "total_steps": 7532, "loss": 0.2722313404083252, "lr": 2.6439348660701634e-06, "epoch": 1.5502589297570044, "percentage": 77.51, "elapsed_time": "21:05:21", "remaining_time": "6:07:09"} +{"current_steps": 5839, "total_steps": 7532, "loss": 0.2288864552974701, "lr": 2.6409612309012134e-06, "epoch": 1.5505244987385474, "percentage": 77.52, "elapsed_time": "21:05:34", "remaining_time": "6:06:56"} +{"current_steps": 5840, "total_steps": 7532, "loss": 0.2286190539598465, "lr": 2.6379890144681464e-06, "epoch": 1.5507900677200903, "percentage": 77.54, "elapsed_time": "21:05:47", "remaining_time": "6:06:43"} +{"current_steps": 5841, "total_steps": 7532, "loss": 0.22478938102722168, "lr": 2.6350182173439666e-06, "epoch": 1.5510556367016333, "percentage": 77.55, "elapsed_time": "21:05:59", "remaining_time": "6:06:30"} +{"current_steps": 5842, "total_steps": 7532, "loss": 0.2520615756511688, "lr": 2.6320488401014166e-06, "epoch": 1.5513212056831762, "percentage": 77.56, "elapsed_time": "21:06:12", "remaining_time": "6:06:17"} +{"current_steps": 5843, "total_steps": 7532, "loss": 0.2121289074420929, "lr": 2.629080883312952e-06, "epoch": 1.5515867746647192, "percentage": 77.58, "elapsed_time": "21:06:25", "remaining_time": "6:06:04"} +{"current_steps": 5844, "total_steps": 7532, "loss": 0.2252352237701416, "lr": 2.6261143475507656e-06, "epoch": 1.5518523436462621, "percentage": 77.59, "elapsed_time": "21:06:38", "remaining_time": "6:05:51"} +{"current_steps": 5845, "total_steps": 7532, "loss": 0.21188892424106598, "lr": 2.6231492333867626e-06, "epoch": 1.552117912627805, "percentage": 77.6, "elapsed_time": "21:06:51", "remaining_time": "6:05:38"} +{"current_steps": 5846, "total_steps": 
7532, "loss": 0.21534699201583862, "lr": 2.6201855413925857e-06, "epoch": 1.552383481609348, "percentage": 77.62, "elapsed_time": "21:07:04", "remaining_time": "6:05:25"} +{"current_steps": 5847, "total_steps": 7532, "loss": 0.21781614422798157, "lr": 2.6172232721395998e-06, "epoch": 1.552649050590891, "percentage": 77.63, "elapsed_time": "21:07:17", "remaining_time": "6:05:12"} +{"current_steps": 5848, "total_steps": 7532, "loss": 0.2476508915424347, "lr": 2.6142624261988947e-06, "epoch": 1.552914619572434, "percentage": 77.64, "elapsed_time": "21:07:30", "remaining_time": "6:04:59"} +{"current_steps": 5849, "total_steps": 7532, "loss": 0.2692151665687561, "lr": 2.611303004141287e-06, "epoch": 1.5531801885539769, "percentage": 77.66, "elapsed_time": "21:07:43", "remaining_time": "6:04:46"} +{"current_steps": 5850, "total_steps": 7532, "loss": 0.24868687987327576, "lr": 2.6083450065373163e-06, "epoch": 1.5534457575355198, "percentage": 77.67, "elapsed_time": "21:07:56", "remaining_time": "6:04:33"} +{"current_steps": 5851, "total_steps": 7532, "loss": 0.24215853214263916, "lr": 2.6053884339572543e-06, "epoch": 1.5537113265170628, "percentage": 77.68, "elapsed_time": "21:08:09", "remaining_time": "6:04:20"} +{"current_steps": 5852, "total_steps": 7532, "loss": 0.2157444804906845, "lr": 2.602433286971091e-06, "epoch": 1.5539768954986057, "percentage": 77.7, "elapsed_time": "21:08:22", "remaining_time": "6:04:07"} +{"current_steps": 5853, "total_steps": 7532, "loss": 0.22152379155158997, "lr": 2.599479566148544e-06, "epoch": 1.5542424644801487, "percentage": 77.71, "elapsed_time": "21:08:35", "remaining_time": "6:03:54"} +{"current_steps": 5854, "total_steps": 7532, "loss": 0.2278299182653427, "lr": 2.596527272059055e-06, "epoch": 1.5545080334616916, "percentage": 77.72, "elapsed_time": "21:08:48", "remaining_time": "6:03:41"} +{"current_steps": 5855, "total_steps": 7532, "loss": 0.23183950781822205, "lr": 2.593576405271793e-06, "epoch": 1.5547736024432346, 
"percentage": 77.73, "elapsed_time": "21:09:01", "remaining_time": "6:03:28"} +{"current_steps": 5856, "total_steps": 7532, "loss": 0.22167566418647766, "lr": 2.5906269663556484e-06, "epoch": 1.5550391714247775, "percentage": 77.75, "elapsed_time": "21:09:14", "remaining_time": "6:03:15"} +{"current_steps": 5857, "total_steps": 7532, "loss": 0.24111366271972656, "lr": 2.5876789558792403e-06, "epoch": 1.5553047404063205, "percentage": 77.76, "elapsed_time": "21:09:27", "remaining_time": "6:03:02"} +{"current_steps": 5858, "total_steps": 7532, "loss": 0.2090388983488083, "lr": 2.5847323744109087e-06, "epoch": 1.5555703093878634, "percentage": 77.77, "elapsed_time": "21:09:40", "remaining_time": "6:02:49"} +{"current_steps": 5859, "total_steps": 7532, "loss": 0.2087189108133316, "lr": 2.58178722251872e-06, "epoch": 1.5558358783694064, "percentage": 77.79, "elapsed_time": "21:09:54", "remaining_time": "6:02:36"} +{"current_steps": 5860, "total_steps": 7532, "loss": 0.2277342677116394, "lr": 2.578843500770465e-06, "epoch": 1.5561014473509496, "percentage": 77.8, "elapsed_time": "21:10:06", "remaining_time": "6:02:23"} +{"current_steps": 5861, "total_steps": 7532, "loss": 0.2204241305589676, "lr": 2.57590120973366e-06, "epoch": 1.5563670163324925, "percentage": 77.81, "elapsed_time": "21:10:20", "remaining_time": "6:02:10"} +{"current_steps": 5862, "total_steps": 7532, "loss": 0.2138606607913971, "lr": 2.5729603499755416e-06, "epoch": 1.5566325853140355, "percentage": 77.83, "elapsed_time": "21:10:33", "remaining_time": "6:01:57"} +{"current_steps": 5863, "total_steps": 7532, "loss": 0.21257862448692322, "lr": 2.5700209220630733e-06, "epoch": 1.5568981542955784, "percentage": 77.84, "elapsed_time": "21:10:46", "remaining_time": "6:01:44"} +{"current_steps": 5864, "total_steps": 7532, "loss": 0.20991909503936768, "lr": 2.5670829265629437e-06, "epoch": 1.5571637232771214, "percentage": 77.85, "elapsed_time": "21:10:59", "remaining_time": "6:01:31"} +{"current_steps": 5865, 
"total_steps": 7532, "loss": 0.23745422065258026, "lr": 2.5641463640415633e-06, "epoch": 1.5574292922586643, "percentage": 77.87, "elapsed_time": "21:11:13", "remaining_time": "6:01:19"} +{"current_steps": 5866, "total_steps": 7532, "loss": 0.21482989192008972, "lr": 2.561211235065065e-06, "epoch": 1.5576948612402073, "percentage": 77.88, "elapsed_time": "21:11:26", "remaining_time": "6:01:05"} +{"current_steps": 5867, "total_steps": 7532, "loss": 0.17866572737693787, "lr": 2.558277540199309e-06, "epoch": 1.5579604302217502, "percentage": 77.89, "elapsed_time": "21:11:39", "remaining_time": "6:00:52"} +{"current_steps": 5868, "total_steps": 7532, "loss": 0.223822683095932, "lr": 2.555345280009872e-06, "epoch": 1.5582259992032932, "percentage": 77.91, "elapsed_time": "21:11:52", "remaining_time": "6:00:40"} +{"current_steps": 5869, "total_steps": 7532, "loss": 0.2293519228696823, "lr": 2.552414455062068e-06, "epoch": 1.5584915681848361, "percentage": 77.92, "elapsed_time": "21:12:05", "remaining_time": "6:00:27"} +{"current_steps": 5870, "total_steps": 7532, "loss": 0.2556726038455963, "lr": 2.5494850659209203e-06, "epoch": 1.558757137166379, "percentage": 77.93, "elapsed_time": "21:12:18", "remaining_time": "6:00:14"} +{"current_steps": 5871, "total_steps": 7532, "loss": 0.26891303062438965, "lr": 2.546557113151181e-06, "epoch": 1.559022706147922, "percentage": 77.95, "elapsed_time": "21:12:31", "remaining_time": "6:00:01"} +{"current_steps": 5872, "total_steps": 7532, "loss": 0.19510813057422638, "lr": 2.5436305973173257e-06, "epoch": 1.559288275129465, "percentage": 77.96, "elapsed_time": "21:12:44", "remaining_time": "5:59:48"} +{"current_steps": 5873, "total_steps": 7532, "loss": 0.22906547784805298, "lr": 2.5407055189835518e-06, "epoch": 1.559553844111008, "percentage": 77.97, "elapsed_time": "21:12:57", "remaining_time": "5:59:35"} +{"current_steps": 5874, "total_steps": 7532, "loss": 0.25452786684036255, "lr": 2.5377818787137788e-06, "epoch": 
1.559819413092551, "percentage": 77.99, "elapsed_time": "21:13:11", "remaining_time": "5:59:22"} +{"current_steps": 5875, "total_steps": 7532, "loss": 0.205597922205925, "lr": 2.5348596770716503e-06, "epoch": 1.5600849820740939, "percentage": 78.0, "elapsed_time": "21:13:24", "remaining_time": "5:59:09"} +{"current_steps": 5876, "total_steps": 7532, "loss": 0.24009352922439575, "lr": 2.5319389146205344e-06, "epoch": 1.5603505510556368, "percentage": 78.01, "elapsed_time": "21:13:38", "remaining_time": "5:58:56"} +{"current_steps": 5877, "total_steps": 7532, "loss": 0.23381268978118896, "lr": 2.5290195919235173e-06, "epoch": 1.5606161200371798, "percentage": 78.03, "elapsed_time": "21:13:51", "remaining_time": "5:58:43"} +{"current_steps": 5878, "total_steps": 7532, "loss": 0.2267276644706726, "lr": 2.52610170954341e-06, "epoch": 1.5608816890187227, "percentage": 78.04, "elapsed_time": "21:14:05", "remaining_time": "5:58:30"} +{"current_steps": 5879, "total_steps": 7532, "loss": 0.24330289661884308, "lr": 2.5231852680427482e-06, "epoch": 1.5611472580002657, "percentage": 78.05, "elapsed_time": "21:14:18", "remaining_time": "5:58:17"} +{"current_steps": 5880, "total_steps": 7532, "loss": 0.24877145886421204, "lr": 2.5202702679837852e-06, "epoch": 1.5614128269818086, "percentage": 78.07, "elapsed_time": "21:14:31", "remaining_time": "5:58:04"} +{"current_steps": 5881, "total_steps": 7532, "loss": 0.20410388708114624, "lr": 2.5173567099285e-06, "epoch": 1.5616783959633516, "percentage": 78.08, "elapsed_time": "21:14:44", "remaining_time": "5:57:51"} +{"current_steps": 5882, "total_steps": 7532, "loss": 0.21524877846240997, "lr": 2.514444594438591e-06, "epoch": 1.5619439649448945, "percentage": 78.09, "elapsed_time": "21:14:57", "remaining_time": "5:57:38"} +{"current_steps": 5883, "total_steps": 7532, "loss": 0.18785043060779572, "lr": 2.5115339220754796e-06, "epoch": 1.5622095339264375, "percentage": 78.11, "elapsed_time": "21:15:11", "remaining_time": "5:57:26"} 
+{"current_steps": 5884, "total_steps": 7532, "loss": 0.21200208365917206, "lr": 2.5086246934003113e-06, "epoch": 1.5624751029079804, "percentage": 78.12, "elapsed_time": "21:15:24", "remaining_time": "5:57:13"} +{"current_steps": 5885, "total_steps": 7532, "loss": 0.20752021670341492, "lr": 2.5057169089739485e-06, "epoch": 1.5627406718895234, "percentage": 78.13, "elapsed_time": "21:15:37", "remaining_time": "5:57:00"} +{"current_steps": 5886, "total_steps": 7532, "loss": 0.21395736932754517, "lr": 2.502810569356976e-06, "epoch": 1.5630062408710663, "percentage": 78.15, "elapsed_time": "21:15:50", "remaining_time": "5:56:47"} +{"current_steps": 5887, "total_steps": 7532, "loss": 0.26949262619018555, "lr": 2.499905675109707e-06, "epoch": 1.5632718098526093, "percentage": 78.16, "elapsed_time": "21:16:04", "remaining_time": "5:56:34"} +{"current_steps": 5888, "total_steps": 7532, "loss": 0.2309839278459549, "lr": 2.497002226792169e-06, "epoch": 1.5635373788341522, "percentage": 78.17, "elapsed_time": "21:16:17", "remaining_time": "5:56:21"} +{"current_steps": 5889, "total_steps": 7532, "loss": 0.24415400624275208, "lr": 2.4941002249641123e-06, "epoch": 1.5638029478156952, "percentage": 78.19, "elapsed_time": "21:16:30", "remaining_time": "5:56:08"} +{"current_steps": 5890, "total_steps": 7532, "loss": 0.23493322730064392, "lr": 2.4911996701850083e-06, "epoch": 1.5640685167972381, "percentage": 78.2, "elapsed_time": "21:16:43", "remaining_time": "5:55:55"} +{"current_steps": 5891, "total_steps": 7532, "loss": 0.23824438452720642, "lr": 2.488300563014049e-06, "epoch": 1.564334085778781, "percentage": 78.21, "elapsed_time": "21:16:56", "remaining_time": "5:55:42"} +{"current_steps": 5892, "total_steps": 7532, "loss": 0.2523414194583893, "lr": 2.4854029040101503e-06, "epoch": 1.564599654760324, "percentage": 78.23, "elapsed_time": "21:17:09", "remaining_time": "5:55:29"} +{"current_steps": 5893, "total_steps": 7532, "loss": 0.21360887587070465, "lr": 
2.482506693731944e-06, "epoch": 1.564865223741867, "percentage": 78.24, "elapsed_time": "21:17:22", "remaining_time": "5:55:16"} +{"current_steps": 5894, "total_steps": 7532, "loss": 0.21182934939861298, "lr": 2.47961193273779e-06, "epoch": 1.56513079272341, "percentage": 78.25, "elapsed_time": "21:17:34", "remaining_time": "5:55:03"} +{"current_steps": 5895, "total_steps": 7532, "loss": 0.23104771971702576, "lr": 2.4767186215857542e-06, "epoch": 1.5653963617049529, "percentage": 78.27, "elapsed_time": "21:17:47", "remaining_time": "5:54:50"} +{"current_steps": 5896, "total_steps": 7532, "loss": 0.22297397255897522, "lr": 2.473826760833643e-06, "epoch": 1.5656619306864958, "percentage": 78.28, "elapsed_time": "21:18:00", "remaining_time": "5:54:37"} +{"current_steps": 5897, "total_steps": 7532, "loss": 0.21597865223884583, "lr": 2.4709363510389684e-06, "epoch": 1.5659274996680388, "percentage": 78.29, "elapsed_time": "21:18:13", "remaining_time": "5:54:24"} +{"current_steps": 5898, "total_steps": 7532, "loss": 0.27620527148246765, "lr": 2.468047392758969e-06, "epoch": 1.5661930686495817, "percentage": 78.31, "elapsed_time": "21:18:26", "remaining_time": "5:54:10"} +{"current_steps": 5899, "total_steps": 7532, "loss": 0.25262463092803955, "lr": 2.465159886550601e-06, "epoch": 1.5664586376311247, "percentage": 78.32, "elapsed_time": "21:18:38", "remaining_time": "5:53:57"} +{"current_steps": 5900, "total_steps": 7532, "loss": 0.2729034125804901, "lr": 2.462273832970542e-06, "epoch": 1.5667242066126676, "percentage": 78.33, "elapsed_time": "21:18:51", "remaining_time": "5:53:44"} +{"current_steps": 5901, "total_steps": 7532, "loss": 0.2313854992389679, "lr": 2.459389232575188e-06, "epoch": 1.5669897755942106, "percentage": 78.35, "elapsed_time": "21:19:09", "remaining_time": "5:53:33"} +{"current_steps": 5902, "total_steps": 7532, "loss": 0.22513791918754578, "lr": 2.456506085920658e-06, "epoch": 1.5672553445757536, "percentage": 78.36, "elapsed_time": "21:19:22", 
"remaining_time": "5:53:20"} +{"current_steps": 5903, "total_steps": 7532, "loss": 0.2658824026584625, "lr": 2.4536243935627856e-06, "epoch": 1.5675209135572965, "percentage": 78.37, "elapsed_time": "21:19:35", "remaining_time": "5:53:07"} +{"current_steps": 5904, "total_steps": 7532, "loss": 0.21781010925769806, "lr": 2.4507441560571275e-06, "epoch": 1.5677864825388395, "percentage": 78.39, "elapsed_time": "21:19:49", "remaining_time": "5:52:54"} +{"current_steps": 5905, "total_steps": 7532, "loss": 0.21047937870025635, "lr": 2.4478653739589632e-06, "epoch": 1.5680520515203824, "percentage": 78.4, "elapsed_time": "21:20:01", "remaining_time": "5:52:41"} +{"current_steps": 5906, "total_steps": 7532, "loss": 0.21674057841300964, "lr": 2.4449880478232858e-06, "epoch": 1.5683176205019254, "percentage": 78.41, "elapsed_time": "21:20:15", "remaining_time": "5:52:28"} +{"current_steps": 5907, "total_steps": 7532, "loss": 0.22062627971172333, "lr": 2.44211217820481e-06, "epoch": 1.5685831894834683, "percentage": 78.43, "elapsed_time": "21:20:28", "remaining_time": "5:52:15"} +{"current_steps": 5908, "total_steps": 7532, "loss": 0.22440886497497559, "lr": 2.439237765657968e-06, "epoch": 1.5688487584650113, "percentage": 78.44, "elapsed_time": "21:20:41", "remaining_time": "5:52:02"} +{"current_steps": 5909, "total_steps": 7532, "loss": 0.21888123452663422, "lr": 2.4363648107369175e-06, "epoch": 1.5691143274465542, "percentage": 78.45, "elapsed_time": "21:20:54", "remaining_time": "5:51:49"} +{"current_steps": 5910, "total_steps": 7532, "loss": 0.23104462027549744, "lr": 2.433493313995524e-06, "epoch": 1.5693798964280972, "percentage": 78.47, "elapsed_time": "21:21:07", "remaining_time": "5:51:36"} +{"current_steps": 5911, "total_steps": 7532, "loss": 0.23032237589359283, "lr": 2.4306232759873803e-06, "epoch": 1.5696454654096401, "percentage": 78.48, "elapsed_time": "21:21:20", "remaining_time": "5:51:23"} +{"current_steps": 5912, "total_steps": 7532, "loss": 
0.2588527202606201, "lr": 2.4277546972657974e-06, "epoch": 1.569911034391183, "percentage": 78.49, "elapsed_time": "21:21:33", "remaining_time": "5:51:10"} +{"current_steps": 5913, "total_steps": 7532, "loss": 0.2845698893070221, "lr": 2.424887578383799e-06, "epoch": 1.570176603372726, "percentage": 78.51, "elapsed_time": "21:21:47", "remaining_time": "5:50:57"} +{"current_steps": 5914, "total_steps": 7532, "loss": 0.23010894656181335, "lr": 2.4220219198941384e-06, "epoch": 1.570442172354269, "percentage": 78.52, "elapsed_time": "21:22:00", "remaining_time": "5:50:44"} +{"current_steps": 5915, "total_steps": 7532, "loss": 0.2623594403266907, "lr": 2.419157722349278e-06, "epoch": 1.570707741335812, "percentage": 78.53, "elapsed_time": "21:22:13", "remaining_time": "5:50:31"} +{"current_steps": 5916, "total_steps": 7532, "loss": 0.2107153981924057, "lr": 2.416294986301401e-06, "epoch": 1.5709733103173549, "percentage": 78.54, "elapsed_time": "21:22:26", "remaining_time": "5:50:18"} +{"current_steps": 5917, "total_steps": 7532, "loss": 0.2115003615617752, "lr": 2.413433712302409e-06, "epoch": 1.5712388792988978, "percentage": 78.56, "elapsed_time": "21:22:40", "remaining_time": "5:50:05"} +{"current_steps": 5918, "total_steps": 7532, "loss": 0.22406762838363647, "lr": 2.410573900903921e-06, "epoch": 1.5715044482804408, "percentage": 78.57, "elapsed_time": "21:22:52", "remaining_time": "5:49:52"} +{"current_steps": 5919, "total_steps": 7532, "loss": 0.24878525733947754, "lr": 2.407715552657277e-06, "epoch": 1.5717700172619837, "percentage": 78.58, "elapsed_time": "21:23:06", "remaining_time": "5:49:39"} +{"current_steps": 5920, "total_steps": 7532, "loss": 0.24546805024147034, "lr": 2.404858668113532e-06, "epoch": 1.5720355862435267, "percentage": 78.6, "elapsed_time": "21:23:18", "remaining_time": "5:49:26"} +{"current_steps": 5921, "total_steps": 7532, "loss": 0.23430263996124268, "lr": 2.402003247823459e-06, "epoch": 1.5723011552250696, "percentage": 78.61, 
"elapsed_time": "21:23:31", "remaining_time": "5:49:13"} +{"current_steps": 5922, "total_steps": 7532, "loss": 0.26935267448425293, "lr": 2.399149292337547e-06, "epoch": 1.5725667242066126, "percentage": 78.62, "elapsed_time": "21:23:45", "remaining_time": "5:49:00"} +{"current_steps": 5923, "total_steps": 7532, "loss": 0.21104472875595093, "lr": 2.3962968022060097e-06, "epoch": 1.5728322931881555, "percentage": 78.64, "elapsed_time": "21:23:57", "remaining_time": "5:48:47"} +{"current_steps": 5924, "total_steps": 7532, "loss": 0.17162750661373138, "lr": 2.3934457779787755e-06, "epoch": 1.5730978621696985, "percentage": 78.65, "elapsed_time": "21:24:11", "remaining_time": "5:48:34"} +{"current_steps": 5925, "total_steps": 7532, "loss": 0.22233474254608154, "lr": 2.390596220205481e-06, "epoch": 1.5733634311512414, "percentage": 78.66, "elapsed_time": "21:24:24", "remaining_time": "5:48:21"} +{"current_steps": 5926, "total_steps": 7532, "loss": 0.2326992005109787, "lr": 2.387748129435491e-06, "epoch": 1.5736290001327844, "percentage": 78.68, "elapsed_time": "21:24:37", "remaining_time": "5:48:08"} +{"current_steps": 5927, "total_steps": 7532, "loss": 0.245779350399971, "lr": 2.3849015062178835e-06, "epoch": 1.5738945691143273, "percentage": 78.69, "elapsed_time": "21:24:50", "remaining_time": "5:47:55"} +{"current_steps": 5928, "total_steps": 7532, "loss": 0.24269379675388336, "lr": 2.382056351101454e-06, "epoch": 1.5741601380958703, "percentage": 78.7, "elapsed_time": "21:25:03", "remaining_time": "5:47:42"} +{"current_steps": 5929, "total_steps": 7532, "loss": 0.23644019663333893, "lr": 2.3792126646347138e-06, "epoch": 1.5744257070774133, "percentage": 78.72, "elapsed_time": "21:25:16", "remaining_time": "5:47:29"} +{"current_steps": 5930, "total_steps": 7532, "loss": 0.254330575466156, "lr": 2.376370447365893e-06, "epoch": 1.5746912760589562, "percentage": 78.73, "elapsed_time": "21:25:30", "remaining_time": "5:47:16"} +{"current_steps": 5931, "total_steps": 7532, 
"loss": 0.2728506922721863, "lr": 2.373529699842936e-06, "epoch": 1.5749568450404992, "percentage": 78.74, "elapsed_time": "21:25:43", "remaining_time": "5:47:03"} +{"current_steps": 5932, "total_steps": 7532, "loss": 0.23671439290046692, "lr": 2.3706904226135087e-06, "epoch": 1.575222414022042, "percentage": 78.76, "elapsed_time": "21:25:56", "remaining_time": "5:46:50"} +{"current_steps": 5933, "total_steps": 7532, "loss": 0.24205748736858368, "lr": 2.367852616224989e-06, "epoch": 1.575487983003585, "percentage": 78.77, "elapsed_time": "21:26:09", "remaining_time": "5:46:37"} +{"current_steps": 5934, "total_steps": 7532, "loss": 0.1915436089038849, "lr": 2.3650162812244725e-06, "epoch": 1.575753551985128, "percentage": 78.78, "elapsed_time": "21:26:22", "remaining_time": "5:46:24"} +{"current_steps": 5935, "total_steps": 7532, "loss": 0.23453299701213837, "lr": 2.3621814181587697e-06, "epoch": 1.576019120966671, "percentage": 78.8, "elapsed_time": "21:26:35", "remaining_time": "5:46:11"} +{"current_steps": 5936, "total_steps": 7532, "loss": 0.24066327512264252, "lr": 2.3593480275744106e-06, "epoch": 1.576284689948214, "percentage": 78.81, "elapsed_time": "21:26:48", "remaining_time": "5:45:58"} +{"current_steps": 5937, "total_steps": 7532, "loss": 0.22510530054569244, "lr": 2.356516110017639e-06, "epoch": 1.5765502589297569, "percentage": 78.82, "elapsed_time": "21:27:01", "remaining_time": "5:45:45"} +{"current_steps": 5938, "total_steps": 7532, "loss": 0.22967353463172913, "lr": 2.3536856660344144e-06, "epoch": 1.5768158279112998, "percentage": 78.84, "elapsed_time": "21:27:15", "remaining_time": "5:45:33"} +{"current_steps": 5939, "total_steps": 7532, "loss": 0.2299107313156128, "lr": 2.3508566961704127e-06, "epoch": 1.5770813968928428, "percentage": 78.85, "elapsed_time": "21:27:28", "remaining_time": "5:45:20"} +{"current_steps": 5940, "total_steps": 7532, "loss": 0.23418918251991272, "lr": 2.3480292009710282e-06, "epoch": 1.5773469658743857, "percentage": 
78.86, "elapsed_time": "21:27:41", "remaining_time": "5:45:07"} +{"current_steps": 5941, "total_steps": 7532, "loss": 0.26528510451316833, "lr": 2.3452031809813657e-06, "epoch": 1.5776125348559287, "percentage": 78.88, "elapsed_time": "21:27:54", "remaining_time": "5:44:54"} +{"current_steps": 5942, "total_steps": 7532, "loss": 0.21878717839717865, "lr": 2.342378636746251e-06, "epoch": 1.5778781038374716, "percentage": 78.89, "elapsed_time": "21:28:07", "remaining_time": "5:44:41"} +{"current_steps": 5943, "total_steps": 7532, "loss": 0.19697530567646027, "lr": 2.339555568810221e-06, "epoch": 1.5781436728190146, "percentage": 78.9, "elapsed_time": "21:28:21", "remaining_time": "5:44:28"} +{"current_steps": 5944, "total_steps": 7532, "loss": 0.24812257289886475, "lr": 2.3367339777175313e-06, "epoch": 1.5784092418005577, "percentage": 78.92, "elapsed_time": "21:28:34", "remaining_time": "5:44:15"} +{"current_steps": 5945, "total_steps": 7532, "loss": 0.27651745080947876, "lr": 2.3339138640121504e-06, "epoch": 1.5786748107821007, "percentage": 78.93, "elapsed_time": "21:28:47", "remaining_time": "5:44:02"} +{"current_steps": 5946, "total_steps": 7532, "loss": 0.2651634216308594, "lr": 2.3310952282377643e-06, "epoch": 1.5789403797636437, "percentage": 78.94, "elapsed_time": "21:29:00", "remaining_time": "5:43:49"} +{"current_steps": 5947, "total_steps": 7532, "loss": 0.23799028992652893, "lr": 2.328278070937772e-06, "epoch": 1.5792059487451866, "percentage": 78.96, "elapsed_time": "21:29:13", "remaining_time": "5:43:36"} +{"current_steps": 5948, "total_steps": 7532, "loss": 0.2528802752494812, "lr": 2.3254623926552867e-06, "epoch": 1.5794715177267296, "percentage": 78.97, "elapsed_time": "21:29:26", "remaining_time": "5:43:23"} +{"current_steps": 5949, "total_steps": 7532, "loss": 0.23819346725940704, "lr": 2.322648193933137e-06, "epoch": 1.5797370867082725, "percentage": 78.98, "elapsed_time": "21:29:39", "remaining_time": "5:43:10"} +{"current_steps": 5950, 
"total_steps": 7532, "loss": 0.2510845959186554, "lr": 2.319835475313873e-06, "epoch": 1.5800026556898155, "percentage": 79.0, "elapsed_time": "21:29:53", "remaining_time": "5:42:57"} +{"current_steps": 5951, "total_steps": 7532, "loss": 0.20156612992286682, "lr": 2.31702423733975e-06, "epoch": 1.5802682246713584, "percentage": 79.01, "elapsed_time": "21:30:06", "remaining_time": "5:42:44"} +{"current_steps": 5952, "total_steps": 7532, "loss": 0.23375174403190613, "lr": 2.3142144805527413e-06, "epoch": 1.5805337936529014, "percentage": 79.02, "elapsed_time": "21:30:19", "remaining_time": "5:42:31"} +{"current_steps": 5953, "total_steps": 7532, "loss": 0.2378280758857727, "lr": 2.311406205494535e-06, "epoch": 1.5807993626344443, "percentage": 79.04, "elapsed_time": "21:30:32", "remaining_time": "5:42:18"} +{"current_steps": 5954, "total_steps": 7532, "loss": 0.2087683081626892, "lr": 2.308599412706535e-06, "epoch": 1.5810649316159873, "percentage": 79.05, "elapsed_time": "21:30:45", "remaining_time": "5:42:05"} +{"current_steps": 5955, "total_steps": 7532, "loss": 0.2228693962097168, "lr": 2.3057941027298557e-06, "epoch": 1.5813305005975302, "percentage": 79.06, "elapsed_time": "21:30:58", "remaining_time": "5:41:52"} +{"current_steps": 5956, "total_steps": 7532, "loss": 0.22694727778434753, "lr": 2.302990276105329e-06, "epoch": 1.5815960695790732, "percentage": 79.08, "elapsed_time": "21:31:11", "remaining_time": "5:41:39"} +{"current_steps": 5957, "total_steps": 7532, "loss": 0.22996942698955536, "lr": 2.300187933373499e-06, "epoch": 1.5818616385606161, "percentage": 79.09, "elapsed_time": "21:31:24", "remaining_time": "5:41:26"} +{"current_steps": 5958, "total_steps": 7532, "loss": 0.2440253496170044, "lr": 2.2973870750746253e-06, "epoch": 1.582127207542159, "percentage": 79.1, "elapsed_time": "21:31:38", "remaining_time": "5:41:13"} +{"current_steps": 5959, "total_steps": 7532, "loss": 0.2507309019565582, "lr": 2.2945877017486782e-06, "epoch": 1.582392776523702, 
"percentage": 79.12, "elapsed_time": "21:31:51", "remaining_time": "5:41:00"} +{"current_steps": 5960, "total_steps": 7532, "loss": 0.24790918827056885, "lr": 2.2917898139353467e-06, "epoch": 1.582658345505245, "percentage": 79.13, "elapsed_time": "21:32:04", "remaining_time": "5:40:47"} +{"current_steps": 5961, "total_steps": 7532, "loss": 0.22106975317001343, "lr": 2.2889934121740287e-06, "epoch": 1.582923914486788, "percentage": 79.14, "elapsed_time": "21:32:17", "remaining_time": "5:40:34"} +{"current_steps": 5962, "total_steps": 7532, "loss": 0.2410939633846283, "lr": 2.2861984970038385e-06, "epoch": 1.5831894834683309, "percentage": 79.16, "elapsed_time": "21:32:30", "remaining_time": "5:40:21"} +{"current_steps": 5963, "total_steps": 7532, "loss": 0.22821484506130219, "lr": 2.283405068963601e-06, "epoch": 1.5834550524498738, "percentage": 79.17, "elapsed_time": "21:32:43", "remaining_time": "5:40:08"} +{"current_steps": 5964, "total_steps": 7532, "loss": 0.21425281465053558, "lr": 2.2806131285918588e-06, "epoch": 1.5837206214314168, "percentage": 79.18, "elapsed_time": "21:32:56", "remaining_time": "5:39:55"} +{"current_steps": 5965, "total_steps": 7532, "loss": 0.22428902983665466, "lr": 2.277822676426863e-06, "epoch": 1.5839861904129597, "percentage": 79.2, "elapsed_time": "21:33:10", "remaining_time": "5:39:42"} +{"current_steps": 5966, "total_steps": 7532, "loss": 0.2986769676208496, "lr": 2.27503371300658e-06, "epoch": 1.5842517593945027, "percentage": 79.21, "elapsed_time": "21:33:23", "remaining_time": "5:39:29"} +{"current_steps": 5967, "total_steps": 7532, "loss": 0.24697065353393555, "lr": 2.272246238868687e-06, "epoch": 1.5845173283760456, "percentage": 79.22, "elapsed_time": "21:33:36", "remaining_time": "5:39:16"} +{"current_steps": 5968, "total_steps": 7532, "loss": 0.23725461959838867, "lr": 2.269460254550583e-06, "epoch": 1.5847828973575886, "percentage": 79.24, "elapsed_time": "21:33:49", "remaining_time": "5:39:03"} +{"current_steps": 5969, 
"total_steps": 7532, "loss": 0.2661248445510864, "lr": 2.2666757605893664e-06, "epoch": 1.5850484663391315, "percentage": 79.25, "elapsed_time": "21:34:02", "remaining_time": "5:38:50"} +{"current_steps": 5970, "total_steps": 7532, "loss": 0.23328733444213867, "lr": 2.263892757521858e-06, "epoch": 1.5853140353206745, "percentage": 79.26, "elapsed_time": "21:34:15", "remaining_time": "5:38:37"} +{"current_steps": 5971, "total_steps": 7532, "loss": 0.22886580228805542, "lr": 2.2611112458845873e-06, "epoch": 1.5855796043022174, "percentage": 79.28, "elapsed_time": "21:34:29", "remaining_time": "5:38:25"} +{"current_steps": 5972, "total_steps": 7532, "loss": 0.25051698088645935, "lr": 2.2583312262137966e-06, "epoch": 1.5858451732837606, "percentage": 79.29, "elapsed_time": "21:34:41", "remaining_time": "5:38:12"} +{"current_steps": 5973, "total_steps": 7532, "loss": 0.2400815784931183, "lr": 2.2555526990454413e-06, "epoch": 1.5861107422653036, "percentage": 79.3, "elapsed_time": "21:34:55", "remaining_time": "5:37:59"} +{"current_steps": 5974, "total_steps": 7532, "loss": 0.2212347537279129, "lr": 2.2527756649151912e-06, "epoch": 1.5863763112468465, "percentage": 79.31, "elapsed_time": "21:35:08", "remaining_time": "5:37:46"} +{"current_steps": 5975, "total_steps": 7532, "loss": 0.3002026379108429, "lr": 2.2500001243584204e-06, "epoch": 1.5866418802283895, "percentage": 79.33, "elapsed_time": "21:35:22", "remaining_time": "5:37:33"} +{"current_steps": 5976, "total_steps": 7532, "loss": 0.19813531637191772, "lr": 2.2472260779102185e-06, "epoch": 1.5869074492099324, "percentage": 79.34, "elapsed_time": "21:35:35", "remaining_time": "5:37:20"} +{"current_steps": 5977, "total_steps": 7532, "loss": 0.2233983874320984, "lr": 2.2444535261053968e-06, "epoch": 1.5871730181914754, "percentage": 79.35, "elapsed_time": "21:35:48", "remaining_time": "5:37:07"} +{"current_steps": 5978, "total_steps": 7532, "loss": 0.26059988141059875, "lr": 2.2416824694784676e-06, "epoch": 
1.5874385871730183, "percentage": 79.37, "elapsed_time": "21:36:02", "remaining_time": "5:36:54"} +{"current_steps": 5979, "total_steps": 7532, "loss": 0.23058606684207916, "lr": 2.2389129085636573e-06, "epoch": 1.5877041561545613, "percentage": 79.38, "elapsed_time": "21:36:15", "remaining_time": "5:36:41"} +{"current_steps": 5980, "total_steps": 7532, "loss": 0.2414383739233017, "lr": 2.236144843894904e-06, "epoch": 1.5879697251361042, "percentage": 79.39, "elapsed_time": "21:36:28", "remaining_time": "5:36:28"} +{"current_steps": 5981, "total_steps": 7532, "loss": 0.21688291430473328, "lr": 2.23337827600586e-06, "epoch": 1.5882352941176472, "percentage": 79.41, "elapsed_time": "21:36:41", "remaining_time": "5:36:15"} +{"current_steps": 5982, "total_steps": 7532, "loss": 0.24297408759593964, "lr": 2.2306132054298847e-06, "epoch": 1.5885008630991901, "percentage": 79.42, "elapsed_time": "21:36:54", "remaining_time": "5:36:02"} +{"current_steps": 5983, "total_steps": 7532, "loss": 0.2655821442604065, "lr": 2.227849632700052e-06, "epoch": 1.588766432080733, "percentage": 79.43, "elapsed_time": "21:37:07", "remaining_time": "5:35:49"} +{"current_steps": 5984, "total_steps": 7532, "loss": 0.20545080304145813, "lr": 2.225087558349146e-06, "epoch": 1.589032001062276, "percentage": 79.45, "elapsed_time": "21:37:21", "remaining_time": "5:35:36"} +{"current_steps": 5985, "total_steps": 7532, "loss": 0.24151475727558136, "lr": 2.2223269829096593e-06, "epoch": 1.589297570043819, "percentage": 79.46, "elapsed_time": "21:37:34", "remaining_time": "5:35:23"} +{"current_steps": 5986, "total_steps": 7532, "loss": 0.2294519543647766, "lr": 2.2195679069138043e-06, "epoch": 1.589563139025362, "percentage": 79.47, "elapsed_time": "21:37:47", "remaining_time": "5:35:10"} +{"current_steps": 5987, "total_steps": 7532, "loss": 0.2041824758052826, "lr": 2.2168103308934953e-06, "epoch": 1.589828708006905, "percentage": 79.49, "elapsed_time": "21:38:00", "remaining_time": "5:34:57"} 
+{"current_steps": 5988, "total_steps": 7532, "loss": 0.1856188029050827, "lr": 2.21405425538036e-06, "epoch": 1.5900942769884479, "percentage": 79.5, "elapsed_time": "21:38:14", "remaining_time": "5:34:45"} +{"current_steps": 5989, "total_steps": 7532, "loss": 0.24337685108184814, "lr": 2.2112996809057395e-06, "epoch": 1.5903598459699908, "percentage": 79.51, "elapsed_time": "21:38:27", "remaining_time": "5:34:32"} +{"current_steps": 5990, "total_steps": 7532, "loss": 0.2201787382364273, "lr": 2.20854660800068e-06, "epoch": 1.5906254149515338, "percentage": 79.53, "elapsed_time": "21:38:41", "remaining_time": "5:34:19"} +{"current_steps": 5991, "total_steps": 7532, "loss": 0.23505619168281555, "lr": 2.2057950371959427e-06, "epoch": 1.5908909839330767, "percentage": 79.54, "elapsed_time": "21:38:54", "remaining_time": "5:34:06"} +{"current_steps": 5992, "total_steps": 7532, "loss": 0.19528049230575562, "lr": 2.203044969021997e-06, "epoch": 1.5911565529146197, "percentage": 79.55, "elapsed_time": "21:39:07", "remaining_time": "5:33:53"} +{"current_steps": 5993, "total_steps": 7532, "loss": 0.22281290590763092, "lr": 2.2002964040090256e-06, "epoch": 1.5914221218961626, "percentage": 79.57, "elapsed_time": "21:39:20", "remaining_time": "5:33:40"} +{"current_steps": 5994, "total_steps": 7532, "loss": 0.19606761634349823, "lr": 2.1975493426869155e-06, "epoch": 1.5916876908777056, "percentage": 79.58, "elapsed_time": "21:39:33", "remaining_time": "5:33:27"} +{"current_steps": 5995, "total_steps": 7532, "loss": 0.22559323906898499, "lr": 2.1948037855852733e-06, "epoch": 1.5919532598592485, "percentage": 79.59, "elapsed_time": "21:39:47", "remaining_time": "5:33:14"} +{"current_steps": 5996, "total_steps": 7532, "loss": 0.20417393743991852, "lr": 2.192059733233408e-06, "epoch": 1.5922188288407915, "percentage": 79.61, "elapsed_time": "21:39:59", "remaining_time": "5:33:01"} +{"current_steps": 5997, "total_steps": 7532, "loss": 0.2579960525035858, "lr": 
2.18931718616034e-06, "epoch": 1.5924843978223344, "percentage": 79.62, "elapsed_time": "21:40:13", "remaining_time": "5:32:48"} +{"current_steps": 5998, "total_steps": 7532, "loss": 0.23339781165122986, "lr": 2.1865761448948e-06, "epoch": 1.5927499668038774, "percentage": 79.63, "elapsed_time": "21:40:26", "remaining_time": "5:32:35"} +{"current_steps": 5999, "total_steps": 7532, "loss": 0.2368197739124298, "lr": 2.1838366099652274e-06, "epoch": 1.5930155357854203, "percentage": 79.65, "elapsed_time": "21:40:39", "remaining_time": "5:32:22"} +{"current_steps": 6000, "total_steps": 7532, "loss": 0.2225847840309143, "lr": 2.1810985818997743e-06, "epoch": 1.5932811047669633, "percentage": 79.66, "elapsed_time": "21:40:52", "remaining_time": "5:32:09"} +{"current_steps": 6001, "total_steps": 7532, "loss": 0.2426701784133911, "lr": 2.1783620612263e-06, "epoch": 1.5935466737485062, "percentage": 79.67, "elapsed_time": "21:41:11", "remaining_time": "5:31:58"} +{"current_steps": 6002, "total_steps": 7532, "loss": 0.23647268116474152, "lr": 2.175627048472372e-06, "epoch": 1.5938122427300492, "percentage": 79.69, "elapsed_time": "21:41:24", "remaining_time": "5:31:45"} +{"current_steps": 6003, "total_steps": 7532, "loss": 0.22843337059020996, "lr": 2.1728935441652687e-06, "epoch": 1.5940778117115921, "percentage": 79.7, "elapsed_time": "21:41:38", "remaining_time": "5:31:32"} +{"current_steps": 6004, "total_steps": 7532, "loss": 0.21524465084075928, "lr": 2.1701615488319785e-06, "epoch": 1.594343380693135, "percentage": 79.71, "elapsed_time": "21:41:51", "remaining_time": "5:31:19"} +{"current_steps": 6005, "total_steps": 7532, "loss": 0.2160830795764923, "lr": 2.167431062999197e-06, "epoch": 1.594608949674678, "percentage": 79.73, "elapsed_time": "21:42:04", "remaining_time": "5:31:06"} +{"current_steps": 6006, "total_steps": 7532, "loss": 0.2321595996618271, "lr": 2.1647020871933288e-06, "epoch": 1.594874518656221, "percentage": 79.74, "elapsed_time": "21:42:17", 
"remaining_time": "5:30:53"} +{"current_steps": 6007, "total_steps": 7532, "loss": 0.21255026757717133, "lr": 2.1619746219404916e-06, "epoch": 1.595140087637764, "percentage": 79.75, "elapsed_time": "21:42:30", "remaining_time": "5:30:40"} +{"current_steps": 6008, "total_steps": 7532, "loss": 0.22851255536079407, "lr": 2.1592486677665047e-06, "epoch": 1.5954056566193069, "percentage": 79.77, "elapsed_time": "21:42:44", "remaining_time": "5:30:27"} +{"current_steps": 6009, "total_steps": 7532, "loss": 0.23844364285469055, "lr": 2.1565242251969022e-06, "epoch": 1.5956712256008498, "percentage": 79.78, "elapsed_time": "21:42:57", "remaining_time": "5:30:14"} +{"current_steps": 6010, "total_steps": 7532, "loss": 0.2592385411262512, "lr": 2.153801294756924e-06, "epoch": 1.5959367945823928, "percentage": 79.79, "elapsed_time": "21:43:10", "remaining_time": "5:30:01"} +{"current_steps": 6011, "total_steps": 7532, "loss": 0.22163718938827515, "lr": 2.151079876971519e-06, "epoch": 1.5962023635639357, "percentage": 79.81, "elapsed_time": "21:43:23", "remaining_time": "5:29:48"} +{"current_steps": 6012, "total_steps": 7532, "loss": 0.1960998773574829, "lr": 2.1483599723653415e-06, "epoch": 1.5964679325454787, "percentage": 79.82, "elapsed_time": "21:43:36", "remaining_time": "5:29:35"} +{"current_steps": 6013, "total_steps": 7532, "loss": 0.20811150968074799, "lr": 2.145641581462762e-06, "epoch": 1.5967335015270216, "percentage": 79.83, "elapsed_time": "21:43:49", "remaining_time": "5:29:22"} +{"current_steps": 6014, "total_steps": 7532, "loss": 0.23184621334075928, "lr": 2.1429247047878534e-06, "epoch": 1.5969990705085646, "percentage": 79.85, "elapsed_time": "21:44:03", "remaining_time": "5:29:09"} +{"current_steps": 6015, "total_steps": 7532, "loss": 0.22043758630752563, "lr": 2.1402093428643942e-06, "epoch": 1.5972646394901076, "percentage": 79.86, "elapsed_time": "21:44:16", "remaining_time": "5:28:56"} +{"current_steps": 6016, "total_steps": 7532, "loss": 
0.18621152639389038, "lr": 2.137495496215878e-06, "epoch": 1.5975302084716505, "percentage": 79.87, "elapsed_time": "21:44:29", "remaining_time": "5:28:43"} +{"current_steps": 6017, "total_steps": 7532, "loss": 0.2422473132610321, "lr": 2.1347831653654995e-06, "epoch": 1.5977957774531935, "percentage": 79.89, "elapsed_time": "21:44:42", "remaining_time": "5:28:30"} +{"current_steps": 6018, "total_steps": 7532, "loss": 0.2147202491760254, "lr": 2.132072350836164e-06, "epoch": 1.5980613464347364, "percentage": 79.9, "elapsed_time": "21:44:55", "remaining_time": "5:28:17"} +{"current_steps": 6019, "total_steps": 7532, "loss": 0.23091933131217957, "lr": 2.1293630531504873e-06, "epoch": 1.5983269154162794, "percentage": 79.91, "elapsed_time": "21:45:08", "remaining_time": "5:28:04"} +{"current_steps": 6020, "total_steps": 7532, "loss": 0.220037579536438, "lr": 2.1266552728307876e-06, "epoch": 1.5985924843978223, "percentage": 79.93, "elapsed_time": "21:45:21", "remaining_time": "5:27:51"} +{"current_steps": 6021, "total_steps": 7532, "loss": 0.25520551204681396, "lr": 2.1239490103990946e-06, "epoch": 1.5988580533793653, "percentage": 79.94, "elapsed_time": "21:45:34", "remaining_time": "5:27:38"} +{"current_steps": 6022, "total_steps": 7532, "loss": 0.23216915130615234, "lr": 2.1212442663771427e-06, "epoch": 1.5991236223609082, "percentage": 79.95, "elapsed_time": "21:45:47", "remaining_time": "5:27:25"} +{"current_steps": 6023, "total_steps": 7532, "loss": 0.22098806500434875, "lr": 2.118541041286374e-06, "epoch": 1.5993891913424512, "percentage": 79.97, "elapsed_time": "21:46:01", "remaining_time": "5:27:12"} +{"current_steps": 6024, "total_steps": 7532, "loss": 0.261300265789032, "lr": 2.11583933564794e-06, "epoch": 1.5996547603239941, "percentage": 79.98, "elapsed_time": "21:46:13", "remaining_time": "5:26:59"} +{"current_steps": 6025, "total_steps": 7532, "loss": 0.20427154004573822, "lr": 2.113139149982698e-06, "epoch": 1.599920329305537, "percentage": 79.99, 
"elapsed_time": "21:46:27", "remaining_time": "5:26:46"} +{"current_steps": 6026, "total_steps": 7532, "loss": 0.20700547099113464, "lr": 2.110440484811209e-06, "epoch": 1.60018589828708, "percentage": 80.01, "elapsed_time": "21:46:39", "remaining_time": "5:26:33"} +{"current_steps": 6027, "total_steps": 7532, "loss": 0.2789752185344696, "lr": 2.1077433406537475e-06, "epoch": 1.600451467268623, "percentage": 80.02, "elapsed_time": "21:46:52", "remaining_time": "5:26:20"} +{"current_steps": 6028, "total_steps": 7532, "loss": 0.2205841988325119, "lr": 2.1050477180302885e-06, "epoch": 1.600717036250166, "percentage": 80.03, "elapsed_time": "21:47:05", "remaining_time": "5:26:07"} +{"current_steps": 6029, "total_steps": 7532, "loss": 0.24921822547912598, "lr": 2.1023536174605184e-06, "epoch": 1.6009826052317089, "percentage": 80.05, "elapsed_time": "21:47:18", "remaining_time": "5:25:54"} +{"current_steps": 6030, "total_steps": 7532, "loss": 0.2516329288482666, "lr": 2.0996610394638228e-06, "epoch": 1.6012481742132518, "percentage": 80.06, "elapsed_time": "21:47:31", "remaining_time": "5:25:41"} +{"current_steps": 6031, "total_steps": 7532, "loss": 0.21832503378391266, "lr": 2.096969984559306e-06, "epoch": 1.6015137431947948, "percentage": 80.07, "elapsed_time": "21:47:44", "remaining_time": "5:25:28"} +{"current_steps": 6032, "total_steps": 7532, "loss": 0.2499273419380188, "lr": 2.094280453265769e-06, "epoch": 1.6017793121763377, "percentage": 80.08, "elapsed_time": "21:47:57", "remaining_time": "5:25:15"} +{"current_steps": 6033, "total_steps": 7532, "loss": 0.21701282262802124, "lr": 2.09159244610172e-06, "epoch": 1.6020448811578807, "percentage": 80.1, "elapsed_time": "21:48:10", "remaining_time": "5:25:02"} +{"current_steps": 6034, "total_steps": 7532, "loss": 0.24446213245391846, "lr": 2.0889059635853783e-06, "epoch": 1.6023104501394236, "percentage": 80.11, "elapsed_time": "21:48:23", "remaining_time": "5:24:49"} +{"current_steps": 6035, "total_steps": 7532, 
"loss": 0.27299973368644714, "lr": 2.0862210062346622e-06, "epoch": 1.6025760191209666, "percentage": 80.12, "elapsed_time": "21:48:36", "remaining_time": "5:24:36"} +{"current_steps": 6036, "total_steps": 7532, "loss": 0.2384832501411438, "lr": 2.0835375745672027e-06, "epoch": 1.6028415881025095, "percentage": 80.14, "elapsed_time": "21:48:49", "remaining_time": "5:24:23"} +{"current_steps": 6037, "total_steps": 7532, "loss": 0.2563338875770569, "lr": 2.0808556691003335e-06, "epoch": 1.6031071570840525, "percentage": 80.15, "elapsed_time": "21:49:02", "remaining_time": "5:24:10"} +{"current_steps": 6038, "total_steps": 7532, "loss": 0.29148975014686584, "lr": 2.0781752903510954e-06, "epoch": 1.6033727260655954, "percentage": 80.16, "elapsed_time": "21:49:15", "remaining_time": "5:23:57"} +{"current_steps": 6039, "total_steps": 7532, "loss": 0.24276503920555115, "lr": 2.0754964388362264e-06, "epoch": 1.6036382950471384, "percentage": 80.18, "elapsed_time": "21:49:27", "remaining_time": "5:23:44"} +{"current_steps": 6040, "total_steps": 7532, "loss": 0.1863931119441986, "lr": 2.0728191150721866e-06, "epoch": 1.6039038640286813, "percentage": 80.19, "elapsed_time": "21:49:41", "remaining_time": "5:23:31"} +{"current_steps": 6041, "total_steps": 7532, "loss": 0.21270868182182312, "lr": 2.0701433195751286e-06, "epoch": 1.6041694330102243, "percentage": 80.2, "elapsed_time": "21:49:54", "remaining_time": "5:23:18"} +{"current_steps": 6042, "total_steps": 7532, "loss": 0.21542516350746155, "lr": 2.0674690528609155e-06, "epoch": 1.6044350019917673, "percentage": 80.22, "elapsed_time": "21:50:07", "remaining_time": "5:23:05"} +{"current_steps": 6043, "total_steps": 7532, "loss": 0.23099860548973083, "lr": 2.0647963154451124e-06, "epoch": 1.6047005709733102, "percentage": 80.23, "elapsed_time": "21:50:20", "remaining_time": "5:22:52"} +{"current_steps": 6044, "total_steps": 7532, "loss": 0.22757291793823242, "lr": 2.062125107842993e-06, "epoch": 1.6049661399548532, 
"percentage": 80.24, "elapsed_time": "21:50:33", "remaining_time": "5:22:39"} +{"current_steps": 6045, "total_steps": 7532, "loss": 0.2370409518480301, "lr": 2.0594554305695346e-06, "epoch": 1.605231708936396, "percentage": 80.26, "elapsed_time": "21:50:46", "remaining_time": "5:22:26"} +{"current_steps": 6046, "total_steps": 7532, "loss": 0.21620309352874756, "lr": 2.0567872841394186e-06, "epoch": 1.605497277917939, "percentage": 80.27, "elapsed_time": "21:50:59", "remaining_time": "5:22:13"} +{"current_steps": 6047, "total_steps": 7532, "loss": 0.22821158170700073, "lr": 2.0541206690670324e-06, "epoch": 1.605762846899482, "percentage": 80.28, "elapsed_time": "21:51:12", "remaining_time": "5:22:00"} +{"current_steps": 6048, "total_steps": 7532, "loss": 0.24930253624916077, "lr": 2.0514555858664663e-06, "epoch": 1.606028415881025, "percentage": 80.3, "elapsed_time": "21:51:25", "remaining_time": "5:21:47"} +{"current_steps": 6049, "total_steps": 7532, "loss": 0.2491561770439148, "lr": 2.048792035051521e-06, "epoch": 1.606293984862568, "percentage": 80.31, "elapsed_time": "21:51:38", "remaining_time": "5:21:34"} +{"current_steps": 6050, "total_steps": 7532, "loss": 0.20652002096176147, "lr": 2.046130017135697e-06, "epoch": 1.6065595538441109, "percentage": 80.32, "elapsed_time": "21:51:51", "remaining_time": "5:21:21"} +{"current_steps": 6051, "total_steps": 7532, "loss": 0.25670793652534485, "lr": 2.0434695326321975e-06, "epoch": 1.6068251228256538, "percentage": 80.34, "elapsed_time": "21:52:05", "remaining_time": "5:21:08"} +{"current_steps": 6052, "total_steps": 7532, "loss": 0.2328418493270874, "lr": 2.0408105820539328e-06, "epoch": 1.6070906918071968, "percentage": 80.35, "elapsed_time": "21:52:18", "remaining_time": "5:20:55"} +{"current_steps": 6053, "total_steps": 7532, "loss": 0.20811162889003754, "lr": 2.0381531659135213e-06, "epoch": 1.6073562607887397, "percentage": 80.36, "elapsed_time": "21:52:31", "remaining_time": "5:20:42"} +{"current_steps": 6054, 
"total_steps": 7532, "loss": 0.24068522453308105, "lr": 2.0354972847232756e-06, "epoch": 1.6076218297702827, "percentage": 80.38, "elapsed_time": "21:52:44", "remaining_time": "5:20:29"} +{"current_steps": 6055, "total_steps": 7532, "loss": 0.2519197463989258, "lr": 2.032842938995221e-06, "epoch": 1.6078873987518256, "percentage": 80.39, "elapsed_time": "21:52:57", "remaining_time": "5:20:16"} +{"current_steps": 6056, "total_steps": 7532, "loss": 0.2293267697095871, "lr": 2.030190129241083e-06, "epoch": 1.6081529677333686, "percentage": 80.4, "elapsed_time": "21:53:10", "remaining_time": "5:20:03"} +{"current_steps": 6057, "total_steps": 7532, "loss": 0.22398510575294495, "lr": 2.027538855972291e-06, "epoch": 1.6084185367149118, "percentage": 80.42, "elapsed_time": "21:53:24", "remaining_time": "5:19:50"} +{"current_steps": 6058, "total_steps": 7532, "loss": 0.23074102401733398, "lr": 2.0248891196999833e-06, "epoch": 1.6086841056964547, "percentage": 80.43, "elapsed_time": "21:53:37", "remaining_time": "5:19:37"} +{"current_steps": 6059, "total_steps": 7532, "loss": 0.2618173658847809, "lr": 2.0222409209349957e-06, "epoch": 1.6089496746779977, "percentage": 80.44, "elapsed_time": "21:53:50", "remaining_time": "5:19:24"} +{"current_steps": 6060, "total_steps": 7532, "loss": 0.25361114740371704, "lr": 2.0195942601878703e-06, "epoch": 1.6092152436595406, "percentage": 80.46, "elapsed_time": "21:54:03", "remaining_time": "5:19:11"} +{"current_steps": 6061, "total_steps": 7532, "loss": 0.2276519238948822, "lr": 2.016949137968851e-06, "epoch": 1.6094808126410836, "percentage": 80.47, "elapsed_time": "21:54:16", "remaining_time": "5:18:58"} +{"current_steps": 6062, "total_steps": 7532, "loss": 0.20834363996982574, "lr": 2.0143055547878863e-06, "epoch": 1.6097463816226265, "percentage": 80.48, "elapsed_time": "21:54:30", "remaining_time": "5:18:45"} +{"current_steps": 6063, "total_steps": 7532, "loss": 0.2579394578933716, "lr": 2.011663511154628e-06, "epoch": 
1.6100119506041695, "percentage": 80.5, "elapsed_time": "21:54:42", "remaining_time": "5:18:32"} +{"current_steps": 6064, "total_steps": 7532, "loss": 0.22118912637233734, "lr": 2.009023007578431e-06, "epoch": 1.6102775195857124, "percentage": 80.51, "elapsed_time": "21:54:56", "remaining_time": "5:18:19"} +{"current_steps": 6065, "total_steps": 7532, "loss": 0.1881515383720398, "lr": 2.0063840445683537e-06, "epoch": 1.6105430885672554, "percentage": 80.52, "elapsed_time": "21:55:09", "remaining_time": "5:18:06"} +{"current_steps": 6066, "total_steps": 7532, "loss": 0.2270805984735489, "lr": 2.003746622633155e-06, "epoch": 1.6108086575487983, "percentage": 80.54, "elapsed_time": "21:55:23", "remaining_time": "5:17:53"} +{"current_steps": 6067, "total_steps": 7532, "loss": 0.26356351375579834, "lr": 2.0011107422813013e-06, "epoch": 1.6110742265303413, "percentage": 80.55, "elapsed_time": "21:55:35", "remaining_time": "5:17:40"} +{"current_steps": 6068, "total_steps": 7532, "loss": 0.22937676310539246, "lr": 1.9984764040209615e-06, "epoch": 1.6113397955118842, "percentage": 80.56, "elapsed_time": "21:55:48", "remaining_time": "5:17:27"} +{"current_steps": 6069, "total_steps": 7532, "loss": 0.25062739849090576, "lr": 1.99584360836e-06, "epoch": 1.6116053644934272, "percentage": 80.58, "elapsed_time": "21:56:02", "remaining_time": "5:17:14"} +{"current_steps": 6070, "total_steps": 7532, "loss": 0.2031324952840805, "lr": 1.993212355805989e-06, "epoch": 1.6118709334749701, "percentage": 80.59, "elapsed_time": "21:56:14", "remaining_time": "5:17:01"} +{"current_steps": 6071, "total_steps": 7532, "loss": 0.25769656896591187, "lr": 1.990582646866206e-06, "epoch": 1.612136502456513, "percentage": 80.6, "elapsed_time": "21:56:28", "remaining_time": "5:16:48"} +{"current_steps": 6072, "total_steps": 7532, "loss": 0.23856252431869507, "lr": 1.987954482047626e-06, "epoch": 1.612402071438056, "percentage": 80.62, "elapsed_time": "21:56:40", "remaining_time": "5:16:35"} 
+{"current_steps": 6073, "total_steps": 7532, "loss": 0.2336723804473877, "lr": 1.9853278618569284e-06, "epoch": 1.612667640419599, "percentage": 80.63, "elapsed_time": "21:56:53", "remaining_time": "5:16:22"} +{"current_steps": 6074, "total_steps": 7532, "loss": 0.22327622771263123, "lr": 1.9827027868004942e-06, "epoch": 1.612933209401142, "percentage": 80.64, "elapsed_time": "21:57:05", "remaining_time": "5:16:09"} +{"current_steps": 6075, "total_steps": 7532, "loss": 0.26695019006729126, "lr": 1.980079257384405e-06, "epoch": 1.6131987783826849, "percentage": 80.66, "elapsed_time": "21:57:18", "remaining_time": "5:15:56"} +{"current_steps": 6076, "total_steps": 7532, "loss": 0.2467387616634369, "lr": 1.9774572741144514e-06, "epoch": 1.6134643473642278, "percentage": 80.67, "elapsed_time": "21:57:31", "remaining_time": "5:15:43"} +{"current_steps": 6077, "total_steps": 7532, "loss": 0.25473737716674805, "lr": 1.9748368374961193e-06, "epoch": 1.6137299163457708, "percentage": 80.68, "elapsed_time": "21:57:45", "remaining_time": "5:15:30"} +{"current_steps": 6078, "total_steps": 7532, "loss": 0.25508594512939453, "lr": 1.972217948034596e-06, "epoch": 1.6139954853273137, "percentage": 80.7, "elapsed_time": "21:57:58", "remaining_time": "5:15:17"} +{"current_steps": 6079, "total_steps": 7532, "loss": 0.23020131886005402, "lr": 1.969600606234774e-06, "epoch": 1.6142610543088567, "percentage": 80.71, "elapsed_time": "21:58:11", "remaining_time": "5:15:04"} +{"current_steps": 6080, "total_steps": 7532, "loss": 0.249805748462677, "lr": 1.9669848126012447e-06, "epoch": 1.6145266232903996, "percentage": 80.72, "elapsed_time": "21:58:24", "remaining_time": "5:14:51"} +{"current_steps": 6081, "total_steps": 7532, "loss": 0.2377707064151764, "lr": 1.964370567638303e-06, "epoch": 1.6147921922719426, "percentage": 80.74, "elapsed_time": "21:58:36", "remaining_time": "5:14:38"} +{"current_steps": 6082, "total_steps": 7532, "loss": 0.28656789660453796, "lr": 
1.9617578718499452e-06, "epoch": 1.6150577612534855, "percentage": 80.75, "elapsed_time": "21:58:49", "remaining_time": "5:14:25"} +{"current_steps": 6083, "total_steps": 7532, "loss": 0.22079989314079285, "lr": 1.9591467257398668e-06, "epoch": 1.6153233302350285, "percentage": 80.76, "elapsed_time": "21:59:02", "remaining_time": "5:14:12"} +{"current_steps": 6084, "total_steps": 7532, "loss": 0.1993042230606079, "lr": 1.9565371298114666e-06, "epoch": 1.6155888992165715, "percentage": 80.78, "elapsed_time": "21:59:16", "remaining_time": "5:13:59"} +{"current_steps": 6085, "total_steps": 7532, "loss": 0.20818357169628143, "lr": 1.9539290845678438e-06, "epoch": 1.6158544681981146, "percentage": 80.79, "elapsed_time": "21:59:29", "remaining_time": "5:13:46"} +{"current_steps": 6086, "total_steps": 7532, "loss": 0.20531761646270752, "lr": 1.9513225905117996e-06, "epoch": 1.6161200371796576, "percentage": 80.8, "elapsed_time": "21:59:42", "remaining_time": "5:13:33"} +{"current_steps": 6087, "total_steps": 7532, "loss": 0.23414376378059387, "lr": 1.948717648145834e-06, "epoch": 1.6163856061612005, "percentage": 80.82, "elapsed_time": "21:59:55", "remaining_time": "5:13:20"} +{"current_steps": 6088, "total_steps": 7532, "loss": 0.2025471031665802, "lr": 1.9461142579721493e-06, "epoch": 1.6166511751427435, "percentage": 80.83, "elapsed_time": "22:00:08", "remaining_time": "5:13:07"} +{"current_steps": 6089, "total_steps": 7532, "loss": 0.19130446016788483, "lr": 1.943512420492649e-06, "epoch": 1.6169167441242864, "percentage": 80.84, "elapsed_time": "22:00:21", "remaining_time": "5:12:54"} +{"current_steps": 6090, "total_steps": 7532, "loss": 0.21637848019599915, "lr": 1.940912136208938e-06, "epoch": 1.6171823131058294, "percentage": 80.86, "elapsed_time": "22:00:35", "remaining_time": "5:12:41"} +{"current_steps": 6091, "total_steps": 7532, "loss": 0.26844075322151184, "lr": 1.9383134056223176e-06, "epoch": 1.6174478820873723, "percentage": 80.87, "elapsed_time": 
"22:00:47", "remaining_time": "5:12:28"} +{"current_steps": 6092, "total_steps": 7532, "loss": 0.19573305547237396, "lr": 1.935716229233794e-06, "epoch": 1.6177134510689153, "percentage": 80.88, "elapsed_time": "22:01:00", "remaining_time": "5:12:15"} +{"current_steps": 6093, "total_steps": 7532, "loss": 0.22705954313278198, "lr": 1.93312060754407e-06, "epoch": 1.6179790200504582, "percentage": 80.89, "elapsed_time": "22:01:13", "remaining_time": "5:12:02"} +{"current_steps": 6094, "total_steps": 7532, "loss": 0.2505400478839874, "lr": 1.9305265410535545e-06, "epoch": 1.6182445890320012, "percentage": 80.91, "elapsed_time": "22:01:27", "remaining_time": "5:11:49"} +{"current_steps": 6095, "total_steps": 7532, "loss": 0.2328193187713623, "lr": 1.927934030262353e-06, "epoch": 1.6185101580135441, "percentage": 80.92, "elapsed_time": "22:01:40", "remaining_time": "5:11:36"} +{"current_steps": 6096, "total_steps": 7532, "loss": 0.23876577615737915, "lr": 1.9253430756702674e-06, "epoch": 1.618775726995087, "percentage": 80.93, "elapsed_time": "22:01:53", "remaining_time": "5:11:23"} +{"current_steps": 6097, "total_steps": 7532, "loss": 0.2390732318162918, "lr": 1.9227536777768063e-06, "epoch": 1.61904129597663, "percentage": 80.95, "elapsed_time": "22:02:06", "remaining_time": "5:11:10"} +{"current_steps": 6098, "total_steps": 7532, "loss": 0.25231993198394775, "lr": 1.9201658370811736e-06, "epoch": 1.619306864958173, "percentage": 80.96, "elapsed_time": "22:02:19", "remaining_time": "5:10:57"} +{"current_steps": 6099, "total_steps": 7532, "loss": 0.21527352929115295, "lr": 1.917579554082274e-06, "epoch": 1.619572433939716, "percentage": 80.97, "elapsed_time": "22:02:33", "remaining_time": "5:10:44"} +{"current_steps": 6100, "total_steps": 7532, "loss": 0.21394580602645874, "lr": 1.9149948292787133e-06, "epoch": 1.619838002921259, "percentage": 80.99, "elapsed_time": "22:02:46", "remaining_time": "5:10:31"} +{"current_steps": 6101, "total_steps": 7532, "loss": 
0.26093196868896484, "lr": 1.912411663168796e-06, "epoch": 1.6201035719028019, "percentage": 81.0, "elapsed_time": "22:03:04", "remaining_time": "5:10:19"} +{"current_steps": 6102, "total_steps": 7532, "loss": 0.2631412744522095, "lr": 1.9098300562505266e-06, "epoch": 1.6203691408843448, "percentage": 81.01, "elapsed_time": "22:03:16", "remaining_time": "5:10:06"} +{"current_steps": 6103, "total_steps": 7532, "loss": 0.270250141620636, "lr": 1.9072500090216073e-06, "epoch": 1.6206347098658878, "percentage": 81.03, "elapsed_time": "22:03:29", "remaining_time": "5:09:53"} +{"current_steps": 6104, "total_steps": 7532, "loss": 0.22944031655788422, "lr": 1.9046715219794397e-06, "epoch": 1.6209002788474307, "percentage": 81.04, "elapsed_time": "22:03:41", "remaining_time": "5:09:40"} +{"current_steps": 6105, "total_steps": 7532, "loss": 0.24429070949554443, "lr": 1.902094595621129e-06, "epoch": 1.6211658478289737, "percentage": 81.05, "elapsed_time": "22:03:54", "remaining_time": "5:09:27"} +{"current_steps": 6106, "total_steps": 7532, "loss": 0.25656238198280334, "lr": 1.8995192304434729e-06, "epoch": 1.6214314168105166, "percentage": 81.07, "elapsed_time": "22:04:06", "remaining_time": "5:09:14"} +{"current_steps": 6107, "total_steps": 7532, "loss": 0.2575233280658722, "lr": 1.8969454269429743e-06, "epoch": 1.6216969857920596, "percentage": 81.08, "elapsed_time": "22:04:19", "remaining_time": "5:09:01"} +{"current_steps": 6108, "total_steps": 7532, "loss": 0.24881063401699066, "lr": 1.8943731856158299e-06, "epoch": 1.6219625547736025, "percentage": 81.09, "elapsed_time": "22:04:32", "remaining_time": "5:08:47"} +{"current_steps": 6109, "total_steps": 7532, "loss": 0.23353847861289978, "lr": 1.8918025069579382e-06, "epoch": 1.6222281237551455, "percentage": 81.11, "elapsed_time": "22:04:45", "remaining_time": "5:08:34"} +{"current_steps": 6110, "total_steps": 7532, "loss": 0.21085457503795624, "lr": 1.8892333914648953e-06, "epoch": 1.6224936927366884, "percentage": 
81.12, "elapsed_time": "22:04:57", "remaining_time": "5:08:21"} +{"current_steps": 6111, "total_steps": 7532, "loss": 0.28600943088531494, "lr": 1.8866658396319947e-06, "epoch": 1.6227592617182314, "percentage": 81.13, "elapsed_time": "22:05:10", "remaining_time": "5:08:08"} +{"current_steps": 6112, "total_steps": 7532, "loss": 0.22580507397651672, "lr": 1.8840998519542352e-06, "epoch": 1.6230248306997743, "percentage": 81.15, "elapsed_time": "22:05:23", "remaining_time": "5:07:55"} +{"current_steps": 6113, "total_steps": 7532, "loss": 0.19310800731182098, "lr": 1.8815354289263066e-06, "epoch": 1.6232903996813173, "percentage": 81.16, "elapsed_time": "22:05:36", "remaining_time": "5:07:42"} +{"current_steps": 6114, "total_steps": 7532, "loss": 0.21633204817771912, "lr": 1.8789725710425988e-06, "epoch": 1.6235559686628602, "percentage": 81.17, "elapsed_time": "22:05:49", "remaining_time": "5:07:29"} +{"current_steps": 6115, "total_steps": 7532, "loss": 0.21346023678779602, "lr": 1.8764112787972e-06, "epoch": 1.6238215376444032, "percentage": 81.19, "elapsed_time": "22:06:02", "remaining_time": "5:07:16"} +{"current_steps": 6116, "total_steps": 7532, "loss": 0.21206694841384888, "lr": 1.8738515526838986e-06, "epoch": 1.6240871066259461, "percentage": 81.2, "elapsed_time": "22:06:15", "remaining_time": "5:07:03"} +{"current_steps": 6117, "total_steps": 7532, "loss": 0.2135339230298996, "lr": 1.8712933931961773e-06, "epoch": 1.624352675607489, "percentage": 81.21, "elapsed_time": "22:06:28", "remaining_time": "5:06:50"} +{"current_steps": 6118, "total_steps": 7532, "loss": 0.2168758660554886, "lr": 1.8687368008272243e-06, "epoch": 1.624618244589032, "percentage": 81.23, "elapsed_time": "22:06:41", "remaining_time": "5:06:37"} +{"current_steps": 6119, "total_steps": 7532, "loss": 0.20825617015361786, "lr": 1.866181776069914e-06, "epoch": 1.624883813570575, "percentage": 81.24, "elapsed_time": "22:06:53", "remaining_time": "5:06:24"} +{"current_steps": 6120, 
"total_steps": 7532, "loss": 0.25367867946624756, "lr": 1.863628319416826e-06, "epoch": 1.625149382552118, "percentage": 81.25, "elapsed_time": "22:07:07", "remaining_time": "5:06:11"} +{"current_steps": 6121, "total_steps": 7532, "loss": 0.21604284644126892, "lr": 1.8610764313602404e-06, "epoch": 1.625414951533661, "percentage": 81.27, "elapsed_time": "22:07:20", "remaining_time": "5:05:58"} +{"current_steps": 6122, "total_steps": 7532, "loss": 0.2324865758419037, "lr": 1.8585261123921283e-06, "epoch": 1.6256805205152038, "percentage": 81.28, "elapsed_time": "22:07:33", "remaining_time": "5:05:45"} +{"current_steps": 6123, "total_steps": 7532, "loss": 0.2077629417181015, "lr": 1.8559773630041632e-06, "epoch": 1.6259460894967468, "percentage": 81.29, "elapsed_time": "22:07:45", "remaining_time": "5:05:32"} +{"current_steps": 6124, "total_steps": 7532, "loss": 0.19919469952583313, "lr": 1.8534301836877122e-06, "epoch": 1.6262116584782897, "percentage": 81.31, "elapsed_time": "22:07:59", "remaining_time": "5:05:19"} +{"current_steps": 6125, "total_steps": 7532, "loss": 0.21069160103797913, "lr": 1.8508845749338412e-06, "epoch": 1.6264772274598327, "percentage": 81.32, "elapsed_time": "22:08:11", "remaining_time": "5:05:06"} +{"current_steps": 6126, "total_steps": 7532, "loss": 0.2286640703678131, "lr": 1.8483405372333152e-06, "epoch": 1.6267427964413756, "percentage": 81.33, "elapsed_time": "22:08:24", "remaining_time": "5:04:53"} +{"current_steps": 6127, "total_steps": 7532, "loss": 0.2430541068315506, "lr": 1.8457980710765932e-06, "epoch": 1.6270083654229186, "percentage": 81.35, "elapsed_time": "22:08:37", "remaining_time": "5:04:40"} +{"current_steps": 6128, "total_steps": 7532, "loss": 0.21875709295272827, "lr": 1.8432571769538344e-06, "epoch": 1.6272739344044616, "percentage": 81.36, "elapsed_time": "22:08:50", "remaining_time": "5:04:27"} +{"current_steps": 6129, "total_steps": 7532, "loss": 0.22591018676757812, "lr": 1.8407178553548876e-06, "epoch": 
1.6275395033860045, "percentage": 81.37, "elapsed_time": "22:09:03", "remaining_time": "5:04:14"} +{"current_steps": 6130, "total_steps": 7532, "loss": 0.25429075956344604, "lr": 1.8381801067693129e-06, "epoch": 1.6278050723675475, "percentage": 81.39, "elapsed_time": "22:09:15", "remaining_time": "5:04:00"} +{"current_steps": 6131, "total_steps": 7532, "loss": 0.2437858283519745, "lr": 1.8356439316863528e-06, "epoch": 1.6280706413490904, "percentage": 81.4, "elapsed_time": "22:09:28", "remaining_time": "5:03:47"} +{"current_steps": 6132, "total_steps": 7532, "loss": 0.24196262657642365, "lr": 1.8331093305949532e-06, "epoch": 1.6283362103306334, "percentage": 81.41, "elapsed_time": "22:09:40", "remaining_time": "5:03:34"} +{"current_steps": 6133, "total_steps": 7532, "loss": 0.25779271125793457, "lr": 1.8305763039837576e-06, "epoch": 1.6286017793121763, "percentage": 81.43, "elapsed_time": "22:09:53", "remaining_time": "5:03:21"} +{"current_steps": 6134, "total_steps": 7532, "loss": 0.23418015241622925, "lr": 1.8280448523410987e-06, "epoch": 1.6288673482937193, "percentage": 81.44, "elapsed_time": "22:10:05", "remaining_time": "5:03:08"} +{"current_steps": 6135, "total_steps": 7532, "loss": 0.2670775353908539, "lr": 1.8255149761550128e-06, "epoch": 1.6291329172752622, "percentage": 81.45, "elapsed_time": "22:10:18", "remaining_time": "5:02:55"} +{"current_steps": 6136, "total_steps": 7532, "loss": 0.29342639446258545, "lr": 1.822986675913231e-06, "epoch": 1.6293984862568052, "percentage": 81.47, "elapsed_time": "22:10:31", "remaining_time": "5:02:42"} +{"current_steps": 6137, "total_steps": 7532, "loss": 0.22768062353134155, "lr": 1.8204599521031785e-06, "epoch": 1.6296640552383481, "percentage": 81.48, "elapsed_time": "22:10:44", "remaining_time": "5:02:29"} +{"current_steps": 6138, "total_steps": 7532, "loss": 0.23938167095184326, "lr": 1.817934805211976e-06, "epoch": 1.629929624219891, "percentage": 81.49, "elapsed_time": "22:10:57", "remaining_time": "5:02:16"} 
+{"current_steps": 6139, "total_steps": 7532, "loss": 0.1982264518737793, "lr": 1.8154112357264474e-06, "epoch": 1.630195193201434, "percentage": 81.51, "elapsed_time": "22:11:11", "remaining_time": "5:02:03"} +{"current_steps": 6140, "total_steps": 7532, "loss": 0.23591312766075134, "lr": 1.8128892441331047e-06, "epoch": 1.630460762182977, "percentage": 81.52, "elapsed_time": "22:11:23", "remaining_time": "5:01:50"} +{"current_steps": 6141, "total_steps": 7532, "loss": 0.20317673683166504, "lr": 1.8103688309181567e-06, "epoch": 1.63072633116452, "percentage": 81.53, "elapsed_time": "22:11:36", "remaining_time": "5:01:37"} +{"current_steps": 6142, "total_steps": 7532, "loss": 0.233676478266716, "lr": 1.8078499965675112e-06, "epoch": 1.6309919001460629, "percentage": 81.55, "elapsed_time": "22:11:49", "remaining_time": "5:01:24"} +{"current_steps": 6143, "total_steps": 7532, "loss": 0.22850775718688965, "lr": 1.8053327415667688e-06, "epoch": 1.6312574691276058, "percentage": 81.56, "elapsed_time": "22:12:02", "remaining_time": "5:01:11"} +{"current_steps": 6144, "total_steps": 7532, "loss": 0.2603572607040405, "lr": 1.8028170664012268e-06, "epoch": 1.6315230381091488, "percentage": 81.57, "elapsed_time": "22:12:15", "remaining_time": "5:00:58"} +{"current_steps": 6145, "total_steps": 7532, "loss": 0.27881523966789246, "lr": 1.8003029715558773e-06, "epoch": 1.6317886070906917, "percentage": 81.59, "elapsed_time": "22:12:28", "remaining_time": "5:00:45"} +{"current_steps": 6146, "total_steps": 7532, "loss": 0.21744176745414734, "lr": 1.797790457515406e-06, "epoch": 1.6320541760722347, "percentage": 81.6, "elapsed_time": "22:12:40", "remaining_time": "5:00:32"} +{"current_steps": 6147, "total_steps": 7532, "loss": 0.20449542999267578, "lr": 1.7952795247642008e-06, "epoch": 1.6323197450537776, "percentage": 81.61, "elapsed_time": "22:12:53", "remaining_time": "5:00:19"} +{"current_steps": 6148, "total_steps": 7532, "loss": 0.25641053915023804, "lr": 
1.7927701737863402e-06, "epoch": 1.6325853140353206, "percentage": 81.63, "elapsed_time": "22:13:05", "remaining_time": "5:00:05"} +{"current_steps": 6149, "total_steps": 7532, "loss": 0.23583751916885376, "lr": 1.7902624050655914e-06, "epoch": 1.6328508830168635, "percentage": 81.64, "elapsed_time": "22:13:18", "remaining_time": "4:59:52"} +{"current_steps": 6150, "total_steps": 7532, "loss": 0.2709866762161255, "lr": 1.787756219085427e-06, "epoch": 1.6331164519984065, "percentage": 81.65, "elapsed_time": "22:13:32", "remaining_time": "4:59:39"} +{"current_steps": 6151, "total_steps": 7532, "loss": 0.233103945851326, "lr": 1.785251616329009e-06, "epoch": 1.6333820209799494, "percentage": 81.66, "elapsed_time": "22:13:45", "remaining_time": "4:59:26"} +{"current_steps": 6152, "total_steps": 7532, "loss": 0.2665184438228607, "lr": 1.7827485972791957e-06, "epoch": 1.6336475899614924, "percentage": 81.68, "elapsed_time": "22:13:58", "remaining_time": "4:59:13"} +{"current_steps": 6153, "total_steps": 7532, "loss": 0.20934605598449707, "lr": 1.7802471624185392e-06, "epoch": 1.6339131589430353, "percentage": 81.69, "elapsed_time": "22:14:10", "remaining_time": "4:59:00"} +{"current_steps": 6154, "total_steps": 7532, "loss": 0.2102464735507965, "lr": 1.7777473122292866e-06, "epoch": 1.6341787279245783, "percentage": 81.7, "elapsed_time": "22:14:23", "remaining_time": "4:58:47"} +{"current_steps": 6155, "total_steps": 7532, "loss": 0.22889986634254456, "lr": 1.7752490471933769e-06, "epoch": 1.6344442969061213, "percentage": 81.72, "elapsed_time": "22:14:36", "remaining_time": "4:58:34"} +{"current_steps": 6156, "total_steps": 7532, "loss": 0.2261584997177124, "lr": 1.772752367792452e-06, "epoch": 1.6347098658876642, "percentage": 81.73, "elapsed_time": "22:14:49", "remaining_time": "4:58:21"} +{"current_steps": 6157, "total_steps": 7532, "loss": 0.21456710994243622, "lr": 1.7702572745078395e-06, "epoch": 1.6349754348692072, "percentage": 81.74, "elapsed_time": "22:15:02", 
"remaining_time": "4:58:08"} +{"current_steps": 6158, "total_steps": 7532, "loss": 0.22762097418308258, "lr": 1.7677637678205627e-06, "epoch": 1.63524100385075, "percentage": 81.76, "elapsed_time": "22:15:15", "remaining_time": "4:57:55"} +{"current_steps": 6159, "total_steps": 7532, "loss": 0.24772633612155914, "lr": 1.7652718482113417e-06, "epoch": 1.635506572832293, "percentage": 81.77, "elapsed_time": "22:15:28", "remaining_time": "4:57:42"} +{"current_steps": 6160, "total_steps": 7532, "loss": 0.22980757057666779, "lr": 1.7627815161605887e-06, "epoch": 1.635772141813836, "percentage": 81.78, "elapsed_time": "22:15:40", "remaining_time": "4:57:29"} +{"current_steps": 6161, "total_steps": 7532, "loss": 0.19560125470161438, "lr": 1.760292772148411e-06, "epoch": 1.636037710795379, "percentage": 81.8, "elapsed_time": "22:15:53", "remaining_time": "4:57:16"} +{"current_steps": 6162, "total_steps": 7532, "loss": 0.23733064532279968, "lr": 1.7578056166546086e-06, "epoch": 1.636303279776922, "percentage": 81.81, "elapsed_time": "22:16:06", "remaining_time": "4:57:03"} +{"current_steps": 6163, "total_steps": 7532, "loss": 0.21064560115337372, "lr": 1.7553200501586743e-06, "epoch": 1.6365688487584649, "percentage": 81.82, "elapsed_time": "22:16:19", "remaining_time": "4:56:50"} +{"current_steps": 6164, "total_steps": 7532, "loss": 0.26709994673728943, "lr": 1.7528360731397986e-06, "epoch": 1.6368344177400078, "percentage": 81.84, "elapsed_time": "22:16:31", "remaining_time": "4:56:37"} +{"current_steps": 6165, "total_steps": 7532, "loss": 0.26555943489074707, "lr": 1.750353686076861e-06, "epoch": 1.6370999867215508, "percentage": 81.85, "elapsed_time": "22:16:45", "remaining_time": "4:56:24"} +{"current_steps": 6166, "total_steps": 7532, "loss": 0.24480760097503662, "lr": 1.7478728894484375e-06, "epoch": 1.6373655557030937, "percentage": 81.86, "elapsed_time": "22:16:57", "remaining_time": "4:56:11"} +{"current_steps": 6167, "total_steps": 7532, "loss": 
0.2170884907245636, "lr": 1.7453936837327967e-06, "epoch": 1.6376311246846367, "percentage": 81.88, "elapsed_time": "22:17:10", "remaining_time": "4:55:58"} +{"current_steps": 6168, "total_steps": 7532, "loss": 0.24728982150554657, "lr": 1.7429160694078983e-06, "epoch": 1.6378966936661796, "percentage": 81.89, "elapsed_time": "22:17:23", "remaining_time": "4:55:45"} +{"current_steps": 6169, "total_steps": 7532, "loss": 0.20886945724487305, "lr": 1.7404400469513994e-06, "epoch": 1.6381622626477228, "percentage": 81.9, "elapsed_time": "22:17:37", "remaining_time": "4:55:32"} +{"current_steps": 6170, "total_steps": 7532, "loss": 0.1892474740743637, "lr": 1.7379656168406467e-06, "epoch": 1.6384278316292658, "percentage": 81.92, "elapsed_time": "22:17:50", "remaining_time": "4:55:19"} +{"current_steps": 6171, "total_steps": 7532, "loss": 0.24953782558441162, "lr": 1.7354927795526821e-06, "epoch": 1.6386934006108087, "percentage": 81.93, "elapsed_time": "22:18:03", "remaining_time": "4:55:06"} +{"current_steps": 6172, "total_steps": 7532, "loss": 0.2311600148677826, "lr": 1.7330215355642377e-06, "epoch": 1.6389589695923517, "percentage": 81.94, "elapsed_time": "22:18:16", "remaining_time": "4:54:53"} +{"current_steps": 6173, "total_steps": 7532, "loss": 0.24018675088882446, "lr": 1.73055188535174e-06, "epoch": 1.6392245385738946, "percentage": 81.96, "elapsed_time": "22:18:29", "remaining_time": "4:54:40"} +{"current_steps": 6174, "total_steps": 7532, "loss": 0.22607022523880005, "lr": 1.7280838293913116e-06, "epoch": 1.6394901075554376, "percentage": 81.97, "elapsed_time": "22:18:42", "remaining_time": "4:54:27"} +{"current_steps": 6175, "total_steps": 7532, "loss": 0.23725482821464539, "lr": 1.7256173681587619e-06, "epoch": 1.6397556765369805, "percentage": 81.98, "elapsed_time": "22:18:55", "remaining_time": "4:54:14"} +{"current_steps": 6176, "total_steps": 7532, "loss": 0.241235613822937, "lr": 1.723152502129597e-06, "epoch": 1.6400212455185235, "percentage": 82.0, 
"elapsed_time": "22:19:09", "remaining_time": "4:54:01"} +{"current_steps": 6177, "total_steps": 7532, "loss": 0.2150690108537674, "lr": 1.7206892317790136e-06, "epoch": 1.6402868145000664, "percentage": 82.01, "elapsed_time": "22:19:22", "remaining_time": "4:53:48"} +{"current_steps": 6178, "total_steps": 7532, "loss": 0.22133421897888184, "lr": 1.7182275575819007e-06, "epoch": 1.6405523834816094, "percentage": 82.02, "elapsed_time": "22:19:35", "remaining_time": "4:53:35"} +{"current_steps": 6179, "total_steps": 7532, "loss": 0.1937463879585266, "lr": 1.7157674800128399e-06, "epoch": 1.6408179524631523, "percentage": 82.04, "elapsed_time": "22:19:48", "remaining_time": "4:53:22"} +{"current_steps": 6180, "total_steps": 7532, "loss": 0.18938027322292328, "lr": 1.7133089995461062e-06, "epoch": 1.6410835214446953, "percentage": 82.05, "elapsed_time": "22:20:01", "remaining_time": "4:53:09"} +{"current_steps": 6181, "total_steps": 7532, "loss": 0.23577997088432312, "lr": 1.7108521166556646e-06, "epoch": 1.6413490904262382, "percentage": 82.06, "elapsed_time": "22:20:13", "remaining_time": "4:52:56"} +{"current_steps": 6182, "total_steps": 7532, "loss": 0.2712448537349701, "lr": 1.7083968318151734e-06, "epoch": 1.6416146594077812, "percentage": 82.08, "elapsed_time": "22:20:26", "remaining_time": "4:52:43"} +{"current_steps": 6183, "total_steps": 7532, "loss": 0.24242255091667175, "lr": 1.7059431454979825e-06, "epoch": 1.6418802283893241, "percentage": 82.09, "elapsed_time": "22:20:39", "remaining_time": "4:52:30"} +{"current_steps": 6184, "total_steps": 7532, "loss": 0.22521010041236877, "lr": 1.7034910581771347e-06, "epoch": 1.642145797370867, "percentage": 82.1, "elapsed_time": "22:20:52", "remaining_time": "4:52:17"} +{"current_steps": 6185, "total_steps": 7532, "loss": 0.22026273608207703, "lr": 1.7010405703253618e-06, "epoch": 1.64241136635241, "percentage": 82.12, "elapsed_time": "22:21:04", "remaining_time": "4:52:03"} +{"current_steps": 6186, "total_steps": 
7532, "loss": 0.22726528346538544, "lr": 1.6985916824150894e-06, "epoch": 1.642676935333953, "percentage": 82.13, "elapsed_time": "22:21:17", "remaining_time": "4:51:50"} +{"current_steps": 6187, "total_steps": 7532, "loss": 0.25172409415245056, "lr": 1.6961443949184353e-06, "epoch": 1.642942504315496, "percentage": 82.14, "elapsed_time": "22:21:30", "remaining_time": "4:51:37"} +{"current_steps": 6188, "total_steps": 7532, "loss": 0.21173113584518433, "lr": 1.6936987083072065e-06, "epoch": 1.6432080732970389, "percentage": 82.16, "elapsed_time": "22:21:43", "remaining_time": "4:51:24"} +{"current_steps": 6189, "total_steps": 7532, "loss": 0.22596749663352966, "lr": 1.6912546230529036e-06, "epoch": 1.6434736422785818, "percentage": 82.17, "elapsed_time": "22:21:56", "remaining_time": "4:51:11"} +{"current_steps": 6190, "total_steps": 7532, "loss": 0.2749077081680298, "lr": 1.6888121396267166e-06, "epoch": 1.6437392112601248, "percentage": 82.18, "elapsed_time": "22:22:08", "remaining_time": "4:50:58"} +{"current_steps": 6191, "total_steps": 7532, "loss": 0.22150780260562897, "lr": 1.6863712584995252e-06, "epoch": 1.6440047802416677, "percentage": 82.2, "elapsed_time": "22:22:22", "remaining_time": "4:50:45"} +{"current_steps": 6192, "total_steps": 7532, "loss": 0.23437368869781494, "lr": 1.6839319801419073e-06, "epoch": 1.6442703492232107, "percentage": 82.21, "elapsed_time": "22:22:34", "remaining_time": "4:50:32"} +{"current_steps": 6193, "total_steps": 7532, "loss": 0.22949008643627167, "lr": 1.681494305024125e-06, "epoch": 1.6445359182047536, "percentage": 82.22, "elapsed_time": "22:22:47", "remaining_time": "4:50:19"} +{"current_steps": 6194, "total_steps": 7532, "loss": 0.24147525429725647, "lr": 1.6790582336161332e-06, "epoch": 1.6448014871862966, "percentage": 82.24, "elapsed_time": "22:23:00", "remaining_time": "4:50:06"} +{"current_steps": 6195, "total_steps": 7532, "loss": 0.2001456618309021, "lr": 1.6766237663875773e-06, "epoch": 1.6450670561678395, 
"percentage": 82.25, "elapsed_time": "22:23:13", "remaining_time": "4:49:53"} +{"current_steps": 6196, "total_steps": 7532, "loss": 0.17668186128139496, "lr": 1.674190903807794e-06, "epoch": 1.6453326251493825, "percentage": 82.26, "elapsed_time": "22:23:26", "remaining_time": "4:49:40"} +{"current_steps": 6197, "total_steps": 7532, "loss": 0.24585255980491638, "lr": 1.6717596463458107e-06, "epoch": 1.6455981941309257, "percentage": 82.28, "elapsed_time": "22:23:39", "remaining_time": "4:49:27"} +{"current_steps": 6198, "total_steps": 7532, "loss": 0.2234572172164917, "lr": 1.6693299944703479e-06, "epoch": 1.6458637631124686, "percentage": 82.29, "elapsed_time": "22:23:52", "remaining_time": "4:49:14"} +{"current_steps": 6199, "total_steps": 7532, "loss": 0.2007240653038025, "lr": 1.6669019486498083e-06, "epoch": 1.6461293320940116, "percentage": 82.3, "elapsed_time": "22:24:05", "remaining_time": "4:49:01"} +{"current_steps": 6200, "total_steps": 7532, "loss": 0.21926215291023254, "lr": 1.6644755093522913e-06, "epoch": 1.6463949010755545, "percentage": 82.32, "elapsed_time": "22:24:18", "remaining_time": "4:48:48"} +{"current_steps": 6201, "total_steps": 7532, "loss": 0.24797898530960083, "lr": 1.662050677045589e-06, "epoch": 1.6466604700570975, "percentage": 82.33, "elapsed_time": "22:24:36", "remaining_time": "4:48:36"} +{"current_steps": 6202, "total_steps": 7532, "loss": 0.22087037563323975, "lr": 1.65962745219718e-06, "epoch": 1.6469260390386404, "percentage": 82.34, "elapsed_time": "22:24:50", "remaining_time": "4:48:23"} +{"current_steps": 6203, "total_steps": 7532, "loss": 0.23073960840702057, "lr": 1.6572058352742327e-06, "epoch": 1.6471916080201834, "percentage": 82.36, "elapsed_time": "22:25:02", "remaining_time": "4:48:10"} +{"current_steps": 6204, "total_steps": 7532, "loss": 0.2430298924446106, "lr": 1.6547858267436056e-06, "epoch": 1.6474571770017263, "percentage": 82.37, "elapsed_time": "22:25:15", "remaining_time": "4:47:57"} +{"current_steps": 
6205, "total_steps": 7532, "loss": 0.23337247967720032, "lr": 1.6523674270718493e-06, "epoch": 1.6477227459832693, "percentage": 82.38, "elapsed_time": "22:25:28", "remaining_time": "4:47:44"} +{"current_steps": 6206, "total_steps": 7532, "loss": 0.22141093015670776, "lr": 1.6499506367252016e-06, "epoch": 1.6479883149648122, "percentage": 82.4, "elapsed_time": "22:25:40", "remaining_time": "4:47:31"} +{"current_steps": 6207, "total_steps": 7532, "loss": 0.23247988522052765, "lr": 1.647535456169591e-06, "epoch": 1.6482538839463552, "percentage": 82.41, "elapsed_time": "22:25:53", "remaining_time": "4:47:18"} +{"current_steps": 6208, "total_steps": 7532, "loss": 0.2659391760826111, "lr": 1.6451218858706374e-06, "epoch": 1.6485194529278981, "percentage": 82.42, "elapsed_time": "22:26:06", "remaining_time": "4:47:05"} +{"current_steps": 6209, "total_steps": 7532, "loss": 0.2154998630285263, "lr": 1.642709926293644e-06, "epoch": 1.648785021909441, "percentage": 82.43, "elapsed_time": "22:26:19", "remaining_time": "4:46:52"} +{"current_steps": 6210, "total_steps": 7532, "loss": 0.20363599061965942, "lr": 1.6402995779036146e-06, "epoch": 1.649050590890984, "percentage": 82.45, "elapsed_time": "22:26:32", "remaining_time": "4:46:39"} +{"current_steps": 6211, "total_steps": 7532, "loss": 0.23388779163360596, "lr": 1.6378908411652328e-06, "epoch": 1.649316159872527, "percentage": 82.46, "elapsed_time": "22:26:45", "remaining_time": "4:46:26"} +{"current_steps": 6212, "total_steps": 7532, "loss": 0.20465341210365295, "lr": 1.6354837165428772e-06, "epoch": 1.64958172885407, "percentage": 82.47, "elapsed_time": "22:26:58", "remaining_time": "4:46:13"} +{"current_steps": 6213, "total_steps": 7532, "loss": 0.2233584225177765, "lr": 1.6330782045006088e-06, "epoch": 1.649847297835613, "percentage": 82.49, "elapsed_time": "22:27:11", "remaining_time": "4:46:00"} +{"current_steps": 6214, "total_steps": 7532, "loss": 0.2880077064037323, "lr": 1.6306743055021834e-06, "epoch": 
1.6501128668171559, "percentage": 82.5, "elapsed_time": "22:27:24", "remaining_time": "4:45:47"} +{"current_steps": 6215, "total_steps": 7532, "loss": 0.23332230746746063, "lr": 1.6282720200110458e-06, "epoch": 1.6503784357986988, "percentage": 82.51, "elapsed_time": "22:27:36", "remaining_time": "4:45:34"} +{"current_steps": 6216, "total_steps": 7532, "loss": 0.22191204130649567, "lr": 1.6258713484903266e-06, "epoch": 1.6506440047802418, "percentage": 82.53, "elapsed_time": "22:27:50", "remaining_time": "4:45:21"} +{"current_steps": 6217, "total_steps": 7532, "loss": 0.2403659224510193, "lr": 1.6234722914028478e-06, "epoch": 1.6509095737617847, "percentage": 82.54, "elapsed_time": "22:28:02", "remaining_time": "4:45:08"} +{"current_steps": 6218, "total_steps": 7532, "loss": 0.2230256348848343, "lr": 1.6210748492111161e-06, "epoch": 1.6511751427433277, "percentage": 82.55, "elapsed_time": "22:28:15", "remaining_time": "4:44:55"} +{"current_steps": 6219, "total_steps": 7532, "loss": 0.2086302787065506, "lr": 1.6186790223773375e-06, "epoch": 1.6514407117248706, "percentage": 82.57, "elapsed_time": "22:28:28", "remaining_time": "4:44:42"} +{"current_steps": 6220, "total_steps": 7532, "loss": 0.22336703538894653, "lr": 1.6162848113633934e-06, "epoch": 1.6517062807064136, "percentage": 82.58, "elapsed_time": "22:28:41", "remaining_time": "4:44:28"} +{"current_steps": 6221, "total_steps": 7532, "loss": 0.2354746013879776, "lr": 1.6138922166308613e-06, "epoch": 1.6519718496879565, "percentage": 82.59, "elapsed_time": "22:28:54", "remaining_time": "4:44:15"} +{"current_steps": 6222, "total_steps": 7532, "loss": 0.23983564972877502, "lr": 1.6115012386410045e-06, "epoch": 1.6522374186694995, "percentage": 82.61, "elapsed_time": "22:29:07", "remaining_time": "4:44:02"} +{"current_steps": 6223, "total_steps": 7532, "loss": 0.25468897819519043, "lr": 1.6091118778547765e-06, "epoch": 1.6525029876510424, "percentage": 82.62, "elapsed_time": "22:29:20", "remaining_time": 
"4:43:49"} +{"current_steps": 6224, "total_steps": 7532, "loss": 0.2225346863269806, "lr": 1.6067241347328166e-06, "epoch": 1.6527685566325854, "percentage": 82.63, "elapsed_time": "22:29:33", "remaining_time": "4:43:36"} +{"current_steps": 6225, "total_steps": 7532, "loss": 0.28801992535591125, "lr": 1.6043380097354543e-06, "epoch": 1.6530341256141283, "percentage": 82.65, "elapsed_time": "22:29:46", "remaining_time": "4:43:23"} +{"current_steps": 6226, "total_steps": 7532, "loss": 0.1869816929101944, "lr": 1.6019535033227063e-06, "epoch": 1.6532996945956713, "percentage": 82.66, "elapsed_time": "22:29:58", "remaining_time": "4:43:10"} +{"current_steps": 6227, "total_steps": 7532, "loss": 0.2569049894809723, "lr": 1.5995706159542768e-06, "epoch": 1.6535652635772142, "percentage": 82.67, "elapsed_time": "22:30:11", "remaining_time": "4:42:57"} +{"current_steps": 6228, "total_steps": 7532, "loss": 0.19138488173484802, "lr": 1.5971893480895583e-06, "epoch": 1.6538308325587572, "percentage": 82.69, "elapsed_time": "22:30:24", "remaining_time": "4:42:44"} +{"current_steps": 6229, "total_steps": 7532, "loss": 0.23107777535915375, "lr": 1.5948097001876318e-06, "epoch": 1.6540964015403001, "percentage": 82.7, "elapsed_time": "22:30:37", "remaining_time": "4:42:31"} +{"current_steps": 6230, "total_steps": 7532, "loss": 0.21682313084602356, "lr": 1.5924316727072652e-06, "epoch": 1.654361970521843, "percentage": 82.71, "elapsed_time": "22:30:49", "remaining_time": "4:42:18"} +{"current_steps": 6231, "total_steps": 7532, "loss": 0.27629974484443665, "lr": 1.5900552661069135e-06, "epoch": 1.654627539503386, "percentage": 82.73, "elapsed_time": "22:31:02", "remaining_time": "4:42:05"} +{"current_steps": 6232, "total_steps": 7532, "loss": 0.21919876337051392, "lr": 1.587680480844721e-06, "epoch": 1.654893108484929, "percentage": 82.74, "elapsed_time": "22:31:14", "remaining_time": "4:41:52"} +{"current_steps": 6233, "total_steps": 7532, "loss": 0.2556184232234955, "lr": 
1.5853073173785183e-06, "epoch": 1.655158677466472, "percentage": 82.75, "elapsed_time": "22:31:27", "remaining_time": "4:41:39"} +{"current_steps": 6234, "total_steps": 7532, "loss": 0.1904449462890625, "lr": 1.5829357761658214e-06, "epoch": 1.655424246448015, "percentage": 82.77, "elapsed_time": "22:31:40", "remaining_time": "4:41:26"} +{"current_steps": 6235, "total_steps": 7532, "loss": 0.1991434246301651, "lr": 1.5805658576638372e-06, "epoch": 1.6556898154295578, "percentage": 82.78, "elapsed_time": "22:31:52", "remaining_time": "4:41:13"} +{"current_steps": 6236, "total_steps": 7532, "loss": 0.2609177231788635, "lr": 1.5781975623294554e-06, "epoch": 1.6559553844111008, "percentage": 82.79, "elapsed_time": "22:32:05", "remaining_time": "4:41:00"} +{"current_steps": 6237, "total_steps": 7532, "loss": 0.2481592893600464, "lr": 1.575830890619261e-06, "epoch": 1.6562209533926437, "percentage": 82.81, "elapsed_time": "22:32:18", "remaining_time": "4:40:46"} +{"current_steps": 6238, "total_steps": 7532, "loss": 0.23855090141296387, "lr": 1.5734658429895156e-06, "epoch": 1.6564865223741867, "percentage": 82.82, "elapsed_time": "22:32:32", "remaining_time": "4:40:34"} +{"current_steps": 6239, "total_steps": 7532, "loss": 0.2480623573064804, "lr": 1.5711024198961745e-06, "epoch": 1.6567520913557297, "percentage": 82.83, "elapsed_time": "22:32:44", "remaining_time": "4:40:20"} +{"current_steps": 6240, "total_steps": 7532, "loss": 0.2504739463329315, "lr": 1.5687406217948775e-06, "epoch": 1.6570176603372726, "percentage": 82.85, "elapsed_time": "22:32:57", "remaining_time": "4:40:07"} +{"current_steps": 6241, "total_steps": 7532, "loss": 0.2068580538034439, "lr": 1.5663804491409506e-06, "epoch": 1.6572832293188156, "percentage": 82.86, "elapsed_time": "22:33:10", "remaining_time": "4:39:54"} +{"current_steps": 6242, "total_steps": 7532, "loss": 0.2448163628578186, "lr": 1.5640219023894077e-06, "epoch": 1.6575487983003585, "percentage": 82.87, "elapsed_time": "22:33:23", 
"remaining_time": "4:39:41"} +{"current_steps": 6243, "total_steps": 7532, "loss": 0.2514716386795044, "lr": 1.5616649819949492e-06, "epoch": 1.6578143672819015, "percentage": 82.89, "elapsed_time": "22:33:36", "remaining_time": "4:39:28"} +{"current_steps": 6244, "total_steps": 7532, "loss": 0.2067629098892212, "lr": 1.559309688411962e-06, "epoch": 1.6580799362634444, "percentage": 82.9, "elapsed_time": "22:33:49", "remaining_time": "4:39:15"} +{"current_steps": 6245, "total_steps": 7532, "loss": 0.22909750044345856, "lr": 1.5569560220945168e-06, "epoch": 1.6583455052449874, "percentage": 82.91, "elapsed_time": "22:34:02", "remaining_time": "4:39:02"} +{"current_steps": 6246, "total_steps": 7532, "loss": 0.203629732131958, "lr": 1.5546039834963745e-06, "epoch": 1.6586110742265303, "percentage": 82.93, "elapsed_time": "22:34:15", "remaining_time": "4:38:49"} +{"current_steps": 6247, "total_steps": 7532, "loss": 0.21919086575508118, "lr": 1.552253573070981e-06, "epoch": 1.6588766432080733, "percentage": 82.94, "elapsed_time": "22:34:28", "remaining_time": "4:38:36"} +{"current_steps": 6248, "total_steps": 7532, "loss": 0.2535661458969116, "lr": 1.549904791271466e-06, "epoch": 1.6591422121896162, "percentage": 82.95, "elapsed_time": "22:34:41", "remaining_time": "4:38:23"} +{"current_steps": 6249, "total_steps": 7532, "loss": 0.224460631608963, "lr": 1.5475576385506475e-06, "epoch": 1.6594077811711592, "percentage": 82.97, "elapsed_time": "22:34:54", "remaining_time": "4:38:10"} +{"current_steps": 6250, "total_steps": 7532, "loss": 0.21925818920135498, "lr": 1.5452121153610288e-06, "epoch": 1.6596733501527021, "percentage": 82.98, "elapsed_time": "22:35:07", "remaining_time": "4:37:57"} +{"current_steps": 6251, "total_steps": 7532, "loss": 0.2100696563720703, "lr": 1.5428682221547997e-06, "epoch": 1.659938919134245, "percentage": 82.99, "elapsed_time": "22:35:20", "remaining_time": "4:37:44"} +{"current_steps": 6252, "total_steps": 7532, "loss": 0.25982293486595154, 
"lr": 1.540525959383834e-06, "epoch": 1.660204488115788, "percentage": 83.01, "elapsed_time": "22:35:32", "remaining_time": "4:37:31"} +{"current_steps": 6253, "total_steps": 7532, "loss": 0.23615162074565887, "lr": 1.538185327499694e-06, "epoch": 1.660470057097331, "percentage": 83.02, "elapsed_time": "22:35:45", "remaining_time": "4:37:18"} +{"current_steps": 6254, "total_steps": 7532, "loss": 0.2454022467136383, "lr": 1.5358463269536218e-06, "epoch": 1.660735626078874, "percentage": 83.03, "elapsed_time": "22:35:58", "remaining_time": "4:37:05"} +{"current_steps": 6255, "total_steps": 7532, "loss": 0.2330605536699295, "lr": 1.5335089581965556e-06, "epoch": 1.6610011950604169, "percentage": 83.05, "elapsed_time": "22:36:11", "remaining_time": "4:36:52"} +{"current_steps": 6256, "total_steps": 7532, "loss": 0.23193006217479706, "lr": 1.5311732216791087e-06, "epoch": 1.6612667640419598, "percentage": 83.06, "elapsed_time": "22:36:24", "remaining_time": "4:36:39"} +{"current_steps": 6257, "total_steps": 7532, "loss": 0.23254770040512085, "lr": 1.5288391178515838e-06, "epoch": 1.6615323330235028, "percentage": 83.07, "elapsed_time": "22:36:37", "remaining_time": "4:36:26"} +{"current_steps": 6258, "total_steps": 7532, "loss": 0.23240572214126587, "lr": 1.5265066471639701e-06, "epoch": 1.6617979020050457, "percentage": 83.09, "elapsed_time": "22:36:50", "remaining_time": "4:36:13"} +{"current_steps": 6259, "total_steps": 7532, "loss": 0.2765730619430542, "lr": 1.5241758100659386e-06, "epoch": 1.6620634709865887, "percentage": 83.1, "elapsed_time": "22:37:03", "remaining_time": "4:36:00"} +{"current_steps": 6260, "total_steps": 7532, "loss": 0.26366496086120605, "lr": 1.5218466070068472e-06, "epoch": 1.6623290399681316, "percentage": 83.11, "elapsed_time": "22:37:16", "remaining_time": "4:35:47"} +{"current_steps": 6261, "total_steps": 7532, "loss": 0.22322653234004974, "lr": 1.5195190384357405e-06, "epoch": 1.6625946089496746, "percentage": 83.13, "elapsed_time": 
"22:37:28", "remaining_time": "4:35:34"} +{"current_steps": 6262, "total_steps": 7532, "loss": 0.24144116044044495, "lr": 1.5171931048013466e-06, "epoch": 1.6628601779312175, "percentage": 83.14, "elapsed_time": "22:37:41", "remaining_time": "4:35:21"} +{"current_steps": 6263, "total_steps": 7532, "loss": 0.24559618532657623, "lr": 1.5148688065520734e-06, "epoch": 1.6631257469127605, "percentage": 83.15, "elapsed_time": "22:37:54", "remaining_time": "4:35:08"} +{"current_steps": 6264, "total_steps": 7532, "loss": 0.24337056279182434, "lr": 1.5125461441360223e-06, "epoch": 1.6633913158943034, "percentage": 83.17, "elapsed_time": "22:38:07", "remaining_time": "4:34:55"} +{"current_steps": 6265, "total_steps": 7532, "loss": 0.2733612358570099, "lr": 1.5102251180009752e-06, "epoch": 1.6636568848758464, "percentage": 83.18, "elapsed_time": "22:38:20", "remaining_time": "4:34:42"} +{"current_steps": 6266, "total_steps": 7532, "loss": 0.2116459757089615, "lr": 1.5079057285943976e-06, "epoch": 1.6639224538573893, "percentage": 83.19, "elapsed_time": "22:38:33", "remaining_time": "4:34:29"} +{"current_steps": 6267, "total_steps": 7532, "loss": 0.21221664547920227, "lr": 1.5055879763634407e-06, "epoch": 1.6641880228389323, "percentage": 83.2, "elapsed_time": "22:38:46", "remaining_time": "4:34:16"} +{"current_steps": 6268, "total_steps": 7532, "loss": 0.21166589856147766, "lr": 1.503271861754939e-06, "epoch": 1.6644535918204753, "percentage": 83.22, "elapsed_time": "22:38:59", "remaining_time": "4:34:03"} +{"current_steps": 6269, "total_steps": 7532, "loss": 0.2652161121368408, "lr": 1.5009573852154136e-06, "epoch": 1.6647191608020182, "percentage": 83.23, "elapsed_time": "22:39:12", "remaining_time": "4:33:50"} +{"current_steps": 6270, "total_steps": 7532, "loss": 0.22142267227172852, "lr": 1.4986445471910672e-06, "epoch": 1.6649847297835612, "percentage": 83.24, "elapsed_time": "22:39:24", "remaining_time": "4:33:37"} +{"current_steps": 6271, "total_steps": 7532, "loss": 
0.2307332456111908, "lr": 1.4963333481277874e-06, "epoch": 1.665250298765104, "percentage": 83.26, "elapsed_time": "22:39:37", "remaining_time": "4:33:23"} +{"current_steps": 6272, "total_steps": 7532, "loss": 0.2669411897659302, "lr": 1.494023788471144e-06, "epoch": 1.665515867746647, "percentage": 83.27, "elapsed_time": "22:39:49", "remaining_time": "4:33:10"} +{"current_steps": 6273, "total_steps": 7532, "loss": 0.2468804121017456, "lr": 1.4917158686663992e-06, "epoch": 1.66578143672819, "percentage": 83.28, "elapsed_time": "22:40:02", "remaining_time": "4:32:57"} +{"current_steps": 6274, "total_steps": 7532, "loss": 0.24152463674545288, "lr": 1.4894095891584882e-06, "epoch": 1.666047005709733, "percentage": 83.3, "elapsed_time": "22:40:15", "remaining_time": "4:32:44"} +{"current_steps": 6275, "total_steps": 7532, "loss": 0.1966545283794403, "lr": 1.4871049503920353e-06, "epoch": 1.666312574691276, "percentage": 83.31, "elapsed_time": "22:40:27", "remaining_time": "4:32:31"} +{"current_steps": 6276, "total_steps": 7532, "loss": 0.24772626161575317, "lr": 1.4848019528113477e-06, "epoch": 1.6665781436728189, "percentage": 83.32, "elapsed_time": "22:40:40", "remaining_time": "4:32:18"} +{"current_steps": 6277, "total_steps": 7532, "loss": 0.22138851881027222, "lr": 1.4825005968604189e-06, "epoch": 1.6668437126543618, "percentage": 83.34, "elapsed_time": "22:40:53", "remaining_time": "4:32:05"} +{"current_steps": 6278, "total_steps": 7532, "loss": 0.24345465004444122, "lr": 1.4802008829829172e-06, "epoch": 1.6671092816359048, "percentage": 83.35, "elapsed_time": "22:41:06", "remaining_time": "4:31:52"} +{"current_steps": 6279, "total_steps": 7532, "loss": 0.22862716019153595, "lr": 1.477902811622205e-06, "epoch": 1.6673748506174477, "percentage": 83.36, "elapsed_time": "22:41:19", "remaining_time": "4:31:39"} +{"current_steps": 6280, "total_steps": 7532, "loss": 0.2763083577156067, "lr": 1.4756063832213207e-06, "epoch": 1.6676404195989907, "percentage": 83.38, 
"elapsed_time": "22:41:32", "remaining_time": "4:31:26"} +{"current_steps": 6281, "total_steps": 7532, "loss": 0.24631357192993164, "lr": 1.4733115982229885e-06, "epoch": 1.6679059885805336, "percentage": 83.39, "elapsed_time": "22:41:45", "remaining_time": "4:31:13"} +{"current_steps": 6282, "total_steps": 7532, "loss": 0.22650030255317688, "lr": 1.4710184570696184e-06, "epoch": 1.6681715575620768, "percentage": 83.4, "elapsed_time": "22:41:57", "remaining_time": "4:31:00"} +{"current_steps": 6283, "total_steps": 7532, "loss": 0.2455909103155136, "lr": 1.4687269602033006e-06, "epoch": 1.6684371265436198, "percentage": 83.42, "elapsed_time": "22:42:10", "remaining_time": "4:30:47"} +{"current_steps": 6284, "total_steps": 7532, "loss": 0.25625506043434143, "lr": 1.4664371080658079e-06, "epoch": 1.6687026955251627, "percentage": 83.43, "elapsed_time": "22:42:23", "remaining_time": "4:30:34"} +{"current_steps": 6285, "total_steps": 7532, "loss": 0.22178369760513306, "lr": 1.4641489010985954e-06, "epoch": 1.6689682645067057, "percentage": 83.44, "elapsed_time": "22:42:35", "remaining_time": "4:30:21"} +{"current_steps": 6286, "total_steps": 7532, "loss": 0.23936234414577484, "lr": 1.4618623397428055e-06, "epoch": 1.6692338334882486, "percentage": 83.46, "elapsed_time": "22:42:48", "remaining_time": "4:30:08"} +{"current_steps": 6287, "total_steps": 7532, "loss": 0.21629829704761505, "lr": 1.459577424439258e-06, "epoch": 1.6694994024697916, "percentage": 83.47, "elapsed_time": "22:43:01", "remaining_time": "4:29:55"} +{"current_steps": 6288, "total_steps": 7532, "loss": 0.238427072763443, "lr": 1.457294155628457e-06, "epoch": 1.6697649714513345, "percentage": 83.48, "elapsed_time": "22:43:14", "remaining_time": "4:29:42"} +{"current_steps": 6289, "total_steps": 7532, "loss": 0.23168250918388367, "lr": 1.4550125337505926e-06, "epoch": 1.6700305404328775, "percentage": 83.5, "elapsed_time": "22:43:27", "remaining_time": "4:29:28"} +{"current_steps": 6290, "total_steps": 
7532, "loss": 0.25518402457237244, "lr": 1.45273255924553e-06, "epoch": 1.6702961094144204, "percentage": 83.51, "elapsed_time": "22:43:40", "remaining_time": "4:29:15"} +{"current_steps": 6291, "total_steps": 7532, "loss": 0.2488553822040558, "lr": 1.450454232552826e-06, "epoch": 1.6705616783959634, "percentage": 83.52, "elapsed_time": "22:43:53", "remaining_time": "4:29:02"} +{"current_steps": 6292, "total_steps": 7532, "loss": 0.2684085965156555, "lr": 1.448177554111716e-06, "epoch": 1.6708272473775063, "percentage": 83.54, "elapsed_time": "22:44:06", "remaining_time": "4:28:49"} +{"current_steps": 6293, "total_steps": 7532, "loss": 0.24627447128295898, "lr": 1.4459025243611124e-06, "epoch": 1.6710928163590493, "percentage": 83.55, "elapsed_time": "22:44:18", "remaining_time": "4:28:36"} +{"current_steps": 6294, "total_steps": 7532, "loss": 0.24725376069545746, "lr": 1.4436291437396156e-06, "epoch": 1.6713583853405922, "percentage": 83.56, "elapsed_time": "22:44:31", "remaining_time": "4:28:23"} +{"current_steps": 6295, "total_steps": 7532, "loss": 0.23488914966583252, "lr": 1.4413574126855067e-06, "epoch": 1.6716239543221352, "percentage": 83.58, "elapsed_time": "22:44:43", "remaining_time": "4:28:10"} +{"current_steps": 6296, "total_steps": 7532, "loss": 0.2031177133321762, "lr": 1.4390873316367492e-06, "epoch": 1.6718895233036781, "percentage": 83.59, "elapsed_time": "22:44:56", "remaining_time": "4:27:57"} +{"current_steps": 6297, "total_steps": 7532, "loss": 0.25378018617630005, "lr": 1.4368189010309874e-06, "epoch": 1.672155092285221, "percentage": 83.6, "elapsed_time": "22:45:09", "remaining_time": "4:27:44"} +{"current_steps": 6298, "total_steps": 7532, "loss": 0.21305282413959503, "lr": 1.434552121305548e-06, "epoch": 1.672420661266764, "percentage": 83.62, "elapsed_time": "22:45:22", "remaining_time": "4:27:31"} +{"current_steps": 6299, "total_steps": 7532, "loss": 0.20908987522125244, "lr": 1.432286992897437e-06, "epoch": 1.672686230248307, 
"percentage": 83.63, "elapsed_time": "22:45:34", "remaining_time": "4:27:18"} +{"current_steps": 6300, "total_steps": 7532, "loss": 0.21945340931415558, "lr": 1.4300235162433496e-06, "epoch": 1.67295179922985, "percentage": 83.64, "elapsed_time": "22:45:47", "remaining_time": "4:27:05"} +{"current_steps": 6301, "total_steps": 7532, "loss": 0.22096669673919678, "lr": 1.4277616917796544e-06, "epoch": 1.6732173682113929, "percentage": 83.66, "elapsed_time": "22:46:05", "remaining_time": "4:26:53"} +{"current_steps": 6302, "total_steps": 7532, "loss": 0.2233850657939911, "lr": 1.425501519942406e-06, "epoch": 1.6734829371929358, "percentage": 83.67, "elapsed_time": "22:46:18", "remaining_time": "4:26:40"} +{"current_steps": 6303, "total_steps": 7532, "loss": 0.21432995796203613, "lr": 1.423243001167337e-06, "epoch": 1.6737485061744788, "percentage": 83.68, "elapsed_time": "22:46:31", "remaining_time": "4:26:27"} +{"current_steps": 6304, "total_steps": 7532, "loss": 0.2649557590484619, "lr": 1.4209861358898636e-06, "epoch": 1.6740140751560217, "percentage": 83.7, "elapsed_time": "22:46:43", "remaining_time": "4:26:14"} +{"current_steps": 6305, "total_steps": 7532, "loss": 0.24918347597122192, "lr": 1.418730924545083e-06, "epoch": 1.6742796441375647, "percentage": 83.71, "elapsed_time": "22:46:56", "remaining_time": "4:26:01"} +{"current_steps": 6306, "total_steps": 7532, "loss": 0.24121029675006866, "lr": 1.4164773675677745e-06, "epoch": 1.6745452131191076, "percentage": 83.72, "elapsed_time": "22:47:08", "remaining_time": "4:25:47"} +{"current_steps": 6307, "total_steps": 7532, "loss": 0.24401789903640747, "lr": 1.4142254653923949e-06, "epoch": 1.6748107821006506, "percentage": 83.74, "elapsed_time": "22:47:21", "remaining_time": "4:25:34"} +{"current_steps": 6308, "total_steps": 7532, "loss": 0.2374853938817978, "lr": 1.4119752184530867e-06, "epoch": 1.6750763510821935, "percentage": 83.75, "elapsed_time": "22:47:33", "remaining_time": "4:25:21"} +{"current_steps": 
6309, "total_steps": 7532, "loss": 0.2351088970899582, "lr": 1.4097266271836695e-06, "epoch": 1.6753419200637365, "percentage": 83.76, "elapsed_time": "22:47:47", "remaining_time": "4:25:08"} +{"current_steps": 6310, "total_steps": 7532, "loss": 0.19560754299163818, "lr": 1.407479692017647e-06, "epoch": 1.6756074890452797, "percentage": 83.78, "elapsed_time": "22:47:59", "remaining_time": "4:24:55"} +{"current_steps": 6311, "total_steps": 7532, "loss": 0.24124252796173096, "lr": 1.405234413388199e-06, "epoch": 1.6758730580268226, "percentage": 83.79, "elapsed_time": "22:48:12", "remaining_time": "4:24:42"} +{"current_steps": 6312, "total_steps": 7532, "loss": 0.2208215445280075, "lr": 1.4029907917281903e-06, "epoch": 1.6761386270083656, "percentage": 83.8, "elapsed_time": "22:48:25", "remaining_time": "4:24:29"} +{"current_steps": 6313, "total_steps": 7532, "loss": 0.23888292908668518, "lr": 1.4007488274701653e-06, "epoch": 1.6764041959899085, "percentage": 83.82, "elapsed_time": "22:48:38", "remaining_time": "4:24:16"} +{"current_steps": 6314, "total_steps": 7532, "loss": 0.24079063534736633, "lr": 1.3985085210463479e-06, "epoch": 1.6766697649714515, "percentage": 83.83, "elapsed_time": "22:48:51", "remaining_time": "4:24:03"} +{"current_steps": 6315, "total_steps": 7532, "loss": 0.18975606560707092, "lr": 1.3962698728886414e-06, "epoch": 1.6769353339529944, "percentage": 83.84, "elapsed_time": "22:49:04", "remaining_time": "4:23:50"} +{"current_steps": 6316, "total_steps": 7532, "loss": 0.201214998960495, "lr": 1.3940328834286333e-06, "epoch": 1.6772009029345374, "percentage": 83.86, "elapsed_time": "22:49:17", "remaining_time": "4:23:37"} +{"current_steps": 6317, "total_steps": 7532, "loss": 0.20079322159290314, "lr": 1.3917975530975836e-06, "epoch": 1.6774664719160803, "percentage": 83.87, "elapsed_time": "22:49:31", "remaining_time": "4:23:24"} +{"current_steps": 6318, "total_steps": 7532, "loss": 0.23593586683273315, "lr": 1.3895638823264447e-06, "epoch": 
1.6777320408976233, "percentage": 83.88, "elapsed_time": "22:49:43", "remaining_time": "4:23:11"} +{"current_steps": 6319, "total_steps": 7532, "loss": 0.26574259996414185, "lr": 1.3873318715458383e-06, "epoch": 1.6779976098791662, "percentage": 83.9, "elapsed_time": "22:49:56", "remaining_time": "4:22:58"} +{"current_steps": 6320, "total_steps": 7532, "loss": 0.20573323965072632, "lr": 1.3851015211860696e-06, "epoch": 1.6782631788607092, "percentage": 83.91, "elapsed_time": "22:50:10", "remaining_time": "4:22:45"} +{"current_steps": 6321, "total_steps": 7532, "loss": 0.25610506534576416, "lr": 1.3828728316771244e-06, "epoch": 1.6785287478422521, "percentage": 83.92, "elapsed_time": "22:50:22", "remaining_time": "4:22:32"} +{"current_steps": 6322, "total_steps": 7532, "loss": 0.2138693630695343, "lr": 1.380645803448668e-06, "epoch": 1.678794316823795, "percentage": 83.94, "elapsed_time": "22:50:35", "remaining_time": "4:22:19"} +{"current_steps": 6323, "total_steps": 7532, "loss": 0.21522866189479828, "lr": 1.3784204369300447e-06, "epoch": 1.679059885805338, "percentage": 83.95, "elapsed_time": "22:50:47", "remaining_time": "4:22:06"} +{"current_steps": 6324, "total_steps": 7532, "loss": 0.25622743368148804, "lr": 1.376196732550279e-06, "epoch": 1.679325454786881, "percentage": 83.96, "elapsed_time": "22:51:00", "remaining_time": "4:21:53"} +{"current_steps": 6325, "total_steps": 7532, "loss": 0.18025386333465576, "lr": 1.3739746907380757e-06, "epoch": 1.679591023768424, "percentage": 83.98, "elapsed_time": "22:51:13", "remaining_time": "4:21:40"} +{"current_steps": 6326, "total_steps": 7532, "loss": 0.18785078823566437, "lr": 1.3717543119218168e-06, "epoch": 1.679856592749967, "percentage": 83.99, "elapsed_time": "22:51:25", "remaining_time": "4:21:27"} +{"current_steps": 6327, "total_steps": 7532, "loss": 0.24682481586933136, "lr": 1.3695355965295653e-06, "epoch": 1.6801221617315099, "percentage": 84.0, "elapsed_time": "22:51:38", "remaining_time": "4:21:13"} 
+{"current_steps": 6328, "total_steps": 7532, "loss": 0.2193487137556076, "lr": 1.3673185449890647e-06, "epoch": 1.6803877307130528, "percentage": 84.01, "elapsed_time": "22:51:51", "remaining_time": "4:21:00"} +{"current_steps": 6329, "total_steps": 7532, "loss": 0.24963265657424927, "lr": 1.3651031577277351e-06, "epoch": 1.6806532996945958, "percentage": 84.03, "elapsed_time": "22:52:03", "remaining_time": "4:20:47"} +{"current_steps": 6330, "total_steps": 7532, "loss": 0.21473057568073273, "lr": 1.3628894351726785e-06, "epoch": 1.6809188686761387, "percentage": 84.04, "elapsed_time": "22:52:16", "remaining_time": "4:20:34"} +{"current_steps": 6331, "total_steps": 7532, "loss": 0.2539534866809845, "lr": 1.3606773777506731e-06, "epoch": 1.6811844376576817, "percentage": 84.05, "elapsed_time": "22:52:28", "remaining_time": "4:20:21"} +{"current_steps": 6332, "total_steps": 7532, "loss": 0.2671799659729004, "lr": 1.3584669858881771e-06, "epoch": 1.6814500066392246, "percentage": 84.07, "elapsed_time": "22:52:41", "remaining_time": "4:20:08"} +{"current_steps": 6333, "total_steps": 7532, "loss": 0.24291013181209564, "lr": 1.3562582600113295e-06, "epoch": 1.6817155756207676, "percentage": 84.08, "elapsed_time": "22:52:54", "remaining_time": "4:19:55"} +{"current_steps": 6334, "total_steps": 7532, "loss": 0.24249233305454254, "lr": 1.354051200545946e-06, "epoch": 1.6819811446023105, "percentage": 84.09, "elapsed_time": "22:53:06", "remaining_time": "4:19:42"} +{"current_steps": 6335, "total_steps": 7532, "loss": 0.21647261083126068, "lr": 1.351845807917519e-06, "epoch": 1.6822467135838535, "percentage": 84.11, "elapsed_time": "22:53:19", "remaining_time": "4:19:29"} +{"current_steps": 6336, "total_steps": 7532, "loss": 0.2348332703113556, "lr": 1.349642082551227e-06, "epoch": 1.6825122825653964, "percentage": 84.12, "elapsed_time": "22:53:31", "remaining_time": "4:19:16"} +{"current_steps": 6337, "total_steps": 7532, "loss": 0.22503259778022766, "lr": 
1.34744002487192e-06, "epoch": 1.6827778515469394, "percentage": 84.13, "elapsed_time": "22:53:44", "remaining_time": "4:19:03"} +{"current_steps": 6338, "total_steps": 7532, "loss": 0.2397763580083847, "lr": 1.3452396353041286e-06, "epoch": 1.6830434205284823, "percentage": 84.15, "elapsed_time": "22:53:57", "remaining_time": "4:18:50"} +{"current_steps": 6339, "total_steps": 7532, "loss": 0.23345956206321716, "lr": 1.3430409142720624e-06, "epoch": 1.6833089895100253, "percentage": 84.16, "elapsed_time": "22:54:10", "remaining_time": "4:18:37"} +{"current_steps": 6340, "total_steps": 7532, "loss": 0.19660598039627075, "lr": 1.3408438621996088e-06, "epoch": 1.6835745584915682, "percentage": 84.17, "elapsed_time": "22:54:22", "remaining_time": "4:18:24"} +{"current_steps": 6341, "total_steps": 7532, "loss": 0.19148695468902588, "lr": 1.3386484795103327e-06, "epoch": 1.6838401274731112, "percentage": 84.19, "elapsed_time": "22:54:36", "remaining_time": "4:18:11"} +{"current_steps": 6342, "total_steps": 7532, "loss": 0.2078169733285904, "lr": 1.3364547666274819e-06, "epoch": 1.6841056964546541, "percentage": 84.2, "elapsed_time": "22:54:48", "remaining_time": "4:17:58"} +{"current_steps": 6343, "total_steps": 7532, "loss": 0.23122575879096985, "lr": 1.3342627239739715e-06, "epoch": 1.684371265436197, "percentage": 84.21, "elapsed_time": "22:55:01", "remaining_time": "4:17:44"} +{"current_steps": 6344, "total_steps": 7532, "loss": 0.2744083106517792, "lr": 1.3320723519724032e-06, "epoch": 1.68463683441774, "percentage": 84.23, "elapsed_time": "22:55:14", "remaining_time": "4:17:31"} +{"current_steps": 6345, "total_steps": 7532, "loss": 0.26361098885536194, "lr": 1.3298836510450597e-06, "epoch": 1.684902403399283, "percentage": 84.24, "elapsed_time": "22:55:27", "remaining_time": "4:17:18"} +{"current_steps": 6346, "total_steps": 7532, "loss": 0.21833205223083496, "lr": 1.3276966216138932e-06, "epoch": 1.685167972380826, "percentage": 84.25, "elapsed_time": "22:55:40", 
"remaining_time": "4:17:05"} +{"current_steps": 6347, "total_steps": 7532, "loss": 0.22075100243091583, "lr": 1.3255112641005374e-06, "epoch": 1.685433541362369, "percentage": 84.27, "elapsed_time": "22:55:53", "remaining_time": "4:16:52"} +{"current_steps": 6348, "total_steps": 7532, "loss": 0.24352343380451202, "lr": 1.3233275789263034e-06, "epoch": 1.6856991103439118, "percentage": 84.28, "elapsed_time": "22:56:05", "remaining_time": "4:16:39"} +{"current_steps": 6349, "total_steps": 7532, "loss": 0.2331303060054779, "lr": 1.3211455665121808e-06, "epoch": 1.6859646793254548, "percentage": 84.29, "elapsed_time": "22:56:19", "remaining_time": "4:16:26"} +{"current_steps": 6350, "total_steps": 7532, "loss": 0.2511689066886902, "lr": 1.3189652272788356e-06, "epoch": 1.6862302483069977, "percentage": 84.31, "elapsed_time": "22:56:31", "remaining_time": "4:16:13"} +{"current_steps": 6351, "total_steps": 7532, "loss": 0.18535873293876648, "lr": 1.3167865616466113e-06, "epoch": 1.6864958172885407, "percentage": 84.32, "elapsed_time": "22:56:44", "remaining_time": "4:16:00"} +{"current_steps": 6352, "total_steps": 7532, "loss": 0.23924914002418518, "lr": 1.3146095700355289e-06, "epoch": 1.6867613862700837, "percentage": 84.33, "elapsed_time": "22:56:57", "remaining_time": "4:15:47"} +{"current_steps": 6353, "total_steps": 7532, "loss": 0.19710025191307068, "lr": 1.3124342528652845e-06, "epoch": 1.6870269552516266, "percentage": 84.35, "elapsed_time": "22:57:10", "remaining_time": "4:15:34"} +{"current_steps": 6354, "total_steps": 7532, "loss": 0.21439281105995178, "lr": 1.3102606105552585e-06, "epoch": 1.6872925242331696, "percentage": 84.36, "elapsed_time": "22:57:23", "remaining_time": "4:15:21"} +{"current_steps": 6355, "total_steps": 7532, "loss": 0.2647722363471985, "lr": 1.3080886435245e-06, "epoch": 1.6875580932147125, "percentage": 84.37, "elapsed_time": "22:57:35", "remaining_time": "4:15:08"} +{"current_steps": 6356, "total_steps": 7532, "loss": 
0.2202019840478897, "lr": 1.3059183521917396e-06, "epoch": 1.6878236621962555, "percentage": 84.39, "elapsed_time": "22:57:48", "remaining_time": "4:14:55"} +{"current_steps": 6357, "total_steps": 7532, "loss": 0.25833001732826233, "lr": 1.3037497369753871e-06, "epoch": 1.6880892311777984, "percentage": 84.4, "elapsed_time": "22:58:01", "remaining_time": "4:14:42"} +{"current_steps": 6358, "total_steps": 7532, "loss": 0.19984321296215057, "lr": 1.3015827982935192e-06, "epoch": 1.6883548001593414, "percentage": 84.41, "elapsed_time": "22:58:14", "remaining_time": "4:14:29"} +{"current_steps": 6359, "total_steps": 7532, "loss": 0.2190552055835724, "lr": 1.2994175365638996e-06, "epoch": 1.6886203691408843, "percentage": 84.43, "elapsed_time": "22:58:27", "remaining_time": "4:14:16"} +{"current_steps": 6360, "total_steps": 7532, "loss": 0.26262593269348145, "lr": 1.2972539522039652e-06, "epoch": 1.6888859381224273, "percentage": 84.44, "elapsed_time": "22:58:40", "remaining_time": "4:14:03"} +{"current_steps": 6361, "total_steps": 7532, "loss": 0.2665651738643646, "lr": 1.2950920456308292e-06, "epoch": 1.6891515071039702, "percentage": 84.45, "elapsed_time": "22:58:52", "remaining_time": "4:13:50"} +{"current_steps": 6362, "total_steps": 7532, "loss": 0.22369208931922913, "lr": 1.2929318172612803e-06, "epoch": 1.6894170760855132, "percentage": 84.47, "elapsed_time": "22:59:05", "remaining_time": "4:13:37"} +{"current_steps": 6363, "total_steps": 7532, "loss": 0.21063543856143951, "lr": 1.2907732675117878e-06, "epoch": 1.6896826450670561, "percentage": 84.48, "elapsed_time": "22:59:18", "remaining_time": "4:13:24"} +{"current_steps": 6364, "total_steps": 7532, "loss": 0.2303045690059662, "lr": 1.2886163967984944e-06, "epoch": 1.689948214048599, "percentage": 84.49, "elapsed_time": "22:59:30", "remaining_time": "4:13:11"} +{"current_steps": 6365, "total_steps": 7532, "loss": 0.20185884833335876, "lr": 1.2864612055372182e-06, "epoch": 1.690213783030142, "percentage": 
84.51, "elapsed_time": "22:59:43", "remaining_time": "4:12:58"} +{"current_steps": 6366, "total_steps": 7532, "loss": 0.22900527715682983, "lr": 1.284307694143455e-06, "epoch": 1.690479352011685, "percentage": 84.52, "elapsed_time": "22:59:56", "remaining_time": "4:12:45"} +{"current_steps": 6367, "total_steps": 7532, "loss": 0.21405862271785736, "lr": 1.282155863032377e-06, "epoch": 1.690744920993228, "percentage": 84.53, "elapsed_time": "23:00:09", "remaining_time": "4:12:32"} +{"current_steps": 6368, "total_steps": 7532, "loss": 0.26143258810043335, "lr": 1.2800057126188304e-06, "epoch": 1.6910104899747709, "percentage": 84.55, "elapsed_time": "23:00:22", "remaining_time": "4:12:18"} +{"current_steps": 6369, "total_steps": 7532, "loss": 0.24437926709651947, "lr": 1.2778572433173397e-06, "epoch": 1.6912760589563138, "percentage": 84.56, "elapsed_time": "23:00:35", "remaining_time": "4:12:05"} +{"current_steps": 6370, "total_steps": 7532, "loss": 0.24862337112426758, "lr": 1.275710455542104e-06, "epoch": 1.6915416279378568, "percentage": 84.57, "elapsed_time": "23:00:47", "remaining_time": "4:11:52"} +{"current_steps": 6371, "total_steps": 7532, "loss": 0.2146604359149933, "lr": 1.2735653497069978e-06, "epoch": 1.6918071969193997, "percentage": 84.59, "elapsed_time": "23:01:00", "remaining_time": "4:11:39"} +{"current_steps": 6372, "total_steps": 7532, "loss": 0.2525256872177124, "lr": 1.2714219262255777e-06, "epoch": 1.6920727659009427, "percentage": 84.6, "elapsed_time": "23:01:13", "remaining_time": "4:11:26"} +{"current_steps": 6373, "total_steps": 7532, "loss": 0.23462912440299988, "lr": 1.2692801855110638e-06, "epoch": 1.6923383348824856, "percentage": 84.61, "elapsed_time": "23:01:25", "remaining_time": "4:11:13"} +{"current_steps": 6374, "total_steps": 7532, "loss": 0.21551170945167542, "lr": 1.2671401279763595e-06, "epoch": 1.6926039038640286, "percentage": 84.63, "elapsed_time": "23:01:38", "remaining_time": "4:11:00"} +{"current_steps": 6375, 
"total_steps": 7532, "loss": 0.24094407260417938, "lr": 1.2650017540340454e-06, "epoch": 1.6928694728455715, "percentage": 84.64, "elapsed_time": "23:01:51", "remaining_time": "4:10:47"} +{"current_steps": 6376, "total_steps": 7532, "loss": 0.23101133108139038, "lr": 1.2628650640963736e-06, "epoch": 1.6931350418271145, "percentage": 84.65, "elapsed_time": "23:02:04", "remaining_time": "4:10:34"} +{"current_steps": 6377, "total_steps": 7532, "loss": 0.2513899803161621, "lr": 1.2607300585752724e-06, "epoch": 1.6934006108086574, "percentage": 84.67, "elapsed_time": "23:02:16", "remaining_time": "4:10:21"} +{"current_steps": 6378, "total_steps": 7532, "loss": 0.2490600198507309, "lr": 1.258596737882345e-06, "epoch": 1.6936661797902004, "percentage": 84.68, "elapsed_time": "23:02:30", "remaining_time": "4:10:08"} +{"current_steps": 6379, "total_steps": 7532, "loss": 0.25767675042152405, "lr": 1.256465102428872e-06, "epoch": 1.6939317487717434, "percentage": 84.69, "elapsed_time": "23:02:42", "remaining_time": "4:09:55"} +{"current_steps": 6380, "total_steps": 7532, "loss": 0.2231348305940628, "lr": 1.254335152625804e-06, "epoch": 1.6941973177532863, "percentage": 84.71, "elapsed_time": "23:02:55", "remaining_time": "4:09:42"} +{"current_steps": 6381, "total_steps": 7532, "loss": 0.25873979926109314, "lr": 1.2522068888837758e-06, "epoch": 1.6944628867348293, "percentage": 84.72, "elapsed_time": "23:03:08", "remaining_time": "4:09:29"} +{"current_steps": 6382, "total_steps": 7532, "loss": 0.2848423421382904, "lr": 1.2500803116130887e-06, "epoch": 1.6947284557163722, "percentage": 84.73, "elapsed_time": "23:03:21", "remaining_time": "4:09:16"} +{"current_steps": 6383, "total_steps": 7532, "loss": 0.21343804895877838, "lr": 1.247955421223721e-06, "epoch": 1.6949940246979152, "percentage": 84.75, "elapsed_time": "23:03:33", "remaining_time": "4:09:03"} +{"current_steps": 6384, "total_steps": 7532, "loss": 0.23080062866210938, "lr": 1.245832218125328e-06, "epoch": 
1.695259593679458, "percentage": 84.76, "elapsed_time": "23:03:47", "remaining_time": "4:08:50"} +{"current_steps": 6385, "total_steps": 7532, "loss": 0.2397225797176361, "lr": 1.2437107027272376e-06, "epoch": 1.695525162661001, "percentage": 84.77, "elapsed_time": "23:03:59", "remaining_time": "4:08:37"} +{"current_steps": 6386, "total_steps": 7532, "loss": 0.22798654437065125, "lr": 1.2415908754384532e-06, "epoch": 1.695790731642544, "percentage": 84.78, "elapsed_time": "23:04:12", "remaining_time": "4:08:24"} +{"current_steps": 6387, "total_steps": 7532, "loss": 0.2534061074256897, "lr": 1.2394727366676518e-06, "epoch": 1.696056300624087, "percentage": 84.8, "elapsed_time": "23:04:25", "remaining_time": "4:08:11"} +{"current_steps": 6388, "total_steps": 7532, "loss": 0.2127036452293396, "lr": 1.2373562868231858e-06, "epoch": 1.69632186960563, "percentage": 84.81, "elapsed_time": "23:04:38", "remaining_time": "4:07:58"} +{"current_steps": 6389, "total_steps": 7532, "loss": 0.22341205179691315, "lr": 1.2352415263130813e-06, "epoch": 1.6965874385871729, "percentage": 84.82, "elapsed_time": "23:04:51", "remaining_time": "4:07:45"} +{"current_steps": 6390, "total_steps": 7532, "loss": 0.2435426563024521, "lr": 1.2331284555450406e-06, "epoch": 1.6968530075687158, "percentage": 84.84, "elapsed_time": "23:05:04", "remaining_time": "4:07:32"} +{"current_steps": 6391, "total_steps": 7532, "loss": 0.24652531743049622, "lr": 1.2310170749264383e-06, "epoch": 1.6971185765502588, "percentage": 84.85, "elapsed_time": "23:05:17", "remaining_time": "4:07:19"} +{"current_steps": 6392, "total_steps": 7532, "loss": 0.24172671139240265, "lr": 1.228907384864323e-06, "epoch": 1.6973841455318017, "percentage": 84.86, "elapsed_time": "23:05:30", "remaining_time": "4:07:06"} +{"current_steps": 6393, "total_steps": 7532, "loss": 0.21534420549869537, "lr": 1.2267993857654182e-06, "epoch": 1.6976497145133447, "percentage": 84.88, "elapsed_time": "23:05:43", "remaining_time": "4:06:53"} 
+{"current_steps": 6394, "total_steps": 7532, "loss": 0.2617778182029724, "lr": 1.2246930780361221e-06, "epoch": 1.6979152834948879, "percentage": 84.89, "elapsed_time": "23:05:56", "remaining_time": "4:06:40"} +{"current_steps": 6395, "total_steps": 7532, "loss": 0.20388583838939667, "lr": 1.2225884620825046e-06, "epoch": 1.6981808524764308, "percentage": 84.9, "elapsed_time": "23:06:09", "remaining_time": "4:06:27"} +{"current_steps": 6396, "total_steps": 7532, "loss": 0.23714327812194824, "lr": 1.220485538310312e-06, "epoch": 1.6984464214579738, "percentage": 84.92, "elapsed_time": "23:06:21", "remaining_time": "4:06:14"} +{"current_steps": 6397, "total_steps": 7532, "loss": 0.2495463341474533, "lr": 1.2183843071249634e-06, "epoch": 1.6987119904395167, "percentage": 84.93, "elapsed_time": "23:06:34", "remaining_time": "4:06:00"} +{"current_steps": 6398, "total_steps": 7532, "loss": 0.2419012188911438, "lr": 1.2162847689315483e-06, "epoch": 1.6989775594210597, "percentage": 84.94, "elapsed_time": "23:06:47", "remaining_time": "4:05:47"} +{"current_steps": 6399, "total_steps": 7532, "loss": 0.23392438888549805, "lr": 1.214186924134838e-06, "epoch": 1.6992431284026026, "percentage": 84.96, "elapsed_time": "23:07:00", "remaining_time": "4:05:34"} +{"current_steps": 6400, "total_steps": 7532, "loss": 0.22855526208877563, "lr": 1.2120907731392695e-06, "epoch": 1.6995086973841456, "percentage": 84.97, "elapsed_time": "23:07:12", "remaining_time": "4:05:21"} +{"current_steps": 6401, "total_steps": 7532, "loss": 0.22393949329853058, "lr": 1.2099963163489558e-06, "epoch": 1.6997742663656885, "percentage": 84.98, "elapsed_time": "23:07:32", "remaining_time": "4:05:09"} +{"current_steps": 6402, "total_steps": 7532, "loss": 0.2539960741996765, "lr": 1.2079035541676832e-06, "epoch": 1.7000398353472315, "percentage": 85.0, "elapsed_time": "23:07:44", "remaining_time": "4:04:56"} +{"current_steps": 6403, "total_steps": 7532, "loss": 0.23716852068901062, "lr": 
1.2058124869989129e-06, "epoch": 1.7003054043287744, "percentage": 85.01, "elapsed_time": "23:07:57", "remaining_time": "4:04:43"} +{"current_steps": 6404, "total_steps": 7532, "loss": 0.24658545851707458, "lr": 1.2037231152457773e-06, "epoch": 1.7005709733103174, "percentage": 85.02, "elapsed_time": "23:08:10", "remaining_time": "4:04:30"} +{"current_steps": 6405, "total_steps": 7532, "loss": 0.2316630333662033, "lr": 1.201635439311083e-06, "epoch": 1.7008365422918603, "percentage": 85.04, "elapsed_time": "23:08:23", "remaining_time": "4:04:17"} +{"current_steps": 6406, "total_steps": 7532, "loss": 0.20434345304965973, "lr": 1.1995494595973089e-06, "epoch": 1.7011021112734033, "percentage": 85.05, "elapsed_time": "23:08:37", "remaining_time": "4:04:04"} +{"current_steps": 6407, "total_steps": 7532, "loss": 0.2585931420326233, "lr": 1.197465176506607e-06, "epoch": 1.7013676802549462, "percentage": 85.06, "elapsed_time": "23:08:50", "remaining_time": "4:03:51"} +{"current_steps": 6408, "total_steps": 7532, "loss": 0.23007069528102875, "lr": 1.1953825904408033e-06, "epoch": 1.7016332492364892, "percentage": 85.08, "elapsed_time": "23:09:03", "remaining_time": "4:03:38"} +{"current_steps": 6409, "total_steps": 7532, "loss": 0.21822810173034668, "lr": 1.1933017018013948e-06, "epoch": 1.7018988182180321, "percentage": 85.09, "elapsed_time": "23:09:16", "remaining_time": "4:03:25"} +{"current_steps": 6410, "total_steps": 7532, "loss": 0.241228848695755, "lr": 1.1912225109895526e-06, "epoch": 1.702164387199575, "percentage": 85.1, "elapsed_time": "23:09:30", "remaining_time": "4:03:13"} +{"current_steps": 6411, "total_steps": 7532, "loss": 0.28803908824920654, "lr": 1.1891450184061203e-06, "epoch": 1.702429956181118, "percentage": 85.12, "elapsed_time": "23:09:43", "remaining_time": "4:03:00"} +{"current_steps": 6412, "total_steps": 7532, "loss": 0.2387516349554062, "lr": 1.1870692244516147e-06, "epoch": 1.702695525162661, "percentage": 85.13, "elapsed_time": "23:09:56", 
"remaining_time": "4:02:47"} +{"current_steps": 6413, "total_steps": 7532, "loss": 0.19774140417575836, "lr": 1.1849951295262242e-06, "epoch": 1.702961094144204, "percentage": 85.14, "elapsed_time": "23:10:09", "remaining_time": "4:02:34"} +{"current_steps": 6414, "total_steps": 7532, "loss": 0.22842247784137726, "lr": 1.1829227340298088e-06, "epoch": 1.7032266631257469, "percentage": 85.16, "elapsed_time": "23:10:22", "remaining_time": "4:02:21"} +{"current_steps": 6415, "total_steps": 7532, "loss": 0.21994739770889282, "lr": 1.1808520383619015e-06, "epoch": 1.7034922321072898, "percentage": 85.17, "elapsed_time": "23:10:35", "remaining_time": "4:02:08"} +{"current_steps": 6416, "total_steps": 7532, "loss": 0.22328051924705505, "lr": 1.1787830429217084e-06, "epoch": 1.7037578010888328, "percentage": 85.18, "elapsed_time": "23:10:48", "remaining_time": "4:01:55"} +{"current_steps": 6417, "total_steps": 7532, "loss": 0.26704326272010803, "lr": 1.1767157481081092e-06, "epoch": 1.7040233700703757, "percentage": 85.2, "elapsed_time": "23:11:01", "remaining_time": "4:01:42"} +{"current_steps": 6418, "total_steps": 7532, "loss": 0.2148481160402298, "lr": 1.174650154319653e-06, "epoch": 1.7042889390519187, "percentage": 85.21, "elapsed_time": "23:11:14", "remaining_time": "4:01:29"} +{"current_steps": 6419, "total_steps": 7532, "loss": 0.21731218695640564, "lr": 1.1725862619545625e-06, "epoch": 1.7045545080334616, "percentage": 85.22, "elapsed_time": "23:11:28", "remaining_time": "4:01:16"} +{"current_steps": 6420, "total_steps": 7532, "loss": 0.20832043886184692, "lr": 1.1705240714107301e-06, "epoch": 1.7048200770150046, "percentage": 85.24, "elapsed_time": "23:11:41", "remaining_time": "4:01:03"} +{"current_steps": 6421, "total_steps": 7532, "loss": 0.21739046275615692, "lr": 1.1684635830857249e-06, "epoch": 1.7050856459965475, "percentage": 85.25, "elapsed_time": "23:11:54", "remaining_time": "4:00:50"} +{"current_steps": 6422, "total_steps": 7532, "loss": 
0.23972246050834656, "lr": 1.1664047973767811e-06, "epoch": 1.7053512149780907, "percentage": 85.26, "elapsed_time": "23:12:08", "remaining_time": "4:00:37"} +{"current_steps": 6423, "total_steps": 7532, "loss": 0.2471289187669754, "lr": 1.1643477146808092e-06, "epoch": 1.7056167839596337, "percentage": 85.28, "elapsed_time": "23:12:21", "remaining_time": "4:00:24"} +{"current_steps": 6424, "total_steps": 7532, "loss": 0.2014283537864685, "lr": 1.1622923353943916e-06, "epoch": 1.7058823529411766, "percentage": 85.29, "elapsed_time": "23:12:34", "remaining_time": "4:00:11"} +{"current_steps": 6425, "total_steps": 7532, "loss": 0.21680915355682373, "lr": 1.1602386599137782e-06, "epoch": 1.7061479219227196, "percentage": 85.3, "elapsed_time": "23:12:47", "remaining_time": "3:59:58"} +{"current_steps": 6426, "total_steps": 7532, "loss": 0.2101205736398697, "lr": 1.158186688634898e-06, "epoch": 1.7064134909042625, "percentage": 85.32, "elapsed_time": "23:13:00", "remaining_time": "3:59:45"} +{"current_steps": 6427, "total_steps": 7532, "loss": 0.22114071249961853, "lr": 1.1561364219533444e-06, "epoch": 1.7066790598858055, "percentage": 85.33, "elapsed_time": "23:13:13", "remaining_time": "3:59:32"} +{"current_steps": 6428, "total_steps": 7532, "loss": 0.20608706772327423, "lr": 1.1540878602643858e-06, "epoch": 1.7069446288673484, "percentage": 85.34, "elapsed_time": "23:13:26", "remaining_time": "3:59:19"} +{"current_steps": 6429, "total_steps": 7532, "loss": 0.2247905433177948, "lr": 1.1520410039629593e-06, "epoch": 1.7072101978488914, "percentage": 85.36, "elapsed_time": "23:13:39", "remaining_time": "3:59:06"} +{"current_steps": 6430, "total_steps": 7532, "loss": 0.22623226046562195, "lr": 1.1499958534436751e-06, "epoch": 1.7074757668304343, "percentage": 85.37, "elapsed_time": "23:13:52", "remaining_time": "3:58:53"} +{"current_steps": 6431, "total_steps": 7532, "loss": 0.2063906192779541, "lr": 1.1479524091008142e-06, "epoch": 1.7077413358119773, "percentage": 
85.38, "elapsed_time": "23:14:05", "remaining_time": "3:58:40"} +{"current_steps": 6432, "total_steps": 7532, "loss": 0.2787795960903168, "lr": 1.1459106713283286e-06, "epoch": 1.7080069047935202, "percentage": 85.4, "elapsed_time": "23:14:18", "remaining_time": "3:58:27"} +{"current_steps": 6433, "total_steps": 7532, "loss": 0.23090440034866333, "lr": 1.1438706405198419e-06, "epoch": 1.7082724737750632, "percentage": 85.41, "elapsed_time": "23:14:30", "remaining_time": "3:58:14"} +{"current_steps": 6434, "total_steps": 7532, "loss": 0.23690670728683472, "lr": 1.141832317068645e-06, "epoch": 1.7085380427566061, "percentage": 85.42, "elapsed_time": "23:14:43", "remaining_time": "3:58:01"} +{"current_steps": 6435, "total_steps": 7532, "loss": 0.209202378988266, "lr": 1.1397957013677064e-06, "epoch": 1.708803611738149, "percentage": 85.44, "elapsed_time": "23:14:56", "remaining_time": "3:57:48"} +{"current_steps": 6436, "total_steps": 7532, "loss": 0.22541575133800507, "lr": 1.1377607938096635e-06, "epoch": 1.709069180719692, "percentage": 85.45, "elapsed_time": "23:15:09", "remaining_time": "3:57:35"} +{"current_steps": 6437, "total_steps": 7532, "loss": 0.2460884153842926, "lr": 1.1357275947868162e-06, "epoch": 1.709334749701235, "percentage": 85.46, "elapsed_time": "23:15:22", "remaining_time": "3:57:21"} +{"current_steps": 6438, "total_steps": 7532, "loss": 0.21967202425003052, "lr": 1.1336961046911443e-06, "epoch": 1.709600318682778, "percentage": 85.48, "elapsed_time": "23:15:35", "remaining_time": "3:57:09"} +{"current_steps": 6439, "total_steps": 7532, "loss": 0.23619329929351807, "lr": 1.1316663239142954e-06, "epoch": 1.709865887664321, "percentage": 85.49, "elapsed_time": "23:15:47", "remaining_time": "3:56:55"} +{"current_steps": 6440, "total_steps": 7532, "loss": 0.24563436210155487, "lr": 1.129638252847587e-06, "epoch": 1.7101314566458639, "percentage": 85.5, "elapsed_time": "23:16:00", "remaining_time": "3:56:42"} +{"current_steps": 6441, "total_steps": 
7532, "loss": 0.25508859753608704, "lr": 1.1276118918820068e-06, "epoch": 1.7103970256274068, "percentage": 85.52, "elapsed_time": "23:16:13", "remaining_time": "3:56:29"} +{"current_steps": 6442, "total_steps": 7532, "loss": 0.24761545658111572, "lr": 1.1255872414082136e-06, "epoch": 1.7106625946089498, "percentage": 85.53, "elapsed_time": "23:16:26", "remaining_time": "3:56:16"} +{"current_steps": 6443, "total_steps": 7532, "loss": 0.2355962097644806, "lr": 1.1235643018165344e-06, "epoch": 1.7109281635904927, "percentage": 85.54, "elapsed_time": "23:16:39", "remaining_time": "3:56:03"} +{"current_steps": 6444, "total_steps": 7532, "loss": 0.2534273862838745, "lr": 1.1215430734969723e-06, "epoch": 1.7111937325720357, "percentage": 85.55, "elapsed_time": "23:16:52", "remaining_time": "3:55:50"} +{"current_steps": 6445, "total_steps": 7532, "loss": 0.2756424844264984, "lr": 1.1195235568391938e-06, "epoch": 1.7114593015535786, "percentage": 85.57, "elapsed_time": "23:17:05", "remaining_time": "3:55:37"} +{"current_steps": 6446, "total_steps": 7532, "loss": 0.2198309451341629, "lr": 1.1175057522325383e-06, "epoch": 1.7117248705351216, "percentage": 85.58, "elapsed_time": "23:17:18", "remaining_time": "3:55:24"} +{"current_steps": 6447, "total_steps": 7532, "loss": 0.21767666935920715, "lr": 1.1154896600660136e-06, "epoch": 1.7119904395166645, "percentage": 85.59, "elapsed_time": "23:17:31", "remaining_time": "3:55:11"} +{"current_steps": 6448, "total_steps": 7532, "loss": 0.2679128348827362, "lr": 1.1134752807283e-06, "epoch": 1.7122560084982075, "percentage": 85.61, "elapsed_time": "23:17:43", "remaining_time": "3:54:58"} +{"current_steps": 6449, "total_steps": 7532, "loss": 0.2268792986869812, "lr": 1.1114626146077457e-06, "epoch": 1.7125215774797504, "percentage": 85.62, "elapsed_time": "23:17:57", "remaining_time": "3:54:45"} +{"current_steps": 6450, "total_steps": 7532, "loss": 0.21585378050804138, "lr": 1.109451662092369e-06, "epoch": 1.7127871464612934, 
"percentage": 85.63, "elapsed_time": "23:18:09", "remaining_time": "3:54:32"} +{"current_steps": 6451, "total_steps": 7532, "loss": 0.2258647382259369, "lr": 1.1074424235698567e-06, "epoch": 1.7130527154428363, "percentage": 85.65, "elapsed_time": "23:18:22", "remaining_time": "3:54:19"} +{"current_steps": 6452, "total_steps": 7532, "loss": 0.2456682175397873, "lr": 1.1054348994275677e-06, "epoch": 1.7133182844243793, "percentage": 85.66, "elapsed_time": "23:18:35", "remaining_time": "3:54:06"} +{"current_steps": 6453, "total_steps": 7532, "loss": 0.22897745668888092, "lr": 1.1034290900525279e-06, "epoch": 1.7135838534059222, "percentage": 85.67, "elapsed_time": "23:18:48", "remaining_time": "3:53:53"} +{"current_steps": 6454, "total_steps": 7532, "loss": 0.1910650134086609, "lr": 1.101424995831435e-06, "epoch": 1.7138494223874652, "percentage": 85.69, "elapsed_time": "23:19:01", "remaining_time": "3:53:40"} +{"current_steps": 6455, "total_steps": 7532, "loss": 0.2519158720970154, "lr": 1.0994226171506529e-06, "epoch": 1.7141149913690081, "percentage": 85.7, "elapsed_time": "23:19:13", "remaining_time": "3:53:27"} +{"current_steps": 6456, "total_steps": 7532, "loss": 0.24191951751708984, "lr": 1.0974219543962184e-06, "epoch": 1.714380560350551, "percentage": 85.71, "elapsed_time": "23:19:27", "remaining_time": "3:53:14"} +{"current_steps": 6457, "total_steps": 7532, "loss": 0.2560814619064331, "lr": 1.0954230079538352e-06, "epoch": 1.714646129332094, "percentage": 85.73, "elapsed_time": "23:19:40", "remaining_time": "3:53:01"} +{"current_steps": 6458, "total_steps": 7532, "loss": 0.22969035804271698, "lr": 1.0934257782088763e-06, "epoch": 1.714911698313637, "percentage": 85.74, "elapsed_time": "23:19:53", "remaining_time": "3:52:48"} +{"current_steps": 6459, "total_steps": 7532, "loss": 0.26114046573638916, "lr": 1.0914302655463837e-06, "epoch": 1.71517726729518, "percentage": 85.75, "elapsed_time": "23:20:06", "remaining_time": "3:52:35"} +{"current_steps": 6460, 
"total_steps": 7532, "loss": 0.21457752585411072, "lr": 1.0894364703510685e-06, "epoch": 1.715442836276723, "percentage": 85.77, "elapsed_time": "23:20:19", "remaining_time": "3:52:22"} +{"current_steps": 6461, "total_steps": 7532, "loss": 0.19998760521411896, "lr": 1.0874443930073098e-06, "epoch": 1.7157084052582658, "percentage": 85.78, "elapsed_time": "23:20:32", "remaining_time": "3:52:09"} +{"current_steps": 6462, "total_steps": 7532, "loss": 0.2379671037197113, "lr": 1.0854540338991615e-06, "epoch": 1.7159739742398088, "percentage": 85.79, "elapsed_time": "23:20:46", "remaining_time": "3:51:56"} +{"current_steps": 6463, "total_steps": 7532, "loss": 0.2236609309911728, "lr": 1.0834653934103367e-06, "epoch": 1.7162395432213517, "percentage": 85.81, "elapsed_time": "23:20:58", "remaining_time": "3:51:43"} +{"current_steps": 6464, "total_steps": 7532, "loss": 0.22507379949092865, "lr": 1.0814784719242234e-06, "epoch": 1.7165051122028947, "percentage": 85.82, "elapsed_time": "23:21:11", "remaining_time": "3:51:30"} +{"current_steps": 6465, "total_steps": 7532, "loss": 0.22138816118240356, "lr": 1.079493269823877e-06, "epoch": 1.7167706811844377, "percentage": 85.83, "elapsed_time": "23:21:24", "remaining_time": "3:51:17"} +{"current_steps": 6466, "total_steps": 7532, "loss": 0.227338969707489, "lr": 1.0775097874920204e-06, "epoch": 1.7170362501659806, "percentage": 85.85, "elapsed_time": "23:21:37", "remaining_time": "3:51:04"} +{"current_steps": 6467, "total_steps": 7532, "loss": 0.23694375157356262, "lr": 1.0755280253110466e-06, "epoch": 1.7173018191475236, "percentage": 85.86, "elapsed_time": "23:21:50", "remaining_time": "3:50:51"} +{"current_steps": 6468, "total_steps": 7532, "loss": 0.26219409704208374, "lr": 1.0735479836630136e-06, "epoch": 1.7175673881290665, "percentage": 85.87, "elapsed_time": "23:22:03", "remaining_time": "3:50:38"} +{"current_steps": 6469, "total_steps": 7532, "loss": 0.22215887904167175, "lr": 1.0715696629296524e-06, "epoch": 
1.7178329571106095, "percentage": 85.89, "elapsed_time": "23:22:16", "remaining_time": "3:50:25"} +{"current_steps": 6470, "total_steps": 7532, "loss": 0.25434768199920654, "lr": 1.0695930634923602e-06, "epoch": 1.7180985260921524, "percentage": 85.9, "elapsed_time": "23:22:29", "remaining_time": "3:50:12"} +{"current_steps": 6471, "total_steps": 7532, "loss": 0.2092076987028122, "lr": 1.0676181857321998e-06, "epoch": 1.7183640950736954, "percentage": 85.91, "elapsed_time": "23:22:42", "remaining_time": "3:49:59"} +{"current_steps": 6472, "total_steps": 7532, "loss": 0.2710237503051758, "lr": 1.0656450300299048e-06, "epoch": 1.7186296640552383, "percentage": 85.93, "elapsed_time": "23:22:54", "remaining_time": "3:49:46"} +{"current_steps": 6473, "total_steps": 7532, "loss": 0.2533886432647705, "lr": 1.0636735967658785e-06, "epoch": 1.7188952330367813, "percentage": 85.94, "elapsed_time": "23:23:07", "remaining_time": "3:49:33"} +{"current_steps": 6474, "total_steps": 7532, "loss": 0.2545754909515381, "lr": 1.0617038863201878e-06, "epoch": 1.7191608020183242, "percentage": 85.95, "elapsed_time": "23:23:20", "remaining_time": "3:49:20"} +{"current_steps": 6475, "total_steps": 7532, "loss": 0.26010993123054504, "lr": 1.0597358990725703e-06, "epoch": 1.7194263709998672, "percentage": 85.97, "elapsed_time": "23:23:33", "remaining_time": "3:49:07"} +{"current_steps": 6476, "total_steps": 7532, "loss": 0.22529907524585724, "lr": 1.0577696354024314e-06, "epoch": 1.7196919399814101, "percentage": 85.98, "elapsed_time": "23:23:46", "remaining_time": "3:48:54"} +{"current_steps": 6477, "total_steps": 7532, "loss": 0.1897469311952591, "lr": 1.0558050956888433e-06, "epoch": 1.719957508962953, "percentage": 85.99, "elapsed_time": "23:23:59", "remaining_time": "3:48:41"} +{"current_steps": 6478, "total_steps": 7532, "loss": 0.24663670361042023, "lr": 1.0538422803105441e-06, "epoch": 1.720223077944496, "percentage": 86.01, "elapsed_time": "23:24:11", "remaining_time": "3:48:28"} 
+{"current_steps": 6479, "total_steps": 7532, "loss": 0.2462892383337021, "lr": 1.0518811896459423e-06, "epoch": 1.720488646926039, "percentage": 86.02, "elapsed_time": "23:24:24", "remaining_time": "3:48:15"} +{"current_steps": 6480, "total_steps": 7532, "loss": 0.18652144074440002, "lr": 1.0499218240731157e-06, "epoch": 1.720754215907582, "percentage": 86.03, "elapsed_time": "23:24:37", "remaining_time": "3:48:02"} +{"current_steps": 6481, "total_steps": 7532, "loss": 0.24614468216896057, "lr": 1.0479641839698052e-06, "epoch": 1.7210197848891249, "percentage": 86.05, "elapsed_time": "23:24:50", "remaining_time": "3:47:49"} +{"current_steps": 6482, "total_steps": 7532, "loss": 0.27925312519073486, "lr": 1.046008269713421e-06, "epoch": 1.7212853538706678, "percentage": 86.06, "elapsed_time": "23:25:03", "remaining_time": "3:47:36"} +{"current_steps": 6483, "total_steps": 7532, "loss": 0.2626710832118988, "lr": 1.0440540816810395e-06, "epoch": 1.7215509228522108, "percentage": 86.07, "elapsed_time": "23:25:16", "remaining_time": "3:47:23"} +{"current_steps": 6484, "total_steps": 7532, "loss": 0.23039895296096802, "lr": 1.042101620249405e-06, "epoch": 1.7218164918337537, "percentage": 86.09, "elapsed_time": "23:25:29", "remaining_time": "3:47:10"} +{"current_steps": 6485, "total_steps": 7532, "loss": 0.19559775292873383, "lr": 1.0401508857949295e-06, "epoch": 1.7220820608152967, "percentage": 86.1, "elapsed_time": "23:25:42", "remaining_time": "3:46:57"} +{"current_steps": 6486, "total_steps": 7532, "loss": 0.24982990324497223, "lr": 1.0382018786936943e-06, "epoch": 1.7223476297968396, "percentage": 86.11, "elapsed_time": "23:25:56", "remaining_time": "3:46:44"} +{"current_steps": 6487, "total_steps": 7532, "loss": 0.26212313771247864, "lr": 1.0362545993214402e-06, "epoch": 1.7226131987783826, "percentage": 86.13, "elapsed_time": "23:26:09", "remaining_time": "3:46:31"} +{"current_steps": 6488, "total_steps": 7532, "loss": 0.22827446460723877, "lr": 
1.0343090480535788e-06, "epoch": 1.7228787677599255, "percentage": 86.14, "elapsed_time": "23:26:22", "remaining_time": "3:46:18"} +{"current_steps": 6489, "total_steps": 7532, "loss": 0.2710435390472412, "lr": 1.032365225265196e-06, "epoch": 1.7231443367414685, "percentage": 86.15, "elapsed_time": "23:26:35", "remaining_time": "3:46:05"} +{"current_steps": 6490, "total_steps": 7532, "loss": 0.25116702914237976, "lr": 1.030423131331033e-06, "epoch": 1.7234099057230114, "percentage": 86.17, "elapsed_time": "23:26:48", "remaining_time": "3:45:52"} +{"current_steps": 6491, "total_steps": 7532, "loss": 0.1980481743812561, "lr": 1.0284827666255048e-06, "epoch": 1.7236754747045544, "percentage": 86.18, "elapsed_time": "23:27:01", "remaining_time": "3:45:39"} +{"current_steps": 6492, "total_steps": 7532, "loss": 0.2777971625328064, "lr": 1.0265441315226898e-06, "epoch": 1.7239410436860974, "percentage": 86.19, "elapsed_time": "23:27:15", "remaining_time": "3:45:26"} +{"current_steps": 6493, "total_steps": 7532, "loss": 0.23041702806949615, "lr": 1.0246072263963336e-06, "epoch": 1.7242066126676403, "percentage": 86.21, "elapsed_time": "23:27:28", "remaining_time": "3:45:13"} +{"current_steps": 6494, "total_steps": 7532, "loss": 0.21428728103637695, "lr": 1.0226720516198495e-06, "epoch": 1.7244721816491833, "percentage": 86.22, "elapsed_time": "23:27:41", "remaining_time": "3:45:00"} +{"current_steps": 6495, "total_steps": 7532, "loss": 0.22577518224716187, "lr": 1.020738607566316e-06, "epoch": 1.7247377506307262, "percentage": 86.23, "elapsed_time": "23:27:54", "remaining_time": "3:44:47"} +{"current_steps": 6496, "total_steps": 7532, "loss": 0.21080979704856873, "lr": 1.0188068946084783e-06, "epoch": 1.7250033196122692, "percentage": 86.25, "elapsed_time": "23:28:08", "remaining_time": "3:44:34"} +{"current_steps": 6497, "total_steps": 7532, "loss": 0.21232858300209045, "lr": 1.0168769131187472e-06, "epoch": 1.7252688885938121, "percentage": 86.26, "elapsed_time": 
"23:28:21", "remaining_time": "3:44:21"} +{"current_steps": 6498, "total_steps": 7532, "loss": 0.25525614619255066, "lr": 1.0149486634692019e-06, "epoch": 1.725534457575355, "percentage": 86.27, "elapsed_time": "23:28:35", "remaining_time": "3:44:08"} +{"current_steps": 6499, "total_steps": 7532, "loss": 0.26291778683662415, "lr": 1.0130221460315858e-06, "epoch": 1.725800026556898, "percentage": 86.29, "elapsed_time": "23:28:47", "remaining_time": "3:43:55"} +{"current_steps": 6500, "total_steps": 7532, "loss": 0.21314382553100586, "lr": 1.011097361177308e-06, "epoch": 1.726065595538441, "percentage": 86.3, "elapsed_time": "23:29:01", "remaining_time": "3:43:42"} +{"current_steps": 6501, "total_steps": 7532, "loss": 0.2106419950723648, "lr": 1.0091743092774474e-06, "epoch": 1.726331164519984, "percentage": 86.31, "elapsed_time": "23:29:20", "remaining_time": "3:43:30"} +{"current_steps": 6502, "total_steps": 7532, "loss": 0.22456032037734985, "lr": 1.0072529907027407e-06, "epoch": 1.7265967335015269, "percentage": 86.33, "elapsed_time": "23:29:33", "remaining_time": "3:43:17"} +{"current_steps": 6503, "total_steps": 7532, "loss": 0.2301097959280014, "lr": 1.0053334058235975e-06, "epoch": 1.7268623024830698, "percentage": 86.34, "elapsed_time": "23:29:46", "remaining_time": "3:43:04"} +{"current_steps": 6504, "total_steps": 7532, "loss": 0.21207617223262787, "lr": 1.0034155550100922e-06, "epoch": 1.7271278714646128, "percentage": 86.35, "elapsed_time": "23:29:59", "remaining_time": "3:42:51"} +{"current_steps": 6505, "total_steps": 7532, "loss": 0.24378664791584015, "lr": 1.0014994386319621e-06, "epoch": 1.7273934404461557, "percentage": 86.36, "elapsed_time": "23:30:12", "remaining_time": "3:42:38"} +{"current_steps": 6506, "total_steps": 7532, "loss": 0.24914023280143738, "lr": 9.995850570586107e-07, "epoch": 1.727659009427699, "percentage": 86.38, "elapsed_time": "23:30:24", "remaining_time": "3:42:25"} +{"current_steps": 6507, "total_steps": 7532, "loss": 
0.23235921561717987, "lr": 9.976724106591128e-07, "epoch": 1.7279245784092419, "percentage": 86.39, "elapsed_time": "23:30:37", "remaining_time": "3:42:12"} +{"current_steps": 6508, "total_steps": 7532, "loss": 0.22441455721855164, "lr": 9.957614998022015e-07, "epoch": 1.7281901473907848, "percentage": 86.4, "elapsed_time": "23:30:50", "remaining_time": "3:41:59"} +{"current_steps": 6509, "total_steps": 7532, "loss": 0.2559920847415924, "lr": 9.93852324856278e-07, "epoch": 1.7284557163723278, "percentage": 86.42, "elapsed_time": "23:31:03", "remaining_time": "3:41:46"} +{"current_steps": 6510, "total_steps": 7532, "loss": 0.21378321945667267, "lr": 9.919448861894088e-07, "epoch": 1.7287212853538707, "percentage": 86.43, "elapsed_time": "23:31:16", "remaining_time": "3:41:33"} +{"current_steps": 6511, "total_steps": 7532, "loss": 0.23622627556324005, "lr": 9.900391841693247e-07, "epoch": 1.7289868543354137, "percentage": 86.44, "elapsed_time": "23:31:29", "remaining_time": "3:41:20"} +{"current_steps": 6512, "total_steps": 7532, "loss": 0.217013418674469, "lr": 9.88135219163424e-07, "epoch": 1.7292524233169566, "percentage": 86.46, "elapsed_time": "23:31:42", "remaining_time": "3:41:07"} +{"current_steps": 6513, "total_steps": 7532, "loss": 0.2221517264842987, "lr": 9.862329915387669e-07, "epoch": 1.7295179922984996, "percentage": 86.47, "elapsed_time": "23:31:55", "remaining_time": "3:40:54"} +{"current_steps": 6514, "total_steps": 7532, "loss": 0.24377144873142242, "lr": 9.84332501662083e-07, "epoch": 1.7297835612800425, "percentage": 86.48, "elapsed_time": "23:32:07", "remaining_time": "3:40:41"} +{"current_steps": 6515, "total_steps": 7532, "loss": 0.23368799686431885, "lr": 9.824337498997593e-07, "epoch": 1.7300491302615855, "percentage": 86.5, "elapsed_time": "23:32:20", "remaining_time": "3:40:28"} +{"current_steps": 6516, "total_steps": 7532, "loss": 0.23061680793762207, "lr": 9.805367366178608e-07, "epoch": 1.7303146992431284, "percentage": 86.51, 
"elapsed_time": "23:32:33", "remaining_time": "3:40:15"} +{"current_steps": 6517, "total_steps": 7532, "loss": 0.24157950282096863, "lr": 9.78641462182104e-07, "epoch": 1.7305802682246714, "percentage": 86.52, "elapsed_time": "23:32:46", "remaining_time": "3:40:02"} +{"current_steps": 6518, "total_steps": 7532, "loss": 0.2122395783662796, "lr": 9.76747926957875e-07, "epoch": 1.7308458372062143, "percentage": 86.54, "elapsed_time": "23:32:58", "remaining_time": "3:39:48"} +{"current_steps": 6519, "total_steps": 7532, "loss": 0.2351134717464447, "lr": 9.748561313102266e-07, "epoch": 1.7311114061877573, "percentage": 86.55, "elapsed_time": "23:33:11", "remaining_time": "3:39:35"} +{"current_steps": 6520, "total_steps": 7532, "loss": 0.22462692856788635, "lr": 9.729660756038738e-07, "epoch": 1.7313769751693002, "percentage": 86.56, "elapsed_time": "23:33:24", "remaining_time": "3:39:22"} +{"current_steps": 6521, "total_steps": 7532, "loss": 0.2140806019306183, "lr": 9.710777602031985e-07, "epoch": 1.7316425441508432, "percentage": 86.58, "elapsed_time": "23:33:37", "remaining_time": "3:39:09"} +{"current_steps": 6522, "total_steps": 7532, "loss": 0.22256694734096527, "lr": 9.691911854722447e-07, "epoch": 1.7319081131323861, "percentage": 86.59, "elapsed_time": "23:33:50", "remaining_time": "3:38:56"} +{"current_steps": 6523, "total_steps": 7532, "loss": 0.26044604182243347, "lr": 9.673063517747216e-07, "epoch": 1.732173682113929, "percentage": 86.6, "elapsed_time": "23:34:03", "remaining_time": "3:38:43"} +{"current_steps": 6524, "total_steps": 7532, "loss": 0.22553196549415588, "lr": 9.65423259474001e-07, "epoch": 1.732439251095472, "percentage": 86.62, "elapsed_time": "23:34:16", "remaining_time": "3:38:30"} +{"current_steps": 6525, "total_steps": 7532, "loss": 0.2240113914012909, "lr": 9.635419089331255e-07, "epoch": 1.732704820077015, "percentage": 86.63, "elapsed_time": "23:34:28", "remaining_time": "3:38:17"} +{"current_steps": 6526, "total_steps": 7532, "loss": 
0.2239987701177597, "lr": 9.616623005147952e-07, "epoch": 1.732970389058558, "percentage": 86.64, "elapsed_time": "23:34:41", "remaining_time": "3:38:04"} +{"current_steps": 6527, "total_steps": 7532, "loss": 0.2779507040977478, "lr": 9.597844345813746e-07, "epoch": 1.7332359580401009, "percentage": 86.66, "elapsed_time": "23:34:54", "remaining_time": "3:37:51"} +{"current_steps": 6528, "total_steps": 7532, "loss": 0.20211297273635864, "lr": 9.57908311494896e-07, "epoch": 1.7335015270216438, "percentage": 86.67, "elapsed_time": "23:35:07", "remaining_time": "3:37:38"} +{"current_steps": 6529, "total_steps": 7532, "loss": 0.2552817165851593, "lr": 9.560339316170542e-07, "epoch": 1.7337670960031868, "percentage": 86.68, "elapsed_time": "23:35:20", "remaining_time": "3:37:25"} +{"current_steps": 6530, "total_steps": 7532, "loss": 0.248790442943573, "lr": 9.54161295309206e-07, "epoch": 1.7340326649847297, "percentage": 86.7, "elapsed_time": "23:35:33", "remaining_time": "3:37:12"} +{"current_steps": 6531, "total_steps": 7532, "loss": 0.22865381836891174, "lr": 9.522904029323754e-07, "epoch": 1.7342982339662727, "percentage": 86.71, "elapsed_time": "23:35:46", "remaining_time": "3:36:59"} +{"current_steps": 6532, "total_steps": 7532, "loss": 0.212583988904953, "lr": 9.504212548472458e-07, "epoch": 1.7345638029478156, "percentage": 86.72, "elapsed_time": "23:35:59", "remaining_time": "3:36:46"} +{"current_steps": 6533, "total_steps": 7532, "loss": 0.24632221460342407, "lr": 9.48553851414169e-07, "epoch": 1.7348293719293586, "percentage": 86.74, "elapsed_time": "23:36:11", "remaining_time": "3:36:33"} +{"current_steps": 6534, "total_steps": 7532, "loss": 0.2264299988746643, "lr": 9.466881929931582e-07, "epoch": 1.7350949409109018, "percentage": 86.75, "elapsed_time": "23:36:24", "remaining_time": "3:36:20"} +{"current_steps": 6535, "total_steps": 7532, "loss": 0.21560585498809814, "lr": 9.4482427994389e-07, "epoch": 1.7353605098924447, "percentage": 86.76, "elapsed_time": 
"23:36:37", "remaining_time": "3:36:07"} +{"current_steps": 6536, "total_steps": 7532, "loss": 0.24358224868774414, "lr": 9.429621126257038e-07, "epoch": 1.7356260788739877, "percentage": 86.78, "elapsed_time": "23:36:49", "remaining_time": "3:35:54"} +{"current_steps": 6537, "total_steps": 7532, "loss": 0.23307816684246063, "lr": 9.411016913976045e-07, "epoch": 1.7358916478555306, "percentage": 86.79, "elapsed_time": "23:37:03", "remaining_time": "3:35:41"} +{"current_steps": 6538, "total_steps": 7532, "loss": 0.28001490235328674, "lr": 9.392430166182597e-07, "epoch": 1.7361572168370736, "percentage": 86.8, "elapsed_time": "23:37:16", "remaining_time": "3:35:28"} +{"current_steps": 6539, "total_steps": 7532, "loss": 0.22544093430042267, "lr": 9.373860886459996e-07, "epoch": 1.7364227858186165, "percentage": 86.82, "elapsed_time": "23:37:29", "remaining_time": "3:35:15"} +{"current_steps": 6540, "total_steps": 7532, "loss": 0.2066478282213211, "lr": 9.355309078388186e-07, "epoch": 1.7366883548001595, "percentage": 86.83, "elapsed_time": "23:37:42", "remaining_time": "3:35:02"} +{"current_steps": 6541, "total_steps": 7532, "loss": 0.21185964345932007, "lr": 9.336774745543697e-07, "epoch": 1.7369539237817024, "percentage": 86.84, "elapsed_time": "23:37:55", "remaining_time": "3:34:49"} +{"current_steps": 6542, "total_steps": 7532, "loss": 0.2337890863418579, "lr": 9.318257891499793e-07, "epoch": 1.7372194927632454, "percentage": 86.86, "elapsed_time": "23:38:07", "remaining_time": "3:34:36"} +{"current_steps": 6543, "total_steps": 7532, "loss": 0.2430594563484192, "lr": 9.299758519826274e-07, "epoch": 1.7374850617447883, "percentage": 86.87, "elapsed_time": "23:38:21", "remaining_time": "3:34:23"} +{"current_steps": 6544, "total_steps": 7532, "loss": 0.24799269437789917, "lr": 9.281276634089609e-07, "epoch": 1.7377506307263313, "percentage": 86.88, "elapsed_time": "23:38:33", "remaining_time": "3:34:10"} +{"current_steps": 6545, "total_steps": 7532, "loss": 
0.24756166338920593, "lr": 9.26281223785287e-07, "epoch": 1.7380161997078742, "percentage": 86.9, "elapsed_time": "23:38:46", "remaining_time": "3:33:57"} +{"current_steps": 6546, "total_steps": 7532, "loss": 0.23465190827846527, "lr": 9.244365334675787e-07, "epoch": 1.7382817686894172, "percentage": 86.91, "elapsed_time": "23:38:59", "remaining_time": "3:33:44"} +{"current_steps": 6547, "total_steps": 7532, "loss": 0.2039640098810196, "lr": 9.225935928114716e-07, "epoch": 1.7385473376709601, "percentage": 86.92, "elapsed_time": "23:39:12", "remaining_time": "3:33:31"} +{"current_steps": 6548, "total_steps": 7532, "loss": 0.22304412722587585, "lr": 9.207524021722602e-07, "epoch": 1.738812906652503, "percentage": 86.94, "elapsed_time": "23:39:25", "remaining_time": "3:33:18"} +{"current_steps": 6549, "total_steps": 7532, "loss": 0.19985908269882202, "lr": 9.189129619049064e-07, "epoch": 1.739078475634046, "percentage": 86.95, "elapsed_time": "23:39:38", "remaining_time": "3:33:05"} +{"current_steps": 6550, "total_steps": 7532, "loss": 0.2335432469844818, "lr": 9.17075272364032e-07, "epoch": 1.739344044615589, "percentage": 86.96, "elapsed_time": "23:39:51", "remaining_time": "3:32:52"} +{"current_steps": 6551, "total_steps": 7532, "loss": 0.2313593327999115, "lr": 9.152393339039223e-07, "epoch": 1.739609613597132, "percentage": 86.98, "elapsed_time": "23:40:04", "remaining_time": "3:32:39"} +{"current_steps": 6552, "total_steps": 7532, "loss": 0.2320600152015686, "lr": 9.134051468785243e-07, "epoch": 1.739875182578675, "percentage": 86.99, "elapsed_time": "23:40:16", "remaining_time": "3:32:26"} +{"current_steps": 6553, "total_steps": 7532, "loss": 0.1870848387479782, "lr": 9.115727116414475e-07, "epoch": 1.7401407515602179, "percentage": 87.0, "elapsed_time": "23:40:29", "remaining_time": "3:32:13"} +{"current_steps": 6554, "total_steps": 7532, "loss": 0.22922812402248383, "lr": 9.097420285459635e-07, "epoch": 1.7404063205417608, "percentage": 87.02, 
"elapsed_time": "23:40:42", "remaining_time": "3:32:00"} +{"current_steps": 6555, "total_steps": 7532, "loss": 0.2505050301551819, "lr": 9.079130979450068e-07, "epoch": 1.7406718895233038, "percentage": 87.03, "elapsed_time": "23:40:54", "remaining_time": "3:31:46"} +{"current_steps": 6556, "total_steps": 7532, "loss": 0.20445439219474792, "lr": 9.060859201911732e-07, "epoch": 1.7409374585048467, "percentage": 87.04, "elapsed_time": "23:41:08", "remaining_time": "3:31:33"} +{"current_steps": 6557, "total_steps": 7532, "loss": 0.22338441014289856, "lr": 9.042604956367218e-07, "epoch": 1.7412030274863897, "percentage": 87.06, "elapsed_time": "23:41:20", "remaining_time": "3:31:20"} +{"current_steps": 6558, "total_steps": 7532, "loss": 0.24923941493034363, "lr": 9.024368246335735e-07, "epoch": 1.7414685964679326, "percentage": 87.07, "elapsed_time": "23:41:34", "remaining_time": "3:31:07"} +{"current_steps": 6559, "total_steps": 7532, "loss": 0.22842931747436523, "lr": 9.006149075333071e-07, "epoch": 1.7417341654494756, "percentage": 87.08, "elapsed_time": "23:41:46", "remaining_time": "3:30:54"} +{"current_steps": 6560, "total_steps": 7532, "loss": 0.22451579570770264, "lr": 8.987947446871703e-07, "epoch": 1.7419997344310185, "percentage": 87.1, "elapsed_time": "23:42:00", "remaining_time": "3:30:41"} +{"current_steps": 6561, "total_steps": 7532, "loss": 0.2521047592163086, "lr": 8.969763364460682e-07, "epoch": 1.7422653034125615, "percentage": 87.11, "elapsed_time": "23:42:12", "remaining_time": "3:30:28"} +{"current_steps": 6562, "total_steps": 7532, "loss": 0.25001099705696106, "lr": 8.951596831605691e-07, "epoch": 1.7425308723941044, "percentage": 87.12, "elapsed_time": "23:42:26", "remaining_time": "3:30:15"} +{"current_steps": 6563, "total_steps": 7532, "loss": 0.19592508673667908, "lr": 8.933447851809007e-07, "epoch": 1.7427964413756474, "percentage": 87.13, "elapsed_time": "23:42:39", "remaining_time": "3:30:02"} +{"current_steps": 6564, "total_steps": 7532, 
"loss": 0.2785179018974304, "lr": 8.915316428569554e-07, "epoch": 1.7430620103571903, "percentage": 87.15, "elapsed_time": "23:42:52", "remaining_time": "3:29:49"} +{"current_steps": 6565, "total_steps": 7532, "loss": 0.20700594782829285, "lr": 8.897202565382845e-07, "epoch": 1.7433275793387333, "percentage": 87.16, "elapsed_time": "23:43:05", "remaining_time": "3:29:36"} +{"current_steps": 6566, "total_steps": 7532, "loss": 0.253167062997818, "lr": 8.879106265741044e-07, "epoch": 1.7435931483202762, "percentage": 87.17, "elapsed_time": "23:43:18", "remaining_time": "3:29:23"} +{"current_steps": 6567, "total_steps": 7532, "loss": 0.27672937512397766, "lr": 8.861027533132859e-07, "epoch": 1.7438587173018192, "percentage": 87.19, "elapsed_time": "23:43:31", "remaining_time": "3:29:10"} +{"current_steps": 6568, "total_steps": 7532, "loss": 0.23050950467586517, "lr": 8.842966371043671e-07, "epoch": 1.7441242862833621, "percentage": 87.2, "elapsed_time": "23:43:44", "remaining_time": "3:28:57"} +{"current_steps": 6569, "total_steps": 7532, "loss": 0.23529425263404846, "lr": 8.824922782955481e-07, "epoch": 1.744389855264905, "percentage": 87.21, "elapsed_time": "23:43:57", "remaining_time": "3:28:44"} +{"current_steps": 6570, "total_steps": 7532, "loss": 0.21803250908851624, "lr": 8.806896772346873e-07, "epoch": 1.744655424246448, "percentage": 87.23, "elapsed_time": "23:44:10", "remaining_time": "3:28:31"} +{"current_steps": 6571, "total_steps": 7532, "loss": 0.24237293004989624, "lr": 8.788888342693047e-07, "epoch": 1.744920993227991, "percentage": 87.24, "elapsed_time": "23:44:23", "remaining_time": "3:28:18"} +{"current_steps": 6572, "total_steps": 7532, "loss": 0.2008107602596283, "lr": 8.770897497465803e-07, "epoch": 1.745186562209534, "percentage": 87.25, "elapsed_time": "23:44:36", "remaining_time": "3:28:05"} +{"current_steps": 6573, "total_steps": 7532, "loss": 0.23106279969215393, "lr": 8.752924240133587e-07, "epoch": 1.745452131191077, "percentage": 87.27, 
"elapsed_time": "23:44:49", "remaining_time": "3:27:52"} +{"current_steps": 6574, "total_steps": 7532, "loss": 0.23726215958595276, "lr": 8.734968574161406e-07, "epoch": 1.7457177001726198, "percentage": 87.28, "elapsed_time": "23:45:02", "remaining_time": "3:27:39"} +{"current_steps": 6575, "total_steps": 7532, "loss": 0.26349812746047974, "lr": 8.717030503010915e-07, "epoch": 1.7459832691541628, "percentage": 87.29, "elapsed_time": "23:45:15", "remaining_time": "3:27:26"} +{"current_steps": 6576, "total_steps": 7532, "loss": 0.23226451873779297, "lr": 8.699110030140367e-07, "epoch": 1.7462488381357057, "percentage": 87.31, "elapsed_time": "23:45:28", "remaining_time": "3:27:13"} +{"current_steps": 6577, "total_steps": 7532, "loss": 0.22188402712345123, "lr": 8.68120715900459e-07, "epoch": 1.7465144071172487, "percentage": 87.32, "elapsed_time": "23:45:41", "remaining_time": "3:27:00"} +{"current_steps": 6578, "total_steps": 7532, "loss": 0.21238234639167786, "lr": 8.663321893055087e-07, "epoch": 1.7467799760987917, "percentage": 87.33, "elapsed_time": "23:45:54", "remaining_time": "3:26:47"} +{"current_steps": 6579, "total_steps": 7532, "loss": 0.2700675427913666, "lr": 8.645454235739903e-07, "epoch": 1.7470455450803346, "percentage": 87.35, "elapsed_time": "23:46:06", "remaining_time": "3:26:34"} +{"current_steps": 6580, "total_steps": 7532, "loss": 0.24463894963264465, "lr": 8.627604190503714e-07, "epoch": 1.7473111140618776, "percentage": 87.36, "elapsed_time": "23:46:20", "remaining_time": "3:26:21"} +{"current_steps": 6581, "total_steps": 7532, "loss": 0.23429079353809357, "lr": 8.609771760787822e-07, "epoch": 1.7475766830434205, "percentage": 87.37, "elapsed_time": "23:46:33", "remaining_time": "3:26:08"} +{"current_steps": 6582, "total_steps": 7532, "loss": 0.21767663955688477, "lr": 8.591956950030067e-07, "epoch": 1.7478422520249635, "percentage": 87.39, "elapsed_time": "23:46:46", "remaining_time": "3:25:55"} +{"current_steps": 6583, "total_steps": 7532, 
"loss": 0.2499813735485077, "lr": 8.574159761664957e-07, "epoch": 1.7481078210065064, "percentage": 87.4, "elapsed_time": "23:46:58", "remaining_time": "3:25:42"} +{"current_steps": 6584, "total_steps": 7532, "loss": 0.28065958619117737, "lr": 8.556380199123582e-07, "epoch": 1.7483733899880494, "percentage": 87.41, "elapsed_time": "23:47:11", "remaining_time": "3:25:29"} +{"current_steps": 6585, "total_steps": 7532, "loss": 0.2166985273361206, "lr": 8.538618265833621e-07, "epoch": 1.7486389589695923, "percentage": 87.43, "elapsed_time": "23:47:24", "remaining_time": "3:25:16"} +{"current_steps": 6586, "total_steps": 7532, "loss": 0.22835782170295715, "lr": 8.520873965219356e-07, "epoch": 1.7489045279511353, "percentage": 87.44, "elapsed_time": "23:47:37", "remaining_time": "3:25:03"} +{"current_steps": 6587, "total_steps": 7532, "loss": 0.23575961589813232, "lr": 8.503147300701709e-07, "epoch": 1.7491700969326782, "percentage": 87.45, "elapsed_time": "23:47:50", "remaining_time": "3:24:50"} +{"current_steps": 6588, "total_steps": 7532, "loss": 0.183369442820549, "lr": 8.485438275698154e-07, "epoch": 1.7494356659142212, "percentage": 87.47, "elapsed_time": "23:48:03", "remaining_time": "3:24:37"} +{"current_steps": 6589, "total_steps": 7532, "loss": 0.2731352746486664, "lr": 8.467746893622786e-07, "epoch": 1.7497012348957641, "percentage": 87.48, "elapsed_time": "23:48:16", "remaining_time": "3:24:24"} +{"current_steps": 6590, "total_steps": 7532, "loss": 0.20177578926086426, "lr": 8.450073157886296e-07, "epoch": 1.749966803877307, "percentage": 87.49, "elapsed_time": "23:48:28", "remaining_time": "3:24:11"} +{"current_steps": 6591, "total_steps": 7532, "loss": 0.21672385931015015, "lr": 8.432417071895982e-07, "epoch": 1.75023237285885, "percentage": 87.51, "elapsed_time": "23:48:41", "remaining_time": "3:23:58"} +{"current_steps": 6592, "total_steps": 7532, "loss": 0.2503831386566162, "lr": 8.414778639055699e-07, "epoch": 1.750497941840393, "percentage": 87.52, 
"elapsed_time": "23:48:54", "remaining_time": "3:23:45"} +{"current_steps": 6593, "total_steps": 7532, "loss": 0.2427521049976349, "lr": 8.397157862765959e-07, "epoch": 1.750763510821936, "percentage": 87.53, "elapsed_time": "23:49:07", "remaining_time": "3:23:32"} +{"current_steps": 6594, "total_steps": 7532, "loss": 0.23128533363342285, "lr": 8.379554746423824e-07, "epoch": 1.7510290798034789, "percentage": 87.55, "elapsed_time": "23:49:19", "remaining_time": "3:23:19"} +{"current_steps": 6595, "total_steps": 7532, "loss": 0.2470957189798355, "lr": 8.361969293422967e-07, "epoch": 1.7512946487850218, "percentage": 87.56, "elapsed_time": "23:49:32", "remaining_time": "3:23:06"} +{"current_steps": 6596, "total_steps": 7532, "loss": 0.29447510838508606, "lr": 8.344401507153665e-07, "epoch": 1.7515602177665648, "percentage": 87.57, "elapsed_time": "23:49:45", "remaining_time": "3:22:53"} +{"current_steps": 6597, "total_steps": 7532, "loss": 0.21585828065872192, "lr": 8.326851391002777e-07, "epoch": 1.7518257867481077, "percentage": 87.59, "elapsed_time": "23:49:58", "remaining_time": "3:22:40"} +{"current_steps": 6598, "total_steps": 7532, "loss": 0.24081121385097504, "lr": 8.30931894835375e-07, "epoch": 1.7520913557296507, "percentage": 87.6, "elapsed_time": "23:50:10", "remaining_time": "3:22:27"} +{"current_steps": 6599, "total_steps": 7532, "loss": 0.23052063584327698, "lr": 8.291804182586638e-07, "epoch": 1.7523569247111936, "percentage": 87.61, "elapsed_time": "23:50:23", "remaining_time": "3:22:14"} +{"current_steps": 6600, "total_steps": 7532, "loss": 0.19008183479309082, "lr": 8.274307097078093e-07, "epoch": 1.7526224936927366, "percentage": 87.63, "elapsed_time": "23:50:35", "remaining_time": "3:22:01"} +{"current_steps": 6601, "total_steps": 7532, "loss": 0.2632960379123688, "lr": 8.25682769520132e-07, "epoch": 1.7528880626742795, "percentage": 87.64, "elapsed_time": "23:50:54", "remaining_time": "3:21:48"} +{"current_steps": 6602, "total_steps": 7532, 
"loss": 0.25958624482154846, "lr": 8.239365980326175e-07, "epoch": 1.7531536316558225, "percentage": 87.65, "elapsed_time": "23:51:07", "remaining_time": "3:21:35"} +{"current_steps": 6603, "total_steps": 7532, "loss": 0.22370605170726776, "lr": 8.221921955819035e-07, "epoch": 1.7534192006373654, "percentage": 87.67, "elapsed_time": "23:51:20", "remaining_time": "3:21:22"} +{"current_steps": 6604, "total_steps": 7532, "loss": 0.22018703818321228, "lr": 8.204495625042919e-07, "epoch": 1.7536847696189084, "percentage": 87.68, "elapsed_time": "23:51:33", "remaining_time": "3:21:09"} +{"current_steps": 6605, "total_steps": 7532, "loss": 0.26802191138267517, "lr": 8.187086991357418e-07, "epoch": 1.7539503386004514, "percentage": 87.69, "elapsed_time": "23:51:45", "remaining_time": "3:20:56"} +{"current_steps": 6606, "total_steps": 7532, "loss": 0.21560518443584442, "lr": 8.169696058118725e-07, "epoch": 1.7542159075819943, "percentage": 87.71, "elapsed_time": "23:51:58", "remaining_time": "3:20:43"} +{"current_steps": 6607, "total_steps": 7532, "loss": 0.23222430050373077, "lr": 8.152322828679593e-07, "epoch": 1.7544814765635373, "percentage": 87.72, "elapsed_time": "23:52:11", "remaining_time": "3:20:30"} +{"current_steps": 6608, "total_steps": 7532, "loss": 0.17638427019119263, "lr": 8.134967306389374e-07, "epoch": 1.7547470455450802, "percentage": 87.73, "elapsed_time": "23:52:24", "remaining_time": "3:20:17"} +{"current_steps": 6609, "total_steps": 7532, "loss": 0.21539513766765594, "lr": 8.117629494594015e-07, "epoch": 1.7550126145266232, "percentage": 87.75, "elapsed_time": "23:52:37", "remaining_time": "3:20:04"} +{"current_steps": 6610, "total_steps": 7532, "loss": 0.2265736162662506, "lr": 8.100309396636031e-07, "epoch": 1.7552781835081661, "percentage": 87.76, "elapsed_time": "23:52:51", "remaining_time": "3:19:51"} +{"current_steps": 6611, "total_steps": 7532, "loss": 0.2688787281513214, "lr": 8.083007015854549e-07, "epoch": 1.755543752489709, "percentage": 
87.77, "elapsed_time": "23:53:04", "remaining_time": "3:19:38"} +{"current_steps": 6612, "total_steps": 7532, "loss": 0.19756367802619934, "lr": 8.065722355585249e-07, "epoch": 1.755809321471252, "percentage": 87.79, "elapsed_time": "23:53:18", "remaining_time": "3:19:25"} +{"current_steps": 6613, "total_steps": 7532, "loss": 0.19934290647506714, "lr": 8.048455419160405e-07, "epoch": 1.756074890452795, "percentage": 87.8, "elapsed_time": "23:53:31", "remaining_time": "3:19:12"} +{"current_steps": 6614, "total_steps": 7532, "loss": 0.2523588538169861, "lr": 8.031206209908904e-07, "epoch": 1.756340459434338, "percentage": 87.81, "elapsed_time": "23:53:44", "remaining_time": "3:18:59"} +{"current_steps": 6615, "total_steps": 7532, "loss": 0.22825747728347778, "lr": 8.01397473115616e-07, "epoch": 1.7566060284158809, "percentage": 87.83, "elapsed_time": "23:53:57", "remaining_time": "3:18:46"} +{"current_steps": 6616, "total_steps": 7532, "loss": 0.24525251984596252, "lr": 7.996760986224228e-07, "epoch": 1.7568715973974238, "percentage": 87.84, "elapsed_time": "23:54:10", "remaining_time": "3:18:33"} +{"current_steps": 6617, "total_steps": 7532, "loss": 0.21883559226989746, "lr": 7.979564978431687e-07, "epoch": 1.7571371663789668, "percentage": 87.85, "elapsed_time": "23:54:23", "remaining_time": "3:18:20"} +{"current_steps": 6618, "total_steps": 7532, "loss": 0.2642098069190979, "lr": 7.96238671109374e-07, "epoch": 1.7574027353605097, "percentage": 87.87, "elapsed_time": "23:54:36", "remaining_time": "3:18:07"} +{"current_steps": 6619, "total_steps": 7532, "loss": 0.24094998836517334, "lr": 7.945226187522159e-07, "epoch": 1.757668304342053, "percentage": 87.88, "elapsed_time": "23:54:50", "remaining_time": "3:17:54"} +{"current_steps": 6620, "total_steps": 7532, "loss": 0.2225762903690338, "lr": 7.928083411025278e-07, "epoch": 1.7579338733235959, "percentage": 87.89, "elapsed_time": "23:55:03", "remaining_time": "3:17:41"} +{"current_steps": 6621, "total_steps": 7532, 
"loss": 0.26722851395606995, "lr": 7.910958384908041e-07, "epoch": 1.7581994423051388, "percentage": 87.9, "elapsed_time": "23:55:17", "remaining_time": "3:17:29"} +{"current_steps": 6622, "total_steps": 7532, "loss": 0.2176910787820816, "lr": 7.893851112471907e-07, "epoch": 1.7584650112866818, "percentage": 87.92, "elapsed_time": "23:55:30", "remaining_time": "3:17:16"} +{"current_steps": 6623, "total_steps": 7532, "loss": 0.20261354744434357, "lr": 7.876761597015003e-07, "epoch": 1.7587305802682247, "percentage": 87.93, "elapsed_time": "23:55:43", "remaining_time": "3:17:03"} +{"current_steps": 6624, "total_steps": 7532, "loss": 0.23314467072486877, "lr": 7.859689841831975e-07, "epoch": 1.7589961492497677, "percentage": 87.94, "elapsed_time": "23:55:56", "remaining_time": "3:16:50"} +{"current_steps": 6625, "total_steps": 7532, "loss": 0.19854989647865295, "lr": 7.842635850214054e-07, "epoch": 1.7592617182313106, "percentage": 87.96, "elapsed_time": "23:56:10", "remaining_time": "3:16:37"} +{"current_steps": 6626, "total_steps": 7532, "loss": 0.2422565519809723, "lr": 7.825599625449043e-07, "epoch": 1.7595272872128536, "percentage": 87.97, "elapsed_time": "23:56:23", "remaining_time": "3:16:24"} +{"current_steps": 6627, "total_steps": 7532, "loss": 0.27029529213905334, "lr": 7.808581170821328e-07, "epoch": 1.7597928561943965, "percentage": 87.98, "elapsed_time": "23:56:36", "remaining_time": "3:16:11"} +{"current_steps": 6628, "total_steps": 7532, "loss": 0.23596832156181335, "lr": 7.791580489611872e-07, "epoch": 1.7600584251759395, "percentage": 88.0, "elapsed_time": "23:56:49", "remaining_time": "3:15:58"} +{"current_steps": 6629, "total_steps": 7532, "loss": 0.218271404504776, "lr": 7.774597585098198e-07, "epoch": 1.7603239941574824, "percentage": 88.01, "elapsed_time": "23:57:02", "remaining_time": "3:15:45"} +{"current_steps": 6630, "total_steps": 7532, "loss": 0.2551255226135254, "lr": 7.75763246055441e-07, "epoch": 1.7605895631390254, "percentage": 88.02, 
"elapsed_time": "23:57:15", "remaining_time": "3:15:32"} +{"current_steps": 6631, "total_steps": 7532, "loss": 0.24410653114318848, "lr": 7.740685119251179e-07, "epoch": 1.7608551321205683, "percentage": 88.04, "elapsed_time": "23:57:29", "remaining_time": "3:15:19"} +{"current_steps": 6632, "total_steps": 7532, "loss": 0.23044872283935547, "lr": 7.723755564455771e-07, "epoch": 1.7611207011021113, "percentage": 88.05, "elapsed_time": "23:57:42", "remaining_time": "3:15:06"} +{"current_steps": 6633, "total_steps": 7532, "loss": 0.24569427967071533, "lr": 7.706843799431985e-07, "epoch": 1.7613862700836542, "percentage": 88.06, "elapsed_time": "23:57:55", "remaining_time": "3:14:53"} +{"current_steps": 6634, "total_steps": 7532, "loss": 0.200277179479599, "lr": 7.689949827440224e-07, "epoch": 1.7616518390651972, "percentage": 88.08, "elapsed_time": "23:58:08", "remaining_time": "3:14:40"} +{"current_steps": 6635, "total_steps": 7532, "loss": 0.19217821955680847, "lr": 7.673073651737428e-07, "epoch": 1.7619174080467401, "percentage": 88.09, "elapsed_time": "23:58:22", "remaining_time": "3:14:27"} +{"current_steps": 6636, "total_steps": 7532, "loss": 0.227005273103714, "lr": 7.656215275577151e-07, "epoch": 1.762182977028283, "percentage": 88.1, "elapsed_time": "23:58:35", "remaining_time": "3:14:14"} +{"current_steps": 6637, "total_steps": 7532, "loss": 0.21359863877296448, "lr": 7.639374702209468e-07, "epoch": 1.762448546009826, "percentage": 88.12, "elapsed_time": "23:58:48", "remaining_time": "3:14:01"} +{"current_steps": 6638, "total_steps": 7532, "loss": 0.24056711792945862, "lr": 7.62255193488105e-07, "epoch": 1.762714114991369, "percentage": 88.13, "elapsed_time": "23:59:01", "remaining_time": "3:13:48"} +{"current_steps": 6639, "total_steps": 7532, "loss": 0.20897413790225983, "lr": 7.605746976835127e-07, "epoch": 1.762979683972912, "percentage": 88.14, "elapsed_time": "23:59:14", "remaining_time": "3:13:35"} +{"current_steps": 6640, "total_steps": 7532, "loss": 
0.20395967364311218, "lr": 7.588959831311493e-07, "epoch": 1.763245252954455, "percentage": 88.16, "elapsed_time": "23:59:27", "remaining_time": "3:13:22"} +{"current_steps": 6641, "total_steps": 7532, "loss": 0.2334095984697342, "lr": 7.572190501546517e-07, "epoch": 1.7635108219359978, "percentage": 88.17, "elapsed_time": "23:59:40", "remaining_time": "3:13:09"} +{"current_steps": 6642, "total_steps": 7532, "loss": 0.23892858624458313, "lr": 7.555438990773134e-07, "epoch": 1.7637763909175408, "percentage": 88.18, "elapsed_time": "23:59:53", "remaining_time": "3:12:56"} +{"current_steps": 6643, "total_steps": 7532, "loss": 0.23515449464321136, "lr": 7.538705302220839e-07, "epoch": 1.7640419598990837, "percentage": 88.2, "elapsed_time": "1 day, 0:00:06", "remaining_time": "3:12:43"} +{"current_steps": 6644, "total_steps": 7532, "loss": 0.19728611409664154, "lr": 7.521989439115674e-07, "epoch": 1.7643075288806267, "percentage": 88.21, "elapsed_time": "1 day, 0:00:19", "remaining_time": "3:12:30"} +{"current_steps": 6645, "total_steps": 7532, "loss": 0.22277355194091797, "lr": 7.505291404680281e-07, "epoch": 1.7645730978621696, "percentage": 88.22, "elapsed_time": "1 day, 0:00:31", "remaining_time": "3:12:17"} +{"current_steps": 6646, "total_steps": 7532, "loss": 0.24117602407932281, "lr": 7.488611202133822e-07, "epoch": 1.7648386668437126, "percentage": 88.24, "elapsed_time": "1 day, 0:00:44", "remaining_time": "3:12:04"} +{"current_steps": 6647, "total_steps": 7532, "loss": 0.24675750732421875, "lr": 7.471948834692045e-07, "epoch": 1.7651042358252558, "percentage": 88.25, "elapsed_time": "1 day, 0:00:57", "remaining_time": "3:11:51"} +{"current_steps": 6648, "total_steps": 7532, "loss": 0.2413899004459381, "lr": 7.455304305567279e-07, "epoch": 1.7653698048067987, "percentage": 88.26, "elapsed_time": "1 day, 0:01:10", "remaining_time": "3:11:38"} +{"current_steps": 6649, "total_steps": 7532, "loss": 0.22125428915023804, "lr": 7.438677617968348e-07, "epoch": 
1.7656353737883417, "percentage": 88.28, "elapsed_time": "1 day, 0:01:24", "remaining_time": "3:11:25"} +{"current_steps": 6650, "total_steps": 7532, "loss": 0.205051988363266, "lr": 7.422068775100732e-07, "epoch": 1.7659009427698846, "percentage": 88.29, "elapsed_time": "1 day, 0:01:37", "remaining_time": "3:11:12"} +{"current_steps": 6651, "total_steps": 7532, "loss": 0.23711715638637543, "lr": 7.405477780166415e-07, "epoch": 1.7661665117514276, "percentage": 88.3, "elapsed_time": "1 day, 0:01:50", "remaining_time": "3:10:59"} +{"current_steps": 6652, "total_steps": 7532, "loss": 0.2591046988964081, "lr": 7.388904636363914e-07, "epoch": 1.7664320807329705, "percentage": 88.32, "elapsed_time": "1 day, 0:02:03", "remaining_time": "3:10:46"} +{"current_steps": 6653, "total_steps": 7532, "loss": 0.24837243556976318, "lr": 7.372349346888363e-07, "epoch": 1.7666976497145135, "percentage": 88.33, "elapsed_time": "1 day, 0:02:17", "remaining_time": "3:10:33"} +{"current_steps": 6654, "total_steps": 7532, "loss": 0.20910412073135376, "lr": 7.35581191493141e-07, "epoch": 1.7669632186960564, "percentage": 88.34, "elapsed_time": "1 day, 0:02:29", "remaining_time": "3:10:20"} +{"current_steps": 6655, "total_steps": 7532, "loss": 0.2056204229593277, "lr": 7.339292343681282e-07, "epoch": 1.7672287876775994, "percentage": 88.36, "elapsed_time": "1 day, 0:02:43", "remaining_time": "3:10:07"} +{"current_steps": 6656, "total_steps": 7532, "loss": 0.2496742308139801, "lr": 7.322790636322764e-07, "epoch": 1.7674943566591423, "percentage": 88.37, "elapsed_time": "1 day, 0:02:56", "remaining_time": "3:09:54"} +{"current_steps": 6657, "total_steps": 7532, "loss": 0.24432921409606934, "lr": 7.306306796037188e-07, "epoch": 1.7677599256406853, "percentage": 88.38, "elapsed_time": "1 day, 0:03:09", "remaining_time": "3:09:41"} +{"current_steps": 6658, "total_steps": 7532, "loss": 0.2492775321006775, "lr": 7.289840826002414e-07, "epoch": 1.7680254946222282, "percentage": 88.4, 
"elapsed_time": "1 day, 0:03:22", "remaining_time": "3:09:28"} +{"current_steps": 6659, "total_steps": 7532, "loss": 0.22673827409744263, "lr": 7.273392729392936e-07, "epoch": 1.7682910636037712, "percentage": 88.41, "elapsed_time": "1 day, 0:03:35", "remaining_time": "3:09:15"} +{"current_steps": 6660, "total_steps": 7532, "loss": 0.2225622981786728, "lr": 7.25696250937975e-07, "epoch": 1.7685566325853141, "percentage": 88.42, "elapsed_time": "1 day, 0:03:49", "remaining_time": "3:09:02"} +{"current_steps": 6661, "total_steps": 7532, "loss": 0.24896883964538574, "lr": 7.240550169130378e-07, "epoch": 1.768822201566857, "percentage": 88.44, "elapsed_time": "1 day, 0:04:02", "remaining_time": "3:08:49"} +{"current_steps": 6662, "total_steps": 7532, "loss": 0.2395302951335907, "lr": 7.224155711808923e-07, "epoch": 1.7690877705484, "percentage": 88.45, "elapsed_time": "1 day, 0:04:16", "remaining_time": "3:08:36"} +{"current_steps": 6663, "total_steps": 7532, "loss": 0.2255886197090149, "lr": 7.207779140576066e-07, "epoch": 1.769353339529943, "percentage": 88.46, "elapsed_time": "1 day, 0:04:29", "remaining_time": "3:08:23"} +{"current_steps": 6664, "total_steps": 7532, "loss": 0.24029678106307983, "lr": 7.191420458589005e-07, "epoch": 1.769618908511486, "percentage": 88.48, "elapsed_time": "1 day, 0:04:42", "remaining_time": "3:08:10"} +{"current_steps": 6665, "total_steps": 7532, "loss": 0.19399142265319824, "lr": 7.175079669001506e-07, "epoch": 1.769884477493029, "percentage": 88.49, "elapsed_time": "1 day, 0:04:55", "remaining_time": "3:07:57"} +{"current_steps": 6666, "total_steps": 7532, "loss": 0.24569162726402283, "lr": 7.158756774963882e-07, "epoch": 1.7701500464745719, "percentage": 88.5, "elapsed_time": "1 day, 0:05:09", "remaining_time": "3:07:44"} +{"current_steps": 6667, "total_steps": 7532, "loss": 0.2484329342842102, "lr": 7.142451779622971e-07, "epoch": 1.7704156154561148, "percentage": 88.52, "elapsed_time": "1 day, 0:05:22", "remaining_time": 
"3:07:31"} +{"current_steps": 6668, "total_steps": 7532, "loss": 0.24423512816429138, "lr": 7.126164686122216e-07, "epoch": 1.7706811844376578, "percentage": 88.53, "elapsed_time": "1 day, 0:05:35", "remaining_time": "3:07:18"} +{"current_steps": 6669, "total_steps": 7532, "loss": 0.20146678388118744, "lr": 7.109895497601571e-07, "epoch": 1.7709467534192007, "percentage": 88.54, "elapsed_time": "1 day, 0:05:48", "remaining_time": "3:07:05"} +{"current_steps": 6670, "total_steps": 7532, "loss": 0.23329001665115356, "lr": 7.093644217197526e-07, "epoch": 1.7712123224007437, "percentage": 88.56, "elapsed_time": "1 day, 0:06:01", "remaining_time": "3:06:52"} +{"current_steps": 6671, "total_steps": 7532, "loss": 0.2290019690990448, "lr": 7.077410848043165e-07, "epoch": 1.7714778913822866, "percentage": 88.57, "elapsed_time": "1 day, 0:06:13", "remaining_time": "3:06:39"} +{"current_steps": 6672, "total_steps": 7532, "loss": 0.2329377382993698, "lr": 7.061195393268061e-07, "epoch": 1.7717434603638296, "percentage": 88.58, "elapsed_time": "1 day, 0:06:27", "remaining_time": "3:06:26"} +{"current_steps": 6673, "total_steps": 7532, "loss": 0.21513575315475464, "lr": 7.04499785599837e-07, "epoch": 1.7720090293453725, "percentage": 88.6, "elapsed_time": "1 day, 0:06:40", "remaining_time": "3:06:13"} +{"current_steps": 6674, "total_steps": 7532, "loss": 0.19022463262081146, "lr": 7.028818239356794e-07, "epoch": 1.7722745983269155, "percentage": 88.61, "elapsed_time": "1 day, 0:06:53", "remaining_time": "3:06:00"} +{"current_steps": 6675, "total_steps": 7532, "loss": 0.2097887396812439, "lr": 7.012656546462571e-07, "epoch": 1.7725401673084584, "percentage": 88.62, "elapsed_time": "1 day, 0:07:06", "remaining_time": "3:05:47"} +{"current_steps": 6676, "total_steps": 7532, "loss": 0.2559792101383209, "lr": 6.996512780431486e-07, "epoch": 1.7728057362900014, "percentage": 88.64, "elapsed_time": "1 day, 0:07:19", "remaining_time": "3:05:34"} +{"current_steps": 6677, "total_steps": 
7532, "loss": 0.24624274671077728, "lr": 6.980386944375849e-07, "epoch": 1.7730713052715443, "percentage": 88.65, "elapsed_time": "1 day, 0:07:32", "remaining_time": "3:05:21"} +{"current_steps": 6678, "total_steps": 7532, "loss": 0.22904372215270996, "lr": 6.964279041404553e-07, "epoch": 1.7733368742530873, "percentage": 88.66, "elapsed_time": "1 day, 0:07:46", "remaining_time": "3:05:08"} +{"current_steps": 6679, "total_steps": 7532, "loss": 0.20808623731136322, "lr": 6.948189074623002e-07, "epoch": 1.7736024432346302, "percentage": 88.67, "elapsed_time": "1 day, 0:07:59", "remaining_time": "3:04:55"} +{"current_steps": 6680, "total_steps": 7532, "loss": 0.1931435763835907, "lr": 6.932117047133158e-07, "epoch": 1.7738680122161732, "percentage": 88.69, "elapsed_time": "1 day, 0:08:12", "remaining_time": "3:04:42"} +{"current_steps": 6681, "total_steps": 7532, "loss": 0.22938531637191772, "lr": 6.91606296203351e-07, "epoch": 1.7741335811977161, "percentage": 88.7, "elapsed_time": "1 day, 0:08:26", "remaining_time": "3:04:29"} +{"current_steps": 6682, "total_steps": 7532, "loss": 0.240365132689476, "lr": 6.900026822419103e-07, "epoch": 1.774399150179259, "percentage": 88.71, "elapsed_time": "1 day, 0:08:39", "remaining_time": "3:04:16"} +{"current_steps": 6683, "total_steps": 7532, "loss": 0.26665499806404114, "lr": 6.8840086313815e-07, "epoch": 1.774664719160802, "percentage": 88.73, "elapsed_time": "1 day, 0:08:52", "remaining_time": "3:04:03"} +{"current_steps": 6684, "total_steps": 7532, "loss": 0.19775834679603577, "lr": 6.86800839200884e-07, "epoch": 1.774930288142345, "percentage": 88.74, "elapsed_time": "1 day, 0:09:05", "remaining_time": "3:03:50"} +{"current_steps": 6685, "total_steps": 7532, "loss": 0.20334021747112274, "lr": 6.852026107385756e-07, "epoch": 1.775195857123888, "percentage": 88.75, "elapsed_time": "1 day, 0:09:19", "remaining_time": "3:03:37"} +{"current_steps": 6686, "total_steps": 7532, "loss": 0.20670340955257416, "lr": 
6.836061780593484e-07, "epoch": 1.775461426105431, "percentage": 88.77, "elapsed_time": "1 day, 0:09:32", "remaining_time": "3:03:24"} +{"current_steps": 6687, "total_steps": 7532, "loss": 0.2033209353685379, "lr": 6.820115414709727e-07, "epoch": 1.7757269950869738, "percentage": 88.78, "elapsed_time": "1 day, 0:09:45", "remaining_time": "3:03:11"} +{"current_steps": 6688, "total_steps": 7532, "loss": 0.23827815055847168, "lr": 6.804187012808761e-07, "epoch": 1.7759925640685168, "percentage": 88.79, "elapsed_time": "1 day, 0:09:58", "remaining_time": "3:02:58"} +{"current_steps": 6689, "total_steps": 7532, "loss": 0.2054731547832489, "lr": 6.788276577961394e-07, "epoch": 1.7762581330500598, "percentage": 88.81, "elapsed_time": "1 day, 0:10:11", "remaining_time": "3:02:45"} +{"current_steps": 6690, "total_steps": 7532, "loss": 0.25553691387176514, "lr": 6.772384113234987e-07, "epoch": 1.7765237020316027, "percentage": 88.82, "elapsed_time": "1 day, 0:10:25", "remaining_time": "3:02:32"} +{"current_steps": 6691, "total_steps": 7532, "loss": 0.23650874197483063, "lr": 6.756509621693385e-07, "epoch": 1.7767892710131457, "percentage": 88.83, "elapsed_time": "1 day, 0:10:38", "remaining_time": "3:02:19"} +{"current_steps": 6692, "total_steps": 7532, "loss": 0.2353624701499939, "lr": 6.740653106397033e-07, "epoch": 1.7770548399946886, "percentage": 88.85, "elapsed_time": "1 day, 0:10:51", "remaining_time": "3:02:06"} +{"current_steps": 6693, "total_steps": 7532, "loss": 0.26034629344940186, "lr": 6.724814570402871e-07, "epoch": 1.7773204089762316, "percentage": 88.86, "elapsed_time": "1 day, 0:11:04", "remaining_time": "3:01:53"} +{"current_steps": 6694, "total_steps": 7532, "loss": 0.2272130399942398, "lr": 6.70899401676438e-07, "epoch": 1.7775859779577745, "percentage": 88.87, "elapsed_time": "1 day, 0:11:17", "remaining_time": "3:01:40"} +{"current_steps": 6695, "total_steps": 7532, "loss": 0.27940404415130615, "lr": 6.693191448531589e-07, "epoch": 1.7778515469393175, 
"percentage": 88.89, "elapsed_time": "1 day, 0:11:30", "remaining_time": "3:01:27"} +{"current_steps": 6696, "total_steps": 7532, "loss": 0.22997702658176422, "lr": 6.677406868751013e-07, "epoch": 1.7781171159208604, "percentage": 88.9, "elapsed_time": "1 day, 0:11:43", "remaining_time": "3:01:14"} +{"current_steps": 6697, "total_steps": 7532, "loss": 0.22918452322483063, "lr": 6.661640280465775e-07, "epoch": 1.7783826849024034, "percentage": 88.91, "elapsed_time": "1 day, 0:11:56", "remaining_time": "3:01:01"} +{"current_steps": 6698, "total_steps": 7532, "loss": 0.18456090986728668, "lr": 6.645891686715456e-07, "epoch": 1.7786482538839463, "percentage": 88.93, "elapsed_time": "1 day, 0:12:09", "remaining_time": "3:00:48"} +{"current_steps": 6699, "total_steps": 7532, "loss": 0.23256534337997437, "lr": 6.630161090536214e-07, "epoch": 1.7789138228654893, "percentage": 88.94, "elapsed_time": "1 day, 0:12:22", "remaining_time": "3:00:35"} +{"current_steps": 6700, "total_steps": 7532, "loss": 0.21171879768371582, "lr": 6.614448494960713e-07, "epoch": 1.7791793918470322, "percentage": 88.95, "elapsed_time": "1 day, 0:12:35", "remaining_time": "3:00:22"} +{"current_steps": 6701, "total_steps": 7532, "loss": 0.21382400393486023, "lr": 6.598753903018163e-07, "epoch": 1.7794449608285752, "percentage": 88.97, "elapsed_time": "1 day, 0:12:53", "remaining_time": "3:00:10"} +{"current_steps": 6702, "total_steps": 7532, "loss": 0.22954748570919037, "lr": 6.583077317734299e-07, "epoch": 1.7797105298101181, "percentage": 88.98, "elapsed_time": "1 day, 0:13:06", "remaining_time": "2:59:57"} +{"current_steps": 6703, "total_steps": 7532, "loss": 0.25691086053848267, "lr": 6.56741874213136e-07, "epoch": 1.779976098791661, "percentage": 88.99, "elapsed_time": "1 day, 0:13:19", "remaining_time": "2:59:44"} +{"current_steps": 6704, "total_steps": 7532, "loss": 0.23413901031017303, "lr": 6.551778179228174e-07, "epoch": 1.780241667773204, "percentage": 89.01, "elapsed_time": "1 day, 
0:13:32", "remaining_time": "2:59:31"} +{"current_steps": 6705, "total_steps": 7532, "loss": 0.2493733912706375, "lr": 6.536155632040031e-07, "epoch": 1.780507236754747, "percentage": 89.02, "elapsed_time": "1 day, 0:13:45", "remaining_time": "2:59:18"} +{"current_steps": 6706, "total_steps": 7532, "loss": 0.26094138622283936, "lr": 6.520551103578776e-07, "epoch": 1.78077280573629, "percentage": 89.03, "elapsed_time": "1 day, 0:13:58", "remaining_time": "2:59:05"} +{"current_steps": 6707, "total_steps": 7532, "loss": 0.23509518802165985, "lr": 6.504964596852781e-07, "epoch": 1.7810383747178329, "percentage": 89.05, "elapsed_time": "1 day, 0:14:11", "remaining_time": "2:58:52"} +{"current_steps": 6708, "total_steps": 7532, "loss": 0.2471122294664383, "lr": 6.489396114866942e-07, "epoch": 1.7813039436993758, "percentage": 89.06, "elapsed_time": "1 day, 0:14:24", "remaining_time": "2:58:39"} +{"current_steps": 6709, "total_steps": 7532, "loss": 0.2363303005695343, "lr": 6.47384566062268e-07, "epoch": 1.7815695126809188, "percentage": 89.07, "elapsed_time": "1 day, 0:14:37", "remaining_time": "2:58:26"} +{"current_steps": 6710, "total_steps": 7532, "loss": 0.18868233263492584, "lr": 6.458313237117953e-07, "epoch": 1.7818350816624617, "percentage": 89.09, "elapsed_time": "1 day, 0:14:50", "remaining_time": "2:58:13"} +{"current_steps": 6711, "total_steps": 7532, "loss": 0.23380546271800995, "lr": 6.442798847347187e-07, "epoch": 1.7821006506440047, "percentage": 89.1, "elapsed_time": "1 day, 0:15:03", "remaining_time": "2:58:00"} +{"current_steps": 6712, "total_steps": 7532, "loss": 0.24112167954444885, "lr": 6.42730249430139e-07, "epoch": 1.7823662196255476, "percentage": 89.11, "elapsed_time": "1 day, 0:15:16", "remaining_time": "2:57:47"} +{"current_steps": 6713, "total_steps": 7532, "loss": 0.2397521436214447, "lr": 6.411824180968096e-07, "epoch": 1.7826317886070906, "percentage": 89.13, "elapsed_time": "1 day, 0:15:29", "remaining_time": "2:57:34"} 
+{"current_steps": 6714, "total_steps": 7532, "loss": 0.23775406181812286, "lr": 6.396363910331338e-07, "epoch": 1.7828973575886335, "percentage": 89.14, "elapsed_time": "1 day, 0:15:42", "remaining_time": "2:57:21"} +{"current_steps": 6715, "total_steps": 7532, "loss": 0.23278602957725525, "lr": 6.380921685371655e-07, "epoch": 1.7831629265701765, "percentage": 89.15, "elapsed_time": "1 day, 0:15:56", "remaining_time": "2:57:08"} +{"current_steps": 6716, "total_steps": 7532, "loss": 0.20028996467590332, "lr": 6.365497509066143e-07, "epoch": 1.7834284955517195, "percentage": 89.17, "elapsed_time": "1 day, 0:16:09", "remaining_time": "2:56:55"} +{"current_steps": 6717, "total_steps": 7532, "loss": 0.20862875878810883, "lr": 6.35009138438839e-07, "epoch": 1.7836940645332624, "percentage": 89.18, "elapsed_time": "1 day, 0:16:22", "remaining_time": "2:56:42"} +{"current_steps": 6718, "total_steps": 7532, "loss": 0.23522542417049408, "lr": 6.334703314308521e-07, "epoch": 1.7839596335148054, "percentage": 89.19, "elapsed_time": "1 day, 0:16:35", "remaining_time": "2:56:29"} +{"current_steps": 6719, "total_steps": 7532, "loss": 0.24633824825286865, "lr": 6.319333301793173e-07, "epoch": 1.7842252024963483, "percentage": 89.21, "elapsed_time": "1 day, 0:16:48", "remaining_time": "2:56:16"} +{"current_steps": 6720, "total_steps": 7532, "loss": 0.22141410410404205, "lr": 6.30398134980551e-07, "epoch": 1.7844907714778913, "percentage": 89.22, "elapsed_time": "1 day, 0:17:01", "remaining_time": "2:56:03"} +{"current_steps": 6721, "total_steps": 7532, "loss": 0.23313754796981812, "lr": 6.288647461305186e-07, "epoch": 1.7847563404594342, "percentage": 89.23, "elapsed_time": "1 day, 0:17:14", "remaining_time": "2:55:50"} +{"current_steps": 6722, "total_steps": 7532, "loss": 0.22015389800071716, "lr": 6.273331639248414e-07, "epoch": 1.7850219094409772, "percentage": 89.25, "elapsed_time": "1 day, 0:17:27", "remaining_time": "2:55:37"} +{"current_steps": 6723, "total_steps": 7532, 
"loss": 0.21154522895812988, "lr": 6.258033886587911e-07, "epoch": 1.7852874784225201, "percentage": 89.26, "elapsed_time": "1 day, 0:17:40", "remaining_time": "2:55:24"} +{"current_steps": 6724, "total_steps": 7532, "loss": 0.2320503294467926, "lr": 6.242754206272883e-07, "epoch": 1.785553047404063, "percentage": 89.27, "elapsed_time": "1 day, 0:17:54", "remaining_time": "2:55:11"} +{"current_steps": 6725, "total_steps": 7532, "loss": 0.21778921782970428, "lr": 6.227492601249097e-07, "epoch": 1.785818616385606, "percentage": 89.29, "elapsed_time": "1 day, 0:18:07", "remaining_time": "2:54:58"} +{"current_steps": 6726, "total_steps": 7532, "loss": 0.2368871569633484, "lr": 6.212249074458776e-07, "epoch": 1.786084185367149, "percentage": 89.3, "elapsed_time": "1 day, 0:18:20", "remaining_time": "2:54:45"} +{"current_steps": 6727, "total_steps": 7532, "loss": 0.27269479632377625, "lr": 6.197023628840704e-07, "epoch": 1.786349754348692, "percentage": 89.31, "elapsed_time": "1 day, 0:18:33", "remaining_time": "2:54:32"} +{"current_steps": 6728, "total_steps": 7532, "loss": 0.2414151132106781, "lr": 6.181816267330177e-07, "epoch": 1.7866153233302349, "percentage": 89.33, "elapsed_time": "1 day, 0:18:46", "remaining_time": "2:54:19"} +{"current_steps": 6729, "total_steps": 7532, "loss": 0.2156972736120224, "lr": 6.166626992858993e-07, "epoch": 1.7868808923117778, "percentage": 89.34, "elapsed_time": "1 day, 0:18:59", "remaining_time": "2:54:06"} +{"current_steps": 6730, "total_steps": 7532, "loss": 0.2510441541671753, "lr": 6.151455808355455e-07, "epoch": 1.7871464612933208, "percentage": 89.35, "elapsed_time": "1 day, 0:19:13", "remaining_time": "2:53:53"} +{"current_steps": 6731, "total_steps": 7532, "loss": 0.20290088653564453, "lr": 6.136302716744402e-07, "epoch": 1.787412030274864, "percentage": 89.37, "elapsed_time": "1 day, 0:19:26", "remaining_time": "2:53:40"} +{"current_steps": 6732, "total_steps": 7532, "loss": 0.25088101625442505, "lr": 6.121167720947174e-07, 
"epoch": 1.787677599256407, "percentage": 89.38, "elapsed_time": "1 day, 0:19:39", "remaining_time": "2:53:27"} +{"current_steps": 6733, "total_steps": 7532, "loss": 0.2566376328468323, "lr": 6.106050823881604e-07, "epoch": 1.7879431682379499, "percentage": 89.39, "elapsed_time": "1 day, 0:19:52", "remaining_time": "2:53:14"} +{"current_steps": 6734, "total_steps": 7532, "loss": 0.1882714033126831, "lr": 6.09095202846206e-07, "epoch": 1.7882087372194928, "percentage": 89.41, "elapsed_time": "1 day, 0:20:05", "remaining_time": "2:53:01"} +{"current_steps": 6735, "total_steps": 7532, "loss": 0.18705856800079346, "lr": 6.075871337599404e-07, "epoch": 1.7884743062010358, "percentage": 89.42, "elapsed_time": "1 day, 0:20:18", "remaining_time": "2:52:48"} +{"current_steps": 6736, "total_steps": 7532, "loss": 0.24756133556365967, "lr": 6.060808754201031e-07, "epoch": 1.7887398751825787, "percentage": 89.43, "elapsed_time": "1 day, 0:20:31", "remaining_time": "2:52:35"} +{"current_steps": 6737, "total_steps": 7532, "loss": 0.2537599205970764, "lr": 6.045764281170818e-07, "epoch": 1.7890054441641217, "percentage": 89.45, "elapsed_time": "1 day, 0:20:44", "remaining_time": "2:52:22"} +{"current_steps": 6738, "total_steps": 7532, "loss": 0.22049202024936676, "lr": 6.030737921409169e-07, "epoch": 1.7892710131456646, "percentage": 89.46, "elapsed_time": "1 day, 0:20:57", "remaining_time": "2:52:09"} +{"current_steps": 6739, "total_steps": 7532, "loss": 0.20820394158363342, "lr": 6.015729677812965e-07, "epoch": 1.7895365821272076, "percentage": 89.47, "elapsed_time": "1 day, 0:21:11", "remaining_time": "2:51:56"} +{"current_steps": 6740, "total_steps": 7532, "loss": 0.2339879721403122, "lr": 6.00073955327567e-07, "epoch": 1.7898021511087505, "percentage": 89.48, "elapsed_time": "1 day, 0:21:24", "remaining_time": "2:51:43"} +{"current_steps": 6741, "total_steps": 7532, "loss": 0.22082161903381348, "lr": 5.98576755068715e-07, "epoch": 1.7900677200902935, "percentage": 89.5, 
"elapsed_time": "1 day, 0:21:37", "remaining_time": "2:51:30"} +{"current_steps": 6742, "total_steps": 7532, "loss": 0.21883058547973633, "lr": 5.97081367293385e-07, "epoch": 1.7903332890718364, "percentage": 89.51, "elapsed_time": "1 day, 0:21:50", "remaining_time": "2:51:17"} +{"current_steps": 6743, "total_steps": 7532, "loss": 0.214680016040802, "lr": 5.955877922898712e-07, "epoch": 1.7905988580533794, "percentage": 89.52, "elapsed_time": "1 day, 0:22:03", "remaining_time": "2:51:04"} +{"current_steps": 6744, "total_steps": 7532, "loss": 0.24533744156360626, "lr": 5.940960303461152e-07, "epoch": 1.7908644270349223, "percentage": 89.54, "elapsed_time": "1 day, 0:22:16", "remaining_time": "2:50:51"} +{"current_steps": 6745, "total_steps": 7532, "loss": 0.19857585430145264, "lr": 5.926060817497137e-07, "epoch": 1.7911299960164653, "percentage": 89.55, "elapsed_time": "1 day, 0:22:30", "remaining_time": "2:50:38"} +{"current_steps": 6746, "total_steps": 7532, "loss": 0.27493876218795776, "lr": 5.911179467879081e-07, "epoch": 1.7913955649980082, "percentage": 89.56, "elapsed_time": "1 day, 0:22:42", "remaining_time": "2:50:25"} +{"current_steps": 6747, "total_steps": 7532, "loss": 0.20560544729232788, "lr": 5.896316257475954e-07, "epoch": 1.7916611339795512, "percentage": 89.58, "elapsed_time": "1 day, 0:22:56", "remaining_time": "2:50:12"} +{"current_steps": 6748, "total_steps": 7532, "loss": 0.23559418320655823, "lr": 5.881471189153199e-07, "epoch": 1.7919267029610941, "percentage": 89.59, "elapsed_time": "1 day, 0:23:09", "remaining_time": "2:49:59"} +{"current_steps": 6749, "total_steps": 7532, "loss": 0.23055103421211243, "lr": 5.866644265772769e-07, "epoch": 1.792192271942637, "percentage": 89.6, "elapsed_time": "1 day, 0:23:21", "remaining_time": "2:49:46"} +{"current_steps": 6750, "total_steps": 7532, "loss": 0.2780724763870239, "lr": 5.851835490193136e-07, "epoch": 1.79245784092418, "percentage": 89.62, "elapsed_time": "1 day, 0:23:35", "remaining_time": 
"2:49:33"} +{"current_steps": 6751, "total_steps": 7532, "loss": 0.20216618478298187, "lr": 5.837044865269248e-07, "epoch": 1.792723409905723, "percentage": 89.63, "elapsed_time": "1 day, 0:23:48", "remaining_time": "2:49:20"} +{"current_steps": 6752, "total_steps": 7532, "loss": 0.2289930284023285, "lr": 5.822272393852557e-07, "epoch": 1.792988978887266, "percentage": 89.64, "elapsed_time": "1 day, 0:24:01", "remaining_time": "2:49:07"} +{"current_steps": 6753, "total_steps": 7532, "loss": 0.2028929740190506, "lr": 5.80751807879103e-07, "epoch": 1.793254547868809, "percentage": 89.66, "elapsed_time": "1 day, 0:24:14", "remaining_time": "2:48:54"} +{"current_steps": 6754, "total_steps": 7532, "loss": 0.1964842826128006, "lr": 5.792781922929114e-07, "epoch": 1.7935201168503518, "percentage": 89.67, "elapsed_time": "1 day, 0:24:28", "remaining_time": "2:48:41"} +{"current_steps": 6755, "total_steps": 7532, "loss": 0.2617039084434509, "lr": 5.77806392910778e-07, "epoch": 1.7937856858318948, "percentage": 89.68, "elapsed_time": "1 day, 0:24:41", "remaining_time": "2:48:28"} +{"current_steps": 6756, "total_steps": 7532, "loss": 0.2582395374774933, "lr": 5.76336410016447e-07, "epoch": 1.7940512548134377, "percentage": 89.7, "elapsed_time": "1 day, 0:24:54", "remaining_time": "2:48:15"} +{"current_steps": 6757, "total_steps": 7532, "loss": 0.23379334807395935, "lr": 5.74868243893314e-07, "epoch": 1.7943168237949807, "percentage": 89.71, "elapsed_time": "1 day, 0:25:07", "remaining_time": "2:48:02"} +{"current_steps": 6758, "total_steps": 7532, "loss": 0.2376977801322937, "lr": 5.734018948244247e-07, "epoch": 1.7945823927765236, "percentage": 89.72, "elapsed_time": "1 day, 0:25:21", "remaining_time": "2:47:49"} +{"current_steps": 6759, "total_steps": 7532, "loss": 0.21816037595272064, "lr": 5.719373630924741e-07, "epoch": 1.7948479617580668, "percentage": 89.74, "elapsed_time": "1 day, 0:25:34", "remaining_time": "2:47:36"} +{"current_steps": 6760, "total_steps": 7532, 
"loss": 0.22156387567520142, "lr": 5.704746489798063e-07, "epoch": 1.7951135307396098, "percentage": 89.75, "elapsed_time": "1 day, 0:25:47", "remaining_time": "2:47:23"} +{"current_steps": 6761, "total_steps": 7532, "loss": 0.20818129181861877, "lr": 5.690137527684147e-07, "epoch": 1.7953790997211527, "percentage": 89.76, "elapsed_time": "1 day, 0:26:00", "remaining_time": "2:47:10"} +{"current_steps": 6762, "total_steps": 7532, "loss": 0.18672943115234375, "lr": 5.67554674739944e-07, "epoch": 1.7956446687026957, "percentage": 89.78, "elapsed_time": "1 day, 0:26:13", "remaining_time": "2:46:57"} +{"current_steps": 6763, "total_steps": 7532, "loss": 0.2023036777973175, "lr": 5.66097415175686e-07, "epoch": 1.7959102376842386, "percentage": 89.79, "elapsed_time": "1 day, 0:26:27", "remaining_time": "2:46:44"} +{"current_steps": 6764, "total_steps": 7532, "loss": 0.24798424541950226, "lr": 5.646419743565845e-07, "epoch": 1.7961758066657816, "percentage": 89.8, "elapsed_time": "1 day, 0:26:39", "remaining_time": "2:46:31"} +{"current_steps": 6765, "total_steps": 7532, "loss": 0.1885790377855301, "lr": 5.631883525632297e-07, "epoch": 1.7964413756473245, "percentage": 89.82, "elapsed_time": "1 day, 0:26:53", "remaining_time": "2:46:18"} +{"current_steps": 6766, "total_steps": 7532, "loss": 0.24120381474494934, "lr": 5.617365500758631e-07, "epoch": 1.7967069446288675, "percentage": 89.83, "elapsed_time": "1 day, 0:27:06", "remaining_time": "2:46:05"} +{"current_steps": 6767, "total_steps": 7532, "loss": 0.24238690733909607, "lr": 5.602865671743763e-07, "epoch": 1.7969725136104104, "percentage": 89.84, "elapsed_time": "1 day, 0:27:20", "remaining_time": "2:45:52"} +{"current_steps": 6768, "total_steps": 7532, "loss": 0.22928190231323242, "lr": 5.588384041383089e-07, "epoch": 1.7972380825919534, "percentage": 89.86, "elapsed_time": "1 day, 0:27:33", "remaining_time": "2:45:39"} +{"current_steps": 6769, "total_steps": 7532, "loss": 0.2464730143547058, "lr": 
5.573920612468486e-07, "epoch": 1.7975036515734963, "percentage": 89.87, "elapsed_time": "1 day, 0:27:46", "remaining_time": "2:45:26"} +{"current_steps": 6770, "total_steps": 7532, "loss": 0.2167670875787735, "lr": 5.559475387788348e-07, "epoch": 1.7977692205550393, "percentage": 89.88, "elapsed_time": "1 day, 0:27:59", "remaining_time": "2:45:13"} +{"current_steps": 6771, "total_steps": 7532, "loss": 0.24080663919448853, "lr": 5.545048370127526e-07, "epoch": 1.7980347895365822, "percentage": 89.9, "elapsed_time": "1 day, 0:28:12", "remaining_time": "2:45:00"} +{"current_steps": 6772, "total_steps": 7532, "loss": 0.25481417775154114, "lr": 5.530639562267382e-07, "epoch": 1.7983003585181252, "percentage": 89.91, "elapsed_time": "1 day, 0:28:25", "remaining_time": "2:44:47"} +{"current_steps": 6773, "total_steps": 7532, "loss": 0.23328909277915955, "lr": 5.51624896698576e-07, "epoch": 1.7985659274996681, "percentage": 89.92, "elapsed_time": "1 day, 0:28:39", "remaining_time": "2:44:34"} +{"current_steps": 6774, "total_steps": 7532, "loss": 0.18779747188091278, "lr": 5.50187658705702e-07, "epoch": 1.798831496481211, "percentage": 89.94, "elapsed_time": "1 day, 0:28:52", "remaining_time": "2:44:21"} +{"current_steps": 6775, "total_steps": 7532, "loss": 0.24840545654296875, "lr": 5.487522425251968e-07, "epoch": 1.799097065462754, "percentage": 89.95, "elapsed_time": "1 day, 0:29:05", "remaining_time": "2:44:08"} +{"current_steps": 6776, "total_steps": 7532, "loss": 0.2559642791748047, "lr": 5.473186484337911e-07, "epoch": 1.799362634444297, "percentage": 89.96, "elapsed_time": "1 day, 0:29:19", "remaining_time": "2:43:55"} +{"current_steps": 6777, "total_steps": 7532, "loss": 0.2005981206893921, "lr": 5.458868767078673e-07, "epoch": 1.79962820342584, "percentage": 89.98, "elapsed_time": "1 day, 0:29:32", "remaining_time": "2:43:42"} +{"current_steps": 6778, "total_steps": 7532, "loss": 0.2480883002281189, "lr": 5.444569276234523e-07, "epoch": 1.799893772407383, 
"percentage": 89.99, "elapsed_time": "1 day, 0:29:45", "remaining_time": "2:43:29"} +{"current_steps": 6779, "total_steps": 7532, "loss": 0.23043295741081238, "lr": 5.430288014562235e-07, "epoch": 1.8001593413889259, "percentage": 90.0, "elapsed_time": "1 day, 0:29:58", "remaining_time": "2:43:16"} +{"current_steps": 6780, "total_steps": 7532, "loss": 0.22702521085739136, "lr": 5.416024984815072e-07, "epoch": 1.8004249103704688, "percentage": 90.02, "elapsed_time": "1 day, 0:30:12", "remaining_time": "2:43:04"} +{"current_steps": 6781, "total_steps": 7532, "loss": 0.19955751299858093, "lr": 5.401780189742789e-07, "epoch": 1.8006904793520118, "percentage": 90.03, "elapsed_time": "1 day, 0:30:25", "remaining_time": "2:42:51"} +{"current_steps": 6782, "total_steps": 7532, "loss": 0.19743162393569946, "lr": 5.387553632091591e-07, "epoch": 1.8009560483335547, "percentage": 90.04, "elapsed_time": "1 day, 0:30:38", "remaining_time": "2:42:38"} +{"current_steps": 6783, "total_steps": 7532, "loss": 0.2262525111436844, "lr": 5.373345314604206e-07, "epoch": 1.8012216173150977, "percentage": 90.06, "elapsed_time": "1 day, 0:30:52", "remaining_time": "2:42:25"} +{"current_steps": 6784, "total_steps": 7532, "loss": 0.249632328748703, "lr": 5.359155240019809e-07, "epoch": 1.8014871862966406, "percentage": 90.07, "elapsed_time": "1 day, 0:31:05", "remaining_time": "2:42:12"} +{"current_steps": 6785, "total_steps": 7532, "loss": 0.19300231337547302, "lr": 5.344983411074111e-07, "epoch": 1.8017527552781836, "percentage": 90.08, "elapsed_time": "1 day, 0:31:18", "remaining_time": "2:41:59"} +{"current_steps": 6786, "total_steps": 7532, "loss": 0.22256134450435638, "lr": 5.330829830499263e-07, "epoch": 1.8020183242597265, "percentage": 90.1, "elapsed_time": "1 day, 0:31:31", "remaining_time": "2:41:46"} +{"current_steps": 6787, "total_steps": 7532, "loss": 0.2666356563568115, "lr": 5.316694501023911e-07, "epoch": 1.8022838932412695, "percentage": 90.11, "elapsed_time": "1 day, 
0:31:44", "remaining_time": "2:41:33"} +{"current_steps": 6788, "total_steps": 7532, "loss": 0.223050057888031, "lr": 5.302577425373156e-07, "epoch": 1.8025494622228124, "percentage": 90.12, "elapsed_time": "1 day, 0:31:57", "remaining_time": "2:41:20"} +{"current_steps": 6789, "total_steps": 7532, "loss": 0.2298094481229782, "lr": 5.288478606268632e-07, "epoch": 1.8028150312043554, "percentage": 90.14, "elapsed_time": "1 day, 0:32:11", "remaining_time": "2:41:07"} +{"current_steps": 6790, "total_steps": 7532, "loss": 0.23596417903900146, "lr": 5.27439804642843e-07, "epoch": 1.8030806001858983, "percentage": 90.15, "elapsed_time": "1 day, 0:32:24", "remaining_time": "2:40:54"} +{"current_steps": 6791, "total_steps": 7532, "loss": 0.19501623511314392, "lr": 5.26033574856708e-07, "epoch": 1.8033461691674413, "percentage": 90.16, "elapsed_time": "1 day, 0:32:38", "remaining_time": "2:40:41"} +{"current_steps": 6792, "total_steps": 7532, "loss": 0.23518472909927368, "lr": 5.246291715395657e-07, "epoch": 1.8036117381489842, "percentage": 90.18, "elapsed_time": "1 day, 0:32:51", "remaining_time": "2:40:28"} +{"current_steps": 6793, "total_steps": 7532, "loss": 0.2251899093389511, "lr": 5.232265949621651e-07, "epoch": 1.8038773071305272, "percentage": 90.19, "elapsed_time": "1 day, 0:33:04", "remaining_time": "2:40:15"} +{"current_steps": 6794, "total_steps": 7532, "loss": 0.1764119267463684, "lr": 5.218258453949099e-07, "epoch": 1.8041428761120701, "percentage": 90.2, "elapsed_time": "1 day, 0:33:17", "remaining_time": "2:40:02"} +{"current_steps": 6795, "total_steps": 7532, "loss": 0.20768773555755615, "lr": 5.204269231078484e-07, "epoch": 1.804408445093613, "percentage": 90.22, "elapsed_time": "1 day, 0:33:31", "remaining_time": "2:39:49"} +{"current_steps": 6796, "total_steps": 7532, "loss": 0.2115546613931656, "lr": 5.19029828370674e-07, "epoch": 1.804674014075156, "percentage": 90.23, "elapsed_time": "1 day, 0:33:44", "remaining_time": "2:39:36"} +{"current_steps": 
6797, "total_steps": 7532, "loss": 0.2465972602367401, "lr": 5.176345614527312e-07, "epoch": 1.804939583056699, "percentage": 90.24, "elapsed_time": "1 day, 0:33:57", "remaining_time": "2:39:23"} +{"current_steps": 6798, "total_steps": 7532, "loss": 0.2359803020954132, "lr": 5.162411226230102e-07, "epoch": 1.805205152038242, "percentage": 90.25, "elapsed_time": "1 day, 0:34:10", "remaining_time": "2:39:10"} +{"current_steps": 6799, "total_steps": 7532, "loss": 0.27518990635871887, "lr": 5.148495121501506e-07, "epoch": 1.805470721019785, "percentage": 90.27, "elapsed_time": "1 day, 0:34:23", "remaining_time": "2:38:57"} +{"current_steps": 6800, "total_steps": 7532, "loss": 0.23914849758148193, "lr": 5.134597303024391e-07, "epoch": 1.8057362900013278, "percentage": 90.28, "elapsed_time": "1 day, 0:34:36", "remaining_time": "2:38:44"} +{"current_steps": 6801, "total_steps": 7532, "loss": 0.21771098673343658, "lr": 5.120717773478068e-07, "epoch": 1.8060018589828708, "percentage": 90.29, "elapsed_time": "1 day, 0:34:55", "remaining_time": "2:38:31"} +{"current_steps": 6802, "total_steps": 7532, "loss": 0.235421285033226, "lr": 5.106856535538363e-07, "epoch": 1.8062674279644138, "percentage": 90.31, "elapsed_time": "1 day, 0:35:08", "remaining_time": "2:38:18"} +{"current_steps": 6803, "total_steps": 7532, "loss": 0.23973548412322998, "lr": 5.093013591877561e-07, "epoch": 1.8065329969459567, "percentage": 90.32, "elapsed_time": "1 day, 0:35:21", "remaining_time": "2:38:05"} +{"current_steps": 6804, "total_steps": 7532, "loss": 0.24059349298477173, "lr": 5.079188945164426e-07, "epoch": 1.8067985659274997, "percentage": 90.33, "elapsed_time": "1 day, 0:35:34", "remaining_time": "2:37:52"} +{"current_steps": 6805, "total_steps": 7532, "loss": 0.25188207626342773, "lr": 5.065382598064161e-07, "epoch": 1.8070641349090426, "percentage": 90.35, "elapsed_time": "1 day, 0:35:47", "remaining_time": "2:37:39"} +{"current_steps": 6806, "total_steps": 7532, "loss": 
0.20124536752700806, "lr": 5.051594553238482e-07, "epoch": 1.8073297038905856, "percentage": 90.36, "elapsed_time": "1 day, 0:36:00", "remaining_time": "2:37:26"} +{"current_steps": 6807, "total_steps": 7532, "loss": 0.2059330940246582, "lr": 5.037824813345571e-07, "epoch": 1.8075952728721285, "percentage": 90.37, "elapsed_time": "1 day, 0:36:12", "remaining_time": "2:37:13"} +{"current_steps": 6808, "total_steps": 7532, "loss": 0.2122621238231659, "lr": 5.024073381040052e-07, "epoch": 1.8078608418536715, "percentage": 90.39, "elapsed_time": "1 day, 0:36:25", "remaining_time": "2:37:00"} +{"current_steps": 6809, "total_steps": 7532, "loss": 0.20064303278923035, "lr": 5.010340258973046e-07, "epoch": 1.8081264108352144, "percentage": 90.4, "elapsed_time": "1 day, 0:36:38", "remaining_time": "2:36:47"} +{"current_steps": 6810, "total_steps": 7532, "loss": 0.24773281812667847, "lr": 4.996625449792147e-07, "epoch": 1.8083919798167574, "percentage": 90.41, "elapsed_time": "1 day, 0:36:50", "remaining_time": "2:36:34"} +{"current_steps": 6811, "total_steps": 7532, "loss": 0.2111661732196808, "lr": 4.982928956141375e-07, "epoch": 1.8086575487983003, "percentage": 90.43, "elapsed_time": "1 day, 0:37:03", "remaining_time": "2:36:21"} +{"current_steps": 6812, "total_steps": 7532, "loss": 0.24823394417762756, "lr": 4.969250780661306e-07, "epoch": 1.8089231177798433, "percentage": 90.44, "elapsed_time": "1 day, 0:37:15", "remaining_time": "2:36:08"} +{"current_steps": 6813, "total_steps": 7532, "loss": 0.24726605415344238, "lr": 4.955590925988896e-07, "epoch": 1.8091886867613862, "percentage": 90.45, "elapsed_time": "1 day, 0:37:28", "remaining_time": "2:35:55"} +{"current_steps": 6814, "total_steps": 7532, "loss": 0.2269962728023529, "lr": 4.941949394757605e-07, "epoch": 1.8094542557429292, "percentage": 90.47, "elapsed_time": "1 day, 0:37:40", "remaining_time": "2:35:42"} +{"current_steps": 6815, "total_steps": 7532, "loss": 0.2336469292640686, "lr": 4.928326189597377e-07, 
"epoch": 1.8097198247244721, "percentage": 90.48, "elapsed_time": "1 day, 0:37:53", "remaining_time": "2:35:29"} +{"current_steps": 6816, "total_steps": 7532, "loss": 0.24872124195098877, "lr": 4.914721313134585e-07, "epoch": 1.809985393706015, "percentage": 90.49, "elapsed_time": "1 day, 0:38:06", "remaining_time": "2:35:16"} +{"current_steps": 6817, "total_steps": 7532, "loss": 0.2484157383441925, "lr": 4.901134767992099e-07, "epoch": 1.810250962687558, "percentage": 90.51, "elapsed_time": "1 day, 0:38:19", "remaining_time": "2:35:03"} +{"current_steps": 6818, "total_steps": 7532, "loss": 0.24683158099651337, "lr": 4.887566556789247e-07, "epoch": 1.810516531669101, "percentage": 90.52, "elapsed_time": "1 day, 0:38:31", "remaining_time": "2:34:50"} +{"current_steps": 6819, "total_steps": 7532, "loss": 0.18717995285987854, "lr": 4.874016682141802e-07, "epoch": 1.810782100650644, "percentage": 90.53, "elapsed_time": "1 day, 0:38:44", "remaining_time": "2:34:37"} +{"current_steps": 6820, "total_steps": 7532, "loss": 0.2220807671546936, "lr": 4.860485146662053e-07, "epoch": 1.8110476696321869, "percentage": 90.55, "elapsed_time": "1 day, 0:38:57", "remaining_time": "2:34:24"} +{"current_steps": 6821, "total_steps": 7532, "loss": 0.2178400307893753, "lr": 4.84697195295869e-07, "epoch": 1.8113132386137298, "percentage": 90.56, "elapsed_time": "1 day, 0:39:10", "remaining_time": "2:34:11"} +{"current_steps": 6822, "total_steps": 7532, "loss": 0.2056645154953003, "lr": 4.833477103636908e-07, "epoch": 1.8115788075952728, "percentage": 90.57, "elapsed_time": "1 day, 0:39:23", "remaining_time": "2:33:58"} +{"current_steps": 6823, "total_steps": 7532, "loss": 0.21441905200481415, "lr": 4.820000601298358e-07, "epoch": 1.8118443765768157, "percentage": 90.59, "elapsed_time": "1 day, 0:39:35", "remaining_time": "2:33:44"} +{"current_steps": 6824, "total_steps": 7532, "loss": 0.17688237130641937, "lr": 4.806542448541151e-07, "epoch": 1.8121099455583587, "percentage": 90.6, 
"elapsed_time": "1 day, 0:39:48", "remaining_time": "2:33:31"} +{"current_steps": 6825, "total_steps": 7532, "loss": 0.22405505180358887, "lr": 4.793102647959847e-07, "epoch": 1.8123755145399016, "percentage": 90.61, "elapsed_time": "1 day, 0:40:02", "remaining_time": "2:33:18"} +{"current_steps": 6826, "total_steps": 7532, "loss": 0.21617908775806427, "lr": 4.779681202145503e-07, "epoch": 1.8126410835214446, "percentage": 90.63, "elapsed_time": "1 day, 0:40:14", "remaining_time": "2:33:05"} +{"current_steps": 6827, "total_steps": 7532, "loss": 0.23570871353149414, "lr": 4.766278113685596e-07, "epoch": 1.8129066525029875, "percentage": 90.64, "elapsed_time": "1 day, 0:40:27", "remaining_time": "2:32:52"} +{"current_steps": 6828, "total_steps": 7532, "loss": 0.23806743323802948, "lr": 4.7528933851641036e-07, "epoch": 1.8131722214845305, "percentage": 90.65, "elapsed_time": "1 day, 0:40:40", "remaining_time": "2:32:39"} +{"current_steps": 6829, "total_steps": 7532, "loss": 0.24859179556369781, "lr": 4.739527019161405e-07, "epoch": 1.8134377904660735, "percentage": 90.67, "elapsed_time": "1 day, 0:40:53", "remaining_time": "2:32:26"} +{"current_steps": 6830, "total_steps": 7532, "loss": 0.21314260363578796, "lr": 4.726179018254418e-07, "epoch": 1.8137033594476164, "percentage": 90.68, "elapsed_time": "1 day, 0:41:06", "remaining_time": "2:32:13"} +{"current_steps": 6831, "total_steps": 7532, "loss": 0.25290659070014954, "lr": 4.7128493850164715e-07, "epoch": 1.8139689284291594, "percentage": 90.69, "elapsed_time": "1 day, 0:41:19", "remaining_time": "2:32:00"} +{"current_steps": 6832, "total_steps": 7532, "loss": 0.22606703639030457, "lr": 4.699538122017355e-07, "epoch": 1.8142344974107023, "percentage": 90.71, "elapsed_time": "1 day, 0:41:32", "remaining_time": "2:31:47"} +{"current_steps": 6833, "total_steps": 7532, "loss": 0.23973071575164795, "lr": 4.6862452318233275e-07, "epoch": 1.8145000663922453, "percentage": 90.72, "elapsed_time": "1 day, 0:41:45", 
"remaining_time": "2:31:34"} +{"current_steps": 6834, "total_steps": 7532, "loss": 0.2225341498851776, "lr": 4.672970716997094e-07, "epoch": 1.8147656353737882, "percentage": 90.73, "elapsed_time": "1 day, 0:41:57", "remaining_time": "2:31:21"} +{"current_steps": 6835, "total_steps": 7532, "loss": 0.19153356552124023, "lr": 4.6597145800978183e-07, "epoch": 1.8150312043553312, "percentage": 90.75, "elapsed_time": "1 day, 0:42:10", "remaining_time": "2:31:08"} +{"current_steps": 6836, "total_steps": 7532, "loss": 0.19694843888282776, "lr": 4.646476823681145e-07, "epoch": 1.8152967733368741, "percentage": 90.76, "elapsed_time": "1 day, 0:42:23", "remaining_time": "2:30:55"} +{"current_steps": 6837, "total_steps": 7532, "loss": 0.2353869527578354, "lr": 4.6332574502991554e-07, "epoch": 1.815562342318417, "percentage": 90.77, "elapsed_time": "1 day, 0:42:35", "remaining_time": "2:30:42"} +{"current_steps": 6838, "total_steps": 7532, "loss": 0.20919787883758545, "lr": 4.6200564625003775e-07, "epoch": 1.81582791129996, "percentage": 90.79, "elapsed_time": "1 day, 0:42:48", "remaining_time": "2:30:29"} +{"current_steps": 6839, "total_steps": 7532, "loss": 0.18352919816970825, "lr": 4.6068738628298193e-07, "epoch": 1.816093480281503, "percentage": 90.8, "elapsed_time": "1 day, 0:43:00", "remaining_time": "2:30:16"} +{"current_steps": 6840, "total_steps": 7532, "loss": 0.24711212515830994, "lr": 4.5937096538289147e-07, "epoch": 1.816359049263046, "percentage": 90.81, "elapsed_time": "1 day, 0:43:13", "remaining_time": "2:30:03"} +{"current_steps": 6841, "total_steps": 7532, "loss": 0.2350531816482544, "lr": 4.580563838035579e-07, "epoch": 1.8166246182445889, "percentage": 90.83, "elapsed_time": "1 day, 0:43:26", "remaining_time": "2:29:50"} +{"current_steps": 6842, "total_steps": 7532, "loss": 0.26124465465545654, "lr": 4.5674364179841614e-07, "epoch": 1.8168901872261318, "percentage": 90.84, "elapsed_time": "1 day, 0:43:38", "remaining_time": "2:29:37"} +{"current_steps": 
6843, "total_steps": 7532, "loss": 0.2110440880060196, "lr": 4.5543273962054934e-07, "epoch": 1.8171557562076748, "percentage": 90.85, "elapsed_time": "1 day, 0:43:52", "remaining_time": "2:29:24"} +{"current_steps": 6844, "total_steps": 7532, "loss": 0.2409415990114212, "lr": 4.5412367752268094e-07, "epoch": 1.817421325189218, "percentage": 90.87, "elapsed_time": "1 day, 0:44:04", "remaining_time": "2:29:11"} +{"current_steps": 6845, "total_steps": 7532, "loss": 0.2280777543783188, "lr": 4.528164557571857e-07, "epoch": 1.817686894170761, "percentage": 90.88, "elapsed_time": "1 day, 0:44:17", "remaining_time": "2:28:58"} +{"current_steps": 6846, "total_steps": 7532, "loss": 0.201339989900589, "lr": 4.515110745760787e-07, "epoch": 1.8179524631523039, "percentage": 90.89, "elapsed_time": "1 day, 0:44:30", "remaining_time": "2:28:45"} +{"current_steps": 6847, "total_steps": 7532, "loss": 0.22910752892494202, "lr": 4.5020753423102083e-07, "epoch": 1.8182180321338468, "percentage": 90.91, "elapsed_time": "1 day, 0:44:43", "remaining_time": "2:28:32"} +{"current_steps": 6848, "total_steps": 7532, "loss": 0.21736779808998108, "lr": 4.4890583497332327e-07, "epoch": 1.8184836011153898, "percentage": 90.92, "elapsed_time": "1 day, 0:44:55", "remaining_time": "2:28:19"} +{"current_steps": 6849, "total_steps": 7532, "loss": 0.20898449420928955, "lr": 4.476059770539354e-07, "epoch": 1.8187491700969327, "percentage": 90.93, "elapsed_time": "1 day, 0:45:08", "remaining_time": "2:28:06"} +{"current_steps": 6850, "total_steps": 7532, "loss": 0.22159051895141602, "lr": 4.463079607234555e-07, "epoch": 1.8190147390784757, "percentage": 90.95, "elapsed_time": "1 day, 0:45:21", "remaining_time": "2:27:53"} +{"current_steps": 6851, "total_steps": 7532, "loss": 0.24081172049045563, "lr": 4.450117862321246e-07, "epoch": 1.8192803080600186, "percentage": 90.96, "elapsed_time": "1 day, 0:45:34", "remaining_time": "2:27:40"} +{"current_steps": 6852, "total_steps": 7532, "loss": 
0.17856758832931519, "lr": 4.4371745382983164e-07, "epoch": 1.8195458770415616, "percentage": 90.97, "elapsed_time": "1 day, 0:45:47", "remaining_time": "2:27:27"} +{"current_steps": 6853, "total_steps": 7532, "loss": 0.20796868205070496, "lr": 4.424249637661071e-07, "epoch": 1.8198114460231045, "percentage": 90.99, "elapsed_time": "1 day, 0:45:59", "remaining_time": "2:27:14"} +{"current_steps": 6854, "total_steps": 7532, "loss": 0.24277149140834808, "lr": 4.4113431629013046e-07, "epoch": 1.8200770150046475, "percentage": 91.0, "elapsed_time": "1 day, 0:46:12", "remaining_time": "2:27:01"} +{"current_steps": 6855, "total_steps": 7532, "loss": 0.19315838813781738, "lr": 4.3984551165071944e-07, "epoch": 1.8203425839861904, "percentage": 91.01, "elapsed_time": "1 day, 0:46:25", "remaining_time": "2:26:47"} +{"current_steps": 6856, "total_steps": 7532, "loss": 0.20789340138435364, "lr": 4.3855855009634075e-07, "epoch": 1.8206081529677334, "percentage": 91.02, "elapsed_time": "1 day, 0:46:38", "remaining_time": "2:26:34"} +{"current_steps": 6857, "total_steps": 7532, "loss": 0.2871186137199402, "lr": 4.372734318751082e-07, "epoch": 1.8208737219492763, "percentage": 91.04, "elapsed_time": "1 day, 0:46:50", "remaining_time": "2:26:21"} +{"current_steps": 6858, "total_steps": 7532, "loss": 0.2419736236333847, "lr": 4.359901572347758e-07, "epoch": 1.8211392909308193, "percentage": 91.05, "elapsed_time": "1 day, 0:47:03", "remaining_time": "2:26:08"} +{"current_steps": 6859, "total_steps": 7532, "loss": 0.2190292328596115, "lr": 4.3470872642274455e-07, "epoch": 1.8214048599123622, "percentage": 91.06, "elapsed_time": "1 day, 0:47:16", "remaining_time": "2:25:55"} +{"current_steps": 6860, "total_steps": 7532, "loss": 0.2654367685317993, "lr": 4.3342913968605903e-07, "epoch": 1.8216704288939052, "percentage": 91.08, "elapsed_time": "1 day, 0:47:29", "remaining_time": "2:25:42"} +{"current_steps": 6861, "total_steps": 7532, "loss": 0.2536984086036682, "lr": 
4.321513972714075e-07, "epoch": 1.8219359978754481, "percentage": 91.09, "elapsed_time": "1 day, 0:47:42", "remaining_time": "2:25:29"} +{"current_steps": 6862, "total_steps": 7532, "loss": 0.260431170463562, "lr": 4.308754994251252e-07, "epoch": 1.822201566856991, "percentage": 91.1, "elapsed_time": "1 day, 0:47:55", "remaining_time": "2:25:16"} +{"current_steps": 6863, "total_steps": 7532, "loss": 0.19348303973674774, "lr": 4.2960144639318855e-07, "epoch": 1.822467135838534, "percentage": 91.12, "elapsed_time": "1 day, 0:48:08", "remaining_time": "2:25:03"} +{"current_steps": 6864, "total_steps": 7532, "loss": 0.2284386157989502, "lr": 4.283292384212201e-07, "epoch": 1.822732704820077, "percentage": 91.13, "elapsed_time": "1 day, 0:48:22", "remaining_time": "2:24:50"} +{"current_steps": 6865, "total_steps": 7532, "loss": 0.23439526557922363, "lr": 4.270588757544869e-07, "epoch": 1.82299827380162, "percentage": 91.14, "elapsed_time": "1 day, 0:48:35", "remaining_time": "2:24:37"} +{"current_steps": 6866, "total_steps": 7532, "loss": 0.2123441994190216, "lr": 4.2579035863790086e-07, "epoch": 1.823263842783163, "percentage": 91.16, "elapsed_time": "1 day, 0:48:48", "remaining_time": "2:24:24"} +{"current_steps": 6867, "total_steps": 7532, "loss": 0.24568180739879608, "lr": 4.245236873160163e-07, "epoch": 1.8235294117647058, "percentage": 91.17, "elapsed_time": "1 day, 0:49:01", "remaining_time": "2:24:11"} +{"current_steps": 6868, "total_steps": 7532, "loss": 0.24078285694122314, "lr": 4.232588620330325e-07, "epoch": 1.8237949807462488, "percentage": 91.18, "elapsed_time": "1 day, 0:49:14", "remaining_time": "2:23:58"} +{"current_steps": 6869, "total_steps": 7532, "loss": 0.2003621608018875, "lr": 4.2199588303279414e-07, "epoch": 1.8240605497277917, "percentage": 91.2, "elapsed_time": "1 day, 0:49:26", "remaining_time": "2:23:45"} +{"current_steps": 6870, "total_steps": 7532, "loss": 0.21201889216899872, "lr": 4.2073475055878664e-07, "epoch": 1.8243261187093347, 
"percentage": 91.21, "elapsed_time": "1 day, 0:49:39", "remaining_time": "2:23:32"} +{"current_steps": 6871, "total_steps": 7532, "loss": 0.23175427317619324, "lr": 4.1947546485414215e-07, "epoch": 1.8245916876908777, "percentage": 91.22, "elapsed_time": "1 day, 0:49:52", "remaining_time": "2:23:19"} +{"current_steps": 6872, "total_steps": 7532, "loss": 0.2391383945941925, "lr": 4.182180261616364e-07, "epoch": 1.8248572566724208, "percentage": 91.24, "elapsed_time": "1 day, 0:50:05", "remaining_time": "2:23:06"} +{"current_steps": 6873, "total_steps": 7532, "loss": 0.23120146989822388, "lr": 4.169624347236878e-07, "epoch": 1.8251228256539638, "percentage": 91.25, "elapsed_time": "1 day, 0:50:18", "remaining_time": "2:22:53"} +{"current_steps": 6874, "total_steps": 7532, "loss": 0.22541432082653046, "lr": 4.157086907823604e-07, "epoch": 1.8253883946355067, "percentage": 91.26, "elapsed_time": "1 day, 0:50:30", "remaining_time": "2:22:40"} +{"current_steps": 6875, "total_steps": 7532, "loss": 0.25613510608673096, "lr": 4.1445679457936094e-07, "epoch": 1.8256539636170497, "percentage": 91.28, "elapsed_time": "1 day, 0:50:43", "remaining_time": "2:22:27"} +{"current_steps": 6876, "total_steps": 7532, "loss": 0.21002547442913055, "lr": 4.1320674635604186e-07, "epoch": 1.8259195325985926, "percentage": 91.29, "elapsed_time": "1 day, 0:50:56", "remaining_time": "2:22:14"} +{"current_steps": 6877, "total_steps": 7532, "loss": 0.2593066692352295, "lr": 4.119585463533959e-07, "epoch": 1.8261851015801356, "percentage": 91.3, "elapsed_time": "1 day, 0:51:09", "remaining_time": "2:22:01"} +{"current_steps": 6878, "total_steps": 7532, "loss": 0.23771531879901886, "lr": 4.1071219481206184e-07, "epoch": 1.8264506705616785, "percentage": 91.32, "elapsed_time": "1 day, 0:51:21", "remaining_time": "2:21:48"} +{"current_steps": 6879, "total_steps": 7532, "loss": 0.2069541960954666, "lr": 4.094676919723206e-07, "epoch": 1.8267162395432215, "percentage": 91.33, "elapsed_time": "1 day, 
0:51:34", "remaining_time": "2:21:35"} +{"current_steps": 6880, "total_steps": 7532, "loss": 0.21314311027526855, "lr": 4.082250380740993e-07, "epoch": 1.8269818085247644, "percentage": 91.34, "elapsed_time": "1 day, 0:51:46", "remaining_time": "2:21:22"} +{"current_steps": 6881, "total_steps": 7532, "loss": 0.198696106672287, "lr": 4.069842333569662e-07, "epoch": 1.8272473775063074, "percentage": 91.36, "elapsed_time": "1 day, 0:51:59", "remaining_time": "2:21:09"} +{"current_steps": 6882, "total_steps": 7532, "loss": 0.22771228849887848, "lr": 4.057452780601334e-07, "epoch": 1.8275129464878503, "percentage": 91.37, "elapsed_time": "1 day, 0:52:12", "remaining_time": "2:20:56"} +{"current_steps": 6883, "total_steps": 7532, "loss": 0.24176150560379028, "lr": 4.045081724224564e-07, "epoch": 1.8277785154693933, "percentage": 91.38, "elapsed_time": "1 day, 0:52:25", "remaining_time": "2:20:43"} +{"current_steps": 6884, "total_steps": 7532, "loss": 0.18257084488868713, "lr": 4.0327291668243785e-07, "epoch": 1.8280440844509362, "percentage": 91.4, "elapsed_time": "1 day, 0:52:39", "remaining_time": "2:20:30"} +{"current_steps": 6885, "total_steps": 7532, "loss": 0.2317531704902649, "lr": 4.02039511078216e-07, "epoch": 1.8283096534324792, "percentage": 91.41, "elapsed_time": "1 day, 0:52:51", "remaining_time": "2:20:17"} +{"current_steps": 6886, "total_steps": 7532, "loss": 0.22523516416549683, "lr": 4.008079558475797e-07, "epoch": 1.8285752224140222, "percentage": 91.42, "elapsed_time": "1 day, 0:53:05", "remaining_time": "2:20:04"} +{"current_steps": 6887, "total_steps": 7532, "loss": 0.22351330518722534, "lr": 3.995782512279578e-07, "epoch": 1.828840791395565, "percentage": 91.44, "elapsed_time": "1 day, 0:53:17", "remaining_time": "2:19:51"} +{"current_steps": 6888, "total_steps": 7532, "loss": 0.22151902318000793, "lr": 3.983503974564229e-07, "epoch": 1.829106360377108, "percentage": 91.45, "elapsed_time": "1 day, 0:53:31", "remaining_time": "2:19:38"} 
+{"current_steps": 6889, "total_steps": 7532, "loss": 0.20800583064556122, "lr": 3.971243947696901e-07, "epoch": 1.829371929358651, "percentage": 91.46, "elapsed_time": "1 day, 0:53:43", "remaining_time": "2:19:25"} +{"current_steps": 6890, "total_steps": 7532, "loss": 0.21332690119743347, "lr": 3.959002434041181e-07, "epoch": 1.829637498340194, "percentage": 91.48, "elapsed_time": "1 day, 0:53:56", "remaining_time": "2:19:12"} +{"current_steps": 6891, "total_steps": 7532, "loss": 0.2561502456665039, "lr": 3.946779435957093e-07, "epoch": 1.829903067321737, "percentage": 91.49, "elapsed_time": "1 day, 0:54:09", "remaining_time": "2:18:59"} +{"current_steps": 6892, "total_steps": 7532, "loss": 0.23636910319328308, "lr": 3.934574955801074e-07, "epoch": 1.8301686363032799, "percentage": 91.5, "elapsed_time": "1 day, 0:54:22", "remaining_time": "2:18:46"} +{"current_steps": 6893, "total_steps": 7532, "loss": 0.26683998107910156, "lr": 3.922388995926041e-07, "epoch": 1.8304342052848228, "percentage": 91.52, "elapsed_time": "1 day, 0:54:35", "remaining_time": "2:18:33"} +{"current_steps": 6894, "total_steps": 7532, "loss": 0.2779492735862732, "lr": 3.910221558681271e-07, "epoch": 1.8306997742663658, "percentage": 91.53, "elapsed_time": "1 day, 0:54:48", "remaining_time": "2:18:20"} +{"current_steps": 6895, "total_steps": 7532, "loss": 0.20174488425254822, "lr": 3.8980726464125095e-07, "epoch": 1.8309653432479087, "percentage": 91.54, "elapsed_time": "1 day, 0:55:01", "remaining_time": "2:18:07"} +{"current_steps": 6896, "total_steps": 7532, "loss": 0.21486055850982666, "lr": 3.885942261461928e-07, "epoch": 1.8312309122294517, "percentage": 91.56, "elapsed_time": "1 day, 0:55:14", "remaining_time": "2:17:54"} +{"current_steps": 6897, "total_steps": 7532, "loss": 0.25637733936309814, "lr": 3.8738304061681107e-07, "epoch": 1.8314964812109946, "percentage": 91.57, "elapsed_time": "1 day, 0:55:27", "remaining_time": "2:17:41"} +{"current_steps": 6898, "total_steps": 7532, 
"loss": 0.2518364489078522, "lr": 3.8617370828661014e-07, "epoch": 1.8317620501925376, "percentage": 91.58, "elapsed_time": "1 day, 0:55:40", "remaining_time": "2:17:28"} +{"current_steps": 6899, "total_steps": 7532, "loss": 0.25752246379852295, "lr": 3.849662293887324e-07, "epoch": 1.8320276191740805, "percentage": 91.6, "elapsed_time": "1 day, 0:55:53", "remaining_time": "2:17:15"} +{"current_steps": 6900, "total_steps": 7532, "loss": 0.20891718566417694, "lr": 3.8376060415596826e-07, "epoch": 1.8322931881556235, "percentage": 91.61, "elapsed_time": "1 day, 0:56:06", "remaining_time": "2:17:02"} +{"current_steps": 6901, "total_steps": 7532, "loss": 0.20491960644721985, "lr": 3.825568328207452e-07, "epoch": 1.8325587571371664, "percentage": 91.62, "elapsed_time": "1 day, 0:56:25", "remaining_time": "2:16:49"} +{"current_steps": 6902, "total_steps": 7532, "loss": 0.22183339297771454, "lr": 3.813549156151386e-07, "epoch": 1.8328243261187094, "percentage": 91.64, "elapsed_time": "1 day, 0:56:37", "remaining_time": "2:16:36"} +{"current_steps": 6903, "total_steps": 7532, "loss": 0.2476987987756729, "lr": 3.801548527708621e-07, "epoch": 1.8330898951002523, "percentage": 91.65, "elapsed_time": "1 day, 0:56:51", "remaining_time": "2:16:23"} +{"current_steps": 6904, "total_steps": 7532, "loss": 0.26486238837242126, "lr": 3.7895664451927493e-07, "epoch": 1.8333554640817953, "percentage": 91.66, "elapsed_time": "1 day, 0:57:03", "remaining_time": "2:16:10"} +{"current_steps": 6905, "total_steps": 7532, "loss": 0.25922873616218567, "lr": 3.777602910913769e-07, "epoch": 1.8336210330633382, "percentage": 91.68, "elapsed_time": "1 day, 0:57:16", "remaining_time": "2:15:57"} +{"current_steps": 6906, "total_steps": 7532, "loss": 0.22682476043701172, "lr": 3.7656579271781127e-07, "epoch": 1.8338866020448812, "percentage": 91.69, "elapsed_time": "1 day, 0:57:29", "remaining_time": "2:15:44"} +{"current_steps": 6907, "total_steps": 7532, "loss": 0.20371592044830322, "lr": 
3.753731496288626e-07, "epoch": 1.8341521710264241, "percentage": 91.7, "elapsed_time": "1 day, 0:57:43", "remaining_time": "2:15:31"} +{"current_steps": 6908, "total_steps": 7532, "loss": 0.23857446014881134, "lr": 3.7418236205445826e-07, "epoch": 1.834417740007967, "percentage": 91.72, "elapsed_time": "1 day, 0:57:55", "remaining_time": "2:15:18"} +{"current_steps": 6909, "total_steps": 7532, "loss": 0.27119290828704834, "lr": 3.729934302241689e-07, "epoch": 1.83468330898951, "percentage": 91.73, "elapsed_time": "1 day, 0:58:08", "remaining_time": "2:15:05"} +{"current_steps": 6910, "total_steps": 7532, "loss": 0.2354927361011505, "lr": 3.7180635436720567e-07, "epoch": 1.834948877971053, "percentage": 91.74, "elapsed_time": "1 day, 0:58:21", "remaining_time": "2:14:52"} +{"current_steps": 6911, "total_steps": 7532, "loss": 0.26378512382507324, "lr": 3.706211347124233e-07, "epoch": 1.835214446952596, "percentage": 91.76, "elapsed_time": "1 day, 0:58:34", "remaining_time": "2:14:39"} +{"current_steps": 6912, "total_steps": 7532, "loss": 0.20725026726722717, "lr": 3.6943777148831907e-07, "epoch": 1.835480015934139, "percentage": 91.77, "elapsed_time": "1 day, 0:58:47", "remaining_time": "2:14:26"} +{"current_steps": 6913, "total_steps": 7532, "loss": 0.2049856185913086, "lr": 3.682562649230304e-07, "epoch": 1.8357455849156818, "percentage": 91.78, "elapsed_time": "1 day, 0:59:00", "remaining_time": "2:14:13"} +{"current_steps": 6914, "total_steps": 7532, "loss": 0.19303423166275024, "lr": 3.6707661524433833e-07, "epoch": 1.8360111538972248, "percentage": 91.8, "elapsed_time": "1 day, 0:59:13", "remaining_time": "2:14:00"} +{"current_steps": 6915, "total_steps": 7532, "loss": 0.21510104835033417, "lr": 3.6589882267966445e-07, "epoch": 1.8362767228787678, "percentage": 91.81, "elapsed_time": "1 day, 0:59:26", "remaining_time": "2:13:47"} +{"current_steps": 6916, "total_steps": 7532, "loss": 0.1933138072490692, "lr": 3.6472288745607376e-07, "epoch": 1.8365422918603107, 
"percentage": 91.82, "elapsed_time": "1 day, 0:59:39", "remaining_time": "2:13:34"} +{"current_steps": 6917, "total_steps": 7532, "loss": 0.2015206664800644, "lr": 3.6354880980027373e-07, "epoch": 1.8368078608418537, "percentage": 91.83, "elapsed_time": "1 day, 0:59:52", "remaining_time": "2:13:21"} +{"current_steps": 6918, "total_steps": 7532, "loss": 0.20550866425037384, "lr": 3.6237658993861114e-07, "epoch": 1.8370734298233966, "percentage": 91.85, "elapsed_time": "1 day, 1:00:05", "remaining_time": "2:13:08"} +{"current_steps": 6919, "total_steps": 7532, "loss": 0.221620112657547, "lr": 3.612062280970763e-07, "epoch": 1.8373389988049396, "percentage": 91.86, "elapsed_time": "1 day, 1:00:18", "remaining_time": "2:12:55"} +{"current_steps": 6920, "total_steps": 7532, "loss": 0.23098941147327423, "lr": 3.6003772450130315e-07, "epoch": 1.8376045677864825, "percentage": 91.87, "elapsed_time": "1 day, 1:00:31", "remaining_time": "2:12:42"} +{"current_steps": 6921, "total_steps": 7532, "loss": 0.2119837999343872, "lr": 3.588710793765626e-07, "epoch": 1.8378701367680255, "percentage": 91.89, "elapsed_time": "1 day, 1:00:44", "remaining_time": "2:12:29"} +{"current_steps": 6922, "total_steps": 7532, "loss": 0.24879229068756104, "lr": 3.5770629294777146e-07, "epoch": 1.8381357057495684, "percentage": 91.9, "elapsed_time": "1 day, 1:00:57", "remaining_time": "2:12:16"} +{"current_steps": 6923, "total_steps": 7532, "loss": 0.18895789980888367, "lr": 3.565433654394879e-07, "epoch": 1.8384012747311114, "percentage": 91.91, "elapsed_time": "1 day, 1:01:10", "remaining_time": "2:12:03"} +{"current_steps": 6924, "total_steps": 7532, "loss": 0.23148275911808014, "lr": 3.55382297075908e-07, "epoch": 1.8386668437126543, "percentage": 91.93, "elapsed_time": "1 day, 1:01:23", "remaining_time": "2:11:50"} +{"current_steps": 6925, "total_steps": 7532, "loss": 0.20919913053512573, "lr": 3.542230880808739e-07, "epoch": 1.8389324126941973, "percentage": 91.94, "elapsed_time": "1 day, 
1:01:35", "remaining_time": "2:11:37"} +{"current_steps": 6926, "total_steps": 7532, "loss": 0.22832845151424408, "lr": 3.53065738677868e-07, "epoch": 1.8391979816757402, "percentage": 91.95, "elapsed_time": "1 day, 1:01:48", "remaining_time": "2:11:24"} +{"current_steps": 6927, "total_steps": 7532, "loss": 0.25866004824638367, "lr": 3.519102490900117e-07, "epoch": 1.8394635506572832, "percentage": 91.97, "elapsed_time": "1 day, 1:02:01", "remaining_time": "2:11:11"} +{"current_steps": 6928, "total_steps": 7532, "loss": 0.23372048139572144, "lr": 3.507566195400691e-07, "epoch": 1.8397291196388261, "percentage": 91.98, "elapsed_time": "1 day, 1:02:14", "remaining_time": "2:10:58"} +{"current_steps": 6929, "total_steps": 7532, "loss": 0.2516997158527374, "lr": 3.496048502504501e-07, "epoch": 1.839994688620369, "percentage": 91.99, "elapsed_time": "1 day, 1:02:27", "remaining_time": "2:10:45"} +{"current_steps": 6930, "total_steps": 7532, "loss": 0.21170508861541748, "lr": 3.4845494144320036e-07, "epoch": 1.840260257601912, "percentage": 92.01, "elapsed_time": "1 day, 1:02:41", "remaining_time": "2:10:32"} +{"current_steps": 6931, "total_steps": 7532, "loss": 0.2642953395843506, "lr": 3.473068933400081e-07, "epoch": 1.840525826583455, "percentage": 92.02, "elapsed_time": "1 day, 1:02:53", "remaining_time": "2:10:19"} +{"current_steps": 6932, "total_steps": 7532, "loss": 0.2294994294643402, "lr": 3.461607061622041e-07, "epoch": 1.840791395564998, "percentage": 92.03, "elapsed_time": "1 day, 1:03:06", "remaining_time": "2:10:06"} +{"current_steps": 6933, "total_steps": 7532, "loss": 0.2554621696472168, "lr": 3.450163801307582e-07, "epoch": 1.8410569645465409, "percentage": 92.05, "elapsed_time": "1 day, 1:03:19", "remaining_time": "2:09:53"} +{"current_steps": 6934, "total_steps": 7532, "loss": 0.2291295826435089, "lr": 3.4387391546628733e-07, "epoch": 1.8413225335280838, "percentage": 92.06, "elapsed_time": "1 day, 1:03:32", "remaining_time": "2:09:40"} 
+{"current_steps": 6935, "total_steps": 7532, "loss": 0.1996842920780182, "lr": 3.4273331238903974e-07, "epoch": 1.8415881025096268, "percentage": 92.07, "elapsed_time": "1 day, 1:03:45", "remaining_time": "2:09:27"} +{"current_steps": 6936, "total_steps": 7532, "loss": 0.248038187623024, "lr": 3.415945711189128e-07, "epoch": 1.8418536714911697, "percentage": 92.09, "elapsed_time": "1 day, 1:03:58", "remaining_time": "2:09:14"} +{"current_steps": 6937, "total_steps": 7532, "loss": 0.232235848903656, "lr": 3.4045769187544096e-07, "epoch": 1.8421192404727127, "percentage": 92.1, "elapsed_time": "1 day, 1:04:11", "remaining_time": "2:09:01"} +{"current_steps": 6938, "total_steps": 7532, "loss": 0.2526085376739502, "lr": 3.3932267487780333e-07, "epoch": 1.8423848094542556, "percentage": 92.11, "elapsed_time": "1 day, 1:04:23", "remaining_time": "2:08:47"} +{"current_steps": 6939, "total_steps": 7532, "loss": 0.22401389479637146, "lr": 3.381895203448182e-07, "epoch": 1.8426503784357986, "percentage": 92.13, "elapsed_time": "1 day, 1:04:36", "remaining_time": "2:08:34"} +{"current_steps": 6940, "total_steps": 7532, "loss": 0.2509264647960663, "lr": 3.3705822849494195e-07, "epoch": 1.8429159474173415, "percentage": 92.14, "elapsed_time": "1 day, 1:04:49", "remaining_time": "2:08:21"} +{"current_steps": 6941, "total_steps": 7532, "loss": 0.2451169192790985, "lr": 3.3592879954627564e-07, "epoch": 1.8431815163988845, "percentage": 92.15, "elapsed_time": "1 day, 1:05:03", "remaining_time": "2:08:08"} +{"current_steps": 6942, "total_steps": 7532, "loss": 0.2361738532781601, "lr": 3.3480123371655957e-07, "epoch": 1.8434470853804275, "percentage": 92.17, "elapsed_time": "1 day, 1:05:15", "remaining_time": "2:07:55"} +{"current_steps": 6943, "total_steps": 7532, "loss": 0.22336295247077942, "lr": 3.3367553122317544e-07, "epoch": 1.8437126543619704, "percentage": 92.18, "elapsed_time": "1 day, 1:05:29", "remaining_time": "2:07:42"} +{"current_steps": 6944, "total_steps": 7532, 
"loss": 0.22287659347057343, "lr": 3.325516922831451e-07, "epoch": 1.8439782233435134, "percentage": 92.19, "elapsed_time": "1 day, 1:05:42", "remaining_time": "2:07:29"} +{"current_steps": 6945, "total_steps": 7532, "loss": 0.21845945715904236, "lr": 3.3142971711312975e-07, "epoch": 1.8442437923250563, "percentage": 92.21, "elapsed_time": "1 day, 1:05:55", "remaining_time": "2:07:16"} +{"current_steps": 6946, "total_steps": 7532, "loss": 0.2650350332260132, "lr": 3.303096059294364e-07, "epoch": 1.8445093613065993, "percentage": 92.22, "elapsed_time": "1 day, 1:06:08", "remaining_time": "2:07:03"} +{"current_steps": 6947, "total_steps": 7532, "loss": 0.21282124519348145, "lr": 3.291913589480078e-07, "epoch": 1.8447749302881422, "percentage": 92.23, "elapsed_time": "1 day, 1:06:22", "remaining_time": "2:06:50"} +{"current_steps": 6948, "total_steps": 7532, "loss": 0.17899346351623535, "lr": 3.280749763844293e-07, "epoch": 1.8450404992696852, "percentage": 92.25, "elapsed_time": "1 day, 1:06:35", "remaining_time": "2:06:37"} +{"current_steps": 6949, "total_steps": 7532, "loss": 0.23462103307247162, "lr": 3.269604584539254e-07, "epoch": 1.8453060682512281, "percentage": 92.26, "elapsed_time": "1 day, 1:06:48", "remaining_time": "2:06:24"} +{"current_steps": 6950, "total_steps": 7532, "loss": 0.20188388228416443, "lr": 3.2584780537136206e-07, "epoch": 1.845571637232771, "percentage": 92.27, "elapsed_time": "1 day, 1:07:01", "remaining_time": "2:06:11"} +{"current_steps": 6951, "total_steps": 7532, "loss": 0.2760109305381775, "lr": 3.247370173512443e-07, "epoch": 1.845837206214314, "percentage": 92.29, "elapsed_time": "1 day, 1:07:14", "remaining_time": "2:05:58"} +{"current_steps": 6952, "total_steps": 7532, "loss": 0.20977352559566498, "lr": 3.236280946077219e-07, "epoch": 1.846102775195857, "percentage": 92.3, "elapsed_time": "1 day, 1:07:27", "remaining_time": "2:05:45"} +{"current_steps": 6953, "total_steps": 7532, "loss": 0.26468873023986816, "lr": 
3.225210373545806e-07, "epoch": 1.8463683441774, "percentage": 92.31, "elapsed_time": "1 day, 1:07:40", "remaining_time": "2:05:32"} +{"current_steps": 6954, "total_steps": 7532, "loss": 0.2362184375524521, "lr": 3.214158458052463e-07, "epoch": 1.8466339131589429, "percentage": 92.33, "elapsed_time": "1 day, 1:07:53", "remaining_time": "2:05:19"} +{"current_steps": 6955, "total_steps": 7532, "loss": 0.21406327188014984, "lr": 3.2031252017278966e-07, "epoch": 1.8468994821404858, "percentage": 92.34, "elapsed_time": "1 day, 1:08:06", "remaining_time": "2:05:06"} +{"current_steps": 6956, "total_steps": 7532, "loss": 0.2698758840560913, "lr": 3.1921106066991835e-07, "epoch": 1.847165051122029, "percentage": 92.35, "elapsed_time": "1 day, 1:08:20", "remaining_time": "2:04:53"} +{"current_steps": 6957, "total_steps": 7532, "loss": 0.22954389452934265, "lr": 3.1811146750898025e-07, "epoch": 1.847430620103572, "percentage": 92.37, "elapsed_time": "1 day, 1:08:33", "remaining_time": "2:04:40"} +{"current_steps": 6958, "total_steps": 7532, "loss": 0.23005755245685577, "lr": 3.170137409019636e-07, "epoch": 1.847696189085115, "percentage": 92.38, "elapsed_time": "1 day, 1:08:46", "remaining_time": "2:04:27"} +{"current_steps": 6959, "total_steps": 7532, "loss": 0.22408893704414368, "lr": 3.159178810604968e-07, "epoch": 1.8479617580666579, "percentage": 92.39, "elapsed_time": "1 day, 1:09:00", "remaining_time": "2:04:15"} +{"current_steps": 6960, "total_steps": 7532, "loss": 0.1983698308467865, "lr": 3.14823888195851e-07, "epoch": 1.8482273270482008, "percentage": 92.41, "elapsed_time": "1 day, 1:09:13", "remaining_time": "2:04:02"} +{"current_steps": 6961, "total_steps": 7532, "loss": 0.24643054604530334, "lr": 3.137317625189329e-07, "epoch": 1.8484928960297438, "percentage": 92.42, "elapsed_time": "1 day, 1:09:26", "remaining_time": "2:03:49"} +{"current_steps": 6962, "total_steps": 7532, "loss": 0.274917870759964, "lr": 3.1264150424029083e-07, "epoch": 1.8487584650112867, 
"percentage": 92.43, "elapsed_time": "1 day, 1:09:39", "remaining_time": "2:03:36"} +{"current_steps": 6963, "total_steps": 7532, "loss": 0.2129468023777008, "lr": 3.115531135701155e-07, "epoch": 1.8490240339928297, "percentage": 92.45, "elapsed_time": "1 day, 1:09:53", "remaining_time": "2:03:23"} +{"current_steps": 6964, "total_steps": 7532, "loss": 0.24127928912639618, "lr": 3.1046659071823695e-07, "epoch": 1.8492896029743726, "percentage": 92.46, "elapsed_time": "1 day, 1:10:06", "remaining_time": "2:03:10"} +{"current_steps": 6965, "total_steps": 7532, "loss": 0.2528054416179657, "lr": 3.093819358941208e-07, "epoch": 1.8495551719559156, "percentage": 92.47, "elapsed_time": "1 day, 1:10:19", "remaining_time": "2:02:57"} +{"current_steps": 6966, "total_steps": 7532, "loss": 0.23623798787593842, "lr": 3.0829914930687767e-07, "epoch": 1.8498207409374585, "percentage": 92.49, "elapsed_time": "1 day, 1:10:32", "remaining_time": "2:02:44"} +{"current_steps": 6967, "total_steps": 7532, "loss": 0.20241659879684448, "lr": 3.0721823116525497e-07, "epoch": 1.8500863099190015, "percentage": 92.5, "elapsed_time": "1 day, 1:10:46", "remaining_time": "2:02:31"} +{"current_steps": 6968, "total_steps": 7532, "loss": 0.24365916848182678, "lr": 3.0613918167764156e-07, "epoch": 1.8503518789005444, "percentage": 92.51, "elapsed_time": "1 day, 1:10:58", "remaining_time": "2:02:18"} +{"current_steps": 6969, "total_steps": 7532, "loss": 0.2550637722015381, "lr": 3.0506200105206554e-07, "epoch": 1.8506174478820874, "percentage": 92.53, "elapsed_time": "1 day, 1:11:12", "remaining_time": "2:02:05"} +{"current_steps": 6970, "total_steps": 7532, "loss": 0.21531938016414642, "lr": 3.0398668949619515e-07, "epoch": 1.8508830168636303, "percentage": 92.54, "elapsed_time": "1 day, 1:11:25", "remaining_time": "2:01:52"} +{"current_steps": 6971, "total_steps": 7532, "loss": 0.22749900817871094, "lr": 3.029132472173368e-07, "epoch": 1.8511485858451733, "percentage": 92.55, "elapsed_time": "1 day, 
1:11:38", "remaining_time": "2:01:39"} +{"current_steps": 6972, "total_steps": 7532, "loss": 0.1826775223016739, "lr": 3.018416744224373e-07, "epoch": 1.8514141548267162, "percentage": 92.57, "elapsed_time": "1 day, 1:11:51", "remaining_time": "2:01:26"} +{"current_steps": 6973, "total_steps": 7532, "loss": 0.21982814371585846, "lr": 3.0077197131808344e-07, "epoch": 1.8516797238082592, "percentage": 92.58, "elapsed_time": "1 day, 1:12:04", "remaining_time": "2:01:13"} +{"current_steps": 6974, "total_steps": 7532, "loss": 0.23515473306179047, "lr": 2.997041381105026e-07, "epoch": 1.8519452927898021, "percentage": 92.59, "elapsed_time": "1 day, 1:12:17", "remaining_time": "2:01:00"} +{"current_steps": 6975, "total_steps": 7532, "loss": 0.19620616734027863, "lr": 2.9863817500556e-07, "epoch": 1.852210861771345, "percentage": 92.6, "elapsed_time": "1 day, 1:12:30", "remaining_time": "2:00:47"} +{"current_steps": 6976, "total_steps": 7532, "loss": 0.22158116102218628, "lr": 2.975740822087603e-07, "epoch": 1.852476430752888, "percentage": 92.62, "elapsed_time": "1 day, 1:12:44", "remaining_time": "2:00:34"} +{"current_steps": 6977, "total_steps": 7532, "loss": 0.23082244396209717, "lr": 2.96511859925247e-07, "epoch": 1.852741999734431, "percentage": 92.63, "elapsed_time": "1 day, 1:12:56", "remaining_time": "2:00:21"} +{"current_steps": 6978, "total_steps": 7532, "loss": 0.22743141651153564, "lr": 2.954515083598064e-07, "epoch": 1.853007568715974, "percentage": 92.64, "elapsed_time": "1 day, 1:13:10", "remaining_time": "2:00:08"} +{"current_steps": 6979, "total_steps": 7532, "loss": 0.2329188883304596, "lr": 2.943930277168594e-07, "epoch": 1.853273137697517, "percentage": 92.66, "elapsed_time": "1 day, 1:13:23", "remaining_time": "1:59:55"} +{"current_steps": 6980, "total_steps": 7532, "loss": 0.20360302925109863, "lr": 2.9333641820047055e-07, "epoch": 1.8535387066790598, "percentage": 92.67, "elapsed_time": "1 day, 1:13:36", "remaining_time": "1:59:42"} 
+{"current_steps": 6981, "total_steps": 7532, "loss": 0.1903664767742157, "lr": 2.922816800143402e-07, "epoch": 1.8538042756606028, "percentage": 92.68, "elapsed_time": "1 day, 1:13:49", "remaining_time": "1:59:29"} +{"current_steps": 6982, "total_steps": 7532, "loss": 0.2247854322195053, "lr": 2.912288133618102e-07, "epoch": 1.8540698446421457, "percentage": 92.7, "elapsed_time": "1 day, 1:14:03", "remaining_time": "1:59:16"} +{"current_steps": 6983, "total_steps": 7532, "loss": 0.22693192958831787, "lr": 2.9017781844586035e-07, "epoch": 1.8543354136236887, "percentage": 92.71, "elapsed_time": "1 day, 1:14:16", "remaining_time": "1:59:03"} +{"current_steps": 6984, "total_steps": 7532, "loss": 0.23769894242286682, "lr": 2.891286954691108e-07, "epoch": 1.8546009826052319, "percentage": 92.72, "elapsed_time": "1 day, 1:14:29", "remaining_time": "1:58:50"} +{"current_steps": 6985, "total_steps": 7532, "loss": 0.23251450061798096, "lr": 2.880814446338198e-07, "epoch": 1.8548665515867748, "percentage": 92.74, "elapsed_time": "1 day, 1:14:42", "remaining_time": "1:58:37"} +{"current_steps": 6986, "total_steps": 7532, "loss": 0.20828741788864136, "lr": 2.870360661418847e-07, "epoch": 1.8551321205683178, "percentage": 92.75, "elapsed_time": "1 day, 1:14:55", "remaining_time": "1:58:24"} +{"current_steps": 6987, "total_steps": 7532, "loss": 0.2324519008398056, "lr": 2.859925601948421e-07, "epoch": 1.8553976895498607, "percentage": 92.76, "elapsed_time": "1 day, 1:15:08", "remaining_time": "1:58:11"} +{"current_steps": 6988, "total_steps": 7532, "loss": 0.2166297733783722, "lr": 2.8495092699386774e-07, "epoch": 1.8556632585314037, "percentage": 92.78, "elapsed_time": "1 day, 1:15:22", "remaining_time": "1:57:58"} +{"current_steps": 6989, "total_steps": 7532, "loss": 0.2760158181190491, "lr": 2.839111667397765e-07, "epoch": 1.8559288275129466, "percentage": 92.79, "elapsed_time": "1 day, 1:15:34", "remaining_time": "1:57:45"} +{"current_steps": 6990, "total_steps": 7532, 
"loss": 0.2263752520084381, "lr": 2.8287327963302025e-07, "epoch": 1.8561943964944896, "percentage": 92.8, "elapsed_time": "1 day, 1:15:47", "remaining_time": "1:57:32"} +{"current_steps": 6991, "total_steps": 7532, "loss": 0.2490656077861786, "lr": 2.8183726587369455e-07, "epoch": 1.8564599654760325, "percentage": 92.82, "elapsed_time": "1 day, 1:16:01", "remaining_time": "1:57:19"} +{"current_steps": 6992, "total_steps": 7532, "loss": 0.22495508193969727, "lr": 2.808031256615285e-07, "epoch": 1.8567255344575755, "percentage": 92.83, "elapsed_time": "1 day, 1:16:14", "remaining_time": "1:57:06"} +{"current_steps": 6993, "total_steps": 7532, "loss": 0.2671046853065491, "lr": 2.7977085919589253e-07, "epoch": 1.8569911034391184, "percentage": 92.84, "elapsed_time": "1 day, 1:16:27", "remaining_time": "1:56:53"} +{"current_steps": 6994, "total_steps": 7532, "loss": 0.19782954454421997, "lr": 2.7874046667579535e-07, "epoch": 1.8572566724206614, "percentage": 92.86, "elapsed_time": "1 day, 1:16:40", "remaining_time": "1:56:40"} +{"current_steps": 6995, "total_steps": 7532, "loss": 0.24458879232406616, "lr": 2.777119482998847e-07, "epoch": 1.8575222414022043, "percentage": 92.87, "elapsed_time": "1 day, 1:16:54", "remaining_time": "1:56:27"} +{"current_steps": 6996, "total_steps": 7532, "loss": 0.23476794362068176, "lr": 2.7668530426644637e-07, "epoch": 1.8577878103837473, "percentage": 92.88, "elapsed_time": "1 day, 1:17:07", "remaining_time": "1:56:14"} +{"current_steps": 6997, "total_steps": 7532, "loss": 0.2318287342786789, "lr": 2.7566053477340535e-07, "epoch": 1.8580533793652902, "percentage": 92.9, "elapsed_time": "1 day, 1:17:20", "remaining_time": "1:56:01"} +{"current_steps": 6998, "total_steps": 7532, "loss": 0.21341973543167114, "lr": 2.746376400183259e-07, "epoch": 1.8583189483468332, "percentage": 92.91, "elapsed_time": "1 day, 1:17:33", "remaining_time": "1:55:48"} +{"current_steps": 6999, "total_steps": 7532, "loss": 0.25269803404808044, "lr": 
2.7361662019840916e-07, "epoch": 1.8585845173283762, "percentage": 92.92, "elapsed_time": "1 day, 1:17:46", "remaining_time": "1:55:35"} +{"current_steps": 7000, "total_steps": 7532, "loss": 0.24590039253234863, "lr": 2.7259747551049653e-07, "epoch": 1.858850086309919, "percentage": 92.94, "elapsed_time": "1 day, 1:17:59", "remaining_time": "1:55:22"} +{"current_steps": 7001, "total_steps": 7532, "loss": 0.19907096028327942, "lr": 2.715802061510664e-07, "epoch": 1.859115655291462, "percentage": 92.95, "elapsed_time": "1 day, 1:18:19", "remaining_time": "1:55:09"} +{"current_steps": 7002, "total_steps": 7532, "loss": 0.24304917454719543, "lr": 2.705648123162363e-07, "epoch": 1.859381224273005, "percentage": 92.96, "elapsed_time": "1 day, 1:18:31", "remaining_time": "1:54:56"} +{"current_steps": 7003, "total_steps": 7532, "loss": 0.24846915900707245, "lr": 2.6955129420176193e-07, "epoch": 1.859646793254548, "percentage": 92.98, "elapsed_time": "1 day, 1:18:44", "remaining_time": "1:54:43"} +{"current_steps": 7004, "total_steps": 7532, "loss": 0.21709200739860535, "lr": 2.685396520030381e-07, "epoch": 1.859912362236091, "percentage": 92.99, "elapsed_time": "1 day, 1:18:58", "remaining_time": "1:54:30"} +{"current_steps": 7005, "total_steps": 7532, "loss": 0.28031325340270996, "lr": 2.675298859150977e-07, "epoch": 1.8601779312176339, "percentage": 93.0, "elapsed_time": "1 day, 1:19:10", "remaining_time": "1:54:17"} +{"current_steps": 7006, "total_steps": 7532, "loss": 0.20367707312107086, "lr": 2.6652199613261155e-07, "epoch": 1.8604435001991768, "percentage": 93.02, "elapsed_time": "1 day, 1:19:23", "remaining_time": "1:54:04"} +{"current_steps": 7007, "total_steps": 7532, "loss": 0.20737403631210327, "lr": 2.6551598284988877e-07, "epoch": 1.8607090691807198, "percentage": 93.03, "elapsed_time": "1 day, 1:19:36", "remaining_time": "1:53:51"} +{"current_steps": 7008, "total_steps": 7532, "loss": 0.2504046559333801, "lr": 2.6451184626087646e-07, "epoch": 
1.8609746381622627, "percentage": 93.04, "elapsed_time": "1 day, 1:19:49", "remaining_time": "1:53:38"} +{"current_steps": 7009, "total_steps": 7532, "loss": 0.26347339153289795, "lr": 2.635095865591608e-07, "epoch": 1.8612402071438057, "percentage": 93.06, "elapsed_time": "1 day, 1:20:01", "remaining_time": "1:53:25"} +{"current_steps": 7010, "total_steps": 7532, "loss": 0.2347220480442047, "lr": 2.625092039379662e-07, "epoch": 1.8615057761253486, "percentage": 93.07, "elapsed_time": "1 day, 1:20:14", "remaining_time": "1:53:12"} +{"current_steps": 7011, "total_steps": 7532, "loss": 0.23565630614757538, "lr": 2.6151069859015386e-07, "epoch": 1.8617713451068916, "percentage": 93.08, "elapsed_time": "1 day, 1:20:27", "remaining_time": "1:52:59"} +{"current_steps": 7012, "total_steps": 7532, "loss": 0.21462437510490417, "lr": 2.605140707082243e-07, "epoch": 1.8620369140884345, "percentage": 93.1, "elapsed_time": "1 day, 1:20:40", "remaining_time": "1:52:46"} +{"current_steps": 7013, "total_steps": 7532, "loss": 0.24224728345870972, "lr": 2.595193204843149e-07, "epoch": 1.8623024830699775, "percentage": 93.11, "elapsed_time": "1 day, 1:20:53", "remaining_time": "1:52:33"} +{"current_steps": 7014, "total_steps": 7532, "loss": 0.24200880527496338, "lr": 2.5852644811020344e-07, "epoch": 1.8625680520515204, "percentage": 93.12, "elapsed_time": "1 day, 1:21:06", "remaining_time": "1:52:20"} +{"current_steps": 7015, "total_steps": 7532, "loss": 0.23315191268920898, "lr": 2.5753545377730227e-07, "epoch": 1.8628336210330634, "percentage": 93.14, "elapsed_time": "1 day, 1:21:19", "remaining_time": "1:52:07"} +{"current_steps": 7016, "total_steps": 7532, "loss": 0.31112274527549744, "lr": 2.56546337676663e-07, "epoch": 1.8630991900146063, "percentage": 93.15, "elapsed_time": "1 day, 1:21:31", "remaining_time": "1:51:54"} +{"current_steps": 7017, "total_steps": 7532, "loss": 0.2291945070028305, "lr": 2.555590999989754e-07, "epoch": 1.8633647589961493, "percentage": 93.16, 
"elapsed_time": "1 day, 1:21:44", "remaining_time": "1:51:41"} +{"current_steps": 7018, "total_steps": 7532, "loss": 0.2324746549129486, "lr": 2.5457374093457057e-07, "epoch": 1.8636303279776922, "percentage": 93.18, "elapsed_time": "1 day, 1:21:57", "remaining_time": "1:51:28"} +{"current_steps": 7019, "total_steps": 7532, "loss": 0.2585206627845764, "lr": 2.5359026067341086e-07, "epoch": 1.8638958969592352, "percentage": 93.19, "elapsed_time": "1 day, 1:22:10", "remaining_time": "1:51:15"} +{"current_steps": 7020, "total_steps": 7532, "loss": 0.22986871004104614, "lr": 2.5260865940510027e-07, "epoch": 1.8641614659407781, "percentage": 93.2, "elapsed_time": "1 day, 1:22:23", "remaining_time": "1:51:02"} +{"current_steps": 7021, "total_steps": 7532, "loss": 0.22615428268909454, "lr": 2.5162893731888074e-07, "epoch": 1.864427034922321, "percentage": 93.22, "elapsed_time": "1 day, 1:22:36", "remaining_time": "1:50:49"} +{"current_steps": 7022, "total_steps": 7532, "loss": 0.21324753761291504, "lr": 2.5065109460363113e-07, "epoch": 1.864692603903864, "percentage": 93.23, "elapsed_time": "1 day, 1:22:49", "remaining_time": "1:50:36"} +{"current_steps": 7023, "total_steps": 7532, "loss": 0.2247733324766159, "lr": 2.4967513144786736e-07, "epoch": 1.864958172885407, "percentage": 93.24, "elapsed_time": "1 day, 1:23:02", "remaining_time": "1:50:23"} +{"current_steps": 7024, "total_steps": 7532, "loss": 0.22080597281455994, "lr": 2.4870104803974336e-07, "epoch": 1.86522374186695, "percentage": 93.26, "elapsed_time": "1 day, 1:23:15", "remaining_time": "1:50:10"} +{"current_steps": 7025, "total_steps": 7532, "loss": 0.23669888079166412, "lr": 2.4772884456705224e-07, "epoch": 1.865489310848493, "percentage": 93.27, "elapsed_time": "1 day, 1:23:28", "remaining_time": "1:49:57"} +{"current_steps": 7026, "total_steps": 7532, "loss": 0.2320847064256668, "lr": 2.4675852121722075e-07, "epoch": 1.8657548798300359, "percentage": 93.28, "elapsed_time": "1 day, 1:23:41", 
"remaining_time": "1:49:43"} +{"current_steps": 7027, "total_steps": 7532, "loss": 0.2595662474632263, "lr": 2.4579007817731925e-07, "epoch": 1.8660204488115788, "percentage": 93.3, "elapsed_time": "1 day, 1:23:53", "remaining_time": "1:49:30"} +{"current_steps": 7028, "total_steps": 7532, "loss": 0.22152045369148254, "lr": 2.4482351563405174e-07, "epoch": 1.8662860177931218, "percentage": 93.31, "elapsed_time": "1 day, 1:24:06", "remaining_time": "1:49:17"} +{"current_steps": 7029, "total_steps": 7532, "loss": 0.2391948401927948, "lr": 2.4385883377375683e-07, "epoch": 1.8665515867746647, "percentage": 93.32, "elapsed_time": "1 day, 1:24:19", "remaining_time": "1:49:04"} +{"current_steps": 7030, "total_steps": 7532, "loss": 0.23117749392986298, "lr": 2.428960327824159e-07, "epoch": 1.8668171557562077, "percentage": 93.34, "elapsed_time": "1 day, 1:24:32", "remaining_time": "1:48:51"} +{"current_steps": 7031, "total_steps": 7532, "loss": 0.24019500613212585, "lr": 2.41935112845646e-07, "epoch": 1.8670827247377506, "percentage": 93.35, "elapsed_time": "1 day, 1:24:45", "remaining_time": "1:48:38"} +{"current_steps": 7032, "total_steps": 7532, "loss": 0.19560202956199646, "lr": 2.4097607414869995e-07, "epoch": 1.8673482937192936, "percentage": 93.36, "elapsed_time": "1 day, 1:24:58", "remaining_time": "1:48:25"} +{"current_steps": 7033, "total_steps": 7532, "loss": 0.23110055923461914, "lr": 2.4001891687647103e-07, "epoch": 1.8676138627008365, "percentage": 93.37, "elapsed_time": "1 day, 1:25:10", "remaining_time": "1:48:12"} +{"current_steps": 7034, "total_steps": 7532, "loss": 0.2214709371328354, "lr": 2.39063641213485e-07, "epoch": 1.8678794316823795, "percentage": 93.39, "elapsed_time": "1 day, 1:25:24", "remaining_time": "1:47:59"} +{"current_steps": 7035, "total_steps": 7532, "loss": 0.22123369574546814, "lr": 2.381102473439101e-07, "epoch": 1.8681450006639224, "percentage": 93.4, "elapsed_time": "1 day, 1:25:36", "remaining_time": "1:47:46"} +{"current_steps": 
7036, "total_steps": 7532, "loss": 0.23984813690185547, "lr": 2.371587354515481e-07, "epoch": 1.8684105696454654, "percentage": 93.41, "elapsed_time": "1 day, 1:25:49", "remaining_time": "1:47:33"} +{"current_steps": 7037, "total_steps": 7532, "loss": 0.26089030504226685, "lr": 2.3620910571984124e-07, "epoch": 1.8686761386270083, "percentage": 93.43, "elapsed_time": "1 day, 1:26:02", "remaining_time": "1:47:20"} +{"current_steps": 7038, "total_steps": 7532, "loss": 0.2344229370355606, "lr": 2.3526135833186527e-07, "epoch": 1.8689417076085513, "percentage": 93.44, "elapsed_time": "1 day, 1:26:15", "remaining_time": "1:47:07"} +{"current_steps": 7039, "total_steps": 7532, "loss": 0.24499498307704926, "lr": 2.34315493470334e-07, "epoch": 1.8692072765900942, "percentage": 93.45, "elapsed_time": "1 day, 1:26:28", "remaining_time": "1:46:54"} +{"current_steps": 7040, "total_steps": 7532, "loss": 0.21971477568149567, "lr": 2.333715113176005e-07, "epoch": 1.8694728455716372, "percentage": 93.47, "elapsed_time": "1 day, 1:26:41", "remaining_time": "1:46:41"} +{"current_steps": 7041, "total_steps": 7532, "loss": 0.2594453990459442, "lr": 2.3242941205565362e-07, "epoch": 1.8697384145531801, "percentage": 93.48, "elapsed_time": "1 day, 1:26:54", "remaining_time": "1:46:28"} +{"current_steps": 7042, "total_steps": 7532, "loss": 0.24689960479736328, "lr": 2.3148919586611806e-07, "epoch": 1.870003983534723, "percentage": 93.49, "elapsed_time": "1 day, 1:27:07", "remaining_time": "1:46:15"} +{"current_steps": 7043, "total_steps": 7532, "loss": 0.19972509145736694, "lr": 2.3055086293025665e-07, "epoch": 1.870269552516266, "percentage": 93.51, "elapsed_time": "1 day, 1:27:20", "remaining_time": "1:46:02"} +{"current_steps": 7044, "total_steps": 7532, "loss": 0.2139236032962799, "lr": 2.2961441342896795e-07, "epoch": 1.870535121497809, "percentage": 93.52, "elapsed_time": "1 day, 1:27:33", "remaining_time": "1:45:49"} +{"current_steps": 7045, "total_steps": 7532, "loss": 
0.2251984179019928, "lr": 2.286798475427898e-07, "epoch": 1.870800690479352, "percentage": 93.53, "elapsed_time": "1 day, 1:27:46", "remaining_time": "1:45:36"} +{"current_steps": 7046, "total_steps": 7532, "loss": 0.24517378211021423, "lr": 2.277471654518959e-07, "epoch": 1.8710662594608949, "percentage": 93.55, "elapsed_time": "1 day, 1:27:59", "remaining_time": "1:45:23"} +{"current_steps": 7047, "total_steps": 7532, "loss": 0.19115275144577026, "lr": 2.2681636733609457e-07, "epoch": 1.8713318284424378, "percentage": 93.56, "elapsed_time": "1 day, 1:28:11", "remaining_time": "1:45:10"} +{"current_steps": 7048, "total_steps": 7532, "loss": 0.26092633605003357, "lr": 2.2588745337483454e-07, "epoch": 1.8715973974239808, "percentage": 93.57, "elapsed_time": "1 day, 1:28:24", "remaining_time": "1:44:57"} +{"current_steps": 7049, "total_steps": 7532, "loss": 0.18862302601337433, "lr": 2.2496042374719807e-07, "epoch": 1.8718629664055237, "percentage": 93.59, "elapsed_time": "1 day, 1:28:37", "remaining_time": "1:44:44"} +{"current_steps": 7050, "total_steps": 7532, "loss": 0.20728996396064758, "lr": 2.2403527863190554e-07, "epoch": 1.8721285353870667, "percentage": 93.6, "elapsed_time": "1 day, 1:28:50", "remaining_time": "1:44:31"} +{"current_steps": 7051, "total_steps": 7532, "loss": 0.24244122207164764, "lr": 2.231120182073143e-07, "epoch": 1.8723941043686096, "percentage": 93.61, "elapsed_time": "1 day, 1:29:03", "remaining_time": "1:44:18"} +{"current_steps": 7052, "total_steps": 7532, "loss": 0.18956953287124634, "lr": 2.2219064265141866e-07, "epoch": 1.8726596733501526, "percentage": 93.63, "elapsed_time": "1 day, 1:29:16", "remaining_time": "1:44:05"} +{"current_steps": 7053, "total_steps": 7532, "loss": 0.19873176515102386, "lr": 2.2127115214184868e-07, "epoch": 1.8729252423316956, "percentage": 93.64, "elapsed_time": "1 day, 1:29:29", "remaining_time": "1:43:52"} +{"current_steps": 7054, "total_steps": 7532, "loss": 0.23717360198497772, "lr": 
2.203535468558704e-07, "epoch": 1.8731908113132385, "percentage": 93.65, "elapsed_time": "1 day, 1:29:42", "remaining_time": "1:43:39"} +{"current_steps": 7055, "total_steps": 7532, "loss": 0.24051904678344727, "lr": 2.1943782697038896e-07, "epoch": 1.8734563802947815, "percentage": 93.67, "elapsed_time": "1 day, 1:29:55", "remaining_time": "1:43:26"} +{"current_steps": 7056, "total_steps": 7532, "loss": 0.23541691899299622, "lr": 2.1852399266194312e-07, "epoch": 1.8737219492763244, "percentage": 93.68, "elapsed_time": "1 day, 1:30:08", "remaining_time": "1:43:13"} +{"current_steps": 7057, "total_steps": 7532, "loss": 0.22566163539886475, "lr": 2.1761204410671088e-07, "epoch": 1.8739875182578674, "percentage": 93.69, "elapsed_time": "1 day, 1:30:21", "remaining_time": "1:43:00"} +{"current_steps": 7058, "total_steps": 7532, "loss": 0.25771743059158325, "lr": 2.167019814805027e-07, "epoch": 1.8742530872394103, "percentage": 93.71, "elapsed_time": "1 day, 1:30:34", "remaining_time": "1:42:47"} +{"current_steps": 7059, "total_steps": 7532, "loss": 0.22624637186527252, "lr": 2.1579380495876934e-07, "epoch": 1.8745186562209533, "percentage": 93.72, "elapsed_time": "1 day, 1:30:47", "remaining_time": "1:42:34"} +{"current_steps": 7060, "total_steps": 7532, "loss": 0.24671627581119537, "lr": 2.148875147165963e-07, "epoch": 1.8747842252024962, "percentage": 93.73, "elapsed_time": "1 day, 1:31:00", "remaining_time": "1:42:21"} +{"current_steps": 7061, "total_steps": 7532, "loss": 0.21607278287410736, "lr": 2.1398311092870605e-07, "epoch": 1.8750497941840392, "percentage": 93.75, "elapsed_time": "1 day, 1:31:13", "remaining_time": "1:42:08"} +{"current_steps": 7062, "total_steps": 7532, "loss": 0.1960655301809311, "lr": 2.1308059376945689e-07, "epoch": 1.8753153631655821, "percentage": 93.76, "elapsed_time": "1 day, 1:31:26", "remaining_time": "1:41:55"} +{"current_steps": 7063, "total_steps": 7532, "loss": 0.22005721926689148, "lr": 2.1217996341284297e-07, "epoch": 
1.875580932147125, "percentage": 93.77, "elapsed_time": "1 day, 1:31:39", "remaining_time": "1:41:42"} +{"current_steps": 7064, "total_steps": 7532, "loss": 0.21442776918411255, "lr": 2.1128122003249541e-07, "epoch": 1.875846501128668, "percentage": 93.79, "elapsed_time": "1 day, 1:31:52", "remaining_time": "1:41:29"} +{"current_steps": 7065, "total_steps": 7532, "loss": 0.23126785457134247, "lr": 2.1038436380168114e-07, "epoch": 1.876112070110211, "percentage": 93.8, "elapsed_time": "1 day, 1:32:05", "remaining_time": "1:41:16"} +{"current_steps": 7066, "total_steps": 7532, "loss": 0.24286629259586334, "lr": 2.094893948933041e-07, "epoch": 1.876377639091754, "percentage": 93.81, "elapsed_time": "1 day, 1:32:18", "remaining_time": "1:41:03"} +{"current_steps": 7067, "total_steps": 7532, "loss": 0.25771957635879517, "lr": 2.0859631347990406e-07, "epoch": 1.8766432080732969, "percentage": 93.83, "elapsed_time": "1 day, 1:32:30", "remaining_time": "1:40:50"} +{"current_steps": 7068, "total_steps": 7532, "loss": 0.19837790727615356, "lr": 2.0770511973365436e-07, "epoch": 1.87690877705484, "percentage": 93.84, "elapsed_time": "1 day, 1:32:44", "remaining_time": "1:40:37"} +{"current_steps": 7069, "total_steps": 7532, "loss": 0.21209359169006348, "lr": 2.0681581382636984e-07, "epoch": 1.877174346036383, "percentage": 93.85, "elapsed_time": "1 day, 1:32:56", "remaining_time": "1:40:24"} +{"current_steps": 7070, "total_steps": 7532, "loss": 0.26641422510147095, "lr": 2.0592839592949554e-07, "epoch": 1.877439915017926, "percentage": 93.87, "elapsed_time": "1 day, 1:33:10", "remaining_time": "1:40:11"} +{"current_steps": 7071, "total_steps": 7532, "loss": 0.21609601378440857, "lr": 2.050428662141146e-07, "epoch": 1.877705483999469, "percentage": 93.88, "elapsed_time": "1 day, 1:33:23", "remaining_time": "1:39:58"} +{"current_steps": 7072, "total_steps": 7532, "loss": 0.23642000555992126, "lr": 2.0415922485095051e-07, "epoch": 1.8779710529810119, "percentage": 93.89, 
"elapsed_time": "1 day, 1:33:35", "remaining_time": "1:39:45"} +{"current_steps": 7073, "total_steps": 7532, "loss": 0.24564675986766815, "lr": 2.0327747201035587e-07, "epoch": 1.8782366219625548, "percentage": 93.91, "elapsed_time": "1 day, 1:33:49", "remaining_time": "1:39:32"} +{"current_steps": 7074, "total_steps": 7532, "loss": 0.20001479983329773, "lr": 2.0239760786232355e-07, "epoch": 1.8785021909440978, "percentage": 93.92, "elapsed_time": "1 day, 1:34:02", "remaining_time": "1:39:19"} +{"current_steps": 7075, "total_steps": 7532, "loss": 0.2590208649635315, "lr": 2.015196325764801e-07, "epoch": 1.8787677599256407, "percentage": 93.93, "elapsed_time": "1 day, 1:34:15", "remaining_time": "1:39:06"} +{"current_steps": 7076, "total_steps": 7532, "loss": 0.23298504948616028, "lr": 2.0064354632208904e-07, "epoch": 1.8790333289071837, "percentage": 93.95, "elapsed_time": "1 day, 1:34:28", "remaining_time": "1:38:53"} +{"current_steps": 7077, "total_steps": 7532, "loss": 0.22409996390342712, "lr": 1.997693492680497e-07, "epoch": 1.8792988978887266, "percentage": 93.96, "elapsed_time": "1 day, 1:34:41", "remaining_time": "1:38:40"} +{"current_steps": 7078, "total_steps": 7532, "loss": 0.27316784858703613, "lr": 1.9889704158289724e-07, "epoch": 1.8795644668702696, "percentage": 93.97, "elapsed_time": "1 day, 1:34:54", "remaining_time": "1:38:27"} +{"current_steps": 7079, "total_steps": 7532, "loss": 0.2271946519613266, "lr": 1.980266234348016e-07, "epoch": 1.8798300358518125, "percentage": 93.99, "elapsed_time": "1 day, 1:35:07", "remaining_time": "1:38:14"} +{"current_steps": 7080, "total_steps": 7532, "loss": 0.20887964963912964, "lr": 1.9715809499156858e-07, "epoch": 1.8800956048333555, "percentage": 94.0, "elapsed_time": "1 day, 1:35:20", "remaining_time": "1:38:01"} +{"current_steps": 7081, "total_steps": 7532, "loss": 0.23468685150146484, "lr": 1.9629145642064197e-07, "epoch": 1.8803611738148984, "percentage": 94.01, "elapsed_time": "1 day, 1:35:34", 
"remaining_time": "1:37:48"} +{"current_steps": 7082, "total_steps": 7532, "loss": 0.21624556183815002, "lr": 1.9542670788909813e-07, "epoch": 1.8806267427964414, "percentage": 94.03, "elapsed_time": "1 day, 1:35:47", "remaining_time": "1:37:35"} +{"current_steps": 7083, "total_steps": 7532, "loss": 0.22328166663646698, "lr": 1.9456384956365149e-07, "epoch": 1.8808923117779843, "percentage": 94.04, "elapsed_time": "1 day, 1:36:00", "remaining_time": "1:37:22"} +{"current_steps": 7084, "total_steps": 7532, "loss": 0.2526431381702423, "lr": 1.93702881610649e-07, "epoch": 1.8811578807595273, "percentage": 94.05, "elapsed_time": "1 day, 1:36:13", "remaining_time": "1:37:09"} +{"current_steps": 7085, "total_steps": 7532, "loss": 0.23668771982192993, "lr": 1.9284380419607784e-07, "epoch": 1.8814234497410702, "percentage": 94.07, "elapsed_time": "1 day, 1:36:26", "remaining_time": "1:36:56"} +{"current_steps": 7086, "total_steps": 7532, "loss": 0.24710845947265625, "lr": 1.9198661748555557e-07, "epoch": 1.8816890187226132, "percentage": 94.08, "elapsed_time": "1 day, 1:36:39", "remaining_time": "1:36:43"} +{"current_steps": 7087, "total_steps": 7532, "loss": 0.22696900367736816, "lr": 1.911313216443389e-07, "epoch": 1.8819545877041561, "percentage": 94.09, "elapsed_time": "1 day, 1:36:52", "remaining_time": "1:36:30"} +{"current_steps": 7088, "total_steps": 7532, "loss": 0.21652163565158844, "lr": 1.9027791683731922e-07, "epoch": 1.882220156685699, "percentage": 94.11, "elapsed_time": "1 day, 1:37:05", "remaining_time": "1:36:17"} +{"current_steps": 7089, "total_steps": 7532, "loss": 0.2166716307401657, "lr": 1.894264032290205e-07, "epoch": 1.882485725667242, "percentage": 94.12, "elapsed_time": "1 day, 1:37:18", "remaining_time": "1:36:04"} +{"current_steps": 7090, "total_steps": 7532, "loss": 0.26200050115585327, "lr": 1.8857678098360698e-07, "epoch": 1.882751294648785, "percentage": 94.13, "elapsed_time": "1 day, 1:37:31", "remaining_time": "1:35:51"} 
+{"current_steps": 7091, "total_steps": 7532, "loss": 0.2292764037847519, "lr": 1.8772905026487654e-07, "epoch": 1.883016863630328, "percentage": 94.14, "elapsed_time": "1 day, 1:37:44", "remaining_time": "1:35:38"} +{"current_steps": 7092, "total_steps": 7532, "loss": 0.23893016576766968, "lr": 1.8688321123625842e-07, "epoch": 1.883282432611871, "percentage": 94.16, "elapsed_time": "1 day, 1:37:57", "remaining_time": "1:35:25"} +{"current_steps": 7093, "total_steps": 7532, "loss": 0.2509230673313141, "lr": 1.860392640608244e-07, "epoch": 1.8835480015934138, "percentage": 94.17, "elapsed_time": "1 day, 1:38:10", "remaining_time": "1:35:12"} +{"current_steps": 7094, "total_steps": 7532, "loss": 0.24156486988067627, "lr": 1.8519720890127434e-07, "epoch": 1.8838135705749568, "percentage": 94.18, "elapsed_time": "1 day, 1:38:23", "remaining_time": "1:34:59"} +{"current_steps": 7095, "total_steps": 7532, "loss": 0.2120019942522049, "lr": 1.843570459199462e-07, "epoch": 1.8840791395564997, "percentage": 94.2, "elapsed_time": "1 day, 1:38:36", "remaining_time": "1:34:46"} +{"current_steps": 7096, "total_steps": 7532, "loss": 0.23400259017944336, "lr": 1.835187752788159e-07, "epoch": 1.884344708538043, "percentage": 94.21, "elapsed_time": "1 day, 1:38:50", "remaining_time": "1:34:33"} +{"current_steps": 7097, "total_steps": 7532, "loss": 0.20913103222846985, "lr": 1.8268239713949087e-07, "epoch": 1.8846102775195859, "percentage": 94.22, "elapsed_time": "1 day, 1:39:02", "remaining_time": "1:34:20"} +{"current_steps": 7098, "total_steps": 7532, "loss": 0.24468877911567688, "lr": 1.8184791166321546e-07, "epoch": 1.8848758465011288, "percentage": 94.24, "elapsed_time": "1 day, 1:39:15", "remaining_time": "1:34:06"} +{"current_steps": 7099, "total_steps": 7532, "loss": 0.2038918137550354, "lr": 1.8101531901086767e-07, "epoch": 1.8851414154826718, "percentage": 94.25, "elapsed_time": "1 day, 1:39:28", "remaining_time": "1:33:53"} +{"current_steps": 7100, "total_steps": 7532, 
"loss": 0.24191413819789886, "lr": 1.8018461934296239e-07, "epoch": 1.8854069844642147, "percentage": 94.26, "elapsed_time": "1 day, 1:39:41", "remaining_time": "1:33:40"} +{"current_steps": 7101, "total_steps": 7532, "loss": 0.24394474923610687, "lr": 1.793558128196493e-07, "epoch": 1.8856725534457577, "percentage": 94.28, "elapsed_time": "1 day, 1:40:00", "remaining_time": "1:33:28"} +{"current_steps": 7102, "total_steps": 7532, "loss": 0.22630709409713745, "lr": 1.7852889960071063e-07, "epoch": 1.8859381224273006, "percentage": 94.29, "elapsed_time": "1 day, 1:40:12", "remaining_time": "1:33:15"} +{"current_steps": 7103, "total_steps": 7532, "loss": 0.23936980962753296, "lr": 1.7770387984556768e-07, "epoch": 1.8862036914088436, "percentage": 94.3, "elapsed_time": "1 day, 1:40:25", "remaining_time": "1:33:02"} +{"current_steps": 7104, "total_steps": 7532, "loss": 0.24808618426322937, "lr": 1.768807537132733e-07, "epoch": 1.8864692603903865, "percentage": 94.32, "elapsed_time": "1 day, 1:40:38", "remaining_time": "1:32:49"} +{"current_steps": 7105, "total_steps": 7532, "loss": 0.23934635519981384, "lr": 1.7605952136251603e-07, "epoch": 1.8867348293719295, "percentage": 94.33, "elapsed_time": "1 day, 1:40:51", "remaining_time": "1:32:36"} +{"current_steps": 7106, "total_steps": 7532, "loss": 0.22107656300067902, "lr": 1.7524018295162148e-07, "epoch": 1.8870003983534724, "percentage": 94.34, "elapsed_time": "1 day, 1:41:03", "remaining_time": "1:32:23"} +{"current_steps": 7107, "total_steps": 7532, "loss": 0.23253028094768524, "lr": 1.7442273863854553e-07, "epoch": 1.8872659673350154, "percentage": 94.36, "elapsed_time": "1 day, 1:41:16", "remaining_time": "1:32:10"} +{"current_steps": 7108, "total_steps": 7532, "loss": 0.2501102387905121, "lr": 1.7360718858088542e-07, "epoch": 1.8875315363165583, "percentage": 94.37, "elapsed_time": "1 day, 1:41:29", "remaining_time": "1:31:57"} +{"current_steps": 7109, "total_steps": 7532, "loss": 0.25537967681884766, "lr": 
1.7279353293586765e-07, "epoch": 1.8877971052981013, "percentage": 94.38, "elapsed_time": "1 day, 1:41:42", "remaining_time": "1:31:44"} +{"current_steps": 7110, "total_steps": 7532, "loss": 0.25701045989990234, "lr": 1.7198177186035447e-07, "epoch": 1.8880626742796442, "percentage": 94.4, "elapsed_time": "1 day, 1:41:55", "remaining_time": "1:31:31"} +{"current_steps": 7111, "total_steps": 7532, "loss": 0.2109440565109253, "lr": 1.7117190551084628e-07, "epoch": 1.8883282432611872, "percentage": 94.41, "elapsed_time": "1 day, 1:42:08", "remaining_time": "1:31:18"} +{"current_steps": 7112, "total_steps": 7532, "loss": 0.22767721116542816, "lr": 1.7036393404347373e-07, "epoch": 1.8885938122427302, "percentage": 94.42, "elapsed_time": "1 day, 1:42:21", "remaining_time": "1:31:05"} +{"current_steps": 7113, "total_steps": 7532, "loss": 0.1976814568042755, "lr": 1.6955785761400444e-07, "epoch": 1.888859381224273, "percentage": 94.44, "elapsed_time": "1 day, 1:42:34", "remaining_time": "1:30:52"} +{"current_steps": 7114, "total_steps": 7532, "loss": 0.21109873056411743, "lr": 1.687536763778419e-07, "epoch": 1.889124950205816, "percentage": 94.45, "elapsed_time": "1 day, 1:42:47", "remaining_time": "1:30:38"} +{"current_steps": 7115, "total_steps": 7532, "loss": 0.2165786623954773, "lr": 1.6795139049002095e-07, "epoch": 1.889390519187359, "percentage": 94.46, "elapsed_time": "1 day, 1:43:00", "remaining_time": "1:30:26"} +{"current_steps": 7116, "total_steps": 7532, "loss": 0.23962441086769104, "lr": 1.6715100010521347e-07, "epoch": 1.889656088168902, "percentage": 94.48, "elapsed_time": "1 day, 1:43:13", "remaining_time": "1:30:12"} +{"current_steps": 7117, "total_steps": 7532, "loss": 0.23351140320301056, "lr": 1.6635250537772596e-07, "epoch": 1.889921657150445, "percentage": 94.49, "elapsed_time": "1 day, 1:43:25", "remaining_time": "1:29:59"} +{"current_steps": 7118, "total_steps": 7532, "loss": 0.19999945163726807, "lr": 1.6555590646149866e-07, "epoch": 
1.8901872261319879, "percentage": 94.5, "elapsed_time": "1 day, 1:43:38", "remaining_time": "1:29:46"} +{"current_steps": 7119, "total_steps": 7532, "loss": 0.27142196893692017, "lr": 1.647612035101054e-07, "epoch": 1.8904527951135308, "percentage": 94.52, "elapsed_time": "1 day, 1:43:51", "remaining_time": "1:29:33"} +{"current_steps": 7120, "total_steps": 7532, "loss": 0.21525685489177704, "lr": 1.6396839667675691e-07, "epoch": 1.8907183640950738, "percentage": 94.53, "elapsed_time": "1 day, 1:44:04", "remaining_time": "1:29:20"} +{"current_steps": 7121, "total_steps": 7532, "loss": 0.24305005371570587, "lr": 1.631774861142965e-07, "epoch": 1.8909839330766167, "percentage": 94.54, "elapsed_time": "1 day, 1:44:17", "remaining_time": "1:29:07"} +{"current_steps": 7122, "total_steps": 7532, "loss": 0.23202842473983765, "lr": 1.6238847197520113e-07, "epoch": 1.8912495020581597, "percentage": 94.56, "elapsed_time": "1 day, 1:44:30", "remaining_time": "1:28:54"} +{"current_steps": 7123, "total_steps": 7532, "loss": 0.24373790621757507, "lr": 1.6160135441158576e-07, "epoch": 1.8915150710397026, "percentage": 94.57, "elapsed_time": "1 day, 1:44:43", "remaining_time": "1:28:41"} +{"current_steps": 7124, "total_steps": 7532, "loss": 0.22774222493171692, "lr": 1.6081613357519565e-07, "epoch": 1.8917806400212456, "percentage": 94.58, "elapsed_time": "1 day, 1:44:55", "remaining_time": "1:28:28"} +{"current_steps": 7125, "total_steps": 7532, "loss": 0.20660057663917542, "lr": 1.6003280961741196e-07, "epoch": 1.8920462090027885, "percentage": 94.6, "elapsed_time": "1 day, 1:45:08", "remaining_time": "1:28:15"} +{"current_steps": 7126, "total_steps": 7532, "loss": 0.23578912019729614, "lr": 1.5925138268925166e-07, "epoch": 1.8923117779843315, "percentage": 94.61, "elapsed_time": "1 day, 1:45:21", "remaining_time": "1:28:02"} +{"current_steps": 7127, "total_steps": 7532, "loss": 0.20852091908454895, "lr": 1.5847185294136313e-07, "epoch": 1.8925773469658744, "percentage": 94.62, 
"elapsed_time": "1 day, 1:45:33", "remaining_time": "1:27:49"} +{"current_steps": 7128, "total_steps": 7532, "loss": 0.17455898225307465, "lr": 1.5769422052403172e-07, "epoch": 1.8928429159474174, "percentage": 94.64, "elapsed_time": "1 day, 1:45:46", "remaining_time": "1:27:36"} +{"current_steps": 7129, "total_steps": 7532, "loss": 0.29552748799324036, "lr": 1.5691848558717638e-07, "epoch": 1.8931084849289603, "percentage": 94.65, "elapsed_time": "1 day, 1:45:59", "remaining_time": "1:27:23"} +{"current_steps": 7130, "total_steps": 7532, "loss": 0.22972649335861206, "lr": 1.5614464828034746e-07, "epoch": 1.8933740539105033, "percentage": 94.66, "elapsed_time": "1 day, 1:46:11", "remaining_time": "1:27:10"} +{"current_steps": 7131, "total_steps": 7532, "loss": 0.2134108692407608, "lr": 1.5537270875273348e-07, "epoch": 1.8936396228920462, "percentage": 94.68, "elapsed_time": "1 day, 1:46:25", "remaining_time": "1:26:57"} +{"current_steps": 7132, "total_steps": 7532, "loss": 0.24145451188087463, "lr": 1.546026671531542e-07, "epoch": 1.8939051918735892, "percentage": 94.69, "elapsed_time": "1 day, 1:46:37", "remaining_time": "1:26:44"} +{"current_steps": 7133, "total_steps": 7532, "loss": 0.2323920726776123, "lr": 1.5383452363006534e-07, "epoch": 1.8941707608551321, "percentage": 94.7, "elapsed_time": "1 day, 1:46:50", "remaining_time": "1:26:31"} +{"current_steps": 7134, "total_steps": 7532, "loss": 0.20091015100479126, "lr": 1.5306827833155403e-07, "epoch": 1.894436329836675, "percentage": 94.72, "elapsed_time": "1 day, 1:47:03", "remaining_time": "1:26:18"} +{"current_steps": 7135, "total_steps": 7532, "loss": 0.2451317310333252, "lr": 1.523039314053465e-07, "epoch": 1.894701898818218, "percentage": 94.73, "elapsed_time": "1 day, 1:47:15", "remaining_time": "1:26:05"} +{"current_steps": 7136, "total_steps": 7532, "loss": 0.22744594514369965, "lr": 1.5154148299879822e-07, "epoch": 1.894967467799761, "percentage": 94.74, "elapsed_time": "1 day, 1:47:28", 
"remaining_time": "1:25:52"} +{"current_steps": 7137, "total_steps": 7532, "loss": 0.2460673749446869, "lr": 1.5078093325889943e-07, "epoch": 1.895233036781304, "percentage": 94.76, "elapsed_time": "1 day, 1:47:41", "remaining_time": "1:25:39"} +{"current_steps": 7138, "total_steps": 7532, "loss": 0.2524537444114685, "lr": 1.5002228233227722e-07, "epoch": 1.895498605762847, "percentage": 94.77, "elapsed_time": "1 day, 1:47:54", "remaining_time": "1:25:26"} +{"current_steps": 7139, "total_steps": 7532, "loss": 0.2056279480457306, "lr": 1.4926553036518798e-07, "epoch": 1.8957641747443899, "percentage": 94.78, "elapsed_time": "1 day, 1:48:07", "remaining_time": "1:25:13"} +{"current_steps": 7140, "total_steps": 7532, "loss": 0.2656184732913971, "lr": 1.485106775035261e-07, "epoch": 1.8960297437259328, "percentage": 94.8, "elapsed_time": "1 day, 1:48:19", "remaining_time": "1:25:00"} +{"current_steps": 7141, "total_steps": 7532, "loss": 0.2190116047859192, "lr": 1.477577238928185e-07, "epoch": 1.8962953127074758, "percentage": 94.81, "elapsed_time": "1 day, 1:48:32", "remaining_time": "1:24:47"} +{"current_steps": 7142, "total_steps": 7532, "loss": 0.22984017431735992, "lr": 1.4700666967822574e-07, "epoch": 1.8965608816890187, "percentage": 94.82, "elapsed_time": "1 day, 1:48:45", "remaining_time": "1:24:34"} +{"current_steps": 7143, "total_steps": 7532, "loss": 0.17947378754615784, "lr": 1.462575150045409e-07, "epoch": 1.8968264506705617, "percentage": 94.84, "elapsed_time": "1 day, 1:48:58", "remaining_time": "1:24:21"} +{"current_steps": 7144, "total_steps": 7532, "loss": 0.24965715408325195, "lr": 1.4551026001619395e-07, "epoch": 1.8970920196521046, "percentage": 94.85, "elapsed_time": "1 day, 1:49:11", "remaining_time": "1:24:08"} +{"current_steps": 7145, "total_steps": 7532, "loss": 0.2337307333946228, "lr": 1.4476490485724526e-07, "epoch": 1.8973575886336476, "percentage": 94.86, "elapsed_time": "1 day, 1:49:24", "remaining_time": "1:23:55"} +{"current_steps": 
7146, "total_steps": 7532, "loss": 0.22668538987636566, "lr": 1.4402144967139098e-07, "epoch": 1.8976231576151905, "percentage": 94.88, "elapsed_time": "1 day, 1:49:37", "remaining_time": "1:23:42"} +{"current_steps": 7147, "total_steps": 7532, "loss": 0.21934781968593597, "lr": 1.4327989460196091e-07, "epoch": 1.8978887265967335, "percentage": 94.89, "elapsed_time": "1 day, 1:49:50", "remaining_time": "1:23:29"} +{"current_steps": 7148, "total_steps": 7532, "loss": 0.1957930624485016, "lr": 1.4254023979191844e-07, "epoch": 1.8981542955782764, "percentage": 94.9, "elapsed_time": "1 day, 1:50:03", "remaining_time": "1:23:16"} +{"current_steps": 7149, "total_steps": 7532, "loss": 0.22351369261741638, "lr": 1.4180248538385956e-07, "epoch": 1.8984198645598194, "percentage": 94.92, "elapsed_time": "1 day, 1:50:15", "remaining_time": "1:23:03"} +{"current_steps": 7150, "total_steps": 7532, "loss": 0.2603265047073364, "lr": 1.4106663152001487e-07, "epoch": 1.8986854335413623, "percentage": 94.93, "elapsed_time": "1 day, 1:50:29", "remaining_time": "1:22:50"} +{"current_steps": 7151, "total_steps": 7532, "loss": 0.2566663324832916, "lr": 1.4033267834224873e-07, "epoch": 1.8989510025229053, "percentage": 94.94, "elapsed_time": "1 day, 1:50:41", "remaining_time": "1:22:37"} +{"current_steps": 7152, "total_steps": 7532, "loss": 0.23130206763744354, "lr": 1.3960062599205682e-07, "epoch": 1.8992165715044482, "percentage": 94.95, "elapsed_time": "1 day, 1:50:54", "remaining_time": "1:22:24"} +{"current_steps": 7153, "total_steps": 7532, "loss": 0.17946425080299377, "lr": 1.3887047461057179e-07, "epoch": 1.8994821404859912, "percentage": 94.97, "elapsed_time": "1 day, 1:51:08", "remaining_time": "1:22:11"} +{"current_steps": 7154, "total_steps": 7532, "loss": 0.23946328461170197, "lr": 1.3814222433855884e-07, "epoch": 1.8997477094675341, "percentage": 94.98, "elapsed_time": "1 day, 1:51:20", "remaining_time": "1:21:58"} +{"current_steps": 7155, "total_steps": 7532, "loss": 
0.21002715826034546, "lr": 1.3741587531641566e-07, "epoch": 1.900013278449077, "percentage": 94.99, "elapsed_time": "1 day, 1:51:34", "remaining_time": "1:21:45"} +{"current_steps": 7156, "total_steps": 7532, "loss": 0.2121986746788025, "lr": 1.3669142768417242e-07, "epoch": 1.90027884743062, "percentage": 95.01, "elapsed_time": "1 day, 1:51:46", "remaining_time": "1:21:32"} +{"current_steps": 7157, "total_steps": 7532, "loss": 0.26400670409202576, "lr": 1.3596888158149525e-07, "epoch": 1.900544416412163, "percentage": 95.02, "elapsed_time": "1 day, 1:52:00", "remaining_time": "1:21:19"} +{"current_steps": 7158, "total_steps": 7532, "loss": 0.18764406442642212, "lr": 1.3524823714768375e-07, "epoch": 1.900809985393706, "percentage": 95.03, "elapsed_time": "1 day, 1:52:12", "remaining_time": "1:21:06"} +{"current_steps": 7159, "total_steps": 7532, "loss": 0.2550342381000519, "lr": 1.3452949452166686e-07, "epoch": 1.9010755543752489, "percentage": 95.05, "elapsed_time": "1 day, 1:52:26", "remaining_time": "1:20:53"} +{"current_steps": 7160, "total_steps": 7532, "loss": 0.23188576102256775, "lr": 1.3381265384201035e-07, "epoch": 1.9013411233567918, "percentage": 95.06, "elapsed_time": "1 day, 1:52:38", "remaining_time": "1:20:40"} +{"current_steps": 7161, "total_steps": 7532, "loss": 0.23124513030052185, "lr": 1.3309771524691372e-07, "epoch": 1.9016066923383348, "percentage": 95.07, "elapsed_time": "1 day, 1:52:51", "remaining_time": "1:20:27"} +{"current_steps": 7162, "total_steps": 7532, "loss": 0.19941067695617676, "lr": 1.323846788742078e-07, "epoch": 1.9018722613198777, "percentage": 95.09, "elapsed_time": "1 day, 1:53:04", "remaining_time": "1:20:14"} +{"current_steps": 7163, "total_steps": 7532, "loss": 0.22510412335395813, "lr": 1.316735448613593e-07, "epoch": 1.9021378303014207, "percentage": 95.1, "elapsed_time": "1 day, 1:53:18", "remaining_time": "1:20:01"} +{"current_steps": 7164, "total_steps": 7532, "loss": 0.19102326035499573, "lr": 
1.309643133454641e-07, "epoch": 1.9024033992829636, "percentage": 95.11, "elapsed_time": "1 day, 1:53:31", "remaining_time": "1:19:48"} +{"current_steps": 7165, "total_steps": 7532, "loss": 0.20826731622219086, "lr": 1.3025698446325618e-07, "epoch": 1.9026689682645066, "percentage": 95.13, "elapsed_time": "1 day, 1:53:44", "remaining_time": "1:19:35"} +{"current_steps": 7166, "total_steps": 7532, "loss": 0.23238909244537354, "lr": 1.2955155835109757e-07, "epoch": 1.9029345372460496, "percentage": 95.14, "elapsed_time": "1 day, 1:53:57", "remaining_time": "1:19:22"} +{"current_steps": 7167, "total_steps": 7532, "loss": 0.2635011374950409, "lr": 1.2884803514498833e-07, "epoch": 1.9032001062275925, "percentage": 95.15, "elapsed_time": "1 day, 1:54:10", "remaining_time": "1:19:09"} +{"current_steps": 7168, "total_steps": 7532, "loss": 0.2073322981595993, "lr": 1.281464149805578e-07, "epoch": 1.9034656752091355, "percentage": 95.17, "elapsed_time": "1 day, 1:54:22", "remaining_time": "1:18:55"} +{"current_steps": 7169, "total_steps": 7532, "loss": 0.22091326117515564, "lr": 1.274466979930711e-07, "epoch": 1.9037312441906784, "percentage": 95.18, "elapsed_time": "1 day, 1:54:35", "remaining_time": "1:18:42"} +{"current_steps": 7170, "total_steps": 7532, "loss": 0.18613001704216003, "lr": 1.2674888431742472e-07, "epoch": 1.9039968131722214, "percentage": 95.19, "elapsed_time": "1 day, 1:54:49", "remaining_time": "1:18:30"} +{"current_steps": 7171, "total_steps": 7532, "loss": 0.2165849655866623, "lr": 1.2605297408814887e-07, "epoch": 1.9042623821537643, "percentage": 95.21, "elapsed_time": "1 day, 1:55:02", "remaining_time": "1:18:16"} +{"current_steps": 7172, "total_steps": 7532, "loss": 0.21317794919013977, "lr": 1.2535896743940844e-07, "epoch": 1.9045279511353073, "percentage": 95.22, "elapsed_time": "1 day, 1:55:15", "remaining_time": "1:18:03"} +{"current_steps": 7173, "total_steps": 7532, "loss": 0.25221073627471924, "lr": 1.2466686450499866e-07, "epoch": 
1.9047935201168502, "percentage": 95.23, "elapsed_time": "1 day, 1:55:27", "remaining_time": "1:17:50"} +{"current_steps": 7174, "total_steps": 7532, "loss": 0.21598559617996216, "lr": 1.239766654183472e-07, "epoch": 1.9050590890983932, "percentage": 95.25, "elapsed_time": "1 day, 1:55:40", "remaining_time": "1:17:37"} +{"current_steps": 7175, "total_steps": 7532, "loss": 0.2284495085477829, "lr": 1.232883703125187e-07, "epoch": 1.9053246580799361, "percentage": 95.26, "elapsed_time": "1 day, 1:55:53", "remaining_time": "1:17:24"} +{"current_steps": 7176, "total_steps": 7532, "loss": 0.21899332106113434, "lr": 1.2260197932020713e-07, "epoch": 1.905590227061479, "percentage": 95.27, "elapsed_time": "1 day, 1:56:06", "remaining_time": "1:17:11"} +{"current_steps": 7177, "total_steps": 7532, "loss": 0.2633277177810669, "lr": 1.2191749257374097e-07, "epoch": 1.905855796043022, "percentage": 95.29, "elapsed_time": "1 day, 1:56:19", "remaining_time": "1:16:58"} +{"current_steps": 7178, "total_steps": 7532, "loss": 0.2330140471458435, "lr": 1.2123491020508137e-07, "epoch": 1.906121365024565, "percentage": 95.3, "elapsed_time": "1 day, 1:56:32", "remaining_time": "1:16:45"} +{"current_steps": 7179, "total_steps": 7532, "loss": 0.21859750151634216, "lr": 1.2055423234582087e-07, "epoch": 1.906386934006108, "percentage": 95.31, "elapsed_time": "1 day, 1:56:45", "remaining_time": "1:16:32"} +{"current_steps": 7180, "total_steps": 7532, "loss": 0.252164363861084, "lr": 1.198754591271878e-07, "epoch": 1.9066525029876509, "percentage": 95.33, "elapsed_time": "1 day, 1:56:58", "remaining_time": "1:16:19"} +{"current_steps": 7181, "total_steps": 7532, "loss": 0.24968160688877106, "lr": 1.191985906800408e-07, "epoch": 1.906918071969194, "percentage": 95.34, "elapsed_time": "1 day, 1:57:11", "remaining_time": "1:16:06"} +{"current_steps": 7182, "total_steps": 7532, "loss": 0.2083423137664795, "lr": 1.185236271348722e-07, "epoch": 1.907183640950737, "percentage": 95.35, 
"elapsed_time": "1 day, 1:57:23", "remaining_time": "1:15:53"} +{"current_steps": 7183, "total_steps": 7532, "loss": 0.2468394935131073, "lr": 1.1785056862180789e-07, "epoch": 1.90744920993228, "percentage": 95.37, "elapsed_time": "1 day, 1:57:37", "remaining_time": "1:15:40"} +{"current_steps": 7184, "total_steps": 7532, "loss": 0.22417521476745605, "lr": 1.1717941527060405e-07, "epoch": 1.907714778913823, "percentage": 95.38, "elapsed_time": "1 day, 1:57:49", "remaining_time": "1:15:27"} +{"current_steps": 7185, "total_steps": 7532, "loss": 0.2411842793226242, "lr": 1.1651016721065167e-07, "epoch": 1.9079803478953659, "percentage": 95.39, "elapsed_time": "1 day, 1:58:02", "remaining_time": "1:15:14"} +{"current_steps": 7186, "total_steps": 7532, "loss": 0.24650761485099792, "lr": 1.1584282457097417e-07, "epoch": 1.9082459168769088, "percentage": 95.41, "elapsed_time": "1 day, 1:58:15", "remaining_time": "1:15:01"} +{"current_steps": 7187, "total_steps": 7532, "loss": 0.22433717548847198, "lr": 1.1517738748022755e-07, "epoch": 1.9085114858584518, "percentage": 95.42, "elapsed_time": "1 day, 1:58:28", "remaining_time": "1:14:48"} +{"current_steps": 7188, "total_steps": 7532, "loss": 0.20867910981178284, "lr": 1.145138560667003e-07, "epoch": 1.9087770548399947, "percentage": 95.43, "elapsed_time": "1 day, 1:58:41", "remaining_time": "1:14:35"} +{"current_steps": 7189, "total_steps": 7532, "loss": 0.21889618039131165, "lr": 1.138522304583134e-07, "epoch": 1.9090426238215377, "percentage": 95.45, "elapsed_time": "1 day, 1:58:54", "remaining_time": "1:14:22"} +{"current_steps": 7190, "total_steps": 7532, "loss": 0.19350749254226685, "lr": 1.1319251078261928e-07, "epoch": 1.9093081928030806, "percentage": 95.46, "elapsed_time": "1 day, 1:59:07", "remaining_time": "1:14:09"} +{"current_steps": 7191, "total_steps": 7532, "loss": 0.19123657047748566, "lr": 1.125346971668051e-07, "epoch": 1.9095737617846236, "percentage": 95.47, "elapsed_time": "1 day, 1:59:19", 
"remaining_time": "1:13:56"} +{"current_steps": 7192, "total_steps": 7532, "loss": 0.21433782577514648, "lr": 1.118787897376905e-07, "epoch": 1.9098393307661665, "percentage": 95.49, "elapsed_time": "1 day, 1:59:32", "remaining_time": "1:13:43"} +{"current_steps": 7193, "total_steps": 7532, "loss": 0.2521187663078308, "lr": 1.1122478862172437e-07, "epoch": 1.9101048997477095, "percentage": 95.5, "elapsed_time": "1 day, 1:59:45", "remaining_time": "1:13:30"} +{"current_steps": 7194, "total_steps": 7532, "loss": 0.2141486555337906, "lr": 1.1057269394499248e-07, "epoch": 1.9103704687292524, "percentage": 95.51, "elapsed_time": "1 day, 1:59:58", "remaining_time": "1:13:17"} +{"current_steps": 7195, "total_steps": 7532, "loss": 0.22960343956947327, "lr": 1.0992250583320985e-07, "epoch": 1.9106360377107954, "percentage": 95.53, "elapsed_time": "1 day, 2:00:11", "remaining_time": "1:13:04"} +{"current_steps": 7196, "total_steps": 7532, "loss": 0.21809744834899902, "lr": 1.092742244117262e-07, "epoch": 1.9109016066923383, "percentage": 95.54, "elapsed_time": "1 day, 2:00:24", "remaining_time": "1:12:51"} +{"current_steps": 7197, "total_steps": 7532, "loss": 0.22418212890625, "lr": 1.0862784980552044e-07, "epoch": 1.9111671756738813, "percentage": 95.55, "elapsed_time": "1 day, 2:00:37", "remaining_time": "1:12:38"} +{"current_steps": 7198, "total_steps": 7532, "loss": 0.22050701081752777, "lr": 1.0798338213920845e-07, "epoch": 1.9114327446554242, "percentage": 95.57, "elapsed_time": "1 day, 2:00:49", "remaining_time": "1:12:25"} +{"current_steps": 7199, "total_steps": 7532, "loss": 0.23200345039367676, "lr": 1.0734082153703418e-07, "epoch": 1.9116983136369672, "percentage": 95.58, "elapsed_time": "1 day, 2:01:03", "remaining_time": "1:12:12"} +{"current_steps": 7200, "total_steps": 7532, "loss": 0.18366631865501404, "lr": 1.0670016812287631e-07, "epoch": 1.9119638826185101, "percentage": 95.59, "elapsed_time": "1 day, 2:01:15", "remaining_time": "1:11:59"} 
+{"current_steps": 7201, "total_steps": 7532, "loss": 0.24362193048000336, "lr": 1.0606142202024605e-07, "epoch": 1.912229451600053, "percentage": 95.61, "elapsed_time": "1 day, 2:01:34", "remaining_time": "1:11:46"} +{"current_steps": 7202, "total_steps": 7532, "loss": 0.2216200977563858, "lr": 1.0542458335228601e-07, "epoch": 1.912495020581596, "percentage": 95.62, "elapsed_time": "1 day, 2:01:47", "remaining_time": "1:11:33"} +{"current_steps": 7203, "total_steps": 7532, "loss": 0.20216065645217896, "lr": 1.0478965224176907e-07, "epoch": 1.912760589563139, "percentage": 95.63, "elapsed_time": "1 day, 2:02:00", "remaining_time": "1:11:20"} +{"current_steps": 7204, "total_steps": 7532, "loss": 0.22054359316825867, "lr": 1.041566288111051e-07, "epoch": 1.913026158544682, "percentage": 95.65, "elapsed_time": "1 day, 2:02:13", "remaining_time": "1:11:07"} +{"current_steps": 7205, "total_steps": 7532, "loss": 0.21569015085697174, "lr": 1.0352551318233206e-07, "epoch": 1.913291727526225, "percentage": 95.66, "elapsed_time": "1 day, 2:02:25", "remaining_time": "1:10:54"} +{"current_steps": 7206, "total_steps": 7532, "loss": 0.22967267036437988, "lr": 1.028963054771226e-07, "epoch": 1.9135572965077678, "percentage": 95.67, "elapsed_time": "1 day, 2:02:38", "remaining_time": "1:10:41"} +{"current_steps": 7207, "total_steps": 7532, "loss": 0.2422460913658142, "lr": 1.0226900581677968e-07, "epoch": 1.9138228654893108, "percentage": 95.69, "elapsed_time": "1 day, 2:02:51", "remaining_time": "1:10:28"} +{"current_steps": 7208, "total_steps": 7532, "loss": 0.25891292095184326, "lr": 1.0164361432223879e-07, "epoch": 1.9140884344708538, "percentage": 95.7, "elapsed_time": "1 day, 2:03:04", "remaining_time": "1:10:15"} +{"current_steps": 7209, "total_steps": 7532, "loss": 0.26915764808654785, "lr": 1.0102013111406905e-07, "epoch": 1.914354003452397, "percentage": 95.71, "elapsed_time": "1 day, 2:03:17", "remaining_time": "1:10:02"} +{"current_steps": 7210, "total_steps": 7532, 
"loss": 0.2268485426902771, "lr": 1.0039855631247097e-07, "epoch": 1.9146195724339399, "percentage": 95.72, "elapsed_time": "1 day, 2:03:30", "remaining_time": "1:09:49"} +{"current_steps": 7211, "total_steps": 7532, "loss": 0.22551512718200684, "lr": 9.977889003727647e-08, "epoch": 1.9148851414154828, "percentage": 95.74, "elapsed_time": "1 day, 2:03:42", "remaining_time": "1:09:36"} +{"current_steps": 7212, "total_steps": 7532, "loss": 0.24224743247032166, "lr": 9.91611324079489e-08, "epoch": 1.9151507103970258, "percentage": 95.75, "elapsed_time": "1 day, 2:03:55", "remaining_time": "1:09:23"} +{"current_steps": 7213, "total_steps": 7532, "loss": 0.19550879299640656, "lr": 9.854528354358517e-08, "epoch": 1.9154162793785687, "percentage": 95.76, "elapsed_time": "1 day, 2:04:08", "remaining_time": "1:09:10"} +{"current_steps": 7214, "total_steps": 7532, "loss": 0.24986523389816284, "lr": 9.793134356291478e-08, "epoch": 1.9156818483601117, "percentage": 95.78, "elapsed_time": "1 day, 2:04:21", "remaining_time": "1:08:57"} +{"current_steps": 7215, "total_steps": 7532, "loss": 0.2565170228481293, "lr": 9.731931258429638e-08, "epoch": 1.9159474173416546, "percentage": 95.79, "elapsed_time": "1 day, 2:04:34", "remaining_time": "1:08:44"} +{"current_steps": 7216, "total_steps": 7532, "loss": 0.2166958749294281, "lr": 9.670919072572449e-08, "epoch": 1.9162129863231976, "percentage": 95.8, "elapsed_time": "1 day, 2:04:46", "remaining_time": "1:08:31"} +{"current_steps": 7217, "total_steps": 7532, "loss": 0.2002115249633789, "lr": 9.610097810482166e-08, "epoch": 1.9164785553047405, "percentage": 95.82, "elapsed_time": "1 day, 2:04:59", "remaining_time": "1:08:18"} +{"current_steps": 7218, "total_steps": 7532, "loss": 0.209486186504364, "lr": 9.549467483884412e-08, "epoch": 1.9167441242862835, "percentage": 95.83, "elapsed_time": "1 day, 2:05:12", "remaining_time": "1:08:05"} +{"current_steps": 7219, "total_steps": 7532, "loss": 0.22061321139335632, "lr": 
9.489028104468056e-08, "epoch": 1.9170096932678264, "percentage": 95.84, "elapsed_time": "1 day, 2:05:26", "remaining_time": "1:07:52"} +{"current_steps": 7220, "total_steps": 7532, "loss": 0.21880047023296356, "lr": 9.428779683885114e-08, "epoch": 1.9172752622493694, "percentage": 95.86, "elapsed_time": "1 day, 2:05:38", "remaining_time": "1:07:39"} +{"current_steps": 7221, "total_steps": 7532, "loss": 0.22674325108528137, "lr": 9.368722233750849e-08, "epoch": 1.9175408312309123, "percentage": 95.87, "elapsed_time": "1 day, 2:05:51", "remaining_time": "1:07:26"} +{"current_steps": 7222, "total_steps": 7532, "loss": 0.22100718319416046, "lr": 9.308855765643332e-08, "epoch": 1.9178064002124553, "percentage": 95.88, "elapsed_time": "1 day, 2:06:04", "remaining_time": "1:07:13"} +{"current_steps": 7223, "total_steps": 7532, "loss": 0.23105254769325256, "lr": 9.249180291104553e-08, "epoch": 1.9180719691939982, "percentage": 95.9, "elapsed_time": "1 day, 2:06:17", "remaining_time": "1:07:00"} +{"current_steps": 7224, "total_steps": 7532, "loss": 0.22483405470848083, "lr": 9.189695821638755e-08, "epoch": 1.9183375381755412, "percentage": 95.91, "elapsed_time": "1 day, 2:06:30", "remaining_time": "1:06:47"} +{"current_steps": 7225, "total_steps": 7532, "loss": 0.1939004510641098, "lr": 9.130402368714208e-08, "epoch": 1.9186031071570842, "percentage": 95.92, "elapsed_time": "1 day, 2:06:42", "remaining_time": "1:06:34"} +{"current_steps": 7226, "total_steps": 7532, "loss": 0.21722440421581268, "lr": 9.071299943761769e-08, "epoch": 1.918868676138627, "percentage": 95.94, "elapsed_time": "1 day, 2:06:55", "remaining_time": "1:06:21"} +{"current_steps": 7227, "total_steps": 7532, "loss": 0.24213966727256775, "lr": 9.012388558175877e-08, "epoch": 1.91913424512017, "percentage": 95.95, "elapsed_time": "1 day, 2:07:08", "remaining_time": "1:06:08"} +{"current_steps": 7228, "total_steps": 7532, "loss": 0.2305546998977661, "lr": 8.953668223313783e-08, "epoch": 1.919399814101713, 
"percentage": 95.96, "elapsed_time": "1 day, 2:07:21", "remaining_time": "1:05:55"} +{"current_steps": 7229, "total_steps": 7532, "loss": 0.2678033709526062, "lr": 8.895138950496207e-08, "epoch": 1.919665383083256, "percentage": 95.98, "elapsed_time": "1 day, 2:07:34", "remaining_time": "1:05:42"} +{"current_steps": 7230, "total_steps": 7532, "loss": 0.2491014301776886, "lr": 8.836800751006791e-08, "epoch": 1.919930952064799, "percentage": 95.99, "elapsed_time": "1 day, 2:07:47", "remaining_time": "1:05:29"} +{"current_steps": 7231, "total_steps": 7532, "loss": 0.21837326884269714, "lr": 8.778653636092537e-08, "epoch": 1.9201965210463419, "percentage": 96.0, "elapsed_time": "1 day, 2:08:00", "remaining_time": "1:05:16"} +{"current_steps": 7232, "total_steps": 7532, "loss": 0.24149999022483826, "lr": 8.72069761696348e-08, "epoch": 1.9204620900278848, "percentage": 96.02, "elapsed_time": "1 day, 2:08:13", "remaining_time": "1:05:03"} +{"current_steps": 7233, "total_steps": 7532, "loss": 0.2124684453010559, "lr": 8.662932704792793e-08, "epoch": 1.9207276590094278, "percentage": 96.03, "elapsed_time": "1 day, 2:08:25", "remaining_time": "1:04:50"} +{"current_steps": 7234, "total_steps": 7532, "loss": 0.2452150285243988, "lr": 8.60535891071712e-08, "epoch": 1.9209932279909707, "percentage": 96.04, "elapsed_time": "1 day, 2:08:38", "remaining_time": "1:04:37"} +{"current_steps": 7235, "total_steps": 7532, "loss": 0.23598846793174744, "lr": 8.547976245835698e-08, "epoch": 1.9212587969725137, "percentage": 96.06, "elapsed_time": "1 day, 2:08:51", "remaining_time": "1:04:24"} +{"current_steps": 7236, "total_steps": 7532, "loss": 0.2105225920677185, "lr": 8.490784721211454e-08, "epoch": 1.9215243659540566, "percentage": 96.07, "elapsed_time": "1 day, 2:09:04", "remaining_time": "1:04:11"} +{"current_steps": 7237, "total_steps": 7532, "loss": 0.2585388720035553, "lr": 8.433784347870122e-08, "epoch": 1.9217899349355996, "percentage": 96.08, "elapsed_time": "1 day, 2:09:17", 
"remaining_time": "1:03:58"} +{"current_steps": 7238, "total_steps": 7532, "loss": 0.21703900396823883, "lr": 8.376975136800691e-08, "epoch": 1.9220555039171425, "percentage": 96.1, "elapsed_time": "1 day, 2:09:30", "remaining_time": "1:03:45"} +{"current_steps": 7239, "total_steps": 7532, "loss": 0.22771210968494415, "lr": 8.3203570989554e-08, "epoch": 1.9223210728986855, "percentage": 96.11, "elapsed_time": "1 day, 2:09:42", "remaining_time": "1:03:32"} +{"current_steps": 7240, "total_steps": 7532, "loss": 0.22535575926303864, "lr": 8.263930245249408e-08, "epoch": 1.9225866418802284, "percentage": 96.12, "elapsed_time": "1 day, 2:09:55", "remaining_time": "1:03:19"} +{"current_steps": 7241, "total_steps": 7532, "loss": 0.2052595466375351, "lr": 8.207694586561344e-08, "epoch": 1.9228522108617714, "percentage": 96.14, "elapsed_time": "1 day, 2:10:08", "remaining_time": "1:03:06"} +{"current_steps": 7242, "total_steps": 7532, "loss": 0.19611456990242004, "lr": 8.151650133732536e-08, "epoch": 1.9231177798433143, "percentage": 96.15, "elapsed_time": "1 day, 2:10:21", "remaining_time": "1:02:53"} +{"current_steps": 7243, "total_steps": 7532, "loss": 0.20256826281547546, "lr": 8.095796897567787e-08, "epoch": 1.9233833488248573, "percentage": 96.16, "elapsed_time": "1 day, 2:10:34", "remaining_time": "1:02:40"} +{"current_steps": 7244, "total_steps": 7532, "loss": 0.25462138652801514, "lr": 8.040134888835038e-08, "epoch": 1.9236489178064002, "percentage": 96.18, "elapsed_time": "1 day, 2:10:47", "remaining_time": "1:02:26"} +{"current_steps": 7245, "total_steps": 7532, "loss": 0.27362316846847534, "lr": 7.984664118265262e-08, "epoch": 1.9239144867879432, "percentage": 96.19, "elapsed_time": "1 day, 2:11:00", "remaining_time": "1:02:13"} +{"current_steps": 7246, "total_steps": 7532, "loss": 0.23749098181724548, "lr": 7.929384596552459e-08, "epoch": 1.9241800557694861, "percentage": 96.2, "elapsed_time": "1 day, 2:11:12", "remaining_time": "1:02:00"} +{"current_steps": 
7247, "total_steps": 7532, "loss": 0.2472018599510193, "lr": 7.874296334353882e-08, "epoch": 1.924445624751029, "percentage": 96.22, "elapsed_time": "1 day, 2:11:25", "remaining_time": "1:01:47"} +{"current_steps": 7248, "total_steps": 7532, "loss": 0.23181989789009094, "lr": 7.819399342290034e-08, "epoch": 1.924711193732572, "percentage": 96.23, "elapsed_time": "1 day, 2:11:38", "remaining_time": "1:01:34"} +{"current_steps": 7249, "total_steps": 7532, "loss": 0.21363665163516998, "lr": 7.764693630944231e-08, "epoch": 1.924976762714115, "percentage": 96.24, "elapsed_time": "1 day, 2:11:51", "remaining_time": "1:01:21"} +{"current_steps": 7250, "total_steps": 7532, "loss": 0.21239221096038818, "lr": 7.710179210863144e-08, "epoch": 1.925242331695658, "percentage": 96.26, "elapsed_time": "1 day, 2:12:04", "remaining_time": "1:01:08"} +{"current_steps": 7251, "total_steps": 7532, "loss": 0.2643742263317108, "lr": 7.655856092556591e-08, "epoch": 1.925507900677201, "percentage": 96.27, "elapsed_time": "1 day, 2:12:17", "remaining_time": "1:00:55"} +{"current_steps": 7252, "total_steps": 7532, "loss": 0.2232428789138794, "lr": 7.601724286497414e-08, "epoch": 1.9257734696587439, "percentage": 96.28, "elapsed_time": "1 day, 2:12:29", "remaining_time": "1:00:42"} +{"current_steps": 7253, "total_steps": 7532, "loss": 0.2052377462387085, "lr": 7.547783803121489e-08, "epoch": 1.9260390386402868, "percentage": 96.3, "elapsed_time": "1 day, 2:12:43", "remaining_time": "1:00:29"} +{"current_steps": 7254, "total_steps": 7532, "loss": 0.22194740176200867, "lr": 7.494034652827942e-08, "epoch": 1.9263046076218298, "percentage": 96.31, "elapsed_time": "1 day, 2:12:55", "remaining_time": "1:00:16"} +{"current_steps": 7255, "total_steps": 7532, "loss": 0.22004084289073944, "lr": 7.440476845979038e-08, "epoch": 1.9265701766033727, "percentage": 96.32, "elapsed_time": "1 day, 2:13:08", "remaining_time": "1:00:03"} +{"current_steps": 7256, "total_steps": 7532, "loss": 0.2218078374862671, 
"lr": 7.387110392899965e-08, "epoch": 1.9268357455849157, "percentage": 96.34, "elapsed_time": "1 day, 2:13:21", "remaining_time": "0:59:50"} +{"current_steps": 7257, "total_steps": 7532, "loss": 0.23272839188575745, "lr": 7.33393530387927e-08, "epoch": 1.9271013145664586, "percentage": 96.35, "elapsed_time": "1 day, 2:13:34", "remaining_time": "0:59:37"} +{"current_steps": 7258, "total_steps": 7532, "loss": 0.23666653037071228, "lr": 7.280951589168417e-08, "epoch": 1.9273668835480016, "percentage": 96.36, "elapsed_time": "1 day, 2:13:47", "remaining_time": "0:59:24"} +{"current_steps": 7259, "total_steps": 7532, "loss": 0.21946533024311066, "lr": 7.228159258982126e-08, "epoch": 1.9276324525295445, "percentage": 96.38, "elapsed_time": "1 day, 2:14:00", "remaining_time": "0:59:11"} +{"current_steps": 7260, "total_steps": 7532, "loss": 0.22158634662628174, "lr": 7.175558323498033e-08, "epoch": 1.9278980215110875, "percentage": 96.39, "elapsed_time": "1 day, 2:14:13", "remaining_time": "0:58:58"} +{"current_steps": 7261, "total_steps": 7532, "loss": 0.19978654384613037, "lr": 7.123148792857026e-08, "epoch": 1.9281635904926304, "percentage": 96.4, "elapsed_time": "1 day, 2:14:26", "remaining_time": "0:58:45"} +{"current_steps": 7262, "total_steps": 7532, "loss": 0.21197813749313354, "lr": 7.070930677163023e-08, "epoch": 1.9284291594741734, "percentage": 96.42, "elapsed_time": "1 day, 2:14:39", "remaining_time": "0:58:32"} +{"current_steps": 7263, "total_steps": 7532, "loss": 0.22650468349456787, "lr": 7.018903986483083e-08, "epoch": 1.9286947284557163, "percentage": 96.43, "elapsed_time": "1 day, 2:14:51", "remaining_time": "0:58:19"} +{"current_steps": 7264, "total_steps": 7532, "loss": 0.22257481515407562, "lr": 6.967068730847293e-08, "epoch": 1.9289602974372593, "percentage": 96.44, "elapsed_time": "1 day, 2:15:04", "remaining_time": "0:58:06"} +{"current_steps": 7265, "total_steps": 7532, "loss": 0.24899804592132568, "lr": 6.915424920248992e-08, "epoch": 
1.9292258664188022, "percentage": 96.46, "elapsed_time": "1 day, 2:15:16", "remaining_time": "0:57:53"} +{"current_steps": 7266, "total_steps": 7532, "loss": 0.250610888004303, "lr": 6.863972564644328e-08, "epoch": 1.9294914354003452, "percentage": 96.47, "elapsed_time": "1 day, 2:15:29", "remaining_time": "0:57:40"} +{"current_steps": 7267, "total_steps": 7532, "loss": 0.22786292433738708, "lr": 6.81271167395292e-08, "epoch": 1.9297570043818881, "percentage": 96.48, "elapsed_time": "1 day, 2:15:43", "remaining_time": "0:57:27"} +{"current_steps": 7268, "total_steps": 7532, "loss": 0.22816789150238037, "lr": 6.761642258056977e-08, "epoch": 1.930022573363431, "percentage": 96.49, "elapsed_time": "1 day, 2:15:55", "remaining_time": "0:57:14"} +{"current_steps": 7269, "total_steps": 7532, "loss": 0.2589687407016754, "lr": 6.7107643268024e-08, "epoch": 1.930288142344974, "percentage": 96.51, "elapsed_time": "1 day, 2:16:08", "remaining_time": "0:57:01"} +{"current_steps": 7270, "total_steps": 7532, "loss": 0.2281583547592163, "lr": 6.660077889997673e-08, "epoch": 1.930553711326517, "percentage": 96.52, "elapsed_time": "1 day, 2:16:21", "remaining_time": "0:56:48"} +{"current_steps": 7271, "total_steps": 7532, "loss": 0.22833740711212158, "lr": 6.60958295741454e-08, "epoch": 1.93081928030806, "percentage": 96.53, "elapsed_time": "1 day, 2:16:35", "remaining_time": "0:56:35"} +{"current_steps": 7272, "total_steps": 7532, "loss": 0.20720313489437103, "lr": 6.559279538787877e-08, "epoch": 1.931084849289603, "percentage": 96.55, "elapsed_time": "1 day, 2:16:48", "remaining_time": "0:56:22"} +{"current_steps": 7273, "total_steps": 7532, "loss": 0.17191773653030396, "lr": 6.509167643815594e-08, "epoch": 1.9313504182711458, "percentage": 96.56, "elapsed_time": "1 day, 2:17:01", "remaining_time": "0:56:09"} +{"current_steps": 7274, "total_steps": 7532, "loss": 0.23586943745613098, "lr": 6.459247282158632e-08, "epoch": 1.9316159872526888, "percentage": 96.57, "elapsed_time": "1 
day, 2:17:13", "remaining_time": "0:55:56"} +{"current_steps": 7275, "total_steps": 7532, "loss": 0.21353168785572052, "lr": 6.409518463441067e-08, "epoch": 1.9318815562342317, "percentage": 96.59, "elapsed_time": "1 day, 2:17:27", "remaining_time": "0:55:43"} +{"current_steps": 7276, "total_steps": 7532, "loss": 0.23148195445537567, "lr": 6.359981197250009e-08, "epoch": 1.9321471252157747, "percentage": 96.6, "elapsed_time": "1 day, 2:17:39", "remaining_time": "0:55:30"} +{"current_steps": 7277, "total_steps": 7532, "loss": 0.2113666534423828, "lr": 6.310635493135709e-08, "epoch": 1.9324126941973176, "percentage": 96.61, "elapsed_time": "1 day, 2:17:52", "remaining_time": "0:55:17"} +{"current_steps": 7278, "total_steps": 7532, "loss": 0.27689510583877563, "lr": 6.261481360611332e-08, "epoch": 1.9326782631788606, "percentage": 96.63, "elapsed_time": "1 day, 2:18:05", "remaining_time": "0:55:04"} +{"current_steps": 7279, "total_steps": 7532, "loss": 0.23746277391910553, "lr": 6.2125188091533e-08, "epoch": 1.9329438321604036, "percentage": 96.64, "elapsed_time": "1 day, 2:18:18", "remaining_time": "0:54:51"} +{"current_steps": 7280, "total_steps": 7532, "loss": 0.23123708367347717, "lr": 6.163747848201062e-08, "epoch": 1.9332094011419465, "percentage": 96.65, "elapsed_time": "1 day, 2:18:30", "remaining_time": "0:54:38"} +{"current_steps": 7281, "total_steps": 7532, "loss": 0.23640167713165283, "lr": 6.115168487157097e-08, "epoch": 1.9334749701234895, "percentage": 96.67, "elapsed_time": "1 day, 2:18:43", "remaining_time": "0:54:25"} +{"current_steps": 7282, "total_steps": 7532, "loss": 0.2259385585784912, "lr": 6.066780735386801e-08, "epoch": 1.9337405391050324, "percentage": 96.68, "elapsed_time": "1 day, 2:18:56", "remaining_time": "0:54:12"} +{"current_steps": 7283, "total_steps": 7532, "loss": 0.219761461019516, "lr": 6.018584602218824e-08, "epoch": 1.9340061080865754, "percentage": 96.69, "elapsed_time": "1 day, 2:19:08", "remaining_time": "0:53:59"} 
+{"current_steps": 7284, "total_steps": 7532, "loss": 0.24411989748477936, "lr": 5.970580096944733e-08, "epoch": 1.9342716770681183, "percentage": 96.71, "elapsed_time": "1 day, 2:19:20", "remaining_time": "0:53:46"} +{"current_steps": 7285, "total_steps": 7532, "loss": 0.232415571808815, "lr": 5.922767228819459e-08, "epoch": 1.9345372460496613, "percentage": 96.72, "elapsed_time": "1 day, 2:19:33", "remaining_time": "0:53:33"} +{"current_steps": 7286, "total_steps": 7532, "loss": 0.25938165187835693, "lr": 5.875146007060517e-08, "epoch": 1.9348028150312042, "percentage": 96.73, "elapsed_time": "1 day, 2:19:46", "remaining_time": "0:53:20"} +{"current_steps": 7287, "total_steps": 7532, "loss": 0.22138425707817078, "lr": 5.827716440848785e-08, "epoch": 1.9350683840127472, "percentage": 96.75, "elapsed_time": "1 day, 2:19:58", "remaining_time": "0:53:07"} +{"current_steps": 7288, "total_steps": 7532, "loss": 0.19724398851394653, "lr": 5.7804785393282825e-08, "epoch": 1.9353339529942901, "percentage": 96.76, "elapsed_time": "1 day, 2:20:11", "remaining_time": "0:52:54"} +{"current_steps": 7289, "total_steps": 7532, "loss": 0.25307583808898926, "lr": 5.7334323116056136e-08, "epoch": 1.935599521975833, "percentage": 96.77, "elapsed_time": "1 day, 2:20:23", "remaining_time": "0:52:41"} +{"current_steps": 7290, "total_steps": 7532, "loss": 0.2436421811580658, "lr": 5.686577766751078e-08, "epoch": 1.935865090957376, "percentage": 96.79, "elapsed_time": "1 day, 2:20:36", "remaining_time": "0:52:28"} +{"current_steps": 7291, "total_steps": 7532, "loss": 0.2164984941482544, "lr": 5.6399149137973394e-08, "epoch": 1.936130659938919, "percentage": 96.8, "elapsed_time": "1 day, 2:20:48", "remaining_time": "0:52:15"} +{"current_steps": 7292, "total_steps": 7532, "loss": 0.22526800632476807, "lr": 5.5934437617407576e-08, "epoch": 1.936396228920462, "percentage": 96.81, "elapsed_time": "1 day, 2:21:01", "remaining_time": "0:52:02"} +{"current_steps": 7293, "total_steps": 7532, 
"loss": 0.27787747979164124, "lr": 5.547164319540277e-08, "epoch": 1.936661797902005, "percentage": 96.83, "elapsed_time": "1 day, 2:21:14", "remaining_time": "0:51:49"} +{"current_steps": 7294, "total_steps": 7532, "loss": 0.2188001275062561, "lr": 5.5010765961179825e-08, "epoch": 1.936927366883548, "percentage": 96.84, "elapsed_time": "1 day, 2:21:27", "remaining_time": "0:51:36"} +{"current_steps": 7295, "total_steps": 7532, "loss": 0.22620335221290588, "lr": 5.4551806003591e-08, "epoch": 1.937192935865091, "percentage": 96.85, "elapsed_time": "1 day, 2:21:39", "remaining_time": "0:51:23"} +{"current_steps": 7296, "total_steps": 7532, "loss": 0.20357783138751984, "lr": 5.409476341111775e-08, "epoch": 1.937458504846634, "percentage": 96.87, "elapsed_time": "1 day, 2:21:52", "remaining_time": "0:51:10"} +{"current_steps": 7297, "total_steps": 7532, "loss": 0.22717830538749695, "lr": 5.3639638271872906e-08, "epoch": 1.937724073828177, "percentage": 96.88, "elapsed_time": "1 day, 2:22:05", "remaining_time": "0:50:57"} +{"current_steps": 7298, "total_steps": 7532, "loss": 0.20139163732528687, "lr": 5.318643067360074e-08, "epoch": 1.9379896428097199, "percentage": 96.89, "elapsed_time": "1 day, 2:22:18", "remaining_time": "0:50:44"} +{"current_steps": 7299, "total_steps": 7532, "loss": 0.2620807886123657, "lr": 5.273514070367247e-08, "epoch": 1.9382552117912628, "percentage": 96.91, "elapsed_time": "1 day, 2:22:30", "remaining_time": "0:50:31"} +{"current_steps": 7300, "total_steps": 7532, "loss": 0.2102596014738083, "lr": 5.2285768449091834e-08, "epoch": 1.9385207807728058, "percentage": 96.92, "elapsed_time": "1 day, 2:22:43", "remaining_time": "0:50:18"} +{"current_steps": 7301, "total_steps": 7532, "loss": 0.2105238288640976, "lr": 5.183831399649175e-08, "epoch": 1.9387863497543487, "percentage": 96.93, "elapsed_time": "1 day, 2:23:01", "remaining_time": "0:50:05"} +{"current_steps": 7302, "total_steps": 7532, "loss": 0.22178848087787628, "lr": 
5.1392777432138773e-08, "epoch": 1.9390519187358917, "percentage": 96.95, "elapsed_time": "1 day, 2:23:14", "remaining_time": "0:49:52"} +{"current_steps": 7303, "total_steps": 7532, "loss": 0.23375345766544342, "lr": 5.094915884192419e-08, "epoch": 1.9393174877174346, "percentage": 96.96, "elapsed_time": "1 day, 2:23:27", "remaining_time": "0:49:39"} +{"current_steps": 7304, "total_steps": 7532, "loss": 0.22709332406520844, "lr": 5.050745831137405e-08, "epoch": 1.9395830566989776, "percentage": 96.97, "elapsed_time": "1 day, 2:23:40", "remaining_time": "0:49:26"} +{"current_steps": 7305, "total_steps": 7532, "loss": 0.2312362790107727, "lr": 5.0067675925642437e-08, "epoch": 1.9398486256805205, "percentage": 96.99, "elapsed_time": "1 day, 2:23:54", "remaining_time": "0:49:13"} +{"current_steps": 7306, "total_steps": 7532, "loss": 0.2014419138431549, "lr": 4.962981176951376e-08, "epoch": 1.9401141946620635, "percentage": 97.0, "elapsed_time": "1 day, 2:24:06", "remaining_time": "0:49:00"} +{"current_steps": 7307, "total_steps": 7532, "loss": 0.23700466752052307, "lr": 4.9193865927404936e-08, "epoch": 1.9403797636436064, "percentage": 97.01, "elapsed_time": "1 day, 2:24:20", "remaining_time": "0:48:47"} +{"current_steps": 7308, "total_steps": 7532, "loss": 0.23362770676612854, "lr": 4.8759838483358745e-08, "epoch": 1.9406453326251494, "percentage": 97.03, "elapsed_time": "1 day, 2:24:32", "remaining_time": "0:48:34"} +{"current_steps": 7309, "total_steps": 7532, "loss": 0.26057323813438416, "lr": 4.832772952105269e-08, "epoch": 1.9409109016066923, "percentage": 97.04, "elapsed_time": "1 day, 2:24:45", "remaining_time": "0:48:21"} +{"current_steps": 7310, "total_steps": 7532, "loss": 0.20954950153827667, "lr": 4.789753912379014e-08, "epoch": 1.9411764705882353, "percentage": 97.05, "elapsed_time": "1 day, 2:24:58", "remaining_time": "0:48:08"} +{"current_steps": 7311, "total_steps": 7532, "loss": 0.2100827842950821, "lr": 4.746926737450919e-08, "epoch": 
1.9414420395697782, "percentage": 97.07, "elapsed_time": "1 day, 2:25:11", "remaining_time": "0:47:55"} +{"current_steps": 7312, "total_steps": 7532, "loss": 0.216691792011261, "lr": 4.7042914355773795e-08, "epoch": 1.9417076085513212, "percentage": 97.08, "elapsed_time": "1 day, 2:25:23", "remaining_time": "0:47:42"} +{"current_steps": 7313, "total_steps": 7532, "loss": 0.22815749049186707, "lr": 4.6618480149780434e-08, "epoch": 1.9419731775328641, "percentage": 97.09, "elapsed_time": "1 day, 2:25:36", "remaining_time": "0:47:29"} +{"current_steps": 7314, "total_steps": 7532, "loss": 0.23365731537342072, "lr": 4.6195964838353646e-08, "epoch": 1.942238746514407, "percentage": 97.11, "elapsed_time": "1 day, 2:25:49", "remaining_time": "0:47:16"} +{"current_steps": 7315, "total_steps": 7532, "loss": 0.2112172693014145, "lr": 4.577536850295161e-08, "epoch": 1.94250431549595, "percentage": 97.12, "elapsed_time": "1 day, 2:26:02", "remaining_time": "0:47:02"} +{"current_steps": 7316, "total_steps": 7532, "loss": 0.21821950376033783, "lr": 4.5356691224659466e-08, "epoch": 1.942769884477493, "percentage": 97.13, "elapsed_time": "1 day, 2:26:15", "remaining_time": "0:46:50"} +{"current_steps": 7317, "total_steps": 7532, "loss": 0.2374412566423416, "lr": 4.4939933084192646e-08, "epoch": 1.943035453459036, "percentage": 97.15, "elapsed_time": "1 day, 2:26:28", "remaining_time": "0:46:36"} +{"current_steps": 7318, "total_steps": 7532, "loss": 0.2483779489994049, "lr": 4.4525094161897987e-08, "epoch": 1.943301022440579, "percentage": 97.16, "elapsed_time": "1 day, 2:26:42", "remaining_time": "0:46:23"} +{"current_steps": 7319, "total_steps": 7532, "loss": 0.23641882836818695, "lr": 4.411217453775152e-08, "epoch": 1.9435665914221218, "percentage": 97.17, "elapsed_time": "1 day, 2:26:55", "remaining_time": "0:46:10"} +{"current_steps": 7320, "total_steps": 7532, "loss": 0.24779492616653442, "lr": 4.370117429135956e-08, "epoch": 1.9438321604036648, "percentage": 97.19, 
"elapsed_time": "1 day, 2:27:08", "remaining_time": "0:45:57"} +{"current_steps": 7321, "total_steps": 7532, "loss": 0.20288071036338806, "lr": 4.329209350195651e-08, "epoch": 1.944097729385208, "percentage": 97.2, "elapsed_time": "1 day, 2:27:20", "remaining_time": "0:45:44"} +{"current_steps": 7322, "total_steps": 7532, "loss": 0.24286144971847534, "lr": 4.288493224840928e-08, "epoch": 1.944363298366751, "percentage": 97.21, "elapsed_time": "1 day, 2:27:34", "remaining_time": "0:45:31"} +{"current_steps": 7323, "total_steps": 7532, "loss": 0.22825902700424194, "lr": 4.2479690609213976e-08, "epoch": 1.9446288673482939, "percentage": 97.23, "elapsed_time": "1 day, 2:27:46", "remaining_time": "0:45:18"} +{"current_steps": 7324, "total_steps": 7532, "loss": 0.22563335299491882, "lr": 4.207636866249587e-08, "epoch": 1.9448944363298368, "percentage": 97.24, "elapsed_time": "1 day, 2:28:00", "remaining_time": "0:45:05"} +{"current_steps": 7325, "total_steps": 7532, "loss": 0.22853273153305054, "lr": 4.167496648601166e-08, "epoch": 1.9451600053113798, "percentage": 97.25, "elapsed_time": "1 day, 2:28:13", "remaining_time": "0:44:52"} +{"current_steps": 7326, "total_steps": 7532, "loss": 0.20790672302246094, "lr": 4.1275484157147216e-08, "epoch": 1.9454255742929227, "percentage": 97.27, "elapsed_time": "1 day, 2:28:26", "remaining_time": "0:44:39"} +{"current_steps": 7327, "total_steps": 7532, "loss": 0.2165423035621643, "lr": 4.087792175291649e-08, "epoch": 1.9456911432744657, "percentage": 97.28, "elapsed_time": "1 day, 2:28:39", "remaining_time": "0:44:26"} +{"current_steps": 7328, "total_steps": 7532, "loss": 0.2605394721031189, "lr": 4.048227934996485e-08, "epoch": 1.9459567122560086, "percentage": 97.29, "elapsed_time": "1 day, 2:28:52", "remaining_time": "0:44:13"} +{"current_steps": 7329, "total_steps": 7532, "loss": 0.22624900937080383, "lr": 4.008855702456904e-08, "epoch": 1.9462222812375516, "percentage": 97.3, "elapsed_time": "1 day, 2:29:05", 
"remaining_time": "0:44:00"} +{"current_steps": 7330, "total_steps": 7532, "loss": 0.23086196184158325, "lr": 3.9696754852632804e-08, "epoch": 1.9464878502190945, "percentage": 97.32, "elapsed_time": "1 day, 2:29:18", "remaining_time": "0:43:47"} +{"current_steps": 7331, "total_steps": 7532, "loss": 0.24633410573005676, "lr": 3.9306872909691265e-08, "epoch": 1.9467534192006375, "percentage": 97.33, "elapsed_time": "1 day, 2:29:32", "remaining_time": "0:43:34"} +{"current_steps": 7332, "total_steps": 7532, "loss": 0.2535535395145416, "lr": 3.8918911270908745e-08, "epoch": 1.9470189881821804, "percentage": 97.34, "elapsed_time": "1 day, 2:29:44", "remaining_time": "0:43:21"} +{"current_steps": 7333, "total_steps": 7532, "loss": 0.23904260993003845, "lr": 3.853287001108097e-08, "epoch": 1.9472845571637234, "percentage": 97.36, "elapsed_time": "1 day, 2:29:57", "remaining_time": "0:43:08"} +{"current_steps": 7334, "total_steps": 7532, "loss": 0.22525179386138916, "lr": 3.814874920463063e-08, "epoch": 1.9475501261452663, "percentage": 97.37, "elapsed_time": "1 day, 2:30:09", "remaining_time": "0:42:55"} +{"current_steps": 7335, "total_steps": 7532, "loss": 0.21139883995056152, "lr": 3.776654892561293e-08, "epoch": 1.9478156951268093, "percentage": 97.38, "elapsed_time": "1 day, 2:30:22", "remaining_time": "0:42:42"} +{"current_steps": 7336, "total_steps": 7532, "loss": 0.21939310431480408, "lr": 3.738626924771005e-08, "epoch": 1.9480812641083523, "percentage": 97.4, "elapsed_time": "1 day, 2:30:35", "remaining_time": "0:42:29"} +{"current_steps": 7337, "total_steps": 7532, "loss": 0.22852283716201782, "lr": 3.7007910244236664e-08, "epoch": 1.9483468330898952, "percentage": 97.41, "elapsed_time": "1 day, 2:30:48", "remaining_time": "0:42:16"} +{"current_steps": 7338, "total_steps": 7532, "loss": 0.20769211649894714, "lr": 3.663147198813666e-08, "epoch": 1.9486124020714382, "percentage": 97.42, "elapsed_time": "1 day, 2:31:01", "remaining_time": "0:42:03"} 
+{"current_steps": 7339, "total_steps": 7532, "loss": 0.21721890568733215, "lr": 3.625695455198086e-08, "epoch": 1.948877971052981, "percentage": 97.44, "elapsed_time": "1 day, 2:31:14", "remaining_time": "0:41:50"} +{"current_steps": 7340, "total_steps": 7532, "loss": 0.24236848950386047, "lr": 3.588435800797263e-08, "epoch": 1.949143540034524, "percentage": 97.45, "elapsed_time": "1 day, 2:31:27", "remaining_time": "0:41:37"} +{"current_steps": 7341, "total_steps": 7532, "loss": 0.2300192266702652, "lr": 3.5513682427944505e-08, "epoch": 1.949409109016067, "percentage": 97.46, "elapsed_time": "1 day, 2:31:40", "remaining_time": "0:41:24"} +{"current_steps": 7342, "total_steps": 7532, "loss": 0.21636728942394257, "lr": 3.5144927883358215e-08, "epoch": 1.94967467799761, "percentage": 97.48, "elapsed_time": "1 day, 2:31:53", "remaining_time": "0:41:11"} +{"current_steps": 7343, "total_steps": 7532, "loss": 0.25367966294288635, "lr": 3.477809444530578e-08, "epoch": 1.949940246979153, "percentage": 97.49, "elapsed_time": "1 day, 2:32:06", "remaining_time": "0:40:58"} +{"current_steps": 7344, "total_steps": 7532, "loss": 0.24514247477054596, "lr": 3.4413182184507285e-08, "epoch": 1.9502058159606959, "percentage": 97.5, "elapsed_time": "1 day, 2:32:19", "remaining_time": "0:40:45"} +{"current_steps": 7345, "total_steps": 7532, "loss": 0.18460404872894287, "lr": 3.405019117131425e-08, "epoch": 1.9504713849422388, "percentage": 97.52, "elapsed_time": "1 day, 2:32:31", "remaining_time": "0:40:32"} +{"current_steps": 7346, "total_steps": 7532, "loss": 0.2096845805644989, "lr": 3.3689121475706244e-08, "epoch": 1.9507369539237818, "percentage": 97.53, "elapsed_time": "1 day, 2:32:45", "remaining_time": "0:40:19"} +{"current_steps": 7347, "total_steps": 7532, "loss": 0.22435057163238525, "lr": 3.332997316729536e-08, "epoch": 1.9510025229053247, "percentage": 97.54, "elapsed_time": "1 day, 2:32:57", "remaining_time": "0:40:06"} +{"current_steps": 7348, "total_steps": 7532, 
"loss": 0.20798128843307495, "lr": 3.2972746315318436e-08, "epoch": 1.9512680918868677, "percentage": 97.56, "elapsed_time": "1 day, 2:33:11", "remaining_time": "0:39:53"} +{"current_steps": 7349, "total_steps": 7532, "loss": 0.23958316445350647, "lr": 3.2617440988645945e-08, "epoch": 1.9515336608684106, "percentage": 97.57, "elapsed_time": "1 day, 2:33:23", "remaining_time": "0:39:40"} +{"current_steps": 7350, "total_steps": 7532, "loss": 0.21934574842453003, "lr": 3.2264057255777525e-08, "epoch": 1.9517992298499536, "percentage": 97.58, "elapsed_time": "1 day, 2:33:37", "remaining_time": "0:39:27"} +{"current_steps": 7351, "total_steps": 7532, "loss": 0.24321375787258148, "lr": 3.1912595184839804e-08, "epoch": 1.9520647988314965, "percentage": 97.6, "elapsed_time": "1 day, 2:33:49", "remaining_time": "0:39:14"} +{"current_steps": 7352, "total_steps": 7532, "loss": 0.20932736992835999, "lr": 3.156305484359079e-08, "epoch": 1.9523303678130395, "percentage": 97.61, "elapsed_time": "1 day, 2:34:03", "remaining_time": "0:39:01"} +{"current_steps": 7353, "total_steps": 7532, "loss": 0.19824840128421783, "lr": 3.12154362994177e-08, "epoch": 1.9525959367945824, "percentage": 97.62, "elapsed_time": "1 day, 2:34:15", "remaining_time": "0:38:48"} +{"current_steps": 7354, "total_steps": 7532, "loss": 0.212745800614357, "lr": 3.0869739619338034e-08, "epoch": 1.9528615057761254, "percentage": 97.64, "elapsed_time": "1 day, 2:34:29", "remaining_time": "0:38:35"} +{"current_steps": 7355, "total_steps": 7532, "loss": 0.23044779896736145, "lr": 3.0525964869997374e-08, "epoch": 1.9531270747576683, "percentage": 97.65, "elapsed_time": "1 day, 2:34:42", "remaining_time": "0:38:22"} +{"current_steps": 7356, "total_steps": 7532, "loss": 0.2237459123134613, "lr": 3.018411211767158e-08, "epoch": 1.9533926437392113, "percentage": 97.66, "elapsed_time": "1 day, 2:34:55", "remaining_time": "0:38:09"} +{"current_steps": 7357, "total_steps": 7532, "loss": 0.2592429518699646, "lr": 
2.984418142826684e-08, "epoch": 1.9536582127207542, "percentage": 97.68, "elapsed_time": "1 day, 2:35:09", "remaining_time": "0:37:56"} +{"current_steps": 7358, "total_steps": 7532, "loss": 0.17559123039245605, "lr": 2.9506172867315163e-08, "epoch": 1.9539237817022972, "percentage": 97.69, "elapsed_time": "1 day, 2:35:22", "remaining_time": "0:37:43"} +{"current_steps": 7359, "total_steps": 7532, "loss": 0.24143017828464508, "lr": 2.917008649998332e-08, "epoch": 1.9541893506838401, "percentage": 97.7, "elapsed_time": "1 day, 2:35:35", "remaining_time": "0:37:30"} +{"current_steps": 7360, "total_steps": 7532, "loss": 0.23560799658298492, "lr": 2.883592239106392e-08, "epoch": 1.954454919665383, "percentage": 97.72, "elapsed_time": "1 day, 2:35:48", "remaining_time": "0:37:17"} +{"current_steps": 7361, "total_steps": 7532, "loss": 0.2456119805574417, "lr": 2.8503680604979878e-08, "epoch": 1.954720488646926, "percentage": 97.73, "elapsed_time": "1 day, 2:36:02", "remaining_time": "0:37:04"} +{"current_steps": 7362, "total_steps": 7532, "loss": 0.21878069639205933, "lr": 2.817336120578329e-08, "epoch": 1.954986057628469, "percentage": 97.74, "elapsed_time": "1 day, 2:36:15", "remaining_time": "0:36:51"} +{"current_steps": 7363, "total_steps": 7532, "loss": 0.20496608316898346, "lr": 2.7844964257155438e-08, "epoch": 1.955251626610012, "percentage": 97.76, "elapsed_time": "1 day, 2:36:28", "remaining_time": "0:36:38"} +{"current_steps": 7364, "total_steps": 7532, "loss": 0.23219498991966248, "lr": 2.7518489822407902e-08, "epoch": 1.955517195591555, "percentage": 97.77, "elapsed_time": "1 day, 2:36:41", "remaining_time": "0:36:25"} +{"current_steps": 7365, "total_steps": 7532, "loss": 0.2284272015094757, "lr": 2.7193937964481442e-08, "epoch": 1.9557827645730979, "percentage": 97.78, "elapsed_time": "1 day, 2:36:54", "remaining_time": "0:36:12"} +{"current_steps": 7366, "total_steps": 7532, "loss": 0.22303974628448486, "lr": 2.68713087459449e-08, "epoch": 
1.9560483335546408, "percentage": 97.8, "elapsed_time": "1 day, 2:37:07", "remaining_time": "0:35:59"} +{"current_steps": 7367, "total_steps": 7532, "loss": 0.22489243745803833, "lr": 2.655060222899741e-08, "epoch": 1.9563139025361838, "percentage": 97.81, "elapsed_time": "1 day, 2:37:21", "remaining_time": "0:35:46"} +{"current_steps": 7368, "total_steps": 7532, "loss": 0.27986854314804077, "lr": 2.6231818475468407e-08, "epoch": 1.9565794715177267, "percentage": 97.82, "elapsed_time": "1 day, 2:37:34", "remaining_time": "0:35:33"} +{"current_steps": 7369, "total_steps": 7532, "loss": 0.29321208596229553, "lr": 2.591495754681539e-08, "epoch": 1.9568450404992697, "percentage": 97.84, "elapsed_time": "1 day, 2:37:46", "remaining_time": "0:35:20"} +{"current_steps": 7370, "total_steps": 7532, "loss": 0.2560982406139374, "lr": 2.5600019504125053e-08, "epoch": 1.9571106094808126, "percentage": 97.85, "elapsed_time": "1 day, 2:37:59", "remaining_time": "0:35:07"} +{"current_steps": 7371, "total_steps": 7532, "loss": 0.264164537191391, "lr": 2.528700440811438e-08, "epoch": 1.9573761784623556, "percentage": 97.86, "elapsed_time": "1 day, 2:38:12", "remaining_time": "0:34:54"} +{"current_steps": 7372, "total_steps": 7532, "loss": 0.2135474979877472, "lr": 2.4975912319127326e-08, "epoch": 1.9576417474438985, "percentage": 97.88, "elapsed_time": "1 day, 2:38:25", "remaining_time": "0:34:41"} +{"current_steps": 7373, "total_steps": 7532, "loss": 0.2100939154624939, "lr": 2.466674329714036e-08, "epoch": 1.9579073164254415, "percentage": 97.89, "elapsed_time": "1 day, 2:38:38", "remaining_time": "0:34:28"} +{"current_steps": 7374, "total_steps": 7532, "loss": 0.23327934741973877, "lr": 2.4359497401758026e-08, "epoch": 1.9581728854069844, "percentage": 97.9, "elapsed_time": "1 day, 2:38:51", "remaining_time": "0:34:15"} +{"current_steps": 7375, "total_steps": 7532, "loss": 0.18830639123916626, "lr": 2.405417469221183e-08, "epoch": 1.9584384543885274, "percentage": 97.92, 
"elapsed_time": "1 day, 2:39:03", "remaining_time": "0:34:02"} +{"current_steps": 7376, "total_steps": 7532, "loss": 0.2558823227882385, "lr": 2.3750775227364686e-08, "epoch": 1.9587040233700703, "percentage": 97.93, "elapsed_time": "1 day, 2:39:16", "remaining_time": "0:33:49"} +{"current_steps": 7377, "total_steps": 7532, "loss": 0.24241580069065094, "lr": 2.3449299065710917e-08, "epoch": 1.9589695923516133, "percentage": 97.94, "elapsed_time": "1 day, 2:39:29", "remaining_time": "0:33:36"} +{"current_steps": 7378, "total_steps": 7532, "loss": 0.21678534150123596, "lr": 2.3149746265368478e-08, "epoch": 1.9592351613331562, "percentage": 97.96, "elapsed_time": "1 day, 2:39:42", "remaining_time": "0:33:23"} +{"current_steps": 7379, "total_steps": 7532, "loss": 0.20956794917583466, "lr": 2.2852116884088947e-08, "epoch": 1.9595007303146992, "percentage": 97.97, "elapsed_time": "1 day, 2:39:55", "remaining_time": "0:33:10"} +{"current_steps": 7380, "total_steps": 7532, "loss": 0.2185555249452591, "lr": 2.2556410979253095e-08, "epoch": 1.9597662992962421, "percentage": 97.98, "elapsed_time": "1 day, 2:40:08", "remaining_time": "0:32:57"} +{"current_steps": 7381, "total_steps": 7532, "loss": 0.21802933514118195, "lr": 2.226262860786643e-08, "epoch": 1.960031868277785, "percentage": 98.0, "elapsed_time": "1 day, 2:40:21", "remaining_time": "0:32:44"} +{"current_steps": 7382, "total_steps": 7532, "loss": 0.22842684388160706, "lr": 2.1970769826570317e-08, "epoch": 1.960297437259328, "percentage": 98.01, "elapsed_time": "1 day, 2:40:34", "remaining_time": "0:32:31"} +{"current_steps": 7383, "total_steps": 7532, "loss": 0.23380814492702484, "lr": 2.1680834691628627e-08, "epoch": 1.960563006240871, "percentage": 98.02, "elapsed_time": "1 day, 2:40:47", "remaining_time": "0:32:18"} +{"current_steps": 7384, "total_steps": 7532, "loss": 0.23476335406303406, "lr": 2.1392823258938877e-08, "epoch": 1.960828575222414, "percentage": 98.04, "elapsed_time": "1 day, 2:41:00", 
"remaining_time": "0:32:05"} +{"current_steps": 7385, "total_steps": 7532, "loss": 0.19657662510871887, "lr": 2.110673558402554e-08, "epoch": 1.961094144203957, "percentage": 98.05, "elapsed_time": "1 day, 2:41:13", "remaining_time": "0:31:52"} +{"current_steps": 7386, "total_steps": 7532, "loss": 0.1724000722169876, "lr": 2.0822571722044494e-08, "epoch": 1.9613597131854998, "percentage": 98.06, "elapsed_time": "1 day, 2:41:26", "remaining_time": "0:31:39"} +{"current_steps": 7387, "total_steps": 7532, "loss": 0.22960031032562256, "lr": 2.0540331727777475e-08, "epoch": 1.9616252821670428, "percentage": 98.07, "elapsed_time": "1 day, 2:41:39", "remaining_time": "0:31:26"} +{"current_steps": 7388, "total_steps": 7532, "loss": 0.2601638436317444, "lr": 2.0260015655637623e-08, "epoch": 1.9618908511485857, "percentage": 98.09, "elapsed_time": "1 day, 2:41:52", "remaining_time": "0:31:13"} +{"current_steps": 7389, "total_steps": 7532, "loss": 0.2562445402145386, "lr": 1.998162355966726e-08, "epoch": 1.9621564201301287, "percentage": 98.1, "elapsed_time": "1 day, 2:42:05", "remaining_time": "0:31:00"} +{"current_steps": 7390, "total_steps": 7532, "loss": 0.20073221623897552, "lr": 1.9705155493535688e-08, "epoch": 1.9624219891116716, "percentage": 98.11, "elapsed_time": "1 day, 2:42:18", "remaining_time": "0:30:47"} +{"current_steps": 7391, "total_steps": 7532, "loss": 0.18454071879386902, "lr": 1.9430611510544707e-08, "epoch": 1.9626875580932146, "percentage": 98.13, "elapsed_time": "1 day, 2:42:32", "remaining_time": "0:30:34"} +{"current_steps": 7392, "total_steps": 7532, "loss": 0.18515023589134216, "lr": 1.915799166362087e-08, "epoch": 1.9629531270747576, "percentage": 98.14, "elapsed_time": "1 day, 2:42:44", "remaining_time": "0:30:21"} +{"current_steps": 7393, "total_steps": 7532, "loss": 0.25658512115478516, "lr": 1.8887296005323242e-08, "epoch": 1.9632186960563005, "percentage": 98.15, "elapsed_time": "1 day, 2:42:57", "remaining_time": "0:30:08"} 
+{"current_steps": 7394, "total_steps": 7532, "loss": 0.2219933569431305, "lr": 1.861852458783897e-08, "epoch": 1.9634842650378435, "percentage": 98.17, "elapsed_time": "1 day, 2:43:10", "remaining_time": "0:29:55"} +{"current_steps": 7395, "total_steps": 7532, "loss": 0.24949616193771362, "lr": 1.8351677462983276e-08, "epoch": 1.9637498340193864, "percentage": 98.18, "elapsed_time": "1 day, 2:43:24", "remaining_time": "0:29:42"} +{"current_steps": 7396, "total_steps": 7532, "loss": 0.24348726868629456, "lr": 1.808675468220167e-08, "epoch": 1.9640154030009294, "percentage": 98.19, "elapsed_time": "1 day, 2:43:36", "remaining_time": "0:29:29"} +{"current_steps": 7397, "total_steps": 7532, "loss": 0.2329033762216568, "lr": 1.782375629656885e-08, "epoch": 1.9642809719824723, "percentage": 98.21, "elapsed_time": "1 day, 2:43:49", "remaining_time": "0:29:16"} +{"current_steps": 7398, "total_steps": 7532, "loss": 0.22265426814556122, "lr": 1.7562682356786488e-08, "epoch": 1.9645465409640153, "percentage": 98.22, "elapsed_time": "1 day, 2:44:02", "remaining_time": "0:29:03"} +{"current_steps": 7399, "total_steps": 7532, "loss": 0.24438990652561188, "lr": 1.730353291318654e-08, "epoch": 1.9648121099455582, "percentage": 98.23, "elapsed_time": "1 day, 2:44:15", "remaining_time": "0:28:50"} +{"current_steps": 7400, "total_steps": 7532, "loss": 0.2632136642932892, "lr": 1.704630801573015e-08, "epoch": 1.9650776789271012, "percentage": 98.25, "elapsed_time": "1 day, 2:44:28", "remaining_time": "0:28:37"} +{"current_steps": 7401, "total_steps": 7532, "loss": 0.22230927646160126, "lr": 1.6791007714008766e-08, "epoch": 1.9653432479086441, "percentage": 98.26, "elapsed_time": "1 day, 2:44:47", "remaining_time": "0:28:24"} +{"current_steps": 7402, "total_steps": 7532, "loss": 0.26317098736763, "lr": 1.653763205723968e-08, "epoch": 1.965608816890187, "percentage": 98.27, "elapsed_time": "1 day, 2:45:00", "remaining_time": "0:28:11"} +{"current_steps": 7403, "total_steps": 7532, 
"loss": 0.23205846548080444, "lr": 1.628618109427049e-08, "epoch": 1.96587438587173, "percentage": 98.29, "elapsed_time": "1 day, 2:45:13", "remaining_time": "0:27:58"} +{"current_steps": 7404, "total_steps": 7532, "loss": 0.202583909034729, "lr": 1.6036654873579084e-08, "epoch": 1.966139954853273, "percentage": 98.3, "elapsed_time": "1 day, 2:45:27", "remaining_time": "0:27:45"} +{"current_steps": 7405, "total_steps": 7532, "loss": 0.2579672038555145, "lr": 1.5789053443270308e-08, "epoch": 1.966405523834816, "percentage": 98.31, "elapsed_time": "1 day, 2:45:40", "remaining_time": "0:27:32"} +{"current_steps": 7406, "total_steps": 7532, "loss": 0.27483606338500977, "lr": 1.5543376851080428e-08, "epoch": 1.966671092816359, "percentage": 98.33, "elapsed_time": "1 day, 2:45:53", "remaining_time": "0:27:19"} +{"current_steps": 7407, "total_steps": 7532, "loss": 0.22510311007499695, "lr": 1.5299625144370444e-08, "epoch": 1.966936661797902, "percentage": 98.34, "elapsed_time": "1 day, 2:46:06", "remaining_time": "0:27:06"} +{"current_steps": 7408, "total_steps": 7532, "loss": 0.24941131472587585, "lr": 1.505779837013499e-08, "epoch": 1.967202230779445, "percentage": 98.35, "elapsed_time": "1 day, 2:46:19", "remaining_time": "0:26:53"} +{"current_steps": 7409, "total_steps": 7532, "loss": 0.22301170229911804, "lr": 1.481789657499344e-08, "epoch": 1.967467799760988, "percentage": 98.37, "elapsed_time": "1 day, 2:46:32", "remaining_time": "0:26:40"} +{"current_steps": 7410, "total_steps": 7532, "loss": 0.23045194149017334, "lr": 1.4579919805198795e-08, "epoch": 1.967733368742531, "percentage": 98.38, "elapsed_time": "1 day, 2:46:45", "remaining_time": "0:26:27"} +{"current_steps": 7411, "total_steps": 7532, "loss": 0.25892990827560425, "lr": 1.4343868106627689e-08, "epoch": 1.9679989377240739, "percentage": 98.39, "elapsed_time": "1 day, 2:46:58", "remaining_time": "0:26:14"} +{"current_steps": 7412, "total_steps": 7532, "loss": 0.23086567223072052, "lr": 
1.4109741524788167e-08, "epoch": 1.9682645067056168, "percentage": 98.41, "elapsed_time": "1 day, 2:47:11", "remaining_time": "0:26:01"} +{"current_steps": 7413, "total_steps": 7532, "loss": 0.2514735460281372, "lr": 1.3877540104818566e-08, "epoch": 1.9685300756871598, "percentage": 98.42, "elapsed_time": "1 day, 2:47:24", "remaining_time": "0:25:48"} +{"current_steps": 7414, "total_steps": 7532, "loss": 0.21824213862419128, "lr": 1.3647263891484187e-08, "epoch": 1.9687956446687027, "percentage": 98.43, "elapsed_time": "1 day, 2:47:38", "remaining_time": "0:25:35"} +{"current_steps": 7415, "total_steps": 7532, "loss": 0.2262609452009201, "lr": 1.3418912929178407e-08, "epoch": 1.9690612136502457, "percentage": 98.45, "elapsed_time": "1 day, 2:47:51", "remaining_time": "0:25:22"} +{"current_steps": 7416, "total_steps": 7532, "loss": 0.23119492828845978, "lr": 1.3192487261926013e-08, "epoch": 1.9693267826317886, "percentage": 98.46, "elapsed_time": "1 day, 2:48:04", "remaining_time": "0:25:09"} +{"current_steps": 7417, "total_steps": 7532, "loss": 0.20173534750938416, "lr": 1.2967986933378751e-08, "epoch": 1.9695923516133316, "percentage": 98.47, "elapsed_time": "1 day, 2:48:17", "remaining_time": "0:24:56"} +{"current_steps": 7418, "total_steps": 7532, "loss": 0.2212662547826767, "lr": 1.2745411986816447e-08, "epoch": 1.9698579205948745, "percentage": 98.49, "elapsed_time": "1 day, 2:48:30", "remaining_time": "0:24:43"} +{"current_steps": 7419, "total_steps": 7532, "loss": 0.21990706026554108, "lr": 1.2524762465151442e-08, "epoch": 1.9701234895764175, "percentage": 98.5, "elapsed_time": "1 day, 2:48:43", "remaining_time": "0:24:30"} +{"current_steps": 7420, "total_steps": 7532, "loss": 0.18648189306259155, "lr": 1.2306038410919707e-08, "epoch": 1.9703890585579604, "percentage": 98.51, "elapsed_time": "1 day, 2:48:56", "remaining_time": "0:24:17"} +{"current_steps": 7421, "total_steps": 7532, "loss": 0.23273484408855438, "lr": 1.2089239866289737e-08, "epoch": 
1.9706546275395034, "percentage": 98.53, "elapsed_time": "1 day, 2:49:09", "remaining_time": "0:24:04"} +{"current_steps": 7422, "total_steps": 7532, "loss": 0.21514324843883514, "lr": 1.1874366873059206e-08, "epoch": 1.9709201965210463, "percentage": 98.54, "elapsed_time": "1 day, 2:49:22", "remaining_time": "0:23:51"} +{"current_steps": 7423, "total_steps": 7532, "loss": 0.2544926106929779, "lr": 1.1661419472650538e-08, "epoch": 1.9711857655025893, "percentage": 98.55, "elapsed_time": "1 day, 2:49:35", "remaining_time": "0:23:38"} +{"current_steps": 7424, "total_steps": 7532, "loss": 0.235082745552063, "lr": 1.1450397706119776e-08, "epoch": 1.9714513344841322, "percentage": 98.57, "elapsed_time": "1 day, 2:49:48", "remaining_time": "0:23:25"} +{"current_steps": 7425, "total_steps": 7532, "loss": 0.24777358770370483, "lr": 1.1241301614147715e-08, "epoch": 1.9717169034656752, "percentage": 98.58, "elapsed_time": "1 day, 2:50:01", "remaining_time": "0:23:12"} +{"current_steps": 7426, "total_steps": 7532, "loss": 0.23714174330234528, "lr": 1.1034131237045443e-08, "epoch": 1.9719824724472181, "percentage": 98.59, "elapsed_time": "1 day, 2:50:14", "remaining_time": "0:22:59"} +{"current_steps": 7427, "total_steps": 7532, "loss": 0.24665668606758118, "lr": 1.0828886614754342e-08, "epoch": 1.972248041428761, "percentage": 98.61, "elapsed_time": "1 day, 2:50:27", "remaining_time": "0:22:46"} +{"current_steps": 7428, "total_steps": 7532, "loss": 0.23421131074428558, "lr": 1.062556778684276e-08, "epoch": 1.972513610410304, "percentage": 98.62, "elapsed_time": "1 day, 2:50:39", "remaining_time": "0:22:33"} +{"current_steps": 7429, "total_steps": 7532, "loss": 0.23443526029586792, "lr": 1.0424174792508234e-08, "epoch": 1.972779179391847, "percentage": 98.63, "elapsed_time": "1 day, 2:50:52", "remaining_time": "0:22:20"} +{"current_steps": 7430, "total_steps": 7532, "loss": 0.24177192151546478, "lr": 1.0224707670576373e-08, "epoch": 1.97304474837339, "percentage": 98.65, 
"elapsed_time": "1 day, 2:51:05", "remaining_time": "0:22:07"} +{"current_steps": 7431, "total_steps": 7532, "loss": 0.20957472920417786, "lr": 1.002716645950197e-08, "epoch": 1.973310317354933, "percentage": 98.66, "elapsed_time": "1 day, 2:51:18", "remaining_time": "0:21:54"} +{"current_steps": 7432, "total_steps": 7532, "loss": 0.21594710648059845, "lr": 9.831551197370116e-09, "epoch": 1.9735758863364758, "percentage": 98.67, "elapsed_time": "1 day, 2:51:31", "remaining_time": "0:21:41"} +{"current_steps": 7433, "total_steps": 7532, "loss": 0.2372155487537384, "lr": 9.637861921891756e-09, "epoch": 1.9738414553180188, "percentage": 98.69, "elapsed_time": "1 day, 2:51:44", "remaining_time": "0:21:28"} +{"current_steps": 7434, "total_steps": 7532, "loss": 0.211237370967865, "lr": 9.446098670408132e-09, "epoch": 1.974107024299562, "percentage": 98.7, "elapsed_time": "1 day, 2:51:57", "remaining_time": "0:21:14"} +{"current_steps": 7435, "total_steps": 7532, "loss": 0.25123757123947144, "lr": 9.256261479888562e-09, "epoch": 1.974372593281105, "percentage": 98.71, "elapsed_time": "1 day, 2:52:10", "remaining_time": "0:21:01"} +{"current_steps": 7436, "total_steps": 7532, "loss": 0.23048831522464752, "lr": 9.068350386932655e-09, "epoch": 1.9746381622626479, "percentage": 98.73, "elapsed_time": "1 day, 2:52:23", "remaining_time": "0:20:48"} +{"current_steps": 7437, "total_steps": 7532, "loss": 0.22923544049263, "lr": 8.882365427765883e-09, "epoch": 1.9749037312441908, "percentage": 98.74, "elapsed_time": "1 day, 2:52:36", "remaining_time": "0:20:35"} +{"current_steps": 7438, "total_steps": 7532, "loss": 0.199529767036438, "lr": 8.698306638245114e-09, "epoch": 1.9751693002257338, "percentage": 98.75, "elapsed_time": "1 day, 2:52:49", "remaining_time": "0:20:22"} +{"current_steps": 7439, "total_steps": 7532, "loss": 0.22778059542179108, "lr": 8.516174053854187e-09, "epoch": 1.9754348692072767, "percentage": 98.77, "elapsed_time": "1 day, 2:53:02", "remaining_time": 
"0:20:09"} +{"current_steps": 7440, "total_steps": 7532, "loss": 0.22807848453521729, "lr": 8.335967709706128e-09, "epoch": 1.9757004381888197, "percentage": 98.78, "elapsed_time": "1 day, 2:53:15", "remaining_time": "0:19:56"} +{"current_steps": 7441, "total_steps": 7532, "loss": 0.24764932692050934, "lr": 8.157687640543143e-09, "epoch": 1.9759660071703626, "percentage": 98.79, "elapsed_time": "1 day, 2:53:28", "remaining_time": "0:19:43"} +{"current_steps": 7442, "total_steps": 7532, "loss": 0.22213312983512878, "lr": 7.98133388073552e-09, "epoch": 1.9762315761519056, "percentage": 98.81, "elapsed_time": "1 day, 2:53:42", "remaining_time": "0:19:30"} +{"current_steps": 7443, "total_steps": 7532, "loss": 0.22822709381580353, "lr": 7.806906464281617e-09, "epoch": 1.9764971451334485, "percentage": 98.82, "elapsed_time": "1 day, 2:53:55", "remaining_time": "0:19:17"} +{"current_steps": 7444, "total_steps": 7532, "loss": 0.2236599326133728, "lr": 7.634405424808977e-09, "epoch": 1.9767627141149915, "percentage": 98.83, "elapsed_time": "1 day, 2:54:08", "remaining_time": "0:19:04"} +{"current_steps": 7445, "total_steps": 7532, "loss": 0.20294487476348877, "lr": 7.463830795574334e-09, "epoch": 1.9770282830965344, "percentage": 98.84, "elapsed_time": "1 day, 2:54:21", "remaining_time": "0:18:51"} +{"current_steps": 7446, "total_steps": 7532, "loss": 0.2187870740890503, "lr": 7.295182609461382e-09, "epoch": 1.9772938520780774, "percentage": 98.86, "elapsed_time": "1 day, 2:54:35", "remaining_time": "0:18:38"} +{"current_steps": 7447, "total_steps": 7532, "loss": 0.2629002630710602, "lr": 7.128460898984113e-09, "epoch": 1.9775594210596203, "percentage": 98.87, "elapsed_time": "1 day, 2:54:48", "remaining_time": "0:18:25"} +{"current_steps": 7448, "total_steps": 7532, "loss": 0.24024136364459991, "lr": 6.963665696285704e-09, "epoch": 1.9778249900411633, "percentage": 98.88, "elapsed_time": "1 day, 2:55:01", "remaining_time": "0:18:12"} +{"current_steps": 7449, "total_steps": 
7532, "loss": 0.22334401309490204, "lr": 6.800797033134077e-09, "epoch": 1.9780905590227063, "percentage": 98.9, "elapsed_time": "1 day, 2:55:14", "remaining_time": "0:17:59"} +{"current_steps": 7450, "total_steps": 7532, "loss": 0.21535055339336395, "lr": 6.639854940930779e-09, "epoch": 1.9783561280042492, "percentage": 98.91, "elapsed_time": "1 day, 2:55:27", "remaining_time": "0:17:46"} +{"current_steps": 7451, "total_steps": 7532, "loss": 0.26096785068511963, "lr": 6.480839450703214e-09, "epoch": 1.9786216969857922, "percentage": 98.92, "elapsed_time": "1 day, 2:55:40", "remaining_time": "0:17:33"} +{"current_steps": 7452, "total_steps": 7532, "loss": 0.22461384534835815, "lr": 6.323750593106859e-09, "epoch": 1.978887265967335, "percentage": 98.94, "elapsed_time": "1 day, 2:55:53", "remaining_time": "0:17:20"} +{"current_steps": 7453, "total_steps": 7532, "loss": 0.24372713267803192, "lr": 6.168588398426378e-09, "epoch": 1.979152834948878, "percentage": 98.95, "elapsed_time": "1 day, 2:56:06", "remaining_time": "0:17:07"} +{"current_steps": 7454, "total_steps": 7532, "loss": 0.19544872641563416, "lr": 6.015352896576732e-09, "epoch": 1.979418403930421, "percentage": 98.96, "elapsed_time": "1 day, 2:56:19", "remaining_time": "0:16:54"} +{"current_steps": 7455, "total_steps": 7532, "loss": 0.22004768252372742, "lr": 5.864044117097623e-09, "epoch": 1.979683972911964, "percentage": 98.98, "elapsed_time": "1 day, 2:56:32", "remaining_time": "0:16:41"} +{"current_steps": 7456, "total_steps": 7532, "loss": 0.2509492337703705, "lr": 5.714662089162381e-09, "epoch": 1.979949541893507, "percentage": 98.99, "elapsed_time": "1 day, 2:56:45", "remaining_time": "0:16:28"} +{"current_steps": 7457, "total_steps": 7532, "loss": 0.19315078854560852, "lr": 5.567206841567974e-09, "epoch": 1.9802151108750499, "percentage": 99.0, "elapsed_time": "1 day, 2:56:58", "remaining_time": "0:16:15"} +{"current_steps": 7458, "total_steps": 7532, "loss": 0.20722024142742157, "lr": 
5.421678402741659e-09, "epoch": 1.9804806798565928, "percentage": 99.02, "elapsed_time": "1 day, 2:57:12", "remaining_time": "0:16:02"} +{"current_steps": 7459, "total_steps": 7532, "loss": 0.2041238397359848, "lr": 5.278076800742105e-09, "epoch": 1.9807462488381358, "percentage": 99.03, "elapsed_time": "1 day, 2:57:25", "remaining_time": "0:15:49"} +{"current_steps": 7460, "total_steps": 7532, "loss": 0.21889238059520721, "lr": 5.136402063251611e-09, "epoch": 1.9810118178196787, "percentage": 99.04, "elapsed_time": "1 day, 2:57:38", "remaining_time": "0:15:36"} +{"current_steps": 7461, "total_steps": 7532, "loss": 0.23580557107925415, "lr": 4.996654217584995e-09, "epoch": 1.9812773868012217, "percentage": 99.06, "elapsed_time": "1 day, 2:57:51", "remaining_time": "0:15:23"} +{"current_steps": 7462, "total_steps": 7532, "loss": 0.24967315793037415, "lr": 4.858833290684039e-09, "epoch": 1.9815429557827646, "percentage": 99.07, "elapsed_time": "1 day, 2:58:04", "remaining_time": "0:15:10"} +{"current_steps": 7463, "total_steps": 7532, "loss": 0.21802274882793427, "lr": 4.722939309116381e-09, "epoch": 1.9818085247643076, "percentage": 99.08, "elapsed_time": "1 day, 2:58:18", "remaining_time": "0:14:57"} +{"current_steps": 7464, "total_steps": 7532, "loss": 0.2641376554965973, "lr": 4.588972299084393e-09, "epoch": 1.9820740937458505, "percentage": 99.1, "elapsed_time": "1 day, 2:58:31", "remaining_time": "0:14:44"} +{"current_steps": 7465, "total_steps": 7532, "loss": 0.20166629552841187, "lr": 4.456932286412974e-09, "epoch": 1.9823396627273935, "percentage": 99.11, "elapsed_time": "1 day, 2:58:44", "remaining_time": "0:14:31"} +{"current_steps": 7466, "total_steps": 7532, "loss": 0.22796592116355896, "lr": 4.3268192965573164e-09, "epoch": 1.9826052317089364, "percentage": 99.12, "elapsed_time": "1 day, 2:58:58", "remaining_time": "0:14:18"} +{"current_steps": 7467, "total_steps": 7532, "loss": 0.19833455979824066, "lr": 4.19863335460402e-09, "epoch": 
1.9828708006904794, "percentage": 99.14, "elapsed_time": "1 day, 2:59:11", "remaining_time": "0:14:05"} +{"current_steps": 7468, "total_steps": 7532, "loss": 0.23009257018566132, "lr": 4.07237448526554e-09, "epoch": 1.9831363696720223, "percentage": 99.15, "elapsed_time": "1 day, 2:59:25", "remaining_time": "0:13:52"} +{"current_steps": 7469, "total_steps": 7532, "loss": 0.22418440878391266, "lr": 3.9480427128812945e-09, "epoch": 1.9834019386535653, "percentage": 99.16, "elapsed_time": "1 day, 2:59:37", "remaining_time": "0:13:39"} +{"current_steps": 7470, "total_steps": 7532, "loss": 0.2015800178050995, "lr": 3.825638061421e-09, "epoch": 1.9836675076351082, "percentage": 99.18, "elapsed_time": "1 day, 2:59:50", "remaining_time": "0:13:26"} +{"current_steps": 7471, "total_steps": 7532, "loss": 0.22166767716407776, "lr": 3.705160554485776e-09, "epoch": 1.9839330766166512, "percentage": 99.19, "elapsed_time": "1 day, 3:00:04", "remaining_time": "0:13:13"} +{"current_steps": 7472, "total_steps": 7532, "loss": 0.3154509961605072, "lr": 3.5866102152981586e-09, "epoch": 1.9841986455981941, "percentage": 99.2, "elapsed_time": "1 day, 3:00:17", "remaining_time": "0:13:00"} +{"current_steps": 7473, "total_steps": 7532, "loss": 0.25891417264938354, "lr": 3.4699870667165292e-09, "epoch": 1.984464214579737, "percentage": 99.22, "elapsed_time": "1 day, 3:00:30", "remaining_time": "0:12:47"} +{"current_steps": 7474, "total_steps": 7532, "loss": 0.24837851524353027, "lr": 3.355291131222904e-09, "epoch": 1.98472978356128, "percentage": 99.23, "elapsed_time": "1 day, 3:00:43", "remaining_time": "0:12:34"} +{"current_steps": 7475, "total_steps": 7532, "loss": 0.24254213273525238, "lr": 3.2425224309307055e-09, "epoch": 1.984995352542823, "percentage": 99.24, "elapsed_time": "1 day, 3:00:56", "remaining_time": "0:12:21"} +{"current_steps": 7476, "total_steps": 7532, "loss": 0.24822884798049927, "lr": 3.1316809875781005e-09, "epoch": 1.985260921524366, "percentage": 99.26, 
"elapsed_time": "1 day, 3:01:09", "remaining_time": "0:12:08"} +{"current_steps": 7477, "total_steps": 7532, "loss": 0.19553488492965698, "lr": 3.022766822535772e-09, "epoch": 1.985526490505909, "percentage": 99.27, "elapsed_time": "1 day, 3:01:22", "remaining_time": "0:11:55"} +{"current_steps": 7478, "total_steps": 7532, "loss": 0.24758943915367126, "lr": 2.9157799568002576e-09, "epoch": 1.9857920594874519, "percentage": 99.28, "elapsed_time": "1 day, 3:01:35", "remaining_time": "0:11:42"} +{"current_steps": 7479, "total_steps": 7532, "loss": 0.22947746515274048, "lr": 2.810720410998391e-09, "epoch": 1.9860576284689948, "percentage": 99.3, "elapsed_time": "1 day, 3:01:49", "remaining_time": "0:11:29"} +{"current_steps": 7480, "total_steps": 7532, "loss": 0.20573696494102478, "lr": 2.7075882053828605e-09, "epoch": 1.9863231974505378, "percentage": 99.31, "elapsed_time": "1 day, 3:02:02", "remaining_time": "0:11:16"} +{"current_steps": 7481, "total_steps": 7532, "loss": 0.2547800838947296, "lr": 2.606383359837761e-09, "epoch": 1.9865887664320807, "percentage": 99.32, "elapsed_time": "1 day, 3:02:15", "remaining_time": "0:11:03"} +{"current_steps": 7482, "total_steps": 7532, "loss": 0.22227191925048828, "lr": 2.507105893874151e-09, "epoch": 1.9868543354136237, "percentage": 99.34, "elapsed_time": "1 day, 3:02:28", "remaining_time": "0:10:50"} +{"current_steps": 7483, "total_steps": 7532, "loss": 0.24687603116035461, "lr": 2.409755826630056e-09, "epoch": 1.9871199043951666, "percentage": 99.35, "elapsed_time": "1 day, 3:02:42", "remaining_time": "0:10:37"} +{"current_steps": 7484, "total_steps": 7532, "loss": 0.23577818274497986, "lr": 2.3143331768749053e-09, "epoch": 1.9873854733767096, "percentage": 99.36, "elapsed_time": "1 day, 3:02:54", "remaining_time": "0:10:24"} +{"current_steps": 7485, "total_steps": 7532, "loss": 0.23012465238571167, "lr": 2.2208379630039858e-09, "epoch": 1.9876510423582525, "percentage": 99.38, "elapsed_time": "1 day, 3:03:08", 
"remaining_time": "0:10:11"} +{"current_steps": 7486, "total_steps": 7532, "loss": 0.21479251980781555, "lr": 2.129270203043987e-09, "epoch": 1.9879166113397955, "percentage": 99.39, "elapsed_time": "1 day, 3:03:21", "remaining_time": "0:09:58"} +{"current_steps": 7487, "total_steps": 7532, "loss": 0.24436548352241516, "lr": 2.039629914645236e-09, "epoch": 1.9881821803213384, "percentage": 99.4, "elapsed_time": "1 day, 3:03:34", "remaining_time": "0:09:45"} +{"current_steps": 7488, "total_steps": 7532, "loss": 0.22225134074687958, "lr": 1.951917115091684e-09, "epoch": 1.9884477493028814, "percentage": 99.42, "elapsed_time": "1 day, 3:03:47", "remaining_time": "0:09:32"} +{"current_steps": 7489, "total_steps": 7532, "loss": 0.22320827841758728, "lr": 1.8661318212920275e-09, "epoch": 1.9887133182844243, "percentage": 99.43, "elapsed_time": "1 day, 3:04:00", "remaining_time": "0:09:19"} +{"current_steps": 7490, "total_steps": 7532, "loss": 0.2317924201488495, "lr": 1.7822740497852597e-09, "epoch": 1.9889788872659673, "percentage": 99.44, "elapsed_time": "1 day, 3:04:13", "remaining_time": "0:09:06"} +{"current_steps": 7491, "total_steps": 7532, "loss": 0.2275170385837555, "lr": 1.700343816738448e-09, "epoch": 1.9892444562475102, "percentage": 99.46, "elapsed_time": "1 day, 3:04:26", "remaining_time": "0:08:53"} +{"current_steps": 7492, "total_steps": 7532, "loss": 0.24541540443897247, "lr": 1.6203411379456247e-09, "epoch": 1.9895100252290532, "percentage": 99.47, "elapsed_time": "1 day, 3:04:39", "remaining_time": "0:08:40"} +{"current_steps": 7493, "total_steps": 7532, "loss": 0.23041896522045135, "lr": 1.5422660288322288e-09, "epoch": 1.9897755942105961, "percentage": 99.48, "elapsed_time": "1 day, 3:04:52", "remaining_time": "0:08:27"} +{"current_steps": 7494, "total_steps": 7532, "loss": 0.22362437844276428, "lr": 1.4661185044484438e-09, "epoch": 1.990041163192139, "percentage": 99.5, "elapsed_time": "1 day, 3:05:06", "remaining_time": "0:08:14"} 
+{"current_steps": 7495, "total_steps": 7532, "loss": 0.22082944214344025, "lr": 1.3918985794747486e-09, "epoch": 1.990306732173682, "percentage": 99.51, "elapsed_time": "1 day, 3:05:19", "remaining_time": "0:08:01"} +{"current_steps": 7496, "total_steps": 7532, "loss": 0.2210516780614853, "lr": 1.3196062682208078e-09, "epoch": 1.990572301155225, "percentage": 99.52, "elapsed_time": "1 day, 3:05:32", "remaining_time": "0:07:48"} +{"current_steps": 7497, "total_steps": 7532, "loss": 0.21891455352306366, "lr": 1.249241584623251e-09, "epoch": 1.990837870136768, "percentage": 99.54, "elapsed_time": "1 day, 3:05:45", "remaining_time": "0:07:35"} +{"current_steps": 7498, "total_steps": 7532, "loss": 0.23363247513771057, "lr": 1.1808045422478932e-09, "epoch": 1.991103439118311, "percentage": 99.55, "elapsed_time": "1 day, 3:05:59", "remaining_time": "0:07:22"} +{"current_steps": 7499, "total_steps": 7532, "loss": 0.20676104724407196, "lr": 1.1142951542875146e-09, "epoch": 1.9913690080998538, "percentage": 99.56, "elapsed_time": "1 day, 3:06:11", "remaining_time": "0:07:09"} +{"current_steps": 7500, "total_steps": 7532, "loss": 0.23037788271903992, "lr": 1.0497134335663018e-09, "epoch": 1.9916345770813968, "percentage": 99.58, "elapsed_time": "1 day, 3:06:24", "remaining_time": "0:06:56"} +{"current_steps": 7501, "total_steps": 7532, "loss": 0.21958573162555695, "lr": 9.870593925320748e-10, "epoch": 1.9919001460629397, "percentage": 99.59, "elapsed_time": "1 day, 3:06:43", "remaining_time": "0:06:43"} +{"current_steps": 7502, "total_steps": 7532, "loss": 0.23280993103981018, "lr": 9.263330432662809e-10, "epoch": 1.9921657150444827, "percentage": 99.6, "elapsed_time": "1 day, 3:06:56", "remaining_time": "0:06:30"} +{"current_steps": 7503, "total_steps": 7532, "loss": 0.2254818230867386, "lr": 8.675343974762219e-10, "epoch": 1.9924312840260257, "percentage": 99.61, "elapsed_time": "1 day, 3:07:10", "remaining_time": "0:06:17"} +{"current_steps": 7504, "total_steps": 7532, 
"loss": 0.1850586235523224, "lr": 8.106634664950541e-10, "epoch": 1.9926968530075686, "percentage": 99.63, "elapsed_time": "1 day, 3:07:22", "remaining_time": "0:06:04"} +{"current_steps": 7505, "total_steps": 7532, "loss": 0.21080443263053894, "lr": 7.557202612895609e-10, "epoch": 1.9929624219891116, "percentage": 99.64, "elapsed_time": "1 day, 3:07:36", "remaining_time": "0:05:51"} +{"current_steps": 7506, "total_steps": 7532, "loss": 0.21604907512664795, "lr": 7.027047924512698e-10, "epoch": 1.9932279909706545, "percentage": 99.65, "elapsed_time": "1 day, 3:07:48", "remaining_time": "0:05:38"} +{"current_steps": 7507, "total_steps": 7532, "loss": 0.24684564769268036, "lr": 6.516170701997837e-10, "epoch": 1.9934935599521975, "percentage": 99.67, "elapsed_time": "1 day, 3:08:01", "remaining_time": "0:05:25"} +{"current_steps": 7508, "total_steps": 7532, "loss": 0.21735510230064392, "lr": 6.024571043861116e-10, "epoch": 1.9937591289337404, "percentage": 99.68, "elapsed_time": "1 day, 3:08:14", "remaining_time": "0:05:12"} +{"current_steps": 7509, "total_steps": 7532, "loss": 0.23616179823875427, "lr": 5.552249044860069e-10, "epoch": 1.9940246979152834, "percentage": 99.69, "elapsed_time": "1 day, 3:08:27", "remaining_time": "0:04:59"} +{"current_steps": 7510, "total_steps": 7532, "loss": 0.23930129408836365, "lr": 5.099204796066293e-10, "epoch": 1.9942902668968263, "percentage": 99.71, "elapsed_time": "1 day, 3:08:40", "remaining_time": "0:04:46"} +{"current_steps": 7511, "total_steps": 7532, "loss": 0.2354714274406433, "lr": 4.665438384809928e-10, "epoch": 1.9945558358783693, "percentage": 99.72, "elapsed_time": "1 day, 3:08:53", "remaining_time": "0:04:33"} +{"current_steps": 7512, "total_steps": 7532, "loss": 0.28315576910972595, "lr": 4.250949894724077e-10, "epoch": 1.9948214048599122, "percentage": 99.73, "elapsed_time": "1 day, 3:09:06", "remaining_time": "0:04:20"} +{"current_steps": 7513, "total_steps": 7532, "loss": 0.19599778950214386, "lr": 
3.8557394057114895e-10, "epoch": 1.9950869738414552, "percentage": 99.75, "elapsed_time": "1 day, 3:09:19", "remaining_time": "0:04:07"} +{"current_steps": 7514, "total_steps": 7532, "loss": 0.2295808494091034, "lr": 3.4798069939667725e-10, "epoch": 1.9953525428229981, "percentage": 99.76, "elapsed_time": "1 day, 3:09:32", "remaining_time": "0:03:54"} +{"current_steps": 7515, "total_steps": 7532, "loss": 0.23212578892707825, "lr": 3.1231527319763864e-10, "epoch": 1.995618111804541, "percentage": 99.77, "elapsed_time": "1 day, 3:09:44", "remaining_time": "0:03:41"} +{"current_steps": 7516, "total_steps": 7532, "loss": 0.2408447265625, "lr": 2.78577668847424e-10, "epoch": 1.995883680786084, "percentage": 99.79, "elapsed_time": "1 day, 3:09:57", "remaining_time": "0:03:28"} +{"current_steps": 7517, "total_steps": 7532, "loss": 0.25482073426246643, "lr": 2.4676789285305034e-10, "epoch": 1.996149249767627, "percentage": 99.8, "elapsed_time": "1 day, 3:10:10", "remaining_time": "0:03:15"} +{"current_steps": 7518, "total_steps": 7532, "loss": 0.21228459477424622, "lr": 2.1688595134516932e-10, "epoch": 1.9964148187491702, "percentage": 99.81, "elapsed_time": "1 day, 3:10:23", "remaining_time": "0:03:02"} +{"current_steps": 7519, "total_steps": 7532, "loss": 0.2467353343963623, "lr": 1.8893185008472814e-10, "epoch": 1.996680387730713, "percentage": 99.83, "elapsed_time": "1 day, 3:10:35", "remaining_time": "0:02:49"} +{"current_steps": 7520, "total_steps": 7532, "loss": 0.24475792050361633, "lr": 1.6290559446185962e-10, "epoch": 1.996945956712256, "percentage": 99.84, "elapsed_time": "1 day, 3:10:48", "remaining_time": "0:02:36"} +{"current_steps": 7521, "total_steps": 7532, "loss": 0.24821621179580688, "lr": 1.3880718949366155e-10, "epoch": 1.997211525693799, "percentage": 99.85, "elapsed_time": "1 day, 3:11:01", "remaining_time": "0:02:23"} +{"current_steps": 7522, "total_steps": 7532, "loss": 0.24725303053855896, "lr": 1.1663663982530715e-10, "epoch": 1.997477094675342, 
"percentage": 99.87, "elapsed_time": "1 day, 3:11:14", "remaining_time": "0:02:10"} +{"current_steps": 7523, "total_steps": 7532, "loss": 0.2319290041923523, "lr": 9.639394973226523e-11, "epoch": 1.997742663656885, "percentage": 99.88, "elapsed_time": "1 day, 3:11:26", "remaining_time": "0:01:57"} +{"current_steps": 7524, "total_steps": 7532, "loss": 0.22183239459991455, "lr": 7.807912311696974e-11, "epoch": 1.9980082326384279, "percentage": 99.89, "elapsed_time": "1 day, 3:11:39", "remaining_time": "0:01:44"} +{"current_steps": 7525, "total_steps": 7532, "loss": 0.2154427468776703, "lr": 6.169216350881968e-11, "epoch": 1.9982738016199708, "percentage": 99.91, "elapsed_time": "1 day, 3:11:52", "remaining_time": "0:01:31"} +{"current_steps": 7526, "total_steps": 7532, "loss": 0.22269389033317566, "lr": 4.723307406973021e-11, "epoch": 1.9985393706015138, "percentage": 99.92, "elapsed_time": "1 day, 3:12:05", "remaining_time": "0:01:18"} +{"current_steps": 7527, "total_steps": 7532, "loss": 0.20317527651786804, "lr": 3.4701857584140686e-11, "epoch": 1.9988049395830567, "percentage": 99.93, "elapsed_time": "1 day, 3:12:18", "remaining_time": "0:01:05"} +{"current_steps": 7528, "total_steps": 7532, "loss": 0.20792551338672638, "lr": 2.409851647011685e-11, "epoch": 1.9990705085645997, "percentage": 99.95, "elapsed_time": "1 day, 3:12:31", "remaining_time": "0:00:52"} +{"current_steps": 7529, "total_steps": 7532, "loss": 0.2128266990184784, "lr": 1.5423052770469072e-11, "epoch": 1.9993360775461426, "percentage": 99.96, "elapsed_time": "1 day, 3:12:44", "remaining_time": "0:00:39"} +{"current_steps": 7530, "total_steps": 7532, "loss": 0.23220527172088623, "lr": 8.67546815941367e-12, "epoch": 1.9996016465276856, "percentage": 99.97, "elapsed_time": "1 day, 3:12:57", "remaining_time": "0:00:26"} +{"current_steps": 7531, "total_steps": 7532, "loss": 0.22269386053085327, "lr": 3.8557639359115826e-12, "epoch": 1.9998672155092285, "percentage": 99.99, "elapsed_time": "1 day, 
3:13:10", "remaining_time": "0:00:13"} +{"current_steps": 7532, "total_steps": 7532, "loss": 0.2053365409374237, "lr": 9.63941030329707e-13, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "1 day, 3:13:14", "remaining_time": "0:00:00"} +{"current_steps": 7532, "total_steps": 7532, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "1 day, 3:13:20", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..46072143b727eee6aac3ed999ad7fc331de3581b --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,52767 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 7532, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0002655689815429558, + "grad_norm": 1.8881195832990014, + "learning_rate": 0.0, + "loss": 1.1502833366394043, + "step": 1 + }, + { + "epoch": 0.0005311379630859116, + "grad_norm": 1.77718785062999, + "learning_rate": 5.3050397877984086e-08, + "loss": 1.1698756217956543, + "step": 2 + }, + { + "epoch": 0.0007967069446288673, + "grad_norm": 1.6766718507101437, + "learning_rate": 1.0610079575596817e-07, + "loss": 1.1060130596160889, + "step": 3 + }, + { + "epoch": 0.0010622759261718232, + "grad_norm": 1.876053682165919, + "learning_rate": 1.5915119363395226e-07, + "loss": 1.1075276136398315, + "step": 4 + }, + { + "epoch": 0.001327844907714779, + "grad_norm": 1.88228417845019, + "learning_rate": 2.1220159151193635e-07, + "loss": 1.2153511047363281, + "step": 5 + }, + { + "epoch": 0.0015934138892577346, + "grad_norm": 1.9273368394845023, + "learning_rate": 2.6525198938992043e-07, + "loss": 1.1400426626205444, + "step": 6 + }, + { + "epoch": 0.0018589828708006906, + "grad_norm": 1.904814034912833, + "learning_rate": 3.183023872679045e-07, + "loss": 1.2070660591125488, + "step": 7 + }, + { + "epoch": 
0.0021245518523436463, + "grad_norm": 1.7346381008587795, + "learning_rate": 3.713527851458886e-07, + "loss": 1.1614588499069214, + "step": 8 + }, + { + "epoch": 0.002390120833886602, + "grad_norm": 1.817032704311048, + "learning_rate": 4.244031830238727e-07, + "loss": 1.1739476919174194, + "step": 9 + }, + { + "epoch": 0.002655689815429558, + "grad_norm": 1.8291974144657501, + "learning_rate": 4.774535809018568e-07, + "loss": 1.1559171676635742, + "step": 10 + }, + { + "epoch": 0.0029212587969725135, + "grad_norm": 2.0039010539208744, + "learning_rate": 5.305039787798409e-07, + "loss": 1.2086225748062134, + "step": 11 + }, + { + "epoch": 0.0031868277785154693, + "grad_norm": 1.876026657216244, + "learning_rate": 5.83554376657825e-07, + "loss": 1.227709174156189, + "step": 12 + }, + { + "epoch": 0.003452396760058425, + "grad_norm": 2.0245192813139825, + "learning_rate": 6.36604774535809e-07, + "loss": 1.255577564239502, + "step": 13 + }, + { + "epoch": 0.003717965741601381, + "grad_norm": 1.8641260357218605, + "learning_rate": 6.896551724137931e-07, + "loss": 1.1953760385513306, + "step": 14 + }, + { + "epoch": 0.0039835347231443365, + "grad_norm": 1.9079733249323254, + "learning_rate": 7.427055702917772e-07, + "loss": 1.1325336694717407, + "step": 15 + }, + { + "epoch": 0.004249103704687293, + "grad_norm": 1.8230190567516942, + "learning_rate": 7.957559681697613e-07, + "loss": 1.232974648475647, + "step": 16 + }, + { + "epoch": 0.004514672686230248, + "grad_norm": 1.8532380418447003, + "learning_rate": 8.488063660477454e-07, + "loss": 1.1527395248413086, + "step": 17 + }, + { + "epoch": 0.004780241667773204, + "grad_norm": 1.986294801704247, + "learning_rate": 9.018567639257295e-07, + "loss": 1.151026964187622, + "step": 18 + }, + { + "epoch": 0.00504581064931616, + "grad_norm": 1.8048967405226255, + "learning_rate": 9.549071618037136e-07, + "loss": 1.155288815498352, + "step": 19 + }, + { + "epoch": 0.005311379630859116, + "grad_norm": 2.1631450267380767, + 
"learning_rate": 1.0079575596816979e-06, + "loss": 1.183434009552002, + "step": 20 + }, + { + "epoch": 0.005576948612402072, + "grad_norm": 1.88758019498484, + "learning_rate": 1.0610079575596817e-06, + "loss": 1.161030650138855, + "step": 21 + }, + { + "epoch": 0.005842517593945027, + "grad_norm": 1.9605989446426395, + "learning_rate": 1.1140583554376658e-06, + "loss": 1.123382806777954, + "step": 22 + }, + { + "epoch": 0.006108086575487983, + "grad_norm": 2.2042020560619306, + "learning_rate": 1.16710875331565e-06, + "loss": 1.238707423210144, + "step": 23 + }, + { + "epoch": 0.0063736555570309385, + "grad_norm": 2.289866056000848, + "learning_rate": 1.220159151193634e-06, + "loss": 1.2058464288711548, + "step": 24 + }, + { + "epoch": 0.006639224538573895, + "grad_norm": 2.724214643619529, + "learning_rate": 1.273209549071618e-06, + "loss": 1.2351092100143433, + "step": 25 + }, + { + "epoch": 0.00690479352011685, + "grad_norm": 2.5088520951326028, + "learning_rate": 1.3262599469496024e-06, + "loss": 1.1739860773086548, + "step": 26 + }, + { + "epoch": 0.007170362501659806, + "grad_norm": 2.3243798435890155, + "learning_rate": 1.3793103448275862e-06, + "loss": 1.1407617330551147, + "step": 27 + }, + { + "epoch": 0.007435931483202762, + "grad_norm": 2.533007430657115, + "learning_rate": 1.4323607427055705e-06, + "loss": 1.1844531297683716, + "step": 28 + }, + { + "epoch": 0.007701500464745718, + "grad_norm": 2.4702075978733804, + "learning_rate": 1.4854111405835544e-06, + "loss": 1.1293678283691406, + "step": 29 + }, + { + "epoch": 0.007967069446288673, + "grad_norm": 3.0873404038783963, + "learning_rate": 1.5384615384615387e-06, + "loss": 1.1310899257659912, + "step": 30 + }, + { + "epoch": 0.00823263842783163, + "grad_norm": 2.7098364862500013, + "learning_rate": 1.5915119363395226e-06, + "loss": 1.1015795469284058, + "step": 31 + }, + { + "epoch": 0.008498207409374585, + "grad_norm": 2.8074949689582476, + "learning_rate": 1.6445623342175069e-06, + "loss": 
1.0756056308746338, + "step": 32 + }, + { + "epoch": 0.00876377639091754, + "grad_norm": 3.1563034348975676, + "learning_rate": 1.6976127320954908e-06, + "loss": 1.1496126651763916, + "step": 33 + }, + { + "epoch": 0.009029345372460496, + "grad_norm": 2.842390896608423, + "learning_rate": 1.750663129973475e-06, + "loss": 1.203465461730957, + "step": 34 + }, + { + "epoch": 0.009294914354003453, + "grad_norm": 2.6747271223349753, + "learning_rate": 1.803713527851459e-06, + "loss": 1.0613923072814941, + "step": 35 + }, + { + "epoch": 0.009560483335546408, + "grad_norm": 2.146709655536541, + "learning_rate": 1.8567639257294432e-06, + "loss": 1.06027090549469, + "step": 36 + }, + { + "epoch": 0.009826052317089363, + "grad_norm": 1.9942495143394863, + "learning_rate": 1.909814323607427e-06, + "loss": 1.0508522987365723, + "step": 37 + }, + { + "epoch": 0.01009162129863232, + "grad_norm": 2.1704927298148107, + "learning_rate": 1.9628647214854114e-06, + "loss": 1.0353929996490479, + "step": 38 + }, + { + "epoch": 0.010357190280175276, + "grad_norm": 1.8252380884349957, + "learning_rate": 2.0159151193633957e-06, + "loss": 0.9974027276039124, + "step": 39 + }, + { + "epoch": 0.010622759261718231, + "grad_norm": 1.7188806752497834, + "learning_rate": 2.0689655172413796e-06, + "loss": 1.0849467515945435, + "step": 40 + }, + { + "epoch": 0.010888328243261186, + "grad_norm": 1.3692667089198218, + "learning_rate": 2.1220159151193635e-06, + "loss": 1.005434274673462, + "step": 41 + }, + { + "epoch": 0.011153897224804143, + "grad_norm": 1.3465343019370317, + "learning_rate": 2.1750663129973478e-06, + "loss": 1.052631139755249, + "step": 42 + }, + { + "epoch": 0.011419466206347099, + "grad_norm": 1.352421126005469, + "learning_rate": 2.2281167108753316e-06, + "loss": 0.9470957517623901, + "step": 43 + }, + { + "epoch": 0.011685035187890054, + "grad_norm": 1.2219308328594767, + "learning_rate": 2.281167108753316e-06, + "loss": 0.9865130186080933, + "step": 44 + }, + { + "epoch": 
0.01195060416943301, + "grad_norm": 1.19161259271228, + "learning_rate": 2.3342175066313e-06, + "loss": 0.9405577778816223, + "step": 45 + }, + { + "epoch": 0.012216173150975966, + "grad_norm": 1.1603073869733838, + "learning_rate": 2.387267904509284e-06, + "loss": 0.9418795108795166, + "step": 46 + }, + { + "epoch": 0.012481742132518922, + "grad_norm": 1.1897328813812988, + "learning_rate": 2.440318302387268e-06, + "loss": 0.9841142892837524, + "step": 47 + }, + { + "epoch": 0.012747311114061877, + "grad_norm": 1.159720101499262, + "learning_rate": 2.4933687002652523e-06, + "loss": 0.9412609338760376, + "step": 48 + }, + { + "epoch": 0.013012880095604834, + "grad_norm": 1.1421347262548374, + "learning_rate": 2.546419098143236e-06, + "loss": 0.9239889979362488, + "step": 49 + }, + { + "epoch": 0.01327844907714779, + "grad_norm": 1.144363453746544, + "learning_rate": 2.59946949602122e-06, + "loss": 0.9212941527366638, + "step": 50 + }, + { + "epoch": 0.013544018058690745, + "grad_norm": 0.9916816911141796, + "learning_rate": 2.6525198938992047e-06, + "loss": 0.8863773345947266, + "step": 51 + }, + { + "epoch": 0.0138095870402337, + "grad_norm": 0.9890613082667745, + "learning_rate": 2.7055702917771886e-06, + "loss": 0.8990404009819031, + "step": 52 + }, + { + "epoch": 0.014075156021776657, + "grad_norm": 1.1123466462737277, + "learning_rate": 2.7586206896551725e-06, + "loss": 0.9257171154022217, + "step": 53 + }, + { + "epoch": 0.014340725003319612, + "grad_norm": 0.8689931750055545, + "learning_rate": 2.8116710875331564e-06, + "loss": 0.8239601254463196, + "step": 54 + }, + { + "epoch": 0.014606293984862568, + "grad_norm": 0.9936229603029793, + "learning_rate": 2.864721485411141e-06, + "loss": 0.8656830787658691, + "step": 55 + }, + { + "epoch": 0.014871862966405525, + "grad_norm": 1.0202371081091262, + "learning_rate": 2.917771883289125e-06, + "loss": 0.9470342397689819, + "step": 56 + }, + { + "epoch": 0.01513743194794848, + "grad_norm": 0.9663900963956384, + 
"learning_rate": 2.970822281167109e-06, + "loss": 0.8699859976768494, + "step": 57 + }, + { + "epoch": 0.015403000929491435, + "grad_norm": 0.940263545207204, + "learning_rate": 3.0238726790450927e-06, + "loss": 0.8668704628944397, + "step": 58 + }, + { + "epoch": 0.01566856991103439, + "grad_norm": 0.9865381848251076, + "learning_rate": 3.0769230769230774e-06, + "loss": 0.841624915599823, + "step": 59 + }, + { + "epoch": 0.015934138892577346, + "grad_norm": 0.8909972421095332, + "learning_rate": 3.1299734748010613e-06, + "loss": 0.8412661552429199, + "step": 60 + }, + { + "epoch": 0.0161997078741203, + "grad_norm": 0.8771283277278942, + "learning_rate": 3.183023872679045e-06, + "loss": 0.818957507610321, + "step": 61 + }, + { + "epoch": 0.01646527685566326, + "grad_norm": 0.9190140482494583, + "learning_rate": 3.23607427055703e-06, + "loss": 0.8030763268470764, + "step": 62 + }, + { + "epoch": 0.016730845837206215, + "grad_norm": 0.8839367067386452, + "learning_rate": 3.2891246684350138e-06, + "loss": 0.7869359850883484, + "step": 63 + }, + { + "epoch": 0.01699641481874917, + "grad_norm": 0.8058255896640879, + "learning_rate": 3.3421750663129977e-06, + "loss": 0.7912170886993408, + "step": 64 + }, + { + "epoch": 0.017261983800292126, + "grad_norm": 0.8538938403853334, + "learning_rate": 3.3952254641909815e-06, + "loss": 0.7736695408821106, + "step": 65 + }, + { + "epoch": 0.01752755278183508, + "grad_norm": 0.8652625375848492, + "learning_rate": 3.448275862068966e-06, + "loss": 0.768275260925293, + "step": 66 + }, + { + "epoch": 0.017793121763378036, + "grad_norm": 0.8691478661970735, + "learning_rate": 3.50132625994695e-06, + "loss": 0.7210639119148254, + "step": 67 + }, + { + "epoch": 0.01805869074492099, + "grad_norm": 0.8378031795839386, + "learning_rate": 3.554376657824934e-06, + "loss": 0.7488028407096863, + "step": 68 + }, + { + "epoch": 0.01832425972646395, + "grad_norm": 0.8943989597273122, + "learning_rate": 3.607427055702918e-06, + "loss": 
0.7329621911048889, + "step": 69 + }, + { + "epoch": 0.018589828708006906, + "grad_norm": 0.92104620358882, + "learning_rate": 3.660477453580902e-06, + "loss": 0.7270619869232178, + "step": 70 + }, + { + "epoch": 0.01885539768954986, + "grad_norm": 0.9782498013554233, + "learning_rate": 3.7135278514588865e-06, + "loss": 0.7271254658699036, + "step": 71 + }, + { + "epoch": 0.019120966671092816, + "grad_norm": 0.9115603845811348, + "learning_rate": 3.7665782493368703e-06, + "loss": 0.787033200263977, + "step": 72 + }, + { + "epoch": 0.01938653565263577, + "grad_norm": 0.8604692726067453, + "learning_rate": 3.819628647214854e-06, + "loss": 0.7049479484558105, + "step": 73 + }, + { + "epoch": 0.019652104634178727, + "grad_norm": 0.8610577281688413, + "learning_rate": 3.8726790450928385e-06, + "loss": 0.7146892547607422, + "step": 74 + }, + { + "epoch": 0.019917673615721682, + "grad_norm": 0.7602187567662452, + "learning_rate": 3.925729442970823e-06, + "loss": 0.7212516069412231, + "step": 75 + }, + { + "epoch": 0.02018324259726464, + "grad_norm": 0.6842508042039768, + "learning_rate": 3.978779840848806e-06, + "loss": 0.6612375378608704, + "step": 76 + }, + { + "epoch": 0.020448811578807596, + "grad_norm": 0.7781006919053841, + "learning_rate": 4.031830238726791e-06, + "loss": 0.7038244605064392, + "step": 77 + }, + { + "epoch": 0.02071438056035055, + "grad_norm": 0.7186592057129139, + "learning_rate": 4.084880636604775e-06, + "loss": 0.7081903219223022, + "step": 78 + }, + { + "epoch": 0.020979949541893507, + "grad_norm": 0.7655954113403886, + "learning_rate": 4.137931034482759e-06, + "loss": 0.7079841494560242, + "step": 79 + }, + { + "epoch": 0.021245518523436462, + "grad_norm": 0.7149787673446053, + "learning_rate": 4.190981432360743e-06, + "loss": 0.7090641260147095, + "step": 80 + }, + { + "epoch": 0.021511087504979418, + "grad_norm": 0.6657837070384769, + "learning_rate": 4.244031830238727e-06, + "loss": 0.6632575988769531, + "step": 81 + }, + { + "epoch": 
0.021776656486522373, + "grad_norm": 0.6666401713606211, + "learning_rate": 4.297082228116711e-06, + "loss": 0.7231097221374512, + "step": 82 + }, + { + "epoch": 0.02204222546806533, + "grad_norm": 0.6804476609839887, + "learning_rate": 4.3501326259946955e-06, + "loss": 0.6696034669876099, + "step": 83 + }, + { + "epoch": 0.022307794449608287, + "grad_norm": 0.7073638927991296, + "learning_rate": 4.403183023872679e-06, + "loss": 0.7550696134567261, + "step": 84 + }, + { + "epoch": 0.022573363431151242, + "grad_norm": 0.7064770122504733, + "learning_rate": 4.456233421750663e-06, + "loss": 0.671328067779541, + "step": 85 + }, + { + "epoch": 0.022838932412694198, + "grad_norm": 0.6506139330803743, + "learning_rate": 4.5092838196286476e-06, + "loss": 0.6864410638809204, + "step": 86 + }, + { + "epoch": 0.023104501394237153, + "grad_norm": 0.6642837777732639, + "learning_rate": 4.562334217506632e-06, + "loss": 0.6870769262313843, + "step": 87 + }, + { + "epoch": 0.023370070375780108, + "grad_norm": 0.6947506894199804, + "learning_rate": 4.615384615384616e-06, + "loss": 0.6539690494537354, + "step": 88 + }, + { + "epoch": 0.023635639357323063, + "grad_norm": 0.6446743321890098, + "learning_rate": 4.6684350132626e-06, + "loss": 0.6946991086006165, + "step": 89 + }, + { + "epoch": 0.02390120833886602, + "grad_norm": 0.6384512383480915, + "learning_rate": 4.721485411140584e-06, + "loss": 0.6177583932876587, + "step": 90 + }, + { + "epoch": 0.024166777320408978, + "grad_norm": 0.7150510018442997, + "learning_rate": 4.774535809018568e-06, + "loss": 0.6890037059783936, + "step": 91 + }, + { + "epoch": 0.024432346301951933, + "grad_norm": 0.6592991709316253, + "learning_rate": 4.8275862068965525e-06, + "loss": 0.6563063263893127, + "step": 92 + }, + { + "epoch": 0.024697915283494888, + "grad_norm": 0.6897740926797078, + "learning_rate": 4.880636604774536e-06, + "loss": 0.714318573474884, + "step": 93 + }, + { + "epoch": 0.024963484265037843, + "grad_norm": 0.6433596226177777, + 
"learning_rate": 4.93368700265252e-06, + "loss": 0.6720882654190063, + "step": 94 + }, + { + "epoch": 0.0252290532465808, + "grad_norm": 0.5910528348002435, + "learning_rate": 4.9867374005305045e-06, + "loss": 0.602899968624115, + "step": 95 + }, + { + "epoch": 0.025494622228123754, + "grad_norm": 0.6635651676723159, + "learning_rate": 5.039787798408489e-06, + "loss": 0.6628841161727905, + "step": 96 + }, + { + "epoch": 0.02576019120966671, + "grad_norm": 0.6070065577903714, + "learning_rate": 5.092838196286472e-06, + "loss": 0.6486932635307312, + "step": 97 + }, + { + "epoch": 0.026025760191209668, + "grad_norm": 0.6484848126679549, + "learning_rate": 5.145888594164457e-06, + "loss": 0.6719033122062683, + "step": 98 + }, + { + "epoch": 0.026291329172752623, + "grad_norm": 0.6856934201881044, + "learning_rate": 5.19893899204244e-06, + "loss": 0.6818530559539795, + "step": 99 + }, + { + "epoch": 0.02655689815429558, + "grad_norm": 0.6204811558305167, + "learning_rate": 5.251989389920424e-06, + "loss": 0.6306912899017334, + "step": 100 + }, + { + "epoch": 0.026822467135838534, + "grad_norm": 0.7820574736690976, + "learning_rate": 5.3050397877984095e-06, + "loss": 0.5952945351600647, + "step": 101 + }, + { + "epoch": 0.02708803611738149, + "grad_norm": 0.6546243503849497, + "learning_rate": 5.358090185676394e-06, + "loss": 0.6566107273101807, + "step": 102 + }, + { + "epoch": 0.027353605098924445, + "grad_norm": 0.707921645301647, + "learning_rate": 5.411140583554377e-06, + "loss": 0.6981694102287292, + "step": 103 + }, + { + "epoch": 0.0276191740804674, + "grad_norm": 0.6375441067969543, + "learning_rate": 5.4641909814323615e-06, + "loss": 0.6231328248977661, + "step": 104 + }, + { + "epoch": 0.02788474306201036, + "grad_norm": 0.6964560869475424, + "learning_rate": 5.517241379310345e-06, + "loss": 0.6414977312088013, + "step": 105 + }, + { + "epoch": 0.028150312043553314, + "grad_norm": 0.6835502446580011, + "learning_rate": 5.570291777188329e-06, + "loss": 
0.6335234642028809, + "step": 106 + }, + { + "epoch": 0.02841588102509627, + "grad_norm": 0.6248033284508979, + "learning_rate": 5.623342175066313e-06, + "loss": 0.6040852665901184, + "step": 107 + }, + { + "epoch": 0.028681450006639225, + "grad_norm": 0.6645474785171195, + "learning_rate": 5.676392572944297e-06, + "loss": 0.6011114716529846, + "step": 108 + }, + { + "epoch": 0.02894701898818218, + "grad_norm": 0.655106623405533, + "learning_rate": 5.729442970822282e-06, + "loss": 0.6042627096176147, + "step": 109 + }, + { + "epoch": 0.029212587969725135, + "grad_norm": 0.720208539355598, + "learning_rate": 5.782493368700266e-06, + "loss": 0.6183412671089172, + "step": 110 + }, + { + "epoch": 0.02947815695126809, + "grad_norm": 0.6666287454908232, + "learning_rate": 5.83554376657825e-06, + "loss": 0.6150818467140198, + "step": 111 + }, + { + "epoch": 0.02974372593281105, + "grad_norm": 0.6840692324124527, + "learning_rate": 5.888594164456234e-06, + "loss": 0.6202039122581482, + "step": 112 + }, + { + "epoch": 0.030009294914354005, + "grad_norm": 0.6626407253242022, + "learning_rate": 5.941644562334218e-06, + "loss": 0.6334809064865112, + "step": 113 + }, + { + "epoch": 0.03027486389589696, + "grad_norm": 0.6319419097399773, + "learning_rate": 5.994694960212202e-06, + "loss": 0.5728089809417725, + "step": 114 + }, + { + "epoch": 0.030540432877439915, + "grad_norm": 0.6988175213443283, + "learning_rate": 6.0477453580901854e-06, + "loss": 0.6884603500366211, + "step": 115 + }, + { + "epoch": 0.03080600185898287, + "grad_norm": 0.6618120552387852, + "learning_rate": 6.1007957559681706e-06, + "loss": 0.5619829893112183, + "step": 116 + }, + { + "epoch": 0.031071570840525826, + "grad_norm": 0.6756012639437595, + "learning_rate": 6.153846153846155e-06, + "loss": 0.6224710941314697, + "step": 117 + }, + { + "epoch": 0.03133713982206878, + "grad_norm": 0.7208355833756769, + "learning_rate": 6.206896551724138e-06, + "loss": 0.6119496822357178, + "step": 118 + }, + { + 
"epoch": 0.03160270880361174, + "grad_norm": 0.6917782946677038, + "learning_rate": 6.259946949602123e-06, + "loss": 0.6190857887268066, + "step": 119 + }, + { + "epoch": 0.03186827778515469, + "grad_norm": 0.6704531181022263, + "learning_rate": 6.312997347480107e-06, + "loss": 0.6460769176483154, + "step": 120 + }, + { + "epoch": 0.03213384676669765, + "grad_norm": 0.7493511248909543, + "learning_rate": 6.36604774535809e-06, + "loss": 0.6148796677589417, + "step": 121 + }, + { + "epoch": 0.0323994157482406, + "grad_norm": 0.6359613412994526, + "learning_rate": 6.419098143236075e-06, + "loss": 0.558960497379303, + "step": 122 + }, + { + "epoch": 0.03266498472978356, + "grad_norm": 0.6785691051694177, + "learning_rate": 6.47214854111406e-06, + "loss": 0.5844984650611877, + "step": 123 + }, + { + "epoch": 0.03293055371132652, + "grad_norm": 0.6692815537253501, + "learning_rate": 6.525198938992043e-06, + "loss": 0.5343623161315918, + "step": 124 + }, + { + "epoch": 0.03319612269286947, + "grad_norm": 0.6705726789318588, + "learning_rate": 6.5782493368700276e-06, + "loss": 0.5834348797798157, + "step": 125 + }, + { + "epoch": 0.03346169167441243, + "grad_norm": 0.7626576562771024, + "learning_rate": 6.631299734748011e-06, + "loss": 0.5997360944747925, + "step": 126 + }, + { + "epoch": 0.03372726065595538, + "grad_norm": 0.7117893752859364, + "learning_rate": 6.684350132625995e-06, + "loss": 0.5991666316986084, + "step": 127 + }, + { + "epoch": 0.03399282963749834, + "grad_norm": 0.7060406683837459, + "learning_rate": 6.737400530503979e-06, + "loss": 0.581120491027832, + "step": 128 + }, + { + "epoch": 0.03425839861904129, + "grad_norm": 0.6869761252397286, + "learning_rate": 6.790450928381963e-06, + "loss": 0.6219569444656372, + "step": 129 + }, + { + "epoch": 0.03452396760058425, + "grad_norm": 0.6916173566260286, + "learning_rate": 6.843501326259947e-06, + "loss": 0.5950608253479004, + "step": 130 + }, + { + "epoch": 0.03478953658212721, + "grad_norm": 
0.6136480902733893, + "learning_rate": 6.896551724137932e-06, + "loss": 0.5762747526168823, + "step": 131 + }, + { + "epoch": 0.03505510556367016, + "grad_norm": 0.670368708945713, + "learning_rate": 6.949602122015916e-06, + "loss": 0.6003131866455078, + "step": 132 + }, + { + "epoch": 0.03532067454521312, + "grad_norm": 0.6439028776339482, + "learning_rate": 7.0026525198939e-06, + "loss": 0.5866605043411255, + "step": 133 + }, + { + "epoch": 0.03558624352675607, + "grad_norm": 0.8324202287699098, + "learning_rate": 7.055702917771884e-06, + "loss": 0.6668443083763123, + "step": 134 + }, + { + "epoch": 0.03585181250829903, + "grad_norm": 0.7064456856515898, + "learning_rate": 7.108753315649868e-06, + "loss": 0.5738306045532227, + "step": 135 + }, + { + "epoch": 0.03611738148984198, + "grad_norm": 0.6941604370641007, + "learning_rate": 7.1618037135278515e-06, + "loss": 0.5774663686752319, + "step": 136 + }, + { + "epoch": 0.03638295047138494, + "grad_norm": 0.7648336305672251, + "learning_rate": 7.214854111405836e-06, + "loss": 0.5721150636672974, + "step": 137 + }, + { + "epoch": 0.0366485194529279, + "grad_norm": 0.7394576462203543, + "learning_rate": 7.267904509283821e-06, + "loss": 0.6350122690200806, + "step": 138 + }, + { + "epoch": 0.03691408843447085, + "grad_norm": 0.6540602529440619, + "learning_rate": 7.320954907161804e-06, + "loss": 0.5435039401054382, + "step": 139 + }, + { + "epoch": 0.03717965741601381, + "grad_norm": 0.6965351191908165, + "learning_rate": 7.374005305039789e-06, + "loss": 0.5869162678718567, + "step": 140 + }, + { + "epoch": 0.03744522639755676, + "grad_norm": 0.6664228073022063, + "learning_rate": 7.427055702917773e-06, + "loss": 0.5645807981491089, + "step": 141 + }, + { + "epoch": 0.03771079537909972, + "grad_norm": 0.6503771775205762, + "learning_rate": 7.480106100795756e-06, + "loss": 0.5502692461013794, + "step": 142 + }, + { + "epoch": 0.037976364360642674, + "grad_norm": 0.6223645459397411, + "learning_rate": 
7.533156498673741e-06, + "loss": 0.5602732300758362, + "step": 143 + }, + { + "epoch": 0.03824193334218563, + "grad_norm": 0.8638951879324807, + "learning_rate": 7.586206896551724e-06, + "loss": 0.6011391282081604, + "step": 144 + }, + { + "epoch": 0.03850750232372859, + "grad_norm": 0.6930636234613441, + "learning_rate": 7.639257294429708e-06, + "loss": 0.5482327938079834, + "step": 145 + }, + { + "epoch": 0.03877307130527154, + "grad_norm": 0.6693652199128735, + "learning_rate": 7.692307692307694e-06, + "loss": 0.5926344394683838, + "step": 146 + }, + { + "epoch": 0.0390386402868145, + "grad_norm": 0.8434991800954339, + "learning_rate": 7.745358090185677e-06, + "loss": 0.6558316946029663, + "step": 147 + }, + { + "epoch": 0.039304209268357454, + "grad_norm": 0.6845819362079449, + "learning_rate": 7.79840848806366e-06, + "loss": 0.572425365447998, + "step": 148 + }, + { + "epoch": 0.03956977824990041, + "grad_norm": 0.696296152543372, + "learning_rate": 7.851458885941646e-06, + "loss": 0.5684784650802612, + "step": 149 + }, + { + "epoch": 0.039835347231443365, + "grad_norm": 0.6779490529346879, + "learning_rate": 7.904509283819629e-06, + "loss": 0.5843643546104431, + "step": 150 + }, + { + "epoch": 0.04010091621298632, + "grad_norm": 0.6894842979231472, + "learning_rate": 7.957559681697613e-06, + "loss": 0.5471494793891907, + "step": 151 + }, + { + "epoch": 0.04036648519452928, + "grad_norm": 0.7583250211136208, + "learning_rate": 8.010610079575598e-06, + "loss": 0.595018744468689, + "step": 152 + }, + { + "epoch": 0.040632054176072234, + "grad_norm": 0.6904128122756304, + "learning_rate": 8.063660477453583e-06, + "loss": 0.5431865453720093, + "step": 153 + }, + { + "epoch": 0.04089762315761519, + "grad_norm": 0.7943246581886504, + "learning_rate": 8.116710875331566e-06, + "loss": 0.5622385740280151, + "step": 154 + }, + { + "epoch": 0.041163192139158145, + "grad_norm": 0.7792002007338675, + "learning_rate": 8.16976127320955e-06, + "loss": 0.5795880556106567, + 
"step": 155 + }, + { + "epoch": 0.0414287611207011, + "grad_norm": 0.7432143976693507, + "learning_rate": 8.222811671087533e-06, + "loss": 0.5854965448379517, + "step": 156 + }, + { + "epoch": 0.041694330102244055, + "grad_norm": 0.8104825185442435, + "learning_rate": 8.275862068965518e-06, + "loss": 0.5374501943588257, + "step": 157 + }, + { + "epoch": 0.041959899083787014, + "grad_norm": 0.7598674115735401, + "learning_rate": 8.328912466843502e-06, + "loss": 0.5779006481170654, + "step": 158 + }, + { + "epoch": 0.04222546806532997, + "grad_norm": 0.7033741631796787, + "learning_rate": 8.381962864721485e-06, + "loss": 0.550236701965332, + "step": 159 + }, + { + "epoch": 0.042491037046872925, + "grad_norm": 0.7285453499901458, + "learning_rate": 8.43501326259947e-06, + "loss": 0.557443380355835, + "step": 160 + }, + { + "epoch": 0.04275660602841588, + "grad_norm": 0.7050753960524794, + "learning_rate": 8.488063660477454e-06, + "loss": 0.5875238180160522, + "step": 161 + }, + { + "epoch": 0.043022175009958835, + "grad_norm": 0.7215582793376403, + "learning_rate": 8.541114058355439e-06, + "loss": 0.510900616645813, + "step": 162 + }, + { + "epoch": 0.043287743991501794, + "grad_norm": 0.7559114001900116, + "learning_rate": 8.594164456233422e-06, + "loss": 0.5465859174728394, + "step": 163 + }, + { + "epoch": 0.043553312973044746, + "grad_norm": 0.7494489908601825, + "learning_rate": 8.647214854111406e-06, + "loss": 0.5508615970611572, + "step": 164 + }, + { + "epoch": 0.043818881954587705, + "grad_norm": 0.7714387963397975, + "learning_rate": 8.700265251989391e-06, + "loss": 0.5437714457511902, + "step": 165 + }, + { + "epoch": 0.04408445093613066, + "grad_norm": 0.7480600693956645, + "learning_rate": 8.753315649867374e-06, + "loss": 0.542698323726654, + "step": 166 + }, + { + "epoch": 0.044350019917673615, + "grad_norm": 0.7339141407878966, + "learning_rate": 8.806366047745358e-06, + "loss": 0.5169371962547302, + "step": 167 + }, + { + "epoch": 0.044615588899216574, 
+ "grad_norm": 0.725595419270195, + "learning_rate": 8.859416445623343e-06, + "loss": 0.5436176061630249, + "step": 168 + }, + { + "epoch": 0.044881157880759526, + "grad_norm": 0.8205411933516983, + "learning_rate": 8.912466843501327e-06, + "loss": 0.568030834197998, + "step": 169 + }, + { + "epoch": 0.045146726862302484, + "grad_norm": 0.7544356200090666, + "learning_rate": 8.965517241379312e-06, + "loss": 0.5218889713287354, + "step": 170 + }, + { + "epoch": 0.045412295843845436, + "grad_norm": 0.7860957525035722, + "learning_rate": 9.018567639257295e-06, + "loss": 0.5275779962539673, + "step": 171 + }, + { + "epoch": 0.045677864825388395, + "grad_norm": 0.6938225497373272, + "learning_rate": 9.071618037135279e-06, + "loss": 0.5263184905052185, + "step": 172 + }, + { + "epoch": 0.045943433806931354, + "grad_norm": 0.7549069812662602, + "learning_rate": 9.124668435013264e-06, + "loss": 0.563044548034668, + "step": 173 + }, + { + "epoch": 0.046209002788474306, + "grad_norm": 0.9364041083837341, + "learning_rate": 9.177718832891247e-06, + "loss": 0.5896912217140198, + "step": 174 + }, + { + "epoch": 0.046474571770017264, + "grad_norm": 0.7219752548557496, + "learning_rate": 9.230769230769232e-06, + "loss": 0.5163949131965637, + "step": 175 + }, + { + "epoch": 0.046740140751560216, + "grad_norm": 0.8391633255974319, + "learning_rate": 9.283819628647216e-06, + "loss": 0.6203320026397705, + "step": 176 + }, + { + "epoch": 0.047005709733103175, + "grad_norm": 0.9119997852547688, + "learning_rate": 9.3368700265252e-06, + "loss": 0.5528024435043335, + "step": 177 + }, + { + "epoch": 0.04727127871464613, + "grad_norm": 0.8828541610102935, + "learning_rate": 9.389920424403184e-06, + "loss": 0.5657555460929871, + "step": 178 + }, + { + "epoch": 0.047536847696189086, + "grad_norm": 0.7671789386737649, + "learning_rate": 9.442970822281168e-06, + "loss": 0.5301925539970398, + "step": 179 + }, + { + "epoch": 0.04780241667773204, + "grad_norm": 0.8675940797859782, + 
"learning_rate": 9.496021220159151e-06, + "loss": 0.5388369560241699, + "step": 180 + }, + { + "epoch": 0.048067985659274996, + "grad_norm": 0.7966332028310692, + "learning_rate": 9.549071618037136e-06, + "loss": 0.5549717545509338, + "step": 181 + }, + { + "epoch": 0.048333554640817955, + "grad_norm": 0.8814678011939608, + "learning_rate": 9.60212201591512e-06, + "loss": 0.5959764719009399, + "step": 182 + }, + { + "epoch": 0.04859912362236091, + "grad_norm": 0.7841222204736121, + "learning_rate": 9.655172413793105e-06, + "loss": 0.5461844205856323, + "step": 183 + }, + { + "epoch": 0.048864692603903866, + "grad_norm": 0.7620084886447284, + "learning_rate": 9.708222811671088e-06, + "loss": 0.5428494811058044, + "step": 184 + }, + { + "epoch": 0.04913026158544682, + "grad_norm": 0.7918991595575344, + "learning_rate": 9.761273209549072e-06, + "loss": 0.552198052406311, + "step": 185 + }, + { + "epoch": 0.049395830566989776, + "grad_norm": 0.6896394660507362, + "learning_rate": 9.814323607427057e-06, + "loss": 0.49992549419403076, + "step": 186 + }, + { + "epoch": 0.04966139954853273, + "grad_norm": 0.7875507527713166, + "learning_rate": 9.86737400530504e-06, + "loss": 0.557820200920105, + "step": 187 + }, + { + "epoch": 0.04992696853007569, + "grad_norm": 0.8883719893129148, + "learning_rate": 9.920424403183024e-06, + "loss": 0.5238749384880066, + "step": 188 + }, + { + "epoch": 0.050192537511618646, + "grad_norm": 0.988465476825029, + "learning_rate": 9.973474801061009e-06, + "loss": 0.5346978902816772, + "step": 189 + }, + { + "epoch": 0.0504581064931616, + "grad_norm": 0.8024883433630577, + "learning_rate": 1.0026525198938993e-05, + "loss": 0.5256577730178833, + "step": 190 + }, + { + "epoch": 0.050723675474704556, + "grad_norm": 0.8026852335394901, + "learning_rate": 1.0079575596816978e-05, + "loss": 0.5235393047332764, + "step": 191 + }, + { + "epoch": 0.05098924445624751, + "grad_norm": 0.6835673591276205, + "learning_rate": 1.013262599469496e-05, + "loss": 
0.4984837472438812, + "step": 192 + }, + { + "epoch": 0.05125481343779047, + "grad_norm": 0.7829913352817355, + "learning_rate": 1.0185676392572945e-05, + "loss": 0.5209602117538452, + "step": 193 + }, + { + "epoch": 0.05152038241933342, + "grad_norm": 0.8334733472253096, + "learning_rate": 1.023872679045093e-05, + "loss": 0.5468267202377319, + "step": 194 + }, + { + "epoch": 0.05178595140087638, + "grad_norm": 0.8107908645155819, + "learning_rate": 1.0291777188328913e-05, + "loss": 0.5531667470932007, + "step": 195 + }, + { + "epoch": 0.052051520382419336, + "grad_norm": 0.8437904919697584, + "learning_rate": 1.0344827586206898e-05, + "loss": 0.5741526484489441, + "step": 196 + }, + { + "epoch": 0.05231708936396229, + "grad_norm": 0.6830882515315945, + "learning_rate": 1.039787798408488e-05, + "loss": 0.46132561564445496, + "step": 197 + }, + { + "epoch": 0.05258265834550525, + "grad_norm": 0.8402230890409916, + "learning_rate": 1.0450928381962865e-05, + "loss": 0.5074198842048645, + "step": 198 + }, + { + "epoch": 0.0528482273270482, + "grad_norm": 0.7476727742688456, + "learning_rate": 1.0503978779840849e-05, + "loss": 0.5193089842796326, + "step": 199 + }, + { + "epoch": 0.05311379630859116, + "grad_norm": 0.7814745235248249, + "learning_rate": 1.0557029177718834e-05, + "loss": 0.5209243297576904, + "step": 200 + }, + { + "epoch": 0.05337936529013411, + "grad_norm": 0.8844918483638834, + "learning_rate": 1.0610079575596819e-05, + "loss": 0.5607191920280457, + "step": 201 + }, + { + "epoch": 0.05364493427167707, + "grad_norm": 0.7926104097207243, + "learning_rate": 1.0663129973474802e-05, + "loss": 0.5482805371284485, + "step": 202 + }, + { + "epoch": 0.05391050325322003, + "grad_norm": 0.8109463956858287, + "learning_rate": 1.0716180371352788e-05, + "loss": 0.5579961538314819, + "step": 203 + }, + { + "epoch": 0.05417607223476298, + "grad_norm": 0.8246893162942163, + "learning_rate": 1.076923076923077e-05, + "loss": 0.5119072794914246, + "step": 204 + }, + { + 
"epoch": 0.05444164121630594, + "grad_norm": 0.8293246958439139, + "learning_rate": 1.0822281167108754e-05, + "loss": 0.5129292607307434, + "step": 205 + }, + { + "epoch": 0.05470721019784889, + "grad_norm": 0.6895550242199711, + "learning_rate": 1.0875331564986738e-05, + "loss": 0.500032901763916, + "step": 206 + }, + { + "epoch": 0.05497277917939185, + "grad_norm": 0.8385731092525408, + "learning_rate": 1.0928381962864723e-05, + "loss": 0.5264571309089661, + "step": 207 + }, + { + "epoch": 0.0552383481609348, + "grad_norm": 0.7915802802090326, + "learning_rate": 1.0981432360742708e-05, + "loss": 0.5569590330123901, + "step": 208 + }, + { + "epoch": 0.05550391714247776, + "grad_norm": 0.8546725938844908, + "learning_rate": 1.103448275862069e-05, + "loss": 0.5429908037185669, + "step": 209 + }, + { + "epoch": 0.05576948612402072, + "grad_norm": 0.8175642333393268, + "learning_rate": 1.1087533156498675e-05, + "loss": 0.5073692202568054, + "step": 210 + }, + { + "epoch": 0.05603505510556367, + "grad_norm": 0.9551222157670755, + "learning_rate": 1.1140583554376659e-05, + "loss": 0.5613659620285034, + "step": 211 + }, + { + "epoch": 0.05630062408710663, + "grad_norm": 1.8348970874488084, + "learning_rate": 1.1193633952254644e-05, + "loss": 0.5197691917419434, + "step": 212 + }, + { + "epoch": 0.05656619306864958, + "grad_norm": 0.9173115658326468, + "learning_rate": 1.1246684350132625e-05, + "loss": 0.5410990715026855, + "step": 213 + }, + { + "epoch": 0.05683176205019254, + "grad_norm": 0.8562107533946397, + "learning_rate": 1.129973474801061e-05, + "loss": 0.5852477550506592, + "step": 214 + }, + { + "epoch": 0.05709733103173549, + "grad_norm": 0.8483195878163089, + "learning_rate": 1.1352785145888594e-05, + "loss": 0.5312488079071045, + "step": 215 + }, + { + "epoch": 0.05736290001327845, + "grad_norm": 0.8817111257753456, + "learning_rate": 1.140583554376658e-05, + "loss": 0.5075235366821289, + "step": 216 + }, + { + "epoch": 0.05762846899482141, + "grad_norm": 
0.8014885700994473, + "learning_rate": 1.1458885941644564e-05, + "loss": 0.5213298797607422, + "step": 217 + }, + { + "epoch": 0.05789403797636436, + "grad_norm": 0.8852582070340804, + "learning_rate": 1.1511936339522548e-05, + "loss": 0.5564183592796326, + "step": 218 + }, + { + "epoch": 0.05815960695790732, + "grad_norm": 1.0148412469588788, + "learning_rate": 1.1564986737400531e-05, + "loss": 0.5328387022018433, + "step": 219 + }, + { + "epoch": 0.05842517593945027, + "grad_norm": 0.7824132338865165, + "learning_rate": 1.1618037135278515e-05, + "loss": 0.5010273456573486, + "step": 220 + }, + { + "epoch": 0.05869074492099323, + "grad_norm": 0.8493817546068081, + "learning_rate": 1.16710875331565e-05, + "loss": 0.5473708510398865, + "step": 221 + }, + { + "epoch": 0.05895631390253618, + "grad_norm": 1.1554913959885298, + "learning_rate": 1.1724137931034483e-05, + "loss": 0.5359818339347839, + "step": 222 + }, + { + "epoch": 0.05922188288407914, + "grad_norm": 0.9663065987200732, + "learning_rate": 1.1777188328912468e-05, + "loss": 0.5274665951728821, + "step": 223 + }, + { + "epoch": 0.0594874518656221, + "grad_norm": 0.8158672021913522, + "learning_rate": 1.1830238726790454e-05, + "loss": 0.5463781952857971, + "step": 224 + }, + { + "epoch": 0.05975302084716505, + "grad_norm": 0.7817235200046289, + "learning_rate": 1.1883289124668435e-05, + "loss": 0.553212583065033, + "step": 225 + }, + { + "epoch": 0.06001858982870801, + "grad_norm": 0.8540074681170072, + "learning_rate": 1.193633952254642e-05, + "loss": 0.47144171595573425, + "step": 226 + }, + { + "epoch": 0.06028415881025096, + "grad_norm": 0.9191106803002166, + "learning_rate": 1.1989389920424404e-05, + "loss": 0.506844162940979, + "step": 227 + }, + { + "epoch": 0.06054972779179392, + "grad_norm": 0.794192267301098, + "learning_rate": 1.2042440318302389e-05, + "loss": 0.4965322017669678, + "step": 228 + }, + { + "epoch": 0.06081529677333687, + "grad_norm": 0.8421546110465796, + "learning_rate": 
1.2095490716180371e-05, + "loss": 0.4815751612186432, + "step": 229 + }, + { + "epoch": 0.06108086575487983, + "grad_norm": 0.8107361719185122, + "learning_rate": 1.2148541114058356e-05, + "loss": 0.5245312452316284, + "step": 230 + }, + { + "epoch": 0.06134643473642279, + "grad_norm": 0.8749447967552209, + "learning_rate": 1.2201591511936341e-05, + "loss": 0.5215133428573608, + "step": 231 + }, + { + "epoch": 0.06161200371796574, + "grad_norm": 0.8315635530714504, + "learning_rate": 1.2254641909814325e-05, + "loss": 0.5039419531822205, + "step": 232 + }, + { + "epoch": 0.0618775726995087, + "grad_norm": 1.0583546039713638, + "learning_rate": 1.230769230769231e-05, + "loss": 0.5562925338745117, + "step": 233 + }, + { + "epoch": 0.06214314168105165, + "grad_norm": 1.069780059811152, + "learning_rate": 1.2360742705570291e-05, + "loss": 0.5372984409332275, + "step": 234 + }, + { + "epoch": 0.06240871066259461, + "grad_norm": 0.8766841361731121, + "learning_rate": 1.2413793103448277e-05, + "loss": 0.44987717270851135, + "step": 235 + }, + { + "epoch": 0.06267427964413756, + "grad_norm": 0.9229136432445015, + "learning_rate": 1.246684350132626e-05, + "loss": 0.537068247795105, + "step": 236 + }, + { + "epoch": 0.06293984862568051, + "grad_norm": 0.9828329951785308, + "learning_rate": 1.2519893899204245e-05, + "loss": 0.504779577255249, + "step": 237 + }, + { + "epoch": 0.06320541760722348, + "grad_norm": 1.0061858451025696, + "learning_rate": 1.257294429708223e-05, + "loss": 0.5524113774299622, + "step": 238 + }, + { + "epoch": 0.06347098658876643, + "grad_norm": 0.9888885225244529, + "learning_rate": 1.2625994694960214e-05, + "loss": 0.5089439153671265, + "step": 239 + }, + { + "epoch": 0.06373655557030938, + "grad_norm": 0.8394940482178029, + "learning_rate": 1.2679045092838197e-05, + "loss": 0.4501679837703705, + "step": 240 + }, + { + "epoch": 0.06400212455185235, + "grad_norm": 0.8117693384854435, + "learning_rate": 1.273209549071618e-05, + "loss": 
0.5360216498374939, + "step": 241 + }, + { + "epoch": 0.0642676935333953, + "grad_norm": 0.876954304053235, + "learning_rate": 1.2785145888594166e-05, + "loss": 0.5595712661743164, + "step": 242 + }, + { + "epoch": 0.06453326251493825, + "grad_norm": 1.080992038181853, + "learning_rate": 1.283819628647215e-05, + "loss": 0.5010904669761658, + "step": 243 + }, + { + "epoch": 0.0647988314964812, + "grad_norm": 1.0446842005075034, + "learning_rate": 1.2891246684350134e-05, + "loss": 0.5053697228431702, + "step": 244 + }, + { + "epoch": 0.06506440047802417, + "grad_norm": 0.803002193385922, + "learning_rate": 1.294429708222812e-05, + "loss": 0.5045514106750488, + "step": 245 + }, + { + "epoch": 0.06532996945956712, + "grad_norm": 0.7912163744531999, + "learning_rate": 1.2997347480106101e-05, + "loss": 0.5546073913574219, + "step": 246 + }, + { + "epoch": 0.06559553844111007, + "grad_norm": 0.9572908035308383, + "learning_rate": 1.3050397877984087e-05, + "loss": 0.47276046872138977, + "step": 247 + }, + { + "epoch": 0.06586110742265304, + "grad_norm": 0.8233476091470914, + "learning_rate": 1.310344827586207e-05, + "loss": 0.4757889211177826, + "step": 248 + }, + { + "epoch": 0.06612667640419599, + "grad_norm": 0.8415305337388579, + "learning_rate": 1.3156498673740055e-05, + "loss": 0.5078848600387573, + "step": 249 + }, + { + "epoch": 0.06639224538573894, + "grad_norm": 0.8437984625649567, + "learning_rate": 1.3209549071618037e-05, + "loss": 0.4890335202217102, + "step": 250 + }, + { + "epoch": 0.0666578143672819, + "grad_norm": 0.8299999132068526, + "learning_rate": 1.3262599469496022e-05, + "loss": 0.5406580567359924, + "step": 251 + }, + { + "epoch": 0.06692338334882486, + "grad_norm": 0.9307594142144101, + "learning_rate": 1.3315649867374005e-05, + "loss": 0.5236875414848328, + "step": 252 + }, + { + "epoch": 0.06718895233036781, + "grad_norm": 1.0602580439454288, + "learning_rate": 1.336870026525199e-05, + "loss": 0.4991317391395569, + "step": 253 + }, + { + 
"epoch": 0.06745452131191076, + "grad_norm": 0.8277603880683132, + "learning_rate": 1.3421750663129976e-05, + "loss": 0.4234679639339447, + "step": 254 + }, + { + "epoch": 0.06772009029345373, + "grad_norm": 0.9984839302922622, + "learning_rate": 1.3474801061007958e-05, + "loss": 0.49749234318733215, + "step": 255 + }, + { + "epoch": 0.06798565927499668, + "grad_norm": 0.9543855303701088, + "learning_rate": 1.3527851458885943e-05, + "loss": 0.5049105286598206, + "step": 256 + }, + { + "epoch": 0.06825122825653963, + "grad_norm": 0.8443711840757044, + "learning_rate": 1.3580901856763926e-05, + "loss": 0.5355304479598999, + "step": 257 + }, + { + "epoch": 0.06851679723808259, + "grad_norm": 0.9255144140027944, + "learning_rate": 1.3633952254641911e-05, + "loss": 0.46302929520606995, + "step": 258 + }, + { + "epoch": 0.06878236621962555, + "grad_norm": 0.953877794861965, + "learning_rate": 1.3687002652519895e-05, + "loss": 0.5054173469543457, + "step": 259 + }, + { + "epoch": 0.0690479352011685, + "grad_norm": 0.8214682466537866, + "learning_rate": 1.374005305039788e-05, + "loss": 0.5018566846847534, + "step": 260 + }, + { + "epoch": 0.06931350418271146, + "grad_norm": 0.878430758752321, + "learning_rate": 1.3793103448275863e-05, + "loss": 0.4938735365867615, + "step": 261 + }, + { + "epoch": 0.06957907316425442, + "grad_norm": 0.8343439459008911, + "learning_rate": 1.3846153846153847e-05, + "loss": 0.4605029225349426, + "step": 262 + }, + { + "epoch": 0.06984464214579737, + "grad_norm": 0.8260329604526515, + "learning_rate": 1.3899204244031832e-05, + "loss": 0.5056782960891724, + "step": 263 + }, + { + "epoch": 0.07011021112734032, + "grad_norm": 0.860551370737139, + "learning_rate": 1.3952254641909815e-05, + "loss": 0.5017784833908081, + "step": 264 + }, + { + "epoch": 0.07037578010888328, + "grad_norm": 0.8353804409772935, + "learning_rate": 1.40053050397878e-05, + "loss": 0.5132012367248535, + "step": 265 + }, + { + "epoch": 0.07064134909042624, + "grad_norm": 
0.8151795113028358, + "learning_rate": 1.4058355437665782e-05, + "loss": 0.531212329864502, + "step": 266 + }, + { + "epoch": 0.0709069180719692, + "grad_norm": 0.8086605566204427, + "learning_rate": 1.4111405835543767e-05, + "loss": 0.4900968074798584, + "step": 267 + }, + { + "epoch": 0.07117248705351215, + "grad_norm": 0.8735731145360269, + "learning_rate": 1.4164456233421753e-05, + "loss": 0.45277124643325806, + "step": 268 + }, + { + "epoch": 0.07143805603505511, + "grad_norm": 0.8760293380808535, + "learning_rate": 1.4217506631299736e-05, + "loss": 0.48026078939437866, + "step": 269 + }, + { + "epoch": 0.07170362501659806, + "grad_norm": 0.9019281227597356, + "learning_rate": 1.4270557029177721e-05, + "loss": 0.5111234784126282, + "step": 270 + }, + { + "epoch": 0.07196919399814102, + "grad_norm": 0.9120608197487232, + "learning_rate": 1.4323607427055703e-05, + "loss": 0.5448082685470581, + "step": 271 + }, + { + "epoch": 0.07223476297968397, + "grad_norm": 0.9400729117423203, + "learning_rate": 1.4376657824933688e-05, + "loss": 0.5242921113967896, + "step": 272 + }, + { + "epoch": 0.07250033196122693, + "grad_norm": 0.9404952891335322, + "learning_rate": 1.4429708222811672e-05, + "loss": 0.5194095373153687, + "step": 273 + }, + { + "epoch": 0.07276590094276988, + "grad_norm": 0.8893776382848525, + "learning_rate": 1.4482758620689657e-05, + "loss": 0.4620330333709717, + "step": 274 + }, + { + "epoch": 0.07303146992431284, + "grad_norm": 0.886983687866706, + "learning_rate": 1.4535809018567642e-05, + "loss": 0.4654063582420349, + "step": 275 + }, + { + "epoch": 0.0732970389058558, + "grad_norm": 0.7984003718276244, + "learning_rate": 1.4588859416445624e-05, + "loss": 0.4637746810913086, + "step": 276 + }, + { + "epoch": 0.07356260788739875, + "grad_norm": 0.8288882522584324, + "learning_rate": 1.4641909814323609e-05, + "loss": 0.47949421405792236, + "step": 277 + }, + { + "epoch": 0.0738281768689417, + "grad_norm": 1.0041804846004008, + "learning_rate": 
1.4694960212201592e-05, + "loss": 0.49565935134887695, + "step": 278 + }, + { + "epoch": 0.07409374585048466, + "grad_norm": 0.9214786055945364, + "learning_rate": 1.4748010610079577e-05, + "loss": 0.5057941675186157, + "step": 279 + }, + { + "epoch": 0.07435931483202762, + "grad_norm": 0.9073397896109812, + "learning_rate": 1.480106100795756e-05, + "loss": 0.5495956540107727, + "step": 280 + }, + { + "epoch": 0.07462488381357057, + "grad_norm": 0.8743353741776648, + "learning_rate": 1.4854111405835546e-05, + "loss": 0.4502897560596466, + "step": 281 + }, + { + "epoch": 0.07489045279511353, + "grad_norm": 0.8694785116368758, + "learning_rate": 1.490716180371353e-05, + "loss": 0.4799070954322815, + "step": 282 + }, + { + "epoch": 0.07515602177665649, + "grad_norm": 0.886176954457428, + "learning_rate": 1.4960212201591513e-05, + "loss": 0.45640307664871216, + "step": 283 + }, + { + "epoch": 0.07542159075819944, + "grad_norm": 0.8937725285994821, + "learning_rate": 1.5013262599469498e-05, + "loss": 0.47862207889556885, + "step": 284 + }, + { + "epoch": 0.0756871597397424, + "grad_norm": 0.8717898339198907, + "learning_rate": 1.5066312997347481e-05, + "loss": 0.48195987939834595, + "step": 285 + }, + { + "epoch": 0.07595272872128535, + "grad_norm": 0.9124586645482137, + "learning_rate": 1.5119363395225467e-05, + "loss": 0.518566370010376, + "step": 286 + }, + { + "epoch": 0.07621829770282831, + "grad_norm": 0.9766882853479317, + "learning_rate": 1.5172413793103448e-05, + "loss": 0.5034162402153015, + "step": 287 + }, + { + "epoch": 0.07648386668437127, + "grad_norm": 0.8995114639723897, + "learning_rate": 1.5225464190981433e-05, + "loss": 0.497822642326355, + "step": 288 + }, + { + "epoch": 0.07674943566591422, + "grad_norm": 0.8484786603983125, + "learning_rate": 1.5278514588859417e-05, + "loss": 0.510530412197113, + "step": 289 + }, + { + "epoch": 0.07701500464745718, + "grad_norm": 0.9406440408252492, + "learning_rate": 1.53315649867374e-05, + "loss": 
0.5163881778717041, + "step": 290 + }, + { + "epoch": 0.07728057362900013, + "grad_norm": 0.9825958938719339, + "learning_rate": 1.5384615384615387e-05, + "loss": 0.5161621570587158, + "step": 291 + }, + { + "epoch": 0.07754614261054309, + "grad_norm": 0.8680267479326179, + "learning_rate": 1.543766578249337e-05, + "loss": 0.5260482430458069, + "step": 292 + }, + { + "epoch": 0.07781171159208604, + "grad_norm": 0.8791995274446183, + "learning_rate": 1.5490716180371354e-05, + "loss": 0.4946279227733612, + "step": 293 + }, + { + "epoch": 0.078077280573629, + "grad_norm": 0.9734620967906259, + "learning_rate": 1.5543766578249338e-05, + "loss": 0.5030514001846313, + "step": 294 + }, + { + "epoch": 0.07834284955517196, + "grad_norm": 0.899295097408943, + "learning_rate": 1.559681697612732e-05, + "loss": 0.48864102363586426, + "step": 295 + }, + { + "epoch": 0.07860841853671491, + "grad_norm": 0.8710376092284174, + "learning_rate": 1.5649867374005304e-05, + "loss": 0.48310425877571106, + "step": 296 + }, + { + "epoch": 0.07887398751825787, + "grad_norm": 1.0094258392730318, + "learning_rate": 1.570291777188329e-05, + "loss": 0.4451446533203125, + "step": 297 + }, + { + "epoch": 0.07913955649980083, + "grad_norm": 0.9863170561942101, + "learning_rate": 1.5755968169761275e-05, + "loss": 0.4884604811668396, + "step": 298 + }, + { + "epoch": 0.07940512548134378, + "grad_norm": 0.8355693003184833, + "learning_rate": 1.5809018567639258e-05, + "loss": 0.5047659873962402, + "step": 299 + }, + { + "epoch": 0.07967069446288673, + "grad_norm": 0.8879040718748079, + "learning_rate": 1.586206896551724e-05, + "loss": 0.49124205112457275, + "step": 300 + }, + { + "epoch": 0.0799362634444297, + "grad_norm": 0.9411885452551192, + "learning_rate": 1.5915119363395225e-05, + "loss": 0.5113086700439453, + "step": 301 + }, + { + "epoch": 0.08020183242597265, + "grad_norm": 0.9345380756850689, + "learning_rate": 1.5968169761273212e-05, + "loss": 0.5298338532447815, + "step": 302 + }, + { + 
"epoch": 0.0804674014075156, + "grad_norm": 0.9050429706274331, + "learning_rate": 1.6021220159151195e-05, + "loss": 0.4673181176185608, + "step": 303 + }, + { + "epoch": 0.08073297038905856, + "grad_norm": 0.8972864762330055, + "learning_rate": 1.607427055702918e-05, + "loss": 0.45361828804016113, + "step": 304 + }, + { + "epoch": 0.08099853937060152, + "grad_norm": 0.8848533583648175, + "learning_rate": 1.6127320954907166e-05, + "loss": 0.5144034624099731, + "step": 305 + }, + { + "epoch": 0.08126410835214447, + "grad_norm": 0.9263690972931414, + "learning_rate": 1.6180371352785146e-05, + "loss": 0.5027451515197754, + "step": 306 + }, + { + "epoch": 0.08152967733368742, + "grad_norm": 0.8575377500476566, + "learning_rate": 1.6233421750663133e-05, + "loss": 0.4987551271915436, + "step": 307 + }, + { + "epoch": 0.08179524631523039, + "grad_norm": 1.0121964253373468, + "learning_rate": 1.6286472148541116e-05, + "loss": 0.5433062314987183, + "step": 308 + }, + { + "epoch": 0.08206081529677334, + "grad_norm": 0.8973695218716041, + "learning_rate": 1.63395225464191e-05, + "loss": 0.49603772163391113, + "step": 309 + }, + { + "epoch": 0.08232638427831629, + "grad_norm": 0.9033181815462389, + "learning_rate": 1.6392572944297083e-05, + "loss": 0.47990959882736206, + "step": 310 + }, + { + "epoch": 0.08259195325985925, + "grad_norm": 0.9843185449650845, + "learning_rate": 1.6445623342175066e-05, + "loss": 0.5196831226348877, + "step": 311 + }, + { + "epoch": 0.0828575222414022, + "grad_norm": 0.8589822510995361, + "learning_rate": 1.6498673740053053e-05, + "loss": 0.4664091467857361, + "step": 312 + }, + { + "epoch": 0.08312309122294516, + "grad_norm": 0.9077443936761218, + "learning_rate": 1.6551724137931037e-05, + "loss": 0.4405553936958313, + "step": 313 + }, + { + "epoch": 0.08338866020448811, + "grad_norm": 0.8561334135462362, + "learning_rate": 1.660477453580902e-05, + "loss": 0.46172815561294556, + "step": 314 + }, + { + "epoch": 0.08365422918603108, + "grad_norm": 
0.8835708894071636, + "learning_rate": 1.6657824933687004e-05, + "loss": 0.5004327297210693, + "step": 315 + }, + { + "epoch": 0.08391979816757403, + "grad_norm": 0.8452618593185571, + "learning_rate": 1.6710875331564987e-05, + "loss": 0.4727814197540283, + "step": 316 + }, + { + "epoch": 0.08418536714911698, + "grad_norm": 0.7631381381409372, + "learning_rate": 1.676392572944297e-05, + "loss": 0.43602120876312256, + "step": 317 + }, + { + "epoch": 0.08445093613065995, + "grad_norm": 0.9092168864142193, + "learning_rate": 1.6816976127320957e-05, + "loss": 0.5110410451889038, + "step": 318 + }, + { + "epoch": 0.0847165051122029, + "grad_norm": 0.9902301773407237, + "learning_rate": 1.687002652519894e-05, + "loss": 0.4798283278942108, + "step": 319 + }, + { + "epoch": 0.08498207409374585, + "grad_norm": 0.8572923551208312, + "learning_rate": 1.6923076923076924e-05, + "loss": 0.45690029859542847, + "step": 320 + }, + { + "epoch": 0.0852476430752888, + "grad_norm": 0.8864718165003516, + "learning_rate": 1.6976127320954908e-05, + "loss": 0.4770117998123169, + "step": 321 + }, + { + "epoch": 0.08551321205683177, + "grad_norm": 0.888032985544436, + "learning_rate": 1.702917771883289e-05, + "loss": 0.512240469455719, + "step": 322 + }, + { + "epoch": 0.08577878103837472, + "grad_norm": 0.8665270088700595, + "learning_rate": 1.7082228116710878e-05, + "loss": 0.4696195423603058, + "step": 323 + }, + { + "epoch": 0.08604435001991767, + "grad_norm": 0.8876364903970222, + "learning_rate": 1.713527851458886e-05, + "loss": 0.4779578149318695, + "step": 324 + }, + { + "epoch": 0.08630991900146064, + "grad_norm": 0.9604080935445363, + "learning_rate": 1.7188328912466845e-05, + "loss": 0.48670440912246704, + "step": 325 + }, + { + "epoch": 0.08657548798300359, + "grad_norm": 0.9813156772782552, + "learning_rate": 1.7241379310344828e-05, + "loss": 0.5285798907279968, + "step": 326 + }, + { + "epoch": 0.08684105696454654, + "grad_norm": 0.9264252564283505, + "learning_rate": 
1.7294429708222812e-05, + "loss": 0.46095865964889526, + "step": 327 + }, + { + "epoch": 0.08710662594608949, + "grad_norm": 0.8953179311501671, + "learning_rate": 1.73474801061008e-05, + "loss": 0.44342565536499023, + "step": 328 + }, + { + "epoch": 0.08737219492763246, + "grad_norm": 0.9640917124230414, + "learning_rate": 1.7400530503978782e-05, + "loss": 0.48974257707595825, + "step": 329 + }, + { + "epoch": 0.08763776390917541, + "grad_norm": 1.3568266957703046, + "learning_rate": 1.7453580901856765e-05, + "loss": 0.4763977527618408, + "step": 330 + }, + { + "epoch": 0.08790333289071836, + "grad_norm": 1.0231360729141987, + "learning_rate": 1.750663129973475e-05, + "loss": 0.5390856266021729, + "step": 331 + }, + { + "epoch": 0.08816890187226133, + "grad_norm": 0.9254788253309115, + "learning_rate": 1.7559681697612732e-05, + "loss": 0.4833192825317383, + "step": 332 + }, + { + "epoch": 0.08843447085380428, + "grad_norm": 0.9106057248503829, + "learning_rate": 1.7612732095490716e-05, + "loss": 0.47842955589294434, + "step": 333 + }, + { + "epoch": 0.08870003983534723, + "grad_norm": 0.8653538374375338, + "learning_rate": 1.7665782493368703e-05, + "loss": 0.4543060064315796, + "step": 334 + }, + { + "epoch": 0.08896560881689018, + "grad_norm": 0.9024795887264612, + "learning_rate": 1.7718832891246686e-05, + "loss": 0.4492039978504181, + "step": 335 + }, + { + "epoch": 0.08923117779843315, + "grad_norm": 0.9660730803540603, + "learning_rate": 1.777188328912467e-05, + "loss": 0.4930066466331482, + "step": 336 + }, + { + "epoch": 0.0894967467799761, + "grad_norm": 0.9494811659806174, + "learning_rate": 1.7824933687002653e-05, + "loss": 0.46343356370925903, + "step": 337 + }, + { + "epoch": 0.08976231576151905, + "grad_norm": 0.98824099461907, + "learning_rate": 1.7877984084880636e-05, + "loss": 0.5118839740753174, + "step": 338 + }, + { + "epoch": 0.09002788474306202, + "grad_norm": 0.9759312233085756, + "learning_rate": 1.7931034482758623e-05, + "loss": 
0.4659194350242615, + "step": 339 + }, + { + "epoch": 0.09029345372460497, + "grad_norm": 0.868792760549277, + "learning_rate": 1.7984084880636607e-05, + "loss": 0.45929303765296936, + "step": 340 + }, + { + "epoch": 0.09055902270614792, + "grad_norm": 0.9774857416777888, + "learning_rate": 1.803713527851459e-05, + "loss": 0.5072556734085083, + "step": 341 + }, + { + "epoch": 0.09082459168769087, + "grad_norm": 0.8722377179138728, + "learning_rate": 1.8090185676392577e-05, + "loss": 0.42370402812957764, + "step": 342 + }, + { + "epoch": 0.09109016066923384, + "grad_norm": 0.9404121189660462, + "learning_rate": 1.8143236074270557e-05, + "loss": 0.5017818212509155, + "step": 343 + }, + { + "epoch": 0.09135572965077679, + "grad_norm": 1.0279846493738434, + "learning_rate": 1.8196286472148544e-05, + "loss": 0.4746384620666504, + "step": 344 + }, + { + "epoch": 0.09162129863231974, + "grad_norm": 1.0016746569872437, + "learning_rate": 1.8249336870026527e-05, + "loss": 0.49020540714263916, + "step": 345 + }, + { + "epoch": 0.09188686761386271, + "grad_norm": 0.8521475505102624, + "learning_rate": 1.830238726790451e-05, + "loss": 0.4569393992424011, + "step": 346 + }, + { + "epoch": 0.09215243659540566, + "grad_norm": 0.9587089968564823, + "learning_rate": 1.8355437665782494e-05, + "loss": 0.46831727027893066, + "step": 347 + }, + { + "epoch": 0.09241800557694861, + "grad_norm": 0.909230845841239, + "learning_rate": 1.8408488063660478e-05, + "loss": 0.4795265197753906, + "step": 348 + }, + { + "epoch": 0.09268357455849156, + "grad_norm": 0.9641043081337674, + "learning_rate": 1.8461538461538465e-05, + "loss": 0.5122503042221069, + "step": 349 + }, + { + "epoch": 0.09294914354003453, + "grad_norm": 0.8617611974669258, + "learning_rate": 1.8514588859416448e-05, + "loss": 0.4190404713153839, + "step": 350 + }, + { + "epoch": 0.09321471252157748, + "grad_norm": 0.9061006884991066, + "learning_rate": 1.856763925729443e-05, + "loss": 0.47778886556625366, + "step": 351 + }, + { 
+ "epoch": 0.09348028150312043, + "grad_norm": 0.9208451846579827, + "learning_rate": 1.8620689655172415e-05, + "loss": 0.45851507782936096, + "step": 352 + }, + { + "epoch": 0.09374585048466338, + "grad_norm": 1.0050481975496854, + "learning_rate": 1.86737400530504e-05, + "loss": 0.4888782501220703, + "step": 353 + }, + { + "epoch": 0.09401141946620635, + "grad_norm": 0.9454138173982718, + "learning_rate": 1.8726790450928382e-05, + "loss": 0.5032983422279358, + "step": 354 + }, + { + "epoch": 0.0942769884477493, + "grad_norm": 0.9130362696106749, + "learning_rate": 1.877984084880637e-05, + "loss": 0.4754604697227478, + "step": 355 + }, + { + "epoch": 0.09454255742929225, + "grad_norm": 0.9970889038933597, + "learning_rate": 1.8832891246684352e-05, + "loss": 0.488397479057312, + "step": 356 + }, + { + "epoch": 0.09480812641083522, + "grad_norm": 1.222649143916529, + "learning_rate": 1.8885941644562336e-05, + "loss": 0.4775403141975403, + "step": 357 + }, + { + "epoch": 0.09507369539237817, + "grad_norm": 0.9872263151320333, + "learning_rate": 1.893899204244032e-05, + "loss": 0.47063153982162476, + "step": 358 + }, + { + "epoch": 0.09533926437392112, + "grad_norm": 1.0222144168199743, + "learning_rate": 1.8992042440318303e-05, + "loss": 0.4856908321380615, + "step": 359 + }, + { + "epoch": 0.09560483335546408, + "grad_norm": 0.9195037496858368, + "learning_rate": 1.904509283819629e-05, + "loss": 0.440033495426178, + "step": 360 + }, + { + "epoch": 0.09587040233700704, + "grad_norm": 0.9961899484684762, + "learning_rate": 1.9098143236074273e-05, + "loss": 0.4825770854949951, + "step": 361 + }, + { + "epoch": 0.09613597131854999, + "grad_norm": 0.9443841189655576, + "learning_rate": 1.9151193633952256e-05, + "loss": 0.48192232847213745, + "step": 362 + }, + { + "epoch": 0.09640154030009294, + "grad_norm": 0.9065595450317342, + "learning_rate": 1.920424403183024e-05, + "loss": 0.4689444899559021, + "step": 363 + }, + { + "epoch": 0.09666710928163591, + "grad_norm": 
0.9970961253516039, + "learning_rate": 1.9257294429708223e-05, + "loss": 0.47120895981788635, + "step": 364 + }, + { + "epoch": 0.09693267826317886, + "grad_norm": 1.0106028234477955, + "learning_rate": 1.931034482758621e-05, + "loss": 0.4968941807746887, + "step": 365 + }, + { + "epoch": 0.09719824724472181, + "grad_norm": 1.115125675989656, + "learning_rate": 1.9363395225464193e-05, + "loss": 0.46982288360595703, + "step": 366 + }, + { + "epoch": 0.09746381622626477, + "grad_norm": 0.9408972278578609, + "learning_rate": 1.9416445623342177e-05, + "loss": 0.4541531205177307, + "step": 367 + }, + { + "epoch": 0.09772938520780773, + "grad_norm": 0.9760564476186651, + "learning_rate": 1.946949602122016e-05, + "loss": 0.45576703548431396, + "step": 368 + }, + { + "epoch": 0.09799495418935068, + "grad_norm": 0.9893999168346334, + "learning_rate": 1.9522546419098144e-05, + "loss": 0.48060357570648193, + "step": 369 + }, + { + "epoch": 0.09826052317089363, + "grad_norm": 0.9675810264832774, + "learning_rate": 1.9575596816976127e-05, + "loss": 0.47536781430244446, + "step": 370 + }, + { + "epoch": 0.0985260921524366, + "grad_norm": 0.9516181191759193, + "learning_rate": 1.9628647214854114e-05, + "loss": 0.46463894844055176, + "step": 371 + }, + { + "epoch": 0.09879166113397955, + "grad_norm": 1.0082712913027811, + "learning_rate": 1.9681697612732098e-05, + "loss": 0.49570178985595703, + "step": 372 + }, + { + "epoch": 0.0990572301155225, + "grad_norm": 1.0327922438955468, + "learning_rate": 1.973474801061008e-05, + "loss": 0.4764043390750885, + "step": 373 + }, + { + "epoch": 0.09932279909706546, + "grad_norm": 0.9227866290107449, + "learning_rate": 1.9787798408488064e-05, + "loss": 0.43582671880722046, + "step": 374 + }, + { + "epoch": 0.09958836807860842, + "grad_norm": 0.9360238854832598, + "learning_rate": 1.9840848806366048e-05, + "loss": 0.46077725291252136, + "step": 375 + }, + { + "epoch": 0.09985393706015137, + "grad_norm": 0.9607682273492437, + "learning_rate": 
1.9893899204244035e-05, + "loss": 0.4794929027557373, + "step": 376 + }, + { + "epoch": 0.10011950604169433, + "grad_norm": 0.9619848398175739, + "learning_rate": 1.9946949602122018e-05, + "loss": 0.43174588680267334, + "step": 377 + }, + { + "epoch": 0.10038507502323729, + "grad_norm": 0.90095462919728, + "learning_rate": 2e-05, + "loss": 0.44885915517807007, + "step": 378 + }, + { + "epoch": 0.10065064400478024, + "grad_norm": 1.0789787198205218, + "learning_rate": 1.9999999036058974e-05, + "loss": 0.520150899887085, + "step": 379 + }, + { + "epoch": 0.1009162129863232, + "grad_norm": 0.9699182604374589, + "learning_rate": 1.9999996144236068e-05, + "loss": 0.5139277577400208, + "step": 380 + }, + { + "epoch": 0.10118178196786615, + "grad_norm": 1.0077278580199993, + "learning_rate": 1.999999132453184e-05, + "loss": 0.48935171961784363, + "step": 381 + }, + { + "epoch": 0.10144735094940911, + "grad_norm": 0.9095465340361383, + "learning_rate": 1.999998457694723e-05, + "loss": 0.4805561304092407, + "step": 382 + }, + { + "epoch": 0.10171291993095206, + "grad_norm": 0.9209321398292457, + "learning_rate": 1.9999975901483532e-05, + "loss": 0.4340912997722626, + "step": 383 + }, + { + "epoch": 0.10197848891249502, + "grad_norm": 1.0414639039942946, + "learning_rate": 1.999996529814242e-05, + "loss": 0.48282474279403687, + "step": 384 + }, + { + "epoch": 0.10224405789403798, + "grad_norm": 0.9753320144694753, + "learning_rate": 1.999995276692593e-05, + "loss": 0.4653206169605255, + "step": 385 + }, + { + "epoch": 0.10250962687558093, + "grad_norm": 0.919281113033857, + "learning_rate": 1.999993830783649e-05, + "loss": 0.48501014709472656, + "step": 386 + }, + { + "epoch": 0.10277519585712389, + "grad_norm": 1.0711296444042975, + "learning_rate": 1.9999921920876882e-05, + "loss": 0.48260143399238586, + "step": 387 + }, + { + "epoch": 0.10304076483866684, + "grad_norm": 0.9590085896328235, + "learning_rate": 1.9999903606050267e-05, + "loss": 0.44557270407676697, + "step": 
388 + }, + { + "epoch": 0.1033063338202098, + "grad_norm": 1.111282066618818, + "learning_rate": 1.9999883363360175e-05, + "loss": 0.4843652546405792, + "step": 389 + }, + { + "epoch": 0.10357190280175275, + "grad_norm": 0.9708048507544866, + "learning_rate": 1.9999861192810508e-05, + "loss": 0.4536727964878082, + "step": 390 + }, + { + "epoch": 0.1038374717832957, + "grad_norm": 1.0216212958759847, + "learning_rate": 1.9999837094405538e-05, + "loss": 0.49557366967201233, + "step": 391 + }, + { + "epoch": 0.10410304076483867, + "grad_norm": 1.0254795167373827, + "learning_rate": 1.9999811068149917e-05, + "loss": 0.45077240467071533, + "step": 392 + }, + { + "epoch": 0.10436860974638162, + "grad_norm": 0.9857255709196505, + "learning_rate": 1.9999783114048658e-05, + "loss": 0.4554041624069214, + "step": 393 + }, + { + "epoch": 0.10463417872792458, + "grad_norm": 0.8770920920154472, + "learning_rate": 1.999975323210715e-05, + "loss": 0.43526744842529297, + "step": 394 + }, + { + "epoch": 0.10489974770946753, + "grad_norm": 0.9824982196768539, + "learning_rate": 1.9999721422331154e-05, + "loss": 0.4097936749458313, + "step": 395 + }, + { + "epoch": 0.1051653166910105, + "grad_norm": 1.013432449022695, + "learning_rate": 1.9999687684726803e-05, + "loss": 0.4740130305290222, + "step": 396 + }, + { + "epoch": 0.10543088567255345, + "grad_norm": 0.9786752992542405, + "learning_rate": 1.9999652019300604e-05, + "loss": 0.43374374508857727, + "step": 397 + }, + { + "epoch": 0.1056964546540964, + "grad_norm": 0.9323415402935509, + "learning_rate": 1.999961442605943e-05, + "loss": 0.4423784911632538, + "step": 398 + }, + { + "epoch": 0.10596202363563936, + "grad_norm": 1.0497518439124596, + "learning_rate": 1.999957490501053e-05, + "loss": 0.4660544693470001, + "step": 399 + }, + { + "epoch": 0.10622759261718231, + "grad_norm": 1.11742327964835, + "learning_rate": 1.999953345616152e-05, + "loss": 0.4579896628856659, + "step": 400 + }, + { + "epoch": 0.10649316159872527, + 
"grad_norm": 1.0653029752390735, + "learning_rate": 1.9999490079520395e-05, + "loss": 0.4634096920490265, + "step": 401 + }, + { + "epoch": 0.10675873058026822, + "grad_norm": 0.9969566988589958, + "learning_rate": 1.9999444775095517e-05, + "loss": 0.45374077558517456, + "step": 402 + }, + { + "epoch": 0.10702429956181118, + "grad_norm": 1.1298291912896017, + "learning_rate": 1.9999397542895615e-05, + "loss": 0.49752670526504517, + "step": 403 + }, + { + "epoch": 0.10728986854335414, + "grad_norm": 1.049244919494092, + "learning_rate": 1.99993483829298e-05, + "loss": 0.4539335370063782, + "step": 404 + }, + { + "epoch": 0.10755543752489709, + "grad_norm": 1.0017841795942442, + "learning_rate": 1.999929729520755e-05, + "loss": 0.4665772616863251, + "step": 405 + }, + { + "epoch": 0.10782100650644005, + "grad_norm": 1.023688686658119, + "learning_rate": 1.9999244279738713e-05, + "loss": 0.4850832223892212, + "step": 406 + }, + { + "epoch": 0.108086575487983, + "grad_norm": 0.9960763191436038, + "learning_rate": 1.9999189336533508e-05, + "loss": 0.43974876403808594, + "step": 407 + }, + { + "epoch": 0.10835214446952596, + "grad_norm": 1.0378626233602128, + "learning_rate": 1.9999132465602526e-05, + "loss": 0.46823856234550476, + "step": 408 + }, + { + "epoch": 0.10861771345106891, + "grad_norm": 1.0461372802003532, + "learning_rate": 1.9999073666956734e-05, + "loss": 0.49704545736312866, + "step": 409 + }, + { + "epoch": 0.10888328243261187, + "grad_norm": 1.03380477635781, + "learning_rate": 1.999901294060747e-05, + "loss": 0.3863454759120941, + "step": 410 + }, + { + "epoch": 0.10914885141415483, + "grad_norm": 1.1280569204620268, + "learning_rate": 1.9998950286566438e-05, + "loss": 0.4903780221939087, + "step": 411 + }, + { + "epoch": 0.10941442039569778, + "grad_norm": 0.9546134462956446, + "learning_rate": 1.9998885704845716e-05, + "loss": 0.4312375485897064, + "step": 412 + }, + { + "epoch": 0.10967998937724074, + "grad_norm": 0.9382591225300354, + 
"learning_rate": 1.9998819195457756e-05, + "loss": 0.4350954294204712, + "step": 413 + }, + { + "epoch": 0.1099455583587837, + "grad_norm": 0.9201016144754837, + "learning_rate": 1.999875075841538e-05, + "loss": 0.4364873766899109, + "step": 414 + }, + { + "epoch": 0.11021112734032665, + "grad_norm": 0.9578414566062486, + "learning_rate": 1.999868039373178e-05, + "loss": 0.42079728841781616, + "step": 415 + }, + { + "epoch": 0.1104766963218696, + "grad_norm": 1.0011321946551845, + "learning_rate": 1.9998608101420527e-05, + "loss": 0.4396737515926361, + "step": 416 + }, + { + "epoch": 0.11074226530341257, + "grad_norm": 0.9922478693245596, + "learning_rate": 1.9998533881495552e-05, + "loss": 0.44765806198120117, + "step": 417 + }, + { + "epoch": 0.11100783428495552, + "grad_norm": 1.0219437952159112, + "learning_rate": 1.999845773397117e-05, + "loss": 0.46199291944503784, + "step": 418 + }, + { + "epoch": 0.11127340326649847, + "grad_norm": 0.9510961467421052, + "learning_rate": 1.9998379658862058e-05, + "loss": 0.44561129808425903, + "step": 419 + }, + { + "epoch": 0.11153897224804143, + "grad_norm": 1.0559368690309399, + "learning_rate": 1.9998299656183263e-05, + "loss": 0.46025681495666504, + "step": 420 + }, + { + "epoch": 0.11180454122958439, + "grad_norm": 0.9881679042322009, + "learning_rate": 1.999821772595022e-05, + "loss": 0.4408613443374634, + "step": 421 + }, + { + "epoch": 0.11207011021112734, + "grad_norm": 0.9620122842513851, + "learning_rate": 1.999813386817871e-05, + "loss": 0.4846842586994171, + "step": 422 + }, + { + "epoch": 0.11233567919267029, + "grad_norm": 0.9697081207450757, + "learning_rate": 1.999804808288491e-05, + "loss": 0.44503283500671387, + "step": 423 + }, + { + "epoch": 0.11260124817421326, + "grad_norm": 0.9687765160951803, + "learning_rate": 1.9997960370085355e-05, + "loss": 0.4090060293674469, + "step": 424 + }, + { + "epoch": 0.11286681715575621, + "grad_norm": 0.9575575943579401, + "learning_rate": 1.999787072979696e-05, + 
"loss": 0.43246471881866455, + "step": 425 + }, + { + "epoch": 0.11313238613729916, + "grad_norm": 1.001604978030575, + "learning_rate": 1.9997779162036996e-05, + "loss": 0.46283262968063354, + "step": 426 + }, + { + "epoch": 0.11339795511884213, + "grad_norm": 0.9108113962903395, + "learning_rate": 1.999768566682313e-05, + "loss": 0.3866165578365326, + "step": 427 + }, + { + "epoch": 0.11366352410038508, + "grad_norm": 0.9595506331685858, + "learning_rate": 1.9997590244173374e-05, + "loss": 0.4501144289970398, + "step": 428 + }, + { + "epoch": 0.11392909308192803, + "grad_norm": 0.9153639565172541, + "learning_rate": 1.9997492894106127e-05, + "loss": 0.43005290627479553, + "step": 429 + }, + { + "epoch": 0.11419466206347098, + "grad_norm": 0.9635360081712412, + "learning_rate": 1.9997393616640165e-05, + "loss": 0.4427964985370636, + "step": 430 + }, + { + "epoch": 0.11446023104501395, + "grad_norm": 1.0560533392763956, + "learning_rate": 1.999729241179462e-05, + "loss": 0.4690951108932495, + "step": 431 + }, + { + "epoch": 0.1147258000265569, + "grad_norm": 0.9559285214931015, + "learning_rate": 1.9997189279589003e-05, + "loss": 0.456949919462204, + "step": 432 + }, + { + "epoch": 0.11499136900809985, + "grad_norm": 0.9851459681291062, + "learning_rate": 1.99970842200432e-05, + "loss": 0.456052303314209, + "step": 433 + }, + { + "epoch": 0.11525693798964282, + "grad_norm": 0.9609923633405658, + "learning_rate": 1.9996977233177466e-05, + "loss": 0.43220120668411255, + "step": 434 + }, + { + "epoch": 0.11552250697118577, + "grad_norm": 0.9022181145862976, + "learning_rate": 1.9996868319012422e-05, + "loss": 0.4237494170665741, + "step": 435 + }, + { + "epoch": 0.11578807595272872, + "grad_norm": 1.1387519975876466, + "learning_rate": 1.9996757477569072e-05, + "loss": 0.4713878631591797, + "step": 436 + }, + { + "epoch": 0.11605364493427167, + "grad_norm": 1.026114633188765, + "learning_rate": 1.9996644708868776e-05, + "loss": 0.4561111330986023, + "step": 437 + }, + 
{ + "epoch": 0.11631921391581464, + "grad_norm": 1.0425252904592188, + "learning_rate": 1.9996530012933285e-05, + "loss": 0.468253493309021, + "step": 438 + }, + { + "epoch": 0.11658478289735759, + "grad_norm": 0.9323050726416767, + "learning_rate": 1.9996413389784704e-05, + "loss": 0.4815019369125366, + "step": 439 + }, + { + "epoch": 0.11685035187890054, + "grad_norm": 0.9369313249225236, + "learning_rate": 1.9996294839445518e-05, + "loss": 0.4235987663269043, + "step": 440 + }, + { + "epoch": 0.1171159208604435, + "grad_norm": 0.9217309559918773, + "learning_rate": 1.999617436193858e-05, + "loss": 0.40562817454338074, + "step": 441 + }, + { + "epoch": 0.11738148984198646, + "grad_norm": 1.1384168500780398, + "learning_rate": 1.999605195728712e-05, + "loss": 0.424539715051651, + "step": 442 + }, + { + "epoch": 0.11764705882352941, + "grad_norm": 0.9616123874834243, + "learning_rate": 1.9995927625514736e-05, + "loss": 0.43677473068237305, + "step": 443 + }, + { + "epoch": 0.11791262780507236, + "grad_norm": 0.9761533315060044, + "learning_rate": 1.9995801366645396e-05, + "loss": 0.47325971722602844, + "step": 444 + }, + { + "epoch": 0.11817819678661533, + "grad_norm": 0.9447069768738408, + "learning_rate": 1.9995673180703443e-05, + "loss": 0.4206562638282776, + "step": 445 + }, + { + "epoch": 0.11844376576815828, + "grad_norm": 0.9743544240614231, + "learning_rate": 1.999554306771359e-05, + "loss": 0.4492834210395813, + "step": 446 + }, + { + "epoch": 0.11870933474970123, + "grad_norm": 1.0629000505790311, + "learning_rate": 1.9995411027700917e-05, + "loss": 0.4445284605026245, + "step": 447 + }, + { + "epoch": 0.1189749037312442, + "grad_norm": 0.9911650776890225, + "learning_rate": 1.9995277060690885e-05, + "loss": 0.4038352370262146, + "step": 448 + }, + { + "epoch": 0.11924047271278715, + "grad_norm": 0.9418518804089067, + "learning_rate": 1.9995141166709318e-05, + "loss": 0.4261324405670166, + "step": 449 + }, + { + "epoch": 0.1195060416943301, + "grad_norm": 
1.067611227425969, + "learning_rate": 1.9995003345782416e-05, + "loss": 0.44187062978744507, + "step": 450 + }, + { + "epoch": 0.11977161067587305, + "grad_norm": 0.9191915914869351, + "learning_rate": 1.9994863597936752e-05, + "loss": 0.44672587513923645, + "step": 451 + }, + { + "epoch": 0.12003717965741602, + "grad_norm": 0.9882052007755191, + "learning_rate": 1.999472192319926e-05, + "loss": 0.44322314858436584, + "step": 452 + }, + { + "epoch": 0.12030274863895897, + "grad_norm": 0.9882289435866314, + "learning_rate": 1.9994578321597258e-05, + "loss": 0.4396611154079437, + "step": 453 + }, + { + "epoch": 0.12056831762050192, + "grad_norm": 0.9831868773412876, + "learning_rate": 1.9994432793158433e-05, + "loss": 0.4487733542919159, + "step": 454 + }, + { + "epoch": 0.12083388660204489, + "grad_norm": 0.9360753951175719, + "learning_rate": 1.999428533791084e-05, + "loss": 0.3969653248786926, + "step": 455 + }, + { + "epoch": 0.12109945558358784, + "grad_norm": 0.9662346637828156, + "learning_rate": 1.9994135955882906e-05, + "loss": 0.39312344789505005, + "step": 456 + }, + { + "epoch": 0.12136502456513079, + "grad_norm": 0.9019524086641805, + "learning_rate": 1.9993984647103425e-05, + "loss": 0.3979804217815399, + "step": 457 + }, + { + "epoch": 0.12163059354667374, + "grad_norm": 1.0970468981958466, + "learning_rate": 1.9993831411601573e-05, + "loss": 0.4430229365825653, + "step": 458 + }, + { + "epoch": 0.12189616252821671, + "grad_norm": 0.994492352252997, + "learning_rate": 1.9993676249406895e-05, + "loss": 0.4511718451976776, + "step": 459 + }, + { + "epoch": 0.12216173150975966, + "grad_norm": 1.091979336298699, + "learning_rate": 1.9993519160549298e-05, + "loss": 0.4686455726623535, + "step": 460 + }, + { + "epoch": 0.12242730049130261, + "grad_norm": 1.0158374042593608, + "learning_rate": 1.9993360145059073e-05, + "loss": 0.4501730501651764, + "step": 461 + }, + { + "epoch": 0.12269286947284558, + "grad_norm": 0.8530053413909426, + "learning_rate": 
1.999319920296687e-05, + "loss": 0.40718767046928406, + "step": 462 + }, + { + "epoch": 0.12295843845438853, + "grad_norm": 1.1181007301257784, + "learning_rate": 1.9993036334303716e-05, + "loss": 0.47313761711120605, + "step": 463 + }, + { + "epoch": 0.12322400743593148, + "grad_norm": 0.9710975932515886, + "learning_rate": 1.9992871539101018e-05, + "loss": 0.47417378425598145, + "step": 464 + }, + { + "epoch": 0.12348957641747443, + "grad_norm": 0.9297582414898758, + "learning_rate": 1.999270481739054e-05, + "loss": 0.44206154346466064, + "step": 465 + }, + { + "epoch": 0.1237551453990174, + "grad_norm": 0.8745553533375581, + "learning_rate": 1.9992536169204427e-05, + "loss": 0.3800848722457886, + "step": 466 + }, + { + "epoch": 0.12402071438056035, + "grad_norm": 0.9337162704530373, + "learning_rate": 1.9992365594575194e-05, + "loss": 0.40339407324790955, + "step": 467 + }, + { + "epoch": 0.1242862833621033, + "grad_norm": 0.945328490567385, + "learning_rate": 1.999219309353572e-05, + "loss": 0.45280492305755615, + "step": 468 + }, + { + "epoch": 0.12455185234364627, + "grad_norm": 1.0911195899085697, + "learning_rate": 1.9992018666119266e-05, + "loss": 0.4600910544395447, + "step": 469 + }, + { + "epoch": 0.12481742132518922, + "grad_norm": 0.9649890056306747, + "learning_rate": 1.9991842312359458e-05, + "loss": 0.4475003480911255, + "step": 470 + }, + { + "epoch": 0.12508299030673217, + "grad_norm": 1.0493048741226816, + "learning_rate": 1.9991664032290297e-05, + "loss": 0.45377033948898315, + "step": 471 + }, + { + "epoch": 0.12534855928827512, + "grad_norm": 0.9964208438270044, + "learning_rate": 1.9991483825946147e-05, + "loss": 0.4397522509098053, + "step": 472 + }, + { + "epoch": 0.12561412826981808, + "grad_norm": 0.9309535511597795, + "learning_rate": 1.9991301693361756e-05, + "loss": 0.4258221387863159, + "step": 473 + }, + { + "epoch": 0.12587969725136103, + "grad_norm": 0.9120842027423138, + "learning_rate": 1.9991117634572234e-05, + "loss": 
0.40272068977355957, + "step": 474 + }, + { + "epoch": 0.126145266232904, + "grad_norm": 0.8761120829975514, + "learning_rate": 1.9990931649613067e-05, + "loss": 0.3721206784248352, + "step": 475 + }, + { + "epoch": 0.12641083521444696, + "grad_norm": 0.9997105907953329, + "learning_rate": 1.9990743738520115e-05, + "loss": 0.4530203938484192, + "step": 476 + }, + { + "epoch": 0.1266764041959899, + "grad_norm": 0.999446109489731, + "learning_rate": 1.999055390132959e-05, + "loss": 0.4281614422798157, + "step": 477 + }, + { + "epoch": 0.12694197317753286, + "grad_norm": 1.3617327829527315, + "learning_rate": 1.999036213807811e-05, + "loss": 0.41965895891189575, + "step": 478 + }, + { + "epoch": 0.12720754215907581, + "grad_norm": 0.9525189428273744, + "learning_rate": 1.9990168448802633e-05, + "loss": 0.40055203437805176, + "step": 479 + }, + { + "epoch": 0.12747311114061877, + "grad_norm": 1.0868137290392272, + "learning_rate": 1.99899728335405e-05, + "loss": 0.4266522526741028, + "step": 480 + }, + { + "epoch": 0.12773868012216172, + "grad_norm": 1.028316280940819, + "learning_rate": 1.9989775292329425e-05, + "loss": 0.42291250824928284, + "step": 481 + }, + { + "epoch": 0.1280042491037047, + "grad_norm": 1.0319881226067493, + "learning_rate": 1.9989575825207494e-05, + "loss": 0.41346436738967896, + "step": 482 + }, + { + "epoch": 0.12826981808524765, + "grad_norm": 1.0162482863207583, + "learning_rate": 1.998937443221316e-05, + "loss": 0.4092825651168823, + "step": 483 + }, + { + "epoch": 0.1285353870667906, + "grad_norm": 0.9789070022917183, + "learning_rate": 1.998917111338525e-05, + "loss": 0.39763280749320984, + "step": 484 + }, + { + "epoch": 0.12880095604833355, + "grad_norm": 1.1639998102533433, + "learning_rate": 1.9988965868762956e-05, + "loss": 0.45523273944854736, + "step": 485 + }, + { + "epoch": 0.1290665250298765, + "grad_norm": 0.9737102573843942, + "learning_rate": 1.9988758698385854e-05, + "loss": 0.40181300044059753, + "step": 486 + }, + { + 
"epoch": 0.12933209401141946, + "grad_norm": 1.0269411713354706, + "learning_rate": 1.9988549602293884e-05, + "loss": 0.42487743496894836, + "step": 487 + }, + { + "epoch": 0.1295976629929624, + "grad_norm": 0.9805378587174307, + "learning_rate": 1.998833858052735e-05, + "loss": 0.41672298312187195, + "step": 488 + }, + { + "epoch": 0.1298632319745054, + "grad_norm": 0.9804335652831319, + "learning_rate": 1.998812563312694e-05, + "loss": 0.36750108003616333, + "step": 489 + }, + { + "epoch": 0.13012880095604834, + "grad_norm": 1.0991024476796578, + "learning_rate": 1.9987910760133712e-05, + "loss": 0.49290573596954346, + "step": 490 + }, + { + "epoch": 0.1303943699375913, + "grad_norm": 0.9956647709409898, + "learning_rate": 1.9987693961589084e-05, + "loss": 0.460039347410202, + "step": 491 + }, + { + "epoch": 0.13065993891913424, + "grad_norm": 1.269757897267166, + "learning_rate": 1.998747523753485e-05, + "loss": 0.4471668303012848, + "step": 492 + }, + { + "epoch": 0.1309255079006772, + "grad_norm": 0.9411513149719377, + "learning_rate": 1.9987254588013184e-05, + "loss": 0.395844966173172, + "step": 493 + }, + { + "epoch": 0.13119107688222015, + "grad_norm": 0.9546844808839872, + "learning_rate": 1.9987032013066623e-05, + "loss": 0.4465745985507965, + "step": 494 + }, + { + "epoch": 0.1314566458637631, + "grad_norm": 1.0929917252775374, + "learning_rate": 1.9986807512738075e-05, + "loss": 0.43123912811279297, + "step": 495 + }, + { + "epoch": 0.13172221484530608, + "grad_norm": 0.9741124155963404, + "learning_rate": 1.9986581087070824e-05, + "loss": 0.40066564083099365, + "step": 496 + }, + { + "epoch": 0.13198778382684903, + "grad_norm": 0.9421948045046618, + "learning_rate": 1.9986352736108515e-05, + "loss": 0.38514643907546997, + "step": 497 + }, + { + "epoch": 0.13225335280839198, + "grad_norm": 0.9713567699891517, + "learning_rate": 1.9986122459895182e-05, + "loss": 0.37397241592407227, + "step": 498 + }, + { + "epoch": 0.13251892178993493, + "grad_norm": 
0.9697777712481016, + "learning_rate": 1.9985890258475215e-05, + "loss": 0.44865745306015015, + "step": 499 + }, + { + "epoch": 0.1327844907714779, + "grad_norm": 1.000823551239605, + "learning_rate": 1.9985656131893374e-05, + "loss": 0.4161406457424164, + "step": 500 + }, + { + "epoch": 0.13305005975302084, + "grad_norm": 1.049045844462056, + "learning_rate": 1.9985420080194804e-05, + "loss": 0.41364359855651855, + "step": 501 + }, + { + "epoch": 0.1333156287345638, + "grad_norm": 0.9766347522178017, + "learning_rate": 1.9985182103425007e-05, + "loss": 0.38466009497642517, + "step": 502 + }, + { + "epoch": 0.13358119771610677, + "grad_norm": 0.9820108788569575, + "learning_rate": 1.9984942201629868e-05, + "loss": 0.4189472794532776, + "step": 503 + }, + { + "epoch": 0.13384676669764972, + "grad_norm": 1.0124943582595707, + "learning_rate": 1.998470037485563e-05, + "loss": 0.4088754653930664, + "step": 504 + }, + { + "epoch": 0.13411233567919267, + "grad_norm": 0.9404621165531668, + "learning_rate": 1.9984456623148923e-05, + "loss": 0.4197084307670593, + "step": 505 + }, + { + "epoch": 0.13437790466073563, + "grad_norm": 1.022677047132229, + "learning_rate": 1.998421094655673e-05, + "loss": 0.4318644404411316, + "step": 506 + }, + { + "epoch": 0.13464347364227858, + "grad_norm": 0.9443470782499029, + "learning_rate": 1.9983963345126423e-05, + "loss": 0.38180238008499146, + "step": 507 + }, + { + "epoch": 0.13490904262382153, + "grad_norm": 0.9655473739081939, + "learning_rate": 1.9983713818905733e-05, + "loss": 0.38704103231430054, + "step": 508 + }, + { + "epoch": 0.13517461160536448, + "grad_norm": 1.050357567916831, + "learning_rate": 1.998346236794276e-05, + "loss": 0.4206693768501282, + "step": 509 + }, + { + "epoch": 0.13544018058690746, + "grad_norm": 1.1108901361228778, + "learning_rate": 1.9983208992285993e-05, + "loss": 0.42818987369537354, + "step": 510 + }, + { + "epoch": 0.1357057495684504, + "grad_norm": 1.0771548955106338, + "learning_rate": 
1.9982953691984274e-05, + "loss": 0.44592660665512085, + "step": 511 + }, + { + "epoch": 0.13597131854999336, + "grad_norm": 1.006125968429414, + "learning_rate": 1.9982696467086815e-05, + "loss": 0.4272580146789551, + "step": 512 + }, + { + "epoch": 0.13623688753153632, + "grad_norm": 1.084212872761102, + "learning_rate": 1.9982437317643218e-05, + "loss": 0.4416295289993286, + "step": 513 + }, + { + "epoch": 0.13650245651307927, + "grad_norm": 1.1040865905907058, + "learning_rate": 1.998217624370343e-05, + "loss": 0.45108669996261597, + "step": 514 + }, + { + "epoch": 0.13676802549462222, + "grad_norm": 0.9866796372680723, + "learning_rate": 1.9981913245317802e-05, + "loss": 0.40311864018440247, + "step": 515 + }, + { + "epoch": 0.13703359447616517, + "grad_norm": 1.041531014011416, + "learning_rate": 1.9981648322537017e-05, + "loss": 0.4388020932674408, + "step": 516 + }, + { + "epoch": 0.13729916345770815, + "grad_norm": 1.069295153220874, + "learning_rate": 1.9981381475412162e-05, + "loss": 0.42741361260414124, + "step": 517 + }, + { + "epoch": 0.1375647324392511, + "grad_norm": 0.8562984414004653, + "learning_rate": 1.9981112703994677e-05, + "loss": 0.3766555190086365, + "step": 518 + }, + { + "epoch": 0.13783030142079405, + "grad_norm": 0.9297024970383198, + "learning_rate": 1.998084200833638e-05, + "loss": 0.38618308305740356, + "step": 519 + }, + { + "epoch": 0.138095870402337, + "grad_norm": 1.0033450202172107, + "learning_rate": 1.9980569388489457e-05, + "loss": 0.4553264379501343, + "step": 520 + }, + { + "epoch": 0.13836143938387996, + "grad_norm": 1.024202819723292, + "learning_rate": 1.9980294844506468e-05, + "loss": 0.44632673263549805, + "step": 521 + }, + { + "epoch": 0.1386270083654229, + "grad_norm": 1.0907023510727254, + "learning_rate": 1.998001837644033e-05, + "loss": 0.4285067617893219, + "step": 522 + }, + { + "epoch": 0.13889257734696586, + "grad_norm": 0.9721672428790065, + "learning_rate": 1.9979739984344365e-05, + "loss": 
0.39360538125038147, + "step": 523 + }, + { + "epoch": 0.13915814632850884, + "grad_norm": 0.9475835393492287, + "learning_rate": 1.9979459668272226e-05, + "loss": 0.4007593095302582, + "step": 524 + }, + { + "epoch": 0.1394237153100518, + "grad_norm": 1.028990364637073, + "learning_rate": 1.9979177428277955e-05, + "loss": 0.40176767110824585, + "step": 525 + }, + { + "epoch": 0.13968928429159475, + "grad_norm": 1.0167293750004343, + "learning_rate": 1.9978893264415978e-05, + "loss": 0.4190528392791748, + "step": 526 + }, + { + "epoch": 0.1399548532731377, + "grad_norm": 0.9871913820335487, + "learning_rate": 1.9978607176741063e-05, + "loss": 0.4139288067817688, + "step": 527 + }, + { + "epoch": 0.14022042225468065, + "grad_norm": 0.8610694360554231, + "learning_rate": 1.9978319165308373e-05, + "loss": 0.3666151463985443, + "step": 528 + }, + { + "epoch": 0.1404859912362236, + "grad_norm": 1.016794526359022, + "learning_rate": 1.997802923017343e-05, + "loss": 0.44621142745018005, + "step": 529 + }, + { + "epoch": 0.14075156021776655, + "grad_norm": 0.9742602007181285, + "learning_rate": 1.9977737371392134e-05, + "loss": 0.4162977635860443, + "step": 530 + }, + { + "epoch": 0.14101712919930953, + "grad_norm": 1.0386051117102446, + "learning_rate": 1.997744358902075e-05, + "loss": 0.438882052898407, + "step": 531 + }, + { + "epoch": 0.14128269818085248, + "grad_norm": 0.9131334625730753, + "learning_rate": 1.997714788311591e-05, + "loss": 0.43381333351135254, + "step": 532 + }, + { + "epoch": 0.14154826716239544, + "grad_norm": 1.0341262373297713, + "learning_rate": 1.9976850253734633e-05, + "loss": 0.41925039887428284, + "step": 533 + }, + { + "epoch": 0.1418138361439384, + "grad_norm": 1.0366031704059997, + "learning_rate": 1.997655070093429e-05, + "loss": 0.40469998121261597, + "step": 534 + }, + { + "epoch": 0.14207940512548134, + "grad_norm": 1.069653848503876, + "learning_rate": 1.9976249224772638e-05, + "loss": 0.4252749979496002, + "step": 535 + }, + { + 
"epoch": 0.1423449741070243, + "grad_norm": 0.9131599330211423, + "learning_rate": 1.9975945825307788e-05, + "loss": 0.42437341809272766, + "step": 536 + }, + { + "epoch": 0.14261054308856724, + "grad_norm": 0.9295944144104017, + "learning_rate": 1.9975640502598243e-05, + "loss": 0.3435184955596924, + "step": 537 + }, + { + "epoch": 0.14287611207011022, + "grad_norm": 1.135805935036872, + "learning_rate": 1.9975333256702864e-05, + "loss": 0.4677535593509674, + "step": 538 + }, + { + "epoch": 0.14314168105165317, + "grad_norm": 0.9857610455714647, + "learning_rate": 1.9975024087680873e-05, + "loss": 0.3860551118850708, + "step": 539 + }, + { + "epoch": 0.14340725003319613, + "grad_norm": 1.0260051612127887, + "learning_rate": 1.9974712995591887e-05, + "loss": 0.4067271649837494, + "step": 540 + }, + { + "epoch": 0.14367281901473908, + "grad_norm": 1.0673102525592195, + "learning_rate": 1.9974399980495877e-05, + "loss": 0.42236536741256714, + "step": 541 + }, + { + "epoch": 0.14393838799628203, + "grad_norm": 0.9825710114440017, + "learning_rate": 1.9974085042453188e-05, + "loss": 0.45230624079704285, + "step": 542 + }, + { + "epoch": 0.14420395697782498, + "grad_norm": 1.0223761508252163, + "learning_rate": 1.997376818152453e-05, + "loss": 0.428194522857666, + "step": 543 + }, + { + "epoch": 0.14446952595936793, + "grad_norm": 1.0337438279048081, + "learning_rate": 1.9973449397771004e-05, + "loss": 0.40774789452552795, + "step": 544 + }, + { + "epoch": 0.1447350949409109, + "grad_norm": 0.9168779980285519, + "learning_rate": 1.9973128691254054e-05, + "loss": 0.4086815118789673, + "step": 545 + }, + { + "epoch": 0.14500066392245387, + "grad_norm": 0.9934439062572693, + "learning_rate": 1.997280606203552e-05, + "loss": 0.4045162796974182, + "step": 546 + }, + { + "epoch": 0.14526623290399682, + "grad_norm": 1.0110955437735047, + "learning_rate": 1.9972481510177594e-05, + "loss": 0.40463268756866455, + "step": 547 + }, + { + "epoch": 0.14553180188553977, + "grad_norm": 
1.0029896014566093, + "learning_rate": 1.9972155035742847e-05, + "loss": 0.46733587980270386, + "step": 548 + }, + { + "epoch": 0.14579737086708272, + "grad_norm": 0.9683751197048177, + "learning_rate": 1.997182663879422e-05, + "loss": 0.45210930705070496, + "step": 549 + }, + { + "epoch": 0.14606293984862567, + "grad_norm": 0.9559484778346481, + "learning_rate": 1.9971496319395022e-05, + "loss": 0.39798587560653687, + "step": 550 + }, + { + "epoch": 0.14632850883016862, + "grad_norm": 1.0582410708312875, + "learning_rate": 1.9971164077608937e-05, + "loss": 0.4166080057621002, + "step": 551 + }, + { + "epoch": 0.1465940778117116, + "grad_norm": 0.99705391441119, + "learning_rate": 1.9970829913500017e-05, + "loss": 0.3995435833930969, + "step": 552 + }, + { + "epoch": 0.14685964679325456, + "grad_norm": 0.9693599664680953, + "learning_rate": 1.9970493827132686e-05, + "loss": 0.39335039258003235, + "step": 553 + }, + { + "epoch": 0.1471252157747975, + "grad_norm": 1.0653128556742777, + "learning_rate": 1.9970155818571733e-05, + "loss": 0.3923008441925049, + "step": 554 + }, + { + "epoch": 0.14739078475634046, + "grad_norm": 1.1000528384874784, + "learning_rate": 1.996981588788233e-05, + "loss": 0.42148759961128235, + "step": 555 + }, + { + "epoch": 0.1476563537378834, + "grad_norm": 0.9532704289154984, + "learning_rate": 1.9969474035130005e-05, + "loss": 0.36099517345428467, + "step": 556 + }, + { + "epoch": 0.14792192271942636, + "grad_norm": 0.9498609858415961, + "learning_rate": 1.9969130260380663e-05, + "loss": 0.39650559425354004, + "step": 557 + }, + { + "epoch": 0.14818749170096931, + "grad_norm": 0.9667452630427784, + "learning_rate": 1.9968784563700586e-05, + "loss": 0.36410078406333923, + "step": 558 + }, + { + "epoch": 0.1484530606825123, + "grad_norm": 1.002419821858965, + "learning_rate": 1.996843694515641e-05, + "loss": 0.41312888264656067, + "step": 559 + }, + { + "epoch": 0.14871862966405525, + "grad_norm": 1.1088153047335336, + "learning_rate": 
1.9968087404815162e-05, + "loss": 0.3895263373851776, + "step": 560 + }, + { + "epoch": 0.1489841986455982, + "grad_norm": 1.2422388501205763, + "learning_rate": 1.9967735942744226e-05, + "loss": 0.4400597810745239, + "step": 561 + }, + { + "epoch": 0.14924976762714115, + "grad_norm": 1.1300700300497077, + "learning_rate": 1.9967382559011356e-05, + "loss": 0.36712852120399475, + "step": 562 + }, + { + "epoch": 0.1495153366086841, + "grad_norm": 1.0425502358891738, + "learning_rate": 1.9967027253684685e-05, + "loss": 0.4043564200401306, + "step": 563 + }, + { + "epoch": 0.14978090559022705, + "grad_norm": 1.101160625764444, + "learning_rate": 1.9966670026832707e-05, + "loss": 0.45233044028282166, + "step": 564 + }, + { + "epoch": 0.15004647457177, + "grad_norm": 1.3277254520379258, + "learning_rate": 1.9966310878524297e-05, + "loss": 0.441600501537323, + "step": 565 + }, + { + "epoch": 0.15031204355331299, + "grad_norm": 1.0833095900878238, + "learning_rate": 1.9965949808828687e-05, + "loss": 0.4268038868904114, + "step": 566 + }, + { + "epoch": 0.15057761253485594, + "grad_norm": 1.1492448156590855, + "learning_rate": 1.9965586817815494e-05, + "loss": 0.41927874088287354, + "step": 567 + }, + { + "epoch": 0.1508431815163989, + "grad_norm": 1.026170307581087, + "learning_rate": 1.9965221905554695e-05, + "loss": 0.41488781571388245, + "step": 568 + }, + { + "epoch": 0.15110875049794184, + "grad_norm": 0.9559142330236491, + "learning_rate": 1.9964855072116642e-05, + "loss": 0.3624749779701233, + "step": 569 + }, + { + "epoch": 0.1513743194794848, + "grad_norm": 1.254830306735622, + "learning_rate": 1.996448631757206e-05, + "loss": 0.45119866728782654, + "step": 570 + }, + { + "epoch": 0.15163988846102774, + "grad_norm": 1.095837461898702, + "learning_rate": 1.996411564199203e-05, + "loss": 0.41389739513397217, + "step": 571 + }, + { + "epoch": 0.1519054574425707, + "grad_norm": 0.9684460814064966, + "learning_rate": 1.996374304544802e-05, + "loss": 0.3640916347503662, 
+ "step": 572 + }, + { + "epoch": 0.15217102642411368, + "grad_norm": 1.0711015344753547, + "learning_rate": 1.9963368528011867e-05, + "loss": 0.45648565888404846, + "step": 573 + }, + { + "epoch": 0.15243659540565663, + "grad_norm": 0.9722794055909949, + "learning_rate": 1.9962992089755765e-05, + "loss": 0.4335980713367462, + "step": 574 + }, + { + "epoch": 0.15270216438719958, + "grad_norm": 1.158400874054287, + "learning_rate": 1.996261373075229e-05, + "loss": 0.3908158540725708, + "step": 575 + }, + { + "epoch": 0.15296773336874253, + "grad_norm": 0.9311953954584888, + "learning_rate": 1.996223345107439e-05, + "loss": 0.36533305048942566, + "step": 576 + }, + { + "epoch": 0.15323330235028548, + "grad_norm": 0.9771467412652409, + "learning_rate": 1.9961851250795372e-05, + "loss": 0.407212495803833, + "step": 577 + }, + { + "epoch": 0.15349887133182843, + "grad_norm": 0.9988499065644934, + "learning_rate": 1.996146712998892e-05, + "loss": 0.4266315698623657, + "step": 578 + }, + { + "epoch": 0.1537644403133714, + "grad_norm": 0.9843108485081927, + "learning_rate": 1.9961081088729092e-05, + "loss": 0.3806581199169159, + "step": 579 + }, + { + "epoch": 0.15403000929491437, + "grad_norm": 0.9497423806639163, + "learning_rate": 1.9960693127090312e-05, + "loss": 0.40962716937065125, + "step": 580 + }, + { + "epoch": 0.15429557827645732, + "grad_norm": 0.94680923059909, + "learning_rate": 1.996030324514737e-05, + "loss": 0.4195394515991211, + "step": 581 + }, + { + "epoch": 0.15456114725800027, + "grad_norm": 1.0211843119224446, + "learning_rate": 1.995991144297543e-05, + "loss": 0.4366803765296936, + "step": 582 + }, + { + "epoch": 0.15482671623954322, + "grad_norm": 1.1779341722116263, + "learning_rate": 1.995951772065004e-05, + "loss": 0.44951680302619934, + "step": 583 + }, + { + "epoch": 0.15509228522108617, + "grad_norm": 1.1165714790353467, + "learning_rate": 1.9959122078247088e-05, + "loss": 0.42920851707458496, + "step": 584 + }, + { + "epoch": 
0.15535785420262913, + "grad_norm": 1.3260467831670406, + "learning_rate": 1.9958724515842856e-05, + "loss": 0.3805098533630371, + "step": 585 + }, + { + "epoch": 0.15562342318417208, + "grad_norm": 1.1544212798945541, + "learning_rate": 1.995832503351399e-05, + "loss": 0.439333438873291, + "step": 586 + }, + { + "epoch": 0.15588899216571506, + "grad_norm": 0.9414235863159184, + "learning_rate": 1.9957923631337505e-05, + "loss": 0.38338547945022583, + "step": 587 + }, + { + "epoch": 0.156154561147258, + "grad_norm": 0.9711288321476074, + "learning_rate": 1.9957520309390786e-05, + "loss": 0.40603697299957275, + "step": 588 + }, + { + "epoch": 0.15642013012880096, + "grad_norm": 0.9468286962292546, + "learning_rate": 1.9957115067751594e-05, + "loss": 0.42816999554634094, + "step": 589 + }, + { + "epoch": 0.1566856991103439, + "grad_norm": 0.979497417166178, + "learning_rate": 1.9956707906498046e-05, + "loss": 0.42367884516716003, + "step": 590 + }, + { + "epoch": 0.15695126809188686, + "grad_norm": 1.1158588594509518, + "learning_rate": 1.995629882570864e-05, + "loss": 0.4349297881126404, + "step": 591 + }, + { + "epoch": 0.15721683707342982, + "grad_norm": 0.9762108745852242, + "learning_rate": 1.995588782546225e-05, + "loss": 0.37990960478782654, + "step": 592 + }, + { + "epoch": 0.15748240605497277, + "grad_norm": 0.9495653219493333, + "learning_rate": 1.9955474905838102e-05, + "loss": 0.4085468649864197, + "step": 593 + }, + { + "epoch": 0.15774797503651575, + "grad_norm": 0.9419429879365407, + "learning_rate": 1.995506006691581e-05, + "loss": 0.41362464427948, + "step": 594 + }, + { + "epoch": 0.1580135440180587, + "grad_norm": 1.002559702640921, + "learning_rate": 1.9954643308775342e-05, + "loss": 0.3830018937587738, + "step": 595 + }, + { + "epoch": 0.15827911299960165, + "grad_norm": 1.1505182326275074, + "learning_rate": 1.995422463149705e-05, + "loss": 0.48350822925567627, + "step": 596 + }, + { + "epoch": 0.1585446819811446, + "grad_norm": 
0.9889824166630486, + "learning_rate": 1.995380403516165e-05, + "loss": 0.4215185344219208, + "step": 597 + }, + { + "epoch": 0.15881025096268755, + "grad_norm": 1.06826056700577, + "learning_rate": 1.9953381519850224e-05, + "loss": 0.42061948776245117, + "step": 598 + }, + { + "epoch": 0.1590758199442305, + "grad_norm": 1.032451381790901, + "learning_rate": 1.995295708564423e-05, + "loss": 0.38956254720687866, + "step": 599 + }, + { + "epoch": 0.15934138892577346, + "grad_norm": 1.0492553607775368, + "learning_rate": 1.9952530732625492e-05, + "loss": 0.3864685893058777, + "step": 600 + }, + { + "epoch": 0.15960695790731644, + "grad_norm": 0.9770856461072062, + "learning_rate": 1.9952102460876214e-05, + "loss": 0.395724356174469, + "step": 601 + }, + { + "epoch": 0.1598725268888594, + "grad_norm": 1.04245602393598, + "learning_rate": 1.995167227047895e-05, + "loss": 0.4220300316810608, + "step": 602 + }, + { + "epoch": 0.16013809587040234, + "grad_norm": 1.1406615370546667, + "learning_rate": 1.9951240161516643e-05, + "loss": 0.4129142165184021, + "step": 603 + }, + { + "epoch": 0.1604036648519453, + "grad_norm": 0.983753356740355, + "learning_rate": 1.9950806134072595e-05, + "loss": 0.3951375484466553, + "step": 604 + }, + { + "epoch": 0.16066923383348825, + "grad_norm": 1.0214548083454909, + "learning_rate": 1.9950370188230486e-05, + "loss": 0.4117582142353058, + "step": 605 + }, + { + "epoch": 0.1609348028150312, + "grad_norm": 1.0340746201961049, + "learning_rate": 1.994993232407436e-05, + "loss": 0.3920668363571167, + "step": 606 + }, + { + "epoch": 0.16120037179657415, + "grad_norm": 0.9768399206450091, + "learning_rate": 1.9949492541688626e-05, + "loss": 0.3756999373435974, + "step": 607 + }, + { + "epoch": 0.16146594077811713, + "grad_norm": 1.0034054922110034, + "learning_rate": 1.9949050841158078e-05, + "loss": 0.41009610891342163, + "step": 608 + }, + { + "epoch": 0.16173150975966008, + "grad_norm": 0.9847346075479474, + "learning_rate": 
1.994860722256786e-05, + "loss": 0.3986571729183197, + "step": 609 + }, + { + "epoch": 0.16199707874120303, + "grad_norm": 0.9978440495541314, + "learning_rate": 1.994816168600351e-05, + "loss": 0.3903341591358185, + "step": 610 + }, + { + "epoch": 0.16226264772274598, + "grad_norm": 0.9992231775305654, + "learning_rate": 1.994771423155091e-05, + "loss": 0.39725261926651, + "step": 611 + }, + { + "epoch": 0.16252821670428894, + "grad_norm": 0.9446936558476315, + "learning_rate": 1.994726485929633e-05, + "loss": 0.39461129903793335, + "step": 612 + }, + { + "epoch": 0.1627937856858319, + "grad_norm": 1.0162077284831286, + "learning_rate": 1.99468135693264e-05, + "loss": 0.41346144676208496, + "step": 613 + }, + { + "epoch": 0.16305935466737484, + "grad_norm": 1.0305116850266922, + "learning_rate": 1.9946360361728127e-05, + "loss": 0.41148197650909424, + "step": 614 + }, + { + "epoch": 0.16332492364891782, + "grad_norm": 0.9678436330540818, + "learning_rate": 1.9945905236588884e-05, + "loss": 0.38204139471054077, + "step": 615 + }, + { + "epoch": 0.16359049263046077, + "grad_norm": 0.9830320911733957, + "learning_rate": 1.9945448193996412e-05, + "loss": 0.41496896743774414, + "step": 616 + }, + { + "epoch": 0.16385606161200372, + "grad_norm": 0.9327494941136337, + "learning_rate": 1.994498923403882e-05, + "loss": 0.38998982310295105, + "step": 617 + }, + { + "epoch": 0.16412163059354667, + "grad_norm": 1.0310759290486786, + "learning_rate": 1.99445283568046e-05, + "loss": 0.39018991589546204, + "step": 618 + }, + { + "epoch": 0.16438719957508963, + "grad_norm": 1.1133251353738367, + "learning_rate": 1.9944065562382594e-05, + "loss": 0.41579991579055786, + "step": 619 + }, + { + "epoch": 0.16465276855663258, + "grad_norm": 1.1413714641323347, + "learning_rate": 1.9943600850862027e-05, + "loss": 0.426283061504364, + "step": 620 + }, + { + "epoch": 0.16491833753817553, + "grad_norm": 1.0537239280428552, + "learning_rate": 1.9943134222332493e-05, + "loss": 
0.418672651052475, + "step": 621 + }, + { + "epoch": 0.1651839065197185, + "grad_norm": 1.0177048245128393, + "learning_rate": 1.9942665676883946e-05, + "loss": 0.4014776349067688, + "step": 622 + }, + { + "epoch": 0.16544947550126146, + "grad_norm": 0.9703989792649265, + "learning_rate": 1.994219521460672e-05, + "loss": 0.3714776933193207, + "step": 623 + }, + { + "epoch": 0.1657150444828044, + "grad_norm": 1.005321267739283, + "learning_rate": 1.9941722835591514e-05, + "loss": 0.39415785670280457, + "step": 624 + }, + { + "epoch": 0.16598061346434737, + "grad_norm": 1.739817458909074, + "learning_rate": 1.9941248539929395e-05, + "loss": 0.3706223964691162, + "step": 625 + }, + { + "epoch": 0.16624618244589032, + "grad_norm": 0.9887487099192142, + "learning_rate": 1.9940772327711807e-05, + "loss": 0.4167429208755493, + "step": 626 + }, + { + "epoch": 0.16651175142743327, + "grad_norm": 1.0502993213264278, + "learning_rate": 1.9940294199030553e-05, + "loss": 0.38234227895736694, + "step": 627 + }, + { + "epoch": 0.16677732040897622, + "grad_norm": 0.9929957655695576, + "learning_rate": 1.9939814153977813e-05, + "loss": 0.4139519929885864, + "step": 628 + }, + { + "epoch": 0.1670428893905192, + "grad_norm": 1.0428716869119874, + "learning_rate": 1.9939332192646136e-05, + "loss": 0.44490402936935425, + "step": 629 + }, + { + "epoch": 0.16730845837206215, + "grad_norm": 0.9723220719956404, + "learning_rate": 1.993884831512843e-05, + "loss": 0.3870658278465271, + "step": 630 + }, + { + "epoch": 0.1675740273536051, + "grad_norm": 0.9337218443909966, + "learning_rate": 1.993836252151799e-05, + "loss": 0.3308948278427124, + "step": 631 + }, + { + "epoch": 0.16783959633514806, + "grad_norm": 1.1119638169858157, + "learning_rate": 1.993787481190847e-05, + "loss": 0.3727487623691559, + "step": 632 + }, + { + "epoch": 0.168105165316691, + "grad_norm": 1.0025380900585623, + "learning_rate": 1.9937385186393888e-05, + "loss": 0.4277465343475342, + "step": 633 + }, + { + "epoch": 
0.16837073429823396, + "grad_norm": 1.2120120873899203, + "learning_rate": 1.9936893645068647e-05, + "loss": 0.4276485741138458, + "step": 634 + }, + { + "epoch": 0.1686363032797769, + "grad_norm": 1.000070161461063, + "learning_rate": 1.9936400188027502e-05, + "loss": 0.374578058719635, + "step": 635 + }, + { + "epoch": 0.1689018722613199, + "grad_norm": 1.113556890943216, + "learning_rate": 1.993590481536559e-05, + "loss": 0.4583400785923004, + "step": 636 + }, + { + "epoch": 0.16916744124286284, + "grad_norm": 0.9731147624235688, + "learning_rate": 1.9935407527178417e-05, + "loss": 0.3734489679336548, + "step": 637 + }, + { + "epoch": 0.1694330102244058, + "grad_norm": 1.0110441212525507, + "learning_rate": 1.9934908323561846e-05, + "loss": 0.39524513483047485, + "step": 638 + }, + { + "epoch": 0.16969857920594875, + "grad_norm": 1.0264447655460065, + "learning_rate": 1.9934407204612124e-05, + "loss": 0.42300352454185486, + "step": 639 + }, + { + "epoch": 0.1699641481874917, + "grad_norm": 0.9950374891978715, + "learning_rate": 1.9933904170425858e-05, + "loss": 0.4152276813983917, + "step": 640 + }, + { + "epoch": 0.17022971716903465, + "grad_norm": 1.230783330329369, + "learning_rate": 1.9933399221100026e-05, + "loss": 0.43046653270721436, + "step": 641 + }, + { + "epoch": 0.1704952861505776, + "grad_norm": 1.0095783418631343, + "learning_rate": 1.993289235673198e-05, + "loss": 0.4134339392185211, + "step": 642 + }, + { + "epoch": 0.17076085513212058, + "grad_norm": 1.0051407398693462, + "learning_rate": 1.9932383577419432e-05, + "loss": 0.44028693437576294, + "step": 643 + }, + { + "epoch": 0.17102642411366353, + "grad_norm": 1.0208746920457954, + "learning_rate": 1.9931872883260473e-05, + "loss": 0.3790222704410553, + "step": 644 + }, + { + "epoch": 0.17129199309520649, + "grad_norm": 1.041462978505965, + "learning_rate": 1.9931360274353556e-05, + "loss": 0.3683086633682251, + "step": 645 + }, + { + "epoch": 0.17155756207674944, + "grad_norm": 
1.0400069352454702, + "learning_rate": 1.993084575079751e-05, + "loss": 0.3630594313144684, + "step": 646 + }, + { + "epoch": 0.1718231310582924, + "grad_norm": 1.0694046561659416, + "learning_rate": 1.993032931269153e-05, + "loss": 0.4398641884326935, + "step": 647 + }, + { + "epoch": 0.17208870003983534, + "grad_norm": 1.107156801944608, + "learning_rate": 1.992981096013517e-05, + "loss": 0.42222845554351807, + "step": 648 + }, + { + "epoch": 0.1723542690213783, + "grad_norm": 1.043160064840446, + "learning_rate": 1.992929069322837e-05, + "loss": 0.38966643810272217, + "step": 649 + }, + { + "epoch": 0.17261983800292127, + "grad_norm": 1.0607803195691352, + "learning_rate": 1.992876851207143e-05, + "loss": 0.4394804835319519, + "step": 650 + }, + { + "epoch": 0.17288540698446422, + "grad_norm": 0.9714467718451273, + "learning_rate": 1.9928244416765022e-05, + "loss": 0.3475287854671478, + "step": 651 + }, + { + "epoch": 0.17315097596600718, + "grad_norm": 0.9848879046616053, + "learning_rate": 1.992771840741018e-05, + "loss": 0.40047168731689453, + "step": 652 + }, + { + "epoch": 0.17341654494755013, + "grad_norm": 1.0744593937096147, + "learning_rate": 1.9927190484108315e-05, + "loss": 0.4028981328010559, + "step": 653 + }, + { + "epoch": 0.17368211392909308, + "grad_norm": 1.010491020672817, + "learning_rate": 1.9926660646961208e-05, + "loss": 0.3891482949256897, + "step": 654 + }, + { + "epoch": 0.17394768291063603, + "grad_norm": 1.1163232689680433, + "learning_rate": 1.9926128896071e-05, + "loss": 0.4570680856704712, + "step": 655 + }, + { + "epoch": 0.17421325189217898, + "grad_norm": 0.9509061944047602, + "learning_rate": 1.992559523154021e-05, + "loss": 0.392758309841156, + "step": 656 + }, + { + "epoch": 0.17447882087372196, + "grad_norm": 0.9648168194829144, + "learning_rate": 1.992505965347172e-05, + "loss": 0.39552047848701477, + "step": 657 + }, + { + "epoch": 0.17474438985526491, + "grad_norm": 1.045434666464082, + "learning_rate": 
1.992452216196879e-05, + "loss": 0.4412619173526764, + "step": 658 + }, + { + "epoch": 0.17500995883680787, + "grad_norm": 1.033655605856329, + "learning_rate": 1.9923982757135028e-05, + "loss": 0.4075942635536194, + "step": 659 + }, + { + "epoch": 0.17527552781835082, + "grad_norm": 1.0660210414475448, + "learning_rate": 1.9923441439074434e-05, + "loss": 0.44615018367767334, + "step": 660 + }, + { + "epoch": 0.17554109679989377, + "grad_norm": 0.9504988883268379, + "learning_rate": 1.992289820789137e-05, + "loss": 0.3957441449165344, + "step": 661 + }, + { + "epoch": 0.17580666578143672, + "grad_norm": 0.9513339400965243, + "learning_rate": 1.992235306369056e-05, + "loss": 0.4014820158481598, + "step": 662 + }, + { + "epoch": 0.17607223476297967, + "grad_norm": 0.9988043316582222, + "learning_rate": 1.9921806006577102e-05, + "loss": 0.39478158950805664, + "step": 663 + }, + { + "epoch": 0.17633780374452265, + "grad_norm": 1.0278124558587338, + "learning_rate": 1.9921257036656463e-05, + "loss": 0.45742082595825195, + "step": 664 + }, + { + "epoch": 0.1766033727260656, + "grad_norm": 0.9674516471555401, + "learning_rate": 1.9920706154034477e-05, + "loss": 0.36519041657447815, + "step": 665 + }, + { + "epoch": 0.17686894170760856, + "grad_norm": 1.0086354363577679, + "learning_rate": 1.992015335881735e-05, + "loss": 0.40599358081817627, + "step": 666 + }, + { + "epoch": 0.1771345106891515, + "grad_norm": 0.958585892866014, + "learning_rate": 1.991959865111165e-05, + "loss": 0.4064781367778778, + "step": 667 + }, + { + "epoch": 0.17740007967069446, + "grad_norm": 0.9430583774727941, + "learning_rate": 1.991904203102432e-05, + "loss": 0.4076484143733978, + "step": 668 + }, + { + "epoch": 0.1776656486522374, + "grad_norm": 1.1044553051326549, + "learning_rate": 1.9918483498662678e-05, + "loss": 0.42157143354415894, + "step": 669 + }, + { + "epoch": 0.17793121763378036, + "grad_norm": 1.005923050768092, + "learning_rate": 1.9917923054134388e-05, + "loss": 
0.3814900517463684, + "step": 670 + }, + { + "epoch": 0.17819678661532334, + "grad_norm": 1.0156953904207233, + "learning_rate": 1.9917360697547506e-05, + "loss": 0.4211175739765167, + "step": 671 + }, + { + "epoch": 0.1784623555968663, + "grad_norm": 1.0530805044024834, + "learning_rate": 1.991679642901045e-05, + "loss": 0.3975893259048462, + "step": 672 + }, + { + "epoch": 0.17872792457840925, + "grad_norm": 0.9633270935214763, + "learning_rate": 1.9916230248631993e-05, + "loss": 0.36090826988220215, + "step": 673 + }, + { + "epoch": 0.1789934935599522, + "grad_norm": 0.9408638333666679, + "learning_rate": 1.99156621565213e-05, + "loss": 0.36511334776878357, + "step": 674 + }, + { + "epoch": 0.17925906254149515, + "grad_norm": 1.0839117569759185, + "learning_rate": 1.9915092152787888e-05, + "loss": 0.4131924510002136, + "step": 675 + }, + { + "epoch": 0.1795246315230381, + "grad_norm": 1.1407281463751517, + "learning_rate": 1.9914520237541644e-05, + "loss": 0.4283728301525116, + "step": 676 + }, + { + "epoch": 0.17979020050458105, + "grad_norm": 0.9751873028047018, + "learning_rate": 1.991394641089283e-05, + "loss": 0.3855544924736023, + "step": 677 + }, + { + "epoch": 0.18005576948612403, + "grad_norm": 1.3517309919327671, + "learning_rate": 1.9913370672952074e-05, + "loss": 0.41288501024246216, + "step": 678 + }, + { + "epoch": 0.180321338467667, + "grad_norm": 1.1127679640996702, + "learning_rate": 1.9912793023830365e-05, + "loss": 0.3824073076248169, + "step": 679 + }, + { + "epoch": 0.18058690744920994, + "grad_norm": 1.0055812841256684, + "learning_rate": 1.9912213463639077e-05, + "loss": 0.39005106687545776, + "step": 680 + }, + { + "epoch": 0.1808524764307529, + "grad_norm": 1.0115332151563563, + "learning_rate": 1.9911631992489933e-05, + "loss": 0.3521374464035034, + "step": 681 + }, + { + "epoch": 0.18111804541229584, + "grad_norm": 0.983790464571211, + "learning_rate": 1.9911048610495037e-05, + "loss": 0.337347149848938, + "step": 682 + }, + { + 
"epoch": 0.1813836143938388, + "grad_norm": 1.1534370397304132, + "learning_rate": 1.9910463317766864e-05, + "loss": 0.4349983334541321, + "step": 683 + }, + { + "epoch": 0.18164918337538175, + "grad_norm": 1.059114838428009, + "learning_rate": 1.9909876114418242e-05, + "loss": 0.3783540427684784, + "step": 684 + }, + { + "epoch": 0.18191475235692472, + "grad_norm": 1.0050293498117582, + "learning_rate": 1.9909287000562383e-05, + "loss": 0.4065130054950714, + "step": 685 + }, + { + "epoch": 0.18218032133846768, + "grad_norm": 1.0122618604087057, + "learning_rate": 1.990869597631286e-05, + "loss": 0.3876315653324127, + "step": 686 + }, + { + "epoch": 0.18244589032001063, + "grad_norm": 0.9622962910168786, + "learning_rate": 1.9908103041783615e-05, + "loss": 0.3716024160385132, + "step": 687 + }, + { + "epoch": 0.18271145930155358, + "grad_norm": 1.086778230300176, + "learning_rate": 1.990750819708896e-05, + "loss": 0.4096733331680298, + "step": 688 + }, + { + "epoch": 0.18297702828309653, + "grad_norm": 1.131269280292305, + "learning_rate": 1.9906911442343567e-05, + "loss": 0.41432395577430725, + "step": 689 + }, + { + "epoch": 0.18324259726463948, + "grad_norm": 1.1182736792418642, + "learning_rate": 1.9906312777662493e-05, + "loss": 0.3934200406074524, + "step": 690 + }, + { + "epoch": 0.18350816624618244, + "grad_norm": 1.0493015785833109, + "learning_rate": 1.9905712203161148e-05, + "loss": 0.4246784746646881, + "step": 691 + }, + { + "epoch": 0.18377373522772542, + "grad_norm": 1.1362836227785695, + "learning_rate": 1.9905109718955323e-05, + "loss": 0.40027567744255066, + "step": 692 + }, + { + "epoch": 0.18403930420926837, + "grad_norm": 1.056262242708622, + "learning_rate": 1.990450532516116e-05, + "loss": 0.4162583351135254, + "step": 693 + }, + { + "epoch": 0.18430487319081132, + "grad_norm": 1.05760814074371, + "learning_rate": 1.990389902189518e-05, + "loss": 0.4133074879646301, + "step": 694 + }, + { + "epoch": 0.18457044217235427, + "grad_norm": 
1.0438921885629904, + "learning_rate": 1.9903290809274277e-05, + "loss": 0.333192378282547, + "step": 695 + }, + { + "epoch": 0.18483601115389722, + "grad_norm": 0.9814281867123515, + "learning_rate": 1.9902680687415704e-05, + "loss": 0.39349496364593506, + "step": 696 + }, + { + "epoch": 0.18510158013544017, + "grad_norm": 1.0366332083029342, + "learning_rate": 1.9902068656437086e-05, + "loss": 0.39678412675857544, + "step": 697 + }, + { + "epoch": 0.18536714911698313, + "grad_norm": 1.0003960978434148, + "learning_rate": 1.9901454716456415e-05, + "loss": 0.3553932011127472, + "step": 698 + }, + { + "epoch": 0.18563271809852608, + "grad_norm": 1.0876315802223169, + "learning_rate": 1.990083886759205e-05, + "loss": 0.4264630079269409, + "step": 699 + }, + { + "epoch": 0.18589828708006906, + "grad_norm": 1.0135520655053032, + "learning_rate": 1.9900221109962726e-05, + "loss": 0.3883950412273407, + "step": 700 + }, + { + "epoch": 0.186163856061612, + "grad_norm": 1.0408639715408188, + "learning_rate": 1.989960144368753e-05, + "loss": 0.38465407490730286, + "step": 701 + }, + { + "epoch": 0.18642942504315496, + "grad_norm": 2.2198594223984065, + "learning_rate": 1.9898979868885933e-05, + "loss": 0.39897871017456055, + "step": 702 + }, + { + "epoch": 0.1866949940246979, + "grad_norm": 1.120873004114704, + "learning_rate": 1.9898356385677762e-05, + "loss": 0.4386023283004761, + "step": 703 + }, + { + "epoch": 0.18696056300624087, + "grad_norm": 1.0254606123190075, + "learning_rate": 1.989773099418322e-05, + "loss": 0.42621874809265137, + "step": 704 + }, + { + "epoch": 0.18722613198778382, + "grad_norm": 1.0153284696458207, + "learning_rate": 1.9897103694522877e-05, + "loss": 0.3811546266078949, + "step": 705 + }, + { + "epoch": 0.18749170096932677, + "grad_norm": 1.0634877610237485, + "learning_rate": 1.989647448681767e-05, + "loss": 0.4018982946872711, + "step": 706 + }, + { + "epoch": 0.18775726995086975, + "grad_norm": 1.0316038713106725, + "learning_rate": 
1.9895843371188897e-05, + "loss": 0.3920126259326935, + "step": 707 + }, + { + "epoch": 0.1880228389324127, + "grad_norm": 0.9767495366810068, + "learning_rate": 1.9895210347758233e-05, + "loss": 0.3598487973213196, + "step": 708 + }, + { + "epoch": 0.18828840791395565, + "grad_norm": 1.0286682270198635, + "learning_rate": 1.9894575416647717e-05, + "loss": 0.4204316735267639, + "step": 709 + }, + { + "epoch": 0.1885539768954986, + "grad_norm": 0.9653709480495668, + "learning_rate": 1.9893938577979755e-05, + "loss": 0.33814263343811035, + "step": 710 + }, + { + "epoch": 0.18881954587704156, + "grad_norm": 0.9588770367914977, + "learning_rate": 1.9893299831877124e-05, + "loss": 0.3788227140903473, + "step": 711 + }, + { + "epoch": 0.1890851148585845, + "grad_norm": 0.9974371582936609, + "learning_rate": 1.989265917846297e-05, + "loss": 0.38141176104545593, + "step": 712 + }, + { + "epoch": 0.18935068384012746, + "grad_norm": 1.0051109402301954, + "learning_rate": 1.9892016617860793e-05, + "loss": 0.3757280707359314, + "step": 713 + }, + { + "epoch": 0.18961625282167044, + "grad_norm": 0.9863956856856875, + "learning_rate": 1.989137215019448e-05, + "loss": 0.37819087505340576, + "step": 714 + }, + { + "epoch": 0.1898818218032134, + "grad_norm": 1.1797000402703188, + "learning_rate": 1.9890725775588277e-05, + "loss": 0.46046000719070435, + "step": 715 + }, + { + "epoch": 0.19014739078475634, + "grad_norm": 0.9967163493181064, + "learning_rate": 1.9890077494166792e-05, + "loss": 0.33967363834381104, + "step": 716 + }, + { + "epoch": 0.1904129597662993, + "grad_norm": 0.9620841339155507, + "learning_rate": 1.988942730605501e-05, + "loss": 0.36672675609588623, + "step": 717 + }, + { + "epoch": 0.19067852874784225, + "grad_norm": 1.0666183498740949, + "learning_rate": 1.9888775211378278e-05, + "loss": 0.38705015182495117, + "step": 718 + }, + { + "epoch": 0.1909440977293852, + "grad_norm": 1.0696051052523068, + "learning_rate": 1.9888121210262313e-05, + "loss": 
0.35257095098495483, + "step": 719 + }, + { + "epoch": 0.19120966671092815, + "grad_norm": 1.0337108803934987, + "learning_rate": 1.9887465302833194e-05, + "loss": 0.3803965449333191, + "step": 720 + }, + { + "epoch": 0.19147523569247113, + "grad_norm": 1.0097965015220993, + "learning_rate": 1.988680748921738e-05, + "loss": 0.38166487216949463, + "step": 721 + }, + { + "epoch": 0.19174080467401408, + "grad_norm": 0.971159209120872, + "learning_rate": 1.988614776954169e-05, + "loss": 0.4017483592033386, + "step": 722 + }, + { + "epoch": 0.19200637365555703, + "grad_norm": 1.0651840937747212, + "learning_rate": 1.98854861439333e-05, + "loss": 0.4343035817146301, + "step": 723 + }, + { + "epoch": 0.19227194263709999, + "grad_norm": 1.0527178531986199, + "learning_rate": 1.9884822612519773e-05, + "loss": 0.4017031192779541, + "step": 724 + }, + { + "epoch": 0.19253751161864294, + "grad_norm": 0.9558335625340557, + "learning_rate": 1.988415717542903e-05, + "loss": 0.32294636964797974, + "step": 725 + }, + { + "epoch": 0.1928030806001859, + "grad_norm": 1.018550638071552, + "learning_rate": 1.988348983278935e-05, + "loss": 0.34661561250686646, + "step": 726 + }, + { + "epoch": 0.19306864958172884, + "grad_norm": 1.1264464061553692, + "learning_rate": 1.98828205847294e-05, + "loss": 0.3588724434375763, + "step": 727 + }, + { + "epoch": 0.19333421856327182, + "grad_norm": 1.151476031768393, + "learning_rate": 1.9882149431378194e-05, + "loss": 0.45439180731773376, + "step": 728 + }, + { + "epoch": 0.19359978754481477, + "grad_norm": 1.092854672146059, + "learning_rate": 1.988147637286513e-05, + "loss": 0.3916742205619812, + "step": 729 + }, + { + "epoch": 0.19386535652635772, + "grad_norm": 1.1073017625666908, + "learning_rate": 1.988080140931996e-05, + "loss": 0.3838115334510803, + "step": 730 + }, + { + "epoch": 0.19413092550790068, + "grad_norm": 1.0305888563782257, + "learning_rate": 1.9880124540872813e-05, + "loss": 0.3803096413612366, + "step": 731 + }, + { + "epoch": 
0.19439649448944363, + "grad_norm": 1.0697488639709387, + "learning_rate": 1.987944576765418e-05, + "loss": 0.4180675446987152, + "step": 732 + }, + { + "epoch": 0.19466206347098658, + "grad_norm": 0.968492149308095, + "learning_rate": 1.987876508979492e-05, + "loss": 0.34485924243927, + "step": 733 + }, + { + "epoch": 0.19492763245252953, + "grad_norm": 1.0301319893667387, + "learning_rate": 1.987808250742626e-05, + "loss": 0.3696223795413971, + "step": 734 + }, + { + "epoch": 0.1951932014340725, + "grad_norm": 1.0070871597151176, + "learning_rate": 1.9877398020679796e-05, + "loss": 0.39920324087142944, + "step": 735 + }, + { + "epoch": 0.19545877041561546, + "grad_norm": 0.9772548764362861, + "learning_rate": 1.987671162968748e-05, + "loss": 0.33534419536590576, + "step": 736 + }, + { + "epoch": 0.19572433939715841, + "grad_norm": 0.955184588375953, + "learning_rate": 1.9876023334581657e-05, + "loss": 0.3698185682296753, + "step": 737 + }, + { + "epoch": 0.19598990837870137, + "grad_norm": 1.0108475553340988, + "learning_rate": 1.9875333135495e-05, + "loss": 0.37388375401496887, + "step": 738 + }, + { + "epoch": 0.19625547736024432, + "grad_norm": 0.9685434293396273, + "learning_rate": 1.9874641032560594e-05, + "loss": 0.3285469114780426, + "step": 739 + }, + { + "epoch": 0.19652104634178727, + "grad_norm": 1.01794140535256, + "learning_rate": 1.9873947025911854e-05, + "loss": 0.3539549708366394, + "step": 740 + }, + { + "epoch": 0.19678661532333022, + "grad_norm": 1.0943847325994938, + "learning_rate": 1.9873251115682577e-05, + "loss": 0.4707021117210388, + "step": 741 + }, + { + "epoch": 0.1970521843048732, + "grad_norm": 0.9783865509799976, + "learning_rate": 1.987255330200693e-05, + "loss": 0.3871781826019287, + "step": 742 + }, + { + "epoch": 0.19731775328641615, + "grad_norm": 1.0462206197157178, + "learning_rate": 1.9871853585019446e-05, + "loss": 0.3890243172645569, + "step": 743 + }, + { + "epoch": 0.1975833222679591, + "grad_norm": 0.9914096392216383, + 
"learning_rate": 1.9871151964855013e-05, + "loss": 0.34914374351501465, + "step": 744 + }, + { + "epoch": 0.19784889124950206, + "grad_norm": 1.0157439665946277, + "learning_rate": 1.9870448441648905e-05, + "loss": 0.41009777784347534, + "step": 745 + }, + { + "epoch": 0.198114460231045, + "grad_norm": 1.0725931773033663, + "learning_rate": 1.9869743015536747e-05, + "loss": 0.39449363946914673, + "step": 746 + }, + { + "epoch": 0.19838002921258796, + "grad_norm": 1.081644116196219, + "learning_rate": 1.9869035686654538e-05, + "loss": 0.3530065417289734, + "step": 747 + }, + { + "epoch": 0.1986455981941309, + "grad_norm": 1.1338420898560146, + "learning_rate": 1.986832645513864e-05, + "loss": 0.4255196154117584, + "step": 748 + }, + { + "epoch": 0.1989111671756739, + "grad_norm": 1.0625457917520444, + "learning_rate": 1.9867615321125796e-05, + "loss": 0.3921143114566803, + "step": 749 + }, + { + "epoch": 0.19917673615721684, + "grad_norm": 1.1076371778966394, + "learning_rate": 1.986690228475309e-05, + "loss": 0.4157381057739258, + "step": 750 + }, + { + "epoch": 0.1994423051387598, + "grad_norm": 0.9887260401437288, + "learning_rate": 1.986618734615799e-05, + "loss": 0.3922047019004822, + "step": 751 + }, + { + "epoch": 0.19970787412030275, + "grad_norm": 1.2477225666156357, + "learning_rate": 1.9865470505478335e-05, + "loss": 0.4378710985183716, + "step": 752 + }, + { + "epoch": 0.1999734431018457, + "grad_norm": 0.9960415180367619, + "learning_rate": 1.986475176285232e-05, + "loss": 0.3636753261089325, + "step": 753 + }, + { + "epoch": 0.20023901208338865, + "grad_norm": 1.0691751577172293, + "learning_rate": 1.986403111841851e-05, + "loss": 0.3509834408760071, + "step": 754 + }, + { + "epoch": 0.2005045810649316, + "grad_norm": 0.9490438891131449, + "learning_rate": 1.986330857231583e-05, + "loss": 0.3539624512195587, + "step": 755 + }, + { + "epoch": 0.20077015004647458, + "grad_norm": 1.002849163142055, + "learning_rate": 1.9862584124683587e-05, + "loss": 
0.417904257774353, + "step": 756 + }, + { + "epoch": 0.20103571902801753, + "grad_norm": 0.9438738740406134, + "learning_rate": 1.9861857775661442e-05, + "loss": 0.3602277636528015, + "step": 757 + }, + { + "epoch": 0.2013012880095605, + "grad_norm": 1.0703002408877305, + "learning_rate": 1.986112952538943e-05, + "loss": 0.41064661741256714, + "step": 758 + }, + { + "epoch": 0.20156685699110344, + "grad_norm": 0.9789269746167363, + "learning_rate": 1.9860399374007944e-05, + "loss": 0.36313754320144653, + "step": 759 + }, + { + "epoch": 0.2018324259726464, + "grad_norm": 1.0711706181502203, + "learning_rate": 1.9859667321657755e-05, + "loss": 0.39497628808021545, + "step": 760 + }, + { + "epoch": 0.20209799495418934, + "grad_norm": 1.0173001682725575, + "learning_rate": 1.9858933368479987e-05, + "loss": 0.405613511800766, + "step": 761 + }, + { + "epoch": 0.2023635639357323, + "grad_norm": 0.9881458101524105, + "learning_rate": 1.9858197514616142e-05, + "loss": 0.39093440771102905, + "step": 762 + }, + { + "epoch": 0.20262913291727527, + "grad_norm": 1.0330584509521943, + "learning_rate": 1.9857459760208084e-05, + "loss": 0.39908382296562195, + "step": 763 + }, + { + "epoch": 0.20289470189881822, + "grad_norm": 0.9416263868211369, + "learning_rate": 1.9856720105398038e-05, + "loss": 0.36787620186805725, + "step": 764 + }, + { + "epoch": 0.20316027088036118, + "grad_norm": 1.0128388377672763, + "learning_rate": 1.985597855032861e-05, + "loss": 0.390550822019577, + "step": 765 + }, + { + "epoch": 0.20342583986190413, + "grad_norm": 1.115759431869763, + "learning_rate": 1.9855235095142754e-05, + "loss": 0.4191611409187317, + "step": 766 + }, + { + "epoch": 0.20369140884344708, + "grad_norm": 1.1288935622655036, + "learning_rate": 1.985448973998381e-05, + "loss": 0.4060766100883484, + "step": 767 + }, + { + "epoch": 0.20395697782499003, + "grad_norm": 1.055264696895727, + "learning_rate": 1.985374248499546e-05, + "loss": 0.3906163275241852, + "step": 768 + }, + { + 
"epoch": 0.20422254680653298, + "grad_norm": 1.0101644212894914, + "learning_rate": 1.9852993330321774e-05, + "loss": 0.3926839828491211, + "step": 769 + }, + { + "epoch": 0.20448811578807596, + "grad_norm": 1.0474151984911524, + "learning_rate": 1.9852242276107182e-05, + "loss": 0.37276068329811096, + "step": 770 + }, + { + "epoch": 0.20475368476961892, + "grad_norm": 0.9531396793135881, + "learning_rate": 1.9851489322496476e-05, + "loss": 0.3765360414981842, + "step": 771 + }, + { + "epoch": 0.20501925375116187, + "grad_norm": 1.0017274873228423, + "learning_rate": 1.9850734469634815e-05, + "loss": 0.35091257095336914, + "step": 772 + }, + { + "epoch": 0.20528482273270482, + "grad_norm": 1.1164065944268338, + "learning_rate": 1.9849977717667725e-05, + "loss": 0.4259791076183319, + "step": 773 + }, + { + "epoch": 0.20555039171424777, + "grad_norm": 0.9939508272565134, + "learning_rate": 1.9849219066741102e-05, + "loss": 0.3563114404678345, + "step": 774 + }, + { + "epoch": 0.20581596069579072, + "grad_norm": 1.0814350606971046, + "learning_rate": 1.9848458517001203e-05, + "loss": 0.4148223102092743, + "step": 775 + }, + { + "epoch": 0.20608152967733367, + "grad_norm": 1.0296405515766518, + "learning_rate": 1.9847696068594655e-05, + "loss": 0.3817785382270813, + "step": 776 + }, + { + "epoch": 0.20634709865887665, + "grad_norm": 1.115875170640065, + "learning_rate": 1.984693172166845e-05, + "loss": 0.41741886734962463, + "step": 777 + }, + { + "epoch": 0.2066126676404196, + "grad_norm": 1.0479957521256793, + "learning_rate": 1.9846165476369938e-05, + "loss": 0.34800025820732117, + "step": 778 + }, + { + "epoch": 0.20687823662196256, + "grad_norm": 1.0122784392492805, + "learning_rate": 1.9845397332846848e-05, + "loss": 0.38093405961990356, + "step": 779 + }, + { + "epoch": 0.2071438056035055, + "grad_norm": 1.0953515150858002, + "learning_rate": 1.9844627291247268e-05, + "loss": 0.40733009576797485, + "step": 780 + }, + { + "epoch": 0.20740937458504846, + 
"grad_norm": 1.1011295166986532, + "learning_rate": 1.9843855351719655e-05, + "loss": 0.3829066753387451, + "step": 781 + }, + { + "epoch": 0.2076749435665914, + "grad_norm": 1.0316161170996605, + "learning_rate": 1.9843081514412827e-05, + "loss": 0.3574868440628052, + "step": 782 + }, + { + "epoch": 0.20794051254813437, + "grad_norm": 1.071531696766489, + "learning_rate": 1.984230577947597e-05, + "loss": 0.3675144612789154, + "step": 783 + }, + { + "epoch": 0.20820608152967734, + "grad_norm": 0.9982781618225591, + "learning_rate": 1.9841528147058638e-05, + "loss": 0.36120525002479553, + "step": 784 + }, + { + "epoch": 0.2084716505112203, + "grad_norm": 1.0016427535647234, + "learning_rate": 1.984074861731075e-05, + "loss": 0.3651392459869385, + "step": 785 + }, + { + "epoch": 0.20873721949276325, + "grad_norm": 1.1254815799645344, + "learning_rate": 1.983996719038259e-05, + "loss": 0.4204651117324829, + "step": 786 + }, + { + "epoch": 0.2090027884743062, + "grad_norm": 1.0600310007301286, + "learning_rate": 1.9839183866424806e-05, + "loss": 0.4452149271965027, + "step": 787 + }, + { + "epoch": 0.20926835745584915, + "grad_norm": 1.000047138771705, + "learning_rate": 1.9838398645588418e-05, + "loss": 0.3931270241737366, + "step": 788 + }, + { + "epoch": 0.2095339264373921, + "grad_norm": 1.0009892054118905, + "learning_rate": 1.98376115280248e-05, + "loss": 0.3680538535118103, + "step": 789 + }, + { + "epoch": 0.20979949541893506, + "grad_norm": 0.9848864128393906, + "learning_rate": 1.9836822513885704e-05, + "loss": 0.3766820728778839, + "step": 790 + }, + { + "epoch": 0.21006506440047804, + "grad_norm": 1.0494510099931045, + "learning_rate": 1.9836031603323245e-05, + "loss": 0.3602439761161804, + "step": 791 + }, + { + "epoch": 0.210330633382021, + "grad_norm": 0.9790632198207762, + "learning_rate": 1.98352387964899e-05, + "loss": 0.38925549387931824, + "step": 792 + }, + { + "epoch": 0.21059620236356394, + "grad_norm": 1.0121548586068807, + "learning_rate": 
1.9834444093538504e-05, + "loss": 0.3569640517234802, + "step": 793 + }, + { + "epoch": 0.2108617713451069, + "grad_norm": 1.0171085592107372, + "learning_rate": 1.9833647494622275e-05, + "loss": 0.3543340265750885, + "step": 794 + }, + { + "epoch": 0.21112734032664984, + "grad_norm": 1.0426744340585967, + "learning_rate": 1.983284899989479e-05, + "loss": 0.37313222885131836, + "step": 795 + }, + { + "epoch": 0.2113929093081928, + "grad_norm": 1.0940501026222131, + "learning_rate": 1.983204860950998e-05, + "loss": 0.3874257802963257, + "step": 796 + }, + { + "epoch": 0.21165847828973575, + "grad_norm": 1.005805069630653, + "learning_rate": 1.983124632362216e-05, + "loss": 0.3815164864063263, + "step": 797 + }, + { + "epoch": 0.21192404727127873, + "grad_norm": 1.0879143214156584, + "learning_rate": 1.9830442142386e-05, + "loss": 0.39476731419563293, + "step": 798 + }, + { + "epoch": 0.21218961625282168, + "grad_norm": 1.0888281701524323, + "learning_rate": 1.9829636065956527e-05, + "loss": 0.399338036775589, + "step": 799 + }, + { + "epoch": 0.21245518523436463, + "grad_norm": 1.0679987938098825, + "learning_rate": 1.9828828094489157e-05, + "loss": 0.3940344452857971, + "step": 800 + }, + { + "epoch": 0.21272075421590758, + "grad_norm": 1.0124680733329086, + "learning_rate": 1.9828018228139647e-05, + "loss": 0.35597044229507446, + "step": 801 + }, + { + "epoch": 0.21298632319745053, + "grad_norm": 1.197291261672491, + "learning_rate": 1.9827206467064133e-05, + "loss": 0.4309435784816742, + "step": 802 + }, + { + "epoch": 0.21325189217899349, + "grad_norm": 1.0158009285134544, + "learning_rate": 1.9826392811419113e-05, + "loss": 0.37327438592910767, + "step": 803 + }, + { + "epoch": 0.21351746116053644, + "grad_norm": 0.9944187944281718, + "learning_rate": 1.9825577261361454e-05, + "loss": 0.35214242339134216, + "step": 804 + }, + { + "epoch": 0.21378303014207942, + "grad_norm": 1.1575422458756877, + "learning_rate": 1.982475981704838e-05, + "loss": 
0.41114968061447144, + "step": 805 + }, + { + "epoch": 0.21404859912362237, + "grad_norm": 0.9719994027948292, + "learning_rate": 1.9823940478637486e-05, + "loss": 0.3632299304008484, + "step": 806 + }, + { + "epoch": 0.21431416810516532, + "grad_norm": 1.1699036102992622, + "learning_rate": 1.9823119246286727e-05, + "loss": 0.39640772342681885, + "step": 807 + }, + { + "epoch": 0.21457973708670827, + "grad_norm": 1.002397111320771, + "learning_rate": 1.9822296120154433e-05, + "loss": 0.39356929063796997, + "step": 808 + }, + { + "epoch": 0.21484530606825122, + "grad_norm": 1.061754718166072, + "learning_rate": 1.9821471100399294e-05, + "loss": 0.3710761070251465, + "step": 809 + }, + { + "epoch": 0.21511087504979418, + "grad_norm": 0.9713246248834058, + "learning_rate": 1.9820644187180354e-05, + "loss": 0.35515087842941284, + "step": 810 + }, + { + "epoch": 0.21537644403133713, + "grad_norm": 1.0166244205196049, + "learning_rate": 1.981981538065704e-05, + "loss": 0.3803205192089081, + "step": 811 + }, + { + "epoch": 0.2156420130128801, + "grad_norm": 1.0421456761704733, + "learning_rate": 1.9818984680989134e-05, + "loss": 0.40275394916534424, + "step": 812 + }, + { + "epoch": 0.21590758199442306, + "grad_norm": 1.0872785008811605, + "learning_rate": 1.9818152088336786e-05, + "loss": 0.3711051344871521, + "step": 813 + }, + { + "epoch": 0.216173150975966, + "grad_norm": 1.0872190904032264, + "learning_rate": 1.9817317602860512e-05, + "loss": 0.4198985695838928, + "step": 814 + }, + { + "epoch": 0.21643871995750896, + "grad_norm": 0.9931448766878032, + "learning_rate": 1.9816481224721185e-05, + "loss": 0.38333773612976074, + "step": 815 + }, + { + "epoch": 0.21670428893905191, + "grad_norm": 1.1679000778390602, + "learning_rate": 1.9815642954080055e-05, + "loss": 0.3959774971008301, + "step": 816 + }, + { + "epoch": 0.21696985792059487, + "grad_norm": 1.1013876458182361, + "learning_rate": 1.9814802791098728e-05, + "loss": 0.3475337326526642, + "step": 817 + }, + { 
+ "epoch": 0.21723542690213782, + "grad_norm": 1.06867842878894, + "learning_rate": 1.981396073593918e-05, + "loss": 0.369370698928833, + "step": 818 + }, + { + "epoch": 0.2175009958836808, + "grad_norm": 1.085763343280496, + "learning_rate": 1.9813116788763744e-05, + "loss": 0.3515776991844177, + "step": 819 + }, + { + "epoch": 0.21776656486522375, + "grad_norm": 1.0780206278908893, + "learning_rate": 1.9812270949735124e-05, + "loss": 0.3637402355670929, + "step": 820 + }, + { + "epoch": 0.2180321338467667, + "grad_norm": 1.0342672695189807, + "learning_rate": 1.9811423219016395e-05, + "loss": 0.3930947780609131, + "step": 821 + }, + { + "epoch": 0.21829770282830965, + "grad_norm": 1.102521832922822, + "learning_rate": 1.981057359677098e-05, + "loss": 0.40081048011779785, + "step": 822 + }, + { + "epoch": 0.2185632718098526, + "grad_norm": 1.0386373096164698, + "learning_rate": 1.9809722083162682e-05, + "loss": 0.3831724226474762, + "step": 823 + }, + { + "epoch": 0.21882884079139556, + "grad_norm": 1.0516274934858763, + "learning_rate": 1.9808868678355662e-05, + "loss": 0.3919270932674408, + "step": 824 + }, + { + "epoch": 0.2190944097729385, + "grad_norm": 1.0623138704484363, + "learning_rate": 1.9808013382514448e-05, + "loss": 0.41782522201538086, + "step": 825 + }, + { + "epoch": 0.2193599787544815, + "grad_norm": 1.0570337251212087, + "learning_rate": 1.9807156195803926e-05, + "loss": 0.3751329779624939, + "step": 826 + }, + { + "epoch": 0.21962554773602444, + "grad_norm": 1.0009279652164118, + "learning_rate": 1.9806297118389353e-05, + "loss": 0.36451685428619385, + "step": 827 + }, + { + "epoch": 0.2198911167175674, + "grad_norm": 1.1911804759546862, + "learning_rate": 1.9805436150436352e-05, + "loss": 0.3924056887626648, + "step": 828 + }, + { + "epoch": 0.22015668569911034, + "grad_norm": 0.9887238598202497, + "learning_rate": 1.9804573292110906e-05, + "loss": 0.34744757413864136, + "step": 829 + }, + { + "epoch": 0.2204222546806533, + "grad_norm": 
1.1506637434477502, + "learning_rate": 1.980370854357936e-05, + "loss": 0.4162982702255249, + "step": 830 + }, + { + "epoch": 0.22068782366219625, + "grad_norm": 1.103994708633239, + "learning_rate": 1.9802841905008434e-05, + "loss": 0.36572596430778503, + "step": 831 + }, + { + "epoch": 0.2209533926437392, + "grad_norm": 1.0028116020560682, + "learning_rate": 1.98019733765652e-05, + "loss": 0.3535170555114746, + "step": 832 + }, + { + "epoch": 0.22121896162528218, + "grad_norm": 1.061392974987333, + "learning_rate": 1.9801102958417107e-05, + "loss": 0.3906480073928833, + "step": 833 + }, + { + "epoch": 0.22148453060682513, + "grad_norm": 1.0646039703833918, + "learning_rate": 1.980023065073195e-05, + "loss": 0.34185755252838135, + "step": 834 + }, + { + "epoch": 0.22175009958836808, + "grad_norm": 1.1983506875652454, + "learning_rate": 1.9799356453677913e-05, + "loss": 0.4216359853744507, + "step": 835 + }, + { + "epoch": 0.22201566856991103, + "grad_norm": 1.038756499639493, + "learning_rate": 1.979848036742352e-05, + "loss": 0.365469366312027, + "step": 836 + }, + { + "epoch": 0.222281237551454, + "grad_norm": 1.0128951338762324, + "learning_rate": 1.9797602392137678e-05, + "loss": 0.3570204973220825, + "step": 837 + }, + { + "epoch": 0.22254680653299694, + "grad_norm": 1.0221196075964396, + "learning_rate": 1.9796722527989646e-05, + "loss": 0.3929975926876068, + "step": 838 + }, + { + "epoch": 0.2228123755145399, + "grad_norm": 1.1512146064832047, + "learning_rate": 1.979584077514905e-05, + "loss": 0.39064258337020874, + "step": 839 + }, + { + "epoch": 0.22307794449608287, + "grad_norm": 1.0559333522375243, + "learning_rate": 1.9794957133785884e-05, + "loss": 0.3626471757888794, + "step": 840 + }, + { + "epoch": 0.22334351347762582, + "grad_norm": 1.0867316997584564, + "learning_rate": 1.9794071604070506e-05, + "loss": 0.4337238371372223, + "step": 841 + }, + { + "epoch": 0.22360908245916877, + "grad_norm": 0.9358033183445809, + "learning_rate": 
1.9793184186173632e-05, + "loss": 0.3361967206001282, + "step": 842 + }, + { + "epoch": 0.22387465144071172, + "grad_norm": 0.961043072021178, + "learning_rate": 1.9792294880266346e-05, + "loss": 0.3429332971572876, + "step": 843 + }, + { + "epoch": 0.22414022042225468, + "grad_norm": 1.012773989217256, + "learning_rate": 1.97914036865201e-05, + "loss": 0.39196616411209106, + "step": 844 + }, + { + "epoch": 0.22440578940379763, + "grad_norm": 1.1250916546708978, + "learning_rate": 1.9790510605106697e-05, + "loss": 0.3763045072555542, + "step": 845 + }, + { + "epoch": 0.22467135838534058, + "grad_norm": 1.1139610172600873, + "learning_rate": 1.978961563619832e-05, + "loss": 0.41614070534706116, + "step": 846 + }, + { + "epoch": 0.22493692736688356, + "grad_norm": 1.065347693165354, + "learning_rate": 1.9788718779967506e-05, + "loss": 0.3834165334701538, + "step": 847 + }, + { + "epoch": 0.2252024963484265, + "grad_norm": 0.9834992911039661, + "learning_rate": 1.978782003658716e-05, + "loss": 0.3552364110946655, + "step": 848 + }, + { + "epoch": 0.22546806532996946, + "grad_norm": 1.0365749744504318, + "learning_rate": 1.9786919406230544e-05, + "loss": 0.3857925534248352, + "step": 849 + }, + { + "epoch": 0.22573363431151242, + "grad_norm": 1.0779836727772776, + "learning_rate": 1.9786016889071294e-05, + "loss": 0.3501393795013428, + "step": 850 + }, + { + "epoch": 0.22599920329305537, + "grad_norm": 1.1363104904390704, + "learning_rate": 1.9785112485283404e-05, + "loss": 0.36280643939971924, + "step": 851 + }, + { + "epoch": 0.22626477227459832, + "grad_norm": 1.1791591930929934, + "learning_rate": 1.978420619504123e-05, + "loss": 0.3713894486427307, + "step": 852 + }, + { + "epoch": 0.22653034125614127, + "grad_norm": 1.0682718312185442, + "learning_rate": 1.97832980185195e-05, + "loss": 0.3668733537197113, + "step": 853 + }, + { + "epoch": 0.22679591023768425, + "grad_norm": 1.06232834606136, + "learning_rate": 1.978238795589329e-05, + "loss": 0.4054701626300812, 
+ "step": 854 + }, + { + "epoch": 0.2270614792192272, + "grad_norm": 1.1024819375758403, + "learning_rate": 1.9781476007338058e-05, + "loss": 0.3824681043624878, + "step": 855 + }, + { + "epoch": 0.22732704820077015, + "grad_norm": 1.0604830101195206, + "learning_rate": 1.978056217302961e-05, + "loss": 0.4009544253349304, + "step": 856 + }, + { + "epoch": 0.2275926171823131, + "grad_norm": 1.0150812264671392, + "learning_rate": 1.9779646453144133e-05, + "loss": 0.34773316979408264, + "step": 857 + }, + { + "epoch": 0.22785818616385606, + "grad_norm": 1.0737509474924387, + "learning_rate": 1.977872884785815e-05, + "loss": 0.4067278206348419, + "step": 858 + }, + { + "epoch": 0.228123755145399, + "grad_norm": 1.0566398666110703, + "learning_rate": 1.9777809357348584e-05, + "loss": 0.3843458890914917, + "step": 859 + }, + { + "epoch": 0.22838932412694196, + "grad_norm": 1.083451143522079, + "learning_rate": 1.977688798179269e-05, + "loss": 0.4261704683303833, + "step": 860 + }, + { + "epoch": 0.22865489310848494, + "grad_norm": 1.0145015740681522, + "learning_rate": 1.9775964721368098e-05, + "loss": 0.39109086990356445, + "step": 861 + }, + { + "epoch": 0.2289204620900279, + "grad_norm": 1.1472642326588585, + "learning_rate": 1.9775039576252807e-05, + "loss": 0.39436954259872437, + "step": 862 + }, + { + "epoch": 0.22918603107157084, + "grad_norm": 0.9770870267905873, + "learning_rate": 1.9774112546625168e-05, + "loss": 0.3787967562675476, + "step": 863 + }, + { + "epoch": 0.2294516000531138, + "grad_norm": 1.5071435779935147, + "learning_rate": 1.9773183632663907e-05, + "loss": 0.3729320466518402, + "step": 864 + }, + { + "epoch": 0.22971716903465675, + "grad_norm": 1.0048578103437809, + "learning_rate": 1.9772252834548108e-05, + "loss": 0.3817081153392792, + "step": 865 + }, + { + "epoch": 0.2299827380161997, + "grad_norm": 0.9709592169890221, + "learning_rate": 1.9771320152457212e-05, + "loss": 0.3362218737602234, + "step": 866 + }, + { + "epoch": 
0.23024830699774265, + "grad_norm": 1.0194192402395448, + "learning_rate": 1.9770385586571033e-05, + "loss": 0.37274059653282166, + "step": 867 + }, + { + "epoch": 0.23051387597928563, + "grad_norm": 1.058710969457703, + "learning_rate": 1.9769449137069746e-05, + "loss": 0.3832330107688904, + "step": 868 + }, + { + "epoch": 0.23077944496082858, + "grad_norm": 0.9857605594513371, + "learning_rate": 1.9768510804133886e-05, + "loss": 0.37420010566711426, + "step": 869 + }, + { + "epoch": 0.23104501394237154, + "grad_norm": 1.0333482020677847, + "learning_rate": 1.976757058794435e-05, + "loss": 0.35314565896987915, + "step": 870 + }, + { + "epoch": 0.2313105829239145, + "grad_norm": 1.0404097802666386, + "learning_rate": 1.97666284886824e-05, + "loss": 0.34667372703552246, + "step": 871 + }, + { + "epoch": 0.23157615190545744, + "grad_norm": 1.1826768759617956, + "learning_rate": 1.976568450652967e-05, + "loss": 0.3465980589389801, + "step": 872 + }, + { + "epoch": 0.2318417208870004, + "grad_norm": 1.6479387485919323, + "learning_rate": 1.9764738641668137e-05, + "loss": 0.40539389848709106, + "step": 873 + }, + { + "epoch": 0.23210728986854334, + "grad_norm": 1.090454596374008, + "learning_rate": 1.976379089428016e-05, + "loss": 0.35154545307159424, + "step": 874 + }, + { + "epoch": 0.23237285885008632, + "grad_norm": 1.1033163387519414, + "learning_rate": 1.9762841264548453e-05, + "loss": 0.39748087525367737, + "step": 875 + }, + { + "epoch": 0.23263842783162927, + "grad_norm": 1.0600221119400453, + "learning_rate": 1.976188975265609e-05, + "loss": 0.41628387570381165, + "step": 876 + }, + { + "epoch": 0.23290399681317223, + "grad_norm": 1.0805125037340586, + "learning_rate": 1.976093635878652e-05, + "loss": 0.4076233208179474, + "step": 877 + }, + { + "epoch": 0.23316956579471518, + "grad_norm": 0.9221839355888705, + "learning_rate": 1.9759981083123533e-05, + "loss": 0.3262259364128113, + "step": 878 + }, + { + "epoch": 0.23343513477625813, + "grad_norm": 
1.1690018828805817, + "learning_rate": 1.9759023925851302e-05, + "loss": 0.36561673879623413, + "step": 879 + }, + { + "epoch": 0.23370070375780108, + "grad_norm": 1.083829918240926, + "learning_rate": 1.9758064887154358e-05, + "loss": 0.36661773920059204, + "step": 880 + }, + { + "epoch": 0.23396627273934403, + "grad_norm": 1.0655263771494812, + "learning_rate": 1.9757103967217587e-05, + "loss": 0.34671685099601746, + "step": 881 + }, + { + "epoch": 0.234231841720887, + "grad_norm": 1.0056372913167473, + "learning_rate": 1.9756141166226246e-05, + "loss": 0.3486331105232239, + "step": 882 + }, + { + "epoch": 0.23449741070242996, + "grad_norm": 1.1177836982205323, + "learning_rate": 1.9755176484365953e-05, + "loss": 0.3883505165576935, + "step": 883 + }, + { + "epoch": 0.23476297968397292, + "grad_norm": 1.0548520245203914, + "learning_rate": 1.9754209921822683e-05, + "loss": 0.3832106590270996, + "step": 884 + }, + { + "epoch": 0.23502854866551587, + "grad_norm": 1.078830112662993, + "learning_rate": 1.975324147878278e-05, + "loss": 0.37876033782958984, + "step": 885 + }, + { + "epoch": 0.23529411764705882, + "grad_norm": 1.0689289829128008, + "learning_rate": 1.975227115543295e-05, + "loss": 0.38931846618652344, + "step": 886 + }, + { + "epoch": 0.23555968662860177, + "grad_norm": 0.956721500767322, + "learning_rate": 1.9751298951960258e-05, + "loss": 0.3581021726131439, + "step": 887 + }, + { + "epoch": 0.23582525561014472, + "grad_norm": 1.0206944172292924, + "learning_rate": 1.9750324868552133e-05, + "loss": 0.35196465253829956, + "step": 888 + }, + { + "epoch": 0.2360908245916877, + "grad_norm": 0.9996206423870837, + "learning_rate": 1.974934890539637e-05, + "loss": 0.3635658025741577, + "step": 889 + }, + { + "epoch": 0.23635639357323066, + "grad_norm": 0.9523927655707425, + "learning_rate": 1.9748371062681122e-05, + "loss": 0.345594197511673, + "step": 890 + }, + { + "epoch": 0.2366219625547736, + "grad_norm": 1.0443032231121456, + "learning_rate": 
1.97473913405949e-05, + "loss": 0.357181191444397, + "step": 891 + }, + { + "epoch": 0.23688753153631656, + "grad_norm": 1.0008000126392016, + "learning_rate": 1.974640973932659e-05, + "loss": 0.3264622986316681, + "step": 892 + }, + { + "epoch": 0.2371531005178595, + "grad_norm": 0.9731630083329554, + "learning_rate": 1.9745426259065434e-05, + "loss": 0.37950894236564636, + "step": 893 + }, + { + "epoch": 0.23741866949940246, + "grad_norm": 1.1493289415276364, + "learning_rate": 1.9744440900001027e-05, + "loss": 0.37400782108306885, + "step": 894 + }, + { + "epoch": 0.23768423848094541, + "grad_norm": 1.0325785235739895, + "learning_rate": 1.974345366232334e-05, + "loss": 0.3455463945865631, + "step": 895 + }, + { + "epoch": 0.2379498074624884, + "grad_norm": 1.1059511993758653, + "learning_rate": 1.9742464546222702e-05, + "loss": 0.3605351150035858, + "step": 896 + }, + { + "epoch": 0.23821537644403135, + "grad_norm": 0.9763906212855142, + "learning_rate": 1.97414735518898e-05, + "loss": 0.3839051127433777, + "step": 897 + }, + { + "epoch": 0.2384809454255743, + "grad_norm": 1.0304758127284366, + "learning_rate": 1.974048067951569e-05, + "loss": 0.34562867879867554, + "step": 898 + }, + { + "epoch": 0.23874651440711725, + "grad_norm": 1.1332867443652592, + "learning_rate": 1.9739485929291778e-05, + "loss": 0.3986506760120392, + "step": 899 + }, + { + "epoch": 0.2390120833886602, + "grad_norm": 1.1598961775072092, + "learning_rate": 1.9738489301409848e-05, + "loss": 0.3955162465572357, + "step": 900 + }, + { + "epoch": 0.23927765237020315, + "grad_norm": 1.080226447361195, + "learning_rate": 1.9737490796062036e-05, + "loss": 0.370066374540329, + "step": 901 + }, + { + "epoch": 0.2395432213517461, + "grad_norm": 1.0637004733407822, + "learning_rate": 1.973649041344084e-05, + "loss": 0.3777826726436615, + "step": 902 + }, + { + "epoch": 0.23980879033328908, + "grad_norm": 1.1358293788080334, + "learning_rate": 1.9735488153739128e-05, + "loss": 0.327572226524353, + 
"step": 903 + }, + { + "epoch": 0.24007435931483204, + "grad_norm": 1.071729158749965, + "learning_rate": 1.973448401715011e-05, + "loss": 0.3921743929386139, + "step": 904 + }, + { + "epoch": 0.240339928296375, + "grad_norm": 1.0635179670685195, + "learning_rate": 1.973347800386739e-05, + "loss": 0.3683379888534546, + "step": 905 + }, + { + "epoch": 0.24060549727791794, + "grad_norm": 1.023832589054702, + "learning_rate": 1.9732470114084905e-05, + "loss": 0.390872597694397, + "step": 906 + }, + { + "epoch": 0.2408710662594609, + "grad_norm": 1.0814023137489452, + "learning_rate": 1.9731460347996964e-05, + "loss": 0.3772459626197815, + "step": 907 + }, + { + "epoch": 0.24113663524100384, + "grad_norm": 1.0280982913686894, + "learning_rate": 1.973044870579824e-05, + "loss": 0.37990954518318176, + "step": 908 + }, + { + "epoch": 0.2414022042225468, + "grad_norm": 1.0035238419205756, + "learning_rate": 1.972943518768377e-05, + "loss": 0.3380817770957947, + "step": 909 + }, + { + "epoch": 0.24166777320408978, + "grad_norm": 0.9879847056007396, + "learning_rate": 1.9728419793848935e-05, + "loss": 0.3348115384578705, + "step": 910 + }, + { + "epoch": 0.24193334218563273, + "grad_norm": 1.0561235323428824, + "learning_rate": 1.9727402524489505e-05, + "loss": 0.36936551332473755, + "step": 911 + }, + { + "epoch": 0.24219891116717568, + "grad_norm": 1.0744513063457712, + "learning_rate": 1.9726383379801593e-05, + "loss": 0.3871539235115051, + "step": 912 + }, + { + "epoch": 0.24246448014871863, + "grad_norm": 1.0904556770971818, + "learning_rate": 1.9725362359981676e-05, + "loss": 0.37087059020996094, + "step": 913 + }, + { + "epoch": 0.24273004913026158, + "grad_norm": 0.9802916629421812, + "learning_rate": 1.9724339465226595e-05, + "loss": 0.35582688450813293, + "step": 914 + }, + { + "epoch": 0.24299561811180453, + "grad_norm": 1.0947021466091125, + "learning_rate": 1.9723314695733557e-05, + "loss": 0.38500669598579407, + "step": 915 + }, + { + "epoch": 
0.2432611870933475, + "grad_norm": 0.9834121517145057, + "learning_rate": 1.9722288051700116e-05, + "loss": 0.32470762729644775, + "step": 916 + }, + { + "epoch": 0.24352675607489047, + "grad_norm": 1.0805011919993295, + "learning_rate": 1.9721259533324207e-05, + "loss": 0.3822774589061737, + "step": 917 + }, + { + "epoch": 0.24379232505643342, + "grad_norm": 0.9937398719966192, + "learning_rate": 1.972022914080411e-05, + "loss": 0.38374873995780945, + "step": 918 + }, + { + "epoch": 0.24405789403797637, + "grad_norm": 1.0550770033370775, + "learning_rate": 1.9719196874338472e-05, + "loss": 0.3419352173805237, + "step": 919 + }, + { + "epoch": 0.24432346301951932, + "grad_norm": 1.0164630853495407, + "learning_rate": 1.9718162734126308e-05, + "loss": 0.3294275403022766, + "step": 920 + }, + { + "epoch": 0.24458903200106227, + "grad_norm": 1.0668295499881337, + "learning_rate": 1.9717126720366982e-05, + "loss": 0.3585365414619446, + "step": 921 + }, + { + "epoch": 0.24485460098260522, + "grad_norm": 1.0609325079201495, + "learning_rate": 1.9716088833260225e-05, + "loss": 0.38130316138267517, + "step": 922 + }, + { + "epoch": 0.24512016996414818, + "grad_norm": 1.0577067392982809, + "learning_rate": 1.9715049073006133e-05, + "loss": 0.3745136260986328, + "step": 923 + }, + { + "epoch": 0.24538573894569116, + "grad_norm": 1.0457228779122651, + "learning_rate": 1.971400743980516e-05, + "loss": 0.3771660327911377, + "step": 924 + }, + { + "epoch": 0.2456513079272341, + "grad_norm": 1.0133861698501567, + "learning_rate": 1.971296393385812e-05, + "loss": 0.29661691188812256, + "step": 925 + }, + { + "epoch": 0.24591687690877706, + "grad_norm": 0.9516714902458889, + "learning_rate": 1.9711918555366184e-05, + "loss": 0.33783960342407227, + "step": 926 + }, + { + "epoch": 0.24618244589032, + "grad_norm": 1.2469460687001952, + "learning_rate": 1.971087130453089e-05, + "loss": 0.42983683943748474, + "step": 927 + }, + { + "epoch": 0.24644801487186296, + "grad_norm": 
0.9725914261438413, + "learning_rate": 1.9709822181554142e-05, + "loss": 0.32242363691329956, + "step": 928 + }, + { + "epoch": 0.24671358385340592, + "grad_norm": 1.0989308968162201, + "learning_rate": 1.970877118663819e-05, + "loss": 0.3576955795288086, + "step": 929 + }, + { + "epoch": 0.24697915283494887, + "grad_norm": 1.116595385391156, + "learning_rate": 1.9707718319985663e-05, + "loss": 0.4185359477996826, + "step": 930 + }, + { + "epoch": 0.24724472181649185, + "grad_norm": 1.1178442474909813, + "learning_rate": 1.970666358179953e-05, + "loss": 0.35377705097198486, + "step": 931 + }, + { + "epoch": 0.2475102907980348, + "grad_norm": 1.1350743092525455, + "learning_rate": 1.9705606972283143e-05, + "loss": 0.3860151171684265, + "step": 932 + }, + { + "epoch": 0.24777585977957775, + "grad_norm": 1.1915035264404457, + "learning_rate": 1.9704548491640195e-05, + "loss": 0.39463168382644653, + "step": 933 + }, + { + "epoch": 0.2480414287611207, + "grad_norm": 1.0462444044755623, + "learning_rate": 1.9703488140074752e-05, + "loss": 0.3670084774494171, + "step": 934 + }, + { + "epoch": 0.24830699774266365, + "grad_norm": 1.2914788702644175, + "learning_rate": 1.9702425917791242e-05, + "loss": 0.388730525970459, + "step": 935 + }, + { + "epoch": 0.2485725667242066, + "grad_norm": 1.128517931307855, + "learning_rate": 1.970136182499444e-05, + "loss": 0.38767656683921814, + "step": 936 + }, + { + "epoch": 0.24883813570574956, + "grad_norm": 1.0771582387425684, + "learning_rate": 1.9700295861889497e-05, + "loss": 0.35394930839538574, + "step": 937 + }, + { + "epoch": 0.24910370468729254, + "grad_norm": 1.0639329095738126, + "learning_rate": 1.9699228028681917e-05, + "loss": 0.3360324501991272, + "step": 938 + }, + { + "epoch": 0.2493692736688355, + "grad_norm": 1.116621384383513, + "learning_rate": 1.9698158325577563e-05, + "loss": 0.390169233083725, + "step": 939 + }, + { + "epoch": 0.24963484265037844, + "grad_norm": 1.108635788765439, + "learning_rate": 
1.9697086752782666e-05, + "loss": 0.3921571671962738, + "step": 940 + }, + { + "epoch": 0.2499004116319214, + "grad_norm": 1.0665933445619122, + "learning_rate": 1.9696013310503808e-05, + "loss": 0.3795739710330963, + "step": 941 + }, + { + "epoch": 0.25016598061346434, + "grad_norm": 1.2202319167117164, + "learning_rate": 1.9694937998947935e-05, + "loss": 0.3891025185585022, + "step": 942 + }, + { + "epoch": 0.2504315495950073, + "grad_norm": 0.9751921056908068, + "learning_rate": 1.9693860818322357e-05, + "loss": 0.3548225164413452, + "step": 943 + }, + { + "epoch": 0.25069711857655025, + "grad_norm": 1.0555900207888067, + "learning_rate": 1.9692781768834747e-05, + "loss": 0.3696819543838501, + "step": 944 + }, + { + "epoch": 0.2509626875580932, + "grad_norm": 1.1322184210541604, + "learning_rate": 1.9691700850693126e-05, + "loss": 0.3906037211418152, + "step": 945 + }, + { + "epoch": 0.25122825653963615, + "grad_norm": 1.072434154806742, + "learning_rate": 1.9690618064105883e-05, + "loss": 0.38181206583976746, + "step": 946 + }, + { + "epoch": 0.2514938255211791, + "grad_norm": 1.0644124497842522, + "learning_rate": 1.9689533409281765e-05, + "loss": 0.36904582381248474, + "step": 947 + }, + { + "epoch": 0.25175939450272206, + "grad_norm": 1.097105891991116, + "learning_rate": 1.9688446886429885e-05, + "loss": 0.3635823130607605, + "step": 948 + }, + { + "epoch": 0.25202496348426506, + "grad_norm": 0.9954310874837226, + "learning_rate": 1.9687358495759713e-05, + "loss": 0.3527260422706604, + "step": 949 + }, + { + "epoch": 0.252290532465808, + "grad_norm": 1.1902017812011518, + "learning_rate": 1.968626823748107e-05, + "loss": 0.3781110346317291, + "step": 950 + }, + { + "epoch": 0.25255610144735097, + "grad_norm": 1.0346217070487125, + "learning_rate": 1.968517611180415e-05, + "loss": 0.3931560814380646, + "step": 951 + }, + { + "epoch": 0.2528216704288939, + "grad_norm": 1.0783245371828571, + "learning_rate": 1.9684082118939503e-05, + "loss": 
0.39111074805259705, + "step": 952 + }, + { + "epoch": 0.25308723941043687, + "grad_norm": 1.2090013193363973, + "learning_rate": 1.9682986259098037e-05, + "loss": 0.385967880487442, + "step": 953 + }, + { + "epoch": 0.2533528083919798, + "grad_norm": 1.0103878099057118, + "learning_rate": 1.9681888532491022e-05, + "loss": 0.34006553888320923, + "step": 954 + }, + { + "epoch": 0.2536183773735228, + "grad_norm": 1.0077784550534965, + "learning_rate": 1.9680788939330086e-05, + "loss": 0.36069998145103455, + "step": 955 + }, + { + "epoch": 0.2538839463550657, + "grad_norm": 1.090649670414093, + "learning_rate": 1.9679687479827212e-05, + "loss": 0.3354898691177368, + "step": 956 + }, + { + "epoch": 0.2541495153366087, + "grad_norm": 1.0691933766101984, + "learning_rate": 1.9678584154194756e-05, + "loss": 0.35667335987091064, + "step": 957 + }, + { + "epoch": 0.25441508431815163, + "grad_norm": 1.2652121820599898, + "learning_rate": 1.9677478962645422e-05, + "loss": 0.4003029465675354, + "step": 958 + }, + { + "epoch": 0.2546806532996946, + "grad_norm": 1.0313200756086844, + "learning_rate": 1.9676371905392278e-05, + "loss": 0.34397056698799133, + "step": 959 + }, + { + "epoch": 0.25494622228123753, + "grad_norm": 1.0544706314753822, + "learning_rate": 1.9675262982648757e-05, + "loss": 0.35319578647613525, + "step": 960 + }, + { + "epoch": 0.2552117912627805, + "grad_norm": 1.0179000224070893, + "learning_rate": 1.967415219462864e-05, + "loss": 0.34840327501296997, + "step": 961 + }, + { + "epoch": 0.25547736024432344, + "grad_norm": 0.9360325612494472, + "learning_rate": 1.9673039541546076e-05, + "loss": 0.3298989534378052, + "step": 962 + }, + { + "epoch": 0.25574292922586644, + "grad_norm": 1.0904225305922717, + "learning_rate": 1.9671925023615572e-05, + "loss": 0.38438719511032104, + "step": 963 + }, + { + "epoch": 0.2560084982074094, + "grad_norm": 1.128608711014793, + "learning_rate": 1.9670808641051994e-05, + "loss": 0.3834493160247803, + "step": 964 + }, + { + 
"epoch": 0.25627406718895235, + "grad_norm": 1.0456501331264114, + "learning_rate": 1.9669690394070564e-05, + "loss": 0.3713288903236389, + "step": 965 + }, + { + "epoch": 0.2565396361704953, + "grad_norm": 1.0864184401996346, + "learning_rate": 1.966857028288687e-05, + "loss": 0.37564241886138916, + "step": 966 + }, + { + "epoch": 0.25680520515203825, + "grad_norm": 1.0329676619050974, + "learning_rate": 1.9667448307716857e-05, + "loss": 0.30162689089775085, + "step": 967 + }, + { + "epoch": 0.2570707741335812, + "grad_norm": 1.0948768995323135, + "learning_rate": 1.9666324468776826e-05, + "loss": 0.35969680547714233, + "step": 968 + }, + { + "epoch": 0.25733634311512416, + "grad_norm": 1.206651724690857, + "learning_rate": 1.9665198766283444e-05, + "loss": 0.40947285294532776, + "step": 969 + }, + { + "epoch": 0.2576019120966671, + "grad_norm": 1.0651964473806064, + "learning_rate": 1.9664071200453726e-05, + "loss": 0.35868343710899353, + "step": 970 + }, + { + "epoch": 0.25786748107821006, + "grad_norm": 1.1330033214419297, + "learning_rate": 1.966294177150506e-05, + "loss": 0.3569234311580658, + "step": 971 + }, + { + "epoch": 0.258133050059753, + "grad_norm": 1.1641224987322216, + "learning_rate": 1.9661810479655184e-05, + "loss": 0.3381764888763428, + "step": 972 + }, + { + "epoch": 0.25839861904129596, + "grad_norm": 1.535927577191984, + "learning_rate": 1.9660677325122196e-05, + "loss": 0.39847785234451294, + "step": 973 + }, + { + "epoch": 0.2586641880228389, + "grad_norm": 0.9608622914302752, + "learning_rate": 1.965954230812456e-05, + "loss": 0.33162468671798706, + "step": 974 + }, + { + "epoch": 0.25892975700438187, + "grad_norm": 1.0421688584245348, + "learning_rate": 1.9658405428881087e-05, + "loss": 0.3627605438232422, + "step": 975 + }, + { + "epoch": 0.2591953259859248, + "grad_norm": 1.0501672081861986, + "learning_rate": 1.9657266687610965e-05, + "loss": 0.3253796100616455, + "step": 976 + }, + { + "epoch": 0.2594608949674678, + "grad_norm": 
1.0198628618780734, + "learning_rate": 1.9656126084533716e-05, + "loss": 0.3341265916824341, + "step": 977 + }, + { + "epoch": 0.2597264639490108, + "grad_norm": 1.0202967346949672, + "learning_rate": 1.9654983619869242e-05, + "loss": 0.3714970052242279, + "step": 978 + }, + { + "epoch": 0.25999203293055373, + "grad_norm": 1.0333982958482495, + "learning_rate": 1.9653839293837798e-05, + "loss": 0.3360912501811981, + "step": 979 + }, + { + "epoch": 0.2602576019120967, + "grad_norm": 1.0322459892827835, + "learning_rate": 1.9652693106659995e-05, + "loss": 0.3780854642391205, + "step": 980 + }, + { + "epoch": 0.26052317089363963, + "grad_norm": 1.1062219940451128, + "learning_rate": 1.9651545058556803e-05, + "loss": 0.33595478534698486, + "step": 981 + }, + { + "epoch": 0.2607887398751826, + "grad_norm": 1.111464982167328, + "learning_rate": 1.965039514974955e-05, + "loss": 0.3608357012271881, + "step": 982 + }, + { + "epoch": 0.26105430885672554, + "grad_norm": 1.0024532391943957, + "learning_rate": 1.964924338045993e-05, + "loss": 0.3807666599750519, + "step": 983 + }, + { + "epoch": 0.2613198778382685, + "grad_norm": 1.0213030373156555, + "learning_rate": 1.964808975090999e-05, + "loss": 0.3551647663116455, + "step": 984 + }, + { + "epoch": 0.26158544681981144, + "grad_norm": 1.0761922389740786, + "learning_rate": 1.9646934261322135e-05, + "loss": 0.3771904706954956, + "step": 985 + }, + { + "epoch": 0.2618510158013544, + "grad_norm": 1.1925998045571422, + "learning_rate": 1.964577691191913e-05, + "loss": 0.41103222966194153, + "step": 986 + }, + { + "epoch": 0.26211658478289734, + "grad_norm": 1.0270282722515527, + "learning_rate": 1.9644617702924093e-05, + "loss": 0.34439292550086975, + "step": 987 + }, + { + "epoch": 0.2623821537644403, + "grad_norm": 1.1578988390038234, + "learning_rate": 1.9643456634560515e-05, + "loss": 0.41214391589164734, + "step": 988 + }, + { + "epoch": 0.26264772274598325, + "grad_norm": 0.9879567855265076, + "learning_rate": 
1.9642293707052232e-05, + "loss": 0.3186502754688263, + "step": 989 + }, + { + "epoch": 0.2629132917275262, + "grad_norm": 1.039224300824638, + "learning_rate": 1.9641128920623438e-05, + "loss": 0.3534559905529022, + "step": 990 + }, + { + "epoch": 0.2631788607090692, + "grad_norm": 1.0867820667103292, + "learning_rate": 1.96399622754987e-05, + "loss": 0.35217320919036865, + "step": 991 + }, + { + "epoch": 0.26344442969061216, + "grad_norm": 0.954421559413849, + "learning_rate": 1.9638793771902924e-05, + "loss": 0.31661587953567505, + "step": 992 + }, + { + "epoch": 0.2637099986721551, + "grad_norm": 0.9881195075112362, + "learning_rate": 1.9637623410061392e-05, + "loss": 0.32468482851982117, + "step": 993 + }, + { + "epoch": 0.26397556765369806, + "grad_norm": 1.0355017939200293, + "learning_rate": 1.9636451190199727e-05, + "loss": 0.346771776676178, + "step": 994 + }, + { + "epoch": 0.264241136635241, + "grad_norm": 1.0997948902450267, + "learning_rate": 1.9635277112543928e-05, + "loss": 0.36409270763397217, + "step": 995 + }, + { + "epoch": 0.26450670561678397, + "grad_norm": 1.2132528670947562, + "learning_rate": 1.963410117732034e-05, + "loss": 0.404967725276947, + "step": 996 + }, + { + "epoch": 0.2647722745983269, + "grad_norm": 1.1962964423617835, + "learning_rate": 1.9632923384755666e-05, + "loss": 0.39506661891937256, + "step": 997 + }, + { + "epoch": 0.26503784357986987, + "grad_norm": 1.1967751692769375, + "learning_rate": 1.9631743735076972e-05, + "loss": 0.3833203911781311, + "step": 998 + }, + { + "epoch": 0.2653034125614128, + "grad_norm": 1.083140773107417, + "learning_rate": 1.9630562228511682e-05, + "loss": 0.34522518515586853, + "step": 999 + }, + { + "epoch": 0.2655689815429558, + "grad_norm": 1.1367328076589556, + "learning_rate": 1.962937886528758e-05, + "loss": 0.3818400800228119, + "step": 1000 + }, + { + "epoch": 0.2658345505244987, + "grad_norm": 1.2496699132911573, + "learning_rate": 1.9628193645632796e-05, + "loss": 0.40827828645706177, 
+ "step": 1001 + }, + { + "epoch": 0.2661001195060417, + "grad_norm": 1.0406728708542907, + "learning_rate": 1.962700656977583e-05, + "loss": 0.3448852002620697, + "step": 1002 + }, + { + "epoch": 0.26636568848758463, + "grad_norm": 1.1035895986897222, + "learning_rate": 1.9625817637945542e-05, + "loss": 0.36560773849487305, + "step": 1003 + }, + { + "epoch": 0.2666312574691276, + "grad_norm": 1.1637977684704512, + "learning_rate": 1.962462685037114e-05, + "loss": 0.38305893540382385, + "step": 1004 + }, + { + "epoch": 0.2668968264506706, + "grad_norm": 1.0320363555261158, + "learning_rate": 1.962343420728219e-05, + "loss": 0.3562568426132202, + "step": 1005 + }, + { + "epoch": 0.26716239543221354, + "grad_norm": 1.18312934129538, + "learning_rate": 1.9622239708908626e-05, + "loss": 0.37458860874176025, + "step": 1006 + }, + { + "epoch": 0.2674279644137565, + "grad_norm": 1.058042672523148, + "learning_rate": 1.9621043355480726e-05, + "loss": 0.35852503776550293, + "step": 1007 + }, + { + "epoch": 0.26769353339529944, + "grad_norm": 1.0975239398171568, + "learning_rate": 1.961984514722914e-05, + "loss": 0.4056578278541565, + "step": 1008 + }, + { + "epoch": 0.2679591023768424, + "grad_norm": 1.1773057151207822, + "learning_rate": 1.9618645084384863e-05, + "loss": 0.4531296491622925, + "step": 1009 + }, + { + "epoch": 0.26822467135838535, + "grad_norm": 0.9095840908563808, + "learning_rate": 1.9617443167179256e-05, + "loss": 0.3356376886367798, + "step": 1010 + }, + { + "epoch": 0.2684902403399283, + "grad_norm": 1.09880831555839, + "learning_rate": 1.9616239395844033e-05, + "loss": 0.38045161962509155, + "step": 1011 + }, + { + "epoch": 0.26875580932147125, + "grad_norm": 1.028451509847456, + "learning_rate": 1.9615033770611268e-05, + "loss": 0.3549511730670929, + "step": 1012 + }, + { + "epoch": 0.2690213783030142, + "grad_norm": 1.0546213631772847, + "learning_rate": 1.9613826291713393e-05, + "loss": 0.33363252878189087, + "step": 1013 + }, + { + "epoch": 
0.26928694728455715, + "grad_norm": 0.9539256345754278, + "learning_rate": 1.961261695938319e-05, + "loss": 0.3443339467048645, + "step": 1014 + }, + { + "epoch": 0.2695525162661001, + "grad_norm": 0.9897755385014708, + "learning_rate": 1.9611405773853807e-05, + "loss": 0.3258364796638489, + "step": 1015 + }, + { + "epoch": 0.26981808524764306, + "grad_norm": 1.0357196980681809, + "learning_rate": 1.961019273535875e-05, + "loss": 0.357122540473938, + "step": 1016 + }, + { + "epoch": 0.270083654229186, + "grad_norm": 0.9668495504097999, + "learning_rate": 1.9608977844131875e-05, + "loss": 0.32092082500457764, + "step": 1017 + }, + { + "epoch": 0.27034922321072896, + "grad_norm": 1.0067299219043435, + "learning_rate": 1.96077611004074e-05, + "loss": 0.36354511976242065, + "step": 1018 + }, + { + "epoch": 0.27061479219227197, + "grad_norm": 1.0982243281899924, + "learning_rate": 1.9606542504419895e-05, + "loss": 0.37128758430480957, + "step": 1019 + }, + { + "epoch": 0.2708803611738149, + "grad_norm": 1.1112959838703056, + "learning_rate": 1.9605322056404294e-05, + "loss": 0.3732859790325165, + "step": 1020 + }, + { + "epoch": 0.2711459301553579, + "grad_norm": 1.0058814849372155, + "learning_rate": 1.9604099756595885e-05, + "loss": 0.32642674446105957, + "step": 1021 + }, + { + "epoch": 0.2714114991369008, + "grad_norm": 1.10371255398192, + "learning_rate": 1.9602875605230313e-05, + "loss": 0.376791775226593, + "step": 1022 + }, + { + "epoch": 0.2716770681184438, + "grad_norm": 1.0603007725295257, + "learning_rate": 1.960164960254358e-05, + "loss": 0.34514784812927246, + "step": 1023 + }, + { + "epoch": 0.27194263709998673, + "grad_norm": 1.225533197470795, + "learning_rate": 1.9600421748772044e-05, + "loss": 0.3752189576625824, + "step": 1024 + }, + { + "epoch": 0.2722082060815297, + "grad_norm": 1.0783483670765837, + "learning_rate": 1.959919204415242e-05, + "loss": 0.33100831508636475, + "step": 1025 + }, + { + "epoch": 0.27247377506307263, + "grad_norm": 
1.1910668751599112, + "learning_rate": 1.9597960488921785e-05, + "loss": 0.42713654041290283, + "step": 1026 + }, + { + "epoch": 0.2727393440446156, + "grad_norm": 1.110777223027095, + "learning_rate": 1.9596727083317565e-05, + "loss": 0.3746519684791565, + "step": 1027 + }, + { + "epoch": 0.27300491302615854, + "grad_norm": 1.1133725792972708, + "learning_rate": 1.9595491827577543e-05, + "loss": 0.39962098002433777, + "step": 1028 + }, + { + "epoch": 0.2732704820077015, + "grad_norm": 1.0544310192284179, + "learning_rate": 1.9594254721939866e-05, + "loss": 0.35112401843070984, + "step": 1029 + }, + { + "epoch": 0.27353605098924444, + "grad_norm": 1.0749153592990304, + "learning_rate": 1.9593015766643037e-05, + "loss": 0.3648139238357544, + "step": 1030 + }, + { + "epoch": 0.2738016199707874, + "grad_norm": 1.0268996180520502, + "learning_rate": 1.9591774961925902e-05, + "loss": 0.31544098258018494, + "step": 1031 + }, + { + "epoch": 0.27406718895233034, + "grad_norm": 1.1260952074052377, + "learning_rate": 1.959053230802768e-05, + "loss": 0.3593738079071045, + "step": 1032 + }, + { + "epoch": 0.27433275793387335, + "grad_norm": 1.1009303195981317, + "learning_rate": 1.958928780518794e-05, + "loss": 0.39784368872642517, + "step": 1033 + }, + { + "epoch": 0.2745983269154163, + "grad_norm": 1.1304731324804922, + "learning_rate": 1.9588041453646606e-05, + "loss": 0.3869936168193817, + "step": 1034 + }, + { + "epoch": 0.27486389589695925, + "grad_norm": 0.9803124730292929, + "learning_rate": 1.958679325364396e-05, + "loss": 0.31108593940734863, + "step": 1035 + }, + { + "epoch": 0.2751294648785022, + "grad_norm": 1.098791994520666, + "learning_rate": 1.958554320542064e-05, + "loss": 0.3917708098888397, + "step": 1036 + }, + { + "epoch": 0.27539503386004516, + "grad_norm": 0.9969159455112034, + "learning_rate": 1.958429130921764e-05, + "loss": 0.36782944202423096, + "step": 1037 + }, + { + "epoch": 0.2756606028415881, + "grad_norm": 0.9381100088398062, + 
"learning_rate": 1.9583037565276314e-05, + "loss": 0.36196422576904297, + "step": 1038 + }, + { + "epoch": 0.27592617182313106, + "grad_norm": 1.0783473143219733, + "learning_rate": 1.9581781973838368e-05, + "loss": 0.32208555936813354, + "step": 1039 + }, + { + "epoch": 0.276191740804674, + "grad_norm": 0.9653316626874986, + "learning_rate": 1.958052453514586e-05, + "loss": 0.33451759815216064, + "step": 1040 + }, + { + "epoch": 0.27645730978621696, + "grad_norm": 1.0328342572912144, + "learning_rate": 1.9579265249441216e-05, + "loss": 0.3228047788143158, + "step": 1041 + }, + { + "epoch": 0.2767228787677599, + "grad_norm": 1.0944658380016739, + "learning_rate": 1.957800411696721e-05, + "loss": 0.36992791295051575, + "step": 1042 + }, + { + "epoch": 0.27698844774930287, + "grad_norm": 0.9799580951396849, + "learning_rate": 1.9576741137966967e-05, + "loss": 0.3072342276573181, + "step": 1043 + }, + { + "epoch": 0.2772540167308458, + "grad_norm": 1.0637046756594408, + "learning_rate": 1.9575476312683985e-05, + "loss": 0.3372080326080322, + "step": 1044 + }, + { + "epoch": 0.27751958571238877, + "grad_norm": 1.0509701364189301, + "learning_rate": 1.95742096413621e-05, + "loss": 0.34725332260131836, + "step": 1045 + }, + { + "epoch": 0.2777851546939317, + "grad_norm": 1.1053591471100805, + "learning_rate": 1.9572941124245516e-05, + "loss": 0.36714982986450195, + "step": 1046 + }, + { + "epoch": 0.27805072367547473, + "grad_norm": 1.208127444221669, + "learning_rate": 1.957167076157878e-05, + "loss": 0.4163498282432556, + "step": 1047 + }, + { + "epoch": 0.2783162926570177, + "grad_norm": 1.1861975128714084, + "learning_rate": 1.9570398553606815e-05, + "loss": 0.40059348940849304, + "step": 1048 + }, + { + "epoch": 0.27858186163856063, + "grad_norm": 1.085993120538819, + "learning_rate": 1.956912450057488e-05, + "loss": 0.3622320294380188, + "step": 1049 + }, + { + "epoch": 0.2788474306201036, + "grad_norm": 1.1326017870689584, + "learning_rate": 
1.9567848602728595e-05, + "loss": 0.35159534215927124, + "step": 1050 + }, + { + "epoch": 0.27911299960164654, + "grad_norm": 0.9516936878211085, + "learning_rate": 1.9566570860313944e-05, + "loss": 0.3093762993812561, + "step": 1051 + }, + { + "epoch": 0.2793785685831895, + "grad_norm": 1.040326152894859, + "learning_rate": 1.9565291273577255e-05, + "loss": 0.341474324464798, + "step": 1052 + }, + { + "epoch": 0.27964413756473244, + "grad_norm": 1.0885626452470811, + "learning_rate": 1.9564009842765225e-05, + "loss": 0.35376566648483276, + "step": 1053 + }, + { + "epoch": 0.2799097065462754, + "grad_norm": 1.09154548256864, + "learning_rate": 1.9562726568124892e-05, + "loss": 0.3487662374973297, + "step": 1054 + }, + { + "epoch": 0.28017527552781835, + "grad_norm": 1.014222924008021, + "learning_rate": 1.956144144990366e-05, + "loss": 0.3610745370388031, + "step": 1055 + }, + { + "epoch": 0.2804408445093613, + "grad_norm": 0.9789890869027496, + "learning_rate": 1.9560154488349284e-05, + "loss": 0.33230137825012207, + "step": 1056 + }, + { + "epoch": 0.28070641349090425, + "grad_norm": 1.0104241821081763, + "learning_rate": 1.9558865683709875e-05, + "loss": 0.310351699590683, + "step": 1057 + }, + { + "epoch": 0.2809719824724472, + "grad_norm": 1.1188708821966176, + "learning_rate": 1.9557575036233897e-05, + "loss": 0.39930224418640137, + "step": 1058 + }, + { + "epoch": 0.28123755145399015, + "grad_norm": 1.0498907782820184, + "learning_rate": 1.955628254617017e-05, + "loss": 0.3345295488834381, + "step": 1059 + }, + { + "epoch": 0.2815031204355331, + "grad_norm": 1.1059864789744056, + "learning_rate": 1.9554988213767875e-05, + "loss": 0.37963107228279114, + "step": 1060 + }, + { + "epoch": 0.2817686894170761, + "grad_norm": 1.0825219178132603, + "learning_rate": 1.9553692039276545e-05, + "loss": 0.3923654854297638, + "step": 1061 + }, + { + "epoch": 0.28203425839861906, + "grad_norm": 1.0736283126776336, + "learning_rate": 1.9552394022946068e-05, + "loss": 
0.363646924495697, + "step": 1062 + }, + { + "epoch": 0.282299827380162, + "grad_norm": 1.1051684289136041, + "learning_rate": 1.9551094165026677e-05, + "loss": 0.35486382246017456, + "step": 1063 + }, + { + "epoch": 0.28256539636170497, + "grad_norm": 1.0845117937449689, + "learning_rate": 1.954979246576898e-05, + "loss": 0.35215455293655396, + "step": 1064 + }, + { + "epoch": 0.2828309653432479, + "grad_norm": 1.1587243435425785, + "learning_rate": 1.9548488925423924e-05, + "loss": 0.3936809003353119, + "step": 1065 + }, + { + "epoch": 0.28309653432479087, + "grad_norm": 1.0399965264634783, + "learning_rate": 1.9547183544242817e-05, + "loss": 0.36852866411209106, + "step": 1066 + }, + { + "epoch": 0.2833621033063338, + "grad_norm": 1.0679817467710029, + "learning_rate": 1.954587632247732e-05, + "loss": 0.3552001714706421, + "step": 1067 + }, + { + "epoch": 0.2836276722878768, + "grad_norm": 1.1330169189394568, + "learning_rate": 1.9544567260379455e-05, + "loss": 0.3684498965740204, + "step": 1068 + }, + { + "epoch": 0.2838932412694197, + "grad_norm": 0.9857931835351914, + "learning_rate": 1.9543256358201586e-05, + "loss": 0.3367026448249817, + "step": 1069 + }, + { + "epoch": 0.2841588102509627, + "grad_norm": 1.0677692738667734, + "learning_rate": 1.9541943616196443e-05, + "loss": 0.3702335059642792, + "step": 1070 + }, + { + "epoch": 0.28442437923250563, + "grad_norm": 1.1114119189633371, + "learning_rate": 1.9540629034617108e-05, + "loss": 0.3430984318256378, + "step": 1071 + }, + { + "epoch": 0.2846899482140486, + "grad_norm": 1.1406170357402363, + "learning_rate": 1.953931261371702e-05, + "loss": 0.36514735221862793, + "step": 1072 + }, + { + "epoch": 0.28495551719559153, + "grad_norm": 1.0428104806049732, + "learning_rate": 1.9537994353749963e-05, + "loss": 0.3524945080280304, + "step": 1073 + }, + { + "epoch": 0.2852210861771345, + "grad_norm": 1.0283973360981475, + "learning_rate": 1.9536674254970088e-05, + "loss": 0.32405683398246765, + "step": 1074 + }, 
+ { + "epoch": 0.2854866551586775, + "grad_norm": 1.0649875575316718, + "learning_rate": 1.9535352317631888e-05, + "loss": 0.30863165855407715, + "step": 1075 + }, + { + "epoch": 0.28575222414022045, + "grad_norm": 1.0647565002745494, + "learning_rate": 1.953402854199022e-05, + "loss": 0.34343889355659485, + "step": 1076 + }, + { + "epoch": 0.2860177931217634, + "grad_norm": 1.2339349330872973, + "learning_rate": 1.9532702928300292e-05, + "loss": 0.3639434576034546, + "step": 1077 + }, + { + "epoch": 0.28628336210330635, + "grad_norm": 1.0888261251069975, + "learning_rate": 1.9531375476817667e-05, + "loss": 0.3380300998687744, + "step": 1078 + }, + { + "epoch": 0.2865489310848493, + "grad_norm": 1.1078839119175599, + "learning_rate": 1.9530046187798267e-05, + "loss": 0.3323265016078949, + "step": 1079 + }, + { + "epoch": 0.28681450006639225, + "grad_norm": 1.0529271541493659, + "learning_rate": 1.9528715061498355e-05, + "loss": 0.3439220190048218, + "step": 1080 + }, + { + "epoch": 0.2870800690479352, + "grad_norm": 1.088357435010649, + "learning_rate": 1.952738209817456e-05, + "loss": 0.36376965045928955, + "step": 1081 + }, + { + "epoch": 0.28734563802947816, + "grad_norm": 1.0188116446188513, + "learning_rate": 1.952604729808386e-05, + "loss": 0.3281211853027344, + "step": 1082 + }, + { + "epoch": 0.2876112070110211, + "grad_norm": 1.0999135645201878, + "learning_rate": 1.9524710661483594e-05, + "loss": 0.3538089990615845, + "step": 1083 + }, + { + "epoch": 0.28787677599256406, + "grad_norm": 1.1475903462769852, + "learning_rate": 1.9523372188631442e-05, + "loss": 0.3982803225517273, + "step": 1084 + }, + { + "epoch": 0.288142344974107, + "grad_norm": 1.11408923860859, + "learning_rate": 1.9522031879785453e-05, + "loss": 0.3958810567855835, + "step": 1085 + }, + { + "epoch": 0.28840791395564996, + "grad_norm": 1.191451776763126, + "learning_rate": 1.9520689735204016e-05, + "loss": 0.40133988857269287, + "step": 1086 + }, + { + "epoch": 0.2886734829371929, + 
"grad_norm": 1.048862195613205, + "learning_rate": 1.9519345755145886e-05, + "loss": 0.32411646842956543, + "step": 1087 + }, + { + "epoch": 0.28893905191873587, + "grad_norm": 1.210003646730205, + "learning_rate": 1.9517999939870166e-05, + "loss": 0.38678207993507385, + "step": 1088 + }, + { + "epoch": 0.2892046209002789, + "grad_norm": 1.0663258874668164, + "learning_rate": 1.951665228963631e-05, + "loss": 0.36829686164855957, + "step": 1089 + }, + { + "epoch": 0.2894701898818218, + "grad_norm": 0.9884592653808488, + "learning_rate": 1.9515302804704134e-05, + "loss": 0.38631704449653625, + "step": 1090 + }, + { + "epoch": 0.2897357588633648, + "grad_norm": 1.1934503112083867, + "learning_rate": 1.9513951485333798e-05, + "loss": 0.39288902282714844, + "step": 1091 + }, + { + "epoch": 0.29000132784490773, + "grad_norm": 1.0804742457342014, + "learning_rate": 1.9512598331785822e-05, + "loss": 0.3655658960342407, + "step": 1092 + }, + { + "epoch": 0.2902668968264507, + "grad_norm": 0.9929300268939649, + "learning_rate": 1.9511243344321076e-05, + "loss": 0.3263852596282959, + "step": 1093 + }, + { + "epoch": 0.29053246580799363, + "grad_norm": 1.1166275426043832, + "learning_rate": 1.9509886523200792e-05, + "loss": 0.37939125299453735, + "step": 1094 + }, + { + "epoch": 0.2907980347895366, + "grad_norm": 1.074761796186792, + "learning_rate": 1.9508527868686543e-05, + "loss": 0.34218865633010864, + "step": 1095 + }, + { + "epoch": 0.29106360377107954, + "grad_norm": 1.036633851483027, + "learning_rate": 1.9507167381040263e-05, + "loss": 0.368261456489563, + "step": 1096 + }, + { + "epoch": 0.2913291727526225, + "grad_norm": 1.083724731335207, + "learning_rate": 1.950580506052424e-05, + "loss": 0.36133286356925964, + "step": 1097 + }, + { + "epoch": 0.29159474173416544, + "grad_norm": 1.0542758401630365, + "learning_rate": 1.9504440907401113e-05, + "loss": 0.3667418658733368, + "step": 1098 + }, + { + "epoch": 0.2918603107157084, + "grad_norm": 0.9961595646698646, + 
"learning_rate": 1.950307492193387e-05, + "loss": 0.34444570541381836, + "step": 1099 + }, + { + "epoch": 0.29212587969725134, + "grad_norm": 1.1203470867439278, + "learning_rate": 1.9501707104385863e-05, + "loss": 0.41261589527130127, + "step": 1100 + }, + { + "epoch": 0.2923914486787943, + "grad_norm": 1.0847270622391922, + "learning_rate": 1.9500337455020788e-05, + "loss": 0.3762981593608856, + "step": 1101 + }, + { + "epoch": 0.29265701766033725, + "grad_norm": 1.108635996430537, + "learning_rate": 1.9498965974102697e-05, + "loss": 0.3527417480945587, + "step": 1102 + }, + { + "epoch": 0.29292258664188026, + "grad_norm": 1.1555485155020386, + "learning_rate": 1.9497592661895996e-05, + "loss": 0.34812286496162415, + "step": 1103 + }, + { + "epoch": 0.2931881556234232, + "grad_norm": 0.9844968948580171, + "learning_rate": 1.9496217518665444e-05, + "loss": 0.33663398027420044, + "step": 1104 + }, + { + "epoch": 0.29345372460496616, + "grad_norm": 0.997090208380272, + "learning_rate": 1.9494840544676156e-05, + "loss": 0.3632991313934326, + "step": 1105 + }, + { + "epoch": 0.2937192935865091, + "grad_norm": 1.3515018592791732, + "learning_rate": 1.9493461740193587e-05, + "loss": 0.37389490008354187, + "step": 1106 + }, + { + "epoch": 0.29398486256805206, + "grad_norm": 1.204356467911551, + "learning_rate": 1.949208110548356e-05, + "loss": 0.3634020686149597, + "step": 1107 + }, + { + "epoch": 0.294250431549595, + "grad_norm": 1.0778805299295515, + "learning_rate": 1.9490698640812247e-05, + "loss": 0.36032742261886597, + "step": 1108 + }, + { + "epoch": 0.29451600053113797, + "grad_norm": 1.1504972318858309, + "learning_rate": 1.9489314346446164e-05, + "loss": 0.3385765552520752, + "step": 1109 + }, + { + "epoch": 0.2947815695126809, + "grad_norm": 1.0946200184976398, + "learning_rate": 1.9487928222652195e-05, + "loss": 0.3751915991306305, + "step": 1110 + }, + { + "epoch": 0.29504713849422387, + "grad_norm": 1.0903856446796527, + "learning_rate": 
1.9486540269697564e-05, + "loss": 0.36069825291633606, + "step": 1111 + }, + { + "epoch": 0.2953127074757668, + "grad_norm": 1.009573568422265, + "learning_rate": 1.948515048784985e-05, + "loss": 0.32703787088394165, + "step": 1112 + }, + { + "epoch": 0.2955782764573098, + "grad_norm": 0.9196963642088989, + "learning_rate": 1.948375887737699e-05, + "loss": 0.312494158744812, + "step": 1113 + }, + { + "epoch": 0.2958438454388527, + "grad_norm": 0.9880564768480579, + "learning_rate": 1.9482365438547272e-05, + "loss": 0.30626165866851807, + "step": 1114 + }, + { + "epoch": 0.2961094144203957, + "grad_norm": 1.07827456569524, + "learning_rate": 1.948097017162933e-05, + "loss": 0.3625817894935608, + "step": 1115 + }, + { + "epoch": 0.29637498340193863, + "grad_norm": 1.1789711489550672, + "learning_rate": 1.9479573076892152e-05, + "loss": 0.38403773307800293, + "step": 1116 + }, + { + "epoch": 0.2966405523834816, + "grad_norm": 1.0638061154391991, + "learning_rate": 1.9478174154605093e-05, + "loss": 0.3645164966583252, + "step": 1117 + }, + { + "epoch": 0.2969061213650246, + "grad_norm": 1.0428170431433939, + "learning_rate": 1.9476773405037836e-05, + "loss": 0.3714389503002167, + "step": 1118 + }, + { + "epoch": 0.29717169034656754, + "grad_norm": 1.1488169814057956, + "learning_rate": 1.9475370828460436e-05, + "loss": 0.39809900522232056, + "step": 1119 + }, + { + "epoch": 0.2974372593281105, + "grad_norm": 1.0702503358715294, + "learning_rate": 1.9473966425143292e-05, + "loss": 0.3698490262031555, + "step": 1120 + }, + { + "epoch": 0.29770282830965344, + "grad_norm": 1.0166542138266799, + "learning_rate": 1.947256019535716e-05, + "loss": 0.3072658181190491, + "step": 1121 + }, + { + "epoch": 0.2979683972911964, + "grad_norm": 1.0479599499698302, + "learning_rate": 1.947115213937314e-05, + "loss": 0.3294365406036377, + "step": 1122 + }, + { + "epoch": 0.29823396627273935, + "grad_norm": 1.007749929257712, + "learning_rate": 1.9469742257462684e-05, + "loss": 
0.34933674335479736, + "step": 1123 + }, + { + "epoch": 0.2984995352542823, + "grad_norm": 1.133473784296847, + "learning_rate": 1.946833054989761e-05, + "loss": 0.34586772322654724, + "step": 1124 + }, + { + "epoch": 0.29876510423582525, + "grad_norm": 1.0225090189343862, + "learning_rate": 1.9466917016950076e-05, + "loss": 0.33158159255981445, + "step": 1125 + }, + { + "epoch": 0.2990306732173682, + "grad_norm": 1.0162208348084125, + "learning_rate": 1.946550165889259e-05, + "loss": 0.32665887475013733, + "step": 1126 + }, + { + "epoch": 0.29929624219891116, + "grad_norm": 1.1065475895733048, + "learning_rate": 1.946408447599802e-05, + "loss": 0.3333032429218292, + "step": 1127 + }, + { + "epoch": 0.2995618111804541, + "grad_norm": 1.0958997421479173, + "learning_rate": 1.9462665468539582e-05, + "loss": 0.3747228980064392, + "step": 1128 + }, + { + "epoch": 0.29982738016199706, + "grad_norm": 0.9447906277138843, + "learning_rate": 1.9461244636790845e-05, + "loss": 0.34040436148643494, + "step": 1129 + }, + { + "epoch": 0.30009294914354, + "grad_norm": 1.0062775259583612, + "learning_rate": 1.9459821981025723e-05, + "loss": 0.3279584050178528, + "step": 1130 + }, + { + "epoch": 0.30035851812508296, + "grad_norm": 1.136819731097147, + "learning_rate": 1.9458397501518496e-05, + "loss": 0.33507707715034485, + "step": 1131 + }, + { + "epoch": 0.30062408710662597, + "grad_norm": 0.9978141677663763, + "learning_rate": 1.945697119854378e-05, + "loss": 0.3511529862880707, + "step": 1132 + }, + { + "epoch": 0.3008896560881689, + "grad_norm": 1.1038696900269844, + "learning_rate": 1.945554307237655e-05, + "loss": 0.33260345458984375, + "step": 1133 + }, + { + "epoch": 0.3011552250697119, + "grad_norm": 1.1267244347055163, + "learning_rate": 1.9454113123292133e-05, + "loss": 0.37698423862457275, + "step": 1134 + }, + { + "epoch": 0.3014207940512548, + "grad_norm": 1.0482054605062838, + "learning_rate": 1.945268135156621e-05, + "loss": 0.34843316674232483, + "step": 1135 + }, 
+ { + "epoch": 0.3016863630327978, + "grad_norm": 1.1518938911568848, + "learning_rate": 1.9451247757474805e-05, + "loss": 0.38723987340927124, + "step": 1136 + }, + { + "epoch": 0.30195193201434073, + "grad_norm": 1.0597410032778982, + "learning_rate": 1.9449812341294302e-05, + "loss": 0.3836795389652252, + "step": 1137 + }, + { + "epoch": 0.3022175009958837, + "grad_norm": 0.9828275773453091, + "learning_rate": 1.9448375103301424e-05, + "loss": 0.3362433612346649, + "step": 1138 + }, + { + "epoch": 0.30248306997742663, + "grad_norm": 1.0750556057741842, + "learning_rate": 1.9446936043773264e-05, + "loss": 0.3615792393684387, + "step": 1139 + }, + { + "epoch": 0.3027486389589696, + "grad_norm": 1.0233339727957385, + "learning_rate": 1.944549516298725e-05, + "loss": 0.33693915605545044, + "step": 1140 + }, + { + "epoch": 0.30301420794051254, + "grad_norm": 1.0074205515838075, + "learning_rate": 1.9444052461221167e-05, + "loss": 0.32611170411109924, + "step": 1141 + }, + { + "epoch": 0.3032797769220555, + "grad_norm": 1.0257687736898828, + "learning_rate": 1.9442607938753153e-05, + "loss": 0.3504132032394409, + "step": 1142 + }, + { + "epoch": 0.30354534590359844, + "grad_norm": 1.081217851264946, + "learning_rate": 1.944116159586169e-05, + "loss": 0.3598168194293976, + "step": 1143 + }, + { + "epoch": 0.3038109148851414, + "grad_norm": 1.025673115447757, + "learning_rate": 1.9439713432825625e-05, + "loss": 0.33447909355163574, + "step": 1144 + }, + { + "epoch": 0.30407648386668434, + "grad_norm": 0.9795127759513904, + "learning_rate": 1.943826344992414e-05, + "loss": 0.34026333689689636, + "step": 1145 + }, + { + "epoch": 0.30434205284822735, + "grad_norm": 1.070042442644686, + "learning_rate": 1.9436811647436772e-05, + "loss": 0.323203980922699, + "step": 1146 + }, + { + "epoch": 0.3046076218297703, + "grad_norm": 1.0588861737680213, + "learning_rate": 1.943535802564342e-05, + "loss": 0.332398921251297, + "step": 1147 + }, + { + "epoch": 0.30487319081131325, + 
"grad_norm": 1.175168490214782, + "learning_rate": 1.9433902584824316e-05, + "loss": 0.3882995545864105, + "step": 1148 + }, + { + "epoch": 0.3051387597928562, + "grad_norm": 1.093435738226519, + "learning_rate": 1.943244532526006e-05, + "loss": 0.35262739658355713, + "step": 1149 + }, + { + "epoch": 0.30540432877439916, + "grad_norm": 1.1043029209432185, + "learning_rate": 1.9430986247231586e-05, + "loss": 0.39694511890411377, + "step": 1150 + }, + { + "epoch": 0.3056698977559421, + "grad_norm": 1.1276348856512544, + "learning_rate": 1.9429525351020197e-05, + "loss": 0.3692580759525299, + "step": 1151 + }, + { + "epoch": 0.30593546673748506, + "grad_norm": 1.1284903074468042, + "learning_rate": 1.9428062636907526e-05, + "loss": 0.3685402572154999, + "step": 1152 + }, + { + "epoch": 0.306201035719028, + "grad_norm": 1.1120189967723886, + "learning_rate": 1.9426598105175575e-05, + "loss": 0.37557253241539, + "step": 1153 + }, + { + "epoch": 0.30646660470057097, + "grad_norm": 0.9544414078231065, + "learning_rate": 1.9425131756106687e-05, + "loss": 0.3323203921318054, + "step": 1154 + }, + { + "epoch": 0.3067321736821139, + "grad_norm": 1.085159318227953, + "learning_rate": 1.9423663589983554e-05, + "loss": 0.37262290716171265, + "step": 1155 + }, + { + "epoch": 0.30699774266365687, + "grad_norm": 1.138203326668225, + "learning_rate": 1.9422193607089224e-05, + "loss": 0.36621618270874023, + "step": 1156 + }, + { + "epoch": 0.3072633116451998, + "grad_norm": 1.0326975743253168, + "learning_rate": 1.942072180770709e-05, + "loss": 0.3844982385635376, + "step": 1157 + }, + { + "epoch": 0.3075288806267428, + "grad_norm": 0.9983252957319158, + "learning_rate": 1.94192481921209e-05, + "loss": 0.3229531943798065, + "step": 1158 + }, + { + "epoch": 0.3077944496082857, + "grad_norm": 1.0805327657153956, + "learning_rate": 1.9417772760614745e-05, + "loss": 0.34862661361694336, + "step": 1159 + }, + { + "epoch": 0.30806001858982873, + "grad_norm": 1.0329581193958253, + 
"learning_rate": 1.941629551347308e-05, + "loss": 0.35496509075164795, + "step": 1160 + }, + { + "epoch": 0.3083255875713717, + "grad_norm": 1.051163133463375, + "learning_rate": 1.9414816450980686e-05, + "loss": 0.3695065975189209, + "step": 1161 + }, + { + "epoch": 0.30859115655291464, + "grad_norm": 1.0254769076684076, + "learning_rate": 1.9413335573422723e-05, + "loss": 0.3472525179386139, + "step": 1162 + }, + { + "epoch": 0.3088567255344576, + "grad_norm": 1.008969123299064, + "learning_rate": 1.9411852881084683e-05, + "loss": 0.3447483479976654, + "step": 1163 + }, + { + "epoch": 0.30912229451600054, + "grad_norm": 0.9333424416365893, + "learning_rate": 1.941036837425241e-05, + "loss": 0.31047824025154114, + "step": 1164 + }, + { + "epoch": 0.3093878634975435, + "grad_norm": 1.0570471012152007, + "learning_rate": 1.9408882053212094e-05, + "loss": 0.34502410888671875, + "step": 1165 + }, + { + "epoch": 0.30965343247908644, + "grad_norm": 1.1849442151759089, + "learning_rate": 1.940739391825029e-05, + "loss": 0.3663109540939331, + "step": 1166 + }, + { + "epoch": 0.3099190014606294, + "grad_norm": 1.1136723468346887, + "learning_rate": 1.9405903969653887e-05, + "loss": 0.3635792136192322, + "step": 1167 + }, + { + "epoch": 0.31018457044217235, + "grad_norm": 1.0769441486287206, + "learning_rate": 1.940441220771013e-05, + "loss": 0.359528124332428, + "step": 1168 + }, + { + "epoch": 0.3104501394237153, + "grad_norm": 1.043185528474707, + "learning_rate": 1.9402918632706618e-05, + "loss": 0.32566630840301514, + "step": 1169 + }, + { + "epoch": 0.31071570840525825, + "grad_norm": 1.0286897614370414, + "learning_rate": 1.940142324493129e-05, + "loss": 0.34758460521698, + "step": 1170 + }, + { + "epoch": 0.3109812773868012, + "grad_norm": 1.0148570847451444, + "learning_rate": 1.9399926044672438e-05, + "loss": 0.3484055995941162, + "step": 1171 + }, + { + "epoch": 0.31124684636834415, + "grad_norm": 1.1806099587394492, + "learning_rate": 1.93984270322187e-05, + 
"loss": 0.41958773136138916, + "step": 1172 + }, + { + "epoch": 0.3115124153498871, + "grad_norm": 1.085314216258339, + "learning_rate": 1.9396926207859085e-05, + "loss": 0.3578398525714874, + "step": 1173 + }, + { + "epoch": 0.3117779843314301, + "grad_norm": 1.0721505496116728, + "learning_rate": 1.9395423571882917e-05, + "loss": 0.38140422105789185, + "step": 1174 + }, + { + "epoch": 0.31204355331297307, + "grad_norm": 1.1224661464468277, + "learning_rate": 1.9393919124579898e-05, + "loss": 0.3782861828804016, + "step": 1175 + }, + { + "epoch": 0.312309122294516, + "grad_norm": 1.0482874367837718, + "learning_rate": 1.939241286624006e-05, + "loss": 0.3211040496826172, + "step": 1176 + }, + { + "epoch": 0.31257469127605897, + "grad_norm": 0.9909015391020882, + "learning_rate": 1.9390904797153795e-05, + "loss": 0.3090783953666687, + "step": 1177 + }, + { + "epoch": 0.3128402602576019, + "grad_norm": 1.0203166402095418, + "learning_rate": 1.938939491761184e-05, + "loss": 0.3542889654636383, + "step": 1178 + }, + { + "epoch": 0.3131058292391449, + "grad_norm": 1.016567110972503, + "learning_rate": 1.9387883227905285e-05, + "loss": 0.369164377450943, + "step": 1179 + }, + { + "epoch": 0.3133713982206878, + "grad_norm": 1.1492868354113897, + "learning_rate": 1.9386369728325562e-05, + "loss": 0.35200801491737366, + "step": 1180 + }, + { + "epoch": 0.3136369672022308, + "grad_norm": 1.1332626811675575, + "learning_rate": 1.9384854419164454e-05, + "loss": 0.3696276843547821, + "step": 1181 + }, + { + "epoch": 0.31390253618377373, + "grad_norm": 0.9856387823657043, + "learning_rate": 1.9383337300714104e-05, + "loss": 0.3403652012348175, + "step": 1182 + }, + { + "epoch": 0.3141681051653167, + "grad_norm": 0.9608300998441986, + "learning_rate": 1.9381818373266987e-05, + "loss": 0.3307063579559326, + "step": 1183 + }, + { + "epoch": 0.31443367414685963, + "grad_norm": 1.002604353314113, + "learning_rate": 1.9380297637115933e-05, + "loss": 0.3223465085029602, + "step": 1184 
+ }, + { + "epoch": 0.3146992431284026, + "grad_norm": 1.1668926481270334, + "learning_rate": 1.9378775092554124e-05, + "loss": 0.4013838768005371, + "step": 1185 + }, + { + "epoch": 0.31496481210994554, + "grad_norm": 1.2376602965184098, + "learning_rate": 1.9377250739875095e-05, + "loss": 0.3596574664115906, + "step": 1186 + }, + { + "epoch": 0.3152303810914885, + "grad_norm": 1.0683740579575798, + "learning_rate": 1.937572457937271e-05, + "loss": 0.41639968752861023, + "step": 1187 + }, + { + "epoch": 0.3154959500730315, + "grad_norm": 0.950341293536979, + "learning_rate": 1.9374196611341212e-05, + "loss": 0.3001318573951721, + "step": 1188 + }, + { + "epoch": 0.31576151905457445, + "grad_norm": 1.0390515723802394, + "learning_rate": 1.937266683607516e-05, + "loss": 0.33238667249679565, + "step": 1189 + }, + { + "epoch": 0.3160270880361174, + "grad_norm": 1.0559788990716998, + "learning_rate": 1.9371135253869483e-05, + "loss": 0.33638086915016174, + "step": 1190 + }, + { + "epoch": 0.31629265701766035, + "grad_norm": 1.0736881782093415, + "learning_rate": 1.9369601865019452e-05, + "loss": 0.34445878863334656, + "step": 1191 + }, + { + "epoch": 0.3165582259992033, + "grad_norm": 1.116672373820781, + "learning_rate": 1.9368066669820684e-05, + "loss": 0.33554553985595703, + "step": 1192 + }, + { + "epoch": 0.31682379498074625, + "grad_norm": 1.2940820576034424, + "learning_rate": 1.936652966856915e-05, + "loss": 0.3668493628501892, + "step": 1193 + }, + { + "epoch": 0.3170893639622892, + "grad_norm": 1.1460266164336763, + "learning_rate": 1.9364990861561163e-05, + "loss": 0.3813396990299225, + "step": 1194 + }, + { + "epoch": 0.31735493294383216, + "grad_norm": 1.048871056336621, + "learning_rate": 1.936345024909339e-05, + "loss": 0.33625900745391846, + "step": 1195 + }, + { + "epoch": 0.3176205019253751, + "grad_norm": 1.0238786804477913, + "learning_rate": 1.9361907831462836e-05, + "loss": 0.31131428480148315, + "step": 1196 + }, + { + "epoch": 
0.31788607090691806, + "grad_norm": 0.9751456398999766, + "learning_rate": 1.936036360896687e-05, + "loss": 0.32571589946746826, + "step": 1197 + }, + { + "epoch": 0.318151639888461, + "grad_norm": 1.1296061558872548, + "learning_rate": 1.9358817581903193e-05, + "loss": 0.36207717657089233, + "step": 1198 + }, + { + "epoch": 0.31841720887000396, + "grad_norm": 1.062344543153862, + "learning_rate": 1.9357269750569864e-05, + "loss": 0.3743855059146881, + "step": 1199 + }, + { + "epoch": 0.3186827778515469, + "grad_norm": 1.1254060799620074, + "learning_rate": 1.9355720115265283e-05, + "loss": 0.3862137794494629, + "step": 1200 + }, + { + "epoch": 0.31894834683308987, + "grad_norm": 1.1135871061204583, + "learning_rate": 1.935416867628821e-05, + "loss": 0.33353424072265625, + "step": 1201 + }, + { + "epoch": 0.3192139158146329, + "grad_norm": 9.759113022509682, + "learning_rate": 1.9352615433937733e-05, + "loss": 0.3277953267097473, + "step": 1202 + }, + { + "epoch": 0.3194794847961758, + "grad_norm": 1.104737565124737, + "learning_rate": 1.9351060388513304e-05, + "loss": 0.38247692584991455, + "step": 1203 + }, + { + "epoch": 0.3197450537777188, + "grad_norm": 1.0645482624060865, + "learning_rate": 1.9349503540314724e-05, + "loss": 0.3330709934234619, + "step": 1204 + }, + { + "epoch": 0.32001062275926173, + "grad_norm": 1.1382102351287038, + "learning_rate": 1.9347944889642125e-05, + "loss": 0.3809449076652527, + "step": 1205 + }, + { + "epoch": 0.3202761917408047, + "grad_norm": 0.9591245399492223, + "learning_rate": 1.9346384436796e-05, + "loss": 0.33623188734054565, + "step": 1206 + }, + { + "epoch": 0.32054176072234764, + "grad_norm": 1.0414583731283242, + "learning_rate": 1.9344822182077184e-05, + "loss": 0.35465264320373535, + "step": 1207 + }, + { + "epoch": 0.3208073297038906, + "grad_norm": 1.0419539507532576, + "learning_rate": 1.9343258125786866e-05, + "loss": 0.3532233238220215, + "step": 1208 + }, + { + "epoch": 0.32107289868543354, + "grad_norm": 
0.972348986123494, + "learning_rate": 1.9341692268226572e-05, + "loss": 0.3498903512954712, + "step": 1209 + }, + { + "epoch": 0.3213384676669765, + "grad_norm": 1.057700016356479, + "learning_rate": 1.9340124609698185e-05, + "loss": 0.36124879121780396, + "step": 1210 + }, + { + "epoch": 0.32160403664851944, + "grad_norm": 1.1891126233384992, + "learning_rate": 1.933855515050393e-05, + "loss": 0.38535434007644653, + "step": 1211 + }, + { + "epoch": 0.3218696056300624, + "grad_norm": 1.1201736183139164, + "learning_rate": 1.9336983890946383e-05, + "loss": 0.39999911189079285, + "step": 1212 + }, + { + "epoch": 0.32213517461160535, + "grad_norm": 1.1396977359685507, + "learning_rate": 1.9335410831328457e-05, + "loss": 0.3519791066646576, + "step": 1213 + }, + { + "epoch": 0.3224007435931483, + "grad_norm": 1.1624196201646915, + "learning_rate": 1.9333835971953424e-05, + "loss": 0.35882368683815, + "step": 1214 + }, + { + "epoch": 0.32266631257469125, + "grad_norm": 1.2089532713833613, + "learning_rate": 1.93322593131249e-05, + "loss": 0.36132001876831055, + "step": 1215 + }, + { + "epoch": 0.32293188155623426, + "grad_norm": 1.0741169297687752, + "learning_rate": 1.9330680855146845e-05, + "loss": 0.36840832233428955, + "step": 1216 + }, + { + "epoch": 0.3231974505377772, + "grad_norm": 1.1553079333487188, + "learning_rate": 1.9329100598323563e-05, + "loss": 0.3755963444709778, + "step": 1217 + }, + { + "epoch": 0.32346301951932016, + "grad_norm": 1.1792888887437214, + "learning_rate": 1.9327518542959717e-05, + "loss": 0.400601863861084, + "step": 1218 + }, + { + "epoch": 0.3237285885008631, + "grad_norm": 1.0342294479515497, + "learning_rate": 1.93259346893603e-05, + "loss": 0.3100128769874573, + "step": 1219 + }, + { + "epoch": 0.32399415748240606, + "grad_norm": 1.0633052239431813, + "learning_rate": 1.9324349037830665e-05, + "loss": 0.3439880609512329, + "step": 1220 + }, + { + "epoch": 0.324259726463949, + "grad_norm": 1.1634088151631976, + "learning_rate": 
1.9322761588676505e-05, + "loss": 0.3612631559371948, + "step": 1221 + }, + { + "epoch": 0.32452529544549197, + "grad_norm": 1.1292400605185824, + "learning_rate": 1.9321172342203863e-05, + "loss": 0.38202327489852905, + "step": 1222 + }, + { + "epoch": 0.3247908644270349, + "grad_norm": 1.0253004653890312, + "learning_rate": 1.9319581298719127e-05, + "loss": 0.3405265808105469, + "step": 1223 + }, + { + "epoch": 0.32505643340857787, + "grad_norm": 1.1499639639111883, + "learning_rate": 1.931798845852903e-05, + "loss": 0.4110907018184662, + "step": 1224 + }, + { + "epoch": 0.3253220023901208, + "grad_norm": 1.2758168253168263, + "learning_rate": 1.9316393821940654e-05, + "loss": 0.3007548451423645, + "step": 1225 + }, + { + "epoch": 0.3255875713716638, + "grad_norm": 2.5438383009304673, + "learning_rate": 1.9314797389261426e-05, + "loss": 0.32769858837127686, + "step": 1226 + }, + { + "epoch": 0.3258531403532067, + "grad_norm": 1.0370704182885782, + "learning_rate": 1.931319916079912e-05, + "loss": 0.3619830310344696, + "step": 1227 + }, + { + "epoch": 0.3261187093347497, + "grad_norm": 1.2983573666738066, + "learning_rate": 1.9311599136861853e-05, + "loss": 0.3470210134983063, + "step": 1228 + }, + { + "epoch": 0.32638427831629263, + "grad_norm": 1.145435126731274, + "learning_rate": 1.9309997317758093e-05, + "loss": 0.3471665382385254, + "step": 1229 + }, + { + "epoch": 0.32664984729783564, + "grad_norm": 1.0757592201920594, + "learning_rate": 1.930839370379665e-05, + "loss": 0.3717760443687439, + "step": 1230 + }, + { + "epoch": 0.3269154162793786, + "grad_norm": 1.1173068015382108, + "learning_rate": 1.9306788295286687e-05, + "loss": 0.37279975414276123, + "step": 1231 + }, + { + "epoch": 0.32718098526092154, + "grad_norm": 1.1523781527891401, + "learning_rate": 1.93051810925377e-05, + "loss": 0.3884522020816803, + "step": 1232 + }, + { + "epoch": 0.3274465542424645, + "grad_norm": 1.1200431222189422, + "learning_rate": 1.9303572095859545e-05, + "loss": 
0.4277604818344116, + "step": 1233 + }, + { + "epoch": 0.32771212322400745, + "grad_norm": 1.1197023145386935, + "learning_rate": 1.9301961305562415e-05, + "loss": 0.2888818681240082, + "step": 1234 + }, + { + "epoch": 0.3279776922055504, + "grad_norm": 1.0271311895282893, + "learning_rate": 1.9300348721956854e-05, + "loss": 0.3134511709213257, + "step": 1235 + }, + { + "epoch": 0.32824326118709335, + "grad_norm": 1.0800984792046815, + "learning_rate": 1.9298734345353745e-05, + "loss": 0.38525280356407166, + "step": 1236 + }, + { + "epoch": 0.3285088301686363, + "grad_norm": 1.134011749036063, + "learning_rate": 1.9297118176064324e-05, + "loss": 0.3692918121814728, + "step": 1237 + }, + { + "epoch": 0.32877439915017925, + "grad_norm": 1.0348260315377988, + "learning_rate": 1.9295500214400165e-05, + "loss": 0.3443421721458435, + "step": 1238 + }, + { + "epoch": 0.3290399681317222, + "grad_norm": 1.0129455663017488, + "learning_rate": 1.9293880460673197e-05, + "loss": 0.3228621184825897, + "step": 1239 + }, + { + "epoch": 0.32930553711326516, + "grad_norm": 1.0116024279908165, + "learning_rate": 1.9292258915195688e-05, + "loss": 0.330943763256073, + "step": 1240 + }, + { + "epoch": 0.3295711060948081, + "grad_norm": 1.1814587344422625, + "learning_rate": 1.929063557828025e-05, + "loss": 0.356637567281723, + "step": 1241 + }, + { + "epoch": 0.32983667507635106, + "grad_norm": 0.9888159780201056, + "learning_rate": 1.9289010450239843e-05, + "loss": 0.3481113910675049, + "step": 1242 + }, + { + "epoch": 0.330102244057894, + "grad_norm": 1.1876931030431213, + "learning_rate": 1.928738353138778e-05, + "loss": 0.36579906940460205, + "step": 1243 + }, + { + "epoch": 0.330367813039437, + "grad_norm": 1.0281454378567854, + "learning_rate": 1.9285754822037705e-05, + "loss": 0.33025234937667847, + "step": 1244 + }, + { + "epoch": 0.33063338202097997, + "grad_norm": 1.0936673160473642, + "learning_rate": 1.9284124322503613e-05, + "loss": 0.34848469495773315, + "step": 1245 + }, 
+ { + "epoch": 0.3308989510025229, + "grad_norm": 1.1232405017277023, + "learning_rate": 1.928249203309985e-05, + "loss": 0.3523876368999481, + "step": 1246 + }, + { + "epoch": 0.3311645199840659, + "grad_norm": 1.140153458583263, + "learning_rate": 1.92808579541411e-05, + "loss": 0.3695565462112427, + "step": 1247 + }, + { + "epoch": 0.3314300889656088, + "grad_norm": 1.0267337296320096, + "learning_rate": 1.9279222085942396e-05, + "loss": 0.3557945191860199, + "step": 1248 + }, + { + "epoch": 0.3316956579471518, + "grad_norm": 1.0261133198060035, + "learning_rate": 1.9277584428819113e-05, + "loss": 0.3015502989292145, + "step": 1249 + }, + { + "epoch": 0.33196122692869473, + "grad_norm": 0.9384869314897972, + "learning_rate": 1.9275944983086964e-05, + "loss": 0.31333664059638977, + "step": 1250 + }, + { + "epoch": 0.3322267959102377, + "grad_norm": 1.103154580638619, + "learning_rate": 1.9274303749062028e-05, + "loss": 0.36595287919044495, + "step": 1251 + }, + { + "epoch": 0.33249236489178063, + "grad_norm": 1.0573816777840739, + "learning_rate": 1.9272660727060705e-05, + "loss": 0.3400266170501709, + "step": 1252 + }, + { + "epoch": 0.3327579338733236, + "grad_norm": 1.0994664368429343, + "learning_rate": 1.927101591739976e-05, + "loss": 0.3642529547214508, + "step": 1253 + }, + { + "epoch": 0.33302350285486654, + "grad_norm": 1.08059410662081, + "learning_rate": 1.926936932039628e-05, + "loss": 0.3418777287006378, + "step": 1254 + }, + { + "epoch": 0.3332890718364095, + "grad_norm": 1.0881678177934593, + "learning_rate": 1.9267720936367723e-05, + "loss": 0.33382388949394226, + "step": 1255 + }, + { + "epoch": 0.33355464081795244, + "grad_norm": 1.1227567600503816, + "learning_rate": 1.926607076563187e-05, + "loss": 0.36257779598236084, + "step": 1256 + }, + { + "epoch": 0.3338202097994954, + "grad_norm": 1.5546101865012443, + "learning_rate": 1.926441880850686e-05, + "loss": 0.3018002510070801, + "step": 1257 + }, + { + "epoch": 0.3340857787810384, + 
"grad_norm": 1.0263747105982135, + "learning_rate": 1.9262765065311165e-05, + "loss": 0.3373662233352661, + "step": 1258 + }, + { + "epoch": 0.33435134776258135, + "grad_norm": 1.0001644182280367, + "learning_rate": 1.9261109536363613e-05, + "loss": 0.3555397391319275, + "step": 1259 + }, + { + "epoch": 0.3346169167441243, + "grad_norm": 1.1519069907937776, + "learning_rate": 1.925945222198336e-05, + "loss": 0.3004256784915924, + "step": 1260 + }, + { + "epoch": 0.33488248572566726, + "grad_norm": 2.328412351070072, + "learning_rate": 1.925779312248993e-05, + "loss": 0.33299940824508667, + "step": 1261 + }, + { + "epoch": 0.3351480547072102, + "grad_norm": 1.0617967738999583, + "learning_rate": 1.9256132238203166e-05, + "loss": 0.3715725541114807, + "step": 1262 + }, + { + "epoch": 0.33541362368875316, + "grad_norm": 1.0140049717249513, + "learning_rate": 1.9254469569443274e-05, + "loss": 0.35133951902389526, + "step": 1263 + }, + { + "epoch": 0.3356791926702961, + "grad_norm": 0.9980129680534503, + "learning_rate": 1.92528051165308e-05, + "loss": 0.3328818380832672, + "step": 1264 + }, + { + "epoch": 0.33594476165183906, + "grad_norm": 1.0764552464682182, + "learning_rate": 1.925113887978662e-05, + "loss": 0.3665468692779541, + "step": 1265 + }, + { + "epoch": 0.336210330633382, + "grad_norm": 1.0446302802374996, + "learning_rate": 1.9249470859531976e-05, + "loss": 0.3489571511745453, + "step": 1266 + }, + { + "epoch": 0.33647589961492497, + "grad_norm": 1.0629721705272823, + "learning_rate": 1.9247801056088433e-05, + "loss": 0.30038982629776, + "step": 1267 + }, + { + "epoch": 0.3367414685964679, + "grad_norm": 1.1798569183028156, + "learning_rate": 1.9246129469777918e-05, + "loss": 0.4163355827331543, + "step": 1268 + }, + { + "epoch": 0.33700703757801087, + "grad_norm": 1.0428552063046848, + "learning_rate": 1.924445610092269e-05, + "loss": 0.33687612414360046, + "step": 1269 + }, + { + "epoch": 0.3372726065595538, + "grad_norm": 1.0466869124167506, + 
"learning_rate": 1.924278094984535e-05, + "loss": 0.3448297679424286, + "step": 1270 + }, + { + "epoch": 0.3375381755410968, + "grad_norm": 1.0979384797680924, + "learning_rate": 1.9241104016868853e-05, + "loss": 0.35257208347320557, + "step": 1271 + }, + { + "epoch": 0.3378037445226398, + "grad_norm": 1.0794393535441016, + "learning_rate": 1.9239425302316487e-05, + "loss": 0.34880566596984863, + "step": 1272 + }, + { + "epoch": 0.33806931350418273, + "grad_norm": 1.1081978913885613, + "learning_rate": 1.9237744806511895e-05, + "loss": 0.33643782138824463, + "step": 1273 + }, + { + "epoch": 0.3383348824857257, + "grad_norm": 1.0185962864877929, + "learning_rate": 1.9236062529779057e-05, + "loss": 0.32345050573349, + "step": 1274 + }, + { + "epoch": 0.33860045146726864, + "grad_norm": 1.0547576972102612, + "learning_rate": 1.9234378472442286e-05, + "loss": 0.33983978629112244, + "step": 1275 + }, + { + "epoch": 0.3388660204488116, + "grad_norm": 1.0305326470674594, + "learning_rate": 1.923269263482626e-05, + "loss": 0.32825571298599243, + "step": 1276 + }, + { + "epoch": 0.33913158943035454, + "grad_norm": 1.0836151603415423, + "learning_rate": 1.923100501725598e-05, + "loss": 0.3434044122695923, + "step": 1277 + }, + { + "epoch": 0.3393971584118975, + "grad_norm": 1.1293248576076373, + "learning_rate": 1.9229315620056805e-05, + "loss": 0.3463204503059387, + "step": 1278 + }, + { + "epoch": 0.33966272739344044, + "grad_norm": 1.0476463818396518, + "learning_rate": 1.9227624443554425e-05, + "loss": 0.3608240485191345, + "step": 1279 + }, + { + "epoch": 0.3399282963749834, + "grad_norm": 1.111712780266586, + "learning_rate": 1.9225931488074882e-05, + "loss": 0.36131763458251953, + "step": 1280 + }, + { + "epoch": 0.34019386535652635, + "grad_norm": 0.9948222919660873, + "learning_rate": 1.922423675394456e-05, + "loss": 0.3270101547241211, + "step": 1281 + }, + { + "epoch": 0.3404594343380693, + "grad_norm": 1.1047356141038558, + "learning_rate": 1.922254024149018e-05, 
+ "loss": 0.3551778495311737, + "step": 1282 + }, + { + "epoch": 0.34072500331961225, + "grad_norm": 1.1057498393465535, + "learning_rate": 1.9220841951038815e-05, + "loss": 0.3686622381210327, + "step": 1283 + }, + { + "epoch": 0.3409905723011552, + "grad_norm": 1.0810198379819234, + "learning_rate": 1.921914188291787e-05, + "loss": 0.35161536931991577, + "step": 1284 + }, + { + "epoch": 0.34125614128269816, + "grad_norm": 1.1489267376414198, + "learning_rate": 1.92174400374551e-05, + "loss": 0.3549870550632477, + "step": 1285 + }, + { + "epoch": 0.34152171026424116, + "grad_norm": 1.0904860537070935, + "learning_rate": 1.9215736414978593e-05, + "loss": 0.36780738830566406, + "step": 1286 + }, + { + "epoch": 0.3417872792457841, + "grad_norm": 1.132171748367688, + "learning_rate": 1.9214031015816803e-05, + "loss": 0.36060047149658203, + "step": 1287 + }, + { + "epoch": 0.34205284822732707, + "grad_norm": 1.0753334155968608, + "learning_rate": 1.9212323840298502e-05, + "loss": 0.32578715682029724, + "step": 1288 + }, + { + "epoch": 0.34231841720887, + "grad_norm": 1.0380534929488934, + "learning_rate": 1.9210614888752813e-05, + "loss": 0.3505493402481079, + "step": 1289 + }, + { + "epoch": 0.34258398619041297, + "grad_norm": 1.0227959332298084, + "learning_rate": 1.9208904161509203e-05, + "loss": 0.32681795954704285, + "step": 1290 + }, + { + "epoch": 0.3428495551719559, + "grad_norm": 1.0227973616384467, + "learning_rate": 1.9207191658897473e-05, + "loss": 0.34808459877967834, + "step": 1291 + }, + { + "epoch": 0.3431151241534989, + "grad_norm": 1.0810974703490968, + "learning_rate": 1.920547738124779e-05, + "loss": 0.3588678240776062, + "step": 1292 + }, + { + "epoch": 0.3433806931350418, + "grad_norm": 1.2030053357742059, + "learning_rate": 1.9203761328890626e-05, + "loss": 0.3528832495212555, + "step": 1293 + }, + { + "epoch": 0.3436462621165848, + "grad_norm": 1.35729757891191, + "learning_rate": 1.9202043502156833e-05, + "loss": 0.33549001812934875, + "step": 
1294 + }, + { + "epoch": 0.34391183109812773, + "grad_norm": 1.0986147605525078, + "learning_rate": 1.920032390137758e-05, + "loss": 0.3466021418571472, + "step": 1295 + }, + { + "epoch": 0.3441774000796707, + "grad_norm": 1.0492164389172054, + "learning_rate": 1.9198602526884388e-05, + "loss": 0.35646146535873413, + "step": 1296 + }, + { + "epoch": 0.34444296906121363, + "grad_norm": 1.0348991752364494, + "learning_rate": 1.9196879379009112e-05, + "loss": 0.3442128300666809, + "step": 1297 + }, + { + "epoch": 0.3447085380427566, + "grad_norm": 1.083291442034964, + "learning_rate": 1.9195154458083962e-05, + "loss": 0.3854391872882843, + "step": 1298 + }, + { + "epoch": 0.34497410702429954, + "grad_norm": 1.202325074766952, + "learning_rate": 1.9193427764441477e-05, + "loss": 0.376137375831604, + "step": 1299 + }, + { + "epoch": 0.34523967600584254, + "grad_norm": 1.1591691335477168, + "learning_rate": 1.9191699298414547e-05, + "loss": 0.3115769028663635, + "step": 1300 + }, + { + "epoch": 0.3455052449873855, + "grad_norm": 1.125127529667975, + "learning_rate": 1.9189969060336396e-05, + "loss": 0.32553282380104065, + "step": 1301 + }, + { + "epoch": 0.34577081396892845, + "grad_norm": 1.2442677252107, + "learning_rate": 1.9188237050540597e-05, + "loss": 0.39529356360435486, + "step": 1302 + }, + { + "epoch": 0.3460363829504714, + "grad_norm": 1.016155926476122, + "learning_rate": 1.9186503269361063e-05, + "loss": 0.3027458190917969, + "step": 1303 + }, + { + "epoch": 0.34630195193201435, + "grad_norm": 1.2178145504108082, + "learning_rate": 1.918476771713204e-05, + "loss": 0.39317795634269714, + "step": 1304 + }, + { + "epoch": 0.3465675209135573, + "grad_norm": 1.1358253756284789, + "learning_rate": 1.918303039418813e-05, + "loss": 0.3730325698852539, + "step": 1305 + }, + { + "epoch": 0.34683308989510025, + "grad_norm": 1.0835224567793253, + "learning_rate": 1.918129130086426e-05, + "loss": 0.34862780570983887, + "step": 1306 + }, + { + "epoch": 
0.3470986588766432, + "grad_norm": 1.106131252801308, + "learning_rate": 1.9179550437495707e-05, + "loss": 0.32139018177986145, + "step": 1307 + }, + { + "epoch": 0.34736422785818616, + "grad_norm": 1.118754726003564, + "learning_rate": 1.91778078044181e-05, + "loss": 0.37246090173721313, + "step": 1308 + }, + { + "epoch": 0.3476297968397291, + "grad_norm": 1.035507147337034, + "learning_rate": 1.9176063401967386e-05, + "loss": 0.30985957384109497, + "step": 1309 + }, + { + "epoch": 0.34789536582127206, + "grad_norm": 1.1303664709170593, + "learning_rate": 1.917431723047987e-05, + "loss": 0.3713758587837219, + "step": 1310 + }, + { + "epoch": 0.348160934802815, + "grad_norm": 1.076206973404712, + "learning_rate": 1.9172569290292193e-05, + "loss": 0.3465833067893982, + "step": 1311 + }, + { + "epoch": 0.34842650378435797, + "grad_norm": 1.1789932919731194, + "learning_rate": 1.917081958174134e-05, + "loss": 0.34807220101356506, + "step": 1312 + }, + { + "epoch": 0.3486920727659009, + "grad_norm": 1.0178456651378849, + "learning_rate": 1.9169068105164627e-05, + "loss": 0.3369640111923218, + "step": 1313 + }, + { + "epoch": 0.3489576417474439, + "grad_norm": 1.1714339652663717, + "learning_rate": 1.9167314860899724e-05, + "loss": 0.3521544337272644, + "step": 1314 + }, + { + "epoch": 0.3492232107289869, + "grad_norm": 0.9756562815370131, + "learning_rate": 1.9165559849284635e-05, + "loss": 0.3256300687789917, + "step": 1315 + }, + { + "epoch": 0.34948877971052983, + "grad_norm": 1.1173269078403432, + "learning_rate": 1.9163803070657706e-05, + "loss": 0.32401931285858154, + "step": 1316 + }, + { + "epoch": 0.3497543486920728, + "grad_norm": 1.104564951170044, + "learning_rate": 1.916204452535762e-05, + "loss": 0.372749924659729, + "step": 1317 + }, + { + "epoch": 0.35001991767361573, + "grad_norm": 1.053240444697934, + "learning_rate": 1.9160284213723407e-05, + "loss": 0.35853224992752075, + "step": 1318 + }, + { + "epoch": 0.3502854866551587, + "grad_norm": 
1.048325144857422, + "learning_rate": 1.9158522136094433e-05, + "loss": 0.32850801944732666, + "step": 1319 + }, + { + "epoch": 0.35055105563670164, + "grad_norm": 1.1274703494911789, + "learning_rate": 1.9156758292810404e-05, + "loss": 0.3548474907875061, + "step": 1320 + }, + { + "epoch": 0.3508166246182446, + "grad_norm": 1.10371779317482, + "learning_rate": 1.9154992684211372e-05, + "loss": 0.38709041476249695, + "step": 1321 + }, + { + "epoch": 0.35108219359978754, + "grad_norm": 1.1369910570736041, + "learning_rate": 1.9153225310637726e-05, + "loss": 0.40369266271591187, + "step": 1322 + }, + { + "epoch": 0.3513477625813305, + "grad_norm": 1.179710362637603, + "learning_rate": 1.9151456172430186e-05, + "loss": 0.3570155203342438, + "step": 1323 + }, + { + "epoch": 0.35161333156287344, + "grad_norm": 1.0315056954444073, + "learning_rate": 1.9149685269929833e-05, + "loss": 0.34426411986351013, + "step": 1324 + }, + { + "epoch": 0.3518789005444164, + "grad_norm": 1.0980268876500368, + "learning_rate": 1.9147912603478066e-05, + "loss": 0.35666006803512573, + "step": 1325 + }, + { + "epoch": 0.35214446952595935, + "grad_norm": 1.0320732816254274, + "learning_rate": 1.9146138173416643e-05, + "loss": 0.36225512623786926, + "step": 1326 + }, + { + "epoch": 0.3524100385075023, + "grad_norm": 1.0499655117353668, + "learning_rate": 1.9144361980087643e-05, + "loss": 0.3312349319458008, + "step": 1327 + }, + { + "epoch": 0.3526756074890453, + "grad_norm": 1.0828461821707789, + "learning_rate": 1.9142584023833506e-05, + "loss": 0.3590523302555084, + "step": 1328 + }, + { + "epoch": 0.35294117647058826, + "grad_norm": 1.2432343198034153, + "learning_rate": 1.9140804304996997e-05, + "loss": 0.341480016708374, + "step": 1329 + }, + { + "epoch": 0.3532067454521312, + "grad_norm": 1.0165353851066345, + "learning_rate": 1.913902282392122e-05, + "loss": 0.37246501445770264, + "step": 1330 + }, + { + "epoch": 0.35347231443367416, + "grad_norm": 1.0959834963108057, + 
"learning_rate": 1.913723958094963e-05, + "loss": 0.33834031224250793, + "step": 1331 + }, + { + "epoch": 0.3537378834152171, + "grad_norm": 1.0066884605687934, + "learning_rate": 1.913545457642601e-05, + "loss": 0.29285067319869995, + "step": 1332 + }, + { + "epoch": 0.35400345239676007, + "grad_norm": 1.0768479974972798, + "learning_rate": 1.913366781069449e-05, + "loss": 0.2903720736503601, + "step": 1333 + }, + { + "epoch": 0.354269021378303, + "grad_norm": 1.1311334028851072, + "learning_rate": 1.913187928409954e-05, + "loss": 0.36428314447402954, + "step": 1334 + }, + { + "epoch": 0.35453459035984597, + "grad_norm": 1.0473346547130091, + "learning_rate": 1.9130088996985967e-05, + "loss": 0.3379477560520172, + "step": 1335 + }, + { + "epoch": 0.3548001593413889, + "grad_norm": 1.0963924260325884, + "learning_rate": 1.912829694969891e-05, + "loss": 0.35286659002304077, + "step": 1336 + }, + { + "epoch": 0.3550657283229319, + "grad_norm": 1.1930831242867357, + "learning_rate": 1.9126503142583864e-05, + "loss": 0.3670174479484558, + "step": 1337 + }, + { + "epoch": 0.3553312973044748, + "grad_norm": 1.1294601866875984, + "learning_rate": 1.9124707575986642e-05, + "loss": 0.3422902226448059, + "step": 1338 + }, + { + "epoch": 0.3555968662860178, + "grad_norm": 0.9984746022499613, + "learning_rate": 1.912291025025342e-05, + "loss": 0.29778385162353516, + "step": 1339 + }, + { + "epoch": 0.35586243526756073, + "grad_norm": 1.1907673127670892, + "learning_rate": 1.91211111657307e-05, + "loss": 0.36249661445617676, + "step": 1340 + }, + { + "epoch": 0.3561280042491037, + "grad_norm": 1.1054946723600563, + "learning_rate": 1.9119310322765315e-05, + "loss": 0.340925395488739, + "step": 1341 + }, + { + "epoch": 0.3563935732306467, + "grad_norm": 1.1964466720866056, + "learning_rate": 1.9117507721704455e-05, + "loss": 0.35674089193344116, + "step": 1342 + }, + { + "epoch": 0.35665914221218964, + "grad_norm": 1.1077144979302902, + "learning_rate": 1.9115703362895636e-05, + 
"loss": 0.3602067828178406, + "step": 1343 + }, + { + "epoch": 0.3569247111937326, + "grad_norm": 1.1669501112510636, + "learning_rate": 1.9113897246686716e-05, + "loss": 0.35211697220802307, + "step": 1344 + }, + { + "epoch": 0.35719028017527554, + "grad_norm": 1.1098565168791754, + "learning_rate": 1.91120893734259e-05, + "loss": 0.3706115484237671, + "step": 1345 + }, + { + "epoch": 0.3574558491568185, + "grad_norm": 0.955637908965499, + "learning_rate": 1.9110279743461717e-05, + "loss": 0.3365110754966736, + "step": 1346 + }, + { + "epoch": 0.35772141813836145, + "grad_norm": 1.2071736385011052, + "learning_rate": 1.9108468357143047e-05, + "loss": 0.40012121200561523, + "step": 1347 + }, + { + "epoch": 0.3579869871199044, + "grad_norm": 1.1409634140225444, + "learning_rate": 1.91066552148191e-05, + "loss": 0.4003351926803589, + "step": 1348 + }, + { + "epoch": 0.35825255610144735, + "grad_norm": 1.0613274196364288, + "learning_rate": 1.910484031683943e-05, + "loss": 0.3574616014957428, + "step": 1349 + }, + { + "epoch": 0.3585181250829903, + "grad_norm": 1.0904662824068834, + "learning_rate": 1.910302366355393e-05, + "loss": 0.3345073461532593, + "step": 1350 + }, + { + "epoch": 0.35878369406453325, + "grad_norm": 1.0532412802136695, + "learning_rate": 1.910120525531283e-05, + "loss": 0.3467676341533661, + "step": 1351 + }, + { + "epoch": 0.3590492630460762, + "grad_norm": 1.0529131768701299, + "learning_rate": 1.9099385092466695e-05, + "loss": 0.32433655858039856, + "step": 1352 + }, + { + "epoch": 0.35931483202761916, + "grad_norm": 1.0442908892383016, + "learning_rate": 1.909756317536643e-05, + "loss": 0.3366447985172272, + "step": 1353 + }, + { + "epoch": 0.3595804010091621, + "grad_norm": 1.0770054348386777, + "learning_rate": 1.909573950436328e-05, + "loss": 0.310118168592453, + "step": 1354 + }, + { + "epoch": 0.35984596999070506, + "grad_norm": 1.4782002462322321, + "learning_rate": 1.909391407980883e-05, + "loss": 0.3503451943397522, + "step": 1355 + 
}, + { + "epoch": 0.36011153897224807, + "grad_norm": 1.0889726916887852, + "learning_rate": 1.9092086902054996e-05, + "loss": 0.3375343978404999, + "step": 1356 + }, + { + "epoch": 0.360377107953791, + "grad_norm": 0.9368081121032712, + "learning_rate": 1.909025797145404e-05, + "loss": 0.3056451082229614, + "step": 1357 + }, + { + "epoch": 0.360642676935334, + "grad_norm": 0.9554491579006472, + "learning_rate": 1.9088427288358556e-05, + "loss": 0.3063391447067261, + "step": 1358 + }, + { + "epoch": 0.3609082459168769, + "grad_norm": 0.9358824747825566, + "learning_rate": 1.908659485312148e-05, + "loss": 0.3055405616760254, + "step": 1359 + }, + { + "epoch": 0.3611738148984199, + "grad_norm": 1.1828231629690173, + "learning_rate": 1.908476066609608e-05, + "loss": 0.38323235511779785, + "step": 1360 + }, + { + "epoch": 0.36143938387996283, + "grad_norm": 1.0971994038941366, + "learning_rate": 1.908292472763597e-05, + "loss": 0.33526092767715454, + "step": 1361 + }, + { + "epoch": 0.3617049528615058, + "grad_norm": 1.0449346093027478, + "learning_rate": 1.9081087038095094e-05, + "loss": 0.34485238790512085, + "step": 1362 + }, + { + "epoch": 0.36197052184304873, + "grad_norm": 1.0943982229718532, + "learning_rate": 1.907924759782774e-05, + "loss": 0.2963239252567291, + "step": 1363 + }, + { + "epoch": 0.3622360908245917, + "grad_norm": 1.2033822452903298, + "learning_rate": 1.9077406407188532e-05, + "loss": 0.3536864221096039, + "step": 1364 + }, + { + "epoch": 0.36250165980613464, + "grad_norm": 1.1739216512613182, + "learning_rate": 1.907556346653242e-05, + "loss": 0.3724798858165741, + "step": 1365 + }, + { + "epoch": 0.3627672287876776, + "grad_norm": 1.2035474175290464, + "learning_rate": 1.9073718776214717e-05, + "loss": 0.36241161823272705, + "step": 1366 + }, + { + "epoch": 0.36303279776922054, + "grad_norm": 1.2262905723198394, + "learning_rate": 1.9071872336591042e-05, + "loss": 0.3484225273132324, + "step": 1367 + }, + { + "epoch": 0.3632983667507635, + 
"grad_norm": 1.11285184075262, + "learning_rate": 1.9070024148017375e-05, + "loss": 0.33606311678886414, + "step": 1368 + }, + { + "epoch": 0.36356393573230644, + "grad_norm": 1.076908267109863, + "learning_rate": 1.906817421085002e-05, + "loss": 0.3263503909111023, + "step": 1369 + }, + { + "epoch": 0.36382950471384945, + "grad_norm": 1.126388175466026, + "learning_rate": 1.906632252544563e-05, + "loss": 0.33454492688179016, + "step": 1370 + }, + { + "epoch": 0.3640950736953924, + "grad_norm": 1.1264022314316273, + "learning_rate": 1.9064469092161185e-05, + "loss": 0.34858438372612, + "step": 1371 + }, + { + "epoch": 0.36436064267693535, + "grad_norm": 1.0527021112264499, + "learning_rate": 1.9062613911354005e-05, + "loss": 0.3466234505176544, + "step": 1372 + }, + { + "epoch": 0.3646262116584783, + "grad_norm": 1.0325760706581486, + "learning_rate": 1.9060756983381743e-05, + "loss": 0.33574312925338745, + "step": 1373 + }, + { + "epoch": 0.36489178064002126, + "grad_norm": 1.0321788657369535, + "learning_rate": 1.90588983086024e-05, + "loss": 0.3012363016605377, + "step": 1374 + }, + { + "epoch": 0.3651573496215642, + "grad_norm": 1.0033389586223882, + "learning_rate": 1.90570378873743e-05, + "loss": 0.3050191402435303, + "step": 1375 + }, + { + "epoch": 0.36542291860310716, + "grad_norm": 1.0078763869776561, + "learning_rate": 1.905517572005611e-05, + "loss": 0.35090070962905884, + "step": 1376 + }, + { + "epoch": 0.3656884875846501, + "grad_norm": 1.011051809727729, + "learning_rate": 1.9053311807006845e-05, + "loss": 0.3276262581348419, + "step": 1377 + }, + { + "epoch": 0.36595405656619306, + "grad_norm": 1.300904148134606, + "learning_rate": 1.9051446148585833e-05, + "loss": 0.3303500711917877, + "step": 1378 + }, + { + "epoch": 0.366219625547736, + "grad_norm": 1.113413634877815, + "learning_rate": 1.9049578745152754e-05, + "loss": 0.3748486042022705, + "step": 1379 + }, + { + "epoch": 0.36648519452927897, + "grad_norm": 0.8707302355459249, + 
"learning_rate": 1.9047709597067628e-05, + "loss": 0.30339744687080383, + "step": 1380 + }, + { + "epoch": 0.3667507635108219, + "grad_norm": 1.0245709544347914, + "learning_rate": 1.9045838704690796e-05, + "loss": 0.31811147928237915, + "step": 1381 + }, + { + "epoch": 0.36701633249236487, + "grad_norm": 1.1759156162745943, + "learning_rate": 1.9043966068382945e-05, + "loss": 0.3541119694709778, + "step": 1382 + }, + { + "epoch": 0.3672819014739078, + "grad_norm": 1.0874467494483675, + "learning_rate": 1.9042091688505104e-05, + "loss": 0.36639657616615295, + "step": 1383 + }, + { + "epoch": 0.36754747045545083, + "grad_norm": 1.0242460437241268, + "learning_rate": 1.9040215565418628e-05, + "loss": 0.35859787464141846, + "step": 1384 + }, + { + "epoch": 0.3678130394369938, + "grad_norm": 1.017105790679022, + "learning_rate": 1.9038337699485207e-05, + "loss": 0.3210521340370178, + "step": 1385 + }, + { + "epoch": 0.36807860841853673, + "grad_norm": 1.0362268895966902, + "learning_rate": 1.9036458091066875e-05, + "loss": 0.3207433819770813, + "step": 1386 + }, + { + "epoch": 0.3683441774000797, + "grad_norm": 0.9948382455278952, + "learning_rate": 1.9034576740526e-05, + "loss": 0.3475082218647003, + "step": 1387 + }, + { + "epoch": 0.36860974638162264, + "grad_norm": 1.167057707852143, + "learning_rate": 1.903269364822528e-05, + "loss": 0.33252987265586853, + "step": 1388 + }, + { + "epoch": 0.3688753153631656, + "grad_norm": 1.0281516525035093, + "learning_rate": 1.903080881452776e-05, + "loss": 0.32200103998184204, + "step": 1389 + }, + { + "epoch": 0.36914088434470854, + "grad_norm": 1.0752934055327636, + "learning_rate": 1.9028922239796803e-05, + "loss": 0.34780022501945496, + "step": 1390 + }, + { + "epoch": 0.3694064533262515, + "grad_norm": 1.1028643639363398, + "learning_rate": 1.902703392439613e-05, + "loss": 0.35411912202835083, + "step": 1391 + }, + { + "epoch": 0.36967202230779445, + "grad_norm": 1.6627965093255739, + "learning_rate": 
1.9025143868689773e-05, + "loss": 0.35232803225517273, + "step": 1392 + }, + { + "epoch": 0.3699375912893374, + "grad_norm": 1.168292115519334, + "learning_rate": 1.9023252073042128e-05, + "loss": 0.38561391830444336, + "step": 1393 + }, + { + "epoch": 0.37020316027088035, + "grad_norm": 0.9982322437598163, + "learning_rate": 1.9021358537817897e-05, + "loss": 0.3184170126914978, + "step": 1394 + }, + { + "epoch": 0.3704687292524233, + "grad_norm": 1.0557333187102689, + "learning_rate": 1.9019463263382142e-05, + "loss": 0.32455068826675415, + "step": 1395 + }, + { + "epoch": 0.37073429823396625, + "grad_norm": 1.0862364532602506, + "learning_rate": 1.901756625010024e-05, + "loss": 0.32998934388160706, + "step": 1396 + }, + { + "epoch": 0.3709998672155092, + "grad_norm": 1.1350071137219766, + "learning_rate": 1.901566749833792e-05, + "loss": 0.3361780643463135, + "step": 1397 + }, + { + "epoch": 0.37126543619705216, + "grad_norm": 1.1483051699341575, + "learning_rate": 1.9013767008461236e-05, + "loss": 0.3618711829185486, + "step": 1398 + }, + { + "epoch": 0.37153100517859516, + "grad_norm": 1.1250978483748488, + "learning_rate": 1.901186478083658e-05, + "loss": 0.3904131054878235, + "step": 1399 + }, + { + "epoch": 0.3717965741601381, + "grad_norm": 1.0885741580509858, + "learning_rate": 1.9009960815830676e-05, + "loss": 0.35742759704589844, + "step": 1400 + }, + { + "epoch": 0.37206214314168107, + "grad_norm": 1.073570835222054, + "learning_rate": 1.9008055113810595e-05, + "loss": 0.32880812883377075, + "step": 1401 + }, + { + "epoch": 0.372327712123224, + "grad_norm": 1.0645240727318732, + "learning_rate": 1.9006147675143724e-05, + "loss": 0.3379839360713959, + "step": 1402 + }, + { + "epoch": 0.37259328110476697, + "grad_norm": 1.1363528922504198, + "learning_rate": 1.90042385001978e-05, + "loss": 0.3635789453983307, + "step": 1403 + }, + { + "epoch": 0.3728588500863099, + "grad_norm": 1.1103620354136925, + "learning_rate": 1.900232758934089e-05, + "loss": 
0.3462461233139038, + "step": 1404 + }, + { + "epoch": 0.3731244190678529, + "grad_norm": 1.1087128591527484, + "learning_rate": 1.900041494294139e-05, + "loss": 0.34578579664230347, + "step": 1405 + }, + { + "epoch": 0.3733899880493958, + "grad_norm": 1.1067984269435176, + "learning_rate": 1.899850056136804e-05, + "loss": 0.36266931891441345, + "step": 1406 + }, + { + "epoch": 0.3736555570309388, + "grad_norm": 1.089685836132972, + "learning_rate": 1.899658444498991e-05, + "loss": 0.34019365906715393, + "step": 1407 + }, + { + "epoch": 0.37392112601248173, + "grad_norm": 1.0009475991478056, + "learning_rate": 1.8994666594176404e-05, + "loss": 0.3057953119277954, + "step": 1408 + }, + { + "epoch": 0.3741866949940247, + "grad_norm": 1.1008245937613312, + "learning_rate": 1.8992747009297265e-05, + "loss": 0.3663131892681122, + "step": 1409 + }, + { + "epoch": 0.37445226397556763, + "grad_norm": 1.0696938984110862, + "learning_rate": 1.8990825690722557e-05, + "loss": 0.3402065634727478, + "step": 1410 + }, + { + "epoch": 0.3747178329571106, + "grad_norm": 1.017664192724319, + "learning_rate": 1.8988902638822693e-05, + "loss": 0.3437868654727936, + "step": 1411 + }, + { + "epoch": 0.37498340193865354, + "grad_norm": 1.2246388577961873, + "learning_rate": 1.8986977853968416e-05, + "loss": 0.40972524881362915, + "step": 1412 + }, + { + "epoch": 0.37524897092019655, + "grad_norm": 1.0293557658064552, + "learning_rate": 1.89850513365308e-05, + "loss": 0.3237977921962738, + "step": 1413 + }, + { + "epoch": 0.3755145399017395, + "grad_norm": 0.9581631299919097, + "learning_rate": 1.8983123086881254e-05, + "loss": 0.3146173357963562, + "step": 1414 + }, + { + "epoch": 0.37578010888328245, + "grad_norm": 0.9942979474502576, + "learning_rate": 1.8981193105391524e-05, + "loss": 0.33485543727874756, + "step": 1415 + }, + { + "epoch": 0.3760456778648254, + "grad_norm": 1.0963696340494955, + "learning_rate": 1.8979261392433685e-05, + "loss": 0.36379897594451904, + "step": 1416 + }, 
+ { + "epoch": 0.37631124684636835, + "grad_norm": 0.902828061805848, + "learning_rate": 1.8977327948380154e-05, + "loss": 0.2737882137298584, + "step": 1417 + }, + { + "epoch": 0.3765768158279113, + "grad_norm": 1.1168765744666191, + "learning_rate": 1.897539277360367e-05, + "loss": 0.3554575443267822, + "step": 1418 + }, + { + "epoch": 0.37684238480945426, + "grad_norm": 1.0021058464909711, + "learning_rate": 1.897345586847731e-05, + "loss": 0.3297621011734009, + "step": 1419 + }, + { + "epoch": 0.3771079537909972, + "grad_norm": 1.1638469907551372, + "learning_rate": 1.8971517233374497e-05, + "loss": 0.32272985577583313, + "step": 1420 + }, + { + "epoch": 0.37737352277254016, + "grad_norm": 1.0280583772355378, + "learning_rate": 1.8969576868668967e-05, + "loss": 0.32175642251968384, + "step": 1421 + }, + { + "epoch": 0.3776390917540831, + "grad_norm": 1.1136468557030246, + "learning_rate": 1.8967634774734807e-05, + "loss": 0.35973137617111206, + "step": 1422 + }, + { + "epoch": 0.37790466073562606, + "grad_norm": 1.1892680335343753, + "learning_rate": 1.8965690951946424e-05, + "loss": 0.3385169506072998, + "step": 1423 + }, + { + "epoch": 0.378170229717169, + "grad_norm": 1.1245023779822048, + "learning_rate": 1.8963745400678564e-05, + "loss": 0.3683067560195923, + "step": 1424 + }, + { + "epoch": 0.37843579869871197, + "grad_norm": 1.1630069521478075, + "learning_rate": 1.896179812130631e-05, + "loss": 0.3711622357368469, + "step": 1425 + }, + { + "epoch": 0.3787013676802549, + "grad_norm": 1.015020556732164, + "learning_rate": 1.895984911420507e-05, + "loss": 0.30416572093963623, + "step": 1426 + }, + { + "epoch": 0.3789669366617979, + "grad_norm": 1.079958708031102, + "learning_rate": 1.8957898379750598e-05, + "loss": 0.3439522385597229, + "step": 1427 + }, + { + "epoch": 0.3792325056433409, + "grad_norm": 1.1382084488728177, + "learning_rate": 1.895594591831896e-05, + "loss": 0.3663806617259979, + "step": 1428 + }, + { + "epoch": 0.37949807462488383, + 
"grad_norm": 1.0501527452156108, + "learning_rate": 1.895399173028658e-05, + "loss": 0.32132354378700256, + "step": 1429 + }, + { + "epoch": 0.3797636436064268, + "grad_norm": 0.9916462964383544, + "learning_rate": 1.8952035816030196e-05, + "loss": 0.3040635585784912, + "step": 1430 + }, + { + "epoch": 0.38002921258796973, + "grad_norm": 1.1155299107557486, + "learning_rate": 1.8950078175926886e-05, + "loss": 0.3548869788646698, + "step": 1431 + }, + { + "epoch": 0.3802947815695127, + "grad_norm": 1.1280933582225339, + "learning_rate": 1.894811881035406e-05, + "loss": 0.3114319443702698, + "step": 1432 + }, + { + "epoch": 0.38056035055105564, + "grad_norm": 1.151174980739505, + "learning_rate": 1.894615771968946e-05, + "loss": 0.3589673936367035, + "step": 1433 + }, + { + "epoch": 0.3808259195325986, + "grad_norm": 1.1074661491088642, + "learning_rate": 1.894419490431116e-05, + "loss": 0.3073863983154297, + "step": 1434 + }, + { + "epoch": 0.38109148851414154, + "grad_norm": 1.0689323921068359, + "learning_rate": 1.8942230364597572e-05, + "loss": 0.32474076747894287, + "step": 1435 + }, + { + "epoch": 0.3813570574956845, + "grad_norm": 2.6127931856999314, + "learning_rate": 1.8940264100927432e-05, + "loss": 0.3363546133041382, + "step": 1436 + }, + { + "epoch": 0.38162262647722744, + "grad_norm": 0.9995665434586938, + "learning_rate": 1.8938296113679814e-05, + "loss": 0.33679312467575073, + "step": 1437 + }, + { + "epoch": 0.3818881954587704, + "grad_norm": 1.0113319573344832, + "learning_rate": 1.8936326403234125e-05, + "loss": 0.33171382546424866, + "step": 1438 + }, + { + "epoch": 0.38215376444031335, + "grad_norm": 1.0880785150495547, + "learning_rate": 1.8934354969970097e-05, + "loss": 0.3717402219772339, + "step": 1439 + }, + { + "epoch": 0.3824193334218563, + "grad_norm": 1.1102375952968466, + "learning_rate": 1.8932381814267802e-05, + "loss": 0.335337370634079, + "step": 1440 + }, + { + "epoch": 0.3826849024033993, + "grad_norm": 1.010201255539417, + 
"learning_rate": 1.893040693650764e-05, + "loss": 0.32745444774627686, + "step": 1441 + }, + { + "epoch": 0.38295047138494226, + "grad_norm": 1.045820108792802, + "learning_rate": 1.892843033707035e-05, + "loss": 0.34863507747650146, + "step": 1442 + }, + { + "epoch": 0.3832160403664852, + "grad_norm": 1.0344465763282014, + "learning_rate": 1.8926452016336987e-05, + "loss": 0.3428313732147217, + "step": 1443 + }, + { + "epoch": 0.38348160934802816, + "grad_norm": 0.9882681324904586, + "learning_rate": 1.8924471974688956e-05, + "loss": 0.3223801851272583, + "step": 1444 + }, + { + "epoch": 0.3837471783295711, + "grad_norm": 1.2003387152989082, + "learning_rate": 1.8922490212507983e-05, + "loss": 0.33248746395111084, + "step": 1445 + }, + { + "epoch": 0.38401274731111407, + "grad_norm": 1.0404747226700646, + "learning_rate": 1.8920506730176125e-05, + "loss": 0.3472076654434204, + "step": 1446 + }, + { + "epoch": 0.384278316292657, + "grad_norm": 1.229166058737197, + "learning_rate": 1.891852152807578e-05, + "loss": 0.4385136365890503, + "step": 1447 + }, + { + "epoch": 0.38454388527419997, + "grad_norm": 1.0444838405880497, + "learning_rate": 1.8916534606589666e-05, + "loss": 0.36871540546417236, + "step": 1448 + }, + { + "epoch": 0.3848094542557429, + "grad_norm": 1.0803859921763799, + "learning_rate": 1.8914545966100843e-05, + "loss": 0.3136710524559021, + "step": 1449 + }, + { + "epoch": 0.3850750232372859, + "grad_norm": 1.0902031451870209, + "learning_rate": 1.891255560699269e-05, + "loss": 0.3236457109451294, + "step": 1450 + }, + { + "epoch": 0.3853405922188288, + "grad_norm": 0.9936714818929803, + "learning_rate": 1.8910563529648933e-05, + "loss": 0.3176822066307068, + "step": 1451 + }, + { + "epoch": 0.3856061612003718, + "grad_norm": 1.0635659473367998, + "learning_rate": 1.890856973445362e-05, + "loss": 0.3531719744205475, + "step": 1452 + }, + { + "epoch": 0.38587173018191473, + "grad_norm": 0.9470574553293423, + "learning_rate": 1.8906574221791127e-05, + 
"loss": 0.2911416292190552, + "step": 1453 + }, + { + "epoch": 0.3861372991634577, + "grad_norm": 1.0992858203425024, + "learning_rate": 1.890457699204617e-05, + "loss": 0.3522392511367798, + "step": 1454 + }, + { + "epoch": 0.3864028681450007, + "grad_norm": 1.1706910837372075, + "learning_rate": 1.8902578045603787e-05, + "loss": 0.3724471628665924, + "step": 1455 + }, + { + "epoch": 0.38666843712654364, + "grad_norm": 1.1807687078274312, + "learning_rate": 1.890057738284935e-05, + "loss": 0.2935449481010437, + "step": 1456 + }, + { + "epoch": 0.3869340061080866, + "grad_norm": 1.1181603604376231, + "learning_rate": 1.8898575004168568e-05, + "loss": 0.3413137197494507, + "step": 1457 + }, + { + "epoch": 0.38719957508962954, + "grad_norm": 1.1002740783107277, + "learning_rate": 1.8896570909947477e-05, + "loss": 0.32282277941703796, + "step": 1458 + }, + { + "epoch": 0.3874651440711725, + "grad_norm": 1.0071931608273124, + "learning_rate": 1.8894565100572435e-05, + "loss": 0.3285476565361023, + "step": 1459 + }, + { + "epoch": 0.38773071305271545, + "grad_norm": 1.010871057653593, + "learning_rate": 1.8892557576430147e-05, + "loss": 0.29517480731010437, + "step": 1460 + }, + { + "epoch": 0.3879962820342584, + "grad_norm": 0.9710184588467288, + "learning_rate": 1.8890548337907636e-05, + "loss": 0.2913149297237396, + "step": 1461 + }, + { + "epoch": 0.38826185101580135, + "grad_norm": 1.096024980027641, + "learning_rate": 1.8888537385392258e-05, + "loss": 0.32154160737991333, + "step": 1462 + }, + { + "epoch": 0.3885274199973443, + "grad_norm": 1.157775550745099, + "learning_rate": 1.88865247192717e-05, + "loss": 0.30677905678749084, + "step": 1463 + }, + { + "epoch": 0.38879298897888726, + "grad_norm": 1.1509749466488566, + "learning_rate": 1.888451033993399e-05, + "loss": 0.37568169832229614, + "step": 1464 + }, + { + "epoch": 0.3890585579604302, + "grad_norm": 1.0554287268781006, + "learning_rate": 1.8882494247767465e-05, + "loss": 0.34972083568573, + "step": 1465 
+ }, + { + "epoch": 0.38932412694197316, + "grad_norm": 1.1253148629548142, + "learning_rate": 1.888047644316081e-05, + "loss": 0.3198736906051636, + "step": 1466 + }, + { + "epoch": 0.3895896959235161, + "grad_norm": 1.0268445477998984, + "learning_rate": 1.887845692650303e-05, + "loss": 0.3405846953392029, + "step": 1467 + }, + { + "epoch": 0.38985526490505906, + "grad_norm": 1.1800981831391237, + "learning_rate": 1.8876435698183465e-05, + "loss": 0.3600257337093353, + "step": 1468 + }, + { + "epoch": 0.39012083388660207, + "grad_norm": 1.042232512137109, + "learning_rate": 1.887441275859179e-05, + "loss": 0.32415103912353516, + "step": 1469 + }, + { + "epoch": 0.390386402868145, + "grad_norm": 1.1736259107415346, + "learning_rate": 1.8872388108117995e-05, + "loss": 0.3450891673564911, + "step": 1470 + }, + { + "epoch": 0.390651971849688, + "grad_norm": 1.0534871304087963, + "learning_rate": 1.8870361747152416e-05, + "loss": 0.3210057318210602, + "step": 1471 + }, + { + "epoch": 0.3909175408312309, + "grad_norm": 1.1749127166764717, + "learning_rate": 1.8868333676085707e-05, + "loss": 0.3615706264972687, + "step": 1472 + }, + { + "epoch": 0.3911831098127739, + "grad_norm": 1.0750237065987462, + "learning_rate": 1.8866303895308856e-05, + "loss": 0.34149813652038574, + "step": 1473 + }, + { + "epoch": 0.39144867879431683, + "grad_norm": 0.91786674858188, + "learning_rate": 1.8864272405213188e-05, + "loss": 0.2795295715332031, + "step": 1474 + }, + { + "epoch": 0.3917142477758598, + "grad_norm": 1.1110559595870293, + "learning_rate": 1.8862239206190337e-05, + "loss": 0.3459053933620453, + "step": 1475 + }, + { + "epoch": 0.39197981675740273, + "grad_norm": 1.1048084354602663, + "learning_rate": 1.8860204298632294e-05, + "loss": 0.3531072735786438, + "step": 1476 + }, + { + "epoch": 0.3922453857389457, + "grad_norm": 1.128095083544478, + "learning_rate": 1.8858167682931357e-05, + "loss": 0.3788977265357971, + "step": 1477 + }, + { + "epoch": 0.39251095472048864, + 
"grad_norm": 1.3263027090109385, + "learning_rate": 1.8856129359480163e-05, + "loss": 0.3210671544075012, + "step": 1478 + }, + { + "epoch": 0.3927765237020316, + "grad_norm": 1.0773816671223826, + "learning_rate": 1.8854089328671673e-05, + "loss": 0.3442102074623108, + "step": 1479 + }, + { + "epoch": 0.39304209268357454, + "grad_norm": 1.0501956367137624, + "learning_rate": 1.885204759089919e-05, + "loss": 0.29128211736679077, + "step": 1480 + }, + { + "epoch": 0.3933076616651175, + "grad_norm": 1.1403330671915806, + "learning_rate": 1.885000414655633e-05, + "loss": 0.3601154088973999, + "step": 1481 + }, + { + "epoch": 0.39357323064666044, + "grad_norm": 1.032058056545269, + "learning_rate": 1.8847958996037042e-05, + "loss": 0.3173052668571472, + "step": 1482 + }, + { + "epoch": 0.39383879962820345, + "grad_norm": 1.0840123249628424, + "learning_rate": 1.8845912139735616e-05, + "loss": 0.32759106159210205, + "step": 1483 + }, + { + "epoch": 0.3941043686097464, + "grad_norm": 1.0868479290241493, + "learning_rate": 1.8843863578046657e-05, + "loss": 0.3213586211204529, + "step": 1484 + }, + { + "epoch": 0.39436993759128935, + "grad_norm": 1.0263834848721582, + "learning_rate": 1.8841813311365105e-05, + "loss": 0.342970073223114, + "step": 1485 + }, + { + "epoch": 0.3946355065728323, + "grad_norm": 1.1467746465148738, + "learning_rate": 1.883976134008622e-05, + "loss": 0.3852401375770569, + "step": 1486 + }, + { + "epoch": 0.39490107555437526, + "grad_norm": 1.0974253808771965, + "learning_rate": 1.883770766460561e-05, + "loss": 0.2965390682220459, + "step": 1487 + }, + { + "epoch": 0.3951666445359182, + "grad_norm": 1.1655078685340161, + "learning_rate": 1.883565228531919e-05, + "loss": 0.3899655044078827, + "step": 1488 + }, + { + "epoch": 0.39543221351746116, + "grad_norm": 1.1086105484757183, + "learning_rate": 1.8833595202623222e-05, + "loss": 0.339199423789978, + "step": 1489 + }, + { + "epoch": 0.3956977824990041, + "grad_norm": 1.049526058190211, + 
"learning_rate": 1.8831536416914278e-05, + "loss": 0.3121682405471802, + "step": 1490 + }, + { + "epoch": 0.39596335148054707, + "grad_norm": 1.073417591294797, + "learning_rate": 1.8829475928589272e-05, + "loss": 0.31947991251945496, + "step": 1491 + }, + { + "epoch": 0.39622892046209, + "grad_norm": 1.1660176936819076, + "learning_rate": 1.882741373804544e-05, + "loss": 0.3569333553314209, + "step": 1492 + }, + { + "epoch": 0.39649448944363297, + "grad_norm": 1.1521030930761056, + "learning_rate": 1.882534984568035e-05, + "loss": 0.3739020526409149, + "step": 1493 + }, + { + "epoch": 0.3967600584251759, + "grad_norm": 1.0930221251915908, + "learning_rate": 1.882328425189189e-05, + "loss": 0.34350353479385376, + "step": 1494 + }, + { + "epoch": 0.3970256274067189, + "grad_norm": 1.0780622136577362, + "learning_rate": 1.882121695707829e-05, + "loss": 0.3103981614112854, + "step": 1495 + }, + { + "epoch": 0.3972911963882618, + "grad_norm": 1.066229649085828, + "learning_rate": 1.8819147961638104e-05, + "loss": 0.33847716450691223, + "step": 1496 + }, + { + "epoch": 0.39755676536980483, + "grad_norm": 0.943119049120047, + "learning_rate": 1.8817077265970196e-05, + "loss": 0.3080996870994568, + "step": 1497 + }, + { + "epoch": 0.3978223343513478, + "grad_norm": 0.9758181744675688, + "learning_rate": 1.8815004870473777e-05, + "loss": 0.3247831463813782, + "step": 1498 + }, + { + "epoch": 0.39808790333289074, + "grad_norm": 0.9965389459031595, + "learning_rate": 1.8812930775548387e-05, + "loss": 0.2919698655605316, + "step": 1499 + }, + { + "epoch": 0.3983534723144337, + "grad_norm": 1.1815639690812958, + "learning_rate": 1.8810854981593883e-05, + "loss": 0.3627319931983948, + "step": 1500 + }, + { + "epoch": 0.39861904129597664, + "grad_norm": 1.0245222516327634, + "learning_rate": 1.880877748901045e-05, + "loss": 0.3619319796562195, + "step": 1501 + }, + { + "epoch": 0.3988846102775196, + "grad_norm": 1.0294076265521692, + "learning_rate": 1.8806698298198608e-05, + 
"loss": 0.3393789827823639, + "step": 1502 + }, + { + "epoch": 0.39915017925906254, + "grad_norm": 1.1375999694611314, + "learning_rate": 1.88046174095592e-05, + "loss": 0.3736116886138916, + "step": 1503 + }, + { + "epoch": 0.3994157482406055, + "grad_norm": 0.9615847393601772, + "learning_rate": 1.8802534823493395e-05, + "loss": 0.32829388976097107, + "step": 1504 + }, + { + "epoch": 0.39968131722214845, + "grad_norm": 1.004520084683698, + "learning_rate": 1.8800450540402694e-05, + "loss": 0.340041846036911, + "step": 1505 + }, + { + "epoch": 0.3999468862036914, + "grad_norm": 1.6423190284198783, + "learning_rate": 1.8798364560688917e-05, + "loss": 0.2830736041069031, + "step": 1506 + }, + { + "epoch": 0.40021245518523435, + "grad_norm": 1.126838308447994, + "learning_rate": 1.8796276884754224e-05, + "loss": 0.33011579513549805, + "step": 1507 + }, + { + "epoch": 0.4004780241667773, + "grad_norm": 1.0024833819275993, + "learning_rate": 1.8794187513001088e-05, + "loss": 0.2893834114074707, + "step": 1508 + }, + { + "epoch": 0.40074359314832025, + "grad_norm": 1.0682148927963429, + "learning_rate": 1.8792096445832317e-05, + "loss": 0.3590015172958374, + "step": 1509 + }, + { + "epoch": 0.4010091621298632, + "grad_norm": 1.1883404603513603, + "learning_rate": 1.8790003683651045e-05, + "loss": 0.3968508541584015, + "step": 1510 + }, + { + "epoch": 0.4012747311114062, + "grad_norm": 1.1506641785596874, + "learning_rate": 1.878790922686073e-05, + "loss": 0.324398934841156, + "step": 1511 + }, + { + "epoch": 0.40154030009294917, + "grad_norm": 1.0455658872732225, + "learning_rate": 1.8785813075865164e-05, + "loss": 0.35111895203590393, + "step": 1512 + }, + { + "epoch": 0.4018058690744921, + "grad_norm": 1.055231257150353, + "learning_rate": 1.8783715231068452e-05, + "loss": 0.28124356269836426, + "step": 1513 + }, + { + "epoch": 0.40207143805603507, + "grad_norm": 1.0070468428923411, + "learning_rate": 1.878161569287504e-05, + "loss": 0.28962311148643494, + "step": 
1514 + }, + { + "epoch": 0.402337007037578, + "grad_norm": 1.0934983041480315, + "learning_rate": 1.877951446168969e-05, + "loss": 0.3646606206893921, + "step": 1515 + }, + { + "epoch": 0.402602576019121, + "grad_norm": 1.1065863254454682, + "learning_rate": 1.8777411537917497e-05, + "loss": 0.2815355360507965, + "step": 1516 + }, + { + "epoch": 0.4028681450006639, + "grad_norm": 1.1372178900816394, + "learning_rate": 1.877530692196388e-05, + "loss": 0.33208370208740234, + "step": 1517 + }, + { + "epoch": 0.4031337139822069, + "grad_norm": 1.0968319662456871, + "learning_rate": 1.8773200614234587e-05, + "loss": 0.33741289377212524, + "step": 1518 + }, + { + "epoch": 0.40339928296374983, + "grad_norm": 1.1178822197952292, + "learning_rate": 1.877109261513568e-05, + "loss": 0.31304073333740234, + "step": 1519 + }, + { + "epoch": 0.4036648519452928, + "grad_norm": 1.264796618244999, + "learning_rate": 1.8768982925073566e-05, + "loss": 0.32556387782096863, + "step": 1520 + }, + { + "epoch": 0.40393042092683573, + "grad_norm": 1.1057344226732335, + "learning_rate": 1.8766871544454963e-05, + "loss": 0.3584224581718445, + "step": 1521 + }, + { + "epoch": 0.4041959899083787, + "grad_norm": 1.0109621512685618, + "learning_rate": 1.8764758473686918e-05, + "loss": 0.2864416837692261, + "step": 1522 + }, + { + "epoch": 0.40446155888992164, + "grad_norm": 1.0390539229722413, + "learning_rate": 1.8762643713176815e-05, + "loss": 0.28925320506095886, + "step": 1523 + }, + { + "epoch": 0.4047271278714646, + "grad_norm": 1.022628245189221, + "learning_rate": 1.876052726333235e-05, + "loss": 0.30940550565719604, + "step": 1524 + }, + { + "epoch": 0.4049926968530076, + "grad_norm": 1.1648500528958037, + "learning_rate": 1.875840912456155e-05, + "loss": 0.3463154733181, + "step": 1525 + }, + { + "epoch": 0.40525826583455055, + "grad_norm": 1.1823420506345301, + "learning_rate": 1.8756289297272764e-05, + "loss": 0.3349658250808716, + "step": 1526 + }, + { + "epoch": 0.4055238348160935, 
+ "grad_norm": 1.0511817500052025, + "learning_rate": 1.8754167781874674e-05, + "loss": 0.32588714361190796, + "step": 1527 + }, + { + "epoch": 0.40578940379763645, + "grad_norm": 1.0750045197041278, + "learning_rate": 1.875204457877628e-05, + "loss": 0.33787310123443604, + "step": 1528 + }, + { + "epoch": 0.4060549727791794, + "grad_norm": 1.0444881434472735, + "learning_rate": 1.8749919688386912e-05, + "loss": 0.3223261833190918, + "step": 1529 + }, + { + "epoch": 0.40632054176072235, + "grad_norm": 1.2251483540500576, + "learning_rate": 1.8747793111116226e-05, + "loss": 0.38505882024765015, + "step": 1530 + }, + { + "epoch": 0.4065861107422653, + "grad_norm": 1.077913563059366, + "learning_rate": 1.8745664847374197e-05, + "loss": 0.33071833848953247, + "step": 1531 + }, + { + "epoch": 0.40685167972380826, + "grad_norm": 1.2405893427169952, + "learning_rate": 1.874353489757113e-05, + "loss": 0.36603987216949463, + "step": 1532 + }, + { + "epoch": 0.4071172487053512, + "grad_norm": 0.9982674001932202, + "learning_rate": 1.874140326211766e-05, + "loss": 0.3103085160255432, + "step": 1533 + }, + { + "epoch": 0.40738281768689416, + "grad_norm": 1.1470515997968143, + "learning_rate": 1.873926994142473e-05, + "loss": 0.3471127152442932, + "step": 1534 + }, + { + "epoch": 0.4076483866684371, + "grad_norm": 1.0759117431352352, + "learning_rate": 1.873713493590363e-05, + "loss": 0.33152899146080017, + "step": 1535 + }, + { + "epoch": 0.40791395564998006, + "grad_norm": 1.0887192073538825, + "learning_rate": 1.8734998245965958e-05, + "loss": 0.340177059173584, + "step": 1536 + }, + { + "epoch": 0.408179524631523, + "grad_norm": 1.175803638176176, + "learning_rate": 1.8732859872023644e-05, + "loss": 0.3331618010997772, + "step": 1537 + }, + { + "epoch": 0.40844509361306597, + "grad_norm": 1.0971311272588662, + "learning_rate": 1.8730719814488937e-05, + "loss": 0.3911997675895691, + "step": 1538 + }, + { + "epoch": 0.408710662594609, + "grad_norm": 1.0986179012488992, + 
"learning_rate": 1.8728578073774427e-05, + "loss": 0.3699817955493927, + "step": 1539 + }, + { + "epoch": 0.4089762315761519, + "grad_norm": 1.086312859301249, + "learning_rate": 1.8726434650293e-05, + "loss": 0.31567275524139404, + "step": 1540 + }, + { + "epoch": 0.4092418005576949, + "grad_norm": 1.1099279461258769, + "learning_rate": 1.8724289544457897e-05, + "loss": 0.3387305438518524, + "step": 1541 + }, + { + "epoch": 0.40950736953923783, + "grad_norm": 1.6366665349052443, + "learning_rate": 1.8722142756682663e-05, + "loss": 0.3460234999656677, + "step": 1542 + }, + { + "epoch": 0.4097729385207808, + "grad_norm": 1.1109783591024025, + "learning_rate": 1.8719994287381173e-05, + "loss": 0.35653382539749146, + "step": 1543 + }, + { + "epoch": 0.41003850750232373, + "grad_norm": 1.1054235252004945, + "learning_rate": 1.8717844136967626e-05, + "loss": 0.3828277885913849, + "step": 1544 + }, + { + "epoch": 0.4103040764838667, + "grad_norm": 1.0929819002464054, + "learning_rate": 1.871569230585655e-05, + "loss": 0.35883858799934387, + "step": 1545 + }, + { + "epoch": 0.41056964546540964, + "grad_norm": 0.988264800308937, + "learning_rate": 1.8713538794462783e-05, + "loss": 0.27414464950561523, + "step": 1546 + }, + { + "epoch": 0.4108352144469526, + "grad_norm": 1.0216234157414708, + "learning_rate": 1.871138360320151e-05, + "loss": 0.2924337387084961, + "step": 1547 + }, + { + "epoch": 0.41110078342849554, + "grad_norm": 1.1264719097344291, + "learning_rate": 1.8709226732488216e-05, + "loss": 0.34270918369293213, + "step": 1548 + }, + { + "epoch": 0.4113663524100385, + "grad_norm": 1.056133674601812, + "learning_rate": 1.870706818273872e-05, + "loss": 0.33866482973098755, + "step": 1549 + }, + { + "epoch": 0.41163192139158145, + "grad_norm": 1.0578429496037574, + "learning_rate": 1.8704907954369176e-05, + "loss": 0.3350633382797241, + "step": 1550 + }, + { + "epoch": 0.4118974903731244, + "grad_norm": 1.0981882806330738, + "learning_rate": 1.870274604779604e-05, + 
"loss": 0.32763785123825073, + "step": 1551 + }, + { + "epoch": 0.41216305935466735, + "grad_norm": 1.1235534336905566, + "learning_rate": 1.8700582463436102e-05, + "loss": 0.3130378723144531, + "step": 1552 + }, + { + "epoch": 0.41242862833621036, + "grad_norm": 1.1311593123986747, + "learning_rate": 1.8698417201706484e-05, + "loss": 0.34318777918815613, + "step": 1553 + }, + { + "epoch": 0.4126941973177533, + "grad_norm": 1.038517953287962, + "learning_rate": 1.8696250263024617e-05, + "loss": 0.3250104784965515, + "step": 1554 + }, + { + "epoch": 0.41295976629929626, + "grad_norm": 1.1047081419569766, + "learning_rate": 1.869408164780826e-05, + "loss": 0.3409217298030853, + "step": 1555 + }, + { + "epoch": 0.4132253352808392, + "grad_norm": 0.9892429720688775, + "learning_rate": 1.86919113564755e-05, + "loss": 0.2885017395019531, + "step": 1556 + }, + { + "epoch": 0.41349090426238216, + "grad_norm": 0.9861078966083267, + "learning_rate": 1.8689739389444744e-05, + "loss": 0.31912562251091003, + "step": 1557 + }, + { + "epoch": 0.4137564732439251, + "grad_norm": 1.0037060940033242, + "learning_rate": 1.8687565747134716e-05, + "loss": 0.29874011874198914, + "step": 1558 + }, + { + "epoch": 0.41402204222546807, + "grad_norm": 1.0308167425812278, + "learning_rate": 1.8685390429964473e-05, + "loss": 0.3132701516151428, + "step": 1559 + }, + { + "epoch": 0.414287611207011, + "grad_norm": 1.0029824533275895, + "learning_rate": 1.868321343835339e-05, + "loss": 0.31158843636512756, + "step": 1560 + }, + { + "epoch": 0.41455318018855397, + "grad_norm": 0.959841401113078, + "learning_rate": 1.8681034772721167e-05, + "loss": 0.30490344762802124, + "step": 1561 + }, + { + "epoch": 0.4148187491700969, + "grad_norm": 1.1053356359227535, + "learning_rate": 1.867885443348782e-05, + "loss": 0.3150998055934906, + "step": 1562 + }, + { + "epoch": 0.4150843181516399, + "grad_norm": 1.0578010897773087, + "learning_rate": 1.86766724210737e-05, + "loss": 0.3391645550727844, + "step": 
1563 + }, + { + "epoch": 0.4153498871331828, + "grad_norm": 1.1317933031731224, + "learning_rate": 1.8674488735899466e-05, + "loss": 0.35013002157211304, + "step": 1564 + }, + { + "epoch": 0.4156154561147258, + "grad_norm": 1.1514144052665038, + "learning_rate": 1.867230337838611e-05, + "loss": 0.3455789387226105, + "step": 1565 + }, + { + "epoch": 0.41588102509626873, + "grad_norm": 1.0985743755307058, + "learning_rate": 1.8670116348954945e-05, + "loss": 0.3179319500923157, + "step": 1566 + }, + { + "epoch": 0.41614659407781174, + "grad_norm": 1.046997092909125, + "learning_rate": 1.8667927648027596e-05, + "loss": 0.3628920018672943, + "step": 1567 + }, + { + "epoch": 0.4164121630593547, + "grad_norm": 1.1175553372657145, + "learning_rate": 1.8665737276026033e-05, + "loss": 0.33599400520324707, + "step": 1568 + }, + { + "epoch": 0.41667773204089764, + "grad_norm": 1.0741100001694928, + "learning_rate": 1.8663545233372524e-05, + "loss": 0.31519144773483276, + "step": 1569 + }, + { + "epoch": 0.4169433010224406, + "grad_norm": 1.0564388001425704, + "learning_rate": 1.8661351520489667e-05, + "loss": 0.3326237201690674, + "step": 1570 + }, + { + "epoch": 0.41720887000398355, + "grad_norm": 1.0506499046982631, + "learning_rate": 1.865915613780039e-05, + "loss": 0.35254499316215515, + "step": 1571 + }, + { + "epoch": 0.4174744389855265, + "grad_norm": 1.134962500533026, + "learning_rate": 1.8656959085727936e-05, + "loss": 0.36689436435699463, + "step": 1572 + }, + { + "epoch": 0.41774000796706945, + "grad_norm": 1.104702895545828, + "learning_rate": 1.8654760364695873e-05, + "loss": 0.3113600015640259, + "step": 1573 + }, + { + "epoch": 0.4180055769486124, + "grad_norm": 1.0072243279377031, + "learning_rate": 1.865255997512808e-05, + "loss": 0.3336432874202728, + "step": 1574 + }, + { + "epoch": 0.41827114593015535, + "grad_norm": 1.1762721663897004, + "learning_rate": 1.8650357917448774e-05, + "loss": 0.3657492995262146, + "step": 1575 + }, + { + "epoch": 
0.4185367149116983, + "grad_norm": 1.1286123264778107, + "learning_rate": 1.864815419208248e-05, + "loss": 0.3087846338748932, + "step": 1576 + }, + { + "epoch": 0.41880228389324126, + "grad_norm": 1.059893684126419, + "learning_rate": 1.8645948799454058e-05, + "loss": 0.31422343850135803, + "step": 1577 + }, + { + "epoch": 0.4190678528747842, + "grad_norm": 1.0232345658393134, + "learning_rate": 1.8643741739988672e-05, + "loss": 0.3172760009765625, + "step": 1578 + }, + { + "epoch": 0.41933342185632716, + "grad_norm": 1.131569038679809, + "learning_rate": 1.8641533014111824e-05, + "loss": 0.36819136142730713, + "step": 1579 + }, + { + "epoch": 0.4195989908378701, + "grad_norm": 1.0215370560204735, + "learning_rate": 1.863932262224933e-05, + "loss": 0.29081088304519653, + "step": 1580 + }, + { + "epoch": 0.4198645598194131, + "grad_norm": 1.0406040134422527, + "learning_rate": 1.8637110564827325e-05, + "loss": 0.3209632635116577, + "step": 1581 + }, + { + "epoch": 0.42013012880095607, + "grad_norm": 1.9161132832998955, + "learning_rate": 1.863489684227227e-05, + "loss": 0.3357914686203003, + "step": 1582 + }, + { + "epoch": 0.420395697782499, + "grad_norm": 1.0469990353974015, + "learning_rate": 1.8632681455010937e-05, + "loss": 0.285677969455719, + "step": 1583 + }, + { + "epoch": 0.420661266764042, + "grad_norm": 1.1491447855439996, + "learning_rate": 1.8630464403470435e-05, + "loss": 0.377876341342926, + "step": 1584 + }, + { + "epoch": 0.4209268357455849, + "grad_norm": 1.0642007656116979, + "learning_rate": 1.8628245688078187e-05, + "loss": 0.3141768276691437, + "step": 1585 + }, + { + "epoch": 0.4211924047271279, + "grad_norm": 1.078787810404599, + "learning_rate": 1.8626025309261927e-05, + "loss": 0.34249693155288696, + "step": 1586 + }, + { + "epoch": 0.42145797370867083, + "grad_norm": 1.1583509747022063, + "learning_rate": 1.8623803267449722e-05, + "loss": 0.32564717531204224, + "step": 1587 + }, + { + "epoch": 0.4217235426902138, + "grad_norm": 
1.0623179841052965, + "learning_rate": 1.8621579563069957e-05, + "loss": 0.3425004184246063, + "step": 1588 + }, + { + "epoch": 0.42198911167175673, + "grad_norm": 1.05392590229203, + "learning_rate": 1.8619354196551333e-05, + "loss": 0.3676222562789917, + "step": 1589 + }, + { + "epoch": 0.4222546806532997, + "grad_norm": 0.9612536546184688, + "learning_rate": 1.8617127168322877e-05, + "loss": 0.28915971517562866, + "step": 1590 + }, + { + "epoch": 0.42252024963484264, + "grad_norm": 1.1293248025877465, + "learning_rate": 1.8614898478813933e-05, + "loss": 0.3387221097946167, + "step": 1591 + }, + { + "epoch": 0.4227858186163856, + "grad_norm": 1.0804518757125117, + "learning_rate": 1.8612668128454164e-05, + "loss": 0.33886784315109253, + "step": 1592 + }, + { + "epoch": 0.42305138759792854, + "grad_norm": 1.0780507904890781, + "learning_rate": 1.8610436117673557e-05, + "loss": 0.3364121913909912, + "step": 1593 + }, + { + "epoch": 0.4233169565794715, + "grad_norm": 1.0590527240631433, + "learning_rate": 1.8608202446902418e-05, + "loss": 0.3661370873451233, + "step": 1594 + }, + { + "epoch": 0.4235825255610145, + "grad_norm": 1.254416564930449, + "learning_rate": 1.8605967116571372e-05, + "loss": 0.2980557680130005, + "step": 1595 + }, + { + "epoch": 0.42384809454255745, + "grad_norm": 1.180518248335952, + "learning_rate": 1.8603730127111363e-05, + "loss": 0.36112043261528015, + "step": 1596 + }, + { + "epoch": 0.4241136635241004, + "grad_norm": 0.9967676484164163, + "learning_rate": 1.860149147895366e-05, + "loss": 0.30641958117485046, + "step": 1597 + }, + { + "epoch": 0.42437923250564336, + "grad_norm": 1.06006138769355, + "learning_rate": 1.8599251172529836e-05, + "loss": 0.3312561511993408, + "step": 1598 + }, + { + "epoch": 0.4246448014871863, + "grad_norm": 1.070580032885208, + "learning_rate": 1.859700920827181e-05, + "loss": 0.3757131099700928, + "step": 1599 + }, + { + "epoch": 0.42491037046872926, + "grad_norm": 1.0514692584176801, + "learning_rate": 
1.8594765586611805e-05, + "loss": 0.3225080370903015, + "step": 1600 + }, + { + "epoch": 0.4251759394502722, + "grad_norm": 1.0857454483782787, + "learning_rate": 1.859252030798236e-05, + "loss": 0.35943928360939026, + "step": 1601 + }, + { + "epoch": 0.42544150843181516, + "grad_norm": 0.9907794348406631, + "learning_rate": 1.859027337281633e-05, + "loss": 0.29319390654563904, + "step": 1602 + }, + { + "epoch": 0.4257070774133581, + "grad_norm": 1.1441852776057728, + "learning_rate": 1.8588024781546914e-05, + "loss": 0.32320237159729004, + "step": 1603 + }, + { + "epoch": 0.42597264639490107, + "grad_norm": 1.1070076098385897, + "learning_rate": 1.8585774534607606e-05, + "loss": 0.3381520211696625, + "step": 1604 + }, + { + "epoch": 0.426238215376444, + "grad_norm": 0.9826840529093485, + "learning_rate": 1.858352263243223e-05, + "loss": 0.30010825395584106, + "step": 1605 + }, + { + "epoch": 0.42650378435798697, + "grad_norm": 0.9805553200940528, + "learning_rate": 1.8581269075454918e-05, + "loss": 0.26282748579978943, + "step": 1606 + }, + { + "epoch": 0.4267693533395299, + "grad_norm": 1.0395702570014627, + "learning_rate": 1.857901386411014e-05, + "loss": 0.33613401651382446, + "step": 1607 + }, + { + "epoch": 0.4270349223210729, + "grad_norm": 1.1625768546626036, + "learning_rate": 1.8576756998832667e-05, + "loss": 0.34522315859794617, + "step": 1608 + }, + { + "epoch": 0.4273004913026159, + "grad_norm": 1.0776480516530333, + "learning_rate": 1.8574498480057598e-05, + "loss": 0.3253153860569, + "step": 1609 + }, + { + "epoch": 0.42756606028415883, + "grad_norm": 1.177683979502923, + "learning_rate": 1.8572238308220347e-05, + "loss": 0.32180655002593994, + "step": 1610 + }, + { + "epoch": 0.4278316292657018, + "grad_norm": 1.2444289754345055, + "learning_rate": 1.856997648375665e-05, + "loss": 0.3274008333683014, + "step": 1611 + }, + { + "epoch": 0.42809719824724474, + "grad_norm": 1.006782047196068, + "learning_rate": 1.8567713007102565e-05, + "loss": 
0.3196510374546051, + "step": 1612 + }, + { + "epoch": 0.4283627672287877, + "grad_norm": 1.0069133029708661, + "learning_rate": 1.8565447878694455e-05, + "loss": 0.2759617567062378, + "step": 1613 + }, + { + "epoch": 0.42862833621033064, + "grad_norm": 1.1572573238869637, + "learning_rate": 1.8563181098969017e-05, + "loss": 0.35069289803504944, + "step": 1614 + }, + { + "epoch": 0.4288939051918736, + "grad_norm": 1.1400434606874466, + "learning_rate": 1.8560912668363253e-05, + "loss": 0.3388484716415405, + "step": 1615 + }, + { + "epoch": 0.42915947417341654, + "grad_norm": 1.0338736294243014, + "learning_rate": 1.8558642587314496e-05, + "loss": 0.34116029739379883, + "step": 1616 + }, + { + "epoch": 0.4294250431549595, + "grad_norm": 1.0487376701262667, + "learning_rate": 1.8556370856260387e-05, + "loss": 0.30212706327438354, + "step": 1617 + }, + { + "epoch": 0.42969061213650245, + "grad_norm": 1.0633174136084793, + "learning_rate": 1.855409747563889e-05, + "loss": 0.32250338792800903, + "step": 1618 + }, + { + "epoch": 0.4299561811180454, + "grad_norm": 1.132237618998821, + "learning_rate": 1.8551822445888285e-05, + "loss": 0.35972943902015686, + "step": 1619 + }, + { + "epoch": 0.43022175009958835, + "grad_norm": 0.9921112897877987, + "learning_rate": 1.8549545767447174e-05, + "loss": 0.3112533390522003, + "step": 1620 + }, + { + "epoch": 0.4304873190811313, + "grad_norm": 1.0331176116114555, + "learning_rate": 1.854726744075447e-05, + "loss": 0.3044458031654358, + "step": 1621 + }, + { + "epoch": 0.43075288806267426, + "grad_norm": 1.0421498129424722, + "learning_rate": 1.8544987466249412e-05, + "loss": 0.3261772096157074, + "step": 1622 + }, + { + "epoch": 0.43101845704421726, + "grad_norm": 1.3249821498842442, + "learning_rate": 1.8542705844371544e-05, + "loss": 0.3485907018184662, + "step": 1623 + }, + { + "epoch": 0.4312840260257602, + "grad_norm": 2.6643478315387576, + "learning_rate": 1.8540422575560747e-05, + "loss": 0.3016113340854645, + "step": 1624 
+ }, + { + "epoch": 0.43154959500730317, + "grad_norm": 1.021133157663628, + "learning_rate": 1.8538137660257198e-05, + "loss": 0.35383081436157227, + "step": 1625 + }, + { + "epoch": 0.4318151639888461, + "grad_norm": 1.170997891522692, + "learning_rate": 1.8535851098901406e-05, + "loss": 0.32015109062194824, + "step": 1626 + }, + { + "epoch": 0.43208073297038907, + "grad_norm": 1.1526156179794622, + "learning_rate": 1.8533562891934195e-05, + "loss": 0.3801743984222412, + "step": 1627 + }, + { + "epoch": 0.432346301951932, + "grad_norm": 1.0686097183664227, + "learning_rate": 1.85312730397967e-05, + "loss": 0.33140939474105835, + "step": 1628 + }, + { + "epoch": 0.432611870933475, + "grad_norm": 1.232101025230023, + "learning_rate": 1.8528981542930382e-05, + "loss": 0.4052904546260834, + "step": 1629 + }, + { + "epoch": 0.4328774399150179, + "grad_norm": 1.0850305465298753, + "learning_rate": 1.8526688401777014e-05, + "loss": 0.3661607801914215, + "step": 1630 + }, + { + "epoch": 0.4331430088965609, + "grad_norm": 1.0520968780833948, + "learning_rate": 1.852439361677868e-05, + "loss": 0.33260756731033325, + "step": 1631 + }, + { + "epoch": 0.43340857787810383, + "grad_norm": 1.0137607762513057, + "learning_rate": 1.85220971883778e-05, + "loss": 0.30222776532173157, + "step": 1632 + }, + { + "epoch": 0.4336741468596468, + "grad_norm": 1.1138822281677037, + "learning_rate": 1.8519799117017086e-05, + "loss": 0.3444751799106598, + "step": 1633 + }, + { + "epoch": 0.43393971584118973, + "grad_norm": 1.0896517914007275, + "learning_rate": 1.8517499403139586e-05, + "loss": 0.33887404203414917, + "step": 1634 + }, + { + "epoch": 0.4342052848227327, + "grad_norm": 0.9260010903737679, + "learning_rate": 1.8515198047188652e-05, + "loss": 0.287893146276474, + "step": 1635 + }, + { + "epoch": 0.43447085380427564, + "grad_norm": 1.0080783350179279, + "learning_rate": 1.8512895049607965e-05, + "loss": 0.32236215472221375, + "step": 1636 + }, + { + "epoch": 0.43473642278581864, + 
"grad_norm": 1.0861808896793093, + "learning_rate": 1.8510590410841515e-05, + "loss": 0.30670079588890076, + "step": 1637 + }, + { + "epoch": 0.4350019917673616, + "grad_norm": 1.045996826542631, + "learning_rate": 1.8508284131333604e-05, + "loss": 0.34104713797569275, + "step": 1638 + }, + { + "epoch": 0.43526756074890455, + "grad_norm": 1.13616869746559, + "learning_rate": 1.8505976211528857e-05, + "loss": 0.3402378559112549, + "step": 1639 + }, + { + "epoch": 0.4355331297304475, + "grad_norm": 1.1414650328718847, + "learning_rate": 1.8503666651872217e-05, + "loss": 0.35236096382141113, + "step": 1640 + }, + { + "epoch": 0.43579869871199045, + "grad_norm": 1.1137846416322885, + "learning_rate": 1.850135545280894e-05, + "loss": 0.3385634422302246, + "step": 1641 + }, + { + "epoch": 0.4360642676935334, + "grad_norm": 1.0049349552180111, + "learning_rate": 1.849904261478459e-05, + "loss": 0.32222414016723633, + "step": 1642 + }, + { + "epoch": 0.43632983667507635, + "grad_norm": 1.1246487142505726, + "learning_rate": 1.8496728138245062e-05, + "loss": 0.3251120448112488, + "step": 1643 + }, + { + "epoch": 0.4365954056566193, + "grad_norm": 1.3230672810485753, + "learning_rate": 1.8494412023636563e-05, + "loss": 0.3199063837528229, + "step": 1644 + }, + { + "epoch": 0.43686097463816226, + "grad_norm": 1.031106173264746, + "learning_rate": 1.8492094271405605e-05, + "loss": 0.3470883071422577, + "step": 1645 + }, + { + "epoch": 0.4371265436197052, + "grad_norm": 1.1420067933967792, + "learning_rate": 1.848977488199903e-05, + "loss": 0.319596529006958, + "step": 1646 + }, + { + "epoch": 0.43739211260124816, + "grad_norm": 1.172387725238046, + "learning_rate": 1.848745385586398e-05, + "loss": 0.3445591628551483, + "step": 1647 + }, + { + "epoch": 0.4376576815827911, + "grad_norm": 1.0622512502557289, + "learning_rate": 1.848513119344793e-05, + "loss": 0.35861149430274963, + "step": 1648 + }, + { + "epoch": 0.43792325056433407, + "grad_norm": 1.3423176489021205, + 
"learning_rate": 1.8482806895198658e-05, + "loss": 0.36727622151374817, + "step": 1649 + }, + { + "epoch": 0.438188819545877, + "grad_norm": 1.0985203266462633, + "learning_rate": 1.848048096156426e-05, + "loss": 0.3505704402923584, + "step": 1650 + }, + { + "epoch": 0.43845438852742, + "grad_norm": 1.050005044594017, + "learning_rate": 1.8478153392993154e-05, + "loss": 0.3508742153644562, + "step": 1651 + }, + { + "epoch": 0.438719957508963, + "grad_norm": 1.0688095584032915, + "learning_rate": 1.8475824189934063e-05, + "loss": 0.32757264375686646, + "step": 1652 + }, + { + "epoch": 0.43898552649050593, + "grad_norm": 1.0768843323365103, + "learning_rate": 1.8473493352836032e-05, + "loss": 0.3117530643939972, + "step": 1653 + }, + { + "epoch": 0.4392510954720489, + "grad_norm": 1.1751248406507369, + "learning_rate": 1.8471160882148417e-05, + "loss": 0.3506043553352356, + "step": 1654 + }, + { + "epoch": 0.43951666445359183, + "grad_norm": 1.1247697965204402, + "learning_rate": 1.8468826778320892e-05, + "loss": 0.33997148275375366, + "step": 1655 + }, + { + "epoch": 0.4397822334351348, + "grad_norm": 1.007133328419329, + "learning_rate": 1.8466491041803446e-05, + "loss": 0.30060335993766785, + "step": 1656 + }, + { + "epoch": 0.44004780241667774, + "grad_norm": 0.9546594059496064, + "learning_rate": 1.846415367304638e-05, + "loss": 0.3057805597782135, + "step": 1657 + }, + { + "epoch": 0.4403133713982207, + "grad_norm": 1.006954520739026, + "learning_rate": 1.846181467250031e-05, + "loss": 0.30772098898887634, + "step": 1658 + }, + { + "epoch": 0.44057894037976364, + "grad_norm": 1.043209753174748, + "learning_rate": 1.845947404061617e-05, + "loss": 0.3183813989162445, + "step": 1659 + }, + { + "epoch": 0.4408445093613066, + "grad_norm": 1.0413807475941115, + "learning_rate": 1.8457131777845204e-05, + "loss": 0.2986184358596802, + "step": 1660 + }, + { + "epoch": 0.44111007834284954, + "grad_norm": 1.0330249735438937, + "learning_rate": 1.8454787884638973e-05, + 
"loss": 0.33342432975769043, + "step": 1661 + }, + { + "epoch": 0.4413756473243925, + "grad_norm": 1.6337494282252796, + "learning_rate": 1.8452442361449353e-05, + "loss": 0.33435192704200745, + "step": 1662 + }, + { + "epoch": 0.44164121630593545, + "grad_norm": 1.1084487395338765, + "learning_rate": 1.8450095208728537e-05, + "loss": 0.31596100330352783, + "step": 1663 + }, + { + "epoch": 0.4419067852874784, + "grad_norm": 1.0372033094770008, + "learning_rate": 1.8447746426929022e-05, + "loss": 0.29850512742996216, + "step": 1664 + }, + { + "epoch": 0.4421723542690214, + "grad_norm": 1.1891933812209383, + "learning_rate": 1.8445396016503628e-05, + "loss": 0.34898555278778076, + "step": 1665 + }, + { + "epoch": 0.44243792325056436, + "grad_norm": 1.0486597661615855, + "learning_rate": 1.8443043977905484e-05, + "loss": 0.283272385597229, + "step": 1666 + }, + { + "epoch": 0.4427034922321073, + "grad_norm": 1.041766578180328, + "learning_rate": 1.844069031158804e-05, + "loss": 0.32765433192253113, + "step": 1667 + }, + { + "epoch": 0.44296906121365026, + "grad_norm": 1.1465241668847563, + "learning_rate": 1.8438335018005052e-05, + "loss": 0.347957044839859, + "step": 1668 + }, + { + "epoch": 0.4432346301951932, + "grad_norm": 1.1330493919292772, + "learning_rate": 1.8435978097610594e-05, + "loss": 0.36188018321990967, + "step": 1669 + }, + { + "epoch": 0.44350019917673617, + "grad_norm": 1.1541714860130494, + "learning_rate": 1.843361955085905e-05, + "loss": 0.35944315791130066, + "step": 1670 + }, + { + "epoch": 0.4437657681582791, + "grad_norm": 1.0564596521414393, + "learning_rate": 1.8431259378205122e-05, + "loss": 0.33441367745399475, + "step": 1671 + }, + { + "epoch": 0.44403133713982207, + "grad_norm": 1.1043363461383413, + "learning_rate": 1.8428897580103827e-05, + "loss": 0.3157849907875061, + "step": 1672 + }, + { + "epoch": 0.444296906121365, + "grad_norm": 1.0760645254646117, + "learning_rate": 1.8426534157010486e-05, + "loss": 0.33416497707366943, + 
"step": 1673 + }, + { + "epoch": 0.444562475102908, + "grad_norm": 1.1629646905519946, + "learning_rate": 1.842416910938074e-05, + "loss": 0.3611617684364319, + "step": 1674 + }, + { + "epoch": 0.4448280440844509, + "grad_norm": 1.079831089952362, + "learning_rate": 1.8421802437670546e-05, + "loss": 0.3030395805835724, + "step": 1675 + }, + { + "epoch": 0.4450936130659939, + "grad_norm": 0.9867988845558019, + "learning_rate": 1.8419434142336167e-05, + "loss": 0.30281510949134827, + "step": 1676 + }, + { + "epoch": 0.44535918204753683, + "grad_norm": 1.2041533085675928, + "learning_rate": 1.8417064223834184e-05, + "loss": 0.3489738404750824, + "step": 1677 + }, + { + "epoch": 0.4456247510290798, + "grad_norm": 1.0320394434428715, + "learning_rate": 1.8414692682621487e-05, + "loss": 0.30453425645828247, + "step": 1678 + }, + { + "epoch": 0.44589032001062273, + "grad_norm": 0.9586890082829097, + "learning_rate": 1.841231951915528e-05, + "loss": 0.28717339038848877, + "step": 1679 + }, + { + "epoch": 0.44615588899216574, + "grad_norm": 1.0685350052372018, + "learning_rate": 1.840994473389309e-05, + "loss": 0.3227912187576294, + "step": 1680 + }, + { + "epoch": 0.4464214579737087, + "grad_norm": 1.0774879432227336, + "learning_rate": 1.8407568327292737e-05, + "loss": 0.3575928807258606, + "step": 1681 + }, + { + "epoch": 0.44668702695525164, + "grad_norm": 1.0240612597420884, + "learning_rate": 1.840519029981237e-05, + "loss": 0.35601454973220825, + "step": 1682 + }, + { + "epoch": 0.4469525959367946, + "grad_norm": 1.1829639598617365, + "learning_rate": 1.8402810651910444e-05, + "loss": 0.34867429733276367, + "step": 1683 + }, + { + "epoch": 0.44721816491833755, + "grad_norm": 1.0185115495756123, + "learning_rate": 1.8400429384045724e-05, + "loss": 0.3333359360694885, + "step": 1684 + }, + { + "epoch": 0.4474837338998805, + "grad_norm": 1.1658514468774803, + "learning_rate": 1.8398046496677296e-05, + "loss": 0.3269057273864746, + "step": 1685 + }, + { + "epoch": 
0.44774930288142345, + "grad_norm": 1.0186865264151983, + "learning_rate": 1.839566199026455e-05, + "loss": 0.3507213890552521, + "step": 1686 + }, + { + "epoch": 0.4480148718629664, + "grad_norm": 1.0962029873559684, + "learning_rate": 1.8393275865267185e-05, + "loss": 0.32935822010040283, + "step": 1687 + }, + { + "epoch": 0.44828044084450935, + "grad_norm": 1.168811125319112, + "learning_rate": 1.8390888122145225e-05, + "loss": 0.3780096769332886, + "step": 1688 + }, + { + "epoch": 0.4485460098260523, + "grad_norm": 1.08432540630583, + "learning_rate": 1.8388498761358997e-05, + "loss": 0.3412250578403473, + "step": 1689 + }, + { + "epoch": 0.44881157880759526, + "grad_norm": 1.0725143861051711, + "learning_rate": 1.838610778336914e-05, + "loss": 0.33751022815704346, + "step": 1690 + }, + { + "epoch": 0.4490771477891382, + "grad_norm": 1.113628501747759, + "learning_rate": 1.8383715188636608e-05, + "loss": 0.35736170411109924, + "step": 1691 + }, + { + "epoch": 0.44934271677068116, + "grad_norm": 1.0608679340591776, + "learning_rate": 1.8381320977622664e-05, + "loss": 0.3133913278579712, + "step": 1692 + }, + { + "epoch": 0.4496082857522241, + "grad_norm": 1.0696112323301112, + "learning_rate": 1.8378925150788886e-05, + "loss": 0.2890821099281311, + "step": 1693 + }, + { + "epoch": 0.4498738547337671, + "grad_norm": 1.0759892831738864, + "learning_rate": 1.8376527708597155e-05, + "loss": 0.34016966819763184, + "step": 1694 + }, + { + "epoch": 0.45013942371531007, + "grad_norm": 1.0933611032669988, + "learning_rate": 1.8374128651509676e-05, + "loss": 0.3502900302410126, + "step": 1695 + }, + { + "epoch": 0.450404992696853, + "grad_norm": 1.1956521483077693, + "learning_rate": 1.8371727979988957e-05, + "loss": 0.31828251481056213, + "step": 1696 + }, + { + "epoch": 0.450670561678396, + "grad_norm": 1.1739995891800665, + "learning_rate": 1.836932569449782e-05, + "loss": 0.33322471380233765, + "step": 1697 + }, + { + "epoch": 0.4509361306599389, + "grad_norm": 
0.977715581129718, + "learning_rate": 1.8366921795499394e-05, + "loss": 0.28489458560943604, + "step": 1698 + }, + { + "epoch": 0.4512016996414819, + "grad_norm": 1.0351592490047028, + "learning_rate": 1.8364516283457127e-05, + "loss": 0.3125787079334259, + "step": 1699 + }, + { + "epoch": 0.45146726862302483, + "grad_norm": 1.6801930060854708, + "learning_rate": 1.8362109158834767e-05, + "loss": 0.3352596163749695, + "step": 1700 + }, + { + "epoch": 0.4517328376045678, + "grad_norm": 1.0152758212914303, + "learning_rate": 1.8359700422096385e-05, + "loss": 0.2986747622489929, + "step": 1701 + }, + { + "epoch": 0.45199840658611073, + "grad_norm": 1.0704573865215896, + "learning_rate": 1.8357290073706355e-05, + "loss": 0.3276829123497009, + "step": 1702 + }, + { + "epoch": 0.4522639755676537, + "grad_norm": 1.05119725558451, + "learning_rate": 1.8354878114129368e-05, + "loss": 0.3183029890060425, + "step": 1703 + }, + { + "epoch": 0.45252954454919664, + "grad_norm": 1.0595099003295023, + "learning_rate": 1.835246454383041e-05, + "loss": 0.32149460911750793, + "step": 1704 + }, + { + "epoch": 0.4527951135307396, + "grad_norm": 1.0365725372264356, + "learning_rate": 1.8350049363274802e-05, + "loss": 0.2963859438896179, + "step": 1705 + }, + { + "epoch": 0.45306068251228254, + "grad_norm": 1.132218144997021, + "learning_rate": 1.8347632572928154e-05, + "loss": 0.35251080989837646, + "step": 1706 + }, + { + "epoch": 0.4533262514938255, + "grad_norm": 1.1840188868504486, + "learning_rate": 1.8345214173256395e-05, + "loss": 0.3585474491119385, + "step": 1707 + }, + { + "epoch": 0.4535918204753685, + "grad_norm": 1.1792148584627284, + "learning_rate": 1.834279416472577e-05, + "loss": 0.32339078187942505, + "step": 1708 + }, + { + "epoch": 0.45385738945691145, + "grad_norm": 1.030916532610971, + "learning_rate": 1.8340372547802822e-05, + "loss": 0.3473295569419861, + "step": 1709 + }, + { + "epoch": 0.4541229584384544, + "grad_norm": 1.149162033618886, + "learning_rate": 
1.833794932295441e-05, + "loss": 0.35146117210388184, + "step": 1710 + }, + { + "epoch": 0.45438852741999736, + "grad_norm": 1.080751163824508, + "learning_rate": 1.833552449064771e-05, + "loss": 0.29697534441947937, + "step": 1711 + }, + { + "epoch": 0.4546540964015403, + "grad_norm": 1.0590764839143914, + "learning_rate": 1.8333098051350197e-05, + "loss": 0.30980685353279114, + "step": 1712 + }, + { + "epoch": 0.45491966538308326, + "grad_norm": 1.2023264217964575, + "learning_rate": 1.8330670005529657e-05, + "loss": 0.3271983861923218, + "step": 1713 + }, + { + "epoch": 0.4551852343646262, + "grad_norm": 1.061456665590969, + "learning_rate": 1.8328240353654193e-05, + "loss": 0.3421804904937744, + "step": 1714 + }, + { + "epoch": 0.45545080334616916, + "grad_norm": 0.988281834877126, + "learning_rate": 1.8325809096192207e-05, + "loss": 0.2949771285057068, + "step": 1715 + }, + { + "epoch": 0.4557163723277121, + "grad_norm": 1.1467541005281106, + "learning_rate": 1.832337623361242e-05, + "loss": 0.35578668117523193, + "step": 1716 + }, + { + "epoch": 0.45598194130925507, + "grad_norm": 1.099618839558401, + "learning_rate": 1.832094176638387e-05, + "loss": 0.3714647889137268, + "step": 1717 + }, + { + "epoch": 0.456247510290798, + "grad_norm": 1.116087725713372, + "learning_rate": 1.8318505694975877e-05, + "loss": 0.36253875494003296, + "step": 1718 + }, + { + "epoch": 0.45651307927234097, + "grad_norm": 1.0310426822464949, + "learning_rate": 1.8316068019858093e-05, + "loss": 0.3148016035556793, + "step": 1719 + }, + { + "epoch": 0.4567786482538839, + "grad_norm": 1.0869949789046671, + "learning_rate": 1.8313628741500476e-05, + "loss": 0.3420512080192566, + "step": 1720 + }, + { + "epoch": 0.4570442172354269, + "grad_norm": 1.0955610437646774, + "learning_rate": 1.831118786037329e-05, + "loss": 0.2941698431968689, + "step": 1721 + }, + { + "epoch": 0.4573097862169699, + "grad_norm": 0.9987507632564111, + "learning_rate": 1.83087453769471e-05, + "loss": 
0.3033481240272522, + "step": 1722 + }, + { + "epoch": 0.45757535519851283, + "grad_norm": 1.0508818993675257, + "learning_rate": 1.8306301291692798e-05, + "loss": 0.3405943810939789, + "step": 1723 + }, + { + "epoch": 0.4578409241800558, + "grad_norm": 1.0291343903638976, + "learning_rate": 1.8303855605081567e-05, + "loss": 0.32217931747436523, + "step": 1724 + }, + { + "epoch": 0.45810649316159874, + "grad_norm": 1.1797464113481113, + "learning_rate": 1.8301408317584913e-05, + "loss": 0.3627573847770691, + "step": 1725 + }, + { + "epoch": 0.4583720621431417, + "grad_norm": 1.1425882725361838, + "learning_rate": 1.829895942967464e-05, + "loss": 0.3512224853038788, + "step": 1726 + }, + { + "epoch": 0.45863763112468464, + "grad_norm": 1.1358093316461328, + "learning_rate": 1.8296508941822868e-05, + "loss": 0.35433265566825867, + "step": 1727 + }, + { + "epoch": 0.4589032001062276, + "grad_norm": 1.1217406683513973, + "learning_rate": 1.829405685450202e-05, + "loss": 0.33105185627937317, + "step": 1728 + }, + { + "epoch": 0.45916876908777055, + "grad_norm": 1.0087946676492725, + "learning_rate": 1.829160316818483e-05, + "loss": 0.31765925884246826, + "step": 1729 + }, + { + "epoch": 0.4594343380693135, + "grad_norm": 1.0268902541251206, + "learning_rate": 1.8289147883344338e-05, + "loss": 0.3276101350784302, + "step": 1730 + }, + { + "epoch": 0.45969990705085645, + "grad_norm": 2.1185922480389676, + "learning_rate": 1.8286691000453895e-05, + "loss": 0.2921130061149597, + "step": 1731 + }, + { + "epoch": 0.4599654760323994, + "grad_norm": 0.9680106013727008, + "learning_rate": 1.828423251998716e-05, + "loss": 0.3025062382221222, + "step": 1732 + }, + { + "epoch": 0.46023104501394235, + "grad_norm": 1.0299077884479195, + "learning_rate": 1.82817724424181e-05, + "loss": 0.3128702640533447, + "step": 1733 + }, + { + "epoch": 0.4604966139954853, + "grad_norm": 0.9957682350134235, + "learning_rate": 1.8279310768220987e-05, + "loss": 0.31156033277511597, + "step": 1734 + 
}, + { + "epoch": 0.46076218297702826, + "grad_norm": 1.0327514294429654, + "learning_rate": 1.82768474978704e-05, + "loss": 0.30409976840019226, + "step": 1735 + }, + { + "epoch": 0.46102775195857126, + "grad_norm": 1.0533664417585449, + "learning_rate": 1.827438263184124e-05, + "loss": 0.305557519197464, + "step": 1736 + }, + { + "epoch": 0.4612933209401142, + "grad_norm": 1.1216722893854725, + "learning_rate": 1.827191617060869e-05, + "loss": 0.36079999804496765, + "step": 1737 + }, + { + "epoch": 0.46155888992165717, + "grad_norm": 1.0546022345807051, + "learning_rate": 1.8269448114648264e-05, + "loss": 0.3341830372810364, + "step": 1738 + }, + { + "epoch": 0.4618244589032001, + "grad_norm": 1.0085785444907966, + "learning_rate": 1.8266978464435764e-05, + "loss": 0.3222450017929077, + "step": 1739 + }, + { + "epoch": 0.46209002788474307, + "grad_norm": 1.112818872130856, + "learning_rate": 1.826450722044732e-05, + "loss": 0.34665441513061523, + "step": 1740 + }, + { + "epoch": 0.462355596866286, + "grad_norm": 1.1112300040840664, + "learning_rate": 1.8262034383159357e-05, + "loss": 0.31024169921875, + "step": 1741 + }, + { + "epoch": 0.462621165847829, + "grad_norm": 1.2322752248386413, + "learning_rate": 1.8259559953048606e-05, + "loss": 0.2950369119644165, + "step": 1742 + }, + { + "epoch": 0.4628867348293719, + "grad_norm": 1.109045795536776, + "learning_rate": 1.8257083930592102e-05, + "loss": 0.3378523886203766, + "step": 1743 + }, + { + "epoch": 0.4631523038109149, + "grad_norm": 0.9899845397184047, + "learning_rate": 1.8254606316267204e-05, + "loss": 0.2930060923099518, + "step": 1744 + }, + { + "epoch": 0.46341787279245783, + "grad_norm": 1.079619676645024, + "learning_rate": 1.8252127110551564e-05, + "loss": 0.3236517012119293, + "step": 1745 + }, + { + "epoch": 0.4636834417740008, + "grad_norm": 0.9852877201201444, + "learning_rate": 1.824964631392314e-05, + "loss": 0.3010406196117401, + "step": 1746 + }, + { + "epoch": 0.46394901075554373, + 
"grad_norm": 1.0095585954453505, + "learning_rate": 1.8247163926860204e-05, + "loss": 0.3269607424736023, + "step": 1747 + }, + { + "epoch": 0.4642145797370867, + "grad_norm": 1.0474961373680607, + "learning_rate": 1.8244679949841328e-05, + "loss": 0.3437904715538025, + "step": 1748 + }, + { + "epoch": 0.46448014871862964, + "grad_norm": 1.1512723462780612, + "learning_rate": 1.8242194383345394e-05, + "loss": 0.37820738554000854, + "step": 1749 + }, + { + "epoch": 0.46474571770017264, + "grad_norm": 1.0989334641357904, + "learning_rate": 1.8239707227851592e-05, + "loss": 0.3365899920463562, + "step": 1750 + }, + { + "epoch": 0.4650112866817156, + "grad_norm": 0.9943228703349263, + "learning_rate": 1.8237218483839414e-05, + "loss": 0.30418774485588074, + "step": 1751 + }, + { + "epoch": 0.46527685566325855, + "grad_norm": 0.9379554406122236, + "learning_rate": 1.823472815178866e-05, + "loss": 0.2923222780227661, + "step": 1752 + }, + { + "epoch": 0.4655424246448015, + "grad_norm": 1.1096787188742467, + "learning_rate": 1.823223623217944e-05, + "loss": 0.3358995020389557, + "step": 1753 + }, + { + "epoch": 0.46580799362634445, + "grad_norm": 1.0997620749237405, + "learning_rate": 1.822974272549216e-05, + "loss": 0.3413343131542206, + "step": 1754 + }, + { + "epoch": 0.4660735626078874, + "grad_norm": 1.0873990469892099, + "learning_rate": 1.822724763220755e-05, + "loss": 0.33553364872932434, + "step": 1755 + }, + { + "epoch": 0.46633913158943036, + "grad_norm": 1.0957210856960815, + "learning_rate": 1.8224750952806626e-05, + "loss": 0.35896626114845276, + "step": 1756 + }, + { + "epoch": 0.4666047005709733, + "grad_norm": 1.1032076691430248, + "learning_rate": 1.8222252687770718e-05, + "loss": 0.35345566272735596, + "step": 1757 + }, + { + "epoch": 0.46687026955251626, + "grad_norm": 1.0034635235769087, + "learning_rate": 1.8219752837581466e-05, + "loss": 0.3146013617515564, + "step": 1758 + }, + { + "epoch": 0.4671358385340592, + "grad_norm": 1.0191336075935247, + 
"learning_rate": 1.8217251402720807e-05, + "loss": 0.33270642161369324, + "step": 1759 + }, + { + "epoch": 0.46740140751560216, + "grad_norm": 1.030475428136688, + "learning_rate": 1.821474838367099e-05, + "loss": 0.3172033727169037, + "step": 1760 + }, + { + "epoch": 0.4676669764971451, + "grad_norm": 1.6535016363051902, + "learning_rate": 1.8212243780914578e-05, + "loss": 0.3277033567428589, + "step": 1761 + }, + { + "epoch": 0.46793254547868807, + "grad_norm": 1.1570228647748637, + "learning_rate": 1.820973759493441e-05, + "loss": 0.3523799777030945, + "step": 1762 + }, + { + "epoch": 0.468198114460231, + "grad_norm": 1.0907259849913267, + "learning_rate": 1.8207229826213664e-05, + "loss": 0.32437676191329956, + "step": 1763 + }, + { + "epoch": 0.468463683441774, + "grad_norm": 1.1347618214788342, + "learning_rate": 1.82047204752358e-05, + "loss": 0.34185051918029785, + "step": 1764 + }, + { + "epoch": 0.468729252423317, + "grad_norm": 1.0561382700570243, + "learning_rate": 1.8202209542484594e-05, + "loss": 0.32034197449684143, + "step": 1765 + }, + { + "epoch": 0.46899482140485993, + "grad_norm": 1.097207173265362, + "learning_rate": 1.8199697028444125e-05, + "loss": 0.30969515442848206, + "step": 1766 + }, + { + "epoch": 0.4692603903864029, + "grad_norm": 0.9320632629292236, + "learning_rate": 1.8197182933598776e-05, + "loss": 0.24751389026641846, + "step": 1767 + }, + { + "epoch": 0.46952595936794583, + "grad_norm": 1.2001835130139573, + "learning_rate": 1.8194667258433235e-05, + "loss": 0.3859948217868805, + "step": 1768 + }, + { + "epoch": 0.4697915283494888, + "grad_norm": 1.0989779617923678, + "learning_rate": 1.819215000343249e-05, + "loss": 0.29364967346191406, + "step": 1769 + }, + { + "epoch": 0.47005709733103174, + "grad_norm": 1.1161641657952082, + "learning_rate": 1.8189631169081845e-05, + "loss": 0.3560323715209961, + "step": 1770 + }, + { + "epoch": 0.4703226663125747, + "grad_norm": 1.6505675097600017, + "learning_rate": 1.8187110755866898e-05, 
+ "loss": 0.3458098769187927, + "step": 1771 + }, + { + "epoch": 0.47058823529411764, + "grad_norm": 1.0148526914708587, + "learning_rate": 1.8184588764273555e-05, + "loss": 0.32131001353263855, + "step": 1772 + }, + { + "epoch": 0.4708538042756606, + "grad_norm": 1.0453234866463608, + "learning_rate": 1.8182065194788024e-05, + "loss": 0.3011054992675781, + "step": 1773 + }, + { + "epoch": 0.47111937325720354, + "grad_norm": 1.1076832582073854, + "learning_rate": 1.8179540047896827e-05, + "loss": 0.3314674496650696, + "step": 1774 + }, + { + "epoch": 0.4713849422387465, + "grad_norm": 1.0853788387965118, + "learning_rate": 1.8177013324086774e-05, + "loss": 0.3437536060810089, + "step": 1775 + }, + { + "epoch": 0.47165051122028945, + "grad_norm": 1.166112048160084, + "learning_rate": 1.8174485023844993e-05, + "loss": 0.36137935519218445, + "step": 1776 + }, + { + "epoch": 0.4719160802018324, + "grad_norm": 1.0726359370167762, + "learning_rate": 1.8171955147658905e-05, + "loss": 0.34018874168395996, + "step": 1777 + }, + { + "epoch": 0.4721816491833754, + "grad_norm": 1.0596665602066746, + "learning_rate": 1.8169423696016245e-05, + "loss": 0.33298587799072266, + "step": 1778 + }, + { + "epoch": 0.47244721816491836, + "grad_norm": 1.1107712039752602, + "learning_rate": 1.816689066940505e-05, + "loss": 0.3649418354034424, + "step": 1779 + }, + { + "epoch": 0.4727127871464613, + "grad_norm": 1.0148859742506888, + "learning_rate": 1.8164356068313646e-05, + "loss": 0.32419171929359436, + "step": 1780 + }, + { + "epoch": 0.47297835612800426, + "grad_norm": 1.047167823612948, + "learning_rate": 1.8161819893230688e-05, + "loss": 0.288555383682251, + "step": 1781 + }, + { + "epoch": 0.4732439251095472, + "grad_norm": 1.005455205363293, + "learning_rate": 1.815928214464511e-05, + "loss": 0.3231011629104614, + "step": 1782 + }, + { + "epoch": 0.47350949409109017, + "grad_norm": 1.0470674131364166, + "learning_rate": 1.815674282304617e-05, + "loss": 0.29310134053230286, + 
"step": 1783 + }, + { + "epoch": 0.4737750630726331, + "grad_norm": 1.0390137248114197, + "learning_rate": 1.815420192892341e-05, + "loss": 0.32683852314949036, + "step": 1784 + }, + { + "epoch": 0.47404063205417607, + "grad_norm": 1.0353379429668699, + "learning_rate": 1.8151659462766685e-05, + "loss": 0.3200969099998474, + "step": 1785 + }, + { + "epoch": 0.474306201035719, + "grad_norm": 1.051359679014311, + "learning_rate": 1.814911542506616e-05, + "loss": 0.3091360032558441, + "step": 1786 + }, + { + "epoch": 0.474571770017262, + "grad_norm": 1.1630088603070372, + "learning_rate": 1.814656981631229e-05, + "loss": 0.3679049611091614, + "step": 1787 + }, + { + "epoch": 0.4748373389988049, + "grad_norm": 1.1065634125772459, + "learning_rate": 1.814402263699584e-05, + "loss": 0.290119469165802, + "step": 1788 + }, + { + "epoch": 0.4751029079803479, + "grad_norm": 1.0987492456650414, + "learning_rate": 1.8141473887607874e-05, + "loss": 0.31878861784935, + "step": 1789 + }, + { + "epoch": 0.47536847696189083, + "grad_norm": 1.1254389921885528, + "learning_rate": 1.8138923568639763e-05, + "loss": 0.35820287466049194, + "step": 1790 + }, + { + "epoch": 0.4756340459434338, + "grad_norm": 1.0046454439717083, + "learning_rate": 1.8136371680583176e-05, + "loss": 0.2924647629261017, + "step": 1791 + }, + { + "epoch": 0.4758996149249768, + "grad_norm": 1.2202907606610718, + "learning_rate": 1.8133818223930092e-05, + "loss": 0.3799927234649658, + "step": 1792 + }, + { + "epoch": 0.47616518390651974, + "grad_norm": 1.1097316301591598, + "learning_rate": 1.8131263199172783e-05, + "loss": 0.3505420386791229, + "step": 1793 + }, + { + "epoch": 0.4764307528880627, + "grad_norm": 1.1021438648339534, + "learning_rate": 1.8128706606803823e-05, + "loss": 0.3291688859462738, + "step": 1794 + }, + { + "epoch": 0.47669632186960564, + "grad_norm": 1.0814065231113215, + "learning_rate": 1.8126148447316104e-05, + "loss": 0.34079697728157043, + "step": 1795 + }, + { + "epoch": 
0.4769618908511486, + "grad_norm": 1.2185578909639558, + "learning_rate": 1.8123588721202802e-05, + "loss": 0.2898064851760864, + "step": 1796 + }, + { + "epoch": 0.47722745983269155, + "grad_norm": 1.0448194415877836, + "learning_rate": 1.8121027428957402e-05, + "loss": 0.32089224457740784, + "step": 1797 + }, + { + "epoch": 0.4774930288142345, + "grad_norm": 1.903396083379018, + "learning_rate": 1.8118464571073697e-05, + "loss": 0.3402039408683777, + "step": 1798 + }, + { + "epoch": 0.47775859779577745, + "grad_norm": 1.1693256768707747, + "learning_rate": 1.8115900148045767e-05, + "loss": 0.29904159903526306, + "step": 1799 + }, + { + "epoch": 0.4780241667773204, + "grad_norm": 1.0688058843932313, + "learning_rate": 1.8113334160368007e-05, + "loss": 0.34074240922927856, + "step": 1800 + }, + { + "epoch": 0.47828973575886335, + "grad_norm": 1.0404364284009804, + "learning_rate": 1.811076660853511e-05, + "loss": 0.28566253185272217, + "step": 1801 + }, + { + "epoch": 0.4785553047404063, + "grad_norm": 1.0267154270839738, + "learning_rate": 1.8108197493042065e-05, + "loss": 0.34523358941078186, + "step": 1802 + }, + { + "epoch": 0.47882087372194926, + "grad_norm": 1.0082361251695107, + "learning_rate": 1.8105626814384173e-05, + "loss": 0.3261171281337738, + "step": 1803 + }, + { + "epoch": 0.4790864427034922, + "grad_norm": 1.0353580811121572, + "learning_rate": 1.8103054573057027e-05, + "loss": 0.2915942966938019, + "step": 1804 + }, + { + "epoch": 0.47935201168503516, + "grad_norm": 1.117140176261941, + "learning_rate": 1.810048076955653e-05, + "loss": 0.2999255657196045, + "step": 1805 + }, + { + "epoch": 0.47961758066657817, + "grad_norm": 1.0967176640726466, + "learning_rate": 1.8097905404378874e-05, + "loss": 0.3294594883918762, + "step": 1806 + }, + { + "epoch": 0.4798831496481211, + "grad_norm": 1.025641731681811, + "learning_rate": 1.8095328478020563e-05, + "loss": 0.30720093846321106, + "step": 1807 + }, + { + "epoch": 0.4801487186296641, + "grad_norm": 
1.0583824100775536, + "learning_rate": 1.8092749990978395e-05, + "loss": 0.31076985597610474, + "step": 1808 + }, + { + "epoch": 0.480414287611207, + "grad_norm": 1.0650372083327142, + "learning_rate": 1.8090169943749477e-05, + "loss": 0.3182013928890228, + "step": 1809 + }, + { + "epoch": 0.48067985659275, + "grad_norm": 1.1560421045272382, + "learning_rate": 1.8087588336831206e-05, + "loss": 0.325716108083725, + "step": 1810 + }, + { + "epoch": 0.48094542557429293, + "grad_norm": 1.034822212222003, + "learning_rate": 1.8085005170721287e-05, + "loss": 0.3148769736289978, + "step": 1811 + }, + { + "epoch": 0.4812109945558359, + "grad_norm": 0.9998987744353804, + "learning_rate": 1.8082420445917727e-05, + "loss": 0.30645644664764404, + "step": 1812 + }, + { + "epoch": 0.48147656353737883, + "grad_norm": 0.9765412034449941, + "learning_rate": 1.807983416291883e-05, + "loss": 0.2978900969028473, + "step": 1813 + }, + { + "epoch": 0.4817421325189218, + "grad_norm": 1.1281577444413164, + "learning_rate": 1.8077246322223194e-05, + "loss": 0.34340181946754456, + "step": 1814 + }, + { + "epoch": 0.48200770150046474, + "grad_norm": 1.0940690010095575, + "learning_rate": 1.8074656924329733e-05, + "loss": 0.3272106349468231, + "step": 1815 + }, + { + "epoch": 0.4822732704820077, + "grad_norm": 1.0823130111098402, + "learning_rate": 1.807206596973765e-05, + "loss": 0.31061962246894836, + "step": 1816 + }, + { + "epoch": 0.48253883946355064, + "grad_norm": 1.1134329507970786, + "learning_rate": 1.8069473458946445e-05, + "loss": 0.28947243094444275, + "step": 1817 + }, + { + "epoch": 0.4828044084450936, + "grad_norm": 1.066867737773279, + "learning_rate": 1.8066879392455932e-05, + "loss": 0.35057532787323, + "step": 1818 + }, + { + "epoch": 0.48306997742663654, + "grad_norm": 1.5202577425125505, + "learning_rate": 1.8064283770766212e-05, + "loss": 0.31032001972198486, + "step": 1819 + }, + { + "epoch": 0.48333554640817955, + "grad_norm": 1.1166414917810035, + "learning_rate": 
1.8061686594377685e-05, + "loss": 0.3802293539047241, + "step": 1820 + }, + { + "epoch": 0.4836011153897225, + "grad_norm": 1.122052528401037, + "learning_rate": 1.8059087863791066e-05, + "loss": 0.3306402564048767, + "step": 1821 + }, + { + "epoch": 0.48386668437126545, + "grad_norm": 1.051177925612534, + "learning_rate": 1.8056487579507352e-05, + "loss": 0.32170724868774414, + "step": 1822 + }, + { + "epoch": 0.4841322533528084, + "grad_norm": 1.0182895505748566, + "learning_rate": 1.8053885742027854e-05, + "loss": 0.35058924555778503, + "step": 1823 + }, + { + "epoch": 0.48439782233435136, + "grad_norm": 1.079491665486815, + "learning_rate": 1.8051282351854168e-05, + "loss": 0.3796595335006714, + "step": 1824 + }, + { + "epoch": 0.4846633913158943, + "grad_norm": 1.0882057457557335, + "learning_rate": 1.8048677409488205e-05, + "loss": 0.28997284173965454, + "step": 1825 + }, + { + "epoch": 0.48492896029743726, + "grad_norm": 1.7307038017833063, + "learning_rate": 1.804607091543216e-05, + "loss": 0.35110151767730713, + "step": 1826 + }, + { + "epoch": 0.4851945292789802, + "grad_norm": 1.1036882170711018, + "learning_rate": 1.8043462870188535e-05, + "loss": 0.3194088637828827, + "step": 1827 + }, + { + "epoch": 0.48546009826052317, + "grad_norm": 1.0664676604065728, + "learning_rate": 1.8040853274260137e-05, + "loss": 0.28777945041656494, + "step": 1828 + }, + { + "epoch": 0.4857256672420661, + "grad_norm": 1.0702584286398438, + "learning_rate": 1.803824212815006e-05, + "loss": 0.3642069697380066, + "step": 1829 + }, + { + "epoch": 0.48599123622360907, + "grad_norm": 1.0626897024145745, + "learning_rate": 1.80356294323617e-05, + "loss": 0.32396575808525085, + "step": 1830 + }, + { + "epoch": 0.486256805205152, + "grad_norm": 1.205959051296984, + "learning_rate": 1.8033015187398758e-05, + "loss": 0.36421436071395874, + "step": 1831 + }, + { + "epoch": 0.486522374186695, + "grad_norm": 1.0011906322370974, + "learning_rate": 1.8030399393765227e-05, + "loss": 
0.3170832395553589, + "step": 1832 + }, + { + "epoch": 0.4867879431682379, + "grad_norm": 0.9739220394650455, + "learning_rate": 1.8027782051965408e-05, + "loss": 0.3003416359424591, + "step": 1833 + }, + { + "epoch": 0.48705351214978093, + "grad_norm": 1.0701369618567955, + "learning_rate": 1.802516316250388e-05, + "loss": 0.30362898111343384, + "step": 1834 + }, + { + "epoch": 0.4873190811313239, + "grad_norm": 1.0466563888798912, + "learning_rate": 1.802254272588555e-05, + "loss": 0.32721444964408875, + "step": 1835 + }, + { + "epoch": 0.48758465011286684, + "grad_norm": 1.345049864677536, + "learning_rate": 1.8019920742615596e-05, + "loss": 0.317483514547348, + "step": 1836 + }, + { + "epoch": 0.4878502190944098, + "grad_norm": 1.0589953518283157, + "learning_rate": 1.801729721319951e-05, + "loss": 0.2928479015827179, + "step": 1837 + }, + { + "epoch": 0.48811578807595274, + "grad_norm": 1.1098495840377043, + "learning_rate": 1.8014672138143073e-05, + "loss": 0.3425772190093994, + "step": 1838 + }, + { + "epoch": 0.4883813570574957, + "grad_norm": 1.0286414092040284, + "learning_rate": 1.801204551795238e-05, + "loss": 0.334087997674942, + "step": 1839 + }, + { + "epoch": 0.48864692603903864, + "grad_norm": 1.0797374159140127, + "learning_rate": 1.80094173531338e-05, + "loss": 0.3186641335487366, + "step": 1840 + }, + { + "epoch": 0.4889124950205816, + "grad_norm": 1.0361897985848911, + "learning_rate": 1.800678764419401e-05, + "loss": 0.3153733015060425, + "step": 1841 + }, + { + "epoch": 0.48917806400212455, + "grad_norm": 1.070217807683518, + "learning_rate": 1.8004156391640004e-05, + "loss": 0.3323214054107666, + "step": 1842 + }, + { + "epoch": 0.4894436329836675, + "grad_norm": 0.9455521865874897, + "learning_rate": 1.8001523595979043e-05, + "loss": 0.2856762409210205, + "step": 1843 + }, + { + "epoch": 0.48970920196521045, + "grad_norm": 1.0256135363684138, + "learning_rate": 1.79988892577187e-05, + "loss": 0.32493725419044495, + "step": 1844 + }, + { + 
"epoch": 0.4899747709467534, + "grad_norm": 1.1082860888483268, + "learning_rate": 1.7996253377366846e-05, + "loss": 0.350448876619339, + "step": 1845 + }, + { + "epoch": 0.49024033992829635, + "grad_norm": 1.096249407467401, + "learning_rate": 1.7993615955431648e-05, + "loss": 0.32246965169906616, + "step": 1846 + }, + { + "epoch": 0.4905059089098393, + "grad_norm": 0.9715072313794847, + "learning_rate": 1.799097699242157e-05, + "loss": 0.302636057138443, + "step": 1847 + }, + { + "epoch": 0.4907714778913823, + "grad_norm": 1.1573319310132777, + "learning_rate": 1.7988336488845374e-05, + "loss": 0.34280693531036377, + "step": 1848 + }, + { + "epoch": 0.49103704687292526, + "grad_norm": 1.1205814585182334, + "learning_rate": 1.7985694445212118e-05, + "loss": 0.3650673031806946, + "step": 1849 + }, + { + "epoch": 0.4913026158544682, + "grad_norm": 1.1348057531260405, + "learning_rate": 1.798305086203115e-05, + "loss": 0.33800822496414185, + "step": 1850 + }, + { + "epoch": 0.49156818483601117, + "grad_norm": 1.0428655272942455, + "learning_rate": 1.7980405739812134e-05, + "loss": 0.31522083282470703, + "step": 1851 + }, + { + "epoch": 0.4918337538175541, + "grad_norm": 1.177464907100392, + "learning_rate": 1.7977759079065003e-05, + "loss": 0.3374335765838623, + "step": 1852 + }, + { + "epoch": 0.49209932279909707, + "grad_norm": 1.060278247692231, + "learning_rate": 1.7975110880300018e-05, + "loss": 0.33803191781044006, + "step": 1853 + }, + { + "epoch": 0.49236489178064, + "grad_norm": 1.0982376140773644, + "learning_rate": 1.797246114402771e-05, + "loss": 0.37764933705329895, + "step": 1854 + }, + { + "epoch": 0.492630460762183, + "grad_norm": 0.9654297547716862, + "learning_rate": 1.796980987075892e-05, + "loss": 0.3075840473175049, + "step": 1855 + }, + { + "epoch": 0.4928960297437259, + "grad_norm": 0.9768928030686648, + "learning_rate": 1.7967157061004782e-05, + "loss": 0.306305855512619, + "step": 1856 + }, + { + "epoch": 0.4931615987252689, + "grad_norm": 
1.0225684543938522, + "learning_rate": 1.796450271527673e-05, + "loss": 0.3474302291870117, + "step": 1857 + }, + { + "epoch": 0.49342716770681183, + "grad_norm": 1.0243106870487633, + "learning_rate": 1.7961846834086483e-05, + "loss": 0.31059685349464417, + "step": 1858 + }, + { + "epoch": 0.4936927366883548, + "grad_norm": 1.0236396527349367, + "learning_rate": 1.795918941794607e-05, + "loss": 0.346218079328537, + "step": 1859 + }, + { + "epoch": 0.49395830566989773, + "grad_norm": 0.9969229384493907, + "learning_rate": 1.7956530467367805e-05, + "loss": 0.28371214866638184, + "step": 1860 + }, + { + "epoch": 0.4942238746514407, + "grad_norm": 0.8979156608776232, + "learning_rate": 1.7953869982864306e-05, + "loss": 0.27775150537490845, + "step": 1861 + }, + { + "epoch": 0.4944894436329837, + "grad_norm": 1.279703247293047, + "learning_rate": 1.795120796494848e-05, + "loss": 0.328782856464386, + "step": 1862 + }, + { + "epoch": 0.49475501261452665, + "grad_norm": 1.0950381369417217, + "learning_rate": 1.7948544414133534e-05, + "loss": 0.33220064640045166, + "step": 1863 + }, + { + "epoch": 0.4950205815960696, + "grad_norm": 1.0528449584388764, + "learning_rate": 1.794587933093297e-05, + "loss": 0.32681554555892944, + "step": 1864 + }, + { + "epoch": 0.49528615057761255, + "grad_norm": 1.1023465974826758, + "learning_rate": 1.7943212715860586e-05, + "loss": 0.32202866673469543, + "step": 1865 + }, + { + "epoch": 0.4955517195591555, + "grad_norm": 2.266456857585339, + "learning_rate": 1.7940544569430468e-05, + "loss": 0.3051350712776184, + "step": 1866 + }, + { + "epoch": 0.49581728854069845, + "grad_norm": 1.1617568134775966, + "learning_rate": 1.793787489215701e-05, + "loss": 0.3924705386161804, + "step": 1867 + }, + { + "epoch": 0.4960828575222414, + "grad_norm": 1.018817969430421, + "learning_rate": 1.793520368455489e-05, + "loss": 0.30267882347106934, + "step": 1868 + }, + { + "epoch": 0.49634842650378436, + "grad_norm": 1.0585020042998596, + "learning_rate": 
1.793253094713909e-05, + "loss": 0.3150729238986969, + "step": 1869 + }, + { + "epoch": 0.4966139954853273, + "grad_norm": 1.314679145900761, + "learning_rate": 1.7929856680424872e-05, + "loss": 0.33814147114753723, + "step": 1870 + }, + { + "epoch": 0.49687956446687026, + "grad_norm": 1.010460021909887, + "learning_rate": 1.7927180884927814e-05, + "loss": 0.31929856538772583, + "step": 1871 + }, + { + "epoch": 0.4971451334484132, + "grad_norm": 1.1376790681693039, + "learning_rate": 1.7924503561163775e-05, + "loss": 0.3797461688518524, + "step": 1872 + }, + { + "epoch": 0.49741070242995616, + "grad_norm": 1.057594588942085, + "learning_rate": 1.792182470964891e-05, + "loss": 0.3056377172470093, + "step": 1873 + }, + { + "epoch": 0.4976762714114991, + "grad_norm": 1.1254473942016883, + "learning_rate": 1.7919144330899668e-05, + "loss": 0.3526398539543152, + "step": 1874 + }, + { + "epoch": 0.49794184039304207, + "grad_norm": 1.0289140670533532, + "learning_rate": 1.79164624254328e-05, + "loss": 0.3183595538139343, + "step": 1875 + }, + { + "epoch": 0.4982074093745851, + "grad_norm": 1.1908370019011798, + "learning_rate": 1.791377899376534e-05, + "loss": 0.3604113459587097, + "step": 1876 + }, + { + "epoch": 0.498472978356128, + "grad_norm": 1.1651856770093412, + "learning_rate": 1.7911094036414623e-05, + "loss": 0.3219848573207855, + "step": 1877 + }, + { + "epoch": 0.498738547337671, + "grad_norm": 1.0586801467718077, + "learning_rate": 1.7908407553898282e-05, + "loss": 0.28773394227027893, + "step": 1878 + }, + { + "epoch": 0.49900411631921393, + "grad_norm": 1.0649509880321448, + "learning_rate": 1.7905719546734233e-05, + "loss": 0.31453996896743774, + "step": 1879 + }, + { + "epoch": 0.4992696853007569, + "grad_norm": 0.9878415524405192, + "learning_rate": 1.7903030015440696e-05, + "loss": 0.2947153151035309, + "step": 1880 + }, + { + "epoch": 0.49953525428229983, + "grad_norm": 1.0652111521233423, + "learning_rate": 1.7900338960536178e-05, + "loss": 
0.313723087310791, + "step": 1881 + }, + { + "epoch": 0.4998008232638428, + "grad_norm": 1.0853994840945123, + "learning_rate": 1.7897646382539485e-05, + "loss": 0.3385108709335327, + "step": 1882 + }, + { + "epoch": 0.5000663922453857, + "grad_norm": 1.0993457819479324, + "learning_rate": 1.7894952281969712e-05, + "loss": 0.31417039036750793, + "step": 1883 + }, + { + "epoch": 0.5003319612269287, + "grad_norm": 1.1452192213941934, + "learning_rate": 1.7892256659346253e-05, + "loss": 0.3555717468261719, + "step": 1884 + }, + { + "epoch": 0.5005975302084716, + "grad_norm": 1.1989261836629121, + "learning_rate": 1.7889559515188793e-05, + "loss": 0.3724518120288849, + "step": 1885 + }, + { + "epoch": 0.5008630991900146, + "grad_norm": 1.0516015708006068, + "learning_rate": 1.7886860850017306e-05, + "loss": 0.32646167278289795, + "step": 1886 + }, + { + "epoch": 0.5011286681715575, + "grad_norm": 1.079300223054909, + "learning_rate": 1.7884160664352062e-05, + "loss": 0.31072959303855896, + "step": 1887 + }, + { + "epoch": 0.5013942371531005, + "grad_norm": 0.9518526173941219, + "learning_rate": 1.7881458958713628e-05, + "loss": 0.26987242698669434, + "step": 1888 + }, + { + "epoch": 0.5016598061346434, + "grad_norm": 0.9908294117764815, + "learning_rate": 1.787875573362286e-05, + "loss": 0.30105817317962646, + "step": 1889 + }, + { + "epoch": 0.5019253751161864, + "grad_norm": 1.0444226583374554, + "learning_rate": 1.7876050989600908e-05, + "loss": 0.31277188658714294, + "step": 1890 + }, + { + "epoch": 0.5021909440977294, + "grad_norm": 1.0192470233304842, + "learning_rate": 1.7873344727169214e-05, + "loss": 0.31068161129951477, + "step": 1891 + }, + { + "epoch": 0.5024565130792723, + "grad_norm": 1.0797105219167356, + "learning_rate": 1.7870636946849512e-05, + "loss": 0.3491121530532837, + "step": 1892 + }, + { + "epoch": 0.5027220820608153, + "grad_norm": 1.0753654491775293, + "learning_rate": 1.7867927649163838e-05, + "loss": 0.3223581612110138, + "step": 1893 + }, 
+ { + "epoch": 0.5029876510423582, + "grad_norm": 1.1295999155195493, + "learning_rate": 1.7865216834634506e-05, + "loss": 0.345224529504776, + "step": 1894 + }, + { + "epoch": 0.5032532200239012, + "grad_norm": 1.1419032071310418, + "learning_rate": 1.7862504503784123e-05, + "loss": 0.3408205211162567, + "step": 1895 + }, + { + "epoch": 0.5035187890054441, + "grad_norm": 0.9713066472066385, + "learning_rate": 1.7859790657135608e-05, + "loss": 0.2680068016052246, + "step": 1896 + }, + { + "epoch": 0.5037843579869872, + "grad_norm": 0.9186813995364894, + "learning_rate": 1.7857075295212148e-05, + "loss": 0.29733535647392273, + "step": 1897 + }, + { + "epoch": 0.5040499269685301, + "grad_norm": 1.1196248802118025, + "learning_rate": 1.785435841853724e-05, + "loss": 0.34820133447647095, + "step": 1898 + }, + { + "epoch": 0.5043154959500731, + "grad_norm": 1.134445876132798, + "learning_rate": 1.785164002763466e-05, + "loss": 0.3306594491004944, + "step": 1899 + }, + { + "epoch": 0.504581064931616, + "grad_norm": 1.0579272410020724, + "learning_rate": 1.7848920123028482e-05, + "loss": 0.3166846036911011, + "step": 1900 + }, + { + "epoch": 0.504846633913159, + "grad_norm": 1.2213509498849395, + "learning_rate": 1.784619870524308e-05, + "loss": 0.3406408727169037, + "step": 1901 + }, + { + "epoch": 0.5051122028947019, + "grad_norm": 1.0410168562106317, + "learning_rate": 1.78434757748031e-05, + "loss": 0.36358171701431274, + "step": 1902 + }, + { + "epoch": 0.5053777718762449, + "grad_norm": 1.0510382236040618, + "learning_rate": 1.7840751332233498e-05, + "loss": 0.34045761823654175, + "step": 1903 + }, + { + "epoch": 0.5056433408577878, + "grad_norm": 1.0566120463915532, + "learning_rate": 1.783802537805951e-05, + "loss": 0.3442475199699402, + "step": 1904 + }, + { + "epoch": 0.5059089098393308, + "grad_norm": 1.1632822330113848, + "learning_rate": 1.7835297912806675e-05, + "loss": 0.3488585650920868, + "step": 1905 + }, + { + "epoch": 0.5061744788208737, + "grad_norm": 
1.098650773563784, + "learning_rate": 1.7832568937000808e-05, + "loss": 0.3340107500553131, + "step": 1906 + }, + { + "epoch": 0.5064400478024167, + "grad_norm": 1.0195614065654457, + "learning_rate": 1.7829838451168027e-05, + "loss": 0.3206177353858948, + "step": 1907 + }, + { + "epoch": 0.5067056167839596, + "grad_norm": 1.0219563874782234, + "learning_rate": 1.782710645583473e-05, + "loss": 0.2851010262966156, + "step": 1908 + }, + { + "epoch": 0.5069711857655026, + "grad_norm": 1.0249326570563306, + "learning_rate": 1.782437295152763e-05, + "loss": 0.31850844621658325, + "step": 1909 + }, + { + "epoch": 0.5072367547470455, + "grad_norm": 1.0890541355083159, + "learning_rate": 1.7821637938773704e-05, + "loss": 0.3343108892440796, + "step": 1910 + }, + { + "epoch": 0.5075023237285885, + "grad_norm": 1.1131994842325255, + "learning_rate": 1.781890141810023e-05, + "loss": 0.3423745930194855, + "step": 1911 + }, + { + "epoch": 0.5077678927101315, + "grad_norm": 1.057536319451762, + "learning_rate": 1.7816163390034775e-05, + "loss": 0.30980780720710754, + "step": 1912 + }, + { + "epoch": 0.5080334616916744, + "grad_norm": 1.0099692843485935, + "learning_rate": 1.7813423855105203e-05, + "loss": 0.31217479705810547, + "step": 1913 + }, + { + "epoch": 0.5082990306732174, + "grad_norm": 1.0721675523916532, + "learning_rate": 1.7810682813839664e-05, + "loss": 0.34741947054862976, + "step": 1914 + }, + { + "epoch": 0.5085645996547603, + "grad_norm": 1.1098427332228447, + "learning_rate": 1.7807940266766595e-05, + "loss": 0.32275527715682983, + "step": 1915 + }, + { + "epoch": 0.5088301686363033, + "grad_norm": 1.1130434711054393, + "learning_rate": 1.7805196214414728e-05, + "loss": 0.32760411500930786, + "step": 1916 + }, + { + "epoch": 0.5090957376178462, + "grad_norm": 1.1445787919507704, + "learning_rate": 1.7802450657313086e-05, + "loss": 0.3877720832824707, + "step": 1917 + }, + { + "epoch": 0.5093613065993892, + "grad_norm": 1.1135916509560913, + "learning_rate": 
1.779970359599098e-05, + "loss": 0.33458876609802246, + "step": 1918 + }, + { + "epoch": 0.5096268755809321, + "grad_norm": 0.9826034605244246, + "learning_rate": 1.7796955030978007e-05, + "loss": 0.30603206157684326, + "step": 1919 + }, + { + "epoch": 0.5098924445624751, + "grad_norm": 0.9902684589377142, + "learning_rate": 1.7794204962804063e-05, + "loss": 0.2920286953449249, + "step": 1920 + }, + { + "epoch": 0.510158013544018, + "grad_norm": 1.1034173597508874, + "learning_rate": 1.7791453391999325e-05, + "loss": 0.32407981157302856, + "step": 1921 + }, + { + "epoch": 0.510423582525561, + "grad_norm": 1.3200648964540613, + "learning_rate": 1.7788700319094263e-05, + "loss": 0.30423563718795776, + "step": 1922 + }, + { + "epoch": 0.5106891515071039, + "grad_norm": 1.1213502448496324, + "learning_rate": 1.7785945744619642e-05, + "loss": 0.34691399335861206, + "step": 1923 + }, + { + "epoch": 0.5109547204886469, + "grad_norm": 1.0498801582672959, + "learning_rate": 1.7783189669106503e-05, + "loss": 0.3217603266239166, + "step": 1924 + }, + { + "epoch": 0.5112202894701899, + "grad_norm": 1.1943957961346587, + "learning_rate": 1.7780432093086198e-05, + "loss": 0.365132212638855, + "step": 1925 + }, + { + "epoch": 0.5114858584517329, + "grad_norm": 0.9783494867108459, + "learning_rate": 1.7777673017090344e-05, + "loss": 0.29662930965423584, + "step": 1926 + }, + { + "epoch": 0.5117514274332758, + "grad_norm": 1.0707541061431447, + "learning_rate": 1.7774912441650857e-05, + "loss": 0.3324819803237915, + "step": 1927 + }, + { + "epoch": 0.5120169964148188, + "grad_norm": 1.0040789031204058, + "learning_rate": 1.7772150367299953e-05, + "loss": 0.29331067204475403, + "step": 1928 + }, + { + "epoch": 0.5122825653963617, + "grad_norm": 1.064062495235822, + "learning_rate": 1.7769386794570117e-05, + "loss": 0.3158259987831116, + "step": 1929 + }, + { + "epoch": 0.5125481343779047, + "grad_norm": 1.020159871349018, + "learning_rate": 1.7766621723994145e-05, + "loss": 
0.2824791967868805, + "step": 1930 + }, + { + "epoch": 0.5128137033594476, + "grad_norm": 1.0493215169042918, + "learning_rate": 1.7763855156105097e-05, + "loss": 0.2690732777118683, + "step": 1931 + }, + { + "epoch": 0.5130792723409906, + "grad_norm": 1.043157004637876, + "learning_rate": 1.7761087091436346e-05, + "loss": 0.31360942125320435, + "step": 1932 + }, + { + "epoch": 0.5133448413225336, + "grad_norm": 0.9858891902519169, + "learning_rate": 1.7758317530521535e-05, + "loss": 0.28334349393844604, + "step": 1933 + }, + { + "epoch": 0.5136104103040765, + "grad_norm": 1.1739380172138798, + "learning_rate": 1.7755546473894604e-05, + "loss": 0.3857404589653015, + "step": 1934 + }, + { + "epoch": 0.5138759792856195, + "grad_norm": 1.0280582546011092, + "learning_rate": 1.7752773922089784e-05, + "loss": 0.2852492332458496, + "step": 1935 + }, + { + "epoch": 0.5141415482671624, + "grad_norm": 1.003050995152578, + "learning_rate": 1.7749999875641585e-05, + "loss": 0.2959831953048706, + "step": 1936 + }, + { + "epoch": 0.5144071172487054, + "grad_norm": 1.100974201889633, + "learning_rate": 1.7747224335084815e-05, + "loss": 0.3129635453224182, + "step": 1937 + }, + { + "epoch": 0.5146726862302483, + "grad_norm": 1.0336946735940622, + "learning_rate": 1.774444730095456e-05, + "loss": 0.31391531229019165, + "step": 1938 + }, + { + "epoch": 0.5149382552117913, + "grad_norm": 1.0155253897885985, + "learning_rate": 1.7741668773786202e-05, + "loss": 0.30274757742881775, + "step": 1939 + }, + { + "epoch": 0.5152038241933342, + "grad_norm": 1.026561688701391, + "learning_rate": 1.7738888754115413e-05, + "loss": 0.29162222146987915, + "step": 1940 + }, + { + "epoch": 0.5154693931748772, + "grad_norm": 1.045931473256506, + "learning_rate": 1.7736107242478143e-05, + "loss": 0.30358970165252686, + "step": 1941 + }, + { + "epoch": 0.5157349621564201, + "grad_norm": 1.11915386227621, + "learning_rate": 1.7733324239410634e-05, + "loss": 0.32268065214157104, + "step": 1942 + }, + { 
+ "epoch": 0.5160005311379631, + "grad_norm": 1.0626040245012975, + "learning_rate": 1.7730539745449417e-05, + "loss": 0.31925222277641296, + "step": 1943 + }, + { + "epoch": 0.516266100119506, + "grad_norm": 1.1170224886553113, + "learning_rate": 1.7727753761131312e-05, + "loss": 0.32883748412132263, + "step": 1944 + }, + { + "epoch": 0.516531669101049, + "grad_norm": 1.101510406621582, + "learning_rate": 1.7724966286993425e-05, + "loss": 0.3212829530239105, + "step": 1945 + }, + { + "epoch": 0.5167972380825919, + "grad_norm": 1.1477333753851342, + "learning_rate": 1.772217732357314e-05, + "loss": 0.32909759879112244, + "step": 1946 + }, + { + "epoch": 0.5170628070641349, + "grad_norm": 33.3722959000957, + "learning_rate": 1.7719386871408147e-05, + "loss": 0.3451213538646698, + "step": 1947 + }, + { + "epoch": 0.5173283760456778, + "grad_norm": 1.0792459943819739, + "learning_rate": 1.7716594931036402e-05, + "loss": 0.318422794342041, + "step": 1948 + }, + { + "epoch": 0.5175939450272208, + "grad_norm": 1.1243494025490273, + "learning_rate": 1.7713801502996166e-05, + "loss": 0.3165292739868164, + "step": 1949 + }, + { + "epoch": 0.5178595140087637, + "grad_norm": 1.1353818628503742, + "learning_rate": 1.7711006587825975e-05, + "loss": 0.3116700351238251, + "step": 1950 + }, + { + "epoch": 0.5181250829903067, + "grad_norm": 1.2005138291757869, + "learning_rate": 1.7708210186064656e-05, + "loss": 0.32102686166763306, + "step": 1951 + }, + { + "epoch": 0.5183906519718496, + "grad_norm": 1.079523368082095, + "learning_rate": 1.7705412298251323e-05, + "loss": 0.33025500178337097, + "step": 1952 + }, + { + "epoch": 0.5186562209533926, + "grad_norm": 1.2087703844513067, + "learning_rate": 1.7702612924925377e-05, + "loss": 0.36113062500953674, + "step": 1953 + }, + { + "epoch": 0.5189217899349357, + "grad_norm": 1.1242566727618883, + "learning_rate": 1.7699812066626503e-05, + "loss": 0.3092479109764099, + "step": 1954 + }, + { + "epoch": 0.5191873589164786, + "grad_norm": 
1.117146005158035, + "learning_rate": 1.769700972389467e-05, + "loss": 0.3389117419719696, + "step": 1955 + }, + { + "epoch": 0.5194529278980216, + "grad_norm": 1.1525168535902064, + "learning_rate": 1.7694205897270147e-05, + "loss": 0.3225803077220917, + "step": 1956 + }, + { + "epoch": 0.5197184968795645, + "grad_norm": 1.0237361691251219, + "learning_rate": 1.7691400587293467e-05, + "loss": 0.3226786255836487, + "step": 1957 + }, + { + "epoch": 0.5199840658611075, + "grad_norm": 1.0060672564491426, + "learning_rate": 1.7688593794505466e-05, + "loss": 0.27708399295806885, + "step": 1958 + }, + { + "epoch": 0.5202496348426504, + "grad_norm": 1.0763214880079806, + "learning_rate": 1.768578551944726e-05, + "loss": 0.36100950837135315, + "step": 1959 + }, + { + "epoch": 0.5205152038241934, + "grad_norm": 1.043549985204807, + "learning_rate": 1.768297576266025e-05, + "loss": 0.3138211965560913, + "step": 1960 + }, + { + "epoch": 0.5207807728057363, + "grad_norm": 1.0618046264640966, + "learning_rate": 1.7680164524686128e-05, + "loss": 0.33959656953811646, + "step": 1961 + }, + { + "epoch": 0.5210463417872793, + "grad_norm": 0.9826913420332539, + "learning_rate": 1.7677351806066863e-05, + "loss": 0.3093605637550354, + "step": 1962 + }, + { + "epoch": 0.5213119107688222, + "grad_norm": 1.13307401094871, + "learning_rate": 1.7674537607344717e-05, + "loss": 0.3098641633987427, + "step": 1963 + }, + { + "epoch": 0.5215774797503652, + "grad_norm": 1.0810255128706003, + "learning_rate": 1.767172192906223e-05, + "loss": 0.35172683000564575, + "step": 1964 + }, + { + "epoch": 0.5218430487319081, + "grad_norm": 1.0729896509671073, + "learning_rate": 1.7668904771762242e-05, + "loss": 0.3535798192024231, + "step": 1965 + }, + { + "epoch": 0.5221086177134511, + "grad_norm": 1.2521081937006913, + "learning_rate": 1.766608613598785e-05, + "loss": 0.36183854937553406, + "step": 1966 + }, + { + "epoch": 0.522374186694994, + "grad_norm": 1.0735439944400962, + "learning_rate": 
1.7663266022282473e-05, + "loss": 0.35995131731033325, + "step": 1967 + }, + { + "epoch": 0.522639755676537, + "grad_norm": 1.117054454049305, + "learning_rate": 1.766044443118978e-05, + "loss": 0.38672733306884766, + "step": 1968 + }, + { + "epoch": 0.5229053246580799, + "grad_norm": 1.0862044019422723, + "learning_rate": 1.765762136325375e-05, + "loss": 0.3389524221420288, + "step": 1969 + }, + { + "epoch": 0.5231708936396229, + "grad_norm": 0.9847521483407152, + "learning_rate": 1.7654796819018635e-05, + "loss": 0.3325779139995575, + "step": 1970 + }, + { + "epoch": 0.5234364626211658, + "grad_norm": 1.014607581135561, + "learning_rate": 1.7651970799028976e-05, + "loss": 0.328407347202301, + "step": 1971 + }, + { + "epoch": 0.5237020316027088, + "grad_norm": 0.9793310107257689, + "learning_rate": 1.764914330382959e-05, + "loss": 0.3050537705421448, + "step": 1972 + }, + { + "epoch": 0.5239676005842517, + "grad_norm": 1.1408686145630131, + "learning_rate": 1.7646314333965588e-05, + "loss": 0.35500285029411316, + "step": 1973 + }, + { + "epoch": 0.5242331695657947, + "grad_norm": 1.1035893819341516, + "learning_rate": 1.7643483889982364e-05, + "loss": 0.30319780111312866, + "step": 1974 + }, + { + "epoch": 0.5244987385473376, + "grad_norm": 1.0161223434375823, + "learning_rate": 1.7640651972425592e-05, + "loss": 0.315757691860199, + "step": 1975 + }, + { + "epoch": 0.5247643075288806, + "grad_norm": 1.0278713767432786, + "learning_rate": 1.7637818581841234e-05, + "loss": 0.28562331199645996, + "step": 1976 + }, + { + "epoch": 0.5250298765104235, + "grad_norm": 1.017204404946826, + "learning_rate": 1.763498371877553e-05, + "loss": 0.29798296093940735, + "step": 1977 + }, + { + "epoch": 0.5252954454919665, + "grad_norm": 1.1245986087835715, + "learning_rate": 1.763214738377501e-05, + "loss": 0.2923639416694641, + "step": 1978 + }, + { + "epoch": 0.5255610144735094, + "grad_norm": 1.0282257211254215, + "learning_rate": 1.7629309577386492e-05, + "loss": 
0.2858009934425354, + "step": 1979 + }, + { + "epoch": 0.5258265834550524, + "grad_norm": 1.1185725636940211, + "learning_rate": 1.7626470300157064e-05, + "loss": 0.3615952134132385, + "step": 1980 + }, + { + "epoch": 0.5260921524365954, + "grad_norm": 1.1357118701340632, + "learning_rate": 1.762362955263411e-05, + "loss": 0.36142098903656006, + "step": 1981 + }, + { + "epoch": 0.5263577214181384, + "grad_norm": 1.1305105783283786, + "learning_rate": 1.762078733536529e-05, + "loss": 0.3335961699485779, + "step": 1982 + }, + { + "epoch": 0.5266232903996814, + "grad_norm": 1.2367655641806865, + "learning_rate": 1.761794364889855e-05, + "loss": 0.34549272060394287, + "step": 1983 + }, + { + "epoch": 0.5268888593812243, + "grad_norm": 1.1166612317693478, + "learning_rate": 1.761509849378212e-05, + "loss": 0.3177812993526459, + "step": 1984 + }, + { + "epoch": 0.5271544283627673, + "grad_norm": 1.1485560676920734, + "learning_rate": 1.7612251870564515e-05, + "loss": 0.33191388845443726, + "step": 1985 + }, + { + "epoch": 0.5274199973443102, + "grad_norm": 1.0807821541967428, + "learning_rate": 1.7609403779794523e-05, + "loss": 0.30732038617134094, + "step": 1986 + }, + { + "epoch": 0.5276855663258532, + "grad_norm": 1.1038043700347457, + "learning_rate": 1.7606554222021226e-05, + "loss": 0.33012068271636963, + "step": 1987 + }, + { + "epoch": 0.5279511353073961, + "grad_norm": 1.2233212729045404, + "learning_rate": 1.760370319779399e-05, + "loss": 0.3396066427230835, + "step": 1988 + }, + { + "epoch": 0.5282167042889391, + "grad_norm": 1.0755028443639627, + "learning_rate": 1.7600850707662454e-05, + "loss": 0.29053401947021484, + "step": 1989 + }, + { + "epoch": 0.528482273270482, + "grad_norm": 1.0859289781343007, + "learning_rate": 1.7597996752176545e-05, + "loss": 0.32927206158638, + "step": 1990 + }, + { + "epoch": 0.528747842252025, + "grad_norm": 1.0494460781018915, + "learning_rate": 1.759514133188647e-05, + "loss": 0.309224933385849, + "step": 1991 + }, + { + 
"epoch": 0.5290134112335679, + "grad_norm": 1.0870307368096292, + "learning_rate": 1.7592284447342725e-05, + "loss": 0.31973862648010254, + "step": 1992 + }, + { + "epoch": 0.5292789802151109, + "grad_norm": 1.0491029702582455, + "learning_rate": 1.758942609909608e-05, + "loss": 0.3331080377101898, + "step": 1993 + }, + { + "epoch": 0.5295445491966538, + "grad_norm": 1.0710245753206995, + "learning_rate": 1.7586566287697592e-05, + "loss": 0.32755160331726074, + "step": 1994 + }, + { + "epoch": 0.5298101181781968, + "grad_norm": 1.0377451052992368, + "learning_rate": 1.7583705013698602e-05, + "loss": 0.31942498683929443, + "step": 1995 + }, + { + "epoch": 0.5300756871597397, + "grad_norm": 1.1665695354682926, + "learning_rate": 1.7580842277650723e-05, + "loss": 0.3199199438095093, + "step": 1996 + }, + { + "epoch": 0.5303412561412827, + "grad_norm": 0.9680761404148592, + "learning_rate": 1.7577978080105864e-05, + "loss": 0.28153708577156067, + "step": 1997 + }, + { + "epoch": 0.5306068251228256, + "grad_norm": 1.0336529884327843, + "learning_rate": 1.7575112421616203e-05, + "loss": 0.3050921559333801, + "step": 1998 + }, + { + "epoch": 0.5308723941043686, + "grad_norm": 1.0836881519572394, + "learning_rate": 1.7572245302734208e-05, + "loss": 0.3242149353027344, + "step": 1999 + }, + { + "epoch": 0.5311379630859115, + "grad_norm": 0.9889139549595165, + "learning_rate": 1.7569376724012622e-05, + "loss": 0.29947227239608765, + "step": 2000 + }, + { + "epoch": 0.5314035320674545, + "grad_norm": 1.132976441688301, + "learning_rate": 1.756650668600448e-05, + "loss": 0.3229755163192749, + "step": 2001 + }, + { + "epoch": 0.5316691010489975, + "grad_norm": 1.0802391073518836, + "learning_rate": 1.7563635189263086e-05, + "loss": 0.3544544577598572, + "step": 2002 + }, + { + "epoch": 0.5319346700305404, + "grad_norm": 1.0996284853033707, + "learning_rate": 1.756076223434203e-05, + "loss": 0.32807621359825134, + "step": 2003 + }, + { + "epoch": 0.5322002390120834, + 
"grad_norm": 0.9920629294688551, + "learning_rate": 1.7557887821795192e-05, + "loss": 0.3057190477848053, + "step": 2004 + }, + { + "epoch": 0.5324658079936263, + "grad_norm": 1.0234244423063892, + "learning_rate": 1.7555011952176716e-05, + "loss": 0.29419198632240295, + "step": 2005 + }, + { + "epoch": 0.5327313769751693, + "grad_norm": 0.9799120327217228, + "learning_rate": 1.755213462604104e-05, + "loss": 0.3232089877128601, + "step": 2006 + }, + { + "epoch": 0.5329969459567122, + "grad_norm": 1.0186576745896931, + "learning_rate": 1.7549255843942875e-05, + "loss": 0.29784274101257324, + "step": 2007 + }, + { + "epoch": 0.5332625149382552, + "grad_norm": 1.0470325382276877, + "learning_rate": 1.7546375606437216e-05, + "loss": 0.31421899795532227, + "step": 2008 + }, + { + "epoch": 0.5335280839197981, + "grad_norm": 1.0641694414781755, + "learning_rate": 1.7543493914079345e-05, + "loss": 0.30681121349334717, + "step": 2009 + }, + { + "epoch": 0.5337936529013412, + "grad_norm": 1.0092085906510277, + "learning_rate": 1.7540610767424813e-05, + "loss": 0.3114027976989746, + "step": 2010 + }, + { + "epoch": 0.5340592218828841, + "grad_norm": 1.0064230726553411, + "learning_rate": 1.753772616702946e-05, + "loss": 0.3030378520488739, + "step": 2011 + }, + { + "epoch": 0.5343247908644271, + "grad_norm": 1.1096181297712675, + "learning_rate": 1.75348401134494e-05, + "loss": 0.30272024869918823, + "step": 2012 + }, + { + "epoch": 0.53459035984597, + "grad_norm": 1.049795668852804, + "learning_rate": 1.7531952607241033e-05, + "loss": 0.35117241740226746, + "step": 2013 + }, + { + "epoch": 0.534855928827513, + "grad_norm": 1.2552056089457548, + "learning_rate": 1.7529063648961035e-05, + "loss": 0.297889769077301, + "step": 2014 + }, + { + "epoch": 0.5351214978090559, + "grad_norm": 1.1238332501182418, + "learning_rate": 1.752617323916636e-05, + "loss": 0.32858210802078247, + "step": 2015 + }, + { + "epoch": 0.5353870667905989, + "grad_norm": 1.117582559290418, + 
"learning_rate": 1.7523281378414246e-05, + "loss": 0.3095484673976898, + "step": 2016 + }, + { + "epoch": 0.5356526357721418, + "grad_norm": 1.1072331793921826, + "learning_rate": 1.752038806726222e-05, + "loss": 0.34490731358528137, + "step": 2017 + }, + { + "epoch": 0.5359182047536848, + "grad_norm": 1.1427367564985542, + "learning_rate": 1.751749330626806e-05, + "loss": 0.35144859552383423, + "step": 2018 + }, + { + "epoch": 0.5361837737352277, + "grad_norm": 1.0337528414474293, + "learning_rate": 1.751459709598985e-05, + "loss": 0.26337549090385437, + "step": 2019 + }, + { + "epoch": 0.5364493427167707, + "grad_norm": 1.0719958558069054, + "learning_rate": 1.7511699436985952e-05, + "loss": 0.3235297203063965, + "step": 2020 + }, + { + "epoch": 0.5367149116983136, + "grad_norm": 1.1655117185465573, + "learning_rate": 1.7508800329814993e-05, + "loss": 0.35195302963256836, + "step": 2021 + }, + { + "epoch": 0.5369804806798566, + "grad_norm": 1.0547432431007058, + "learning_rate": 1.7505899775035887e-05, + "loss": 0.3226467967033386, + "step": 2022 + }, + { + "epoch": 0.5372460496613995, + "grad_norm": 1.0406958245289468, + "learning_rate": 1.750299777320783e-05, + "loss": 0.30616605281829834, + "step": 2023 + }, + { + "epoch": 0.5375116186429425, + "grad_norm": 1.074902411593199, + "learning_rate": 1.7500094324890294e-05, + "loss": 0.3007400333881378, + "step": 2024 + }, + { + "epoch": 0.5377771876244855, + "grad_norm": 1.1883491645763606, + "learning_rate": 1.7497189430643025e-05, + "loss": 0.35409432649612427, + "step": 2025 + }, + { + "epoch": 0.5380427566060284, + "grad_norm": 1.6951314154408594, + "learning_rate": 1.7494283091026053e-05, + "loss": 0.33718281984329224, + "step": 2026 + }, + { + "epoch": 0.5383083255875714, + "grad_norm": 1.0940933435725269, + "learning_rate": 1.749137530659969e-05, + "loss": 0.3589650094509125, + "step": 2027 + }, + { + "epoch": 0.5385738945691143, + "grad_norm": 1.1114345705753812, + "learning_rate": 1.7488466077924525e-05, + 
"loss": 0.35314273834228516, + "step": 2028 + }, + { + "epoch": 0.5388394635506573, + "grad_norm": 1.017869922891923, + "learning_rate": 1.7485555405561412e-05, + "loss": 0.28393587470054626, + "step": 2029 + }, + { + "epoch": 0.5391050325322002, + "grad_norm": 1.0276825009259218, + "learning_rate": 1.7482643290071503e-05, + "loss": 0.3262496292591095, + "step": 2030 + }, + { + "epoch": 0.5393706015137432, + "grad_norm": 1.122887144479208, + "learning_rate": 1.7479729732016218e-05, + "loss": 0.3549670875072479, + "step": 2031 + }, + { + "epoch": 0.5396361704952861, + "grad_norm": 1.0211791251004596, + "learning_rate": 1.7476814731957253e-05, + "loss": 0.30668947100639343, + "step": 2032 + }, + { + "epoch": 0.5399017394768291, + "grad_norm": 0.9278865240006526, + "learning_rate": 1.747389829045659e-05, + "loss": 0.2942228317260742, + "step": 2033 + }, + { + "epoch": 0.540167308458372, + "grad_norm": 1.023956047651912, + "learning_rate": 1.7470980408076484e-05, + "loss": 0.3166583478450775, + "step": 2034 + }, + { + "epoch": 0.540432877439915, + "grad_norm": 1.1503051826481139, + "learning_rate": 1.7468061085379467e-05, + "loss": 0.35149675607681274, + "step": 2035 + }, + { + "epoch": 0.5406984464214579, + "grad_norm": 1.1081467050264138, + "learning_rate": 1.7465140322928353e-05, + "loss": 0.32645004987716675, + "step": 2036 + }, + { + "epoch": 0.5409640154030009, + "grad_norm": 1.1656339653416823, + "learning_rate": 1.7462218121286224e-05, + "loss": 0.3078027367591858, + "step": 2037 + }, + { + "epoch": 0.5412295843845439, + "grad_norm": 1.0310810248927436, + "learning_rate": 1.7459294481016452e-05, + "loss": 0.28726300597190857, + "step": 2038 + }, + { + "epoch": 0.5414951533660869, + "grad_norm": 1.028103971871598, + "learning_rate": 1.7456369402682675e-05, + "loss": 0.29330572485923767, + "step": 2039 + }, + { + "epoch": 0.5417607223476298, + "grad_norm": 1.176742297493161, + "learning_rate": 1.7453442886848818e-05, + "loss": 0.3151019215583801, + "step": 2040 + 
}, + { + "epoch": 0.5420262913291728, + "grad_norm": 1.0830810759861134, + "learning_rate": 1.745051493407908e-05, + "loss": 0.3267561197280884, + "step": 2041 + }, + { + "epoch": 0.5422918603107157, + "grad_norm": 1.0462822233377385, + "learning_rate": 1.7447585544937933e-05, + "loss": 0.2834410071372986, + "step": 2042 + }, + { + "epoch": 0.5425574292922587, + "grad_norm": 0.9922210453154783, + "learning_rate": 1.7444654719990128e-05, + "loss": 0.29896080493927, + "step": 2043 + }, + { + "epoch": 0.5428229982738016, + "grad_norm": 1.0716195406510356, + "learning_rate": 1.7441722459800695e-05, + "loss": 0.3084600865840912, + "step": 2044 + }, + { + "epoch": 0.5430885672553446, + "grad_norm": 1.100381998832612, + "learning_rate": 1.743878876493494e-05, + "loss": 0.3178163170814514, + "step": 2045 + }, + { + "epoch": 0.5433541362368876, + "grad_norm": 1.1512124937535644, + "learning_rate": 1.743585363595844e-05, + "loss": 0.32886385917663574, + "step": 2046 + }, + { + "epoch": 0.5436197052184305, + "grad_norm": 1.0499932799675828, + "learning_rate": 1.743291707343706e-05, + "loss": 0.31810784339904785, + "step": 2047 + }, + { + "epoch": 0.5438852741999735, + "grad_norm": 0.994229574171737, + "learning_rate": 1.7429979077936928e-05, + "loss": 0.3003198504447937, + "step": 2048 + }, + { + "epoch": 0.5441508431815164, + "grad_norm": 1.1622503660754158, + "learning_rate": 1.7427039650024462e-05, + "loss": 0.33889323472976685, + "step": 2049 + }, + { + "epoch": 0.5444164121630594, + "grad_norm": 1.062972427778211, + "learning_rate": 1.7424098790266343e-05, + "loss": 0.3238763213157654, + "step": 2050 + }, + { + "epoch": 0.5446819811446023, + "grad_norm": 1.3651581380225686, + "learning_rate": 1.742115649922954e-05, + "loss": 0.34304776787757874, + "step": 2051 + }, + { + "epoch": 0.5449475501261453, + "grad_norm": 1.1192647204238841, + "learning_rate": 1.741821277748128e-05, + "loss": 0.31528347730636597, + "step": 2052 + }, + { + "epoch": 0.5452131191076882, + 
"grad_norm": 1.0728286121769783, + "learning_rate": 1.7415267625589094e-05, + "loss": 0.2992726266384125, + "step": 2053 + }, + { + "epoch": 0.5454786880892312, + "grad_norm": 1.0217638219637288, + "learning_rate": 1.741232104412076e-05, + "loss": 0.31706419587135315, + "step": 2054 + }, + { + "epoch": 0.5457442570707741, + "grad_norm": 1.8373163603702176, + "learning_rate": 1.7409373033644355e-05, + "loss": 0.2887676954269409, + "step": 2055 + }, + { + "epoch": 0.5460098260523171, + "grad_norm": 1.1434290988558236, + "learning_rate": 1.740642359472821e-05, + "loss": 0.3410964906215668, + "step": 2056 + }, + { + "epoch": 0.54627539503386, + "grad_norm": 1.0501323660770627, + "learning_rate": 1.740347272794095e-05, + "loss": 0.3711693286895752, + "step": 2057 + }, + { + "epoch": 0.546540964015403, + "grad_norm": 1.10922453334831, + "learning_rate": 1.7400520433851457e-05, + "loss": 0.3512499928474426, + "step": 2058 + }, + { + "epoch": 0.5468065329969459, + "grad_norm": 1.0790222544341648, + "learning_rate": 1.739756671302891e-05, + "loss": 0.3136678636074066, + "step": 2059 + }, + { + "epoch": 0.5470721019784889, + "grad_norm": 1.0417668658369865, + "learning_rate": 1.7394611566042748e-05, + "loss": 0.2983730435371399, + "step": 2060 + }, + { + "epoch": 0.5473376709600318, + "grad_norm": 1.1233530419836393, + "learning_rate": 1.7391654993462686e-05, + "loss": 0.36603933572769165, + "step": 2061 + }, + { + "epoch": 0.5476032399415748, + "grad_norm": 1.1758952832381078, + "learning_rate": 1.7388696995858717e-05, + "loss": 0.3651789128780365, + "step": 2062 + }, + { + "epoch": 0.5478688089231177, + "grad_norm": 1.2065493864331982, + "learning_rate": 1.7385737573801108e-05, + "loss": 0.30580615997314453, + "step": 2063 + }, + { + "epoch": 0.5481343779046607, + "grad_norm": 0.981372496476623, + "learning_rate": 1.7382776727860406e-05, + "loss": 0.2630755305290222, + "step": 2064 + }, + { + "epoch": 0.5483999468862036, + "grad_norm": 1.0020540486713174, + 
"learning_rate": 1.7379814458607416e-05, + "loss": 0.2947537899017334, + "step": 2065 + }, + { + "epoch": 0.5486655158677467, + "grad_norm": 1.034048631807644, + "learning_rate": 1.737685076661324e-05, + "loss": 0.3119455873966217, + "step": 2066 + }, + { + "epoch": 0.5489310848492897, + "grad_norm": 1.052273536899897, + "learning_rate": 1.7373885652449237e-05, + "loss": 0.3162347972393036, + "step": 2067 + }, + { + "epoch": 0.5491966538308326, + "grad_norm": 1.2320011234530202, + "learning_rate": 1.7370919116687047e-05, + "loss": 0.34120452404022217, + "step": 2068 + }, + { + "epoch": 0.5494622228123756, + "grad_norm": 1.095244169583748, + "learning_rate": 1.7367951159898583e-05, + "loss": 0.3126780092716217, + "step": 2069 + }, + { + "epoch": 0.5497277917939185, + "grad_norm": 0.9591128480333501, + "learning_rate": 1.7364981782656033e-05, + "loss": 0.2833349406719208, + "step": 2070 + }, + { + "epoch": 0.5499933607754615, + "grad_norm": 1.0921809927618633, + "learning_rate": 1.7362010985531855e-05, + "loss": 0.31617453694343567, + "step": 2071 + }, + { + "epoch": 0.5502589297570044, + "grad_norm": 1.0809700153666713, + "learning_rate": 1.735903876909879e-05, + "loss": 0.31372442841529846, + "step": 2072 + }, + { + "epoch": 0.5505244987385474, + "grad_norm": 1.1616077591637106, + "learning_rate": 1.735606513392984e-05, + "loss": 0.3500489592552185, + "step": 2073 + }, + { + "epoch": 0.5507900677200903, + "grad_norm": 1.0373404262028456, + "learning_rate": 1.735309008059829e-05, + "loss": 0.3219031095504761, + "step": 2074 + }, + { + "epoch": 0.5510556367016333, + "grad_norm": 1.0701365395287485, + "learning_rate": 1.7350113609677694e-05, + "loss": 0.32419610023498535, + "step": 2075 + }, + { + "epoch": 0.5513212056831762, + "grad_norm": 1.1054492395059694, + "learning_rate": 1.7347135721741874e-05, + "loss": 0.34804612398147583, + "step": 2076 + }, + { + "epoch": 0.5515867746647192, + "grad_norm": 1.09814942010155, + "learning_rate": 1.7344156417364946e-05, + 
"loss": 0.33105939626693726, + "step": 2077 + }, + { + "epoch": 0.5518523436462621, + "grad_norm": 1.0139790776190714, + "learning_rate": 1.7341175697121273e-05, + "loss": 0.3426011800765991, + "step": 2078 + }, + { + "epoch": 0.5521179126278051, + "grad_norm": 1.1120942872149455, + "learning_rate": 1.7338193561585507e-05, + "loss": 0.33207643032073975, + "step": 2079 + }, + { + "epoch": 0.552383481609348, + "grad_norm": 0.9807946500665143, + "learning_rate": 1.7335210011332573e-05, + "loss": 0.31849467754364014, + "step": 2080 + }, + { + "epoch": 0.552649050590891, + "grad_norm": 1.081622565959563, + "learning_rate": 1.7332225046937655e-05, + "loss": 0.3549337685108185, + "step": 2081 + }, + { + "epoch": 0.5529146195724339, + "grad_norm": 0.9652343930669623, + "learning_rate": 1.7329238668976224e-05, + "loss": 0.2850857377052307, + "step": 2082 + }, + { + "epoch": 0.5531801885539769, + "grad_norm": 1.1370461672740964, + "learning_rate": 1.732625087802402e-05, + "loss": 0.3277609348297119, + "step": 2083 + }, + { + "epoch": 0.5534457575355198, + "grad_norm": 1.0712095451099939, + "learning_rate": 1.732326167465705e-05, + "loss": 0.2951444983482361, + "step": 2084 + }, + { + "epoch": 0.5537113265170628, + "grad_norm": 1.0893938459197319, + "learning_rate": 1.7320271059451597e-05, + "loss": 0.36634138226509094, + "step": 2085 + }, + { + "epoch": 0.5539768954986057, + "grad_norm": 1.060256238160636, + "learning_rate": 1.7317279032984222e-05, + "loss": 0.3407907783985138, + "step": 2086 + }, + { + "epoch": 0.5542424644801487, + "grad_norm": 1.0563310141876696, + "learning_rate": 1.7314285595831747e-05, + "loss": 0.34038978815078735, + "step": 2087 + }, + { + "epoch": 0.5545080334616916, + "grad_norm": 1.0558109709205228, + "learning_rate": 1.7311290748571273e-05, + "loss": 0.337898313999176, + "step": 2088 + }, + { + "epoch": 0.5547736024432346, + "grad_norm": 1.1543867929059073, + "learning_rate": 1.7308294491780175e-05, + "loss": 0.3250765800476074, + "step": 2089 + 
}, + { + "epoch": 0.5550391714247775, + "grad_norm": 1.101568217376945, + "learning_rate": 1.730529682603609e-05, + "loss": 0.31562721729278564, + "step": 2090 + }, + { + "epoch": 0.5553047404063205, + "grad_norm": 1.2678079753749867, + "learning_rate": 1.730229775191693e-05, + "loss": 0.32757896184921265, + "step": 2091 + }, + { + "epoch": 0.5555703093878634, + "grad_norm": 1.1010819086774664, + "learning_rate": 1.7299297270000894e-05, + "loss": 0.35861605405807495, + "step": 2092 + }, + { + "epoch": 0.5558358783694064, + "grad_norm": 1.0999873688088635, + "learning_rate": 1.7296295380866425e-05, + "loss": 0.3383220434188843, + "step": 2093 + }, + { + "epoch": 0.5561014473509495, + "grad_norm": 1.1431134206724336, + "learning_rate": 1.7293292085092263e-05, + "loss": 0.30144187808036804, + "step": 2094 + }, + { + "epoch": 0.5563670163324924, + "grad_norm": 1.0354659821546437, + "learning_rate": 1.72902873832574e-05, + "loss": 0.2626546323299408, + "step": 2095 + }, + { + "epoch": 0.5566325853140354, + "grad_norm": 1.0939710377386638, + "learning_rate": 1.7287281275941112e-05, + "loss": 0.3289363980293274, + "step": 2096 + }, + { + "epoch": 0.5568981542955783, + "grad_norm": 0.9797533003070389, + "learning_rate": 1.7284273763722943e-05, + "loss": 0.26631784439086914, + "step": 2097 + }, + { + "epoch": 0.5571637232771213, + "grad_norm": 1.0035421194069876, + "learning_rate": 1.7281264847182697e-05, + "loss": 0.3051939606666565, + "step": 2098 + }, + { + "epoch": 0.5574292922586642, + "grad_norm": 1.0515034870910809, + "learning_rate": 1.7278254526900468e-05, + "loss": 0.34456121921539307, + "step": 2099 + }, + { + "epoch": 0.5576948612402072, + "grad_norm": 1.2038994359149542, + "learning_rate": 1.72752428034566e-05, + "loss": 0.2747807502746582, + "step": 2100 + }, + { + "epoch": 0.5579604302217501, + "grad_norm": 2.186270123050143, + "learning_rate": 1.7272229677431723e-05, + "loss": 0.31111812591552734, + "step": 2101 + }, + { + "epoch": 0.5582259992032931, + 
"grad_norm": 1.0150701360001215, + "learning_rate": 1.7269215149406737e-05, + "loss": 0.29648226499557495, + "step": 2102 + }, + { + "epoch": 0.558491568184836, + "grad_norm": 0.9846402594569152, + "learning_rate": 1.72661992199628e-05, + "loss": 0.28303876519203186, + "step": 2103 + }, + { + "epoch": 0.558757137166379, + "grad_norm": 1.1069492435421613, + "learning_rate": 1.726318188968135e-05, + "loss": 0.30540165305137634, + "step": 2104 + }, + { + "epoch": 0.5590227061479219, + "grad_norm": 1.2177152582591586, + "learning_rate": 1.726016315914409e-05, + "loss": 0.31810393929481506, + "step": 2105 + }, + { + "epoch": 0.5592882751294649, + "grad_norm": 1.134577587954556, + "learning_rate": 1.7257143028933004e-05, + "loss": 0.33605068922042847, + "step": 2106 + }, + { + "epoch": 0.5595538441110078, + "grad_norm": 1.089019585879268, + "learning_rate": 1.725412149963033e-05, + "loss": 0.3340590298175812, + "step": 2107 + }, + { + "epoch": 0.5598194130925508, + "grad_norm": 0.9872121137775324, + "learning_rate": 1.7251098571818586e-05, + "loss": 0.29560500383377075, + "step": 2108 + }, + { + "epoch": 0.5600849820740937, + "grad_norm": 1.0964006197085026, + "learning_rate": 1.7248074246080555e-05, + "loss": 0.30100107192993164, + "step": 2109 + }, + { + "epoch": 0.5603505510556367, + "grad_norm": 1.1506338140671328, + "learning_rate": 1.7245048522999294e-05, + "loss": 0.35551172494888306, + "step": 2110 + }, + { + "epoch": 0.5606161200371796, + "grad_norm": 1.0513397818607815, + "learning_rate": 1.724202140315812e-05, + "loss": 0.3182663023471832, + "step": 2111 + }, + { + "epoch": 0.5608816890187226, + "grad_norm": 1.092960095111009, + "learning_rate": 1.723899288714064e-05, + "loss": 0.3160201609134674, + "step": 2112 + }, + { + "epoch": 0.5611472580002655, + "grad_norm": 1.0656744789709975, + "learning_rate": 1.72359629755307e-05, + "loss": 0.3126063942909241, + "step": 2113 + }, + { + "epoch": 0.5614128269818085, + "grad_norm": 1.0376603045942787, + 
"learning_rate": 1.723293166891244e-05, + "loss": 0.3222552239894867, + "step": 2114 + }, + { + "epoch": 0.5616783959633515, + "grad_norm": 1.1154320347150413, + "learning_rate": 1.722989896787026e-05, + "loss": 0.33601805567741394, + "step": 2115 + }, + { + "epoch": 0.5619439649448944, + "grad_norm": 1.0241046952841495, + "learning_rate": 1.722686487298883e-05, + "loss": 0.28679755330085754, + "step": 2116 + }, + { + "epoch": 0.5622095339264374, + "grad_norm": 0.9498185678215705, + "learning_rate": 1.722382938485308e-05, + "loss": 0.2895340323448181, + "step": 2117 + }, + { + "epoch": 0.5624751029079803, + "grad_norm": 1.3753225282493697, + "learning_rate": 1.7220792504048227e-05, + "loss": 0.310183048248291, + "step": 2118 + }, + { + "epoch": 0.5627406718895233, + "grad_norm": 0.9776305745351022, + "learning_rate": 1.7217754231159737e-05, + "loss": 0.2768586277961731, + "step": 2119 + }, + { + "epoch": 0.5630062408710662, + "grad_norm": 0.9838874956474448, + "learning_rate": 1.7214714566773358e-05, + "loss": 0.2785574793815613, + "step": 2120 + }, + { + "epoch": 0.5632718098526092, + "grad_norm": 1.1815363465765012, + "learning_rate": 1.72116735114751e-05, + "loss": 0.30544358491897583, + "step": 2121 + }, + { + "epoch": 0.5635373788341522, + "grad_norm": 1.0704755380783626, + "learning_rate": 1.7208631065851243e-05, + "loss": 0.31662559509277344, + "step": 2122 + }, + { + "epoch": 0.5638029478156952, + "grad_norm": 0.9893085866675072, + "learning_rate": 1.7205587230488335e-05, + "loss": 0.31466105580329895, + "step": 2123 + }, + { + "epoch": 0.5640685167972381, + "grad_norm": 1.1520731756820097, + "learning_rate": 1.720254200597319e-05, + "loss": 0.3471367359161377, + "step": 2124 + }, + { + "epoch": 0.5643340857787811, + "grad_norm": 1.056530578075146, + "learning_rate": 1.7199495392892892e-05, + "loss": 0.3325269818305969, + "step": 2125 + }, + { + "epoch": 0.564599654760324, + "grad_norm": 1.1040662937900534, + "learning_rate": 1.7196447391834797e-05, + 
"loss": 0.32423460483551025, + "step": 2126 + }, + { + "epoch": 0.564865223741867, + "grad_norm": 1.0403895710374138, + "learning_rate": 1.7193398003386514e-05, + "loss": 0.3083527088165283, + "step": 2127 + }, + { + "epoch": 0.5651307927234099, + "grad_norm": 1.1794029606730059, + "learning_rate": 1.7190347228135933e-05, + "loss": 0.3418716490268707, + "step": 2128 + }, + { + "epoch": 0.5653963617049529, + "grad_norm": 1.0509473075306943, + "learning_rate": 1.7187295066671214e-05, + "loss": 0.33037957549095154, + "step": 2129 + }, + { + "epoch": 0.5656619306864958, + "grad_norm": 1.229094630243538, + "learning_rate": 1.7184241519580767e-05, + "loss": 0.3383673131465912, + "step": 2130 + }, + { + "epoch": 0.5659274996680388, + "grad_norm": 0.9364933789266218, + "learning_rate": 1.718118658745329e-05, + "loss": 0.27756133675575256, + "step": 2131 + }, + { + "epoch": 0.5661930686495817, + "grad_norm": 1.1307081535546069, + "learning_rate": 1.717813027087773e-05, + "loss": 0.2987852692604065, + "step": 2132 + }, + { + "epoch": 0.5664586376311247, + "grad_norm": 1.0924971268375117, + "learning_rate": 1.717507257044331e-05, + "loss": 0.30016621947288513, + "step": 2133 + }, + { + "epoch": 0.5667242066126676, + "grad_norm": 1.0923612277165435, + "learning_rate": 1.7172013486739528e-05, + "loss": 0.31592345237731934, + "step": 2134 + }, + { + "epoch": 0.5669897755942106, + "grad_norm": 1.0932899901018698, + "learning_rate": 1.716895302035613e-05, + "loss": 0.3500048816204071, + "step": 2135 + }, + { + "epoch": 0.5672553445757536, + "grad_norm": 1.0529476139624208, + "learning_rate": 1.7165891171883134e-05, + "loss": 0.32069307565689087, + "step": 2136 + }, + { + "epoch": 0.5675209135572965, + "grad_norm": 1.10329279559138, + "learning_rate": 1.7162827941910837e-05, + "loss": 0.3100130558013916, + "step": 2137 + }, + { + "epoch": 0.5677864825388395, + "grad_norm": 1.080836142172887, + "learning_rate": 1.715976333102979e-05, + "loss": 0.3205985128879547, + "step": 2138 + }, 
+ { + "epoch": 0.5680520515203824, + "grad_norm": 1.0861679281182697, + "learning_rate": 1.715669733983081e-05, + "loss": 0.3243224024772644, + "step": 2139 + }, + { + "epoch": 0.5683176205019254, + "grad_norm": 1.0818895017967487, + "learning_rate": 1.7153629968904997e-05, + "loss": 0.3278832733631134, + "step": 2140 + }, + { + "epoch": 0.5685831894834683, + "grad_norm": 0.9949896264020713, + "learning_rate": 1.7150561218843693e-05, + "loss": 0.29137033224105835, + "step": 2141 + }, + { + "epoch": 0.5688487584650113, + "grad_norm": 1.0470808838345107, + "learning_rate": 1.7147491090238516e-05, + "loss": 0.3065168857574463, + "step": 2142 + }, + { + "epoch": 0.5691143274465542, + "grad_norm": 1.0368441449557109, + "learning_rate": 1.7144419583681354e-05, + "loss": 0.3367912173271179, + "step": 2143 + }, + { + "epoch": 0.5693798964280972, + "grad_norm": 1.086220090850542, + "learning_rate": 1.7141346699764357e-05, + "loss": 0.32278239727020264, + "step": 2144 + }, + { + "epoch": 0.5696454654096401, + "grad_norm": 1.080765529331453, + "learning_rate": 1.713827243907994e-05, + "loss": 0.2887166440486908, + "step": 2145 + }, + { + "epoch": 0.5699110343911831, + "grad_norm": 1.1353258061614586, + "learning_rate": 1.713519680222079e-05, + "loss": 0.33214619755744934, + "step": 2146 + }, + { + "epoch": 0.570176603372726, + "grad_norm": 1.1145274058321384, + "learning_rate": 1.7132119789779846e-05, + "loss": 0.2865470051765442, + "step": 2147 + }, + { + "epoch": 0.570442172354269, + "grad_norm": 1.1145678631141913, + "learning_rate": 1.7129041402350317e-05, + "loss": 0.32746967673301697, + "step": 2148 + }, + { + "epoch": 0.5707077413358119, + "grad_norm": 1.0454330804264187, + "learning_rate": 1.712596164052569e-05, + "loss": 0.3029513359069824, + "step": 2149 + }, + { + "epoch": 0.570973310317355, + "grad_norm": 0.9779058393705973, + "learning_rate": 1.7122880504899698e-05, + "loss": 0.3052698075771332, + "step": 2150 + }, + { + "epoch": 0.5712388792988979, + 
"grad_norm": 1.055591157713499, + "learning_rate": 1.7119797996066355e-05, + "loss": 0.29221272468566895, + "step": 2151 + }, + { + "epoch": 0.5715044482804409, + "grad_norm": 1.0014263274293047, + "learning_rate": 1.711671411461993e-05, + "loss": 0.3165368139743805, + "step": 2152 + }, + { + "epoch": 0.5717700172619838, + "grad_norm": 1.0763149059705845, + "learning_rate": 1.7113628861154953e-05, + "loss": 0.30877187848091125, + "step": 2153 + }, + { + "epoch": 0.5720355862435268, + "grad_norm": 1.0826550246568385, + "learning_rate": 1.711054223626623e-05, + "loss": 0.2985781729221344, + "step": 2154 + }, + { + "epoch": 0.5723011552250697, + "grad_norm": 1.1063225967671673, + "learning_rate": 1.7107454240548825e-05, + "loss": 0.3449699878692627, + "step": 2155 + }, + { + "epoch": 0.5725667242066127, + "grad_norm": 1.0430022801820942, + "learning_rate": 1.7104364874598066e-05, + "loss": 0.3219606578350067, + "step": 2156 + }, + { + "epoch": 0.5728322931881557, + "grad_norm": 1.0017795464639185, + "learning_rate": 1.710127413900955e-05, + "loss": 0.3059350550174713, + "step": 2157 + }, + { + "epoch": 0.5730978621696986, + "grad_norm": 1.0027463566346577, + "learning_rate": 1.7098182034379132e-05, + "loss": 0.29461371898651123, + "step": 2158 + }, + { + "epoch": 0.5733634311512416, + "grad_norm": 1.0159484116581767, + "learning_rate": 1.709508856130293e-05, + "loss": 0.2998795509338379, + "step": 2159 + }, + { + "epoch": 0.5736290001327845, + "grad_norm": 1.0092216110834475, + "learning_rate": 1.7091993720377336e-05, + "loss": 0.28214582800865173, + "step": 2160 + }, + { + "epoch": 0.5738945691143275, + "grad_norm": 1.2106483053766084, + "learning_rate": 1.708889751219899e-05, + "loss": 0.3036864697933197, + "step": 2161 + }, + { + "epoch": 0.5741601380958704, + "grad_norm": 1.1139097359759478, + "learning_rate": 1.7085799937364815e-05, + "loss": 0.34146320819854736, + "step": 2162 + }, + { + "epoch": 0.5744257070774134, + "grad_norm": 1.0631963944232283, + 
"learning_rate": 1.708270099647198e-05, + "loss": 0.33996909856796265, + "step": 2163 + }, + { + "epoch": 0.5746912760589563, + "grad_norm": 1.0779467399705778, + "learning_rate": 1.7079600690117924e-05, + "loss": 0.3308744728565216, + "step": 2164 + }, + { + "epoch": 0.5749568450404993, + "grad_norm": 1.0447240453690412, + "learning_rate": 1.707649901890035e-05, + "loss": 0.2945587933063507, + "step": 2165 + }, + { + "epoch": 0.5752224140220422, + "grad_norm": 1.0321317558144223, + "learning_rate": 1.7073395983417227e-05, + "loss": 0.30348697304725647, + "step": 2166 + }, + { + "epoch": 0.5754879830035852, + "grad_norm": 1.025806147580304, + "learning_rate": 1.707029158426678e-05, + "loss": 0.28789055347442627, + "step": 2167 + }, + { + "epoch": 0.5757535519851281, + "grad_norm": 1.168965754707192, + "learning_rate": 1.7067185822047502e-05, + "loss": 0.3026643693447113, + "step": 2168 + }, + { + "epoch": 0.5760191209666711, + "grad_norm": 1.1108861255752682, + "learning_rate": 1.7064078697358147e-05, + "loss": 0.34021061658859253, + "step": 2169 + }, + { + "epoch": 0.576284689948214, + "grad_norm": 1.1062563353075296, + "learning_rate": 1.7060970210797735e-05, + "loss": 0.32793867588043213, + "step": 2170 + }, + { + "epoch": 0.576550258929757, + "grad_norm": 1.1692826638365306, + "learning_rate": 1.705786036296554e-05, + "loss": 0.36144691705703735, + "step": 2171 + }, + { + "epoch": 0.5768158279112999, + "grad_norm": 1.1177501875227254, + "learning_rate": 1.7054749154461105e-05, + "loss": 0.3630291223526001, + "step": 2172 + }, + { + "epoch": 0.5770813968928429, + "grad_norm": 1.144365708172633, + "learning_rate": 1.705163658588424e-05, + "loss": 0.34964969754219055, + "step": 2173 + }, + { + "epoch": 0.5773469658743858, + "grad_norm": 1.0298961015626151, + "learning_rate": 1.7048522657835004e-05, + "loss": 0.2877815067768097, + "step": 2174 + }, + { + "epoch": 0.5776125348559288, + "grad_norm": 1.1148926749607628, + "learning_rate": 1.7045407370913732e-05, + 
"loss": 0.3185664713382721, + "step": 2175 + }, + { + "epoch": 0.5778781038374717, + "grad_norm": 1.0393243287048395, + "learning_rate": 1.704229072572101e-05, + "loss": 0.3035257160663605, + "step": 2176 + }, + { + "epoch": 0.5781436728190147, + "grad_norm": 1.048139429574759, + "learning_rate": 1.7039172722857695e-05, + "loss": 0.325702965259552, + "step": 2177 + }, + { + "epoch": 0.5784092418005577, + "grad_norm": 1.1046410504333486, + "learning_rate": 1.7036053362924896e-05, + "loss": 0.32837462425231934, + "step": 2178 + }, + { + "epoch": 0.5786748107821007, + "grad_norm": 1.066094854816524, + "learning_rate": 1.703293264652399e-05, + "loss": 0.3430028259754181, + "step": 2179 + }, + { + "epoch": 0.5789403797636437, + "grad_norm": 1.1007701198247044, + "learning_rate": 1.702981057425662e-05, + "loss": 0.32792964577674866, + "step": 2180 + }, + { + "epoch": 0.5792059487451866, + "grad_norm": 0.9964902607677808, + "learning_rate": 1.7026687146724675e-05, + "loss": 0.3037140965461731, + "step": 2181 + }, + { + "epoch": 0.5794715177267296, + "grad_norm": 0.9962684392556416, + "learning_rate": 1.7023562364530322e-05, + "loss": 0.33083540201187134, + "step": 2182 + }, + { + "epoch": 0.5797370867082725, + "grad_norm": 0.9979777099745417, + "learning_rate": 1.702043622827598e-05, + "loss": 0.3108663260936737, + "step": 2183 + }, + { + "epoch": 0.5800026556898155, + "grad_norm": 0.9618495492417584, + "learning_rate": 1.7017308738564336e-05, + "loss": 0.2939792573451996, + "step": 2184 + }, + { + "epoch": 0.5802682246713584, + "grad_norm": 1.1315656989934186, + "learning_rate": 1.7014179895998322e-05, + "loss": 0.3686106503009796, + "step": 2185 + }, + { + "epoch": 0.5805337936529014, + "grad_norm": 1.0524191997810952, + "learning_rate": 1.7011049701181152e-05, + "loss": 0.3497159779071808, + "step": 2186 + }, + { + "epoch": 0.5807993626344443, + "grad_norm": 1.0989364128809138, + "learning_rate": 1.7007918154716286e-05, + "loss": 0.31730401515960693, + "step": 2187 + 
}, + { + "epoch": 0.5810649316159873, + "grad_norm": 1.0000330799865447, + "learning_rate": 1.7004785257207456e-05, + "loss": 0.3064701557159424, + "step": 2188 + }, + { + "epoch": 0.5813305005975302, + "grad_norm": 1.1111458283716926, + "learning_rate": 1.7001651009258635e-05, + "loss": 0.37174129486083984, + "step": 2189 + }, + { + "epoch": 0.5815960695790732, + "grad_norm": 1.068050904458805, + "learning_rate": 1.699851541147408e-05, + "loss": 0.3548140823841095, + "step": 2190 + }, + { + "epoch": 0.5818616385606161, + "grad_norm": 1.2340650081251097, + "learning_rate": 1.6995378464458292e-05, + "loss": 0.3486049473285675, + "step": 2191 + }, + { + "epoch": 0.5821272075421591, + "grad_norm": 1.996025853729682, + "learning_rate": 1.6992240168816037e-05, + "loss": 0.3083210587501526, + "step": 2192 + }, + { + "epoch": 0.582392776523702, + "grad_norm": 1.0284637251594817, + "learning_rate": 1.6989100525152346e-05, + "loss": 0.3006829619407654, + "step": 2193 + }, + { + "epoch": 0.582658345505245, + "grad_norm": 1.103386023825705, + "learning_rate": 1.6985959534072502e-05, + "loss": 0.32856425642967224, + "step": 2194 + }, + { + "epoch": 0.5829239144867879, + "grad_norm": 1.1293873964177752, + "learning_rate": 1.6982817196182052e-05, + "loss": 0.3382526934146881, + "step": 2195 + }, + { + "epoch": 0.5831894834683309, + "grad_norm": 1.0326113865244562, + "learning_rate": 1.69796735120868e-05, + "loss": 0.3311583399772644, + "step": 2196 + }, + { + "epoch": 0.5834550524498738, + "grad_norm": 1.0267321140886136, + "learning_rate": 1.6976528482392815e-05, + "loss": 0.312778115272522, + "step": 2197 + }, + { + "epoch": 0.5837206214314168, + "grad_norm": 1.0148067463802801, + "learning_rate": 1.697338210770642e-05, + "loss": 0.2996736466884613, + "step": 2198 + }, + { + "epoch": 0.5839861904129597, + "grad_norm": 1.1885772355333009, + "learning_rate": 1.6970234388634192e-05, + "loss": 0.344571590423584, + "step": 2199 + }, + { + "epoch": 0.5842517593945027, + "grad_norm": 
0.9183671512098872, + "learning_rate": 1.6967085325782984e-05, + "loss": 0.25299468636512756, + "step": 2200 + }, + { + "epoch": 0.5845173283760456, + "grad_norm": 1.042142544774348, + "learning_rate": 1.6963934919759896e-05, + "loss": 0.3080691695213318, + "step": 2201 + }, + { + "epoch": 0.5847828973575886, + "grad_norm": 1.0216299822000434, + "learning_rate": 1.6960783171172286e-05, + "loss": 0.27491697669029236, + "step": 2202 + }, + { + "epoch": 0.5850484663391315, + "grad_norm": 1.1629234714983534, + "learning_rate": 1.6957630080627772e-05, + "loss": 0.3422500193119049, + "step": 2203 + }, + { + "epoch": 0.5853140353206745, + "grad_norm": 1.0832524871656921, + "learning_rate": 1.695447564873424e-05, + "loss": 0.27703234553337097, + "step": 2204 + }, + { + "epoch": 0.5855796043022174, + "grad_norm": 1.0275000328668338, + "learning_rate": 1.6951319876099825e-05, + "loss": 0.3088543117046356, + "step": 2205 + }, + { + "epoch": 0.5858451732837605, + "grad_norm": 1.0671359142705343, + "learning_rate": 1.694816276333292e-05, + "loss": 0.29875609278678894, + "step": 2206 + }, + { + "epoch": 0.5861107422653035, + "grad_norm": 1.0185982306074886, + "learning_rate": 1.6945004311042176e-05, + "loss": 0.30804386734962463, + "step": 2207 + }, + { + "epoch": 0.5863763112468464, + "grad_norm": 1.081134235929082, + "learning_rate": 1.694184451983651e-05, + "loss": 0.3324572741985321, + "step": 2208 + }, + { + "epoch": 0.5866418802283894, + "grad_norm": 1.0822730402391103, + "learning_rate": 1.6938683390325096e-05, + "loss": 0.30302488803863525, + "step": 2209 + }, + { + "epoch": 0.5869074492099323, + "grad_norm": 1.1499037543983048, + "learning_rate": 1.6935520923117355e-05, + "loss": 0.3264358341693878, + "step": 2210 + }, + { + "epoch": 0.5871730181914753, + "grad_norm": 1.1305858167915457, + "learning_rate": 1.693235711882298e-05, + "loss": 0.3172164261341095, + "step": 2211 + }, + { + "epoch": 0.5874385871730182, + "grad_norm": 0.9910314790510931, + "learning_rate": 
1.6929191978051908e-05, + "loss": 0.300851047039032, + "step": 2212 + }, + { + "epoch": 0.5877041561545612, + "grad_norm": 1.1122516205102002, + "learning_rate": 1.6926025501414352e-05, + "loss": 0.2887764871120453, + "step": 2213 + }, + { + "epoch": 0.5879697251361041, + "grad_norm": 1.0991421920944897, + "learning_rate": 1.692285768952076e-05, + "loss": 0.3246796727180481, + "step": 2214 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 1.1069795382063548, + "learning_rate": 1.6919688542981852e-05, + "loss": 0.30595412850379944, + "step": 2215 + }, + { + "epoch": 0.58850086309919, + "grad_norm": 1.068918741300791, + "learning_rate": 1.6916518062408604e-05, + "loss": 0.2885501980781555, + "step": 2216 + }, + { + "epoch": 0.588766432080733, + "grad_norm": 1.066918066226772, + "learning_rate": 1.6913346248412245e-05, + "loss": 0.34449082612991333, + "step": 2217 + }, + { + "epoch": 0.5890320010622759, + "grad_norm": 1.0585511422631098, + "learning_rate": 1.6910173101604267e-05, + "loss": 0.29410409927368164, + "step": 2218 + }, + { + "epoch": 0.5892975700438189, + "grad_norm": 1.1710793080996782, + "learning_rate": 1.690699862259641e-05, + "loss": 0.3250378370285034, + "step": 2219 + }, + { + "epoch": 0.5895631390253618, + "grad_norm": 1.3327292763951073, + "learning_rate": 1.690382281200068e-05, + "loss": 0.34420648217201233, + "step": 2220 + }, + { + "epoch": 0.5898287080069048, + "grad_norm": 1.1196949637967406, + "learning_rate": 1.6900645670429338e-05, + "loss": 0.33951860666275024, + "step": 2221 + }, + { + "epoch": 0.5900942769884477, + "grad_norm": 1.064177847952839, + "learning_rate": 1.6897467198494892e-05, + "loss": 0.35045644640922546, + "step": 2222 + }, + { + "epoch": 0.5903598459699907, + "grad_norm": 1.0378256375427404, + "learning_rate": 1.689428739681012e-05, + "loss": 0.3262789845466614, + "step": 2223 + }, + { + "epoch": 0.5906254149515336, + "grad_norm": 1.0662878016953237, + "learning_rate": 1.689110626598805e-05, + "loss": 
0.2959234118461609, + "step": 2224 + }, + { + "epoch": 0.5908909839330766, + "grad_norm": 1.040953230887288, + "learning_rate": 1.6887923806641965e-05, + "loss": 0.3185187876224518, + "step": 2225 + }, + { + "epoch": 0.5911565529146195, + "grad_norm": 0.9754385668000993, + "learning_rate": 1.6884740019385403e-05, + "loss": 0.2861860692501068, + "step": 2226 + }, + { + "epoch": 0.5914221218961625, + "grad_norm": 1.0067160421449919, + "learning_rate": 1.6881554904832163e-05, + "loss": 0.28718897700309753, + "step": 2227 + }, + { + "epoch": 0.5916876908777055, + "grad_norm": 1.0412433017248806, + "learning_rate": 1.68783684635963e-05, + "loss": 0.2919235825538635, + "step": 2228 + }, + { + "epoch": 0.5919532598592484, + "grad_norm": 0.9981457951279066, + "learning_rate": 1.687518069629212e-05, + "loss": 0.29265689849853516, + "step": 2229 + }, + { + "epoch": 0.5922188288407914, + "grad_norm": 1.105624159979672, + "learning_rate": 1.6871991603534183e-05, + "loss": 0.3257937431335449, + "step": 2230 + }, + { + "epoch": 0.5924843978223343, + "grad_norm": 0.9776528734928177, + "learning_rate": 1.6868801185937318e-05, + "loss": 0.30709922313690186, + "step": 2231 + }, + { + "epoch": 0.5927499668038773, + "grad_norm": 1.0470693079191735, + "learning_rate": 1.6865609444116594e-05, + "loss": 0.34016695618629456, + "step": 2232 + }, + { + "epoch": 0.5930155357854202, + "grad_norm": 3.119158292180646, + "learning_rate": 1.686241637868734e-05, + "loss": 0.27988332509994507, + "step": 2233 + }, + { + "epoch": 0.5932811047669632, + "grad_norm": 1.0478488923431404, + "learning_rate": 1.685922199026514e-05, + "loss": 0.33241748809814453, + "step": 2234 + }, + { + "epoch": 0.5935466737485062, + "grad_norm": 1.131470783603603, + "learning_rate": 1.685602627946584e-05, + "loss": 0.29636645317077637, + "step": 2235 + }, + { + "epoch": 0.5938122427300492, + "grad_norm": 1.0270882549188534, + "learning_rate": 1.6852829246905532e-05, + "loss": 0.32173705101013184, + "step": 2236 + }, + { + 
"epoch": 0.5940778117115921, + "grad_norm": 1.0825392737706068, + "learning_rate": 1.6849630893200567e-05, + "loss": 0.318726122379303, + "step": 2237 + }, + { + "epoch": 0.5943433806931351, + "grad_norm": 1.0382165285294276, + "learning_rate": 1.684643121896755e-05, + "loss": 0.3085494339466095, + "step": 2238 + }, + { + "epoch": 0.594608949674678, + "grad_norm": 1.0527313536489507, + "learning_rate": 1.684323022482334e-05, + "loss": 0.3402160406112671, + "step": 2239 + }, + { + "epoch": 0.594874518656221, + "grad_norm": 1.0380085019224927, + "learning_rate": 1.684002791138505e-05, + "loss": 0.28099578619003296, + "step": 2240 + }, + { + "epoch": 0.5951400876377639, + "grad_norm": 1.0821564922133853, + "learning_rate": 1.6836824279270053e-05, + "loss": 0.3049670159816742, + "step": 2241 + }, + { + "epoch": 0.5954056566193069, + "grad_norm": 1.0644252940512267, + "learning_rate": 1.6833619329095966e-05, + "loss": 0.2999834716320038, + "step": 2242 + }, + { + "epoch": 0.5956712256008498, + "grad_norm": 1.0828247808996563, + "learning_rate": 1.6830413061480663e-05, + "loss": 0.2976648509502411, + "step": 2243 + }, + { + "epoch": 0.5959367945823928, + "grad_norm": 0.9516700397999099, + "learning_rate": 1.6827205477042282e-05, + "loss": 0.2937200963497162, + "step": 2244 + }, + { + "epoch": 0.5962023635639357, + "grad_norm": 0.9800041770842799, + "learning_rate": 1.6823996576399208e-05, + "loss": 0.27944231033325195, + "step": 2245 + }, + { + "epoch": 0.5964679325454787, + "grad_norm": 1.2497901059935828, + "learning_rate": 1.6820786360170073e-05, + "loss": 0.37821248173713684, + "step": 2246 + }, + { + "epoch": 0.5967335015270216, + "grad_norm": 1.0764913922139379, + "learning_rate": 1.681757482897377e-05, + "loss": 0.31929296255111694, + "step": 2247 + }, + { + "epoch": 0.5969990705085646, + "grad_norm": 1.0997353700477965, + "learning_rate": 1.6814361983429446e-05, + "loss": 0.29905542731285095, + "step": 2248 + }, + { + "epoch": 0.5972646394901076, + "grad_norm": 
1.1012066663218303, + "learning_rate": 1.6811147824156503e-05, + "loss": 0.31056714057922363, + "step": 2249 + }, + { + "epoch": 0.5975302084716505, + "grad_norm": 1.0740873036211436, + "learning_rate": 1.6807932351774585e-05, + "loss": 0.3311445415019989, + "step": 2250 + }, + { + "epoch": 0.5977957774531935, + "grad_norm": 0.9539008733822649, + "learning_rate": 1.6804715566903603e-05, + "loss": 0.28413334488868713, + "step": 2251 + }, + { + "epoch": 0.5980613464347364, + "grad_norm": 1.068533794622215, + "learning_rate": 1.6801497470163717e-05, + "loss": 0.27681154012680054, + "step": 2252 + }, + { + "epoch": 0.5983269154162794, + "grad_norm": 1.0654200190327086, + "learning_rate": 1.679827806217533e-05, + "loss": 0.290216863155365, + "step": 2253 + }, + { + "epoch": 0.5985924843978223, + "grad_norm": 1.1041469834048565, + "learning_rate": 1.6795057343559115e-05, + "loss": 0.31263259053230286, + "step": 2254 + }, + { + "epoch": 0.5988580533793653, + "grad_norm": 1.126601485756597, + "learning_rate": 1.6791835314935984e-05, + "loss": 0.31527474522590637, + "step": 2255 + }, + { + "epoch": 0.5991236223609082, + "grad_norm": 1.078203294441185, + "learning_rate": 1.6788611976927104e-05, + "loss": 0.308803915977478, + "step": 2256 + }, + { + "epoch": 0.5993891913424512, + "grad_norm": 1.0503773076355036, + "learning_rate": 1.6785387330153898e-05, + "loss": 0.3038686215877533, + "step": 2257 + }, + { + "epoch": 0.5996547603239941, + "grad_norm": 1.0216209005739547, + "learning_rate": 1.6782161375238045e-05, + "loss": 0.32485973834991455, + "step": 2258 + }, + { + "epoch": 0.5999203293055371, + "grad_norm": 1.182450532742011, + "learning_rate": 1.6778934112801467e-05, + "loss": 0.32350587844848633, + "step": 2259 + }, + { + "epoch": 0.60018589828708, + "grad_norm": 1.0888151703509321, + "learning_rate": 1.6775705543466337e-05, + "loss": 0.31593745946884155, + "step": 2260 + }, + { + "epoch": 0.600451467268623, + "grad_norm": 1.0882766479814592, + "learning_rate": 
1.6772475667855098e-05, + "loss": 0.3266843855381012, + "step": 2261 + }, + { + "epoch": 0.6007170362501659, + "grad_norm": 1.1815872316974045, + "learning_rate": 1.676924448659042e-05, + "loss": 0.3334394693374634, + "step": 2262 + }, + { + "epoch": 0.600982605231709, + "grad_norm": 1.1019346354795203, + "learning_rate": 1.676601200029524e-05, + "loss": 0.29688704013824463, + "step": 2263 + }, + { + "epoch": 0.6012481742132519, + "grad_norm": 1.0675092497220116, + "learning_rate": 1.6762778209592744e-05, + "loss": 0.3163599967956543, + "step": 2264 + }, + { + "epoch": 0.6015137431947949, + "grad_norm": 3.310146638883422, + "learning_rate": 1.675954311510637e-05, + "loss": 0.3001909554004669, + "step": 2265 + }, + { + "epoch": 0.6017793121763378, + "grad_norm": 1.052342150287052, + "learning_rate": 1.6756306717459804e-05, + "loss": 0.306442528963089, + "step": 2266 + }, + { + "epoch": 0.6020448811578808, + "grad_norm": 1.0462245388504205, + "learning_rate": 1.6753069017276988e-05, + "loss": 0.32714736461639404, + "step": 2267 + }, + { + "epoch": 0.6023104501394237, + "grad_norm": 1.1462408299032063, + "learning_rate": 1.6749830015182106e-05, + "loss": 0.3276352286338806, + "step": 2268 + }, + { + "epoch": 0.6025760191209667, + "grad_norm": 1.196238497855594, + "learning_rate": 1.6746589711799607e-05, + "loss": 0.3151017427444458, + "step": 2269 + }, + { + "epoch": 0.6028415881025097, + "grad_norm": 1.0342963680315473, + "learning_rate": 1.674334810775418e-05, + "loss": 0.30252715945243835, + "step": 2270 + }, + { + "epoch": 0.6031071570840526, + "grad_norm": 1.013150034994447, + "learning_rate": 1.674010520367077e-05, + "loss": 0.28994205594062805, + "step": 2271 + }, + { + "epoch": 0.6033727260655956, + "grad_norm": 1.060884408167446, + "learning_rate": 1.6736861000174566e-05, + "loss": 0.31821542978286743, + "step": 2272 + }, + { + "epoch": 0.6036382950471385, + "grad_norm": 1.0745731746159097, + "learning_rate": 1.6733615497891018e-05, + "loss": 
0.33488404750823975, + "step": 2273 + }, + { + "epoch": 0.6039038640286815, + "grad_norm": 1.1687722013665731, + "learning_rate": 1.6730368697445815e-05, + "loss": 0.32545825839042664, + "step": 2274 + }, + { + "epoch": 0.6041694330102244, + "grad_norm": 1.0959659967153625, + "learning_rate": 1.6727120599464904e-05, + "loss": 0.3229105770587921, + "step": 2275 + }, + { + "epoch": 0.6044350019917674, + "grad_norm": 1.0190980223229251, + "learning_rate": 1.672387120457448e-05, + "loss": 0.29090648889541626, + "step": 2276 + }, + { + "epoch": 0.6047005709733103, + "grad_norm": 1.0135966931724694, + "learning_rate": 1.6720620513400993e-05, + "loss": 0.3102695345878601, + "step": 2277 + }, + { + "epoch": 0.6049661399548533, + "grad_norm": 0.9853472262099896, + "learning_rate": 1.6717368526571133e-05, + "loss": 0.3104533851146698, + "step": 2278 + }, + { + "epoch": 0.6052317089363962, + "grad_norm": 1.0624907138843722, + "learning_rate": 1.671411524471184e-05, + "loss": 0.3340798616409302, + "step": 2279 + }, + { + "epoch": 0.6054972779179392, + "grad_norm": 0.9362556276145145, + "learning_rate": 1.6710860668450318e-05, + "loss": 0.2807982563972473, + "step": 2280 + }, + { + "epoch": 0.6057628468994821, + "grad_norm": 1.0604829312359818, + "learning_rate": 1.6707604798414005e-05, + "loss": 0.28892064094543457, + "step": 2281 + }, + { + "epoch": 0.6060284158810251, + "grad_norm": 1.1005771261022437, + "learning_rate": 1.6704347635230594e-05, + "loss": 0.29660698771476746, + "step": 2282 + }, + { + "epoch": 0.606293984862568, + "grad_norm": 1.0826898129560842, + "learning_rate": 1.6701089179528032e-05, + "loss": 0.32079893350601196, + "step": 2283 + }, + { + "epoch": 0.606559553844111, + "grad_norm": 1.0711524337358722, + "learning_rate": 1.6697829431934508e-05, + "loss": 0.3464012145996094, + "step": 2284 + }, + { + "epoch": 0.6068251228256539, + "grad_norm": 1.113831391037599, + "learning_rate": 1.669456839307846e-05, + "loss": 0.3378494381904602, + "step": 2285 + }, + { 
+ "epoch": 0.6070906918071969, + "grad_norm": 1.1314381443012484, + "learning_rate": 1.6691306063588583e-05, + "loss": 0.2856704294681549, + "step": 2286 + }, + { + "epoch": 0.6073562607887398, + "grad_norm": 1.117095467957477, + "learning_rate": 1.6688042444093816e-05, + "loss": 0.317970871925354, + "step": 2287 + }, + { + "epoch": 0.6076218297702828, + "grad_norm": 0.9765740214705895, + "learning_rate": 1.6684777535223338e-05, + "loss": 0.3067381978034973, + "step": 2288 + }, + { + "epoch": 0.6078873987518257, + "grad_norm": 0.9795122588790717, + "learning_rate": 1.6681511337606594e-05, + "loss": 0.28682243824005127, + "step": 2289 + }, + { + "epoch": 0.6081529677333687, + "grad_norm": 1.0967806384391572, + "learning_rate": 1.667824385187327e-05, + "loss": 0.30516478419303894, + "step": 2290 + }, + { + "epoch": 0.6084185367149118, + "grad_norm": 1.2090889717256932, + "learning_rate": 1.6674975078653284e-05, + "loss": 0.3114034831523895, + "step": 2291 + }, + { + "epoch": 0.6086841056964547, + "grad_norm": 1.045779035897072, + "learning_rate": 1.6671705018576837e-05, + "loss": 0.3119916617870331, + "step": 2292 + }, + { + "epoch": 0.6089496746779977, + "grad_norm": 1.0110290976394836, + "learning_rate": 1.666843367227434e-05, + "loss": 0.2695278823375702, + "step": 2293 + }, + { + "epoch": 0.6092152436595406, + "grad_norm": 1.1042693591067085, + "learning_rate": 1.6665161040376483e-05, + "loss": 0.32162508368492126, + "step": 2294 + }, + { + "epoch": 0.6094808126410836, + "grad_norm": 1.1533266295102853, + "learning_rate": 1.6661887123514183e-05, + "loss": 0.3115222752094269, + "step": 2295 + }, + { + "epoch": 0.6097463816226265, + "grad_norm": 1.1903173397636237, + "learning_rate": 1.6658611922318618e-05, + "loss": 0.3239362835884094, + "step": 2296 + }, + { + "epoch": 0.6100119506041695, + "grad_norm": 1.0224008240467277, + "learning_rate": 1.66553354374212e-05, + "loss": 0.29716256260871887, + "step": 2297 + }, + { + "epoch": 0.6102775195857124, + "grad_norm": 
1.1579823586849616, + "learning_rate": 1.6652057669453606e-05, + "loss": 0.3337557911872864, + "step": 2298 + }, + { + "epoch": 0.6105430885672554, + "grad_norm": 1.0726602627394455, + "learning_rate": 1.6648778619047747e-05, + "loss": 0.30258649587631226, + "step": 2299 + }, + { + "epoch": 0.6108086575487983, + "grad_norm": 1.0836532202857172, + "learning_rate": 1.6645498286835784e-05, + "loss": 0.3151426315307617, + "step": 2300 + }, + { + "epoch": 0.6110742265303413, + "grad_norm": 0.9639622977001232, + "learning_rate": 1.664221667345013e-05, + "loss": 0.274954617023468, + "step": 2301 + }, + { + "epoch": 0.6113397955118842, + "grad_norm": 1.0454921478368049, + "learning_rate": 1.6638933779523437e-05, + "loss": 0.3055363893508911, + "step": 2302 + }, + { + "epoch": 0.6116053644934272, + "grad_norm": 1.0132221767482874, + "learning_rate": 1.663564960568861e-05, + "loss": 0.30296921730041504, + "step": 2303 + }, + { + "epoch": 0.6118709334749701, + "grad_norm": 1.0766188111034134, + "learning_rate": 1.66323641525788e-05, + "loss": 0.3118343651294708, + "step": 2304 + }, + { + "epoch": 0.6121365024565131, + "grad_norm": 1.164685781665666, + "learning_rate": 1.6629077420827405e-05, + "loss": 0.3277447819709778, + "step": 2305 + }, + { + "epoch": 0.612402071438056, + "grad_norm": 1.11996036014055, + "learning_rate": 1.6625789411068063e-05, + "loss": 0.307643860578537, + "step": 2306 + }, + { + "epoch": 0.612667640419599, + "grad_norm": 1.0752891079202938, + "learning_rate": 1.6622500123934665e-05, + "loss": 0.3043777346611023, + "step": 2307 + }, + { + "epoch": 0.6129332094011419, + "grad_norm": 1.1229566611504027, + "learning_rate": 1.6619209560061352e-05, + "loss": 0.28634852170944214, + "step": 2308 + }, + { + "epoch": 0.6131987783826849, + "grad_norm": 1.1746890844036781, + "learning_rate": 1.6615917720082503e-05, + "loss": 0.33200016617774963, + "step": 2309 + }, + { + "epoch": 0.6134643473642278, + "grad_norm": 1.0620493011215435, + "learning_rate": 
1.661262460463274e-05, + "loss": 0.26568055152893066, + "step": 2310 + }, + { + "epoch": 0.6137299163457708, + "grad_norm": 1.0408157138123326, + "learning_rate": 1.6609330214346945e-05, + "loss": 0.2772855758666992, + "step": 2311 + }, + { + "epoch": 0.6139954853273137, + "grad_norm": 1.2060076126932109, + "learning_rate": 1.6606034549860236e-05, + "loss": 0.3330409824848175, + "step": 2312 + }, + { + "epoch": 0.6142610543088567, + "grad_norm": 1.0235644562455184, + "learning_rate": 1.6602737611807975e-05, + "loss": 0.27702978253364563, + "step": 2313 + }, + { + "epoch": 0.6145266232903996, + "grad_norm": 1.1266755606893777, + "learning_rate": 1.6599439400825775e-05, + "loss": 0.29985183477401733, + "step": 2314 + }, + { + "epoch": 0.6147921922719426, + "grad_norm": 1.0266522277907775, + "learning_rate": 1.659613991754949e-05, + "loss": 0.2666100859642029, + "step": 2315 + }, + { + "epoch": 0.6150577612534855, + "grad_norm": 1.0676553477298287, + "learning_rate": 1.6592839162615223e-05, + "loss": 0.2968613803386688, + "step": 2316 + }, + { + "epoch": 0.6153233302350285, + "grad_norm": 1.26155090118547, + "learning_rate": 1.6589537136659326e-05, + "loss": 0.2693714499473572, + "step": 2317 + }, + { + "epoch": 0.6155888992165715, + "grad_norm": 1.1411779960646509, + "learning_rate": 1.658623384031838e-05, + "loss": 0.3192713260650635, + "step": 2318 + }, + { + "epoch": 0.6158544681981145, + "grad_norm": 1.099028639770974, + "learning_rate": 1.658292927422923e-05, + "loss": 0.2958469092845917, + "step": 2319 + }, + { + "epoch": 0.6161200371796575, + "grad_norm": 1.0613129939040433, + "learning_rate": 1.657962343902895e-05, + "loss": 0.28580743074417114, + "step": 2320 + }, + { + "epoch": 0.6163856061612004, + "grad_norm": 1.2105545865052383, + "learning_rate": 1.6576316335354875e-05, + "loss": 0.34325680136680603, + "step": 2321 + }, + { + "epoch": 0.6166511751427434, + "grad_norm": 1.076014963599046, + "learning_rate": 1.657300796384457e-05, + "loss": 
0.3220894932746887, + "step": 2322 + }, + { + "epoch": 0.6169167441242863, + "grad_norm": 1.003861259990267, + "learning_rate": 1.656969832513585e-05, + "loss": 0.2934642434120178, + "step": 2323 + }, + { + "epoch": 0.6171823131058293, + "grad_norm": 1.0182182491222724, + "learning_rate": 1.656638741986677e-05, + "loss": 0.3066999912261963, + "step": 2324 + }, + { + "epoch": 0.6174478820873722, + "grad_norm": 1.0780285957414313, + "learning_rate": 1.6563075248675645e-05, + "loss": 0.2947896122932434, + "step": 2325 + }, + { + "epoch": 0.6177134510689152, + "grad_norm": 1.1567241875430703, + "learning_rate": 1.6559761812201018e-05, + "loss": 0.33616161346435547, + "step": 2326 + }, + { + "epoch": 0.6179790200504581, + "grad_norm": 1.0754490235924812, + "learning_rate": 1.6556447111081678e-05, + "loss": 0.29555875062942505, + "step": 2327 + }, + { + "epoch": 0.6182445890320011, + "grad_norm": 1.0070791342344025, + "learning_rate": 1.655313114595666e-05, + "loss": 0.276498019695282, + "step": 2328 + }, + { + "epoch": 0.618510158013544, + "grad_norm": 1.0894248364537533, + "learning_rate": 1.6549813917465242e-05, + "loss": 0.3081165552139282, + "step": 2329 + }, + { + "epoch": 0.618775726995087, + "grad_norm": 1.2153046006588315, + "learning_rate": 1.654649542624695e-05, + "loss": 0.3610053062438965, + "step": 2330 + }, + { + "epoch": 0.6190412959766299, + "grad_norm": 1.0676492266011808, + "learning_rate": 1.654317567294155e-05, + "loss": 0.2775106430053711, + "step": 2331 + }, + { + "epoch": 0.6193068649581729, + "grad_norm": 4.371469554540211, + "learning_rate": 1.653985465818905e-05, + "loss": 0.2915893793106079, + "step": 2332 + }, + { + "epoch": 0.6195724339397158, + "grad_norm": 1.0032536414224313, + "learning_rate": 1.6536532382629696e-05, + "loss": 0.30868977308273315, + "step": 2333 + }, + { + "epoch": 0.6198380029212588, + "grad_norm": 1.1011191125099704, + "learning_rate": 1.6533208846903996e-05, + "loss": 0.3083038330078125, + "step": 2334 + }, + { + 
"epoch": 0.6201035719028017, + "grad_norm": 0.9895882037041855, + "learning_rate": 1.652988405165268e-05, + "loss": 0.25192466378211975, + "step": 2335 + }, + { + "epoch": 0.6203691408843447, + "grad_norm": 1.1020677364796136, + "learning_rate": 1.6526557997516737e-05, + "loss": 0.32154130935668945, + "step": 2336 + }, + { + "epoch": 0.6206347098658876, + "grad_norm": 1.1174587266065723, + "learning_rate": 1.6523230685137382e-05, + "loss": 0.2860945165157318, + "step": 2337 + }, + { + "epoch": 0.6209002788474306, + "grad_norm": 1.1647384960602913, + "learning_rate": 1.6519902115156084e-05, + "loss": 0.3279789984226227, + "step": 2338 + }, + { + "epoch": 0.6211658478289735, + "grad_norm": 1.062678685453679, + "learning_rate": 1.6516572288214555e-05, + "loss": 0.3082200884819031, + "step": 2339 + }, + { + "epoch": 0.6214314168105165, + "grad_norm": 1.1253285275737313, + "learning_rate": 1.6513241204954745e-05, + "loss": 0.29032304883003235, + "step": 2340 + }, + { + "epoch": 0.6216969857920595, + "grad_norm": 1.004918906125766, + "learning_rate": 1.6509908866018843e-05, + "loss": 0.3096848130226135, + "step": 2341 + }, + { + "epoch": 0.6219625547736024, + "grad_norm": 1.021047856460921, + "learning_rate": 1.6506575272049294e-05, + "loss": 0.309989333152771, + "step": 2342 + }, + { + "epoch": 0.6222281237551454, + "grad_norm": 1.119097166323709, + "learning_rate": 1.6503240423688768e-05, + "loss": 0.311350554227829, + "step": 2343 + }, + { + "epoch": 0.6224936927366883, + "grad_norm": 1.0659510240862446, + "learning_rate": 1.6499904321580187e-05, + "loss": 0.3313952386379242, + "step": 2344 + }, + { + "epoch": 0.6227592617182313, + "grad_norm": 1.0702797293760455, + "learning_rate": 1.649656696636671e-05, + "loss": 0.2984781265258789, + "step": 2345 + }, + { + "epoch": 0.6230248306997742, + "grad_norm": 1.0312282361562104, + "learning_rate": 1.6493228358691748e-05, + "loss": 0.3058238625526428, + "step": 2346 + }, + { + "epoch": 0.6232903996813173, + "grad_norm": 
1.0462474005488736, + "learning_rate": 1.6489888499198935e-05, + "loss": 0.33439138531684875, + "step": 2347 + }, + { + "epoch": 0.6235559686628602, + "grad_norm": 1.0386002000588619, + "learning_rate": 1.6486547388532157e-05, + "loss": 0.2883133292198181, + "step": 2348 + }, + { + "epoch": 0.6238215376444032, + "grad_norm": 0.9997410916606129, + "learning_rate": 1.648320502733555e-05, + "loss": 0.30258435010910034, + "step": 2349 + }, + { + "epoch": 0.6240871066259461, + "grad_norm": 1.0226158069339855, + "learning_rate": 1.6479861416253476e-05, + "loss": 0.316353440284729, + "step": 2350 + }, + { + "epoch": 0.6243526756074891, + "grad_norm": 1.0638089423798769, + "learning_rate": 1.647651655593054e-05, + "loss": 0.3230556547641754, + "step": 2351 + }, + { + "epoch": 0.624618244589032, + "grad_norm": 1.2043111611037318, + "learning_rate": 1.6473170447011593e-05, + "loss": 0.3327128291130066, + "step": 2352 + }, + { + "epoch": 0.624883813570575, + "grad_norm": 1.081123131766037, + "learning_rate": 1.6469823090141733e-05, + "loss": 0.3152993619441986, + "step": 2353 + }, + { + "epoch": 0.6251493825521179, + "grad_norm": 1.0655193061859811, + "learning_rate": 1.6466474485966286e-05, + "loss": 0.26792511343955994, + "step": 2354 + }, + { + "epoch": 0.6254149515336609, + "grad_norm": 1.121022507517606, + "learning_rate": 1.6463124635130824e-05, + "loss": 0.31665652990341187, + "step": 2355 + }, + { + "epoch": 0.6256805205152038, + "grad_norm": 1.0108098757868682, + "learning_rate": 1.645977353828115e-05, + "loss": 0.29573655128479004, + "step": 2356 + }, + { + "epoch": 0.6259460894967468, + "grad_norm": 1.0973823257435635, + "learning_rate": 1.6456421196063334e-05, + "loss": 0.3210436999797821, + "step": 2357 + }, + { + "epoch": 0.6262116584782897, + "grad_norm": 1.2424369194288305, + "learning_rate": 1.6453067609123656e-05, + "loss": 0.2837316691875458, + "step": 2358 + }, + { + "epoch": 0.6264772274598327, + "grad_norm": 1.0217734190114693, + "learning_rate": 
1.6449712778108645e-05, + "loss": 0.2885812520980835, + "step": 2359 + }, + { + "epoch": 0.6267427964413756, + "grad_norm": 1.1369177274860889, + "learning_rate": 1.6446356703665078e-05, + "loss": 0.34908249974250793, + "step": 2360 + }, + { + "epoch": 0.6270083654229186, + "grad_norm": 0.9942151080492051, + "learning_rate": 1.6442999386439967e-05, + "loss": 0.30398470163345337, + "step": 2361 + }, + { + "epoch": 0.6272739344044616, + "grad_norm": 0.9838105681310805, + "learning_rate": 1.6439640827080565e-05, + "loss": 0.2780487537384033, + "step": 2362 + }, + { + "epoch": 0.6275395033860045, + "grad_norm": 0.956534505955689, + "learning_rate": 1.6436281026234357e-05, + "loss": 0.2575770616531372, + "step": 2363 + }, + { + "epoch": 0.6278050723675475, + "grad_norm": 0.9675911826739493, + "learning_rate": 1.6432919984549077e-05, + "loss": 0.2888547480106354, + "step": 2364 + }, + { + "epoch": 0.6280706413490904, + "grad_norm": 1.2303845977564731, + "learning_rate": 1.6429557702672694e-05, + "loss": 0.3259009122848511, + "step": 2365 + }, + { + "epoch": 0.6283362103306334, + "grad_norm": 1.3923197622537806, + "learning_rate": 1.6426194181253415e-05, + "loss": 0.2899959683418274, + "step": 2366 + }, + { + "epoch": 0.6286017793121763, + "grad_norm": 1.058685915432802, + "learning_rate": 1.6422829420939688e-05, + "loss": 0.28471851348876953, + "step": 2367 + }, + { + "epoch": 0.6288673482937193, + "grad_norm": 1.0822140266216713, + "learning_rate": 1.64194634223802e-05, + "loss": 0.2958947420120239, + "step": 2368 + }, + { + "epoch": 0.6291329172752622, + "grad_norm": 1.1251439755337522, + "learning_rate": 1.6416096186223872e-05, + "loss": 0.3089750111103058, + "step": 2369 + }, + { + "epoch": 0.6293984862568052, + "grad_norm": 1.0517657351777636, + "learning_rate": 1.641272771311987e-05, + "loss": 0.31597089767456055, + "step": 2370 + }, + { + "epoch": 0.6296640552383481, + "grad_norm": 1.237586073778816, + "learning_rate": 1.6409358003717598e-05, + "loss": 
0.2968488931655884, + "step": 2371 + }, + { + "epoch": 0.6299296242198911, + "grad_norm": 1.0062603647307793, + "learning_rate": 1.6405987058666694e-05, + "loss": 0.27532660961151123, + "step": 2372 + }, + { + "epoch": 0.630195193201434, + "grad_norm": 1.0061271713511417, + "learning_rate": 1.6402614878617037e-05, + "loss": 0.2800731956958771, + "step": 2373 + }, + { + "epoch": 0.630460762182977, + "grad_norm": 1.0867786948587836, + "learning_rate": 1.6399241464218744e-05, + "loss": 0.31728652119636536, + "step": 2374 + }, + { + "epoch": 0.63072633116452, + "grad_norm": 1.0634834793994077, + "learning_rate": 1.6395866816122167e-05, + "loss": 0.2776367664337158, + "step": 2375 + }, + { + "epoch": 0.630991900146063, + "grad_norm": 1.2696308030410766, + "learning_rate": 1.63924909349779e-05, + "loss": 0.3308418095111847, + "step": 2376 + }, + { + "epoch": 0.6312574691276059, + "grad_norm": 1.027144235831433, + "learning_rate": 1.6389113821436775e-05, + "loss": 0.31589487195014954, + "step": 2377 + }, + { + "epoch": 0.6315230381091489, + "grad_norm": 0.9983142729953255, + "learning_rate": 1.6385735476149855e-05, + "loss": 0.27181899547576904, + "step": 2378 + }, + { + "epoch": 0.6317886070906918, + "grad_norm": 1.0656862561919935, + "learning_rate": 1.638235589976845e-05, + "loss": 0.2603747546672821, + "step": 2379 + }, + { + "epoch": 0.6320541760722348, + "grad_norm": 1.0543823342651422, + "learning_rate": 1.63789750929441e-05, + "loss": 0.29050707817077637, + "step": 2380 + }, + { + "epoch": 0.6323197450537777, + "grad_norm": 1.0310549396867945, + "learning_rate": 1.6375593056328586e-05, + "loss": 0.2979413866996765, + "step": 2381 + }, + { + "epoch": 0.6325853140353207, + "grad_norm": 1.0460005843129836, + "learning_rate": 1.6372209790573926e-05, + "loss": 0.30875420570373535, + "step": 2382 + }, + { + "epoch": 0.6328508830168637, + "grad_norm": 0.9698416111844145, + "learning_rate": 1.6368825296332366e-05, + "loss": 0.2755935788154602, + "step": 2383 + }, + { + 
"epoch": 0.6331164519984066, + "grad_norm": 1.1336778567410772, + "learning_rate": 1.6365439574256406e-05, + "loss": 0.3459136486053467, + "step": 2384 + }, + { + "epoch": 0.6333820209799496, + "grad_norm": 1.116018329054477, + "learning_rate": 1.6362052624998767e-05, + "loss": 0.29043829441070557, + "step": 2385 + }, + { + "epoch": 0.6336475899614925, + "grad_norm": 1.123039696178655, + "learning_rate": 1.635866444921242e-05, + "loss": 0.321551114320755, + "step": 2386 + }, + { + "epoch": 0.6339131589430355, + "grad_norm": 1.0451682936950502, + "learning_rate": 1.6355275047550553e-05, + "loss": 0.28478139638900757, + "step": 2387 + }, + { + "epoch": 0.6341787279245784, + "grad_norm": 1.060617338056141, + "learning_rate": 1.6351884420666616e-05, + "loss": 0.30913087725639343, + "step": 2388 + }, + { + "epoch": 0.6344442969061214, + "grad_norm": 1.0996519301974148, + "learning_rate": 1.6348492569214275e-05, + "loss": 0.328342467546463, + "step": 2389 + }, + { + "epoch": 0.6347098658876643, + "grad_norm": 1.0657562962668374, + "learning_rate": 1.634509949384744e-05, + "loss": 0.3291119933128357, + "step": 2390 + }, + { + "epoch": 0.6349754348692073, + "grad_norm": 1.0805286951038287, + "learning_rate": 1.6341705195220257e-05, + "loss": 0.3542378544807434, + "step": 2391 + }, + { + "epoch": 0.6352410038507502, + "grad_norm": 1.1387422668526126, + "learning_rate": 1.63383096739871e-05, + "loss": 0.3167935609817505, + "step": 2392 + }, + { + "epoch": 0.6355065728322932, + "grad_norm": 0.9614211236141011, + "learning_rate": 1.63349129308026e-05, + "loss": 0.27623263001441956, + "step": 2393 + }, + { + "epoch": 0.6357721418138361, + "grad_norm": 1.1351525352268206, + "learning_rate": 1.6331514966321596e-05, + "loss": 0.3615761399269104, + "step": 2394 + }, + { + "epoch": 0.6360377107953791, + "grad_norm": 1.1430561223010627, + "learning_rate": 1.632811578119918e-05, + "loss": 0.3503292500972748, + "step": 2395 + }, + { + "epoch": 0.636303279776922, + "grad_norm": 
1.0400637290516392, + "learning_rate": 1.6324715376090673e-05, + "loss": 0.2994767129421234, + "step": 2396 + }, + { + "epoch": 0.636568848758465, + "grad_norm": 1.2836743734514182, + "learning_rate": 1.6321313751651638e-05, + "loss": 0.29903143644332886, + "step": 2397 + }, + { + "epoch": 0.6368344177400079, + "grad_norm": 1.0273086079776361, + "learning_rate": 1.6317910908537865e-05, + "loss": 0.310536652803421, + "step": 2398 + }, + { + "epoch": 0.6370999867215509, + "grad_norm": 1.2820707601171073, + "learning_rate": 1.6314506847405382e-05, + "loss": 0.32584354281425476, + "step": 2399 + }, + { + "epoch": 0.6373655557030938, + "grad_norm": 1.186095937719991, + "learning_rate": 1.6311101568910448e-05, + "loss": 0.3536352217197418, + "step": 2400 + }, + { + "epoch": 0.6376311246846368, + "grad_norm": 1.0361661707144088, + "learning_rate": 1.6307695073709565e-05, + "loss": 0.3198434114456177, + "step": 2401 + }, + { + "epoch": 0.6378966936661797, + "grad_norm": 0.8809138916670839, + "learning_rate": 1.6304287362459462e-05, + "loss": 0.264182448387146, + "step": 2402 + }, + { + "epoch": 0.6381622626477228, + "grad_norm": 1.0526335869529386, + "learning_rate": 1.6300878435817115e-05, + "loss": 0.31182044744491577, + "step": 2403 + }, + { + "epoch": 0.6384278316292658, + "grad_norm": 1.0495886453587215, + "learning_rate": 1.6297468294439708e-05, + "loss": 0.28221404552459717, + "step": 2404 + }, + { + "epoch": 0.6386934006108087, + "grad_norm": 1.0211141314743026, + "learning_rate": 1.6294056938984693e-05, + "loss": 0.27788785099983215, + "step": 2405 + }, + { + "epoch": 0.6389589695923517, + "grad_norm": 1.068610455564362, + "learning_rate": 1.6290644370109728e-05, + "loss": 0.3300796151161194, + "step": 2406 + }, + { + "epoch": 0.6392245385738946, + "grad_norm": 1.0949996094795582, + "learning_rate": 1.628723058847272e-05, + "loss": 0.32170963287353516, + "step": 2407 + }, + { + "epoch": 0.6394901075554376, + "grad_norm": 1.1320309851276869, + "learning_rate": 
1.628381559473181e-05, + "loss": 0.3243589997291565, + "step": 2408 + }, + { + "epoch": 0.6397556765369805, + "grad_norm": 1.4458945786524546, + "learning_rate": 1.6280399389545358e-05, + "loss": 0.311046838760376, + "step": 2409 + }, + { + "epoch": 0.6400212455185235, + "grad_norm": 1.0237689913585555, + "learning_rate": 1.6276981973571973e-05, + "loss": 0.2642543911933899, + "step": 2410 + }, + { + "epoch": 0.6402868145000664, + "grad_norm": 1.1424399755044237, + "learning_rate": 1.62735633474705e-05, + "loss": 0.3593730926513672, + "step": 2411 + }, + { + "epoch": 0.6405523834816094, + "grad_norm": 1.1145611429504636, + "learning_rate": 1.62701435119e-05, + "loss": 0.3147425353527069, + "step": 2412 + }, + { + "epoch": 0.6408179524631523, + "grad_norm": 1.1400749315540035, + "learning_rate": 1.6266722467519783e-05, + "loss": 0.32639142870903015, + "step": 2413 + }, + { + "epoch": 0.6410835214446953, + "grad_norm": 1.1011849489387644, + "learning_rate": 1.626330021498938e-05, + "loss": 0.32113659381866455, + "step": 2414 + }, + { + "epoch": 0.6413490904262382, + "grad_norm": 1.0371621680767618, + "learning_rate": 1.6259876754968568e-05, + "loss": 0.3188290297985077, + "step": 2415 + }, + { + "epoch": 0.6416146594077812, + "grad_norm": 1.076893351246201, + "learning_rate": 1.625645208811734e-05, + "loss": 0.3145543932914734, + "step": 2416 + }, + { + "epoch": 0.6418802283893241, + "grad_norm": 1.1368093372185335, + "learning_rate": 1.6253026215095943e-05, + "loss": 0.30433323979377747, + "step": 2417 + }, + { + "epoch": 0.6421457973708671, + "grad_norm": 1.1042321396184265, + "learning_rate": 1.6249599136564837e-05, + "loss": 0.30946728587150574, + "step": 2418 + }, + { + "epoch": 0.64241136635241, + "grad_norm": 0.991248414026241, + "learning_rate": 1.6246170853184726e-05, + "loss": 0.26245906949043274, + "step": 2419 + }, + { + "epoch": 0.642676935333953, + "grad_norm": 1.1213671588278835, + "learning_rate": 1.624274136561654e-05, + "loss": 0.31468862295150757, 
+ "step": 2420 + }, + { + "epoch": 0.6429425043154959, + "grad_norm": 1.0200744973975597, + "learning_rate": 1.6239310674521443e-05, + "loss": 0.28946155309677124, + "step": 2421 + }, + { + "epoch": 0.6432080732970389, + "grad_norm": 1.1088143851501708, + "learning_rate": 1.6235878780560835e-05, + "loss": 0.26272106170654297, + "step": 2422 + }, + { + "epoch": 0.6434736422785818, + "grad_norm": 1.1185700160494145, + "learning_rate": 1.6232445684396347e-05, + "loss": 0.3094574213027954, + "step": 2423 + }, + { + "epoch": 0.6437392112601248, + "grad_norm": 0.9377280048944331, + "learning_rate": 1.6229011386689832e-05, + "loss": 0.2503833770751953, + "step": 2424 + }, + { + "epoch": 0.6440047802416677, + "grad_norm": 0.9657663244207705, + "learning_rate": 1.6225575888103387e-05, + "loss": 0.2655009627342224, + "step": 2425 + }, + { + "epoch": 0.6442703492232107, + "grad_norm": 1.123117061290067, + "learning_rate": 1.6222139189299336e-05, + "loss": 0.2819611728191376, + "step": 2426 + }, + { + "epoch": 0.6445359182047536, + "grad_norm": 1.0859641118248262, + "learning_rate": 1.6218701290940232e-05, + "loss": 0.2956068217754364, + "step": 2427 + }, + { + "epoch": 0.6448014871862966, + "grad_norm": 1.2445728810553593, + "learning_rate": 1.6215262193688862e-05, + "loss": 0.3330997824668884, + "step": 2428 + }, + { + "epoch": 0.6450670561678395, + "grad_norm": 1.0073602881165937, + "learning_rate": 1.6211821898208242e-05, + "loss": 0.25897055864334106, + "step": 2429 + }, + { + "epoch": 0.6453326251493825, + "grad_norm": 1.1228221759016932, + "learning_rate": 1.6208380405161623e-05, + "loss": 0.3119947016239166, + "step": 2430 + }, + { + "epoch": 0.6455981941309256, + "grad_norm": 1.143631742936843, + "learning_rate": 1.6204937715212482e-05, + "loss": 0.30833956599235535, + "step": 2431 + }, + { + "epoch": 0.6458637631124685, + "grad_norm": 1.1584271404994573, + "learning_rate": 1.620149382902453e-05, + "loss": 0.2935214638710022, + "step": 2432 + }, + { + "epoch": 
0.6461293320940115, + "grad_norm": 1.6063755788258844, + "learning_rate": 1.619804874726171e-05, + "loss": 0.24297356605529785, + "step": 2433 + }, + { + "epoch": 0.6463949010755544, + "grad_norm": 1.14218339304969, + "learning_rate": 1.6194602470588186e-05, + "loss": 0.319774866104126, + "step": 2434 + }, + { + "epoch": 0.6466604700570974, + "grad_norm": 1.1751618225153557, + "learning_rate": 1.6191154999668368e-05, + "loss": 0.29197463393211365, + "step": 2435 + }, + { + "epoch": 0.6469260390386403, + "grad_norm": 1.1008916130088804, + "learning_rate": 1.6187706335166882e-05, + "loss": 0.2939727306365967, + "step": 2436 + }, + { + "epoch": 0.6471916080201833, + "grad_norm": 1.0935449463761302, + "learning_rate": 1.6184256477748595e-05, + "loss": 0.2941162586212158, + "step": 2437 + }, + { + "epoch": 0.6474571770017262, + "grad_norm": 1.1336931987797143, + "learning_rate": 1.6180805428078593e-05, + "loss": 0.2823144197463989, + "step": 2438 + }, + { + "epoch": 0.6477227459832692, + "grad_norm": 1.0912252779984561, + "learning_rate": 1.61773531868222e-05, + "loss": 0.30048274993896484, + "step": 2439 + }, + { + "epoch": 0.6479883149648121, + "grad_norm": 1.183044095349839, + "learning_rate": 1.617389975464497e-05, + "loss": 0.30927354097366333, + "step": 2440 + }, + { + "epoch": 0.6482538839463551, + "grad_norm": 1.166570736507726, + "learning_rate": 1.6170445132212678e-05, + "loss": 0.34835004806518555, + "step": 2441 + }, + { + "epoch": 0.648519452927898, + "grad_norm": 1.0325781129961564, + "learning_rate": 1.616698932019134e-05, + "loss": 0.2890225648880005, + "step": 2442 + }, + { + "epoch": 0.648785021909441, + "grad_norm": 1.1182329319338478, + "learning_rate": 1.6163532319247195e-05, + "loss": 0.31410521268844604, + "step": 2443 + }, + { + "epoch": 0.6490505908909839, + "grad_norm": 0.9213656240638256, + "learning_rate": 1.616007413004671e-05, + "loss": 0.267375111579895, + "step": 2444 + }, + { + "epoch": 0.6493161598725269, + "grad_norm": 
1.1587177777274813, + "learning_rate": 1.6156614753256583e-05, + "loss": 0.3300023376941681, + "step": 2445 + }, + { + "epoch": 0.6495817288540698, + "grad_norm": 1.0295072511714587, + "learning_rate": 1.615315418954374e-05, + "loss": 0.2822847366333008, + "step": 2446 + }, + { + "epoch": 0.6498472978356128, + "grad_norm": 1.1626615137060834, + "learning_rate": 1.6149692439575348e-05, + "loss": 0.3093401789665222, + "step": 2447 + }, + { + "epoch": 0.6501128668171557, + "grad_norm": 1.0475923101386018, + "learning_rate": 1.6146229504018777e-05, + "loss": 0.2892506718635559, + "step": 2448 + }, + { + "epoch": 0.6503784357986987, + "grad_norm": 0.9972012319936079, + "learning_rate": 1.6142765383541643e-05, + "loss": 0.2805558741092682, + "step": 2449 + }, + { + "epoch": 0.6506440047802416, + "grad_norm": 1.0535842654025462, + "learning_rate": 1.6139300078811794e-05, + "loss": 0.29852935671806335, + "step": 2450 + }, + { + "epoch": 0.6509095737617846, + "grad_norm": 1.193949473615032, + "learning_rate": 1.6135833590497295e-05, + "loss": 0.3567991256713867, + "step": 2451 + }, + { + "epoch": 0.6511751427433276, + "grad_norm": 1.1265709697559396, + "learning_rate": 1.6132365919266442e-05, + "loss": 0.29564782977104187, + "step": 2452 + }, + { + "epoch": 0.6514407117248705, + "grad_norm": 1.011180050217134, + "learning_rate": 1.612889706578777e-05, + "loss": 0.30027297139167786, + "step": 2453 + }, + { + "epoch": 0.6517062807064135, + "grad_norm": 1.0908136110597069, + "learning_rate": 1.6125427030730027e-05, + "loss": 0.3318096697330475, + "step": 2454 + }, + { + "epoch": 0.6519718496879564, + "grad_norm": 1.0728958387824694, + "learning_rate": 1.612195581476219e-05, + "loss": 0.30962997674942017, + "step": 2455 + }, + { + "epoch": 0.6522374186694994, + "grad_norm": 1.2969539714019946, + "learning_rate": 1.6118483418553476e-05, + "loss": 0.3152836859226227, + "step": 2456 + }, + { + "epoch": 0.6525029876510423, + "grad_norm": 1.0160215490589632, + "learning_rate": 
1.6115009842773322e-05, + "loss": 0.26117920875549316, + "step": 2457 + }, + { + "epoch": 0.6527685566325853, + "grad_norm": 0.9780826840488046, + "learning_rate": 1.6111535088091388e-05, + "loss": 0.2705717384815216, + "step": 2458 + }, + { + "epoch": 0.6530341256141283, + "grad_norm": 1.112935626593024, + "learning_rate": 1.6108059155177568e-05, + "loss": 0.3281205892562866, + "step": 2459 + }, + { + "epoch": 0.6532996945956713, + "grad_norm": 1.0805050021999307, + "learning_rate": 1.6104582044701983e-05, + "loss": 0.3300125002861023, + "step": 2460 + }, + { + "epoch": 0.6535652635772142, + "grad_norm": 1.0596352955938992, + "learning_rate": 1.6101103757334973e-05, + "loss": 0.29286977648735046, + "step": 2461 + }, + { + "epoch": 0.6538308325587572, + "grad_norm": 1.114611766363321, + "learning_rate": 1.6097624293747115e-05, + "loss": 0.2920498847961426, + "step": 2462 + }, + { + "epoch": 0.6540964015403001, + "grad_norm": 1.0455118881549736, + "learning_rate": 1.609414365460921e-05, + "loss": 0.31018689274787903, + "step": 2463 + }, + { + "epoch": 0.6543619705218431, + "grad_norm": 1.0028130278859915, + "learning_rate": 1.609066184059228e-05, + "loss": 0.26806512475013733, + "step": 2464 + }, + { + "epoch": 0.654627539503386, + "grad_norm": 1.0385768164913443, + "learning_rate": 1.608717885236758e-05, + "loss": 0.29770639538764954, + "step": 2465 + }, + { + "epoch": 0.654893108484929, + "grad_norm": 1.0811683391440958, + "learning_rate": 1.6083694690606592e-05, + "loss": 0.36161965131759644, + "step": 2466 + }, + { + "epoch": 0.6551586774664719, + "grad_norm": 1.1455214370068598, + "learning_rate": 1.6080209355981016e-05, + "loss": 0.36114081740379333, + "step": 2467 + }, + { + "epoch": 0.6554242464480149, + "grad_norm": 0.9911085328884063, + "learning_rate": 1.6076722849162786e-05, + "loss": 0.28924882411956787, + "step": 2468 + }, + { + "epoch": 0.6556898154295578, + "grad_norm": 1.1198872767040324, + "learning_rate": 1.6073235170824058e-05, + "loss": 
0.3088049292564392, + "step": 2469 + }, + { + "epoch": 0.6559553844111008, + "grad_norm": 1.062389027957873, + "learning_rate": 1.6069746321637216e-05, + "loss": 0.2684907615184784, + "step": 2470 + }, + { + "epoch": 0.6562209533926437, + "grad_norm": 0.9850175058697045, + "learning_rate": 1.6066256302274873e-05, + "loss": 0.2674641013145447, + "step": 2471 + }, + { + "epoch": 0.6564865223741867, + "grad_norm": 1.0658104164235327, + "learning_rate": 1.6062765113409854e-05, + "loss": 0.2865106165409088, + "step": 2472 + }, + { + "epoch": 0.6567520913557297, + "grad_norm": 1.1117203943537428, + "learning_rate": 1.605927275571523e-05, + "loss": 0.33163607120513916, + "step": 2473 + }, + { + "epoch": 0.6570176603372726, + "grad_norm": 1.1177244627769223, + "learning_rate": 1.6055779229864276e-05, + "loss": 0.32725927233695984, + "step": 2474 + }, + { + "epoch": 0.6572832293188156, + "grad_norm": 1.171322314473831, + "learning_rate": 1.605228453653051e-05, + "loss": 0.31537747383117676, + "step": 2475 + }, + { + "epoch": 0.6575487983003585, + "grad_norm": 1.0855461390356589, + "learning_rate": 1.604878867638767e-05, + "loss": 0.29331761598587036, + "step": 2476 + }, + { + "epoch": 0.6578143672819015, + "grad_norm": 1.0342424424241736, + "learning_rate": 1.6045291650109706e-05, + "loss": 0.315193772315979, + "step": 2477 + }, + { + "epoch": 0.6580799362634444, + "grad_norm": 1.2286540067411784, + "learning_rate": 1.6041793458370812e-05, + "loss": 0.3595796227455139, + "step": 2478 + }, + { + "epoch": 0.6583455052449874, + "grad_norm": 1.0251892797499218, + "learning_rate": 1.6038294101845394e-05, + "loss": 0.3069949150085449, + "step": 2479 + }, + { + "epoch": 0.6586110742265303, + "grad_norm": 1.1576253586981062, + "learning_rate": 1.603479358120809e-05, + "loss": 0.3154812455177307, + "step": 2480 + }, + { + "epoch": 0.6588766432080733, + "grad_norm": 1.1008921076459075, + "learning_rate": 1.6031291897133756e-05, + "loss": 0.3005039691925049, + "step": 2481 + }, + { + 
"epoch": 0.6591422121896162, + "grad_norm": 1.1463594149599334, + "learning_rate": 1.6027789050297476e-05, + "loss": 0.2885095775127411, + "step": 2482 + }, + { + "epoch": 0.6594077811711592, + "grad_norm": 1.002066881102099, + "learning_rate": 1.602428504137456e-05, + "loss": 0.291950523853302, + "step": 2483 + }, + { + "epoch": 0.6596733501527021, + "grad_norm": 1.0919380790727968, + "learning_rate": 1.6020779871040538e-05, + "loss": 0.31630760431289673, + "step": 2484 + }, + { + "epoch": 0.6599389191342451, + "grad_norm": 1.0827567425634856, + "learning_rate": 1.6017273539971167e-05, + "loss": 0.29767507314682007, + "step": 2485 + }, + { + "epoch": 0.660204488115788, + "grad_norm": 1.036820980968177, + "learning_rate": 1.601376604884242e-05, + "loss": 0.2882775664329529, + "step": 2486 + }, + { + "epoch": 0.6604700570973311, + "grad_norm": 1.0885135950320362, + "learning_rate": 1.601025739833051e-05, + "loss": 0.325736403465271, + "step": 2487 + }, + { + "epoch": 0.660735626078874, + "grad_norm": 1.048580856774253, + "learning_rate": 1.6006747589111854e-05, + "loss": 0.3007255792617798, + "step": 2488 + }, + { + "epoch": 0.661001195060417, + "grad_norm": 1.146836506523448, + "learning_rate": 1.6003236621863107e-05, + "loss": 0.33199968934059143, + "step": 2489 + }, + { + "epoch": 0.6612667640419599, + "grad_norm": 1.1430196866694278, + "learning_rate": 1.5999724497261138e-05, + "loss": 0.3784569799900055, + "step": 2490 + }, + { + "epoch": 0.6615323330235029, + "grad_norm": 1.0506667031587968, + "learning_rate": 1.5996211215983052e-05, + "loss": 0.28146931529045105, + "step": 2491 + }, + { + "epoch": 0.6617979020050458, + "grad_norm": 1.0621415260673002, + "learning_rate": 1.599269677870616e-05, + "loss": 0.32187730073928833, + "step": 2492 + }, + { + "epoch": 0.6620634709865888, + "grad_norm": 1.0631524880676668, + "learning_rate": 1.5989181186108003e-05, + "loss": 0.3021823465824127, + "step": 2493 + }, + { + "epoch": 0.6623290399681317, + "grad_norm": 
1.0248198480240434, + "learning_rate": 1.5985664438866354e-05, + "loss": 0.3309648334980011, + "step": 2494 + }, + { + "epoch": 0.6625946089496747, + "grad_norm": 1.0183038789118495, + "learning_rate": 1.598214653765919e-05, + "loss": 0.2939694821834564, + "step": 2495 + }, + { + "epoch": 0.6628601779312177, + "grad_norm": 1.0091208408649601, + "learning_rate": 1.597862748316473e-05, + "loss": 0.31219810247421265, + "step": 2496 + }, + { + "epoch": 0.6631257469127606, + "grad_norm": 1.3669850946739606, + "learning_rate": 1.5975107276061405e-05, + "loss": 0.29435622692108154, + "step": 2497 + }, + { + "epoch": 0.6633913158943036, + "grad_norm": 1.0359724885535866, + "learning_rate": 1.5971585917027864e-05, + "loss": 0.27167004346847534, + "step": 2498 + }, + { + "epoch": 0.6636568848758465, + "grad_norm": 1.121619558624798, + "learning_rate": 1.5968063406742988e-05, + "loss": 0.3360658884048462, + "step": 2499 + }, + { + "epoch": 0.6639224538573895, + "grad_norm": 1.0767207810238415, + "learning_rate": 1.596453974588587e-05, + "loss": 0.2994089424610138, + "step": 2500 + }, + { + "epoch": 0.6641880228389324, + "grad_norm": 1.0997593865705806, + "learning_rate": 1.596101493513584e-05, + "loss": 0.32302889227867126, + "step": 2501 + }, + { + "epoch": 0.6644535918204754, + "grad_norm": 1.1249891187970829, + "learning_rate": 1.595748897517243e-05, + "loss": 0.3122987747192383, + "step": 2502 + }, + { + "epoch": 0.6647191608020183, + "grad_norm": 1.014108779554691, + "learning_rate": 1.5953961866675408e-05, + "loss": 0.2746438980102539, + "step": 2503 + }, + { + "epoch": 0.6649847297835613, + "grad_norm": 1.0758059481680302, + "learning_rate": 1.5950433610324758e-05, + "loss": 0.3043097257614136, + "step": 2504 + }, + { + "epoch": 0.6652502987651042, + "grad_norm": 1.2204942135197403, + "learning_rate": 1.594690420680069e-05, + "loss": 0.3208698332309723, + "step": 2505 + }, + { + "epoch": 0.6655158677466472, + "grad_norm": 1.1502218188727449, + "learning_rate": 
1.5943373656783628e-05, + "loss": 0.317341148853302, + "step": 2506 + }, + { + "epoch": 0.6657814367281901, + "grad_norm": 1.1223078751349502, + "learning_rate": 1.5939841960954218e-05, + "loss": 0.3250347673892975, + "step": 2507 + }, + { + "epoch": 0.6660470057097331, + "grad_norm": 1.066903715567463, + "learning_rate": 1.5936309119993333e-05, + "loss": 0.32255828380584717, + "step": 2508 + }, + { + "epoch": 0.666312574691276, + "grad_norm": 1.0591506680476068, + "learning_rate": 1.593277513458206e-05, + "loss": 0.3247614800930023, + "step": 2509 + }, + { + "epoch": 0.666578143672819, + "grad_norm": 1.087253896768941, + "learning_rate": 1.5929240005401715e-05, + "loss": 0.34171730279922485, + "step": 2510 + }, + { + "epoch": 0.6668437126543619, + "grad_norm": 1.092874100004657, + "learning_rate": 1.5925703733133823e-05, + "loss": 0.30671584606170654, + "step": 2511 + }, + { + "epoch": 0.6671092816359049, + "grad_norm": 1.1250075389065, + "learning_rate": 1.5922166318460138e-05, + "loss": 0.3387908339500427, + "step": 2512 + }, + { + "epoch": 0.6673748506174478, + "grad_norm": 1.0272141820522305, + "learning_rate": 1.5918627762062635e-05, + "loss": 0.2772873044013977, + "step": 2513 + }, + { + "epoch": 0.6676404195989908, + "grad_norm": 1.0802689739154336, + "learning_rate": 1.59150880646235e-05, + "loss": 0.31555238366127014, + "step": 2514 + }, + { + "epoch": 0.6679059885805337, + "grad_norm": 0.9930963010924009, + "learning_rate": 1.5911547226825154e-05, + "loss": 0.2821594476699829, + "step": 2515 + }, + { + "epoch": 0.6681715575620768, + "grad_norm": 1.098936156337469, + "learning_rate": 1.5908005249350217e-05, + "loss": 0.3176054358482361, + "step": 2516 + }, + { + "epoch": 0.6684371265436198, + "grad_norm": 1.083365844116071, + "learning_rate": 1.590446213288155e-05, + "loss": 0.28484907746315, + "step": 2517 + }, + { + "epoch": 0.6687026955251627, + "grad_norm": 1.0028500327966023, + "learning_rate": 1.590091787810222e-05, + "loss": 0.25227850675582886, + 
"step": 2518 + }, + { + "epoch": 0.6689682645067057, + "grad_norm": 0.993931866088294, + "learning_rate": 1.5897372485695514e-05, + "loss": 0.276819109916687, + "step": 2519 + }, + { + "epoch": 0.6692338334882486, + "grad_norm": 1.1883846939575156, + "learning_rate": 1.589382595634495e-05, + "loss": 0.27944183349609375, + "step": 2520 + }, + { + "epoch": 0.6694994024697916, + "grad_norm": 1.0217591474349375, + "learning_rate": 1.589027829073425e-05, + "loss": 0.295337975025177, + "step": 2521 + }, + { + "epoch": 0.6697649714513345, + "grad_norm": 1.0940479681497102, + "learning_rate": 1.5886729489547365e-05, + "loss": 0.31168580055236816, + "step": 2522 + }, + { + "epoch": 0.6700305404328775, + "grad_norm": 1.0847233646991081, + "learning_rate": 1.5883179553468465e-05, + "loss": 0.34520941972732544, + "step": 2523 + }, + { + "epoch": 0.6702961094144204, + "grad_norm": 1.0941539012056998, + "learning_rate": 1.587962848318193e-05, + "loss": 0.3121863901615143, + "step": 2524 + }, + { + "epoch": 0.6705616783959634, + "grad_norm": 1.2414605611463847, + "learning_rate": 1.587607627937237e-05, + "loss": 0.3450377583503723, + "step": 2525 + }, + { + "epoch": 0.6708272473775063, + "grad_norm": 1.0575484463097053, + "learning_rate": 1.58725229427246e-05, + "loss": 0.33431196212768555, + "step": 2526 + }, + { + "epoch": 0.6710928163590493, + "grad_norm": 2.8101197900274433, + "learning_rate": 1.5868968473923675e-05, + "loss": 0.2753226161003113, + "step": 2527 + }, + { + "epoch": 0.6713583853405922, + "grad_norm": 1.1171540013343635, + "learning_rate": 1.586541287365484e-05, + "loss": 0.31394219398498535, + "step": 2528 + }, + { + "epoch": 0.6716239543221352, + "grad_norm": 1.0940027543433968, + "learning_rate": 1.586185614260358e-05, + "loss": 0.352859765291214, + "step": 2529 + }, + { + "epoch": 0.6718895233036781, + "grad_norm": 1.158790754412002, + "learning_rate": 1.5858298281455592e-05, + "loss": 0.3182204067707062, + "step": 2530 + }, + { + "epoch": 
0.6721550922852211, + "grad_norm": 1.0901686159979078, + "learning_rate": 1.5854739290896785e-05, + "loss": 0.3107008934020996, + "step": 2531 + }, + { + "epoch": 0.672420661266764, + "grad_norm": 1.0367853416177613, + "learning_rate": 1.5851179171613294e-05, + "loss": 0.2737328112125397, + "step": 2532 + }, + { + "epoch": 0.672686230248307, + "grad_norm": 1.070700914663809, + "learning_rate": 1.5847617924291466e-05, + "loss": 0.2744509279727936, + "step": 2533 + }, + { + "epoch": 0.6729517992298499, + "grad_norm": 1.0763385778363233, + "learning_rate": 1.584405554961787e-05, + "loss": 0.3149082660675049, + "step": 2534 + }, + { + "epoch": 0.6732173682113929, + "grad_norm": 1.1199335422347676, + "learning_rate": 1.584049204827929e-05, + "loss": 0.32643741369247437, + "step": 2535 + }, + { + "epoch": 0.6734829371929358, + "grad_norm": 1.1153920819002263, + "learning_rate": 1.583692742096272e-05, + "loss": 0.31901559233665466, + "step": 2536 + }, + { + "epoch": 0.6737485061744788, + "grad_norm": 1.037012713250851, + "learning_rate": 1.583336166835539e-05, + "loss": 0.3020802140235901, + "step": 2537 + }, + { + "epoch": 0.6740140751560217, + "grad_norm": 0.9884255382698084, + "learning_rate": 1.5829794791144723e-05, + "loss": 0.29683804512023926, + "step": 2538 + }, + { + "epoch": 0.6742796441375647, + "grad_norm": 1.0549080502640127, + "learning_rate": 1.582622679001838e-05, + "loss": 0.2898966073989868, + "step": 2539 + }, + { + "epoch": 0.6745452131191076, + "grad_norm": 1.0628349250468347, + "learning_rate": 1.582265766566422e-05, + "loss": 0.2665000855922699, + "step": 2540 + }, + { + "epoch": 0.6748107821006506, + "grad_norm": 1.1059852721256176, + "learning_rate": 1.581908741877034e-05, + "loss": 0.2987207770347595, + "step": 2541 + }, + { + "epoch": 0.6750763510821935, + "grad_norm": 1.1051901132495052, + "learning_rate": 1.5815516050025032e-05, + "loss": 0.32591086626052856, + "step": 2542 + }, + { + "epoch": 0.6753419200637365, + "grad_norm": 
0.9752097662975195, + "learning_rate": 1.581194356011682e-05, + "loss": 0.28181299567222595, + "step": 2543 + }, + { + "epoch": 0.6756074890452796, + "grad_norm": 1.0983389872703522, + "learning_rate": 1.5808369949734433e-05, + "loss": 0.3256041407585144, + "step": 2544 + }, + { + "epoch": 0.6758730580268225, + "grad_norm": 1.1228012917357884, + "learning_rate": 1.5804795219566825e-05, + "loss": 0.3079703152179718, + "step": 2545 + }, + { + "epoch": 0.6761386270083655, + "grad_norm": 1.1504916593616519, + "learning_rate": 1.580121937030316e-05, + "loss": 0.3364162743091583, + "step": 2546 + }, + { + "epoch": 0.6764041959899084, + "grad_norm": 1.046870504650359, + "learning_rate": 1.5797642402632816e-05, + "loss": 0.2774898111820221, + "step": 2547 + }, + { + "epoch": 0.6766697649714514, + "grad_norm": 1.1108782100380157, + "learning_rate": 1.5794064317245396e-05, + "loss": 0.33260244131088257, + "step": 2548 + }, + { + "epoch": 0.6769353339529943, + "grad_norm": 1.16229568793775, + "learning_rate": 1.5790485114830708e-05, + "loss": 0.3327571153640747, + "step": 2549 + }, + { + "epoch": 0.6772009029345373, + "grad_norm": 1.1256526679188055, + "learning_rate": 1.5786904796078783e-05, + "loss": 0.28527912497520447, + "step": 2550 + }, + { + "epoch": 0.6774664719160802, + "grad_norm": 1.1757868172389025, + "learning_rate": 1.5783323361679865e-05, + "loss": 0.3100908100605011, + "step": 2551 + }, + { + "epoch": 0.6777320408976232, + "grad_norm": 1.1187226402475792, + "learning_rate": 1.577974081232441e-05, + "loss": 0.3434574007987976, + "step": 2552 + }, + { + "epoch": 0.6779976098791661, + "grad_norm": 1.0691671390255433, + "learning_rate": 1.5776157148703094e-05, + "loss": 0.3151341676712036, + "step": 2553 + }, + { + "epoch": 0.6782631788607091, + "grad_norm": 1.1432839314923735, + "learning_rate": 1.5772572371506803e-05, + "loss": 0.33334124088287354, + "step": 2554 + }, + { + "epoch": 0.678528747842252, + "grad_norm": 0.9718187941404679, + "learning_rate": 
1.576898648142664e-05, + "loss": 0.26933547854423523, + "step": 2555 + }, + { + "epoch": 0.678794316823795, + "grad_norm": 1.0146251280063243, + "learning_rate": 1.576539947915392e-05, + "loss": 0.3087029755115509, + "step": 2556 + }, + { + "epoch": 0.6790598858053379, + "grad_norm": 2.0746649121309244, + "learning_rate": 1.576181136538018e-05, + "loss": 0.32620540261268616, + "step": 2557 + }, + { + "epoch": 0.6793254547868809, + "grad_norm": 1.0462752825892652, + "learning_rate": 1.575822214079716e-05, + "loss": 0.29112139344215393, + "step": 2558 + }, + { + "epoch": 0.6795910237684238, + "grad_norm": 1.108770761520566, + "learning_rate": 1.5754631806096822e-05, + "loss": 0.3394843339920044, + "step": 2559 + }, + { + "epoch": 0.6798565927499668, + "grad_norm": 1.0789431162979184, + "learning_rate": 1.5751040361971342e-05, + "loss": 0.32754629850387573, + "step": 2560 + }, + { + "epoch": 0.6801221617315097, + "grad_norm": 1.055729440740922, + "learning_rate": 1.574744780911311e-05, + "loss": 0.2829592823982239, + "step": 2561 + }, + { + "epoch": 0.6803877307130527, + "grad_norm": 3.1916720491195423, + "learning_rate": 1.5743854148214724e-05, + "loss": 0.2718046307563782, + "step": 2562 + }, + { + "epoch": 0.6806532996945956, + "grad_norm": 1.0355755791413483, + "learning_rate": 1.5740259379969002e-05, + "loss": 0.29244256019592285, + "step": 2563 + }, + { + "epoch": 0.6809188686761386, + "grad_norm": 1.0678189150114252, + "learning_rate": 1.5736663505068972e-05, + "loss": 0.2925388514995575, + "step": 2564 + }, + { + "epoch": 0.6811844376576816, + "grad_norm": 1.109826571766002, + "learning_rate": 1.5733066524207875e-05, + "loss": 0.26742440462112427, + "step": 2565 + }, + { + "epoch": 0.6814500066392245, + "grad_norm": 1.0365586719986022, + "learning_rate": 1.5729468438079167e-05, + "loss": 0.33688807487487793, + "step": 2566 + }, + { + "epoch": 0.6817155756207675, + "grad_norm": 1.0939355325909954, + "learning_rate": 1.5725869247376514e-05, + "loss": 
0.2953096330165863, + "step": 2567 + }, + { + "epoch": 0.6819811446023104, + "grad_norm": 1.081510188555139, + "learning_rate": 1.5722268952793806e-05, + "loss": 0.321500301361084, + "step": 2568 + }, + { + "epoch": 0.6822467135838534, + "grad_norm": 1.1427798210793014, + "learning_rate": 1.5718667555025127e-05, + "loss": 0.29148590564727783, + "step": 2569 + }, + { + "epoch": 0.6825122825653963, + "grad_norm": 1.0849106130015975, + "learning_rate": 1.5715065054764792e-05, + "loss": 0.26887139678001404, + "step": 2570 + }, + { + "epoch": 0.6827778515469393, + "grad_norm": 0.9118900514894542, + "learning_rate": 1.5711461452707316e-05, + "loss": 0.2698139250278473, + "step": 2571 + }, + { + "epoch": 0.6830434205284823, + "grad_norm": 0.9420578172190551, + "learning_rate": 1.5707856749547433e-05, + "loss": 0.264956533908844, + "step": 2572 + }, + { + "epoch": 0.6833089895100253, + "grad_norm": 1.0786584040903482, + "learning_rate": 1.5704250945980085e-05, + "loss": 0.32535314559936523, + "step": 2573 + }, + { + "epoch": 0.6835745584915682, + "grad_norm": 1.1132312438200667, + "learning_rate": 1.5700644042700432e-05, + "loss": 0.30529654026031494, + "step": 2574 + }, + { + "epoch": 0.6838401274731112, + "grad_norm": 0.9518994724553314, + "learning_rate": 1.569703604040384e-05, + "loss": 0.27253150939941406, + "step": 2575 + }, + { + "epoch": 0.6841056964546541, + "grad_norm": 1.0559070796873817, + "learning_rate": 1.5693426939785886e-05, + "loss": 0.27451053261756897, + "step": 2576 + }, + { + "epoch": 0.6843712654361971, + "grad_norm": 1.1393124405849042, + "learning_rate": 1.5689816741542374e-05, + "loss": 0.33280283212661743, + "step": 2577 + }, + { + "epoch": 0.68463683441774, + "grad_norm": 1.1306113061745138, + "learning_rate": 1.5686205446369293e-05, + "loss": 0.2911887764930725, + "step": 2578 + }, + { + "epoch": 0.684902403399283, + "grad_norm": 1.0940465986734231, + "learning_rate": 1.5682593054962866e-05, + "loss": 0.2950279116630554, + "step": 2579 + }, + { 
+ "epoch": 0.6851679723808259, + "grad_norm": 1.0911163136563768, + "learning_rate": 1.5678979568019518e-05, + "loss": 0.3267458975315094, + "step": 2580 + }, + { + "epoch": 0.6854335413623689, + "grad_norm": 1.2739312763430675, + "learning_rate": 1.5675364986235887e-05, + "loss": 0.3209132254123688, + "step": 2581 + }, + { + "epoch": 0.6856991103439118, + "grad_norm": 1.1101887519376679, + "learning_rate": 1.5671749310308818e-05, + "loss": 0.3186662197113037, + "step": 2582 + }, + { + "epoch": 0.6859646793254548, + "grad_norm": 0.9652854961372175, + "learning_rate": 1.566813254093538e-05, + "loss": 0.24875827133655548, + "step": 2583 + }, + { + "epoch": 0.6862302483069977, + "grad_norm": 1.0684425959326884, + "learning_rate": 1.5664514678812835e-05, + "loss": 0.26657983660697937, + "step": 2584 + }, + { + "epoch": 0.6864958172885407, + "grad_norm": 1.0670123202559558, + "learning_rate": 1.5660895724638666e-05, + "loss": 0.2889682650566101, + "step": 2585 + }, + { + "epoch": 0.6867613862700837, + "grad_norm": 1.2310590689373582, + "learning_rate": 1.5657275679110564e-05, + "loss": 0.32035061717033386, + "step": 2586 + }, + { + "epoch": 0.6870269552516266, + "grad_norm": 0.9946580402808185, + "learning_rate": 1.5653654542926435e-05, + "loss": 0.2844264507293701, + "step": 2587 + }, + { + "epoch": 0.6872925242331696, + "grad_norm": 1.0738818938413612, + "learning_rate": 1.5650032316784388e-05, + "loss": 0.27645713090896606, + "step": 2588 + }, + { + "epoch": 0.6875580932147125, + "grad_norm": 1.0078062598096618, + "learning_rate": 1.5646409001382745e-05, + "loss": 0.29902809858322144, + "step": 2589 + }, + { + "epoch": 0.6878236621962555, + "grad_norm": 1.0662439819494403, + "learning_rate": 1.564278459742004e-05, + "loss": 0.28179824352264404, + "step": 2590 + }, + { + "epoch": 0.6880892311777984, + "grad_norm": 0.9959782320912598, + "learning_rate": 1.563915910559502e-05, + "loss": 0.30527305603027344, + "step": 2591 + }, + { + "epoch": 0.6883548001593414, + 
"grad_norm": 0.9640464455731136, + "learning_rate": 1.5635532526606625e-05, + "loss": 0.29411792755126953, + "step": 2592 + }, + { + "epoch": 0.6886203691408843, + "grad_norm": 1.0659796212639145, + "learning_rate": 1.563190486115403e-05, + "loss": 0.32294154167175293, + "step": 2593 + }, + { + "epoch": 0.6888859381224273, + "grad_norm": 1.0983041505312465, + "learning_rate": 1.5628276109936594e-05, + "loss": 0.31873172521591187, + "step": 2594 + }, + { + "epoch": 0.6891515071039702, + "grad_norm": 1.2163401358885952, + "learning_rate": 1.5624646273653908e-05, + "loss": 0.37790048122406006, + "step": 2595 + }, + { + "epoch": 0.6894170760855132, + "grad_norm": 1.0271206309222516, + "learning_rate": 1.5621015353005754e-05, + "loss": 0.27596205472946167, + "step": 2596 + }, + { + "epoch": 0.6896826450670561, + "grad_norm": 1.2915034278595348, + "learning_rate": 1.5617383348692135e-05, + "loss": 0.30952686071395874, + "step": 2597 + }, + { + "epoch": 0.6899482140485991, + "grad_norm": 1.089414433310086, + "learning_rate": 1.5613750261413256e-05, + "loss": 0.2933235764503479, + "step": 2598 + }, + { + "epoch": 0.690213783030142, + "grad_norm": 1.1151043496896997, + "learning_rate": 1.5610116091869538e-05, + "loss": 0.2961776554584503, + "step": 2599 + }, + { + "epoch": 0.6904793520116851, + "grad_norm": 1.0596230408388436, + "learning_rate": 1.56064808407616e-05, + "loss": 0.2843313217163086, + "step": 2600 + }, + { + "epoch": 0.690744920993228, + "grad_norm": 1.0545406618996236, + "learning_rate": 1.560284450879028e-05, + "loss": 0.29366564750671387, + "step": 2601 + }, + { + "epoch": 0.691010489974771, + "grad_norm": 1.028254286030692, + "learning_rate": 1.5599207096656614e-05, + "loss": 0.32668614387512207, + "step": 2602 + }, + { + "epoch": 0.6912760589563139, + "grad_norm": 1.1962201821774399, + "learning_rate": 1.5595568605061858e-05, + "loss": 0.344367653131485, + "step": 2603 + }, + { + "epoch": 0.6915416279378569, + "grad_norm": 1.2250839657368426, + 
"learning_rate": 1.5591929034707468e-05, + "loss": 0.2875809371471405, + "step": 2604 + }, + { + "epoch": 0.6918071969193998, + "grad_norm": 0.9717157700868733, + "learning_rate": 1.5588288386295113e-05, + "loss": 0.2688799202442169, + "step": 2605 + }, + { + "epoch": 0.6920727659009428, + "grad_norm": 1.2520016236289049, + "learning_rate": 1.558464666052667e-05, + "loss": 0.28575828671455383, + "step": 2606 + }, + { + "epoch": 0.6923383348824858, + "grad_norm": 1.0741907315089707, + "learning_rate": 1.5581003858104203e-05, + "loss": 0.2800632119178772, + "step": 2607 + }, + { + "epoch": 0.6926039038640287, + "grad_norm": 1.096176752690496, + "learning_rate": 1.5577359979730022e-05, + "loss": 0.3066416382789612, + "step": 2608 + }, + { + "epoch": 0.6928694728455717, + "grad_norm": 1.0146792499875503, + "learning_rate": 1.5573715026106617e-05, + "loss": 0.3164110779762268, + "step": 2609 + }, + { + "epoch": 0.6931350418271146, + "grad_norm": 1.0292100354922897, + "learning_rate": 1.5570068997936686e-05, + "loss": 0.2908422350883484, + "step": 2610 + }, + { + "epoch": 0.6934006108086576, + "grad_norm": 0.9996966110923509, + "learning_rate": 1.5566421895923148e-05, + "loss": 0.29055240750312805, + "step": 2611 + }, + { + "epoch": 0.6936661797902005, + "grad_norm": 1.1296077877181152, + "learning_rate": 1.556277372076912e-05, + "loss": 0.3247227370738983, + "step": 2612 + }, + { + "epoch": 0.6939317487717435, + "grad_norm": 1.0869397458201258, + "learning_rate": 1.555912447317792e-05, + "loss": 0.29944315552711487, + "step": 2613 + }, + { + "epoch": 0.6941973177532864, + "grad_norm": 1.140637727836958, + "learning_rate": 1.5555474153853092e-05, + "loss": 0.2984931170940399, + "step": 2614 + }, + { + "epoch": 0.6944628867348294, + "grad_norm": 1.0644561032518303, + "learning_rate": 1.5551822763498364e-05, + "loss": 0.301285982131958, + "step": 2615 + }, + { + "epoch": 0.6947284557163723, + "grad_norm": 1.0271314049069311, + "learning_rate": 1.5548170302817683e-05, + 
"loss": 0.2862967252731323, + "step": 2616 + }, + { + "epoch": 0.6949940246979153, + "grad_norm": 1.0216494335731472, + "learning_rate": 1.5544516772515207e-05, + "loss": 0.3071482181549072, + "step": 2617 + }, + { + "epoch": 0.6952595936794582, + "grad_norm": 1.153798162838472, + "learning_rate": 1.5540862173295285e-05, + "loss": 0.33668914437294006, + "step": 2618 + }, + { + "epoch": 0.6955251626610012, + "grad_norm": 1.0451730984690786, + "learning_rate": 1.5537206505862486e-05, + "loss": 0.32204627990722656, + "step": 2619 + }, + { + "epoch": 0.6957907316425441, + "grad_norm": 1.083101648134336, + "learning_rate": 1.5533549770921576e-05, + "loss": 0.30210041999816895, + "step": 2620 + }, + { + "epoch": 0.6960563006240871, + "grad_norm": 1.1518417167078652, + "learning_rate": 1.5529891969177535e-05, + "loss": 0.3116886019706726, + "step": 2621 + }, + { + "epoch": 0.69632186960563, + "grad_norm": 1.1473344970327815, + "learning_rate": 1.5526233101335543e-05, + "loss": 0.3460058867931366, + "step": 2622 + }, + { + "epoch": 0.696587438587173, + "grad_norm": 1.0477810576486106, + "learning_rate": 1.552257316810098e-05, + "loss": 0.30080512166023254, + "step": 2623 + }, + { + "epoch": 0.6968530075687159, + "grad_norm": 1.1107090823955428, + "learning_rate": 1.5518912170179447e-05, + "loss": 0.3381347954273224, + "step": 2624 + }, + { + "epoch": 0.6971185765502589, + "grad_norm": 1.0737064011248665, + "learning_rate": 1.5515250108276733e-05, + "loss": 0.30345672369003296, + "step": 2625 + }, + { + "epoch": 0.6973841455318018, + "grad_norm": 1.1809134250993814, + "learning_rate": 1.5511586983098847e-05, + "loss": 0.3002641797065735, + "step": 2626 + }, + { + "epoch": 0.6976497145133448, + "grad_norm": 0.9975793486319376, + "learning_rate": 1.5507922795351992e-05, + "loss": 0.2848126292228699, + "step": 2627 + }, + { + "epoch": 0.6979152834948879, + "grad_norm": 1.1203755244922207, + "learning_rate": 1.5504257545742585e-05, + "loss": 0.32360371947288513, + "step": 2628 
+ }, + { + "epoch": 0.6981808524764308, + "grad_norm": 1.0674295201271842, + "learning_rate": 1.5500591234977237e-05, + "loss": 0.2970595955848694, + "step": 2629 + }, + { + "epoch": 0.6984464214579738, + "grad_norm": 1.1343972682519483, + "learning_rate": 1.5496923863762773e-05, + "loss": 0.35431474447250366, + "step": 2630 + }, + { + "epoch": 0.6987119904395167, + "grad_norm": 1.027377246814574, + "learning_rate": 1.549325543280622e-05, + "loss": 0.30133551359176636, + "step": 2631 + }, + { + "epoch": 0.6989775594210597, + "grad_norm": 1.066148832325447, + "learning_rate": 1.5489585942814807e-05, + "loss": 0.3013160824775696, + "step": 2632 + }, + { + "epoch": 0.6992431284026026, + "grad_norm": 1.1981871164483473, + "learning_rate": 1.5485915394495967e-05, + "loss": 0.3291313052177429, + "step": 2633 + }, + { + "epoch": 0.6995086973841456, + "grad_norm": 1.3083774012082008, + "learning_rate": 1.5482243788557336e-05, + "loss": 0.32308053970336914, + "step": 2634 + }, + { + "epoch": 0.6997742663656885, + "grad_norm": 1.0802428984314951, + "learning_rate": 1.5478571125706762e-05, + "loss": 0.321450412273407, + "step": 2635 + }, + { + "epoch": 0.7000398353472315, + "grad_norm": 1.1144035500723286, + "learning_rate": 1.547489740665229e-05, + "loss": 0.30871254205703735, + "step": 2636 + }, + { + "epoch": 0.7003054043287744, + "grad_norm": 1.1599776854022048, + "learning_rate": 1.5471222632102168e-05, + "loss": 0.29414835572242737, + "step": 2637 + }, + { + "epoch": 0.7005709733103174, + "grad_norm": 1.019484878273918, + "learning_rate": 1.546754680276485e-05, + "loss": 0.2841604948043823, + "step": 2638 + }, + { + "epoch": 0.7008365422918603, + "grad_norm": 1.039625714192533, + "learning_rate": 1.546386991934899e-05, + "loss": 0.2895316183567047, + "step": 2639 + }, + { + "epoch": 0.7011021112734033, + "grad_norm": 1.0418724746200432, + "learning_rate": 1.546019198256345e-05, + "loss": 0.310278058052063, + "step": 2640 + }, + { + "epoch": 0.7013676802549462, + 
"grad_norm": 1.1737622034955963, + "learning_rate": 1.5456512993117297e-05, + "loss": 0.3000732660293579, + "step": 2641 + }, + { + "epoch": 0.7016332492364892, + "grad_norm": 1.034060473081883, + "learning_rate": 1.545283295171979e-05, + "loss": 0.2650133967399597, + "step": 2642 + }, + { + "epoch": 0.7018988182180321, + "grad_norm": 1.1833814596994714, + "learning_rate": 1.5449151859080395e-05, + "loss": 0.3414345681667328, + "step": 2643 + }, + { + "epoch": 0.7021643871995751, + "grad_norm": 0.9407765615747015, + "learning_rate": 1.5445469715908793e-05, + "loss": 0.26955321431159973, + "step": 2644 + }, + { + "epoch": 0.702429956181118, + "grad_norm": 1.0775826100815478, + "learning_rate": 1.5441786522914855e-05, + "loss": 0.3028743863105774, + "step": 2645 + }, + { + "epoch": 0.702695525162661, + "grad_norm": 1.1630883359211883, + "learning_rate": 1.5438102280808653e-05, + "loss": 0.28710106015205383, + "step": 2646 + }, + { + "epoch": 0.7029610941442039, + "grad_norm": 1.0828201415955274, + "learning_rate": 1.543441699030047e-05, + "loss": 0.33343076705932617, + "step": 2647 + }, + { + "epoch": 0.7032266631257469, + "grad_norm": 2.8774903725783445, + "learning_rate": 1.543073065210078e-05, + "loss": 0.27760642766952515, + "step": 2648 + }, + { + "epoch": 0.7034922321072898, + "grad_norm": 1.0939125975780095, + "learning_rate": 1.5427043266920276e-05, + "loss": 0.2844334840774536, + "step": 2649 + }, + { + "epoch": 0.7037578010888328, + "grad_norm": 1.0671776711844796, + "learning_rate": 1.542335483546983e-05, + "loss": 0.28979432582855225, + "step": 2650 + }, + { + "epoch": 0.7040233700703757, + "grad_norm": 1.1018820862649594, + "learning_rate": 1.5419665358460537e-05, + "loss": 0.313267320394516, + "step": 2651 + }, + { + "epoch": 0.7042889390519187, + "grad_norm": 1.122792570050495, + "learning_rate": 1.5415974836603676e-05, + "loss": 0.26702141761779785, + "step": 2652 + }, + { + "epoch": 0.7045545080334616, + "grad_norm": 1.084104909381419, + 
"learning_rate": 1.5412283270610752e-05, + "loss": 0.3256012499332428, + "step": 2653 + }, + { + "epoch": 0.7048200770150046, + "grad_norm": 1.1096374178765924, + "learning_rate": 1.540859066119344e-05, + "loss": 0.3035642206668854, + "step": 2654 + }, + { + "epoch": 0.7050856459965475, + "grad_norm": 1.1410920430169775, + "learning_rate": 1.5404897009063636e-05, + "loss": 0.32206645607948303, + "step": 2655 + }, + { + "epoch": 0.7053512149780906, + "grad_norm": 0.9596610334229038, + "learning_rate": 1.5401202314933436e-05, + "loss": 0.3023940920829773, + "step": 2656 + }, + { + "epoch": 0.7056167839596336, + "grad_norm": 0.9678878502259071, + "learning_rate": 1.539750657951513e-05, + "loss": 0.2839987277984619, + "step": 2657 + }, + { + "epoch": 0.7058823529411765, + "grad_norm": 0.9744312269236198, + "learning_rate": 1.5393809803521213e-05, + "loss": 0.2488149106502533, + "step": 2658 + }, + { + "epoch": 0.7061479219227195, + "grad_norm": 1.0311988168007409, + "learning_rate": 1.539011198766438e-05, + "loss": 0.27156201004981995, + "step": 2659 + }, + { + "epoch": 0.7064134909042624, + "grad_norm": 1.0925039664890526, + "learning_rate": 1.5386413132657528e-05, + "loss": 0.3038437068462372, + "step": 2660 + }, + { + "epoch": 0.7066790598858054, + "grad_norm": 0.9713190505037098, + "learning_rate": 1.5382713239213746e-05, + "loss": 0.27626922726631165, + "step": 2661 + }, + { + "epoch": 0.7069446288673483, + "grad_norm": 1.9675808121081846, + "learning_rate": 1.537901230804634e-05, + "loss": 0.27338162064552307, + "step": 2662 + }, + { + "epoch": 0.7072101978488913, + "grad_norm": 0.9540020890839573, + "learning_rate": 1.5375310339868798e-05, + "loss": 0.2635098099708557, + "step": 2663 + }, + { + "epoch": 0.7074757668304342, + "grad_norm": 1.1274430903932144, + "learning_rate": 1.537160733539482e-05, + "loss": 0.3245551288127899, + "step": 2664 + }, + { + "epoch": 0.7077413358119772, + "grad_norm": 1.1100804783644485, + "learning_rate": 1.53679032953383e-05, + 
"loss": 0.3226238787174225, + "step": 2665 + }, + { + "epoch": 0.7080069047935201, + "grad_norm": 1.0972084780717322, + "learning_rate": 1.536419822041333e-05, + "loss": 0.31588318943977356, + "step": 2666 + }, + { + "epoch": 0.7082724737750631, + "grad_norm": 1.031778059845932, + "learning_rate": 1.536049211133421e-05, + "loss": 0.2494429647922516, + "step": 2667 + }, + { + "epoch": 0.708538042756606, + "grad_norm": 1.1110915785079796, + "learning_rate": 1.5356784968815436e-05, + "loss": 0.30966901779174805, + "step": 2668 + }, + { + "epoch": 0.708803611738149, + "grad_norm": 1.1803956993815392, + "learning_rate": 1.5353076793571692e-05, + "loss": 0.29383328557014465, + "step": 2669 + }, + { + "epoch": 0.7090691807196919, + "grad_norm": 1.086625008831518, + "learning_rate": 1.5349367586317875e-05, + "loss": 0.30337825417518616, + "step": 2670 + }, + { + "epoch": 0.7093347497012349, + "grad_norm": 1.0049086741144315, + "learning_rate": 1.5345657347769082e-05, + "loss": 0.28128665685653687, + "step": 2671 + }, + { + "epoch": 0.7096003186827778, + "grad_norm": 1.1819105498956106, + "learning_rate": 1.5341946078640594e-05, + "loss": 0.35167062282562256, + "step": 2672 + }, + { + "epoch": 0.7098658876643208, + "grad_norm": 1.0441531577784944, + "learning_rate": 1.533823377964791e-05, + "loss": 0.30409517884254456, + "step": 2673 + }, + { + "epoch": 0.7101314566458637, + "grad_norm": 1.013441954819978, + "learning_rate": 1.5334520451506706e-05, + "loss": 0.2667735815048218, + "step": 2674 + }, + { + "epoch": 0.7103970256274067, + "grad_norm": 1.130854753100919, + "learning_rate": 1.5330806094932876e-05, + "loss": 0.290219247341156, + "step": 2675 + }, + { + "epoch": 0.7106625946089496, + "grad_norm": 1.120803532670259, + "learning_rate": 1.5327090710642503e-05, + "loss": 0.33118927478790283, + "step": 2676 + }, + { + "epoch": 0.7109281635904926, + "grad_norm": 1.2896959817209073, + "learning_rate": 1.5323374299351867e-05, + "loss": 0.34287041425704956, + "step": 2677 + 
}, + { + "epoch": 0.7111937325720356, + "grad_norm": 1.0183367847991263, + "learning_rate": 1.531965686177745e-05, + "loss": 0.27093711495399475, + "step": 2678 + }, + { + "epoch": 0.7114593015535785, + "grad_norm": 1.0913550671130643, + "learning_rate": 1.531593839863593e-05, + "loss": 0.2987911105155945, + "step": 2679 + }, + { + "epoch": 0.7117248705351215, + "grad_norm": 1.0145664449432468, + "learning_rate": 1.5312218910644185e-05, + "loss": 0.2914583086967468, + "step": 2680 + }, + { + "epoch": 0.7119904395166644, + "grad_norm": 1.0712171950199525, + "learning_rate": 1.530849839851928e-05, + "loss": 0.34159964323043823, + "step": 2681 + }, + { + "epoch": 0.7122560084982074, + "grad_norm": 1.0132523095253043, + "learning_rate": 1.5304776862978496e-05, + "loss": 0.28327372670173645, + "step": 2682 + }, + { + "epoch": 0.7125215774797503, + "grad_norm": 1.0473430655235008, + "learning_rate": 1.5301054304739292e-05, + "loss": 0.2902851104736328, + "step": 2683 + }, + { + "epoch": 0.7127871464612934, + "grad_norm": 1.106440530120003, + "learning_rate": 1.5297330724519344e-05, + "loss": 0.3192726969718933, + "step": 2684 + }, + { + "epoch": 0.7130527154428363, + "grad_norm": 1.0682705697817987, + "learning_rate": 1.5293606123036508e-05, + "loss": 0.30242764949798584, + "step": 2685 + }, + { + "epoch": 0.7133182844243793, + "grad_norm": 1.0059439200202651, + "learning_rate": 1.528988050100884e-05, + "loss": 0.2718653082847595, + "step": 2686 + }, + { + "epoch": 0.7135838534059222, + "grad_norm": 1.019566462631627, + "learning_rate": 1.52861538591546e-05, + "loss": 0.3014821708202362, + "step": 2687 + }, + { + "epoch": 0.7138494223874652, + "grad_norm": 1.1473508187880241, + "learning_rate": 1.528242619819224e-05, + "loss": 0.3378177881240845, + "step": 2688 + }, + { + "epoch": 0.7141149913690081, + "grad_norm": 1.0632179838195628, + "learning_rate": 1.5278697518840415e-05, + "loss": 0.29286471009254456, + "step": 2689 + }, + { + "epoch": 0.7143805603505511, + 
"grad_norm": 1.1140242619678895, + "learning_rate": 1.527496782181796e-05, + "loss": 0.3371768593788147, + "step": 2690 + }, + { + "epoch": 0.714646129332094, + "grad_norm": 1.0421377750374783, + "learning_rate": 1.5271237107843925e-05, + "loss": 0.30571556091308594, + "step": 2691 + }, + { + "epoch": 0.714911698313637, + "grad_norm": 1.0650624138184501, + "learning_rate": 1.526750537763754e-05, + "loss": 0.33064618706703186, + "step": 2692 + }, + { + "epoch": 0.7151772672951799, + "grad_norm": 1.0787164498543842, + "learning_rate": 1.5263772631918242e-05, + "loss": 0.3369274139404297, + "step": 2693 + }, + { + "epoch": 0.7154428362767229, + "grad_norm": 1.079249778019668, + "learning_rate": 1.5260038871405663e-05, + "loss": 0.2422705739736557, + "step": 2694 + }, + { + "epoch": 0.7157084052582658, + "grad_norm": 1.3990281605221084, + "learning_rate": 1.5256304096819628e-05, + "loss": 0.35786008834838867, + "step": 2695 + }, + { + "epoch": 0.7159739742398088, + "grad_norm": 1.0368618301698236, + "learning_rate": 1.5252568308880155e-05, + "loss": 0.2853243052959442, + "step": 2696 + }, + { + "epoch": 0.7162395432213517, + "grad_norm": 1.1300838792843926, + "learning_rate": 1.5248831508307459e-05, + "loss": 0.2903040051460266, + "step": 2697 + }, + { + "epoch": 0.7165051122028947, + "grad_norm": 1.0779989148221412, + "learning_rate": 1.5245093695821954e-05, + "loss": 0.3375359773635864, + "step": 2698 + }, + { + "epoch": 0.7167706811844377, + "grad_norm": 0.9828776196369989, + "learning_rate": 1.5241354872144242e-05, + "loss": 0.27855974435806274, + "step": 2699 + }, + { + "epoch": 0.7170362501659806, + "grad_norm": 1.0672391327565405, + "learning_rate": 1.5237615037995129e-05, + "loss": 0.32226768136024475, + "step": 2700 + }, + { + "epoch": 0.7173018191475236, + "grad_norm": 1.1089458515112456, + "learning_rate": 1.5233874194095606e-05, + "loss": 0.32856303453445435, + "step": 2701 + }, + { + "epoch": 0.7175673881290665, + "grad_norm": 1.15556869357308, + 
"learning_rate": 1.5230132341166868e-05, + "loss": 0.31619006395339966, + "step": 2702 + }, + { + "epoch": 0.7178329571106095, + "grad_norm": 1.09474796019269, + "learning_rate": 1.5226389479930296e-05, + "loss": 0.29736411571502686, + "step": 2703 + }, + { + "epoch": 0.7180985260921524, + "grad_norm": 1.0969127487202406, + "learning_rate": 1.5222645611107477e-05, + "loss": 0.2767728865146637, + "step": 2704 + }, + { + "epoch": 0.7183640950736954, + "grad_norm": 1.054074095850648, + "learning_rate": 1.5218900735420174e-05, + "loss": 0.30994221568107605, + "step": 2705 + }, + { + "epoch": 0.7186296640552383, + "grad_norm": 1.0931807335310835, + "learning_rate": 1.5215154853590362e-05, + "loss": 0.3419484496116638, + "step": 2706 + }, + { + "epoch": 0.7188952330367813, + "grad_norm": 1.0503021732812985, + "learning_rate": 1.5211407966340203e-05, + "loss": 0.3063664436340332, + "step": 2707 + }, + { + "epoch": 0.7191608020183242, + "grad_norm": 1.0345938706194526, + "learning_rate": 1.520766007439205e-05, + "loss": 0.2856604754924774, + "step": 2708 + }, + { + "epoch": 0.7194263709998672, + "grad_norm": 0.9757823992785323, + "learning_rate": 1.5203911178468453e-05, + "loss": 0.23257851600646973, + "step": 2709 + }, + { + "epoch": 0.7196919399814101, + "grad_norm": 1.0292145399058534, + "learning_rate": 1.5200161279292154e-05, + "loss": 0.31451839208602905, + "step": 2710 + }, + { + "epoch": 0.7199575089629531, + "grad_norm": 1.1017577588578753, + "learning_rate": 1.5196410377586095e-05, + "loss": 0.30298277735710144, + "step": 2711 + }, + { + "epoch": 0.7202230779444961, + "grad_norm": 1.0759590578514124, + "learning_rate": 1.5192658474073398e-05, + "loss": 0.28654640913009644, + "step": 2712 + }, + { + "epoch": 0.7204886469260391, + "grad_norm": 1.1189221983197806, + "learning_rate": 1.5188905569477391e-05, + "loss": 0.3148455023765564, + "step": 2713 + }, + { + "epoch": 0.720754215907582, + "grad_norm": 1.079970608729249, + "learning_rate": 1.5185151664521585e-05, + 
"loss": 0.3004840612411499, + "step": 2714 + }, + { + "epoch": 0.721019784889125, + "grad_norm": 1.206470642332625, + "learning_rate": 1.518139675992969e-05, + "loss": 0.3378010392189026, + "step": 2715 + }, + { + "epoch": 0.721285353870668, + "grad_norm": 1.0802971688897103, + "learning_rate": 1.517764085642561e-05, + "loss": 0.3084215223789215, + "step": 2716 + }, + { + "epoch": 0.7215509228522109, + "grad_norm": 1.1196175790564493, + "learning_rate": 1.517388395473344e-05, + "loss": 0.3434324264526367, + "step": 2717 + }, + { + "epoch": 0.7218164918337538, + "grad_norm": 1.2084125695848371, + "learning_rate": 1.517012605557746e-05, + "loss": 0.2862265706062317, + "step": 2718 + }, + { + "epoch": 0.7220820608152968, + "grad_norm": 0.9574562560549519, + "learning_rate": 1.5166367159682156e-05, + "loss": 0.2760370671749115, + "step": 2719 + }, + { + "epoch": 0.7223476297968398, + "grad_norm": 1.0623260792686084, + "learning_rate": 1.5162607267772194e-05, + "loss": 0.26659202575683594, + "step": 2720 + }, + { + "epoch": 0.7226131987783827, + "grad_norm": 1.069380288412464, + "learning_rate": 1.5158846380572439e-05, + "loss": 0.31900978088378906, + "step": 2721 + }, + { + "epoch": 0.7228787677599257, + "grad_norm": 0.9775730121294547, + "learning_rate": 1.5155084498807941e-05, + "loss": 0.2983658015727997, + "step": 2722 + }, + { + "epoch": 0.7231443367414686, + "grad_norm": 1.0202126383266699, + "learning_rate": 1.5151321623203953e-05, + "loss": 0.3086162805557251, + "step": 2723 + }, + { + "epoch": 0.7234099057230116, + "grad_norm": 1.2685875339489936, + "learning_rate": 1.5147557754485908e-05, + "loss": 0.3233461380004883, + "step": 2724 + }, + { + "epoch": 0.7236754747045545, + "grad_norm": 1.1386667332230644, + "learning_rate": 1.5143792893379441e-05, + "loss": 0.2979195713996887, + "step": 2725 + }, + { + "epoch": 0.7239410436860975, + "grad_norm": 0.9598628443474388, + "learning_rate": 1.5140027040610367e-05, + "loss": 0.27854713797569275, + "step": 2726 + }, 
+ { + "epoch": 0.7242066126676404, + "grad_norm": 1.0735596908703036, + "learning_rate": 1.5136260196904704e-05, + "loss": 0.293560266494751, + "step": 2727 + }, + { + "epoch": 0.7244721816491834, + "grad_norm": 1.1273149809893865, + "learning_rate": 1.513249236298865e-05, + "loss": 0.3033742308616638, + "step": 2728 + }, + { + "epoch": 0.7247377506307263, + "grad_norm": 1.1425183002588892, + "learning_rate": 1.51287235395886e-05, + "loss": 0.27958324551582336, + "step": 2729 + }, + { + "epoch": 0.7250033196122693, + "grad_norm": 1.022839475112705, + "learning_rate": 1.512495372743114e-05, + "loss": 0.3063122034072876, + "step": 2730 + }, + { + "epoch": 0.7252688885938122, + "grad_norm": 1.0524007495354166, + "learning_rate": 1.5121182927243043e-05, + "loss": 0.29126864671707153, + "step": 2731 + }, + { + "epoch": 0.7255344575753552, + "grad_norm": 1.0517432179455284, + "learning_rate": 1.5117411139751279e-05, + "loss": 0.27507084608078003, + "step": 2732 + }, + { + "epoch": 0.7258000265568981, + "grad_norm": 1.1167955582078537, + "learning_rate": 1.5113638365682996e-05, + "loss": 0.3432404398918152, + "step": 2733 + }, + { + "epoch": 0.7260655955384411, + "grad_norm": 1.0687371329401973, + "learning_rate": 1.5109864605765552e-05, + "loss": 0.27633196115493774, + "step": 2734 + }, + { + "epoch": 0.726331164519984, + "grad_norm": 1.0811244514830984, + "learning_rate": 1.5106089860726474e-05, + "loss": 0.274509072303772, + "step": 2735 + }, + { + "epoch": 0.726596733501527, + "grad_norm": 0.97012581020674, + "learning_rate": 1.5102314131293494e-05, + "loss": 0.26650723814964294, + "step": 2736 + }, + { + "epoch": 0.7268623024830699, + "grad_norm": 0.9681782432226156, + "learning_rate": 1.5098537418194524e-05, + "loss": 0.24476298689842224, + "step": 2737 + }, + { + "epoch": 0.7271278714646129, + "grad_norm": 1.1154772400244737, + "learning_rate": 1.5094759722157671e-05, + "loss": 0.3337150812149048, + "step": 2738 + }, + { + "epoch": 0.7273934404461558, + 
"grad_norm": 1.0187825093211873, + "learning_rate": 1.509098104391123e-05, + "loss": 0.3147660195827484, + "step": 2739 + }, + { + "epoch": 0.7276590094276989, + "grad_norm": 0.969229068573487, + "learning_rate": 1.5087201384183687e-05, + "loss": 0.2613281309604645, + "step": 2740 + }, + { + "epoch": 0.7279245784092419, + "grad_norm": 1.0641712204852296, + "learning_rate": 1.5083420743703717e-05, + "loss": 0.2773926854133606, + "step": 2741 + }, + { + "epoch": 0.7281901473907848, + "grad_norm": 1.0826759541494775, + "learning_rate": 1.5079639123200179e-05, + "loss": 0.30515575408935547, + "step": 2742 + }, + { + "epoch": 0.7284557163723278, + "grad_norm": 1.0619554532285063, + "learning_rate": 1.5075856523402128e-05, + "loss": 0.3174355626106262, + "step": 2743 + }, + { + "epoch": 0.7287212853538707, + "grad_norm": 0.9676487172589012, + "learning_rate": 1.5072072945038802e-05, + "loss": 0.25163760781288147, + "step": 2744 + }, + { + "epoch": 0.7289868543354137, + "grad_norm": 1.009992458232401, + "learning_rate": 1.5068288388839634e-05, + "loss": 0.28822118043899536, + "step": 2745 + }, + { + "epoch": 0.7292524233169566, + "grad_norm": 1.1623698216562623, + "learning_rate": 1.5064502855534237e-05, + "loss": 0.3129134476184845, + "step": 2746 + }, + { + "epoch": 0.7295179922984996, + "grad_norm": 1.0993962878508883, + "learning_rate": 1.5060716345852423e-05, + "loss": 0.332313597202301, + "step": 2747 + }, + { + "epoch": 0.7297835612800425, + "grad_norm": 1.1989932540466257, + "learning_rate": 1.5056928860524181e-05, + "loss": 0.3425176739692688, + "step": 2748 + }, + { + "epoch": 0.7300491302615855, + "grad_norm": 1.006044605592889, + "learning_rate": 1.5053140400279693e-05, + "loss": 0.2737991511821747, + "step": 2749 + }, + { + "epoch": 0.7303146992431284, + "grad_norm": 0.963162900300573, + "learning_rate": 1.5049350965849337e-05, + "loss": 0.27506589889526367, + "step": 2750 + }, + { + "epoch": 0.7305802682246714, + "grad_norm": 0.9901021314780329, + 
"learning_rate": 1.5045560557963663e-05, + "loss": 0.25581830739974976, + "step": 2751 + }, + { + "epoch": 0.7308458372062143, + "grad_norm": 1.0977147554610498, + "learning_rate": 1.5041769177353423e-05, + "loss": 0.31746333837509155, + "step": 2752 + }, + { + "epoch": 0.7311114061877573, + "grad_norm": 1.142455577048558, + "learning_rate": 1.5037976824749545e-05, + "loss": 0.3119337260723114, + "step": 2753 + }, + { + "epoch": 0.7313769751693002, + "grad_norm": 1.0824713857839723, + "learning_rate": 1.5034183500883153e-05, + "loss": 0.3330266773700714, + "step": 2754 + }, + { + "epoch": 0.7316425441508432, + "grad_norm": 1.1870819737785345, + "learning_rate": 1.5030389206485554e-05, + "loss": 0.2794867753982544, + "step": 2755 + }, + { + "epoch": 0.7319081131323861, + "grad_norm": 1.0826714009199063, + "learning_rate": 1.5026593942288248e-05, + "loss": 0.33273079991340637, + "step": 2756 + }, + { + "epoch": 0.7321736821139291, + "grad_norm": 1.1000195904608074, + "learning_rate": 1.502279770902291e-05, + "loss": 0.30673256516456604, + "step": 2757 + }, + { + "epoch": 0.732439251095472, + "grad_norm": 1.1311236734843304, + "learning_rate": 1.5019000507421412e-05, + "loss": 0.3126910924911499, + "step": 2758 + }, + { + "epoch": 0.732704820077015, + "grad_norm": 1.1665747930638253, + "learning_rate": 1.5015202338215811e-05, + "loss": 0.35423290729522705, + "step": 2759 + }, + { + "epoch": 0.7329703890585579, + "grad_norm": 1.0691634248957984, + "learning_rate": 1.5011403202138346e-05, + "loss": 0.31541377305984497, + "step": 2760 + }, + { + "epoch": 0.7332359580401009, + "grad_norm": 3.4446251175420257, + "learning_rate": 1.5007603099921451e-05, + "loss": 0.31460440158843994, + "step": 2761 + }, + { + "epoch": 0.7335015270216438, + "grad_norm": 1.0828016056563536, + "learning_rate": 1.5003802032297735e-05, + "loss": 0.2786293923854828, + "step": 2762 + }, + { + "epoch": 0.7337670960031868, + "grad_norm": 1.1025311021139896, + "learning_rate": 1.5000000000000002e-05, 
+ "loss": 0.27977997064590454, + "step": 2763 + }, + { + "epoch": 0.7340326649847297, + "grad_norm": 1.1136339551828278, + "learning_rate": 1.4996197003761237e-05, + "loss": 0.2933383584022522, + "step": 2764 + }, + { + "epoch": 0.7342982339662727, + "grad_norm": 1.0743056930311463, + "learning_rate": 1.4992393044314617e-05, + "loss": 0.30623573064804077, + "step": 2765 + }, + { + "epoch": 0.7345638029478156, + "grad_norm": 1.112681662128017, + "learning_rate": 1.4988588122393497e-05, + "loss": 0.28665077686309814, + "step": 2766 + }, + { + "epoch": 0.7348293719293586, + "grad_norm": 1.0268941907147413, + "learning_rate": 1.4984782238731422e-05, + "loss": 0.3245697021484375, + "step": 2767 + }, + { + "epoch": 0.7350949409109017, + "grad_norm": 1.118864717612721, + "learning_rate": 1.4980975394062122e-05, + "loss": 0.29477447271347046, + "step": 2768 + }, + { + "epoch": 0.7353605098924446, + "grad_norm": 1.009879072463833, + "learning_rate": 1.4977167589119508e-05, + "loss": 0.29174134135246277, + "step": 2769 + }, + { + "epoch": 0.7356260788739876, + "grad_norm": 1.010733766191454, + "learning_rate": 1.4973358824637687e-05, + "loss": 0.29473474621772766, + "step": 2770 + }, + { + "epoch": 0.7358916478555305, + "grad_norm": 1.3454647120520804, + "learning_rate": 1.4969549101350938e-05, + "loss": 0.3095156252384186, + "step": 2771 + }, + { + "epoch": 0.7361572168370735, + "grad_norm": 1.0578448721867733, + "learning_rate": 1.4965738419993733e-05, + "loss": 0.26295265555381775, + "step": 2772 + }, + { + "epoch": 0.7364227858186164, + "grad_norm": 1.0590497560307077, + "learning_rate": 1.4961926781300723e-05, + "loss": 0.2989509701728821, + "step": 2773 + }, + { + "epoch": 0.7366883548001594, + "grad_norm": 1.0783454816561941, + "learning_rate": 1.4958114186006756e-05, + "loss": 0.31087079644203186, + "step": 2774 + }, + { + "epoch": 0.7369539237817023, + "grad_norm": 1.0953647378016445, + "learning_rate": 1.4954300634846845e-05, + "loss": 0.3063197433948517, + "step": 
2775 + }, + { + "epoch": 0.7372194927632453, + "grad_norm": 1.0858506486148067, + "learning_rate": 1.4950486128556208e-05, + "loss": 0.3149424195289612, + "step": 2776 + }, + { + "epoch": 0.7374850617447882, + "grad_norm": 1.0199984929310564, + "learning_rate": 1.4946670667870224e-05, + "loss": 0.2724878191947937, + "step": 2777 + }, + { + "epoch": 0.7377506307263312, + "grad_norm": 1.0033150283887489, + "learning_rate": 1.4942854253524479e-05, + "loss": 0.2556690275669098, + "step": 2778 + }, + { + "epoch": 0.7380161997078741, + "grad_norm": 1.0594159401263619, + "learning_rate": 1.4939036886254727e-05, + "loss": 0.2704542875289917, + "step": 2779 + }, + { + "epoch": 0.7382817686894171, + "grad_norm": 1.052456117640013, + "learning_rate": 1.4935218566796918e-05, + "loss": 0.26762163639068604, + "step": 2780 + }, + { + "epoch": 0.73854733767096, + "grad_norm": 1.1328164222449624, + "learning_rate": 1.4931399295887172e-05, + "loss": 0.3376831114292145, + "step": 2781 + }, + { + "epoch": 0.738812906652503, + "grad_norm": 1.0695003562166123, + "learning_rate": 1.4927579074261803e-05, + "loss": 0.2980082631111145, + "step": 2782 + }, + { + "epoch": 0.7390784756340459, + "grad_norm": 1.0340858480290613, + "learning_rate": 1.4923757902657306e-05, + "loss": 0.27693796157836914, + "step": 2783 + }, + { + "epoch": 0.7393440446155889, + "grad_norm": 1.0204290883803, + "learning_rate": 1.4919935781810353e-05, + "loss": 0.3109282851219177, + "step": 2784 + }, + { + "epoch": 0.7396096135971318, + "grad_norm": 1.12631585013599, + "learning_rate": 1.4916112712457807e-05, + "loss": 0.3123949468135834, + "step": 2785 + }, + { + "epoch": 0.7398751825786748, + "grad_norm": 1.143039341014623, + "learning_rate": 1.4912288695336709e-05, + "loss": 0.3232062757015228, + "step": 2786 + }, + { + "epoch": 0.7401407515602177, + "grad_norm": 1.0315778016896975, + "learning_rate": 1.4908463731184287e-05, + "loss": 0.2685563862323761, + "step": 2787 + }, + { + "epoch": 0.7404063205417607, + 
"grad_norm": 1.076569860938466, + "learning_rate": 1.4904637820737945e-05, + "loss": 0.25752881169319153, + "step": 2788 + }, + { + "epoch": 0.7406718895233037, + "grad_norm": 1.2236263687690485, + "learning_rate": 1.4900810964735279e-05, + "loss": 0.2887497544288635, + "step": 2789 + }, + { + "epoch": 0.7409374585048466, + "grad_norm": 1.126755867019387, + "learning_rate": 1.489698316391406e-05, + "loss": 0.28804779052734375, + "step": 2790 + }, + { + "epoch": 0.7412030274863896, + "grad_norm": 1.0931262335064922, + "learning_rate": 1.489315441901224e-05, + "loss": 0.2684408724308014, + "step": 2791 + }, + { + "epoch": 0.7414685964679325, + "grad_norm": 1.0509233991385625, + "learning_rate": 1.4889324730767959e-05, + "loss": 0.31945526599884033, + "step": 2792 + }, + { + "epoch": 0.7417341654494755, + "grad_norm": 1.3391113530092205, + "learning_rate": 1.488549409991953e-05, + "loss": 0.34446024894714355, + "step": 2793 + }, + { + "epoch": 0.7419997344310184, + "grad_norm": 1.094751814978447, + "learning_rate": 1.488166252720546e-05, + "loss": 0.28849151730537415, + "step": 2794 + }, + { + "epoch": 0.7422653034125614, + "grad_norm": 1.0431424597135226, + "learning_rate": 1.4877830013364429e-05, + "loss": 0.2793633043766022, + "step": 2795 + }, + { + "epoch": 0.7425308723941043, + "grad_norm": 1.1811188011136542, + "learning_rate": 1.4873996559135298e-05, + "loss": 0.3211687505245209, + "step": 2796 + }, + { + "epoch": 0.7427964413756474, + "grad_norm": 1.004634818722801, + "learning_rate": 1.4870162165257114e-05, + "loss": 0.26225876808166504, + "step": 2797 + }, + { + "epoch": 0.7430620103571903, + "grad_norm": 1.7885293848946355, + "learning_rate": 1.4866326832469105e-05, + "loss": 0.3100029528141022, + "step": 2798 + }, + { + "epoch": 0.7433275793387333, + "grad_norm": 1.0428487423040855, + "learning_rate": 1.4862490561510675e-05, + "loss": 0.29399827122688293, + "step": 2799 + }, + { + "epoch": 0.7435931483202762, + "grad_norm": 0.9886298200418341, + 
"learning_rate": 1.4858653353121412e-05, + "loss": 0.27357399463653564, + "step": 2800 + }, + { + "epoch": 0.7438587173018192, + "grad_norm": 1.1101962385134683, + "learning_rate": 1.4854815208041087e-05, + "loss": 0.34575730562210083, + "step": 2801 + }, + { + "epoch": 0.7441242862833621, + "grad_norm": 1.0351474931606812, + "learning_rate": 1.4850976127009644e-05, + "loss": 0.28487247228622437, + "step": 2802 + }, + { + "epoch": 0.7443898552649051, + "grad_norm": 1.0283492066128257, + "learning_rate": 1.484713611076722e-05, + "loss": 0.264443576335907, + "step": 2803 + }, + { + "epoch": 0.744655424246448, + "grad_norm": 1.085429543255666, + "learning_rate": 1.4843295160054116e-05, + "loss": 0.32750973105430603, + "step": 2804 + }, + { + "epoch": 0.744920993227991, + "grad_norm": 1.0136013055294886, + "learning_rate": 1.4839453275610827e-05, + "loss": 0.24080191552639008, + "step": 2805 + }, + { + "epoch": 0.7451865622095339, + "grad_norm": 1.1486643921382949, + "learning_rate": 1.4835610458178025e-05, + "loss": 0.31667011976242065, + "step": 2806 + }, + { + "epoch": 0.7454521311910769, + "grad_norm": 1.0103490185384167, + "learning_rate": 1.4831766708496553e-05, + "loss": 0.2754175066947937, + "step": 2807 + }, + { + "epoch": 0.7457177001726198, + "grad_norm": 1.0607394107689443, + "learning_rate": 1.482792202730745e-05, + "loss": 0.2890132963657379, + "step": 2808 + }, + { + "epoch": 0.7459832691541628, + "grad_norm": 1.049970305589495, + "learning_rate": 1.4824076415351918e-05, + "loss": 0.3402877748012543, + "step": 2809 + }, + { + "epoch": 0.7462488381357057, + "grad_norm": 1.0879104018503691, + "learning_rate": 1.4820229873371347e-05, + "loss": 0.3167210519313812, + "step": 2810 + }, + { + "epoch": 0.7465144071172487, + "grad_norm": 0.9983910427341833, + "learning_rate": 1.4816382402107308e-05, + "loss": 0.2653643786907196, + "step": 2811 + }, + { + "epoch": 0.7467799760987917, + "grad_norm": 1.2191167585139304, + "learning_rate": 1.4812534002301547e-05, + 
"loss": 0.3202674984931946, + "step": 2812 + }, + { + "epoch": 0.7470455450803346, + "grad_norm": 1.0461975743299208, + "learning_rate": 1.4808684674695985e-05, + "loss": 0.2942724823951721, + "step": 2813 + }, + { + "epoch": 0.7473111140618776, + "grad_norm": 1.0581736193326858, + "learning_rate": 1.480483442003273e-05, + "loss": 0.28640663623809814, + "step": 2814 + }, + { + "epoch": 0.7475766830434205, + "grad_norm": 0.9932743335315769, + "learning_rate": 1.4800983239054071e-05, + "loss": 0.26214420795440674, + "step": 2815 + }, + { + "epoch": 0.7478422520249635, + "grad_norm": 1.0324489729554576, + "learning_rate": 1.4797131132502464e-05, + "loss": 0.3288992643356323, + "step": 2816 + }, + { + "epoch": 0.7481078210065064, + "grad_norm": 0.9775792939666473, + "learning_rate": 1.4793278101120551e-05, + "loss": 0.2622208297252655, + "step": 2817 + }, + { + "epoch": 0.7483733899880494, + "grad_norm": 1.0856486279870832, + "learning_rate": 1.4789424145651152e-05, + "loss": 0.3223533034324646, + "step": 2818 + }, + { + "epoch": 0.7486389589695923, + "grad_norm": 0.9640735701611682, + "learning_rate": 1.4785569266837264e-05, + "loss": 0.25849875807762146, + "step": 2819 + }, + { + "epoch": 0.7489045279511353, + "grad_norm": 1.20204465384733, + "learning_rate": 1.478171346542206e-05, + "loss": 0.3477833569049835, + "step": 2820 + }, + { + "epoch": 0.7491700969326782, + "grad_norm": 1.0577809669167442, + "learning_rate": 1.4777856742148897e-05, + "loss": 0.2799205780029297, + "step": 2821 + }, + { + "epoch": 0.7494356659142212, + "grad_norm": 1.624939710599736, + "learning_rate": 1.4773999097761304e-05, + "loss": 0.2591988444328308, + "step": 2822 + }, + { + "epoch": 0.7497012348957641, + "grad_norm": 1.2869478314125868, + "learning_rate": 1.477014053300299e-05, + "loss": 0.30161747336387634, + "step": 2823 + }, + { + "epoch": 0.7499668038773071, + "grad_norm": 1.0738509532979332, + "learning_rate": 1.4766281048617837e-05, + "loss": 0.28202176094055176, + "step": 2824 + 
}, + { + "epoch": 0.7502323728588501, + "grad_norm": 1.0042946509670743, + "learning_rate": 1.4762420645349912e-05, + "loss": 0.26074907183647156, + "step": 2825 + }, + { + "epoch": 0.7504979418403931, + "grad_norm": 1.1385436298617553, + "learning_rate": 1.4758559323943455e-05, + "loss": 0.2822819948196411, + "step": 2826 + }, + { + "epoch": 0.750763510821936, + "grad_norm": 1.1069166183989807, + "learning_rate": 1.4754697085142879e-05, + "loss": 0.2704991102218628, + "step": 2827 + }, + { + "epoch": 0.751029079803479, + "grad_norm": 1.1005590878466516, + "learning_rate": 1.4750833929692785e-05, + "loss": 0.2627401053905487, + "step": 2828 + }, + { + "epoch": 0.751294648785022, + "grad_norm": 1.0886740028659867, + "learning_rate": 1.474696985833794e-05, + "loss": 0.2898240089416504, + "step": 2829 + }, + { + "epoch": 0.7515602177665649, + "grad_norm": 1.0291450176805186, + "learning_rate": 1.4743104871823291e-05, + "loss": 0.30080029368400574, + "step": 2830 + }, + { + "epoch": 0.7518257867481078, + "grad_norm": 1.0953597523125502, + "learning_rate": 1.473923897089396e-05, + "loss": 0.2950359284877777, + "step": 2831 + }, + { + "epoch": 0.7520913557296508, + "grad_norm": 1.1129882579718784, + "learning_rate": 1.4735372156295253e-05, + "loss": 0.31936827301979065, + "step": 2832 + }, + { + "epoch": 0.7523569247111938, + "grad_norm": 1.1117484749822675, + "learning_rate": 1.4731504428772642e-05, + "loss": 0.2771468460559845, + "step": 2833 + }, + { + "epoch": 0.7526224936927367, + "grad_norm": 1.1332551367729735, + "learning_rate": 1.4727635789071779e-05, + "loss": 0.3135997951030731, + "step": 2834 + }, + { + "epoch": 0.7528880626742797, + "grad_norm": 1.1215560189558773, + "learning_rate": 1.4723766237938495e-05, + "loss": 0.29874372482299805, + "step": 2835 + }, + { + "epoch": 0.7531536316558226, + "grad_norm": 1.0292177835845961, + "learning_rate": 1.4719895776118789e-05, + "loss": 0.249681293964386, + "step": 2836 + }, + { + "epoch": 0.7534192006373656, + 
"grad_norm": 1.0567186687732057, + "learning_rate": 1.4716024404358847e-05, + "loss": 0.28544771671295166, + "step": 2837 + }, + { + "epoch": 0.7536847696189085, + "grad_norm": 1.1290911495331684, + "learning_rate": 1.4712152123405018e-05, + "loss": 0.32532355189323425, + "step": 2838 + }, + { + "epoch": 0.7539503386004515, + "grad_norm": 1.1212187873017119, + "learning_rate": 1.4708278934003835e-05, + "loss": 0.31663140654563904, + "step": 2839 + }, + { + "epoch": 0.7542159075819944, + "grad_norm": 1.123142254862964, + "learning_rate": 1.4704404836902005e-05, + "loss": 0.30552318692207336, + "step": 2840 + }, + { + "epoch": 0.7544814765635374, + "grad_norm": 1.1574657252500693, + "learning_rate": 1.47005298328464e-05, + "loss": 0.3019601106643677, + "step": 2841 + }, + { + "epoch": 0.7547470455450803, + "grad_norm": 1.0814580547673966, + "learning_rate": 1.4696653922584084e-05, + "loss": 0.321606308221817, + "step": 2842 + }, + { + "epoch": 0.7550126145266233, + "grad_norm": 1.138590953455986, + "learning_rate": 1.4692777106862281e-05, + "loss": 0.2709462642669678, + "step": 2843 + }, + { + "epoch": 0.7552781835081662, + "grad_norm": 1.1366302949330385, + "learning_rate": 1.46888993864284e-05, + "loss": 0.2882609963417053, + "step": 2844 + }, + { + "epoch": 0.7555437524897092, + "grad_norm": 0.9948609987035232, + "learning_rate": 1.4685020762030019e-05, + "loss": 0.25843000411987305, + "step": 2845 + }, + { + "epoch": 0.7558093214712521, + "grad_norm": 1.1002004205654323, + "learning_rate": 1.4681141234414889e-05, + "loss": 0.30962038040161133, + "step": 2846 + }, + { + "epoch": 0.7560748904527951, + "grad_norm": 1.2025960097123465, + "learning_rate": 1.4677260804330938e-05, + "loss": 0.304874062538147, + "step": 2847 + }, + { + "epoch": 0.756340459434338, + "grad_norm": 1.2287867091921092, + "learning_rate": 1.4673379472526268e-05, + "loss": 0.3425619602203369, + "step": 2848 + }, + { + "epoch": 0.756606028415881, + "grad_norm": 1.0701256182117689, + 
"learning_rate": 1.4669497239749153e-05, + "loss": 0.3002302050590515, + "step": 2849 + }, + { + "epoch": 0.7568715973974239, + "grad_norm": 1.1005370830207322, + "learning_rate": 1.4665614106748038e-05, + "loss": 0.31008803844451904, + "step": 2850 + }, + { + "epoch": 0.7571371663789669, + "grad_norm": 1.0175712407141912, + "learning_rate": 1.4661730074271551e-05, + "loss": 0.27829408645629883, + "step": 2851 + }, + { + "epoch": 0.7574027353605098, + "grad_norm": 1.0501959661073665, + "learning_rate": 1.4657845143068488e-05, + "loss": 0.25915467739105225, + "step": 2852 + }, + { + "epoch": 0.7576683043420529, + "grad_norm": 1.0719536636155031, + "learning_rate": 1.4653959313887813e-05, + "loss": 0.2843416929244995, + "step": 2853 + }, + { + "epoch": 0.7579338733235959, + "grad_norm": 1.0489373710223147, + "learning_rate": 1.465007258747867e-05, + "loss": 0.2851647138595581, + "step": 2854 + }, + { + "epoch": 0.7581994423051388, + "grad_norm": 1.085754694338766, + "learning_rate": 1.4646184964590378e-05, + "loss": 0.266017884016037, + "step": 2855 + }, + { + "epoch": 0.7584650112866818, + "grad_norm": 1.0789098348141843, + "learning_rate": 1.4642296445972421e-05, + "loss": 0.30142179131507874, + "step": 2856 + }, + { + "epoch": 0.7587305802682247, + "grad_norm": 0.9904299934324251, + "learning_rate": 1.463840703237446e-05, + "loss": 0.2878327965736389, + "step": 2857 + }, + { + "epoch": 0.7589961492497677, + "grad_norm": 1.114310168260114, + "learning_rate": 1.4634516724546326e-05, + "loss": 0.2919169068336487, + "step": 2858 + }, + { + "epoch": 0.7592617182313106, + "grad_norm": 0.9954308342175644, + "learning_rate": 1.4630625523238027e-05, + "loss": 0.2530924081802368, + "step": 2859 + }, + { + "epoch": 0.7595272872128536, + "grad_norm": 1.0858688189416337, + "learning_rate": 1.462673342919974e-05, + "loss": 0.3009106516838074, + "step": 2860 + }, + { + "epoch": 0.7597928561943965, + "grad_norm": 1.1572533440881312, + "learning_rate": 1.4622840443181817e-05, + 
"loss": 0.3114222288131714, + "step": 2861 + }, + { + "epoch": 0.7600584251759395, + "grad_norm": 1.2224434370177688, + "learning_rate": 1.4618946565934775e-05, + "loss": 0.344540536403656, + "step": 2862 + }, + { + "epoch": 0.7603239941574824, + "grad_norm": 1.0685722656113568, + "learning_rate": 1.4615051798209312e-05, + "loss": 0.263607919216156, + "step": 2863 + }, + { + "epoch": 0.7605895631390254, + "grad_norm": 1.018611353798299, + "learning_rate": 1.4611156140756293e-05, + "loss": 0.2685706317424774, + "step": 2864 + }, + { + "epoch": 0.7608551321205683, + "grad_norm": 1.1431197890714058, + "learning_rate": 1.4607259594326752e-05, + "loss": 0.32342326641082764, + "step": 2865 + }, + { + "epoch": 0.7611207011021113, + "grad_norm": 1.182050624874759, + "learning_rate": 1.4603362159671902e-05, + "loss": 0.3088849186897278, + "step": 2866 + }, + { + "epoch": 0.7613862700836542, + "grad_norm": 1.0482348167122462, + "learning_rate": 1.4599463837543114e-05, + "loss": 0.26718589663505554, + "step": 2867 + }, + { + "epoch": 0.7616518390651972, + "grad_norm": 1.0051992534296357, + "learning_rate": 1.4595564628691944e-05, + "loss": 0.29511263966560364, + "step": 2868 + }, + { + "epoch": 0.7619174080467401, + "grad_norm": 1.0974088254649037, + "learning_rate": 1.4591664533870118e-05, + "loss": 0.2940484285354614, + "step": 2869 + }, + { + "epoch": 0.7621829770282831, + "grad_norm": 1.1564456059915547, + "learning_rate": 1.4587763553829521e-05, + "loss": 0.28167295455932617, + "step": 2870 + }, + { + "epoch": 0.762448546009826, + "grad_norm": 1.0590804851451585, + "learning_rate": 1.4583861689322219e-05, + "loss": 0.3362962007522583, + "step": 2871 + }, + { + "epoch": 0.762714114991369, + "grad_norm": 1.1206777555300773, + "learning_rate": 1.4579958941100445e-05, + "loss": 0.3003339171409607, + "step": 2872 + }, + { + "epoch": 0.7629796839729119, + "grad_norm": 1.0572512051509857, + "learning_rate": 1.4576055309916602e-05, + "loss": 0.3191443979740143, + "step": 2873 + 
}, + { + "epoch": 0.7632452529544549, + "grad_norm": 1.0684782615871369, + "learning_rate": 1.4572150796523265e-05, + "loss": 0.30804574489593506, + "step": 2874 + }, + { + "epoch": 0.7635108219359978, + "grad_norm": 1.0214046475154577, + "learning_rate": 1.4568245401673178e-05, + "loss": 0.32462549209594727, + "step": 2875 + }, + { + "epoch": 0.7637763909175408, + "grad_norm": 1.1357318078490404, + "learning_rate": 1.4564339126119254e-05, + "loss": 0.27751386165618896, + "step": 2876 + }, + { + "epoch": 0.7640419598990837, + "grad_norm": 1.0701221152994065, + "learning_rate": 1.4560431970614578e-05, + "loss": 0.27194011211395264, + "step": 2877 + }, + { + "epoch": 0.7643075288806267, + "grad_norm": 1.134082938487784, + "learning_rate": 1.4556523935912406e-05, + "loss": 0.28701072931289673, + "step": 2878 + }, + { + "epoch": 0.7645730978621696, + "grad_norm": 1.0814539768930527, + "learning_rate": 1.4552615022766156e-05, + "loss": 0.3278783857822418, + "step": 2879 + }, + { + "epoch": 0.7648386668437126, + "grad_norm": 1.096499511679905, + "learning_rate": 1.4548705231929426e-05, + "loss": 0.3292006254196167, + "step": 2880 + }, + { + "epoch": 0.7651042358252557, + "grad_norm": 1.30563906707581, + "learning_rate": 1.4544794564155971e-05, + "loss": 0.33038759231567383, + "step": 2881 + }, + { + "epoch": 0.7653698048067986, + "grad_norm": 1.0799053745016685, + "learning_rate": 1.4540883020199725e-05, + "loss": 0.29183000326156616, + "step": 2882 + }, + { + "epoch": 0.7656353737883416, + "grad_norm": 1.049945067498866, + "learning_rate": 1.4536970600814789e-05, + "loss": 0.28066399693489075, + "step": 2883 + }, + { + "epoch": 0.7659009427698845, + "grad_norm": 1.0673215015420034, + "learning_rate": 1.4533057306755427e-05, + "loss": 0.2832046151161194, + "step": 2884 + }, + { + "epoch": 0.7661665117514275, + "grad_norm": 1.0799218487874103, + "learning_rate": 1.4529143138776078e-05, + "loss": 0.3006540834903717, + "step": 2885 + }, + { + "epoch": 0.7664320807329704, + 
"grad_norm": 0.965945374746046, + "learning_rate": 1.4525228097631351e-05, + "loss": 0.2793240547180176, + "step": 2886 + }, + { + "epoch": 0.7666976497145134, + "grad_norm": 1.0791298696355873, + "learning_rate": 1.452131218407602e-05, + "loss": 0.2895192503929138, + "step": 2887 + }, + { + "epoch": 0.7669632186960563, + "grad_norm": 1.1085071656285739, + "learning_rate": 1.4517395398865022e-05, + "loss": 0.27707618474960327, + "step": 2888 + }, + { + "epoch": 0.7672287876775993, + "grad_norm": 0.9801959170871006, + "learning_rate": 1.4513477742753465e-05, + "loss": 0.29167065024375916, + "step": 2889 + }, + { + "epoch": 0.7674943566591422, + "grad_norm": 0.9760628575291594, + "learning_rate": 1.4509559216496631e-05, + "loss": 0.2670987844467163, + "step": 2890 + }, + { + "epoch": 0.7677599256406852, + "grad_norm": 1.0541213606202946, + "learning_rate": 1.4505639820849968e-05, + "loss": 0.3025206923484802, + "step": 2891 + }, + { + "epoch": 0.7680254946222281, + "grad_norm": 1.0721054101606857, + "learning_rate": 1.4501719556569087e-05, + "loss": 0.3104705512523651, + "step": 2892 + }, + { + "epoch": 0.7682910636037711, + "grad_norm": 1.1715745485021363, + "learning_rate": 1.4497798424409766e-05, + "loss": 0.2972267270088196, + "step": 2893 + }, + { + "epoch": 0.768556632585314, + "grad_norm": 1.3084992927105763, + "learning_rate": 1.4493876425127957e-05, + "loss": 0.34956347942352295, + "step": 2894 + }, + { + "epoch": 0.768822201566857, + "grad_norm": 1.0910589486872886, + "learning_rate": 1.4489953559479775e-05, + "loss": 0.3122873902320862, + "step": 2895 + }, + { + "epoch": 0.7690877705483999, + "grad_norm": 1.0070263080445798, + "learning_rate": 1.4486029828221497e-05, + "loss": 0.29645755887031555, + "step": 2896 + }, + { + "epoch": 0.7693533395299429, + "grad_norm": 1.1312479199974272, + "learning_rate": 1.448210523210958e-05, + "loss": 0.33357223868370056, + "step": 2897 + }, + { + "epoch": 0.7696189085114858, + "grad_norm": 1.0807209302083978, + 
"learning_rate": 1.4478179771900634e-05, + "loss": 0.2780191898345947, + "step": 2898 + }, + { + "epoch": 0.7698844774930288, + "grad_norm": 1.098992372480737, + "learning_rate": 1.447425344835144e-05, + "loss": 0.31503236293792725, + "step": 2899 + }, + { + "epoch": 0.7701500464745717, + "grad_norm": 1.0152023365250116, + "learning_rate": 1.4470326262218955e-05, + "loss": 0.2843332290649414, + "step": 2900 + }, + { + "epoch": 0.7704156154561147, + "grad_norm": 1.1041753681410225, + "learning_rate": 1.4466398214260286e-05, + "loss": 0.305475652217865, + "step": 2901 + }, + { + "epoch": 0.7706811844376577, + "grad_norm": 1.0159008972115877, + "learning_rate": 1.446246930523272e-05, + "loss": 0.28418007493019104, + "step": 2902 + }, + { + "epoch": 0.7709467534192006, + "grad_norm": 2.0289726917266027, + "learning_rate": 1.44585395358937e-05, + "loss": 0.28237032890319824, + "step": 2903 + }, + { + "epoch": 0.7712123224007436, + "grad_norm": 1.1334683720848762, + "learning_rate": 1.4454608907000843e-05, + "loss": 0.33727777004241943, + "step": 2904 + }, + { + "epoch": 0.7714778913822865, + "grad_norm": 1.1393257541232447, + "learning_rate": 1.4450677419311925e-05, + "loss": 0.2977198660373688, + "step": 2905 + }, + { + "epoch": 0.7717434603638295, + "grad_norm": 1.0793508547506123, + "learning_rate": 1.4446745073584891e-05, + "loss": 0.3095981776714325, + "step": 2906 + }, + { + "epoch": 0.7720090293453724, + "grad_norm": 1.138471500425881, + "learning_rate": 1.4442811870577851e-05, + "loss": 0.29808440804481506, + "step": 2907 + }, + { + "epoch": 0.7722745983269154, + "grad_norm": 1.2668271633221484, + "learning_rate": 1.4438877811049079e-05, + "loss": 0.32444530725479126, + "step": 2908 + }, + { + "epoch": 0.7725401673084584, + "grad_norm": 1.0229226464155372, + "learning_rate": 1.443494289575702e-05, + "loss": 0.24782602488994598, + "step": 2909 + }, + { + "epoch": 0.7728057362900014, + "grad_norm": 1.079755307057506, + "learning_rate": 1.4431007125460274e-05, + 
"loss": 0.31289762258529663, + "step": 2910 + }, + { + "epoch": 0.7730713052715443, + "grad_norm": 1.0928540626872372, + "learning_rate": 1.4427070500917615e-05, + "loss": 0.31444042921066284, + "step": 2911 + }, + { + "epoch": 0.7733368742530873, + "grad_norm": 1.1235251868548595, + "learning_rate": 1.4423133022887973e-05, + "loss": 0.31347882747650146, + "step": 2912 + }, + { + "epoch": 0.7736024432346302, + "grad_norm": 1.1449169077961199, + "learning_rate": 1.4419194692130453e-05, + "loss": 0.3025411367416382, + "step": 2913 + }, + { + "epoch": 0.7738680122161732, + "grad_norm": 0.9734590933720824, + "learning_rate": 1.4415255509404316e-05, + "loss": 0.2954581081867218, + "step": 2914 + }, + { + "epoch": 0.7741335811977161, + "grad_norm": 1.051295802747811, + "learning_rate": 1.4411315475468988e-05, + "loss": 0.2675531506538391, + "step": 2915 + }, + { + "epoch": 0.7743991501792591, + "grad_norm": 1.0207923958770302, + "learning_rate": 1.4407374591084064e-05, + "loss": 0.29307854175567627, + "step": 2916 + }, + { + "epoch": 0.774664719160802, + "grad_norm": 0.9134258889524259, + "learning_rate": 1.4403432857009295e-05, + "loss": 0.2805953025817871, + "step": 2917 + }, + { + "epoch": 0.774930288142345, + "grad_norm": 1.1114518211112974, + "learning_rate": 1.439949027400461e-05, + "loss": 0.30805838108062744, + "step": 2918 + }, + { + "epoch": 0.7751958571238879, + "grad_norm": 1.063187320260136, + "learning_rate": 1.4395546842830085e-05, + "loss": 0.31501835584640503, + "step": 2919 + }, + { + "epoch": 0.7754614261054309, + "grad_norm": 1.025310766436644, + "learning_rate": 1.4391602564245975e-05, + "loss": 0.2719186246395111, + "step": 2920 + }, + { + "epoch": 0.7757269950869738, + "grad_norm": 1.0474571998069828, + "learning_rate": 1.4387657439012677e-05, + "loss": 0.29554325342178345, + "step": 2921 + }, + { + "epoch": 0.7759925640685168, + "grad_norm": 1.0103166752174864, + "learning_rate": 1.4383711467890776e-05, + "loss": 0.2993816137313843, + "step": 2922 
+ }, + { + "epoch": 0.7762581330500598, + "grad_norm": 1.087143911717871, + "learning_rate": 1.4379764651641004e-05, + "loss": 0.3412264883518219, + "step": 2923 + }, + { + "epoch": 0.7765237020316027, + "grad_norm": 1.3163055539647115, + "learning_rate": 1.4375816991024263e-05, + "loss": 0.3137913942337036, + "step": 2924 + }, + { + "epoch": 0.7767892710131457, + "grad_norm": 1.0026858390591848, + "learning_rate": 1.4371868486801611e-05, + "loss": 0.2710151672363281, + "step": 2925 + }, + { + "epoch": 0.7770548399946886, + "grad_norm": 1.060508746597415, + "learning_rate": 1.4367919139734279e-05, + "loss": 0.28521692752838135, + "step": 2926 + }, + { + "epoch": 0.7773204089762316, + "grad_norm": 0.9938687291505847, + "learning_rate": 1.4363968950583651e-05, + "loss": 0.2889919579029083, + "step": 2927 + }, + { + "epoch": 0.7775859779577745, + "grad_norm": 1.0641534591195945, + "learning_rate": 1.436001792011128e-05, + "loss": 0.31562381982803345, + "step": 2928 + }, + { + "epoch": 0.7778515469393175, + "grad_norm": 0.980719397790632, + "learning_rate": 1.4356066049078871e-05, + "loss": 0.2747528553009033, + "step": 2929 + }, + { + "epoch": 0.7781171159208604, + "grad_norm": 1.0890864939874727, + "learning_rate": 1.4352113338248303e-05, + "loss": 0.2918938398361206, + "step": 2930 + }, + { + "epoch": 0.7783826849024034, + "grad_norm": 1.1375978489291394, + "learning_rate": 1.4348159788381615e-05, + "loss": 0.3348507285118103, + "step": 2931 + }, + { + "epoch": 0.7786482538839463, + "grad_norm": 1.049930284325584, + "learning_rate": 1.4344205400241e-05, + "loss": 0.27206242084503174, + "step": 2932 + }, + { + "epoch": 0.7789138228654893, + "grad_norm": 1.0635705360778813, + "learning_rate": 1.434025017458882e-05, + "loss": 0.28496092557907104, + "step": 2933 + }, + { + "epoch": 0.7791793918470322, + "grad_norm": 1.1207237235097192, + "learning_rate": 1.4336294112187595e-05, + "loss": 0.3080131411552429, + "step": 2934 + }, + { + "epoch": 0.7794449608285752, + 
"grad_norm": 1.1562549835000784, + "learning_rate": 1.4332337213800008e-05, + "loss": 0.3116779029369354, + "step": 2935 + }, + { + "epoch": 0.7797105298101181, + "grad_norm": 1.0230593279992428, + "learning_rate": 1.43283794801889e-05, + "loss": 0.26526543498039246, + "step": 2936 + }, + { + "epoch": 0.7799760987916612, + "grad_norm": 1.0768548459396885, + "learning_rate": 1.4324420912117274e-05, + "loss": 0.2829325497150421, + "step": 2937 + }, + { + "epoch": 0.7802416677732041, + "grad_norm": 1.197165846783245, + "learning_rate": 1.43204615103483e-05, + "loss": 0.34146445989608765, + "step": 2938 + }, + { + "epoch": 0.7805072367547471, + "grad_norm": 1.1418950254878286, + "learning_rate": 1.43165012756453e-05, + "loss": 0.316609650850296, + "step": 2939 + }, + { + "epoch": 0.78077280573629, + "grad_norm": 1.119861281862994, + "learning_rate": 1.4312540208771766e-05, + "loss": 0.3215107321739197, + "step": 2940 + }, + { + "epoch": 0.781038374717833, + "grad_norm": 1.0591732101512668, + "learning_rate": 1.4308578310491342e-05, + "loss": 0.2834000587463379, + "step": 2941 + }, + { + "epoch": 0.781303943699376, + "grad_norm": 1.1186376453102755, + "learning_rate": 1.430461558156783e-05, + "loss": 0.30184993147850037, + "step": 2942 + }, + { + "epoch": 0.7815695126809189, + "grad_norm": 1.1319557052801907, + "learning_rate": 1.4300652022765207e-05, + "loss": 0.3299996256828308, + "step": 2943 + }, + { + "epoch": 0.7818350816624619, + "grad_norm": 1.1269288601015153, + "learning_rate": 1.4296687634847592e-05, + "loss": 0.27565228939056396, + "step": 2944 + }, + { + "epoch": 0.7821006506440048, + "grad_norm": 1.1019395409868211, + "learning_rate": 1.4292722418579278e-05, + "loss": 0.30347493290901184, + "step": 2945 + }, + { + "epoch": 0.7823662196255478, + "grad_norm": 1.125677517693181, + "learning_rate": 1.4288756374724709e-05, + "loss": 0.31469428539276123, + "step": 2946 + }, + { + "epoch": 0.7826317886070907, + "grad_norm": 1.0500101449680372, + "learning_rate": 
1.4284789504048493e-05, + "loss": 0.27361029386520386, + "step": 2947 + }, + { + "epoch": 0.7828973575886337, + "grad_norm": 1.057442611584268, + "learning_rate": 1.428082180731539e-05, + "loss": 0.29180705547332764, + "step": 2948 + }, + { + "epoch": 0.7831629265701766, + "grad_norm": 1.0218659697209738, + "learning_rate": 1.4276853285290334e-05, + "loss": 0.281120628118515, + "step": 2949 + }, + { + "epoch": 0.7834284955517196, + "grad_norm": 1.0029783457826962, + "learning_rate": 1.4272883938738406e-05, + "loss": 0.26144471764564514, + "step": 2950 + }, + { + "epoch": 0.7836940645332625, + "grad_norm": 1.0904458839940374, + "learning_rate": 1.4268913768424848e-05, + "loss": 0.3118991255760193, + "step": 2951 + }, + { + "epoch": 0.7839596335148055, + "grad_norm": 1.0581869365443632, + "learning_rate": 1.4264942775115065e-05, + "loss": 0.29352328181266785, + "step": 2952 + }, + { + "epoch": 0.7842252024963484, + "grad_norm": 1.025234952757571, + "learning_rate": 1.426097095957461e-05, + "loss": 0.2687748968601227, + "step": 2953 + }, + { + "epoch": 0.7844907714778914, + "grad_norm": 1.0817782920006436, + "learning_rate": 1.4256998322569212e-05, + "loss": 0.3106890916824341, + "step": 2954 + }, + { + "epoch": 0.7847563404594343, + "grad_norm": 1.0039841255701216, + "learning_rate": 1.4253024864864742e-05, + "loss": 0.2522161304950714, + "step": 2955 + }, + { + "epoch": 0.7850219094409773, + "grad_norm": 1.031799618380073, + "learning_rate": 1.424905058722724e-05, + "loss": 0.2994377613067627, + "step": 2956 + }, + { + "epoch": 0.7852874784225202, + "grad_norm": 1.295564211303899, + "learning_rate": 1.4245075490422893e-05, + "loss": 0.3753565549850464, + "step": 2957 + }, + { + "epoch": 0.7855530474040632, + "grad_norm": 1.2386689798654595, + "learning_rate": 1.424109957521806e-05, + "loss": 0.29544737935066223, + "step": 2958 + }, + { + "epoch": 0.7858186163856061, + "grad_norm": 1.0381164701705432, + "learning_rate": 1.423712284237925e-05, + "loss": 
0.307847797870636, + "step": 2959 + }, + { + "epoch": 0.7860841853671491, + "grad_norm": 1.1107576873332587, + "learning_rate": 1.4233145292673127e-05, + "loss": 0.31758183240890503, + "step": 2960 + }, + { + "epoch": 0.786349754348692, + "grad_norm": 1.0358601319268448, + "learning_rate": 1.4229166926866517e-05, + "loss": 0.307254433631897, + "step": 2961 + }, + { + "epoch": 0.786615323330235, + "grad_norm": 1.2228062733167704, + "learning_rate": 1.42251877457264e-05, + "loss": 0.3513748049736023, + "step": 2962 + }, + { + "epoch": 0.7868808923117779, + "grad_norm": 1.1359729522705007, + "learning_rate": 1.422120775001992e-05, + "loss": 0.3025718629360199, + "step": 2963 + }, + { + "epoch": 0.7871464612933209, + "grad_norm": 1.076503168390535, + "learning_rate": 1.4217226940514367e-05, + "loss": 0.2922811508178711, + "step": 2964 + }, + { + "epoch": 0.787412030274864, + "grad_norm": 1.07297262661661, + "learning_rate": 1.42132453179772e-05, + "loss": 0.29599297046661377, + "step": 2965 + }, + { + "epoch": 0.7876775992564069, + "grad_norm": 0.992121967255531, + "learning_rate": 1.4209262883176025e-05, + "loss": 0.28336548805236816, + "step": 2966 + }, + { + "epoch": 0.7879431682379499, + "grad_norm": 1.0655541697156172, + "learning_rate": 1.4205279636878613e-05, + "loss": 0.3100801110267639, + "step": 2967 + }, + { + "epoch": 0.7882087372194928, + "grad_norm": 1.165527486411767, + "learning_rate": 1.4201295579852881e-05, + "loss": 0.33067989349365234, + "step": 2968 + }, + { + "epoch": 0.7884743062010358, + "grad_norm": 1.1896877635723886, + "learning_rate": 1.4197310712866909e-05, + "loss": 0.282347172498703, + "step": 2969 + }, + { + "epoch": 0.7887398751825787, + "grad_norm": 1.0769183433483809, + "learning_rate": 1.419332503668894e-05, + "loss": 0.30585426092147827, + "step": 2970 + }, + { + "epoch": 0.7890054441641217, + "grad_norm": 1.0616062054836604, + "learning_rate": 1.4189338552087351e-05, + "loss": 0.3011561632156372, + "step": 2971 + }, + { + "epoch": 
0.7892710131456646, + "grad_norm": 0.9722574451184507, + "learning_rate": 1.4185351259830705e-05, + "loss": 0.2700524926185608, + "step": 2972 + }, + { + "epoch": 0.7895365821272076, + "grad_norm": 1.0849811262666431, + "learning_rate": 1.4181363160687693e-05, + "loss": 0.2963382303714752, + "step": 2973 + }, + { + "epoch": 0.7898021511087505, + "grad_norm": 1.0388990841328773, + "learning_rate": 1.4177374255427183e-05, + "loss": 0.27132824063301086, + "step": 2974 + }, + { + "epoch": 0.7900677200902935, + "grad_norm": 0.9602477794817199, + "learning_rate": 1.417338454481818e-05, + "loss": 0.2539706826210022, + "step": 2975 + }, + { + "epoch": 0.7903332890718364, + "grad_norm": 1.0972216427869486, + "learning_rate": 1.416939402962986e-05, + "loss": 0.28465601801872253, + "step": 2976 + }, + { + "epoch": 0.7905988580533794, + "grad_norm": 1.1885027397372414, + "learning_rate": 1.4165402710631544e-05, + "loss": 0.3020748198032379, + "step": 2977 + }, + { + "epoch": 0.7908644270349223, + "grad_norm": 1.0709231597298363, + "learning_rate": 1.416141058859271e-05, + "loss": 0.3157690465450287, + "step": 2978 + }, + { + "epoch": 0.7911299960164653, + "grad_norm": 1.0874979641604023, + "learning_rate": 1.4157417664282994e-05, + "loss": 0.2720191776752472, + "step": 2979 + }, + { + "epoch": 0.7913955649980082, + "grad_norm": 1.0670143355557837, + "learning_rate": 1.4153423938472185e-05, + "loss": 0.2931746542453766, + "step": 2980 + }, + { + "epoch": 0.7916611339795512, + "grad_norm": 1.0836941185599118, + "learning_rate": 1.4149429411930226e-05, + "loss": 0.2683875560760498, + "step": 2981 + }, + { + "epoch": 0.7919267029610941, + "grad_norm": 1.0454189872619364, + "learning_rate": 1.4145434085427216e-05, + "loss": 0.2559819519519806, + "step": 2982 + }, + { + "epoch": 0.7921922719426371, + "grad_norm": 1.1028368657772893, + "learning_rate": 1.4141437959733404e-05, + "loss": 0.2845582365989685, + "step": 2983 + }, + { + "epoch": 0.79245784092418, + "grad_norm": 
1.05827279827959, + "learning_rate": 1.4137441035619197e-05, + "loss": 0.26766544580459595, + "step": 2984 + }, + { + "epoch": 0.792723409905723, + "grad_norm": 1.2459472391823172, + "learning_rate": 1.4133443313855155e-05, + "loss": 0.32089024782180786, + "step": 2985 + }, + { + "epoch": 0.7929889788872659, + "grad_norm": 1.053106908199776, + "learning_rate": 1.4129444795211993e-05, + "loss": 0.2756182551383972, + "step": 2986 + }, + { + "epoch": 0.7932545478688089, + "grad_norm": 1.231241306668284, + "learning_rate": 1.4125445480460573e-05, + "loss": 0.29487302899360657, + "step": 2987 + }, + { + "epoch": 0.7935201168503518, + "grad_norm": 1.1738297230948855, + "learning_rate": 1.4121445370371922e-05, + "loss": 0.3362561762332916, + "step": 2988 + }, + { + "epoch": 0.7937856858318948, + "grad_norm": 1.1591988507026376, + "learning_rate": 1.4117444465717209e-05, + "loss": 0.2986692488193512, + "step": 2989 + }, + { + "epoch": 0.7940512548134377, + "grad_norm": 1.0341012671875776, + "learning_rate": 1.4113442767267766e-05, + "loss": 0.2725266218185425, + "step": 2990 + }, + { + "epoch": 0.7943168237949807, + "grad_norm": 1.1125466640148414, + "learning_rate": 1.4109440275795071e-05, + "loss": 0.29827257990837097, + "step": 2991 + }, + { + "epoch": 0.7945823927765236, + "grad_norm": 1.0512885973195232, + "learning_rate": 1.410543699207076e-05, + "loss": 0.2506203055381775, + "step": 2992 + }, + { + "epoch": 0.7948479617580667, + "grad_norm": 0.9867416114744889, + "learning_rate": 1.410143291686661e-05, + "loss": 0.2675034701824188, + "step": 2993 + }, + { + "epoch": 0.7951135307396097, + "grad_norm": 1.1763547306282318, + "learning_rate": 1.4097428050954571e-05, + "loss": 0.34528690576553345, + "step": 2994 + }, + { + "epoch": 0.7953790997211526, + "grad_norm": 1.1374135219725177, + "learning_rate": 1.4093422395106726e-05, + "loss": 0.27551063895225525, + "step": 2995 + }, + { + "epoch": 0.7956446687026956, + "grad_norm": 1.1195982376159075, + "learning_rate": 
1.408941595009532e-05, + "loss": 0.3176268935203552, + "step": 2996 + }, + { + "epoch": 0.7959102376842385, + "grad_norm": 1.1804373403956752, + "learning_rate": 1.408540871669275e-05, + "loss": 0.30056723952293396, + "step": 2997 + }, + { + "epoch": 0.7961758066657815, + "grad_norm": 1.124570387942151, + "learning_rate": 1.4081400695671562e-05, + "loss": 0.32109886407852173, + "step": 2998 + }, + { + "epoch": 0.7964413756473244, + "grad_norm": 1.1262740571855958, + "learning_rate": 1.4077391887804457e-05, + "loss": 0.33622005581855774, + "step": 2999 + }, + { + "epoch": 0.7967069446288674, + "grad_norm": 1.1195153536613822, + "learning_rate": 1.4073382293864283e-05, + "loss": 0.3054961860179901, + "step": 3000 + }, + { + "epoch": 0.7969725136104103, + "grad_norm": 1.1210721039096916, + "learning_rate": 1.4069371914624044e-05, + "loss": 0.3022462725639343, + "step": 3001 + }, + { + "epoch": 0.7972380825919533, + "grad_norm": 1.0116555063320039, + "learning_rate": 1.4065360750856891e-05, + "loss": 0.2500512897968292, + "step": 3002 + }, + { + "epoch": 0.7975036515734962, + "grad_norm": 1.233947002119444, + "learning_rate": 1.4061348803336135e-05, + "loss": 0.2960171699523926, + "step": 3003 + }, + { + "epoch": 0.7977692205550392, + "grad_norm": 3.53476121579318, + "learning_rate": 1.4057336072835228e-05, + "loss": 0.2941724359989166, + "step": 3004 + }, + { + "epoch": 0.7980347895365821, + "grad_norm": 1.0143157952003843, + "learning_rate": 1.4053322560127779e-05, + "loss": 0.2827858328819275, + "step": 3005 + }, + { + "epoch": 0.7983003585181251, + "grad_norm": 1.34417890867956, + "learning_rate": 1.4049308265987544e-05, + "loss": 0.32525116205215454, + "step": 3006 + }, + { + "epoch": 0.798565927499668, + "grad_norm": 1.1622605286979444, + "learning_rate": 1.4045293191188431e-05, + "loss": 0.26509979367256165, + "step": 3007 + }, + { + "epoch": 0.798831496481211, + "grad_norm": 1.1649049829769997, + "learning_rate": 1.4041277336504503e-05, + "loss": 
0.3462742567062378, + "step": 3008 + }, + { + "epoch": 0.7990970654627539, + "grad_norm": 1.118975693723979, + "learning_rate": 1.4037260702709967e-05, + "loss": 0.2971092164516449, + "step": 3009 + }, + { + "epoch": 0.7993626344442969, + "grad_norm": 1.0541078602131526, + "learning_rate": 1.4033243290579182e-05, + "loss": 0.32359808683395386, + "step": 3010 + }, + { + "epoch": 0.7996282034258398, + "grad_norm": 0.9819968107477214, + "learning_rate": 1.4029225100886657e-05, + "loss": 0.2949031591415405, + "step": 3011 + }, + { + "epoch": 0.7998937724073828, + "grad_norm": 0.9639154080405838, + "learning_rate": 1.4025206134407051e-05, + "loss": 0.29888901114463806, + "step": 3012 + }, + { + "epoch": 0.8001593413889257, + "grad_norm": 1.0921369087209054, + "learning_rate": 1.4021186391915181e-05, + "loss": 0.2999705672264099, + "step": 3013 + }, + { + "epoch": 0.8004249103704687, + "grad_norm": 1.027092536189555, + "learning_rate": 1.4017165874185996e-05, + "loss": 0.2725638449192047, + "step": 3014 + }, + { + "epoch": 0.8006904793520117, + "grad_norm": 1.6251260873819724, + "learning_rate": 1.4013144581994609e-05, + "loss": 0.2809314727783203, + "step": 3015 + }, + { + "epoch": 0.8009560483335546, + "grad_norm": 1.194026798460289, + "learning_rate": 1.400912251611628e-05, + "loss": 0.30335327982902527, + "step": 3016 + }, + { + "epoch": 0.8012216173150976, + "grad_norm": 1.0526756572542106, + "learning_rate": 1.400509967732641e-05, + "loss": 0.27780598402023315, + "step": 3017 + }, + { + "epoch": 0.8014871862966405, + "grad_norm": 1.0036615790617616, + "learning_rate": 1.400107606640056e-05, + "loss": 0.2865309715270996, + "step": 3018 + }, + { + "epoch": 0.8017527552781835, + "grad_norm": 1.067182271229665, + "learning_rate": 1.3997051684114431e-05, + "loss": 0.2691546082496643, + "step": 3019 + }, + { + "epoch": 0.8020183242597264, + "grad_norm": 1.0174199108878024, + "learning_rate": 1.3993026531243876e-05, + "loss": 0.30289226770401, + "step": 3020 + }, + { + 
"epoch": 0.8022838932412695, + "grad_norm": 1.1180967643802684, + "learning_rate": 1.3989000608564905e-05, + "loss": 0.2767682671546936, + "step": 3021 + }, + { + "epoch": 0.8025494622228124, + "grad_norm": 1.1982508587685934, + "learning_rate": 1.3984973916853657e-05, + "loss": 0.3423742353916168, + "step": 3022 + }, + { + "epoch": 0.8028150312043554, + "grad_norm": 1.1718790013716964, + "learning_rate": 1.3980946456886439e-05, + "loss": 0.3000536561012268, + "step": 3023 + }, + { + "epoch": 0.8030806001858983, + "grad_norm": 1.1431161282459077, + "learning_rate": 1.3976918229439698e-05, + "loss": 0.3071063756942749, + "step": 3024 + }, + { + "epoch": 0.8033461691674413, + "grad_norm": 1.6885640285561154, + "learning_rate": 1.397288923529002e-05, + "loss": 0.31261157989501953, + "step": 3025 + }, + { + "epoch": 0.8036117381489842, + "grad_norm": 1.0076153318556622, + "learning_rate": 1.3968859475214156e-05, + "loss": 0.2658939063549042, + "step": 3026 + }, + { + "epoch": 0.8038773071305272, + "grad_norm": 1.0309089161631302, + "learning_rate": 1.3964828949988993e-05, + "loss": 0.2772905230522156, + "step": 3027 + }, + { + "epoch": 0.8041428761120701, + "grad_norm": 1.1271894525974708, + "learning_rate": 1.396079766039157e-05, + "loss": 0.2903479337692261, + "step": 3028 + }, + { + "epoch": 0.8044084450936131, + "grad_norm": 1.2165332424367126, + "learning_rate": 1.3956765607199069e-05, + "loss": 0.35709524154663086, + "step": 3029 + }, + { + "epoch": 0.804674014075156, + "grad_norm": 1.0863328323430816, + "learning_rate": 1.3952732791188828e-05, + "loss": 0.2929389774799347, + "step": 3030 + }, + { + "epoch": 0.804939583056699, + "grad_norm": 0.999480167032172, + "learning_rate": 1.3948699213138321e-05, + "loss": 0.2609884440898895, + "step": 3031 + }, + { + "epoch": 0.805205152038242, + "grad_norm": 1.0946442757602284, + "learning_rate": 1.394466487382518e-05, + "loss": 0.3026544749736786, + "step": 3032 + }, + { + "epoch": 0.8054707210197849, + "grad_norm": 
1.0415601836945267, + "learning_rate": 1.394062977402717e-05, + "loss": 0.28281137347221375, + "step": 3033 + }, + { + "epoch": 0.8057362900013278, + "grad_norm": 0.9908513124522437, + "learning_rate": 1.3936593914522214e-05, + "loss": 0.26189178228378296, + "step": 3034 + }, + { + "epoch": 0.8060018589828708, + "grad_norm": 1.0541854732158313, + "learning_rate": 1.3932557296088383e-05, + "loss": 0.27987509965896606, + "step": 3035 + }, + { + "epoch": 0.8062674279644138, + "grad_norm": 0.9961129101435677, + "learning_rate": 1.3928519919503884e-05, + "loss": 0.2857724130153656, + "step": 3036 + }, + { + "epoch": 0.8065329969459567, + "grad_norm": 0.9752377302684325, + "learning_rate": 1.3924481785547076e-05, + "loss": 0.28102418780326843, + "step": 3037 + }, + { + "epoch": 0.8067985659274997, + "grad_norm": 1.06882045524996, + "learning_rate": 1.3920442894996464e-05, + "loss": 0.30250412225723267, + "step": 3038 + }, + { + "epoch": 0.8070641349090426, + "grad_norm": 0.9854538363943691, + "learning_rate": 1.3916403248630703e-05, + "loss": 0.28951483964920044, + "step": 3039 + }, + { + "epoch": 0.8073297038905856, + "grad_norm": 0.990016753911339, + "learning_rate": 1.3912362847228585e-05, + "loss": 0.28455328941345215, + "step": 3040 + }, + { + "epoch": 0.8075952728721285, + "grad_norm": 1.0887176497400486, + "learning_rate": 1.3908321691569048e-05, + "loss": 0.29541105031967163, + "step": 3041 + }, + { + "epoch": 0.8078608418536715, + "grad_norm": 1.162648796815669, + "learning_rate": 1.3904279782431187e-05, + "loss": 0.3057629466056824, + "step": 3042 + }, + { + "epoch": 0.8081264108352144, + "grad_norm": 1.0909846424659564, + "learning_rate": 1.3900237120594226e-05, + "loss": 0.3204082250595093, + "step": 3043 + }, + { + "epoch": 0.8083919798167574, + "grad_norm": 0.9793203113476959, + "learning_rate": 1.3896193706837551e-05, + "loss": 0.28629523515701294, + "step": 3044 + }, + { + "epoch": 0.8086575487983003, + "grad_norm": 1.1874958252714642, + "learning_rate": 
1.389214954194068e-05, + "loss": 0.298164427280426, + "step": 3045 + }, + { + "epoch": 0.8089231177798433, + "grad_norm": 1.005892758898695, + "learning_rate": 1.3888104626683282e-05, + "loss": 0.27309298515319824, + "step": 3046 + }, + { + "epoch": 0.8091886867613862, + "grad_norm": 0.9950263488620656, + "learning_rate": 1.3884058961845166e-05, + "loss": 0.25635263323783875, + "step": 3047 + }, + { + "epoch": 0.8094542557429292, + "grad_norm": 1.002808171969614, + "learning_rate": 1.3880012548206292e-05, + "loss": 0.29926127195358276, + "step": 3048 + }, + { + "epoch": 0.8097198247244722, + "grad_norm": 0.9867331912864394, + "learning_rate": 1.387596538654676e-05, + "loss": 0.26633137464523315, + "step": 3049 + }, + { + "epoch": 0.8099853937060152, + "grad_norm": 1.0757993931692869, + "learning_rate": 1.387191747764681e-05, + "loss": 0.28725534677505493, + "step": 3050 + }, + { + "epoch": 0.8102509626875581, + "grad_norm": 1.4955713597704303, + "learning_rate": 1.3867868822286838e-05, + "loss": 0.3015314042568207, + "step": 3051 + }, + { + "epoch": 0.8105165316691011, + "grad_norm": 1.048643971484194, + "learning_rate": 1.3863819421247375e-05, + "loss": 0.3054691553115845, + "step": 3052 + }, + { + "epoch": 0.810782100650644, + "grad_norm": 1.1596568650600225, + "learning_rate": 1.3859769275309097e-05, + "loss": 0.26315444707870483, + "step": 3053 + }, + { + "epoch": 0.811047669632187, + "grad_norm": 1.024319547072995, + "learning_rate": 1.3855718385252824e-05, + "loss": 0.2973077595233917, + "step": 3054 + }, + { + "epoch": 0.81131323861373, + "grad_norm": 1.1845129171721744, + "learning_rate": 1.385166675185952e-05, + "loss": 0.32824432849884033, + "step": 3055 + }, + { + "epoch": 0.8115788075952729, + "grad_norm": 1.2351976774044444, + "learning_rate": 1.3847614375910292e-05, + "loss": 0.3127811849117279, + "step": 3056 + }, + { + "epoch": 0.8118443765768159, + "grad_norm": 1.0840317870226388, + "learning_rate": 1.384356125818639e-05, + "loss": 
0.2631932497024536, + "step": 3057 + }, + { + "epoch": 0.8121099455583588, + "grad_norm": 1.0251225163823416, + "learning_rate": 1.3839507399469213e-05, + "loss": 0.2856106162071228, + "step": 3058 + }, + { + "epoch": 0.8123755145399018, + "grad_norm": 1.2604810760435325, + "learning_rate": 1.3835452800540288e-05, + "loss": 0.28986629843711853, + "step": 3059 + }, + { + "epoch": 0.8126410835214447, + "grad_norm": 1.0804422287227695, + "learning_rate": 1.3831397462181298e-05, + "loss": 0.28411972522735596, + "step": 3060 + }, + { + "epoch": 0.8129066525029877, + "grad_norm": 1.117697190248139, + "learning_rate": 1.3827341385174063e-05, + "loss": 0.3234354853630066, + "step": 3061 + }, + { + "epoch": 0.8131722214845306, + "grad_norm": 0.9917598533716923, + "learning_rate": 1.3823284570300551e-05, + "loss": 0.24779736995697021, + "step": 3062 + }, + { + "epoch": 0.8134377904660736, + "grad_norm": 1.1743500466494587, + "learning_rate": 1.3819227018342865e-05, + "loss": 0.3306904137134552, + "step": 3063 + }, + { + "epoch": 0.8137033594476165, + "grad_norm": 1.1120224667451313, + "learning_rate": 1.3815168730083254e-05, + "loss": 0.31705451011657715, + "step": 3064 + }, + { + "epoch": 0.8139689284291595, + "grad_norm": 1.1351768868234977, + "learning_rate": 1.3811109706304105e-05, + "loss": 0.29830047488212585, + "step": 3065 + }, + { + "epoch": 0.8142344974107024, + "grad_norm": 1.1496885073051233, + "learning_rate": 1.3807049947787954e-05, + "loss": 0.30605942010879517, + "step": 3066 + }, + { + "epoch": 0.8145000663922454, + "grad_norm": 1.0745429008877887, + "learning_rate": 1.3802989455317475e-05, + "loss": 0.3139193058013916, + "step": 3067 + }, + { + "epoch": 0.8147656353737883, + "grad_norm": 1.0541430221228831, + "learning_rate": 1.3798928229675478e-05, + "loss": 0.3175879716873169, + "step": 3068 + }, + { + "epoch": 0.8150312043553313, + "grad_norm": 1.0450888698469754, + "learning_rate": 1.3794866271644922e-05, + "loss": 0.26391106843948364, + "step": 3069 + 
}, + { + "epoch": 0.8152967733368742, + "grad_norm": 0.945534402365018, + "learning_rate": 1.3790803582008906e-05, + "loss": 0.24128863215446472, + "step": 3070 + }, + { + "epoch": 0.8155623423184172, + "grad_norm": 1.1627322372772537, + "learning_rate": 1.378674016155067e-05, + "loss": 0.3249368965625763, + "step": 3071 + }, + { + "epoch": 0.8158279112999601, + "grad_norm": 1.0060562228451158, + "learning_rate": 1.3782676011053592e-05, + "loss": 0.2871986925601959, + "step": 3072 + }, + { + "epoch": 0.8160934802815031, + "grad_norm": 1.1624248444882197, + "learning_rate": 1.377861113130119e-05, + "loss": 0.29047372937202454, + "step": 3073 + }, + { + "epoch": 0.816359049263046, + "grad_norm": 1.0925698386610025, + "learning_rate": 1.3774545523077122e-05, + "loss": 0.3055281341075897, + "step": 3074 + }, + { + "epoch": 0.816624618244589, + "grad_norm": 0.9197098274775629, + "learning_rate": 1.37704791871652e-05, + "loss": 0.2565494179725647, + "step": 3075 + }, + { + "epoch": 0.8168901872261319, + "grad_norm": 1.0377185359248249, + "learning_rate": 1.3766412124349358e-05, + "loss": 0.3016049861907959, + "step": 3076 + }, + { + "epoch": 0.8171557562076749, + "grad_norm": 1.0790995041055653, + "learning_rate": 1.3762344335413677e-05, + "loss": 0.3021200895309448, + "step": 3077 + }, + { + "epoch": 0.817421325189218, + "grad_norm": 1.0643017770253544, + "learning_rate": 1.3758275821142382e-05, + "loss": 0.3024774193763733, + "step": 3078 + }, + { + "epoch": 0.8176868941707609, + "grad_norm": 1.0591328005001268, + "learning_rate": 1.3754206582319836e-05, + "loss": 0.33114269375801086, + "step": 3079 + }, + { + "epoch": 0.8179524631523039, + "grad_norm": 1.0815809107319383, + "learning_rate": 1.3750136619730534e-05, + "loss": 0.27339494228363037, + "step": 3080 + }, + { + "epoch": 0.8182180321338468, + "grad_norm": 1.170674128986789, + "learning_rate": 1.3746065934159123e-05, + "loss": 0.2827128767967224, + "step": 3081 + }, + { + "epoch": 0.8184836011153898, + 
"grad_norm": 1.1064880736532463, + "learning_rate": 1.3741994526390379e-05, + "loss": 0.2972746193408966, + "step": 3082 + }, + { + "epoch": 0.8187491700969327, + "grad_norm": 1.143548636761381, + "learning_rate": 1.3737922397209222e-05, + "loss": 0.29932117462158203, + "step": 3083 + }, + { + "epoch": 0.8190147390784757, + "grad_norm": 1.0415876434255473, + "learning_rate": 1.3733849547400713e-05, + "loss": 0.28307998180389404, + "step": 3084 + }, + { + "epoch": 0.8192803080600186, + "grad_norm": 1.1070561443231863, + "learning_rate": 1.3729775977750048e-05, + "loss": 0.2885883152484894, + "step": 3085 + }, + { + "epoch": 0.8195458770415616, + "grad_norm": 1.1106477390667713, + "learning_rate": 1.3725701689042564e-05, + "loss": 0.28837913274765015, + "step": 3086 + }, + { + "epoch": 0.8198114460231045, + "grad_norm": 1.0553526039271008, + "learning_rate": 1.3721626682063733e-05, + "loss": 0.2775058150291443, + "step": 3087 + }, + { + "epoch": 0.8200770150046475, + "grad_norm": 1.153176622627066, + "learning_rate": 1.3717550957599172e-05, + "loss": 0.2813493609428406, + "step": 3088 + }, + { + "epoch": 0.8203425839861904, + "grad_norm": 1.1477738573738745, + "learning_rate": 1.371347451643463e-05, + "loss": 0.2677592933177948, + "step": 3089 + }, + { + "epoch": 0.8206081529677334, + "grad_norm": 1.184705398593534, + "learning_rate": 1.3709397359355998e-05, + "loss": 0.3104957938194275, + "step": 3090 + }, + { + "epoch": 0.8208737219492763, + "grad_norm": 1.1714327280441006, + "learning_rate": 1.3705319487149303e-05, + "loss": 0.29315799474716187, + "step": 3091 + }, + { + "epoch": 0.8211392909308193, + "grad_norm": 1.1179168081295616, + "learning_rate": 1.370124090060071e-05, + "loss": 0.3044348657131195, + "step": 3092 + }, + { + "epoch": 0.8214048599123622, + "grad_norm": 1.1122209585212142, + "learning_rate": 1.3697161600496525e-05, + "loss": 0.2918691635131836, + "step": 3093 + }, + { + "epoch": 0.8216704288939052, + "grad_norm": 1.0702091422822353, + 
"learning_rate": 1.3693081587623187e-05, + "loss": 0.2887750267982483, + "step": 3094 + }, + { + "epoch": 0.8219359978754481, + "grad_norm": 1.1155429990394359, + "learning_rate": 1.3689000862767274e-05, + "loss": 0.3055661916732788, + "step": 3095 + }, + { + "epoch": 0.8222015668569911, + "grad_norm": 1.0251756704247361, + "learning_rate": 1.3684919426715504e-05, + "loss": 0.271525114774704, + "step": 3096 + }, + { + "epoch": 0.822467135838534, + "grad_norm": 1.1269584199088303, + "learning_rate": 1.3680837280254726e-05, + "loss": 0.3220426142215729, + "step": 3097 + }, + { + "epoch": 0.822732704820077, + "grad_norm": 1.0149552227204566, + "learning_rate": 1.3676754424171935e-05, + "loss": 0.29091203212738037, + "step": 3098 + }, + { + "epoch": 0.8229982738016199, + "grad_norm": 1.051328362150218, + "learning_rate": 1.3672670859254252e-05, + "loss": 0.2928692102432251, + "step": 3099 + }, + { + "epoch": 0.8232638427831629, + "grad_norm": 1.0366528987524315, + "learning_rate": 1.3668586586288942e-05, + "loss": 0.28635919094085693, + "step": 3100 + }, + { + "epoch": 0.8235294117647058, + "grad_norm": 1.0374876833794577, + "learning_rate": 1.3664501606063402e-05, + "loss": 0.2912571430206299, + "step": 3101 + }, + { + "epoch": 0.8237949807462488, + "grad_norm": 1.051516198651511, + "learning_rate": 1.3660415919365178e-05, + "loss": 0.2783615291118622, + "step": 3102 + }, + { + "epoch": 0.8240605497277917, + "grad_norm": 1.088921494432588, + "learning_rate": 1.365632952698193e-05, + "loss": 0.3064395785331726, + "step": 3103 + }, + { + "epoch": 0.8243261187093347, + "grad_norm": 1.023130230207284, + "learning_rate": 1.3652242429701477e-05, + "loss": 0.2528907358646393, + "step": 3104 + }, + { + "epoch": 0.8245916876908777, + "grad_norm": 1.0503421945431453, + "learning_rate": 1.3648154628311754e-05, + "loss": 0.2648676633834839, + "step": 3105 + }, + { + "epoch": 0.8248572566724207, + "grad_norm": 1.2732480631249905, + "learning_rate": 1.3644066123600846e-05, + 
"loss": 0.33425620198249817, + "step": 3106 + }, + { + "epoch": 0.8251228256539637, + "grad_norm": 1.0925062122156084, + "learning_rate": 1.3639976916356965e-05, + "loss": 0.3108072280883789, + "step": 3107 + }, + { + "epoch": 0.8253883946355066, + "grad_norm": 1.0815679409684162, + "learning_rate": 1.3635887007368467e-05, + "loss": 0.2860543131828308, + "step": 3108 + }, + { + "epoch": 0.8256539636170496, + "grad_norm": 1.0711932859903586, + "learning_rate": 1.3631796397423833e-05, + "loss": 0.25440749526023865, + "step": 3109 + }, + { + "epoch": 0.8259195325985925, + "grad_norm": 1.1006663978120534, + "learning_rate": 1.3627705087311687e-05, + "loss": 0.2676115334033966, + "step": 3110 + }, + { + "epoch": 0.8261851015801355, + "grad_norm": 1.1597529133358384, + "learning_rate": 1.3623613077820788e-05, + "loss": 0.28977078199386597, + "step": 3111 + }, + { + "epoch": 0.8264506705616784, + "grad_norm": 1.1046761011596355, + "learning_rate": 1.361952036974002e-05, + "loss": 0.30161401629447937, + "step": 3112 + }, + { + "epoch": 0.8267162395432214, + "grad_norm": 1.135120464396266, + "learning_rate": 1.3615426963858416e-05, + "loss": 0.28676310181617737, + "step": 3113 + }, + { + "epoch": 0.8269818085247643, + "grad_norm": 1.100109147839879, + "learning_rate": 1.361133286096513e-05, + "loss": 0.2957243323326111, + "step": 3114 + }, + { + "epoch": 0.8272473775063073, + "grad_norm": 1.0691905028493969, + "learning_rate": 1.3607238061849461e-05, + "loss": 0.3036375343799591, + "step": 3115 + }, + { + "epoch": 0.8275129464878502, + "grad_norm": 1.1142331461612014, + "learning_rate": 1.360314256730084e-05, + "loss": 0.31175294518470764, + "step": 3116 + }, + { + "epoch": 0.8277785154693932, + "grad_norm": 1.0665802680669934, + "learning_rate": 1.3599046378108825e-05, + "loss": 0.30212485790252686, + "step": 3117 + }, + { + "epoch": 0.8280440844509361, + "grad_norm": 1.1992776426845386, + "learning_rate": 1.3594949495063117e-05, + "loss": 0.3290692865848541, + "step": 
3118 + }, + { + "epoch": 0.8283096534324791, + "grad_norm": 1.007005509411099, + "learning_rate": 1.3590851918953542e-05, + "loss": 0.25952839851379395, + "step": 3119 + }, + { + "epoch": 0.828575222414022, + "grad_norm": 1.0949064818424232, + "learning_rate": 1.3586753650570069e-05, + "loss": 0.27737247943878174, + "step": 3120 + }, + { + "epoch": 0.828840791395565, + "grad_norm": 1.0156990629875267, + "learning_rate": 1.3582654690702795e-05, + "loss": 0.29415374994277954, + "step": 3121 + }, + { + "epoch": 0.8291063603771079, + "grad_norm": 1.066804105313739, + "learning_rate": 1.3578555040141948e-05, + "loss": 0.29197627305984497, + "step": 3122 + }, + { + "epoch": 0.8293719293586509, + "grad_norm": 1.1089730397237387, + "learning_rate": 1.3574454699677893e-05, + "loss": 0.30318522453308105, + "step": 3123 + }, + { + "epoch": 0.8296374983401938, + "grad_norm": 1.0916871079120407, + "learning_rate": 1.357035367010113e-05, + "loss": 0.3184241056442261, + "step": 3124 + }, + { + "epoch": 0.8299030673217368, + "grad_norm": 1.3286365770942894, + "learning_rate": 1.3566251952202288e-05, + "loss": 0.30330199003219604, + "step": 3125 + }, + { + "epoch": 0.8301686363032797, + "grad_norm": 1.1117453782986153, + "learning_rate": 1.356214954677213e-05, + "loss": 0.25366994738578796, + "step": 3126 + }, + { + "epoch": 0.8304342052848227, + "grad_norm": 1.109752753436135, + "learning_rate": 1.3558046454601552e-05, + "loss": 0.3213343918323517, + "step": 3127 + }, + { + "epoch": 0.8306997742663657, + "grad_norm": 1.0918389418395038, + "learning_rate": 1.355394267648158e-05, + "loss": 0.3012468218803406, + "step": 3128 + }, + { + "epoch": 0.8309653432479086, + "grad_norm": 1.1319633441718049, + "learning_rate": 1.3549838213203374e-05, + "loss": 0.3272971510887146, + "step": 3129 + }, + { + "epoch": 0.8312309122294516, + "grad_norm": 1.0778057413430624, + "learning_rate": 1.354573306555823e-05, + "loss": 0.30032482743263245, + "step": 3130 + }, + { + "epoch": 0.8314964812109945, 
+ "grad_norm": 1.0778331818873157, + "learning_rate": 1.3541627234337567e-05, + "loss": 0.2820669412612915, + "step": 3131 + }, + { + "epoch": 0.8317620501925375, + "grad_norm": 1.0187129279356677, + "learning_rate": 1.3537520720332943e-05, + "loss": 0.2638673782348633, + "step": 3132 + }, + { + "epoch": 0.8320276191740804, + "grad_norm": 1.0843507637886551, + "learning_rate": 1.3533413524336043e-05, + "loss": 0.2766842246055603, + "step": 3133 + }, + { + "epoch": 0.8322931881556235, + "grad_norm": 1.2660530642163288, + "learning_rate": 1.3529305647138689e-05, + "loss": 0.330536425113678, + "step": 3134 + }, + { + "epoch": 0.8325587571371664, + "grad_norm": 1.0925834195413107, + "learning_rate": 1.3525197089532833e-05, + "loss": 0.30375364422798157, + "step": 3135 + }, + { + "epoch": 0.8328243261187094, + "grad_norm": 1.1657669106128519, + "learning_rate": 1.3521087852310555e-05, + "loss": 0.3092171549797058, + "step": 3136 + }, + { + "epoch": 0.8330898951002523, + "grad_norm": 1.1686338102407274, + "learning_rate": 1.3516977936264062e-05, + "loss": 0.28651195764541626, + "step": 3137 + }, + { + "epoch": 0.8333554640817953, + "grad_norm": 1.0845327487717817, + "learning_rate": 1.3512867342185705e-05, + "loss": 0.2882133722305298, + "step": 3138 + }, + { + "epoch": 0.8336210330633382, + "grad_norm": 1.1325019700739036, + "learning_rate": 1.3508756070867955e-05, + "loss": 0.30633628368377686, + "step": 3139 + }, + { + "epoch": 0.8338866020448812, + "grad_norm": 1.090943303162736, + "learning_rate": 1.3504644123103415e-05, + "loss": 0.2819565236568451, + "step": 3140 + }, + { + "epoch": 0.8341521710264241, + "grad_norm": 1.0804420637943886, + "learning_rate": 1.3500531499684819e-05, + "loss": 0.29544374346733093, + "step": 3141 + }, + { + "epoch": 0.8344177400079671, + "grad_norm": 1.10400689114043, + "learning_rate": 1.3496418201405037e-05, + "loss": 0.29383376240730286, + "step": 3142 + }, + { + "epoch": 0.83468330898951, + "grad_norm": 0.9862964562028984, + 
"learning_rate": 1.3492304229057062e-05, + "loss": 0.24945983290672302, + "step": 3143 + }, + { + "epoch": 0.834948877971053, + "grad_norm": 1.2055608503616826, + "learning_rate": 1.3488189583434023e-05, + "loss": 0.338919997215271, + "step": 3144 + }, + { + "epoch": 0.835214446952596, + "grad_norm": 1.071166648249549, + "learning_rate": 1.348407426532917e-05, + "loss": 0.29555821418762207, + "step": 3145 + }, + { + "epoch": 0.8354800159341389, + "grad_norm": 1.0650010322896095, + "learning_rate": 1.3479958275535887e-05, + "loss": 0.31038299202919006, + "step": 3146 + }, + { + "epoch": 0.8357455849156818, + "grad_norm": 1.021351909092412, + "learning_rate": 1.347584161484769e-05, + "loss": 0.2595089077949524, + "step": 3147 + }, + { + "epoch": 0.8360111538972248, + "grad_norm": 1.1885926674667484, + "learning_rate": 1.3471724284058227e-05, + "loss": 0.3287338614463806, + "step": 3148 + }, + { + "epoch": 0.8362767228787678, + "grad_norm": 1.1997618392346763, + "learning_rate": 1.3467606283961268e-05, + "loss": 0.3109680414199829, + "step": 3149 + }, + { + "epoch": 0.8365422918603107, + "grad_norm": 1.0762954067078139, + "learning_rate": 1.346348761535071e-05, + "loss": 0.2584227919578552, + "step": 3150 + }, + { + "epoch": 0.8368078608418537, + "grad_norm": 1.137771769139511, + "learning_rate": 1.345936827902059e-05, + "loss": 0.3038554787635803, + "step": 3151 + }, + { + "epoch": 0.8370734298233966, + "grad_norm": 1.029659281383911, + "learning_rate": 1.3455248275765067e-05, + "loss": 0.28267812728881836, + "step": 3152 + }, + { + "epoch": 0.8373389988049396, + "grad_norm": 1.163661242492436, + "learning_rate": 1.3451127606378425e-05, + "loss": 0.3328094184398651, + "step": 3153 + }, + { + "epoch": 0.8376045677864825, + "grad_norm": 1.084045978606854, + "learning_rate": 1.3447006271655082e-05, + "loss": 0.3235865533351898, + "step": 3154 + }, + { + "epoch": 0.8378701367680255, + "grad_norm": 1.037100355990568, + "learning_rate": 1.3442884272389583e-05, + "loss": 
0.25394493341445923, + "step": 3155 + }, + { + "epoch": 0.8381357057495684, + "grad_norm": 1.1250984496593863, + "learning_rate": 1.3438761609376604e-05, + "loss": 0.29841768741607666, + "step": 3156 + }, + { + "epoch": 0.8384012747311114, + "grad_norm": 1.1999100818775306, + "learning_rate": 1.3434638283410942e-05, + "loss": 0.3161924183368683, + "step": 3157 + }, + { + "epoch": 0.8386668437126543, + "grad_norm": 0.9017579941601053, + "learning_rate": 1.3430514295287526e-05, + "loss": 0.22781039774417877, + "step": 3158 + }, + { + "epoch": 0.8389324126941973, + "grad_norm": 1.0534948555265085, + "learning_rate": 1.3426389645801415e-05, + "loss": 0.2947984039783478, + "step": 3159 + }, + { + "epoch": 0.8391979816757402, + "grad_norm": 1.0286789238265646, + "learning_rate": 1.342226433574779e-05, + "loss": 0.2827467918395996, + "step": 3160 + }, + { + "epoch": 0.8394635506572832, + "grad_norm": 1.0453932660244052, + "learning_rate": 1.3418138365921962e-05, + "loss": 0.3149232268333435, + "step": 3161 + }, + { + "epoch": 0.8397291196388262, + "grad_norm": 1.2487567497076437, + "learning_rate": 1.3414011737119373e-05, + "loss": 0.33154603838920593, + "step": 3162 + }, + { + "epoch": 0.8399946886203692, + "grad_norm": 1.074983718750332, + "learning_rate": 1.3409884450135581e-05, + "loss": 0.28532034158706665, + "step": 3163 + }, + { + "epoch": 0.8402602576019121, + "grad_norm": 1.0695327636228384, + "learning_rate": 1.3405756505766286e-05, + "loss": 0.2539500892162323, + "step": 3164 + }, + { + "epoch": 0.8405258265834551, + "grad_norm": 1.0653532722719707, + "learning_rate": 1.3401627904807302e-05, + "loss": 0.3023888170719147, + "step": 3165 + }, + { + "epoch": 0.840791395564998, + "grad_norm": 1.0811844194203637, + "learning_rate": 1.3397498648054579e-05, + "loss": 0.3088506758213043, + "step": 3166 + }, + { + "epoch": 0.841056964546541, + "grad_norm": 1.2249048833028835, + "learning_rate": 1.3393368736304184e-05, + "loss": 0.3223467469215393, + "step": 3167 + }, + 
{ + "epoch": 0.841322533528084, + "grad_norm": 1.0772937869709083, + "learning_rate": 1.3389238170352318e-05, + "loss": 0.2541419565677643, + "step": 3168 + }, + { + "epoch": 0.8415881025096269, + "grad_norm": 1.0463826735598363, + "learning_rate": 1.3385106950995308e-05, + "loss": 0.2915497422218323, + "step": 3169 + }, + { + "epoch": 0.8418536714911699, + "grad_norm": 1.1726858597591174, + "learning_rate": 1.3380975079029598e-05, + "loss": 0.2907465994358063, + "step": 3170 + }, + { + "epoch": 0.8421192404727128, + "grad_norm": 1.0581221380369799, + "learning_rate": 1.337684255525177e-05, + "loss": 0.2587417960166931, + "step": 3171 + }, + { + "epoch": 0.8423848094542558, + "grad_norm": 1.1080472137531636, + "learning_rate": 1.3372709380458522e-05, + "loss": 0.2932469844818115, + "step": 3172 + }, + { + "epoch": 0.8426503784357987, + "grad_norm": 1.2359417241278925, + "learning_rate": 1.3368575555446681e-05, + "loss": 0.31451860070228577, + "step": 3173 + }, + { + "epoch": 0.8429159474173417, + "grad_norm": 1.067745190297883, + "learning_rate": 1.3364441081013205e-05, + "loss": 0.24513742327690125, + "step": 3174 + }, + { + "epoch": 0.8431815163988846, + "grad_norm": 1.0795526820997523, + "learning_rate": 1.3360305957955166e-05, + "loss": 0.29781201481819153, + "step": 3175 + }, + { + "epoch": 0.8434470853804276, + "grad_norm": 1.3176130252584213, + "learning_rate": 1.3356170187069775e-05, + "loss": 0.30925726890563965, + "step": 3176 + }, + { + "epoch": 0.8437126543619705, + "grad_norm": 1.1110632932678028, + "learning_rate": 1.3352033769154347e-05, + "loss": 0.2822851538658142, + "step": 3177 + }, + { + "epoch": 0.8439782233435135, + "grad_norm": 1.0033731418220575, + "learning_rate": 1.3347896705006344e-05, + "loss": 0.2511071264743805, + "step": 3178 + }, + { + "epoch": 0.8442437923250564, + "grad_norm": 1.1921629041957855, + "learning_rate": 1.3343758995423344e-05, + "loss": 0.3002505302429199, + "step": 3179 + }, + { + "epoch": 0.8445093613065994, + 
"grad_norm": 0.9942107511416755, + "learning_rate": 1.3339620641203043e-05, + "loss": 0.285504549741745, + "step": 3180 + }, + { + "epoch": 0.8447749302881423, + "grad_norm": 1.1880306222164103, + "learning_rate": 1.3335481643143271e-05, + "loss": 0.31988856196403503, + "step": 3181 + }, + { + "epoch": 0.8450404992696853, + "grad_norm": 1.0905691447057935, + "learning_rate": 1.3331342002041973e-05, + "loss": 0.29330819845199585, + "step": 3182 + }, + { + "epoch": 0.8453060682512282, + "grad_norm": 1.049547579497453, + "learning_rate": 1.3327201718697232e-05, + "loss": 0.28694427013397217, + "step": 3183 + }, + { + "epoch": 0.8455716372327712, + "grad_norm": 1.0561569710297949, + "learning_rate": 1.3323060793907239e-05, + "loss": 0.24912211298942566, + "step": 3184 + }, + { + "epoch": 0.8458372062143141, + "grad_norm": 1.1346018526864223, + "learning_rate": 1.3318919228470315e-05, + "loss": 0.28117647767066956, + "step": 3185 + }, + { + "epoch": 0.8461027751958571, + "grad_norm": 1.2524387900920857, + "learning_rate": 1.3314777023184907e-05, + "loss": 0.3176446557044983, + "step": 3186 + }, + { + "epoch": 0.8463683441774, + "grad_norm": 1.0728463380702977, + "learning_rate": 1.3310634178849583e-05, + "loss": 0.31205689907073975, + "step": 3187 + }, + { + "epoch": 0.846633913158943, + "grad_norm": 1.1500545538779043, + "learning_rate": 1.3306490696263034e-05, + "loss": 0.29942232370376587, + "step": 3188 + }, + { + "epoch": 0.8468994821404859, + "grad_norm": 1.161750107962421, + "learning_rate": 1.3302346576224077e-05, + "loss": 0.3149508833885193, + "step": 3189 + }, + { + "epoch": 0.847165051122029, + "grad_norm": 1.0924626607758976, + "learning_rate": 1.3298201819531646e-05, + "loss": 0.2930619418621063, + "step": 3190 + }, + { + "epoch": 0.847430620103572, + "grad_norm": 1.0958680594537196, + "learning_rate": 1.3294056426984804e-05, + "loss": 0.3089582920074463, + "step": 3191 + }, + { + "epoch": 0.8476961890851149, + "grad_norm": 1.2175163313381927, + 
"learning_rate": 1.3289910399382733e-05, + "loss": 0.3120991587638855, + "step": 3192 + }, + { + "epoch": 0.8479617580666579, + "grad_norm": 1.0535688994558223, + "learning_rate": 1.3285763737524738e-05, + "loss": 0.2728833258152008, + "step": 3193 + }, + { + "epoch": 0.8482273270482008, + "grad_norm": 1.0457465617551238, + "learning_rate": 1.3281616442210246e-05, + "loss": 0.2833358347415924, + "step": 3194 + }, + { + "epoch": 0.8484928960297438, + "grad_norm": 1.0714039101779447, + "learning_rate": 1.3277468514238803e-05, + "loss": 0.26218950748443604, + "step": 3195 + }, + { + "epoch": 0.8487584650112867, + "grad_norm": 1.0938436245702892, + "learning_rate": 1.3273319954410088e-05, + "loss": 0.3120720386505127, + "step": 3196 + }, + { + "epoch": 0.8490240339928297, + "grad_norm": 1.0412833763909957, + "learning_rate": 1.3269170763523892e-05, + "loss": 0.2748696208000183, + "step": 3197 + }, + { + "epoch": 0.8492896029743726, + "grad_norm": 1.0148051769031237, + "learning_rate": 1.326502094238013e-05, + "loss": 0.2892690598964691, + "step": 3198 + }, + { + "epoch": 0.8495551719559156, + "grad_norm": 1.068648430192615, + "learning_rate": 1.3260870491778835e-05, + "loss": 0.26583510637283325, + "step": 3199 + }, + { + "epoch": 0.8498207409374585, + "grad_norm": 1.105620955007001, + "learning_rate": 1.325671941252017e-05, + "loss": 0.31602388620376587, + "step": 3200 + }, + { + "epoch": 0.8500863099190015, + "grad_norm": 1.068517421778971, + "learning_rate": 1.3252567705404409e-05, + "loss": 0.2980017364025116, + "step": 3201 + }, + { + "epoch": 0.8503518789005444, + "grad_norm": 1.0740685936810315, + "learning_rate": 1.3248415371231957e-05, + "loss": 0.27081727981567383, + "step": 3202 + }, + { + "epoch": 0.8506174478820874, + "grad_norm": 1.2590520587844396, + "learning_rate": 1.3244262410803333e-05, + "loss": 0.28895002603530884, + "step": 3203 + }, + { + "epoch": 0.8508830168636303, + "grad_norm": 1.1373552047630993, + "learning_rate": 1.3240108824919176e-05, + 
"loss": 0.30804315209388733, + "step": 3204 + }, + { + "epoch": 0.8511485858451733, + "grad_norm": 1.1074447190812993, + "learning_rate": 1.3235954614380253e-05, + "loss": 0.28173667192459106, + "step": 3205 + }, + { + "epoch": 0.8514141548267162, + "grad_norm": 1.097058715769224, + "learning_rate": 1.3231799779987445e-05, + "loss": 0.3113047778606415, + "step": 3206 + }, + { + "epoch": 0.8516797238082592, + "grad_norm": 1.0285862677327642, + "learning_rate": 1.3227644322541754e-05, + "loss": 0.247248113155365, + "step": 3207 + }, + { + "epoch": 0.8519452927898021, + "grad_norm": 1.1032823581833329, + "learning_rate": 1.3223488242844309e-05, + "loss": 0.27078187465667725, + "step": 3208 + }, + { + "epoch": 0.8522108617713451, + "grad_norm": 1.0635139884249352, + "learning_rate": 1.321933154169634e-05, + "loss": 0.2749357223510742, + "step": 3209 + }, + { + "epoch": 0.852476430752888, + "grad_norm": 1.0129100217319345, + "learning_rate": 1.3215174219899224e-05, + "loss": 0.25382956862449646, + "step": 3210 + }, + { + "epoch": 0.852741999734431, + "grad_norm": 1.0528151094235563, + "learning_rate": 1.3211016278254436e-05, + "loss": 0.3237685263156891, + "step": 3211 + }, + { + "epoch": 0.8530075687159739, + "grad_norm": 1.273911241149791, + "learning_rate": 1.3206857717563581e-05, + "loss": 0.2899032235145569, + "step": 3212 + }, + { + "epoch": 0.8532731376975169, + "grad_norm": 1.040323856520164, + "learning_rate": 1.3202698538628376e-05, + "loss": 0.25997933745384216, + "step": 3213 + }, + { + "epoch": 0.8535387066790598, + "grad_norm": 1.121125084608177, + "learning_rate": 1.3198538742250668e-05, + "loss": 0.3228183090686798, + "step": 3214 + }, + { + "epoch": 0.8538042756606028, + "grad_norm": 1.1002230220524851, + "learning_rate": 1.3194378329232413e-05, + "loss": 0.31993368268013, + "step": 3215 + }, + { + "epoch": 0.8540698446421457, + "grad_norm": 1.157115702913611, + "learning_rate": 1.3190217300375694e-05, + "loss": 0.29520007967948914, + "step": 3216 + }, 
+ { + "epoch": 0.8543354136236887, + "grad_norm": 1.0898926058638614, + "learning_rate": 1.3186055656482702e-05, + "loss": 0.31073522567749023, + "step": 3217 + }, + { + "epoch": 0.8546009826052318, + "grad_norm": 1.1465583376043518, + "learning_rate": 1.3181893398355752e-05, + "loss": 0.34354183077812195, + "step": 3218 + }, + { + "epoch": 0.8548665515867747, + "grad_norm": 1.179928846812524, + "learning_rate": 1.3177730526797286e-05, + "loss": 0.27676698565483093, + "step": 3219 + }, + { + "epoch": 0.8551321205683177, + "grad_norm": 1.0792983255501365, + "learning_rate": 1.3173567042609852e-05, + "loss": 0.27313530445098877, + "step": 3220 + }, + { + "epoch": 0.8553976895498606, + "grad_norm": 0.9249374113484707, + "learning_rate": 1.3169402946596119e-05, + "loss": 0.2517555058002472, + "step": 3221 + }, + { + "epoch": 0.8556632585314036, + "grad_norm": 1.0684778793194236, + "learning_rate": 1.3165238239558878e-05, + "loss": 0.29700207710266113, + "step": 3222 + }, + { + "epoch": 0.8559288275129465, + "grad_norm": 1.1262235464302217, + "learning_rate": 1.3161072922301037e-05, + "loss": 0.3182620704174042, + "step": 3223 + }, + { + "epoch": 0.8561943964944895, + "grad_norm": 1.123570804553303, + "learning_rate": 1.3156906995625615e-05, + "loss": 0.3112961947917938, + "step": 3224 + }, + { + "epoch": 0.8564599654760324, + "grad_norm": 1.1746597736734636, + "learning_rate": 1.3152740460335757e-05, + "loss": 0.3080563545227051, + "step": 3225 + }, + { + "epoch": 0.8567255344575754, + "grad_norm": 1.1646363575237453, + "learning_rate": 1.3148573317234726e-05, + "loss": 0.31197935342788696, + "step": 3226 + }, + { + "epoch": 0.8569911034391183, + "grad_norm": 1.0455051980244612, + "learning_rate": 1.3144405567125886e-05, + "loss": 0.27377086877822876, + "step": 3227 + }, + { + "epoch": 0.8572566724206613, + "grad_norm": 1.050528412475655, + "learning_rate": 1.3140237210812741e-05, + "loss": 0.25303182005882263, + "step": 3228 + }, + { + "epoch": 0.8575222414022042, + 
"grad_norm": 1.0664458431943622, + "learning_rate": 1.3136068249098899e-05, + "loss": 0.27949726581573486, + "step": 3229 + }, + { + "epoch": 0.8577878103837472, + "grad_norm": 1.0907347405782384, + "learning_rate": 1.3131898682788082e-05, + "loss": 0.278359055519104, + "step": 3230 + }, + { + "epoch": 0.8580533793652901, + "grad_norm": 1.081462335761227, + "learning_rate": 1.312772851268414e-05, + "loss": 0.28507643938064575, + "step": 3231 + }, + { + "epoch": 0.8583189483468331, + "grad_norm": 1.0256133822907842, + "learning_rate": 1.3123557739591026e-05, + "loss": 0.2689790427684784, + "step": 3232 + }, + { + "epoch": 0.858584517328376, + "grad_norm": 1.1569049456144243, + "learning_rate": 1.3119386364312821e-05, + "loss": 0.31956973671913147, + "step": 3233 + }, + { + "epoch": 0.858850086309919, + "grad_norm": 1.0914807974802394, + "learning_rate": 1.3115214387653711e-05, + "loss": 0.2837323546409607, + "step": 3234 + }, + { + "epoch": 0.8591156552914619, + "grad_norm": 1.0015578039784754, + "learning_rate": 1.3111041810418011e-05, + "loss": 0.2756272554397583, + "step": 3235 + }, + { + "epoch": 0.8593812242730049, + "grad_norm": 1.0283979772106548, + "learning_rate": 1.3106868633410139e-05, + "loss": 0.2664923369884491, + "step": 3236 + }, + { + "epoch": 0.8596467932545478, + "grad_norm": 1.2217960050611696, + "learning_rate": 1.3102694857434637e-05, + "loss": 0.2842246890068054, + "step": 3237 + }, + { + "epoch": 0.8599123622360908, + "grad_norm": 1.0632739499737671, + "learning_rate": 1.3098520483296159e-05, + "loss": 0.3066467344760895, + "step": 3238 + }, + { + "epoch": 0.8601779312176338, + "grad_norm": 1.148754786147734, + "learning_rate": 1.3094345511799478e-05, + "loss": 0.3042510151863098, + "step": 3239 + }, + { + "epoch": 0.8604435001991767, + "grad_norm": 0.9995895975923785, + "learning_rate": 1.3090169943749475e-05, + "loss": 0.2753696143627167, + "step": 3240 + }, + { + "epoch": 0.8607090691807197, + "grad_norm": 1.0325788591675433, + 
"learning_rate": 1.3085993779951154e-05, + "loss": 0.2561766803264618, + "step": 3241 + }, + { + "epoch": 0.8609746381622626, + "grad_norm": 1.2136300404308455, + "learning_rate": 1.3081817021209626e-05, + "loss": 0.297982782125473, + "step": 3242 + }, + { + "epoch": 0.8612402071438056, + "grad_norm": 1.0615498924909679, + "learning_rate": 1.3077639668330124e-05, + "loss": 0.2961920499801636, + "step": 3243 + }, + { + "epoch": 0.8615057761253485, + "grad_norm": 1.1445145037694135, + "learning_rate": 1.3073461722117991e-05, + "loss": 0.2868857979774475, + "step": 3244 + }, + { + "epoch": 0.8617713451068915, + "grad_norm": 0.9475657969770804, + "learning_rate": 1.3069283183378683e-05, + "loss": 0.22930951416492462, + "step": 3245 + }, + { + "epoch": 0.8620369140884345, + "grad_norm": 1.1416904771862697, + "learning_rate": 1.306510405291778e-05, + "loss": 0.29737964272499084, + "step": 3246 + }, + { + "epoch": 0.8623024830699775, + "grad_norm": 1.0401904023883137, + "learning_rate": 1.3060924331540964e-05, + "loss": 0.2764522433280945, + "step": 3247 + }, + { + "epoch": 0.8625680520515204, + "grad_norm": 0.9863739655208709, + "learning_rate": 1.3056744020054039e-05, + "loss": 0.27608832716941833, + "step": 3248 + }, + { + "epoch": 0.8628336210330634, + "grad_norm": 1.0115944755696356, + "learning_rate": 1.3052563119262915e-05, + "loss": 0.25667035579681396, + "step": 3249 + }, + { + "epoch": 0.8630991900146063, + "grad_norm": 1.1289498412687866, + "learning_rate": 1.3048381629973622e-05, + "loss": 0.3015863597393036, + "step": 3250 + }, + { + "epoch": 0.8633647589961493, + "grad_norm": 1.123802742380982, + "learning_rate": 1.3044199552992307e-05, + "loss": 0.2798422873020172, + "step": 3251 + }, + { + "epoch": 0.8636303279776922, + "grad_norm": 1.1385670465264601, + "learning_rate": 1.304001688912522e-05, + "loss": 0.2856596112251282, + "step": 3252 + }, + { + "epoch": 0.8638958969592352, + "grad_norm": 1.2094473565150297, + "learning_rate": 1.303583363917873e-05, + 
"loss": 0.30247554183006287, + "step": 3253 + }, + { + "epoch": 0.8641614659407781, + "grad_norm": 1.1517937069448307, + "learning_rate": 1.303164980395932e-05, + "loss": 0.26817965507507324, + "step": 3254 + }, + { + "epoch": 0.8644270349223211, + "grad_norm": 1.197653632931973, + "learning_rate": 1.3027465384273579e-05, + "loss": 0.26919034123420715, + "step": 3255 + }, + { + "epoch": 0.864692603903864, + "grad_norm": 1.1206851183742237, + "learning_rate": 1.3023280380928223e-05, + "loss": 0.29495447874069214, + "step": 3256 + }, + { + "epoch": 0.864958172885407, + "grad_norm": 1.0428738517831404, + "learning_rate": 1.3019094794730063e-05, + "loss": 0.26766717433929443, + "step": 3257 + }, + { + "epoch": 0.86522374186695, + "grad_norm": 0.9998039586765358, + "learning_rate": 1.3014908626486032e-05, + "loss": 0.2573341131210327, + "step": 3258 + }, + { + "epoch": 0.8654893108484929, + "grad_norm": 1.226366277313196, + "learning_rate": 1.3010721877003177e-05, + "loss": 0.32776498794555664, + "step": 3259 + }, + { + "epoch": 0.8657548798300359, + "grad_norm": 1.1631189448763641, + "learning_rate": 1.3006534547088651e-05, + "loss": 0.3107950687408447, + "step": 3260 + }, + { + "epoch": 0.8660204488115788, + "grad_norm": 1.0476224109192296, + "learning_rate": 1.3002346637549726e-05, + "loss": 0.26143360137939453, + "step": 3261 + }, + { + "epoch": 0.8662860177931218, + "grad_norm": 1.035123297672666, + "learning_rate": 1.2998158149193773e-05, + "loss": 0.25666722655296326, + "step": 3262 + }, + { + "epoch": 0.8665515867746647, + "grad_norm": 1.1492097701405037, + "learning_rate": 1.2993969082828296e-05, + "loss": 0.2982695698738098, + "step": 3263 + }, + { + "epoch": 0.8668171557562077, + "grad_norm": 1.0937256102841277, + "learning_rate": 1.2989779439260888e-05, + "loss": 0.30144304037094116, + "step": 3264 + }, + { + "epoch": 0.8670827247377506, + "grad_norm": 1.0563159913050848, + "learning_rate": 1.2985589219299264e-05, + "loss": 0.30421534180641174, + "step": 
3265 + }, + { + "epoch": 0.8673482937192936, + "grad_norm": 1.0698350081311019, + "learning_rate": 1.298139842375125e-05, + "loss": 0.23653842508792877, + "step": 3266 + }, + { + "epoch": 0.8676138627008365, + "grad_norm": 1.2059661362441823, + "learning_rate": 1.2977207053424781e-05, + "loss": 0.284118115901947, + "step": 3267 + }, + { + "epoch": 0.8678794316823795, + "grad_norm": 1.0387152548948486, + "learning_rate": 1.2973015109127907e-05, + "loss": 0.30857348442077637, + "step": 3268 + }, + { + "epoch": 0.8681450006639224, + "grad_norm": 1.0987728632322369, + "learning_rate": 1.2968822591668784e-05, + "loss": 0.2826589047908783, + "step": 3269 + }, + { + "epoch": 0.8684105696454654, + "grad_norm": 1.109218087764862, + "learning_rate": 1.2964629501855678e-05, + "loss": 0.27634552121162415, + "step": 3270 + }, + { + "epoch": 0.8686761386270083, + "grad_norm": 1.0217259699141916, + "learning_rate": 1.296043584049697e-05, + "loss": 0.25823545455932617, + "step": 3271 + }, + { + "epoch": 0.8689417076085513, + "grad_norm": 1.148249635090711, + "learning_rate": 1.2956241608401145e-05, + "loss": 0.28939294815063477, + "step": 3272 + }, + { + "epoch": 0.8692072765900942, + "grad_norm": 1.0622455952024017, + "learning_rate": 1.2952046806376806e-05, + "loss": 0.3042459785938263, + "step": 3273 + }, + { + "epoch": 0.8694728455716373, + "grad_norm": 1.042505415392428, + "learning_rate": 1.2947851435232658e-05, + "loss": 0.2834415137767792, + "step": 3274 + }, + { + "epoch": 0.8697384145531802, + "grad_norm": 1.144903021800522, + "learning_rate": 1.2943655495777518e-05, + "loss": 0.28226330876350403, + "step": 3275 + }, + { + "epoch": 0.8700039835347232, + "grad_norm": 1.023547316743189, + "learning_rate": 1.2939458988820317e-05, + "loss": 0.2796105742454529, + "step": 3276 + }, + { + "epoch": 0.8702695525162661, + "grad_norm": 0.9903193313068561, + "learning_rate": 1.2935261915170091e-05, + "loss": 0.24790553748607635, + "step": 3277 + }, + { + "epoch": 0.8705351214978091, 
+ "grad_norm": 1.0279177898991045, + "learning_rate": 1.2931064275635987e-05, + "loss": 0.25101587176322937, + "step": 3278 + }, + { + "epoch": 0.870800690479352, + "grad_norm": 1.1728597267839225, + "learning_rate": 1.2926866071027257e-05, + "loss": 0.3060816526412964, + "step": 3279 + }, + { + "epoch": 0.871066259460895, + "grad_norm": 1.1510511467115991, + "learning_rate": 1.2922667302153268e-05, + "loss": 0.3137212097644806, + "step": 3280 + }, + { + "epoch": 0.871331828442438, + "grad_norm": 0.9977159840643061, + "learning_rate": 1.2918467969823497e-05, + "loss": 0.2391548752784729, + "step": 3281 + }, + { + "epoch": 0.8715973974239809, + "grad_norm": 1.2003880700717509, + "learning_rate": 1.2914268074847516e-05, + "loss": 0.3219330608844757, + "step": 3282 + }, + { + "epoch": 0.8718629664055239, + "grad_norm": 1.126134187698585, + "learning_rate": 1.2910067618035025e-05, + "loss": 0.2934436798095703, + "step": 3283 + }, + { + "epoch": 0.8721285353870668, + "grad_norm": 1.2016016844780073, + "learning_rate": 1.2905866600195815e-05, + "loss": 0.2919486165046692, + "step": 3284 + }, + { + "epoch": 0.8723941043686098, + "grad_norm": 1.1895929482131946, + "learning_rate": 1.2901665022139796e-05, + "loss": 0.2840641438961029, + "step": 3285 + }, + { + "epoch": 0.8726596733501527, + "grad_norm": 1.0215741253911979, + "learning_rate": 1.2897462884676983e-05, + "loss": 0.24151530861854553, + "step": 3286 + }, + { + "epoch": 0.8729252423316957, + "grad_norm": 1.0040194757671277, + "learning_rate": 1.28932601886175e-05, + "loss": 0.24515505135059357, + "step": 3287 + }, + { + "epoch": 0.8731908113132386, + "grad_norm": 1.2173512735867882, + "learning_rate": 1.2889056934771577e-05, + "loss": 0.2561264634132385, + "step": 3288 + }, + { + "epoch": 0.8734563802947816, + "grad_norm": 1.1645401251165897, + "learning_rate": 1.2884853123949547e-05, + "loss": 0.2798641622066498, + "step": 3289 + }, + { + "epoch": 0.8737219492763245, + "grad_norm": 1.2693161910394721, + 
"learning_rate": 1.288064875696186e-05, + "loss": 0.35207298398017883, + "step": 3290 + }, + { + "epoch": 0.8739875182578675, + "grad_norm": 1.0184365377421387, + "learning_rate": 1.2876443834619066e-05, + "loss": 0.2778821289539337, + "step": 3291 + }, + { + "epoch": 0.8742530872394104, + "grad_norm": 1.044209880952949, + "learning_rate": 1.2872238357731825e-05, + "loss": 0.2691737413406372, + "step": 3292 + }, + { + "epoch": 0.8745186562209534, + "grad_norm": 1.1392637940929287, + "learning_rate": 1.2868032327110904e-05, + "loss": 0.25476595759391785, + "step": 3293 + }, + { + "epoch": 0.8747842252024963, + "grad_norm": 1.012064080488804, + "learning_rate": 1.2863825743567174e-05, + "loss": 0.258474737405777, + "step": 3294 + }, + { + "epoch": 0.8750497941840393, + "grad_norm": 1.17733236715245, + "learning_rate": 1.285961860791162e-05, + "loss": 0.32421568036079407, + "step": 3295 + }, + { + "epoch": 0.8753153631655822, + "grad_norm": 1.0747747984737868, + "learning_rate": 1.2855410920955323e-05, + "loss": 0.3090333342552185, + "step": 3296 + }, + { + "epoch": 0.8755809321471252, + "grad_norm": 1.1729934635240566, + "learning_rate": 1.2851202683509476e-05, + "loss": 0.26548707485198975, + "step": 3297 + }, + { + "epoch": 0.8758465011286681, + "grad_norm": 2.497627852681845, + "learning_rate": 1.2846993896385378e-05, + "loss": 0.3002355098724365, + "step": 3298 + }, + { + "epoch": 0.8761120701102111, + "grad_norm": 1.1706582997439863, + "learning_rate": 1.2842784560394433e-05, + "loss": 0.2924933135509491, + "step": 3299 + }, + { + "epoch": 0.876377639091754, + "grad_norm": 1.1544391256229967, + "learning_rate": 1.2838574676348155e-05, + "loss": 0.2886514663696289, + "step": 3300 + }, + { + "epoch": 0.876643208073297, + "grad_norm": 1.1131138367993383, + "learning_rate": 1.2834364245058155e-05, + "loss": 0.29821154475212097, + "step": 3301 + }, + { + "epoch": 0.87690877705484, + "grad_norm": 1.0278540671542709, + "learning_rate": 1.2830153267336159e-05, + "loss": 
0.2656530737876892, + "step": 3302 + }, + { + "epoch": 0.877174346036383, + "grad_norm": 1.2018449655833119, + "learning_rate": 1.282594174399399e-05, + "loss": 0.3437826633453369, + "step": 3303 + }, + { + "epoch": 0.877439915017926, + "grad_norm": 1.0564301800372577, + "learning_rate": 1.2821729675843581e-05, + "loss": 0.29773175716400146, + "step": 3304 + }, + { + "epoch": 0.8777054839994689, + "grad_norm": 1.0707167209814024, + "learning_rate": 1.2817517063696973e-05, + "loss": 0.29772818088531494, + "step": 3305 + }, + { + "epoch": 0.8779710529810119, + "grad_norm": 1.1530012432828134, + "learning_rate": 1.2813303908366303e-05, + "loss": 0.3266611099243164, + "step": 3306 + }, + { + "epoch": 0.8782366219625548, + "grad_norm": 1.0044541774243023, + "learning_rate": 1.2809090210663818e-05, + "loss": 0.26599690318107605, + "step": 3307 + }, + { + "epoch": 0.8785021909440978, + "grad_norm": 1.0142651525790767, + "learning_rate": 1.2804875971401872e-05, + "loss": 0.27988117933273315, + "step": 3308 + }, + { + "epoch": 0.8787677599256407, + "grad_norm": 1.0221522532224918, + "learning_rate": 1.2800661191392916e-05, + "loss": 0.2630334496498108, + "step": 3309 + }, + { + "epoch": 0.8790333289071837, + "grad_norm": 1.022950247187023, + "learning_rate": 1.2796445871449517e-05, + "loss": 0.2628091871738434, + "step": 3310 + }, + { + "epoch": 0.8792988978887266, + "grad_norm": 1.1994310454875075, + "learning_rate": 1.2792230012384333e-05, + "loss": 0.3443898558616638, + "step": 3311 + }, + { + "epoch": 0.8795644668702696, + "grad_norm": 1.0673533832636588, + "learning_rate": 1.2788013615010136e-05, + "loss": 0.2966022491455078, + "step": 3312 + }, + { + "epoch": 0.8798300358518125, + "grad_norm": 1.1030087744198647, + "learning_rate": 1.2783796680139793e-05, + "loss": 0.2995494604110718, + "step": 3313 + }, + { + "epoch": 0.8800956048333555, + "grad_norm": 1.0504434000468303, + "learning_rate": 1.2779579208586283e-05, + "loss": 0.2652590870857239, + "step": 3314 + }, + { 
+ "epoch": 0.8803611738148984, + "grad_norm": 1.1388460976467547, + "learning_rate": 1.2775361201162684e-05, + "loss": 0.3145690858364105, + "step": 3315 + }, + { + "epoch": 0.8806267427964414, + "grad_norm": 1.040210802651612, + "learning_rate": 1.2771142658682175e-05, + "loss": 0.25744086503982544, + "step": 3316 + }, + { + "epoch": 0.8808923117779843, + "grad_norm": 1.1618029117732733, + "learning_rate": 1.2766923581958046e-05, + "loss": 0.3129793405532837, + "step": 3317 + }, + { + "epoch": 0.8811578807595273, + "grad_norm": 1.166975234876197, + "learning_rate": 1.2762703971803684e-05, + "loss": 0.233384907245636, + "step": 3318 + }, + { + "epoch": 0.8814234497410702, + "grad_norm": 0.9242808009438505, + "learning_rate": 1.2758483829032579e-05, + "loss": 0.2422962635755539, + "step": 3319 + }, + { + "epoch": 0.8816890187226132, + "grad_norm": 1.0844595421589949, + "learning_rate": 1.2754263154458328e-05, + "loss": 0.2801973819732666, + "step": 3320 + }, + { + "epoch": 0.8819545877041561, + "grad_norm": 1.294346594070355, + "learning_rate": 1.2750041948894621e-05, + "loss": 0.30659937858581543, + "step": 3321 + }, + { + "epoch": 0.8822201566856991, + "grad_norm": 1.0921019252616484, + "learning_rate": 1.274582021315526e-05, + "loss": 0.28527066111564636, + "step": 3322 + }, + { + "epoch": 0.882485725667242, + "grad_norm": 1.0598264473011552, + "learning_rate": 1.2741597948054146e-05, + "loss": 0.23065675795078278, + "step": 3323 + }, + { + "epoch": 0.882751294648785, + "grad_norm": 1.0918730747592962, + "learning_rate": 1.2737375154405283e-05, + "loss": 0.2727832794189453, + "step": 3324 + }, + { + "epoch": 0.8830168636303279, + "grad_norm": 1.0789259788038712, + "learning_rate": 1.273315183302277e-05, + "loss": 0.26809507608413696, + "step": 3325 + }, + { + "epoch": 0.8832824326118709, + "grad_norm": 1.1647625824499415, + "learning_rate": 1.2728927984720823e-05, + "loss": 0.3250407576560974, + "step": 3326 + }, + { + "epoch": 0.8835480015934138, + "grad_norm": 
1.0915300736309757, + "learning_rate": 1.2724703610313742e-05, + "loss": 0.2651330232620239, + "step": 3327 + }, + { + "epoch": 0.8838135705749568, + "grad_norm": 1.206298710080754, + "learning_rate": 1.2720478710615944e-05, + "loss": 0.27337920665740967, + "step": 3328 + }, + { + "epoch": 0.8840791395564997, + "grad_norm": 1.0282478968996285, + "learning_rate": 1.2716253286441935e-05, + "loss": 0.2664092183113098, + "step": 3329 + }, + { + "epoch": 0.8843447085380428, + "grad_norm": 1.1354570950284573, + "learning_rate": 1.2712027338606323e-05, + "loss": 0.27927765250205994, + "step": 3330 + }, + { + "epoch": 0.8846102775195858, + "grad_norm": 1.1204979208217445, + "learning_rate": 1.270780086792383e-05, + "loss": 0.27241113781929016, + "step": 3331 + }, + { + "epoch": 0.8848758465011287, + "grad_norm": 1.0795162414965664, + "learning_rate": 1.2703573875209264e-05, + "loss": 0.28279373049736023, + "step": 3332 + }, + { + "epoch": 0.8851414154826717, + "grad_norm": 1.1634487658284207, + "learning_rate": 1.2699346361277538e-05, + "loss": 0.3011108934879303, + "step": 3333 + }, + { + "epoch": 0.8854069844642146, + "grad_norm": 2.772716513531517, + "learning_rate": 1.2695118326943671e-05, + "loss": 0.3071288764476776, + "step": 3334 + }, + { + "epoch": 0.8856725534457576, + "grad_norm": 1.0969950934626527, + "learning_rate": 1.2690889773022778e-05, + "loss": 0.2688761353492737, + "step": 3335 + }, + { + "epoch": 0.8859381224273005, + "grad_norm": 1.1363327585955358, + "learning_rate": 1.2686660700330074e-05, + "loss": 0.2788669466972351, + "step": 3336 + }, + { + "epoch": 0.8862036914088435, + "grad_norm": 1.0884694079711634, + "learning_rate": 1.268243110968087e-05, + "loss": 0.2801516652107239, + "step": 3337 + }, + { + "epoch": 0.8864692603903864, + "grad_norm": 1.0414904749451368, + "learning_rate": 1.2678201001890587e-05, + "loss": 0.2876908779144287, + "step": 3338 + }, + { + "epoch": 0.8867348293719294, + "grad_norm": 1.1731879069090343, + "learning_rate": 
1.2673970377774733e-05, + "loss": 0.27709734439849854, + "step": 3339 + }, + { + "epoch": 0.8870003983534723, + "grad_norm": 1.2053408848372587, + "learning_rate": 1.266973923814893e-05, + "loss": 0.3191622793674469, + "step": 3340 + }, + { + "epoch": 0.8872659673350153, + "grad_norm": 1.098682297791164, + "learning_rate": 1.2665507583828889e-05, + "loss": 0.2873385548591614, + "step": 3341 + }, + { + "epoch": 0.8875315363165582, + "grad_norm": 1.1730973936717166, + "learning_rate": 1.2661275415630421e-05, + "loss": 0.2922922372817993, + "step": 3342 + }, + { + "epoch": 0.8877971052981012, + "grad_norm": 1.1127017834272521, + "learning_rate": 1.2657042734369443e-05, + "loss": 0.305694043636322, + "step": 3343 + }, + { + "epoch": 0.8880626742796441, + "grad_norm": 1.120364019457983, + "learning_rate": 1.2652809540861958e-05, + "loss": 0.29108062386512756, + "step": 3344 + }, + { + "epoch": 0.8883282432611871, + "grad_norm": 1.076655765525218, + "learning_rate": 1.2648575835924084e-05, + "loss": 0.24170495569705963, + "step": 3345 + }, + { + "epoch": 0.88859381224273, + "grad_norm": 1.4853370236272063, + "learning_rate": 1.2644341620372025e-05, + "loss": 0.2987719476222992, + "step": 3346 + }, + { + "epoch": 0.888859381224273, + "grad_norm": 0.9743774864126274, + "learning_rate": 1.2640106895022088e-05, + "loss": 0.21037599444389343, + "step": 3347 + }, + { + "epoch": 0.889124950205816, + "grad_norm": 1.034527053965976, + "learning_rate": 1.2635871660690677e-05, + "loss": 0.25263655185699463, + "step": 3348 + }, + { + "epoch": 0.8893905191873589, + "grad_norm": 1.2196740502064325, + "learning_rate": 1.2631635918194301e-05, + "loss": 0.30169543623924255, + "step": 3349 + }, + { + "epoch": 0.8896560881689018, + "grad_norm": 1.0624381650731511, + "learning_rate": 1.2627399668349554e-05, + "loss": 0.26982420682907104, + "step": 3350 + }, + { + "epoch": 0.8899216571504448, + "grad_norm": 1.1785068724165282, + "learning_rate": 1.262316291197314e-05, + "loss": 
0.3281899690628052, + "step": 3351 + }, + { + "epoch": 0.8901872261319878, + "grad_norm": 1.1157278400935415, + "learning_rate": 1.2618925649881852e-05, + "loss": 0.30140435695648193, + "step": 3352 + }, + { + "epoch": 0.8904527951135307, + "grad_norm": 0.9928732296573972, + "learning_rate": 1.261468788289259e-05, + "loss": 0.22343885898590088, + "step": 3353 + }, + { + "epoch": 0.8907183640950737, + "grad_norm": 1.0410264886026745, + "learning_rate": 1.261044961182234e-05, + "loss": 0.2889901399612427, + "step": 3354 + }, + { + "epoch": 0.8909839330766166, + "grad_norm": 1.0933214790144683, + "learning_rate": 1.260621083748819e-05, + "loss": 0.27896153926849365, + "step": 3355 + }, + { + "epoch": 0.8912495020581596, + "grad_norm": 1.077111437166839, + "learning_rate": 1.2601971560707328e-05, + "loss": 0.29390811920166016, + "step": 3356 + }, + { + "epoch": 0.8915150710397025, + "grad_norm": 1.0468332572471015, + "learning_rate": 1.2597731782297036e-05, + "loss": 0.2872384190559387, + "step": 3357 + }, + { + "epoch": 0.8917806400212455, + "grad_norm": 1.3094137802442116, + "learning_rate": 1.2593491503074698e-05, + "loss": 0.29753726720809937, + "step": 3358 + }, + { + "epoch": 0.8920462090027885, + "grad_norm": 1.1441306843080605, + "learning_rate": 1.2589250723857782e-05, + "loss": 0.31631946563720703, + "step": 3359 + }, + { + "epoch": 0.8923117779843315, + "grad_norm": 1.1374138683367387, + "learning_rate": 1.2585009445463867e-05, + "loss": 0.2932048738002777, + "step": 3360 + }, + { + "epoch": 0.8925773469658744, + "grad_norm": 1.0483655110874528, + "learning_rate": 1.2580767668710614e-05, + "loss": 0.2902034521102905, + "step": 3361 + }, + { + "epoch": 0.8928429159474174, + "grad_norm": 1.0712531988705474, + "learning_rate": 1.2576525394415795e-05, + "loss": 0.2596299648284912, + "step": 3362 + }, + { + "epoch": 0.8931084849289603, + "grad_norm": 1.1916540375753872, + "learning_rate": 1.2572282623397268e-05, + "loss": 0.29102641344070435, + "step": 3363 + }, 
+ { + "epoch": 0.8933740539105033, + "grad_norm": 1.236954620143465, + "learning_rate": 1.2568039356472985e-05, + "loss": 0.2970406711101532, + "step": 3364 + }, + { + "epoch": 0.8936396228920462, + "grad_norm": 1.1384210267422126, + "learning_rate": 1.2563795594461003e-05, + "loss": 0.2916618585586548, + "step": 3365 + }, + { + "epoch": 0.8939051918735892, + "grad_norm": 1.1769911575713834, + "learning_rate": 1.2559551338179468e-05, + "loss": 0.3217374086380005, + "step": 3366 + }, + { + "epoch": 0.8941707608551321, + "grad_norm": 1.1228623922561494, + "learning_rate": 1.255530658844662e-05, + "loss": 0.3000059425830841, + "step": 3367 + }, + { + "epoch": 0.8944363298366751, + "grad_norm": 1.2170346898517979, + "learning_rate": 1.2551061346080804e-05, + "loss": 0.2848728895187378, + "step": 3368 + }, + { + "epoch": 0.894701898818218, + "grad_norm": 1.3197542136745113, + "learning_rate": 1.2546815611900442e-05, + "loss": 0.3328903317451477, + "step": 3369 + }, + { + "epoch": 0.894967467799761, + "grad_norm": 1.0838958961687528, + "learning_rate": 1.2542569386724069e-05, + "loss": 0.2920045256614685, + "step": 3370 + }, + { + "epoch": 0.895233036781304, + "grad_norm": 1.0679716869166582, + "learning_rate": 1.2538322671370305e-05, + "loss": 0.30370092391967773, + "step": 3371 + }, + { + "epoch": 0.8954986057628469, + "grad_norm": 1.069215534600395, + "learning_rate": 1.2534075466657866e-05, + "loss": 0.24454624950885773, + "step": 3372 + }, + { + "epoch": 0.8957641747443899, + "grad_norm": 1.172481734803523, + "learning_rate": 1.2529827773405566e-05, + "loss": 0.30908581614494324, + "step": 3373 + }, + { + "epoch": 0.8960297437259328, + "grad_norm": 1.1095939186212227, + "learning_rate": 1.2525579592432304e-05, + "loss": 0.2792360782623291, + "step": 3374 + }, + { + "epoch": 0.8962953127074758, + "grad_norm": 1.0658472517819026, + "learning_rate": 1.2521330924557087e-05, + "loss": 0.285555362701416, + "step": 3375 + }, + { + "epoch": 0.8965608816890187, + 
"grad_norm": 1.1649386203925687, + "learning_rate": 1.2517081770599002e-05, + "loss": 0.3159451484680176, + "step": 3376 + }, + { + "epoch": 0.8968264506705617, + "grad_norm": 1.2867424735092035, + "learning_rate": 1.2512832131377237e-05, + "loss": 0.35929200053215027, + "step": 3377 + }, + { + "epoch": 0.8970920196521046, + "grad_norm": 1.0781651079446009, + "learning_rate": 1.2508582007711074e-05, + "loss": 0.28624874353408813, + "step": 3378 + }, + { + "epoch": 0.8973575886336476, + "grad_norm": 1.0156684050998903, + "learning_rate": 1.2504331400419884e-05, + "loss": 0.27670109272003174, + "step": 3379 + }, + { + "epoch": 0.8976231576151905, + "grad_norm": 1.0786636895703534, + "learning_rate": 1.2500080310323139e-05, + "loss": 0.2894589304924011, + "step": 3380 + }, + { + "epoch": 0.8978887265967335, + "grad_norm": 1.1385795160382524, + "learning_rate": 1.2495828738240396e-05, + "loss": 0.31378716230392456, + "step": 3381 + }, + { + "epoch": 0.8981542955782764, + "grad_norm": 1.3149597134232174, + "learning_rate": 1.2491576684991306e-05, + "loss": 0.33676713705062866, + "step": 3382 + }, + { + "epoch": 0.8984198645598194, + "grad_norm": 0.9814689350619926, + "learning_rate": 1.2487324151395618e-05, + "loss": 0.2875351011753082, + "step": 3383 + }, + { + "epoch": 0.8986854335413623, + "grad_norm": 1.1646557221945626, + "learning_rate": 1.2483071138273168e-05, + "loss": 0.29729989171028137, + "step": 3384 + }, + { + "epoch": 0.8989510025229053, + "grad_norm": 1.0864970585536224, + "learning_rate": 1.2478817646443888e-05, + "loss": 0.3227398991584778, + "step": 3385 + }, + { + "epoch": 0.8992165715044482, + "grad_norm": 1.1586445900518523, + "learning_rate": 1.2474563676727803e-05, + "loss": 0.2664690315723419, + "step": 3386 + }, + { + "epoch": 0.8994821404859913, + "grad_norm": 1.1748792923054732, + "learning_rate": 1.2470309229945021e-05, + "loss": 0.29543352127075195, + "step": 3387 + }, + { + "epoch": 0.8997477094675342, + "grad_norm": 0.9899792334789409, + 
"learning_rate": 1.2466054306915756e-05, + "loss": 0.26658856868743896, + "step": 3388 + }, + { + "epoch": 0.9000132784490772, + "grad_norm": 1.123207894421506, + "learning_rate": 1.2461798908460305e-05, + "loss": 0.2899627387523651, + "step": 3389 + }, + { + "epoch": 0.9002788474306201, + "grad_norm": 1.1137567335053833, + "learning_rate": 1.245754303539906e-05, + "loss": 0.2708336114883423, + "step": 3390 + }, + { + "epoch": 0.9005444164121631, + "grad_norm": 1.1459655330577214, + "learning_rate": 1.2453286688552502e-05, + "loss": 0.28124746680259705, + "step": 3391 + }, + { + "epoch": 0.900809985393706, + "grad_norm": 1.0470005335558448, + "learning_rate": 1.2449029868741202e-05, + "loss": 0.2599399983882904, + "step": 3392 + }, + { + "epoch": 0.901075554375249, + "grad_norm": 0.9576026734877732, + "learning_rate": 1.2444772576785828e-05, + "loss": 0.25035667419433594, + "step": 3393 + }, + { + "epoch": 0.901341123356792, + "grad_norm": 1.1148471766082222, + "learning_rate": 1.2440514813507136e-05, + "loss": 0.2772521376609802, + "step": 3394 + }, + { + "epoch": 0.9016066923383349, + "grad_norm": 1.103787889433512, + "learning_rate": 1.2436256579725969e-05, + "loss": 0.3282839357852936, + "step": 3395 + }, + { + "epoch": 0.9018722613198779, + "grad_norm": 1.080988888326222, + "learning_rate": 1.2431997876263269e-05, + "loss": 0.2507914900779724, + "step": 3396 + }, + { + "epoch": 0.9021378303014208, + "grad_norm": 1.1123927965933749, + "learning_rate": 1.2427738703940055e-05, + "loss": 0.2620914876461029, + "step": 3397 + }, + { + "epoch": 0.9024033992829638, + "grad_norm": 1.0713438905056172, + "learning_rate": 1.2423479063577458e-05, + "loss": 0.26561641693115234, + "step": 3398 + }, + { + "epoch": 0.9026689682645067, + "grad_norm": 1.151582271756571, + "learning_rate": 1.2419218955996677e-05, + "loss": 0.2998678386211395, + "step": 3399 + }, + { + "epoch": 0.9029345372460497, + "grad_norm": 1.0484454707225395, + "learning_rate": 1.2414958382019017e-05, + 
"loss": 0.2368398755788803, + "step": 3400 + }, + { + "epoch": 0.9032001062275926, + "grad_norm": 1.0429929570241405, + "learning_rate": 1.241069734246586e-05, + "loss": 0.2623558044433594, + "step": 3401 + }, + { + "epoch": 0.9034656752091356, + "grad_norm": 1.0283944167565489, + "learning_rate": 1.2406435838158686e-05, + "loss": 0.2693074941635132, + "step": 3402 + }, + { + "epoch": 0.9037312441906785, + "grad_norm": 1.1211950634171715, + "learning_rate": 1.2402173869919063e-05, + "loss": 0.2933652698993683, + "step": 3403 + }, + { + "epoch": 0.9039968131722215, + "grad_norm": 1.0858313001207585, + "learning_rate": 1.2397911438568651e-05, + "loss": 0.28515487909317017, + "step": 3404 + }, + { + "epoch": 0.9042623821537644, + "grad_norm": 1.1243916508543286, + "learning_rate": 1.2393648544929193e-05, + "loss": 0.282942533493042, + "step": 3405 + }, + { + "epoch": 0.9045279511353074, + "grad_norm": 1.112018853789466, + "learning_rate": 1.2389385189822526e-05, + "loss": 0.28300392627716064, + "step": 3406 + }, + { + "epoch": 0.9047935201168503, + "grad_norm": 1.0490322847853841, + "learning_rate": 1.2385121374070577e-05, + "loss": 0.25697019696235657, + "step": 3407 + }, + { + "epoch": 0.9050590890983933, + "grad_norm": 1.15038978087342, + "learning_rate": 1.2380857098495355e-05, + "loss": 0.31156057119369507, + "step": 3408 + }, + { + "epoch": 0.9053246580799362, + "grad_norm": 1.1544066045654053, + "learning_rate": 1.2376592363918967e-05, + "loss": 0.2943422794342041, + "step": 3409 + }, + { + "epoch": 0.9055902270614792, + "grad_norm": 0.9968457114080438, + "learning_rate": 1.2372327171163596e-05, + "loss": 0.2792074680328369, + "step": 3410 + }, + { + "epoch": 0.9058557960430221, + "grad_norm": 1.0328662447203703, + "learning_rate": 1.2368061521051526e-05, + "loss": 0.2547443211078644, + "step": 3411 + }, + { + "epoch": 0.9061213650245651, + "grad_norm": 1.068901181257851, + "learning_rate": 1.2363795414405125e-05, + "loss": 0.25637373328208923, + "step": 3412 + 
}, + { + "epoch": 0.906386934006108, + "grad_norm": 1.1660475318941728, + "learning_rate": 1.2359528852046844e-05, + "loss": 0.3269123435020447, + "step": 3413 + }, + { + "epoch": 0.906652502987651, + "grad_norm": 1.0197427295072394, + "learning_rate": 1.2355261834799232e-05, + "loss": 0.28538423776626587, + "step": 3414 + }, + { + "epoch": 0.906918071969194, + "grad_norm": 1.1343354993973966, + "learning_rate": 1.2350994363484915e-05, + "loss": 0.2961096167564392, + "step": 3415 + }, + { + "epoch": 0.907183640950737, + "grad_norm": 1.0930595123597455, + "learning_rate": 1.2346726438926613e-05, + "loss": 0.3134537935256958, + "step": 3416 + }, + { + "epoch": 0.90744920993228, + "grad_norm": 1.018679268761631, + "learning_rate": 1.2342458061947129e-05, + "loss": 0.2614031434059143, + "step": 3417 + }, + { + "epoch": 0.9077147789138229, + "grad_norm": 1.0403373381004117, + "learning_rate": 1.2338189233369357e-05, + "loss": 0.27166056632995605, + "step": 3418 + }, + { + "epoch": 0.9079803478953659, + "grad_norm": 1.0735839504787106, + "learning_rate": 1.2333919954016277e-05, + "loss": 0.26053497195243835, + "step": 3419 + }, + { + "epoch": 0.9082459168769088, + "grad_norm": 1.1112591016079632, + "learning_rate": 1.2329650224710956e-05, + "loss": 0.3109636902809143, + "step": 3420 + }, + { + "epoch": 0.9085114858584518, + "grad_norm": 1.081828404421451, + "learning_rate": 1.232538004627655e-05, + "loss": 0.2576507329940796, + "step": 3421 + }, + { + "epoch": 0.9087770548399947, + "grad_norm": 1.0981308884589311, + "learning_rate": 1.2321109419536292e-05, + "loss": 0.2525216341018677, + "step": 3422 + }, + { + "epoch": 0.9090426238215377, + "grad_norm": 1.0732531844020532, + "learning_rate": 1.2316838345313517e-05, + "loss": 0.2483336180448532, + "step": 3423 + }, + { + "epoch": 0.9093081928030806, + "grad_norm": 1.1592146270526706, + "learning_rate": 1.2312566824431631e-05, + "loss": 0.26372796297073364, + "step": 3424 + }, + { + "epoch": 0.9095737617846236, + 
"grad_norm": 1.1537675520237485, + "learning_rate": 1.2308294857714138e-05, + "loss": 0.2933644950389862, + "step": 3425 + }, + { + "epoch": 0.9098393307661665, + "grad_norm": 1.0330883162146767, + "learning_rate": 1.2304022445984618e-05, + "loss": 0.2543371915817261, + "step": 3426 + }, + { + "epoch": 0.9101048997477095, + "grad_norm": 1.1689002717846686, + "learning_rate": 1.2299749590066745e-05, + "loss": 0.29246431589126587, + "step": 3427 + }, + { + "epoch": 0.9103704687292524, + "grad_norm": 1.0141798843769114, + "learning_rate": 1.2295476290784273e-05, + "loss": 0.2475431263446808, + "step": 3428 + }, + { + "epoch": 0.9106360377107954, + "grad_norm": 1.1845034794986053, + "learning_rate": 1.2291202548961042e-05, + "loss": 0.3312363624572754, + "step": 3429 + }, + { + "epoch": 0.9109016066923383, + "grad_norm": 1.0459618447051044, + "learning_rate": 1.2286928365420987e-05, + "loss": 0.25192639231681824, + "step": 3430 + }, + { + "epoch": 0.9111671756738813, + "grad_norm": 1.2038671566275931, + "learning_rate": 1.2282653740988114e-05, + "loss": 0.23189345002174377, + "step": 3431 + }, + { + "epoch": 0.9114327446554242, + "grad_norm": 1.17767221221897, + "learning_rate": 1.2278378676486522e-05, + "loss": 0.2888398766517639, + "step": 3432 + }, + { + "epoch": 0.9116983136369672, + "grad_norm": 1.1295595703903276, + "learning_rate": 1.2274103172740387e-05, + "loss": 0.2857785224914551, + "step": 3433 + }, + { + "epoch": 0.9119638826185101, + "grad_norm": 1.039533312390003, + "learning_rate": 1.2269827230573986e-05, + "loss": 0.23961025476455688, + "step": 3434 + }, + { + "epoch": 0.9122294516000531, + "grad_norm": 1.1192521835175562, + "learning_rate": 1.2265550850811663e-05, + "loss": 0.2791004478931427, + "step": 3435 + }, + { + "epoch": 0.912495020581596, + "grad_norm": 1.052040685054951, + "learning_rate": 1.2261274034277858e-05, + "loss": 0.2875480651855469, + "step": 3436 + }, + { + "epoch": 0.912760589563139, + "grad_norm": 1.12188070500717, + 
"learning_rate": 1.2256996781797086e-05, + "loss": 0.29422929883003235, + "step": 3437 + }, + { + "epoch": 0.9130261585446819, + "grad_norm": 1.2976046274469295, + "learning_rate": 1.225271909419395e-05, + "loss": 0.27114444971084595, + "step": 3438 + }, + { + "epoch": 0.9132917275262249, + "grad_norm": 1.0684416452719028, + "learning_rate": 1.2248440972293146e-05, + "loss": 0.3007166385650635, + "step": 3439 + }, + { + "epoch": 0.9135572965077678, + "grad_norm": 1.1408150577224654, + "learning_rate": 1.224416241691944e-05, + "loss": 0.28550055623054504, + "step": 3440 + }, + { + "epoch": 0.9138228654893108, + "grad_norm": 1.1159473328967766, + "learning_rate": 1.2239883428897687e-05, + "loss": 0.2861761450767517, + "step": 3441 + }, + { + "epoch": 0.9140884344708538, + "grad_norm": 1.1186358936011263, + "learning_rate": 1.2235604009052823e-05, + "loss": 0.3288506865501404, + "step": 3442 + }, + { + "epoch": 0.9143540034523968, + "grad_norm": 1.2101661293343442, + "learning_rate": 1.2231324158209876e-05, + "loss": 0.33189019560813904, + "step": 3443 + }, + { + "epoch": 0.9146195724339398, + "grad_norm": 0.9931883995236199, + "learning_rate": 1.2227043877193947e-05, + "loss": 0.20846885442733765, + "step": 3444 + }, + { + "epoch": 0.9148851414154827, + "grad_norm": 0.9579263575635046, + "learning_rate": 1.2222763166830223e-05, + "loss": 0.25184741616249084, + "step": 3445 + }, + { + "epoch": 0.9151507103970257, + "grad_norm": 1.0775642304955, + "learning_rate": 1.2218482027943977e-05, + "loss": 0.2954701781272888, + "step": 3446 + }, + { + "epoch": 0.9154162793785686, + "grad_norm": 1.055908963813806, + "learning_rate": 1.221420046136056e-05, + "loss": 0.263336718082428, + "step": 3447 + }, + { + "epoch": 0.9156818483601116, + "grad_norm": 1.2181481624195412, + "learning_rate": 1.2209918467905405e-05, + "loss": 0.31178128719329834, + "step": 3448 + }, + { + "epoch": 0.9159474173416545, + "grad_norm": 1.1248939907914326, + "learning_rate": 1.2205636048404037e-05, + 
"loss": 0.30373090505599976, + "step": 3449 + }, + { + "epoch": 0.9162129863231975, + "grad_norm": 1.1316476755108689, + "learning_rate": 1.2201353203682052e-05, + "loss": 0.31057459115982056, + "step": 3450 + }, + { + "epoch": 0.9164785553047404, + "grad_norm": 1.0432699213656527, + "learning_rate": 1.2197069934565126e-05, + "loss": 0.26834744215011597, + "step": 3451 + }, + { + "epoch": 0.9167441242862834, + "grad_norm": 1.0235490532622333, + "learning_rate": 1.2192786241879033e-05, + "loss": 0.30224066972732544, + "step": 3452 + }, + { + "epoch": 0.9170096932678263, + "grad_norm": 1.1136690118430506, + "learning_rate": 1.2188502126449616e-05, + "loss": 0.28249508142471313, + "step": 3453 + }, + { + "epoch": 0.9172752622493693, + "grad_norm": 1.0210144972314754, + "learning_rate": 1.2184217589102798e-05, + "loss": 0.24823793768882751, + "step": 3454 + }, + { + "epoch": 0.9175408312309122, + "grad_norm": 1.1878687209379464, + "learning_rate": 1.2179932630664589e-05, + "loss": 0.32556289434432983, + "step": 3455 + }, + { + "epoch": 0.9178064002124552, + "grad_norm": 1.0899520670240972, + "learning_rate": 1.217564725196108e-05, + "loss": 0.29420584440231323, + "step": 3456 + }, + { + "epoch": 0.9180719691939981, + "grad_norm": 1.028247015068141, + "learning_rate": 1.2171361453818437e-05, + "loss": 0.29294469952583313, + "step": 3457 + }, + { + "epoch": 0.9183375381755411, + "grad_norm": 1.0399893903415627, + "learning_rate": 1.2167075237062918e-05, + "loss": 0.3173823952674866, + "step": 3458 + }, + { + "epoch": 0.918603107157084, + "grad_norm": 1.1571492956528482, + "learning_rate": 1.2162788602520851e-05, + "loss": 0.32950159907341003, + "step": 3459 + }, + { + "epoch": 0.918868676138627, + "grad_norm": 1.0478118037587627, + "learning_rate": 1.2158501551018647e-05, + "loss": 0.3011544942855835, + "step": 3460 + }, + { + "epoch": 0.91913424512017, + "grad_norm": 1.0135067760604335, + "learning_rate": 1.2154214083382802e-05, + "loss": 0.25775954127311707, + "step": 
3461 + }, + { + "epoch": 0.9193998141017129, + "grad_norm": 1.0514508898774713, + "learning_rate": 1.214992620043989e-05, + "loss": 0.286748468875885, + "step": 3462 + }, + { + "epoch": 0.9196653830832558, + "grad_norm": 1.1050004366949897, + "learning_rate": 1.214563790301656e-05, + "loss": 0.30588221549987793, + "step": 3463 + }, + { + "epoch": 0.9199309520647988, + "grad_norm": 1.0079666808538812, + "learning_rate": 1.214134919193955e-05, + "loss": 0.23506608605384827, + "step": 3464 + }, + { + "epoch": 0.9201965210463418, + "grad_norm": 1.037364536446331, + "learning_rate": 1.2137060068035672e-05, + "loss": 0.2612350285053253, + "step": 3465 + }, + { + "epoch": 0.9204620900278847, + "grad_norm": 1.0810309706979688, + "learning_rate": 1.2132770532131815e-05, + "loss": 0.3268318772315979, + "step": 3466 + }, + { + "epoch": 0.9207276590094277, + "grad_norm": 1.0723394192428657, + "learning_rate": 1.2128480585054951e-05, + "loss": 0.2970179319381714, + "step": 3467 + }, + { + "epoch": 0.9209932279909706, + "grad_norm": 1.0036147426745694, + "learning_rate": 1.2124190227632138e-05, + "loss": 0.2910206615924835, + "step": 3468 + }, + { + "epoch": 0.9212587969725136, + "grad_norm": 1.1089890742219906, + "learning_rate": 1.2119899460690496e-05, + "loss": 0.3000222444534302, + "step": 3469 + }, + { + "epoch": 0.9215243659540565, + "grad_norm": 1.1166450826016983, + "learning_rate": 1.2115608285057242e-05, + "loss": 0.30304765701293945, + "step": 3470 + }, + { + "epoch": 0.9217899349355996, + "grad_norm": 0.9893826238823328, + "learning_rate": 1.2111316701559663e-05, + "loss": 0.26393038034439087, + "step": 3471 + }, + { + "epoch": 0.9220555039171425, + "grad_norm": 1.1384217438340345, + "learning_rate": 1.2107024711025128e-05, + "loss": 0.3111063838005066, + "step": 3472 + }, + { + "epoch": 0.9223210728986855, + "grad_norm": 0.9599961450252364, + "learning_rate": 1.2102732314281073e-05, + "loss": 0.2897321581840515, + "step": 3473 + }, + { + "epoch": 0.9225866418802284, 
+ "grad_norm": 1.1396280258666305, + "learning_rate": 1.2098439512155028e-05, + "loss": 0.2835896611213684, + "step": 3474 + }, + { + "epoch": 0.9228522108617714, + "grad_norm": 1.0165194494005183, + "learning_rate": 1.2094146305474596e-05, + "loss": 0.27648821473121643, + "step": 3475 + }, + { + "epoch": 0.9231177798433143, + "grad_norm": 1.1221504506656363, + "learning_rate": 1.2089852695067457e-05, + "loss": 0.2528097629547119, + "step": 3476 + }, + { + "epoch": 0.9233833488248573, + "grad_norm": 1.1105562286202324, + "learning_rate": 1.2085558681761361e-05, + "loss": 0.2750067412853241, + "step": 3477 + }, + { + "epoch": 0.9236489178064002, + "grad_norm": 1.1199967050670125, + "learning_rate": 1.2081264266384148e-05, + "loss": 0.3115938901901245, + "step": 3478 + }, + { + "epoch": 0.9239144867879432, + "grad_norm": 1.1203071431737686, + "learning_rate": 1.2076969449763734e-05, + "loss": 0.2858419418334961, + "step": 3479 + }, + { + "epoch": 0.9241800557694861, + "grad_norm": 1.051118385350032, + "learning_rate": 1.2072674232728105e-05, + "loss": 0.24990032613277435, + "step": 3480 + }, + { + "epoch": 0.9244456247510291, + "grad_norm": 1.2991104394876676, + "learning_rate": 1.206837861610533e-05, + "loss": 0.23106999695301056, + "step": 3481 + }, + { + "epoch": 0.924711193732572, + "grad_norm": 1.0396779513824141, + "learning_rate": 1.2064082600723546e-05, + "loss": 0.2737967371940613, + "step": 3482 + }, + { + "epoch": 0.924976762714115, + "grad_norm": 1.1890061925781694, + "learning_rate": 1.2059786187410984e-05, + "loss": 0.2810317873954773, + "step": 3483 + }, + { + "epoch": 0.925242331695658, + "grad_norm": 1.1358698893490913, + "learning_rate": 1.2055489376995938e-05, + "loss": 0.30852559208869934, + "step": 3484 + }, + { + "epoch": 0.9255079006772009, + "grad_norm": 1.1003932874354148, + "learning_rate": 1.2051192170306784e-05, + "loss": 0.2956348657608032, + "step": 3485 + }, + { + "epoch": 0.9257734696587439, + "grad_norm": 1.18261367067389, + 
"learning_rate": 1.204689456817197e-05, + "loss": 0.2825953960418701, + "step": 3486 + }, + { + "epoch": 0.9260390386402868, + "grad_norm": 1.2502616697865143, + "learning_rate": 1.2042596571420025e-05, + "loss": 0.3351168632507324, + "step": 3487 + }, + { + "epoch": 0.9263046076218298, + "grad_norm": 1.2354469073344645, + "learning_rate": 1.2038298180879548e-05, + "loss": 0.2718926668167114, + "step": 3488 + }, + { + "epoch": 0.9265701766033727, + "grad_norm": 1.1387239259181285, + "learning_rate": 1.2033999397379223e-05, + "loss": 0.29036587476730347, + "step": 3489 + }, + { + "epoch": 0.9268357455849157, + "grad_norm": 0.9499049433325992, + "learning_rate": 1.2029700221747804e-05, + "loss": 0.22917689383029938, + "step": 3490 + }, + { + "epoch": 0.9271013145664586, + "grad_norm": 1.2322966399012754, + "learning_rate": 1.2025400654814119e-05, + "loss": 0.2963443398475647, + "step": 3491 + }, + { + "epoch": 0.9273668835480016, + "grad_norm": 1.100231072465541, + "learning_rate": 1.2021100697407075e-05, + "loss": 0.2866464853286743, + "step": 3492 + }, + { + "epoch": 0.9276324525295445, + "grad_norm": 1.1717529025248212, + "learning_rate": 1.2016800350355654e-05, + "loss": 0.3069216012954712, + "step": 3493 + }, + { + "epoch": 0.9278980215110875, + "grad_norm": 1.0745448017128252, + "learning_rate": 1.2012499614488913e-05, + "loss": 0.27206870913505554, + "step": 3494 + }, + { + "epoch": 0.9281635904926304, + "grad_norm": 1.0995365532444106, + "learning_rate": 1.2008198490635978e-05, + "loss": 0.32130372524261475, + "step": 3495 + }, + { + "epoch": 0.9284291594741734, + "grad_norm": 1.151015013814654, + "learning_rate": 1.2003896979626061e-05, + "loss": 0.30631259083747864, + "step": 3496 + }, + { + "epoch": 0.9286947284557163, + "grad_norm": 1.125856079122124, + "learning_rate": 1.199959508228844e-05, + "loss": 0.3005716800689697, + "step": 3497 + }, + { + "epoch": 0.9289602974372593, + "grad_norm": 0.9983757548693274, + "learning_rate": 1.1995292799452472e-05, + 
"loss": 0.2381039410829544, + "step": 3498 + }, + { + "epoch": 0.9292258664188023, + "grad_norm": 1.1338580261514946, + "learning_rate": 1.1990990131947582e-05, + "loss": 0.31764286756515503, + "step": 3499 + }, + { + "epoch": 0.9294914354003453, + "grad_norm": 1.1445030838538803, + "learning_rate": 1.1986687080603273e-05, + "loss": 0.3029370903968811, + "step": 3500 + }, + { + "epoch": 0.9297570043818882, + "grad_norm": 1.0814133109661386, + "learning_rate": 1.198238364624913e-05, + "loss": 0.30967646837234497, + "step": 3501 + }, + { + "epoch": 0.9300225733634312, + "grad_norm": 1.0376796287878236, + "learning_rate": 1.1978079829714799e-05, + "loss": 0.24687506258487701, + "step": 3502 + }, + { + "epoch": 0.9302881423449741, + "grad_norm": 1.0529899744692286, + "learning_rate": 1.1973775631830007e-05, + "loss": 0.25909408926963806, + "step": 3503 + }, + { + "epoch": 0.9305537113265171, + "grad_norm": 1.1136411983367804, + "learning_rate": 1.196947105342455e-05, + "loss": 0.281025230884552, + "step": 3504 + }, + { + "epoch": 0.93081928030806, + "grad_norm": 1.2858712177395888, + "learning_rate": 1.1965166095328302e-05, + "loss": 0.33401811122894287, + "step": 3505 + }, + { + "epoch": 0.931084849289603, + "grad_norm": 0.9732764276792689, + "learning_rate": 1.1960860758371208e-05, + "loss": 0.25839388370513916, + "step": 3506 + }, + { + "epoch": 0.931350418271146, + "grad_norm": 0.954364218435113, + "learning_rate": 1.1956555043383286e-05, + "loss": 0.23343560099601746, + "step": 3507 + }, + { + "epoch": 0.9316159872526889, + "grad_norm": 1.176408931412559, + "learning_rate": 1.1952248951194629e-05, + "loss": 0.31106436252593994, + "step": 3508 + }, + { + "epoch": 0.9318815562342319, + "grad_norm": 1.108418204277134, + "learning_rate": 1.1947942482635395e-05, + "loss": 0.29152095317840576, + "step": 3509 + }, + { + "epoch": 0.9321471252157748, + "grad_norm": 1.2651732065185788, + "learning_rate": 1.1943635638535827e-05, + "loss": 0.31517675518989563, + "step": 3510 
+ }, + { + "epoch": 0.9324126941973178, + "grad_norm": 1.2309480505410157, + "learning_rate": 1.1939328419726231e-05, + "loss": 0.33221137523651123, + "step": 3511 + }, + { + "epoch": 0.9326782631788607, + "grad_norm": 1.2277892053470791, + "learning_rate": 1.193502082703699e-05, + "loss": 0.314359575510025, + "step": 3512 + }, + { + "epoch": 0.9329438321604037, + "grad_norm": 1.129757464324541, + "learning_rate": 1.1930712861298553e-05, + "loss": 0.2879924178123474, + "step": 3513 + }, + { + "epoch": 0.9332094011419466, + "grad_norm": 1.1622909402406336, + "learning_rate": 1.1926404523341443e-05, + "loss": 0.2732955515384674, + "step": 3514 + }, + { + "epoch": 0.9334749701234896, + "grad_norm": 1.1586501434218468, + "learning_rate": 1.1922095813996264e-05, + "loss": 0.32156097888946533, + "step": 3515 + }, + { + "epoch": 0.9337405391050325, + "grad_norm": 1.110486475282156, + "learning_rate": 1.1917786734093682e-05, + "loss": 0.2694319486618042, + "step": 3516 + }, + { + "epoch": 0.9340061080865755, + "grad_norm": 1.0871387001943549, + "learning_rate": 1.1913477284464434e-05, + "loss": 0.3049655258655548, + "step": 3517 + }, + { + "epoch": 0.9342716770681184, + "grad_norm": 1.0962864613999421, + "learning_rate": 1.1909167465939334e-05, + "loss": 0.30053725838661194, + "step": 3518 + }, + { + "epoch": 0.9345372460496614, + "grad_norm": 1.0261517334123498, + "learning_rate": 1.1904857279349265e-05, + "loss": 0.2611788809299469, + "step": 3519 + }, + { + "epoch": 0.9348028150312043, + "grad_norm": 1.1400957154071245, + "learning_rate": 1.1900546725525175e-05, + "loss": 0.28344646096229553, + "step": 3520 + }, + { + "epoch": 0.9350683840127473, + "grad_norm": 1.067093022484818, + "learning_rate": 1.1896235805298093e-05, + "loss": 0.2504042685031891, + "step": 3521 + }, + { + "epoch": 0.9353339529942902, + "grad_norm": 1.0534608212516616, + "learning_rate": 1.1891924519499113e-05, + "loss": 0.27877938747406006, + "step": 3522 + }, + { + "epoch": 0.9355995219758332, + 
"grad_norm": 1.046331705593262, + "learning_rate": 1.1887612868959394e-05, + "loss": 0.28176525235176086, + "step": 3523 + }, + { + "epoch": 0.9358650909573761, + "grad_norm": 1.1750063194789062, + "learning_rate": 1.1883300854510178e-05, + "loss": 0.32376354932785034, + "step": 3524 + }, + { + "epoch": 0.9361306599389191, + "grad_norm": 1.0908366283033504, + "learning_rate": 1.1878988476982772e-05, + "loss": 0.2846054434776306, + "step": 3525 + }, + { + "epoch": 0.936396228920462, + "grad_norm": 1.0507783491664777, + "learning_rate": 1.1874675737208546e-05, + "loss": 0.25711044669151306, + "step": 3526 + }, + { + "epoch": 0.9366617979020051, + "grad_norm": 1.078360429057703, + "learning_rate": 1.1870362636018946e-05, + "loss": 0.2810837924480438, + "step": 3527 + }, + { + "epoch": 0.936927366883548, + "grad_norm": 1.2088151262046463, + "learning_rate": 1.186604917424549e-05, + "loss": 0.3090322017669678, + "step": 3528 + }, + { + "epoch": 0.937192935865091, + "grad_norm": 1.061646146170892, + "learning_rate": 1.1861735352719763e-05, + "loss": 0.2797972559928894, + "step": 3529 + }, + { + "epoch": 0.937458504846634, + "grad_norm": 1.3937474116807773, + "learning_rate": 1.1857421172273415e-05, + "loss": 0.3124893605709076, + "step": 3530 + }, + { + "epoch": 0.9377240738281769, + "grad_norm": 1.1043040217194096, + "learning_rate": 1.1853106633738174e-05, + "loss": 0.28317195177078247, + "step": 3531 + }, + { + "epoch": 0.9379896428097199, + "grad_norm": 1.0483798154842934, + "learning_rate": 1.1848791737945823e-05, + "loss": 0.27804574370384216, + "step": 3532 + }, + { + "epoch": 0.9382552117912628, + "grad_norm": 1.1007797171562173, + "learning_rate": 1.1844476485728236e-05, + "loss": 0.24936731159687042, + "step": 3533 + }, + { + "epoch": 0.9385207807728058, + "grad_norm": 1.16922301793574, + "learning_rate": 1.1840160877917335e-05, + "loss": 0.296974778175354, + "step": 3534 + }, + { + "epoch": 0.9387863497543487, + "grad_norm": 1.1172266681075624, + 
"learning_rate": 1.1835844915345117e-05, + "loss": 0.3048890233039856, + "step": 3535 + }, + { + "epoch": 0.9390519187358917, + "grad_norm": 1.0372698095624082, + "learning_rate": 1.1831528598843654e-05, + "loss": 0.2703601121902466, + "step": 3536 + }, + { + "epoch": 0.9393174877174346, + "grad_norm": 1.123009081238491, + "learning_rate": 1.1827211929245075e-05, + "loss": 0.30738013982772827, + "step": 3537 + }, + { + "epoch": 0.9395830566989776, + "grad_norm": 1.0660333251952498, + "learning_rate": 1.1822894907381589e-05, + "loss": 0.26538529992103577, + "step": 3538 + }, + { + "epoch": 0.9398486256805205, + "grad_norm": 1.1050453871275616, + "learning_rate": 1.1818577534085462e-05, + "loss": 0.26795464754104614, + "step": 3539 + }, + { + "epoch": 0.9401141946620635, + "grad_norm": 1.1533311536850575, + "learning_rate": 1.1814259810189034e-05, + "loss": 0.30891868472099304, + "step": 3540 + }, + { + "epoch": 0.9403797636436064, + "grad_norm": 1.8167204702159565, + "learning_rate": 1.1809941736524713e-05, + "loss": 0.29164037108421326, + "step": 3541 + }, + { + "epoch": 0.9406453326251494, + "grad_norm": 1.0875424396631934, + "learning_rate": 1.180562331392497e-05, + "loss": 0.30322739481925964, + "step": 3542 + }, + { + "epoch": 0.9409109016066923, + "grad_norm": 1.0765622649066557, + "learning_rate": 1.1801304543222349e-05, + "loss": 0.275432288646698, + "step": 3543 + }, + { + "epoch": 0.9411764705882353, + "grad_norm": 1.1566847425916267, + "learning_rate": 1.1796985425249459e-05, + "loss": 0.2788141965866089, + "step": 3544 + }, + { + "epoch": 0.9414420395697782, + "grad_norm": 1.203313197377309, + "learning_rate": 1.1792665960838967e-05, + "loss": 0.24254676699638367, + "step": 3545 + }, + { + "epoch": 0.9417076085513212, + "grad_norm": 1.1050026210111878, + "learning_rate": 1.1788346150823625e-05, + "loss": 0.2803058326244354, + "step": 3546 + }, + { + "epoch": 0.9419731775328641, + "grad_norm": 1.0993090963339842, + "learning_rate": 1.1784025996036232e-05, 
+ "loss": 0.3068317174911499, + "step": 3547 + }, + { + "epoch": 0.9422387465144071, + "grad_norm": 0.9977731134117688, + "learning_rate": 1.1779705497309673e-05, + "loss": 0.23124024271965027, + "step": 3548 + }, + { + "epoch": 0.94250431549595, + "grad_norm": 1.080710306089679, + "learning_rate": 1.177538465547688e-05, + "loss": 0.2815462648868561, + "step": 3549 + }, + { + "epoch": 0.942769884477493, + "grad_norm": 1.1118952137889662, + "learning_rate": 1.1771063471370862e-05, + "loss": 0.29448196291923523, + "step": 3550 + }, + { + "epoch": 0.9430354534590359, + "grad_norm": 1.2691077751501818, + "learning_rate": 1.1766741945824698e-05, + "loss": 0.3176615834236145, + "step": 3551 + }, + { + "epoch": 0.9433010224405789, + "grad_norm": 1.1390071879475103, + "learning_rate": 1.1762420079671527e-05, + "loss": 0.29126274585723877, + "step": 3552 + }, + { + "epoch": 0.9435665914221218, + "grad_norm": 1.084504171285626, + "learning_rate": 1.1758097873744547e-05, + "loss": 0.27074337005615234, + "step": 3553 + }, + { + "epoch": 0.9438321604036648, + "grad_norm": 1.0495499557301764, + "learning_rate": 1.175377532887703e-05, + "loss": 0.2756083011627197, + "step": 3554 + }, + { + "epoch": 0.9440977293852079, + "grad_norm": 1.1028881447166687, + "learning_rate": 1.1749452445902315e-05, + "loss": 0.26918384432792664, + "step": 3555 + }, + { + "epoch": 0.9443632983667508, + "grad_norm": 1.0856468025535497, + "learning_rate": 1.17451292256538e-05, + "loss": 0.2550349235534668, + "step": 3556 + }, + { + "epoch": 0.9446288673482938, + "grad_norm": 1.0791996633460945, + "learning_rate": 1.1740805668964954e-05, + "loss": 0.2601481080055237, + "step": 3557 + }, + { + "epoch": 0.9448944363298367, + "grad_norm": 1.1367109564667788, + "learning_rate": 1.1736481776669307e-05, + "loss": 0.2848352789878845, + "step": 3558 + }, + { + "epoch": 0.9451600053113797, + "grad_norm": 1.1168278064757895, + "learning_rate": 1.173215754960045e-05, + "loss": 0.266584575176239, + "step": 3559 + }, 
+ { + "epoch": 0.9454255742929226, + "grad_norm": 0.9979692557530664, + "learning_rate": 1.172783298859205e-05, + "loss": 0.25037410855293274, + "step": 3560 + }, + { + "epoch": 0.9456911432744656, + "grad_norm": 1.1049326363207628, + "learning_rate": 1.1723508094477825e-05, + "loss": 0.30239278078079224, + "step": 3561 + }, + { + "epoch": 0.9459567122560085, + "grad_norm": 1.0413977608943958, + "learning_rate": 1.1719182868091567e-05, + "loss": 0.2893553078174591, + "step": 3562 + }, + { + "epoch": 0.9462222812375515, + "grad_norm": 1.215187947788902, + "learning_rate": 1.1714857310267124e-05, + "loss": 0.2840202748775482, + "step": 3563 + }, + { + "epoch": 0.9464878502190944, + "grad_norm": 1.0615180068139964, + "learning_rate": 1.1710531421838422e-05, + "loss": 0.2614031732082367, + "step": 3564 + }, + { + "epoch": 0.9467534192006374, + "grad_norm": 1.0290230331800772, + "learning_rate": 1.1706205203639433e-05, + "loss": 0.267095148563385, + "step": 3565 + }, + { + "epoch": 0.9470189881821803, + "grad_norm": 1.2397291626994196, + "learning_rate": 1.1701878656504206e-05, + "loss": 0.25835227966308594, + "step": 3566 + }, + { + "epoch": 0.9472845571637233, + "grad_norm": 1.1319162410146095, + "learning_rate": 1.1697551781266845e-05, + "loss": 0.27547580003738403, + "step": 3567 + }, + { + "epoch": 0.9475501261452662, + "grad_norm": 1.089656044815204, + "learning_rate": 1.169322457876152e-05, + "loss": 0.251165509223938, + "step": 3568 + }, + { + "epoch": 0.9478156951268092, + "grad_norm": 1.2350323802819905, + "learning_rate": 1.1688897049822467e-05, + "loss": 0.2738516926765442, + "step": 3569 + }, + { + "epoch": 0.9480812641083521, + "grad_norm": 1.0315369616879289, + "learning_rate": 1.1684569195283981e-05, + "loss": 0.2745274305343628, + "step": 3570 + }, + { + "epoch": 0.9483468330898951, + "grad_norm": 1.180099592022995, + "learning_rate": 1.1680241015980423e-05, + "loss": 0.28586819767951965, + "step": 3571 + }, + { + "epoch": 0.948612402071438, + 
"grad_norm": 1.2233918967574897, + "learning_rate": 1.167591251274621e-05, + "loss": 0.2559577524662018, + "step": 3572 + }, + { + "epoch": 0.948877971052981, + "grad_norm": 1.155824963337958, + "learning_rate": 1.1671583686415833e-05, + "loss": 0.26069143414497375, + "step": 3573 + }, + { + "epoch": 0.949143540034524, + "grad_norm": 1.078529730225554, + "learning_rate": 1.1667254537823838e-05, + "loss": 0.26866453886032104, + "step": 3574 + }, + { + "epoch": 0.9494091090160669, + "grad_norm": 1.0772599867154102, + "learning_rate": 1.166292506780483e-05, + "loss": 0.25285348296165466, + "step": 3575 + }, + { + "epoch": 0.9496746779976099, + "grad_norm": 1.1335172942215501, + "learning_rate": 1.1658595277193479e-05, + "loss": 0.3330434262752533, + "step": 3576 + }, + { + "epoch": 0.9499402469791528, + "grad_norm": 1.076438251163932, + "learning_rate": 1.1654265166824522e-05, + "loss": 0.2789473533630371, + "step": 3577 + }, + { + "epoch": 0.9502058159606958, + "grad_norm": 1.2746037306212283, + "learning_rate": 1.164993473753275e-05, + "loss": 0.30984824895858765, + "step": 3578 + }, + { + "epoch": 0.9504713849422387, + "grad_norm": 1.0517088315750878, + "learning_rate": 1.164560399015302e-05, + "loss": 0.23881833255290985, + "step": 3579 + }, + { + "epoch": 0.9507369539237817, + "grad_norm": 1.1012484750770577, + "learning_rate": 1.164127292552025e-05, + "loss": 0.3027937114238739, + "step": 3580 + }, + { + "epoch": 0.9510025229053246, + "grad_norm": 1.1998484228117954, + "learning_rate": 1.1636941544469413e-05, + "loss": 0.2901906371116638, + "step": 3581 + }, + { + "epoch": 0.9512680918868676, + "grad_norm": 1.069491787313744, + "learning_rate": 1.1632609847835556e-05, + "loss": 0.28961148858070374, + "step": 3582 + }, + { + "epoch": 0.9515336608684106, + "grad_norm": 1.0782542825887276, + "learning_rate": 1.1628277836453774e-05, + "loss": 0.2730783224105835, + "step": 3583 + }, + { + "epoch": 0.9517992298499536, + "grad_norm": 1.0952017771476839, + 
"learning_rate": 1.1623945511159232e-05, + "loss": 0.3195485770702362, + "step": 3584 + }, + { + "epoch": 0.9520647988314965, + "grad_norm": 1.1514370971708257, + "learning_rate": 1.1619612872787144e-05, + "loss": 0.3097516894340515, + "step": 3585 + }, + { + "epoch": 0.9523303678130395, + "grad_norm": 1.0422990071728377, + "learning_rate": 1.1615279922172796e-05, + "loss": 0.2716284692287445, + "step": 3586 + }, + { + "epoch": 0.9525959367945824, + "grad_norm": 0.9669355988334725, + "learning_rate": 1.1610946660151531e-05, + "loss": 0.2601209878921509, + "step": 3587 + }, + { + "epoch": 0.9528615057761254, + "grad_norm": 1.1027425019898653, + "learning_rate": 1.1606613087558748e-05, + "loss": 0.28665289282798767, + "step": 3588 + }, + { + "epoch": 0.9531270747576683, + "grad_norm": 1.082078861677668, + "learning_rate": 1.1602279205229912e-05, + "loss": 0.3019893765449524, + "step": 3589 + }, + { + "epoch": 0.9533926437392113, + "grad_norm": 0.9778282797717269, + "learning_rate": 1.1597945014000537e-05, + "loss": 0.2635146677494049, + "step": 3590 + }, + { + "epoch": 0.9536582127207542, + "grad_norm": 1.0527782897227813, + "learning_rate": 1.1593610514706217e-05, + "loss": 0.2704858183860779, + "step": 3591 + }, + { + "epoch": 0.9539237817022972, + "grad_norm": 1.2295509988273574, + "learning_rate": 1.1589275708182581e-05, + "loss": 0.31997931003570557, + "step": 3592 + }, + { + "epoch": 0.9541893506838401, + "grad_norm": 1.1529907760165448, + "learning_rate": 1.1584940595265332e-05, + "loss": 0.2308788150548935, + "step": 3593 + }, + { + "epoch": 0.9544549196653831, + "grad_norm": 1.0980235303762964, + "learning_rate": 1.1580605176790229e-05, + "loss": 0.28886470198631287, + "step": 3594 + }, + { + "epoch": 0.954720488646926, + "grad_norm": 1.313883667721807, + "learning_rate": 1.157626945359309e-05, + "loss": 0.30698686838150024, + "step": 3595 + }, + { + "epoch": 0.954986057628469, + "grad_norm": 1.1087251273709688, + "learning_rate": 1.1571933426509789e-05, + 
"loss": 0.27475905418395996, + "step": 3596 + }, + { + "epoch": 0.955251626610012, + "grad_norm": 1.1064883207545173, + "learning_rate": 1.1567597096376264e-05, + "loss": 0.2568071484565735, + "step": 3597 + }, + { + "epoch": 0.9555171955915549, + "grad_norm": 1.28706485993144, + "learning_rate": 1.1563260464028507e-05, + "loss": 0.2574060261249542, + "step": 3598 + }, + { + "epoch": 0.9557827645730979, + "grad_norm": 1.193494963897618, + "learning_rate": 1.1558923530302571e-05, + "loss": 0.2847997546195984, + "step": 3599 + }, + { + "epoch": 0.9560483335546408, + "grad_norm": 1.0723094070831873, + "learning_rate": 1.155458629603456e-05, + "loss": 0.2594734728336334, + "step": 3600 + }, + { + "epoch": 0.9563139025361838, + "grad_norm": 1.0020160427681732, + "learning_rate": 1.155024876206065e-05, + "loss": 0.2300589680671692, + "step": 3601 + }, + { + "epoch": 0.9565794715177267, + "grad_norm": 1.1475438454718678, + "learning_rate": 1.1545910929217059e-05, + "loss": 0.29174795746803284, + "step": 3602 + }, + { + "epoch": 0.9568450404992697, + "grad_norm": 1.0425930414114217, + "learning_rate": 1.1541572798340076e-05, + "loss": 0.2666400074958801, + "step": 3603 + }, + { + "epoch": 0.9571106094808126, + "grad_norm": 1.0067559469755134, + "learning_rate": 1.1537234370266035e-05, + "loss": 0.24651308357715607, + "step": 3604 + }, + { + "epoch": 0.9573761784623556, + "grad_norm": 1.1542471481522265, + "learning_rate": 1.1532895645831339e-05, + "loss": 0.29991376399993896, + "step": 3605 + }, + { + "epoch": 0.9576417474438985, + "grad_norm": 1.0631305192934537, + "learning_rate": 1.1528556625872443e-05, + "loss": 0.27713578939437866, + "step": 3606 + }, + { + "epoch": 0.9579073164254415, + "grad_norm": 1.0497999275546905, + "learning_rate": 1.1524217311225857e-05, + "loss": 0.26503294706344604, + "step": 3607 + }, + { + "epoch": 0.9581728854069844, + "grad_norm": 1.1479000180189152, + "learning_rate": 1.1519877702728149e-05, + "loss": 0.28627675771713257, + "step": 3608 
+ }, + { + "epoch": 0.9584384543885274, + "grad_norm": 1.0333891142616893, + "learning_rate": 1.1515537801215944e-05, + "loss": 0.26862916350364685, + "step": 3609 + }, + { + "epoch": 0.9587040233700703, + "grad_norm": 1.2518522451268181, + "learning_rate": 1.1511197607525926e-05, + "loss": 0.29697147011756897, + "step": 3610 + }, + { + "epoch": 0.9589695923516134, + "grad_norm": 1.0668919106736792, + "learning_rate": 1.1506857122494832e-05, + "loss": 0.2980155944824219, + "step": 3611 + }, + { + "epoch": 0.9592351613331563, + "grad_norm": 1.1016644329026075, + "learning_rate": 1.1502516346959458e-05, + "loss": 0.2847440838813782, + "step": 3612 + }, + { + "epoch": 0.9595007303146993, + "grad_norm": 1.1131533712076647, + "learning_rate": 1.149817528175665e-05, + "loss": 0.2812016010284424, + "step": 3613 + }, + { + "epoch": 0.9597662992962422, + "grad_norm": 1.0387818826049915, + "learning_rate": 1.1493833927723319e-05, + "loss": 0.26856982707977295, + "step": 3614 + }, + { + "epoch": 0.9600318682777852, + "grad_norm": 1.0595715138301371, + "learning_rate": 1.1489492285696424e-05, + "loss": 0.2651693820953369, + "step": 3615 + }, + { + "epoch": 0.9602974372593281, + "grad_norm": 1.1384265947297394, + "learning_rate": 1.1485150356512986e-05, + "loss": 0.29811644554138184, + "step": 3616 + }, + { + "epoch": 0.9605630062408711, + "grad_norm": 1.0449713925688802, + "learning_rate": 1.1480808141010071e-05, + "loss": 0.2622855007648468, + "step": 3617 + }, + { + "epoch": 0.960828575222414, + "grad_norm": 1.1964334046740135, + "learning_rate": 1.1476465640024814e-05, + "loss": 0.3067246377468109, + "step": 3618 + }, + { + "epoch": 0.961094144203957, + "grad_norm": 1.0999678942020576, + "learning_rate": 1.1472122854394394e-05, + "loss": 0.25928011536598206, + "step": 3619 + }, + { + "epoch": 0.9613597131855, + "grad_norm": 1.0356853160291564, + "learning_rate": 1.146777978495605e-05, + "loss": 0.2574170231819153, + "step": 3620 + }, + { + "epoch": 0.9616252821670429, + 
"grad_norm": 1.1366453776894136, + "learning_rate": 1.1463436432547073e-05, + "loss": 0.2845388650894165, + "step": 3621 + }, + { + "epoch": 0.9618908511485859, + "grad_norm": 1.1067131961561003, + "learning_rate": 1.145909279800481e-05, + "loss": 0.28735876083374023, + "step": 3622 + }, + { + "epoch": 0.9621564201301288, + "grad_norm": 1.100639151702203, + "learning_rate": 1.1454748882166666e-05, + "loss": 0.25739723443984985, + "step": 3623 + }, + { + "epoch": 0.9624219891116718, + "grad_norm": 1.0743852778260963, + "learning_rate": 1.1450404685870098e-05, + "loss": 0.25144338607788086, + "step": 3624 + }, + { + "epoch": 0.9626875580932147, + "grad_norm": 1.0451944769292063, + "learning_rate": 1.144606020995261e-05, + "loss": 0.23981891572475433, + "step": 3625 + }, + { + "epoch": 0.9629531270747577, + "grad_norm": 1.1215387475511582, + "learning_rate": 1.1441715455251764e-05, + "loss": 0.30925339460372925, + "step": 3626 + }, + { + "epoch": 0.9632186960563006, + "grad_norm": 1.1193965021491372, + "learning_rate": 1.1437370422605184e-05, + "loss": 0.2559184432029724, + "step": 3627 + }, + { + "epoch": 0.9634842650378436, + "grad_norm": 1.221260182162867, + "learning_rate": 1.1433025112850542e-05, + "loss": 0.3001229166984558, + "step": 3628 + }, + { + "epoch": 0.9637498340193865, + "grad_norm": 0.9957913669659347, + "learning_rate": 1.1428679526825557e-05, + "loss": 0.24304218590259552, + "step": 3629 + }, + { + "epoch": 0.9640154030009295, + "grad_norm": 1.0405086595778643, + "learning_rate": 1.1424333665368011e-05, + "loss": 0.25677186250686646, + "step": 3630 + }, + { + "epoch": 0.9642809719824724, + "grad_norm": 1.0362119568252992, + "learning_rate": 1.141998752931573e-05, + "loss": 0.2589085102081299, + "step": 3631 + }, + { + "epoch": 0.9645465409640154, + "grad_norm": 1.1004952842028541, + "learning_rate": 1.1415641119506601e-05, + "loss": 0.2588059604167938, + "step": 3632 + }, + { + "epoch": 0.9648121099455583, + "grad_norm": 1.1379378571012249, + 
"learning_rate": 1.1411294436778562e-05, + "loss": 0.26097869873046875, + "step": 3633 + }, + { + "epoch": 0.9650776789271013, + "grad_norm": 1.2218308438631786, + "learning_rate": 1.1406947481969598e-05, + "loss": 0.26022520661354065, + "step": 3634 + }, + { + "epoch": 0.9653432479086442, + "grad_norm": 1.0737420773814035, + "learning_rate": 1.140260025591775e-05, + "loss": 0.26242876052856445, + "step": 3635 + }, + { + "epoch": 0.9656088168901872, + "grad_norm": 1.1396910340144906, + "learning_rate": 1.1398252759461119e-05, + "loss": 0.30035555362701416, + "step": 3636 + }, + { + "epoch": 0.9658743858717301, + "grad_norm": 1.1365210980452296, + "learning_rate": 1.1393904993437848e-05, + "loss": 0.26388341188430786, + "step": 3637 + }, + { + "epoch": 0.9661399548532731, + "grad_norm": 1.06242333907382, + "learning_rate": 1.1389556958686132e-05, + "loss": 0.28116434812545776, + "step": 3638 + }, + { + "epoch": 0.966405523834816, + "grad_norm": 1.0513966621960738, + "learning_rate": 1.1385208656044222e-05, + "loss": 0.25372493267059326, + "step": 3639 + }, + { + "epoch": 0.9666710928163591, + "grad_norm": 1.1171784181414381, + "learning_rate": 1.1380860086350422e-05, + "loss": 0.2648317813873291, + "step": 3640 + }, + { + "epoch": 0.966936661797902, + "grad_norm": 1.0508956007113521, + "learning_rate": 1.1376511250443082e-05, + "loss": 0.26981276273727417, + "step": 3641 + }, + { + "epoch": 0.967202230779445, + "grad_norm": 1.1513465918880585, + "learning_rate": 1.1372162149160608e-05, + "loss": 0.2934207618236542, + "step": 3642 + }, + { + "epoch": 0.967467799760988, + "grad_norm": 0.9705407845284122, + "learning_rate": 1.1367812783341454e-05, + "loss": 0.24250900745391846, + "step": 3643 + }, + { + "epoch": 0.9677333687425309, + "grad_norm": 1.0409007473472116, + "learning_rate": 1.1363463153824125e-05, + "loss": 0.2565772235393524, + "step": 3644 + }, + { + "epoch": 0.9679989377240739, + "grad_norm": 1.2386980142351325, + "learning_rate": 1.1359113261447183e-05, 
+ "loss": 0.28407829999923706, + "step": 3645 + }, + { + "epoch": 0.9682645067056168, + "grad_norm": 1.1134220293120092, + "learning_rate": 1.1354763107049234e-05, + "loss": 0.2974489629268646, + "step": 3646 + }, + { + "epoch": 0.9685300756871598, + "grad_norm": 1.1611486704366027, + "learning_rate": 1.1350412691468935e-05, + "loss": 0.27539899945259094, + "step": 3647 + }, + { + "epoch": 0.9687956446687027, + "grad_norm": 1.1777496863563888, + "learning_rate": 1.1346062015544997e-05, + "loss": 0.28256523609161377, + "step": 3648 + }, + { + "epoch": 0.9690612136502457, + "grad_norm": 1.0910813538672366, + "learning_rate": 1.1341711080116176e-05, + "loss": 0.27582883834838867, + "step": 3649 + }, + { + "epoch": 0.9693267826317886, + "grad_norm": 1.2299419127493794, + "learning_rate": 1.1337359886021285e-05, + "loss": 0.3199389576911926, + "step": 3650 + }, + { + "epoch": 0.9695923516133316, + "grad_norm": 1.078226808322517, + "learning_rate": 1.1333008434099178e-05, + "loss": 0.2922326922416687, + "step": 3651 + }, + { + "epoch": 0.9698579205948745, + "grad_norm": 1.1833154338367669, + "learning_rate": 1.1328656725188767e-05, + "loss": 0.285635381937027, + "step": 3652 + }, + { + "epoch": 0.9701234895764175, + "grad_norm": 1.1606724829825772, + "learning_rate": 1.1324304760129009e-05, + "loss": 0.3347492814064026, + "step": 3653 + }, + { + "epoch": 0.9703890585579604, + "grad_norm": 1.1079831575977723, + "learning_rate": 1.1319952539758912e-05, + "loss": 0.27379873394966125, + "step": 3654 + }, + { + "epoch": 0.9706546275395034, + "grad_norm": 1.2487680540467303, + "learning_rate": 1.1315600064917534e-05, + "loss": 0.27911311388015747, + "step": 3655 + }, + { + "epoch": 0.9709201965210463, + "grad_norm": 1.187492816658345, + "learning_rate": 1.1311247336443982e-05, + "loss": 0.25750118494033813, + "step": 3656 + }, + { + "epoch": 0.9711857655025893, + "grad_norm": 1.1010343448161526, + "learning_rate": 1.1306894355177405e-05, + "loss": 0.28723078966140747, + 
"step": 3657 + }, + { + "epoch": 0.9714513344841322, + "grad_norm": 1.0378840795289885, + "learning_rate": 1.1302541121957008e-05, + "loss": 0.25269389152526855, + "step": 3658 + }, + { + "epoch": 0.9717169034656752, + "grad_norm": 1.1923604766845932, + "learning_rate": 1.1298187637622046e-05, + "loss": 0.3041607439517975, + "step": 3659 + }, + { + "epoch": 0.9719824724472181, + "grad_norm": 1.0812687625707742, + "learning_rate": 1.1293833903011819e-05, + "loss": 0.2826605439186096, + "step": 3660 + }, + { + "epoch": 0.9722480414287611, + "grad_norm": 1.1010565715724137, + "learning_rate": 1.1289479918965675e-05, + "loss": 0.2830520570278168, + "step": 3661 + }, + { + "epoch": 0.972513610410304, + "grad_norm": 1.0160541896764337, + "learning_rate": 1.1285125686323011e-05, + "loss": 0.24295952916145325, + "step": 3662 + }, + { + "epoch": 0.972779179391847, + "grad_norm": 1.108181435484162, + "learning_rate": 1.1280771205923269e-05, + "loss": 0.28775808215141296, + "step": 3663 + }, + { + "epoch": 0.97304474837339, + "grad_norm": 0.9715417125511246, + "learning_rate": 1.127641647860595e-05, + "loss": 0.24650296568870544, + "step": 3664 + }, + { + "epoch": 0.9733103173549329, + "grad_norm": 0.9305293200248026, + "learning_rate": 1.1272061505210584e-05, + "loss": 0.22344040870666504, + "step": 3665 + }, + { + "epoch": 0.9735758863364758, + "grad_norm": 1.0859092127038839, + "learning_rate": 1.1267706286576759e-05, + "loss": 0.26920852065086365, + "step": 3666 + }, + { + "epoch": 0.9738414553180188, + "grad_norm": 1.1792674236289236, + "learning_rate": 1.1263350823544115e-05, + "loss": 0.27615875005722046, + "step": 3667 + }, + { + "epoch": 0.9741070242995619, + "grad_norm": 1.0470064037587914, + "learning_rate": 1.1258995116952334e-05, + "loss": 0.2768712043762207, + "step": 3668 + }, + { + "epoch": 0.9743725932811048, + "grad_norm": 1.0568329464095596, + "learning_rate": 1.1254639167641141e-05, + "loss": 0.27764153480529785, + "step": 3669 + }, + { + "epoch": 
0.9746381622626478, + "grad_norm": 1.139437307258024, + "learning_rate": 1.1250282976450316e-05, + "loss": 0.27423611283302307, + "step": 3670 + }, + { + "epoch": 0.9749037312441907, + "grad_norm": 1.1238013222894891, + "learning_rate": 1.1245926544219676e-05, + "loss": 0.2626228332519531, + "step": 3671 + }, + { + "epoch": 0.9751693002257337, + "grad_norm": 1.2807555997920204, + "learning_rate": 1.1241569871789096e-05, + "loss": 0.25524014234542847, + "step": 3672 + }, + { + "epoch": 0.9754348692072766, + "grad_norm": 1.1042234540757712, + "learning_rate": 1.1237212959998485e-05, + "loss": 0.30857735872268677, + "step": 3673 + }, + { + "epoch": 0.9757004381888196, + "grad_norm": 1.0235359310129009, + "learning_rate": 1.1232855809687807e-05, + "loss": 0.25099021196365356, + "step": 3674 + }, + { + "epoch": 0.9759660071703625, + "grad_norm": 1.0116202981123898, + "learning_rate": 1.1228498421697068e-05, + "loss": 0.22664576768875122, + "step": 3675 + }, + { + "epoch": 0.9762315761519055, + "grad_norm": 1.151038777130998, + "learning_rate": 1.1224140796866322e-05, + "loss": 0.24727366864681244, + "step": 3676 + }, + { + "epoch": 0.9764971451334484, + "grad_norm": 1.160849411640656, + "learning_rate": 1.121978293603567e-05, + "loss": 0.2561935782432556, + "step": 3677 + }, + { + "epoch": 0.9767627141149914, + "grad_norm": 1.10648815955184, + "learning_rate": 1.1215424840045254e-05, + "loss": 0.2594214677810669, + "step": 3678 + }, + { + "epoch": 0.9770282830965343, + "grad_norm": 1.130419852826836, + "learning_rate": 1.1211066509735265e-05, + "loss": 0.2383778691291809, + "step": 3679 + }, + { + "epoch": 0.9772938520780773, + "grad_norm": 1.2393377504128167, + "learning_rate": 1.1206707945945934e-05, + "loss": 0.2864387035369873, + "step": 3680 + }, + { + "epoch": 0.9775594210596202, + "grad_norm": 1.2012269867709167, + "learning_rate": 1.1202349149517541e-05, + "loss": 0.30415672063827515, + "step": 3681 + }, + { + "epoch": 0.9778249900411632, + "grad_norm": 
1.1590063847406842, + "learning_rate": 1.1197990121290415e-05, + "loss": 0.3030807375907898, + "step": 3682 + }, + { + "epoch": 0.9780905590227061, + "grad_norm": 1.1251124481371277, + "learning_rate": 1.1193630862104922e-05, + "loss": 0.2518938481807709, + "step": 3683 + }, + { + "epoch": 0.9783561280042491, + "grad_norm": 1.2096921428918863, + "learning_rate": 1.1189271372801474e-05, + "loss": 0.25353187322616577, + "step": 3684 + }, + { + "epoch": 0.978621696985792, + "grad_norm": 1.401372369430627, + "learning_rate": 1.1184911654220534e-05, + "loss": 0.30639684200286865, + "step": 3685 + }, + { + "epoch": 0.978887265967335, + "grad_norm": 1.1636733460077495, + "learning_rate": 1.1180551707202602e-05, + "loss": 0.295099139213562, + "step": 3686 + }, + { + "epoch": 0.979152834948878, + "grad_norm": 1.0596592048702305, + "learning_rate": 1.1176191532588224e-05, + "loss": 0.2428167164325714, + "step": 3687 + }, + { + "epoch": 0.9794184039304209, + "grad_norm": 1.0401088292404943, + "learning_rate": 1.1171831131217989e-05, + "loss": 0.2716362774372101, + "step": 3688 + }, + { + "epoch": 0.9796839729119639, + "grad_norm": 1.1130709970940986, + "learning_rate": 1.1167470503932534e-05, + "loss": 0.28350287675857544, + "step": 3689 + }, + { + "epoch": 0.9799495418935068, + "grad_norm": 1.0214004744947676, + "learning_rate": 1.1163109651572535e-05, + "loss": 0.2776945233345032, + "step": 3690 + }, + { + "epoch": 0.9802151108750498, + "grad_norm": 1.041237294346951, + "learning_rate": 1.115874857497871e-05, + "loss": 0.2712942063808441, + "step": 3691 + }, + { + "epoch": 0.9804806798565927, + "grad_norm": 1.058232702389033, + "learning_rate": 1.1154387274991829e-05, + "loss": 0.2530008852481842, + "step": 3692 + }, + { + "epoch": 0.9807462488381357, + "grad_norm": 1.0327043619893976, + "learning_rate": 1.1150025752452693e-05, + "loss": 0.24889500439167023, + "step": 3693 + }, + { + "epoch": 0.9810118178196786, + "grad_norm": 1.1013842404358833, + "learning_rate": 
1.1145664008202158e-05, + "loss": 0.3051255941390991, + "step": 3694 + }, + { + "epoch": 0.9812773868012216, + "grad_norm": 1.0503003262830894, + "learning_rate": 1.1141302043081112e-05, + "loss": 0.24781765043735504, + "step": 3695 + }, + { + "epoch": 0.9815429557827646, + "grad_norm": 1.2510153019418302, + "learning_rate": 1.1136939857930497e-05, + "loss": 0.3021858036518097, + "step": 3696 + }, + { + "epoch": 0.9818085247643076, + "grad_norm": 1.1052947984569603, + "learning_rate": 1.1132577453591284e-05, + "loss": 0.3026372194290161, + "step": 3697 + }, + { + "epoch": 0.9820740937458505, + "grad_norm": 1.2367828155450835, + "learning_rate": 1.1128214830904494e-05, + "loss": 0.31511861085891724, + "step": 3698 + }, + { + "epoch": 0.9823396627273935, + "grad_norm": 1.076549494496895, + "learning_rate": 1.112385199071119e-05, + "loss": 0.27885258197784424, + "step": 3699 + }, + { + "epoch": 0.9826052317089364, + "grad_norm": 1.0546536629749794, + "learning_rate": 1.1119488933852477e-05, + "loss": 0.2724893391132355, + "step": 3700 + }, + { + "epoch": 0.9828708006904794, + "grad_norm": 1.0683428715266594, + "learning_rate": 1.1115125661169503e-05, + "loss": 0.2836218774318695, + "step": 3701 + }, + { + "epoch": 0.9831363696720223, + "grad_norm": 1.1039385208642913, + "learning_rate": 1.111076217350345e-05, + "loss": 0.24220457673072815, + "step": 3702 + }, + { + "epoch": 0.9834019386535653, + "grad_norm": 1.1586770288767172, + "learning_rate": 1.1106398471695554e-05, + "loss": 0.28599557280540466, + "step": 3703 + }, + { + "epoch": 0.9836675076351082, + "grad_norm": 1.0806945340822165, + "learning_rate": 1.110203455658708e-05, + "loss": 0.30559849739074707, + "step": 3704 + }, + { + "epoch": 0.9839330766166512, + "grad_norm": 1.0573640293446354, + "learning_rate": 1.109767042901934e-05, + "loss": 0.2763117551803589, + "step": 3705 + }, + { + "epoch": 0.9841986455981941, + "grad_norm": 0.9563131800944344, + "learning_rate": 1.109330608983369e-05, + "loss": 
0.2028101086616516, + "step": 3706 + }, + { + "epoch": 0.9844642145797371, + "grad_norm": 0.9787835815750591, + "learning_rate": 1.1088941539871515e-05, + "loss": 0.25386112928390503, + "step": 3707 + }, + { + "epoch": 0.98472978356128, + "grad_norm": 1.075996733851366, + "learning_rate": 1.1084576779974257e-05, + "loss": 0.2588289976119995, + "step": 3708 + }, + { + "epoch": 0.984995352542823, + "grad_norm": 1.3003014971272602, + "learning_rate": 1.1080211810983385e-05, + "loss": 0.3201071321964264, + "step": 3709 + }, + { + "epoch": 0.985260921524366, + "grad_norm": 1.2030478206249715, + "learning_rate": 1.107584663374042e-05, + "loss": 0.28439003229141235, + "step": 3710 + }, + { + "epoch": 0.9855264905059089, + "grad_norm": 1.060347062251152, + "learning_rate": 1.1071481249086908e-05, + "loss": 0.2734091579914093, + "step": 3711 + }, + { + "epoch": 0.9857920594874519, + "grad_norm": 1.2115603819692051, + "learning_rate": 1.1067115657864451e-05, + "loss": 0.2917581796646118, + "step": 3712 + }, + { + "epoch": 0.9860576284689948, + "grad_norm": 1.2063997459644484, + "learning_rate": 1.1062749860914681e-05, + "loss": 0.3569914996623993, + "step": 3713 + }, + { + "epoch": 0.9863231974505378, + "grad_norm": 1.127711451799425, + "learning_rate": 1.1058383859079271e-05, + "loss": 0.2574514150619507, + "step": 3714 + }, + { + "epoch": 0.9865887664320807, + "grad_norm": 1.119813552337215, + "learning_rate": 1.1054017653199936e-05, + "loss": 0.3035826086997986, + "step": 3715 + }, + { + "epoch": 0.9868543354136237, + "grad_norm": 1.5863085854725767, + "learning_rate": 1.1049651244118424e-05, + "loss": 0.28067824244499207, + "step": 3716 + }, + { + "epoch": 0.9871199043951666, + "grad_norm": 1.0916600834300794, + "learning_rate": 1.1045284632676535e-05, + "loss": 0.2511579394340515, + "step": 3717 + }, + { + "epoch": 0.9873854733767096, + "grad_norm": 1.2657546371764674, + "learning_rate": 1.1040917819716097e-05, + "loss": 0.3059889078140259, + "step": 3718 + }, + { + 
"epoch": 0.9876510423582525, + "grad_norm": 1.1224253435238671, + "learning_rate": 1.103655080607898e-05, + "loss": 0.2642200291156769, + "step": 3719 + }, + { + "epoch": 0.9879166113397955, + "grad_norm": 1.0969568004465404, + "learning_rate": 1.1032183592607094e-05, + "loss": 0.2743483781814575, + "step": 3720 + }, + { + "epoch": 0.9881821803213384, + "grad_norm": 1.1317768374698567, + "learning_rate": 1.1027816180142383e-05, + "loss": 0.2597433030605316, + "step": 3721 + }, + { + "epoch": 0.9884477493028814, + "grad_norm": 1.0759312888673545, + "learning_rate": 1.1023448569526834e-05, + "loss": 0.24439337849617004, + "step": 3722 + }, + { + "epoch": 0.9887133182844243, + "grad_norm": 1.0386429343076329, + "learning_rate": 1.1019080761602473e-05, + "loss": 0.2520195245742798, + "step": 3723 + }, + { + "epoch": 0.9889788872659674, + "grad_norm": 1.0921837996926786, + "learning_rate": 1.1014712757211359e-05, + "loss": 0.2904737889766693, + "step": 3724 + }, + { + "epoch": 0.9892444562475103, + "grad_norm": 1.12008182824954, + "learning_rate": 1.1010344557195588e-05, + "loss": 0.28096869587898254, + "step": 3725 + }, + { + "epoch": 0.9895100252290533, + "grad_norm": 1.8392230806075218, + "learning_rate": 1.1005976162397309e-05, + "loss": 0.317839652299881, + "step": 3726 + }, + { + "epoch": 0.9897755942105962, + "grad_norm": 1.19381185696067, + "learning_rate": 1.100160757365869e-05, + "loss": 0.29213201999664307, + "step": 3727 + }, + { + "epoch": 0.9900411631921392, + "grad_norm": 1.215113877896921, + "learning_rate": 1.0997238791821943e-05, + "loss": 0.27034991979599, + "step": 3728 + }, + { + "epoch": 0.9903067321736821, + "grad_norm": 1.2893524723691567, + "learning_rate": 1.0992869817729317e-05, + "loss": 0.30504971742630005, + "step": 3729 + }, + { + "epoch": 0.9905723011552251, + "grad_norm": 1.109889585740049, + "learning_rate": 1.09885006522231e-05, + "loss": 0.30673110485076904, + "step": 3730 + }, + { + "epoch": 0.990837870136768, + "grad_norm": 
1.0963153712692437, + "learning_rate": 1.0984131296145616e-05, + "loss": 0.27990686893463135, + "step": 3731 + }, + { + "epoch": 0.991103439118311, + "grad_norm": 1.0228240366531471, + "learning_rate": 1.0979761750339225e-05, + "loss": 0.24379019439220428, + "step": 3732 + }, + { + "epoch": 0.991369008099854, + "grad_norm": 1.1055702239918885, + "learning_rate": 1.0975392015646323e-05, + "loss": 0.30554595589637756, + "step": 3733 + }, + { + "epoch": 0.9916345770813969, + "grad_norm": 1.062606047652276, + "learning_rate": 1.0971022092909342e-05, + "loss": 0.245269775390625, + "step": 3734 + }, + { + "epoch": 0.9919001460629399, + "grad_norm": 1.0977829197687445, + "learning_rate": 1.0966651982970757e-05, + "loss": 0.2732948064804077, + "step": 3735 + }, + { + "epoch": 0.9921657150444828, + "grad_norm": 0.992060831416128, + "learning_rate": 1.0962281686673071e-05, + "loss": 0.25989004969596863, + "step": 3736 + }, + { + "epoch": 0.9924312840260258, + "grad_norm": 1.1415489224758493, + "learning_rate": 1.0957911204858824e-05, + "loss": 0.32891198992729187, + "step": 3737 + }, + { + "epoch": 0.9926968530075687, + "grad_norm": 1.094277657297916, + "learning_rate": 1.0953540538370591e-05, + "loss": 0.29184675216674805, + "step": 3738 + }, + { + "epoch": 0.9929624219891117, + "grad_norm": 1.1381026162174743, + "learning_rate": 1.094916968805099e-05, + "loss": 0.2784018814563751, + "step": 3739 + }, + { + "epoch": 0.9932279909706546, + "grad_norm": 1.1670677505581852, + "learning_rate": 1.094479865474267e-05, + "loss": 0.26586195826530457, + "step": 3740 + }, + { + "epoch": 0.9934935599521976, + "grad_norm": 0.9575913416137994, + "learning_rate": 1.094042743928831e-05, + "loss": 0.24593298137187958, + "step": 3741 + }, + { + "epoch": 0.9937591289337405, + "grad_norm": 1.065966707682552, + "learning_rate": 1.0936056042530632e-05, + "loss": 0.2462792694568634, + "step": 3742 + }, + { + "epoch": 0.9940246979152835, + "grad_norm": 1.2074020558104472, + "learning_rate": 
1.0931684465312388e-05, + "loss": 0.2688900828361511, + "step": 3743 + }, + { + "epoch": 0.9942902668968264, + "grad_norm": 1.099682442025033, + "learning_rate": 1.0927312708476367e-05, + "loss": 0.2842782735824585, + "step": 3744 + }, + { + "epoch": 0.9945558358783694, + "grad_norm": 1.0548829148077135, + "learning_rate": 1.0922940772865393e-05, + "loss": 0.249299556016922, + "step": 3745 + }, + { + "epoch": 0.9948214048599123, + "grad_norm": 1.175705262338143, + "learning_rate": 1.0918568659322325e-05, + "loss": 0.2765413522720337, + "step": 3746 + }, + { + "epoch": 0.9950869738414553, + "grad_norm": 1.1414819691892306, + "learning_rate": 1.0914196368690049e-05, + "loss": 0.29750365018844604, + "step": 3747 + }, + { + "epoch": 0.9953525428229982, + "grad_norm": 1.153321336461836, + "learning_rate": 1.0909823901811496e-05, + "loss": 0.25272879004478455, + "step": 3748 + }, + { + "epoch": 0.9956181118045412, + "grad_norm": 1.1906489486154657, + "learning_rate": 1.0905451259529626e-05, + "loss": 0.3056861460208893, + "step": 3749 + }, + { + "epoch": 0.9958836807860841, + "grad_norm": 1.1596775625362263, + "learning_rate": 1.090107844268743e-05, + "loss": 0.26723814010620117, + "step": 3750 + }, + { + "epoch": 0.9961492497676271, + "grad_norm": 1.167023454532776, + "learning_rate": 1.0896705452127943e-05, + "loss": 0.29998716711997986, + "step": 3751 + }, + { + "epoch": 0.9964148187491702, + "grad_norm": 1.1519689723038142, + "learning_rate": 1.0892332288694216e-05, + "loss": 0.2690891623497009, + "step": 3752 + }, + { + "epoch": 0.9966803877307131, + "grad_norm": 1.1385088428140973, + "learning_rate": 1.0887958953229349e-05, + "loss": 0.25555333495140076, + "step": 3753 + }, + { + "epoch": 0.996945956712256, + "grad_norm": 1.1617836993376212, + "learning_rate": 1.088358544657647e-05, + "loss": 0.27788421511650085, + "step": 3754 + }, + { + "epoch": 0.997211525693799, + "grad_norm": 1.0981105518173184, + "learning_rate": 1.0879211769578734e-05, + "loss": 
0.2566586136817932, + "step": 3755 + }, + { + "epoch": 0.997477094675342, + "grad_norm": 1.1742409056404244, + "learning_rate": 1.0874837923079339e-05, + "loss": 0.3028980493545532, + "step": 3756 + }, + { + "epoch": 0.9977426636568849, + "grad_norm": 1.151070664269376, + "learning_rate": 1.0870463907921512e-05, + "loss": 0.30244824290275574, + "step": 3757 + }, + { + "epoch": 0.9980082326384279, + "grad_norm": 1.0175517300218122, + "learning_rate": 1.086608972494851e-05, + "loss": 0.2610962390899658, + "step": 3758 + }, + { + "epoch": 0.9982738016199708, + "grad_norm": 1.1587347636182326, + "learning_rate": 1.0861715375003623e-05, + "loss": 0.2733536660671234, + "step": 3759 + }, + { + "epoch": 0.9985393706015138, + "grad_norm": 1.094010099730521, + "learning_rate": 1.0857340858930175e-05, + "loss": 0.2915020287036896, + "step": 3760 + }, + { + "epoch": 0.9988049395830567, + "grad_norm": 1.1164899423303463, + "learning_rate": 1.085296617757152e-05, + "loss": 0.2940186560153961, + "step": 3761 + }, + { + "epoch": 0.9990705085645997, + "grad_norm": 1.1441195343158572, + "learning_rate": 1.0848591331771045e-05, + "loss": 0.3002738952636719, + "step": 3762 + }, + { + "epoch": 0.9993360775461426, + "grad_norm": 1.0530840422742196, + "learning_rate": 1.0844216322372172e-05, + "loss": 0.284588485956192, + "step": 3763 + }, + { + "epoch": 0.9996016465276856, + "grad_norm": 1.0971261053209735, + "learning_rate": 1.0839841150218347e-05, + "loss": 0.29395923018455505, + "step": 3764 + }, + { + "epoch": 0.9998672155092285, + "grad_norm": 1.1355876604442514, + "learning_rate": 1.083546581615305e-05, + "loss": 0.2574613094329834, + "step": 3765 + }, + { + "epoch": 1.0, + "grad_norm": 1.535375625820537, + "learning_rate": 1.0831090321019801e-05, + "loss": 0.177712082862854, + "step": 3766 + }, + { + "epoch": 1.000265568981543, + "grad_norm": 1.1101315935040728, + "learning_rate": 1.0826714665662139e-05, + "loss": 0.29758381843566895, + "step": 3767 + }, + { + "epoch": 
1.000531137963086, + "grad_norm": 1.055973006911073, + "learning_rate": 1.0822338850923644e-05, + "loss": 0.23377545177936554, + "step": 3768 + }, + { + "epoch": 1.0007967069446289, + "grad_norm": 1.1573191222761028, + "learning_rate": 1.0817962877647911e-05, + "loss": 0.2505020797252655, + "step": 3769 + }, + { + "epoch": 1.0010622759261718, + "grad_norm": 1.0395021899779042, + "learning_rate": 1.0813586746678584e-05, + "loss": 0.26122647523880005, + "step": 3770 + }, + { + "epoch": 1.0013278449077148, + "grad_norm": 1.1508778318464672, + "learning_rate": 1.0809210458859327e-05, + "loss": 0.27962177991867065, + "step": 3771 + }, + { + "epoch": 1.0015934138892577, + "grad_norm": 1.0479777844917506, + "learning_rate": 1.080483401503384e-05, + "loss": 0.21921640634536743, + "step": 3772 + }, + { + "epoch": 1.0018589828708007, + "grad_norm": 1.1277812491041006, + "learning_rate": 1.0800457416045845e-05, + "loss": 0.24623796343803406, + "step": 3773 + }, + { + "epoch": 1.0021245518523436, + "grad_norm": 1.259401152466985, + "learning_rate": 1.0796080662739098e-05, + "loss": 0.3130728006362915, + "step": 3774 + }, + { + "epoch": 1.0023901208338866, + "grad_norm": 1.1209083810179328, + "learning_rate": 1.0791703755957392e-05, + "loss": 0.2548064589500427, + "step": 3775 + }, + { + "epoch": 1.0026556898154295, + "grad_norm": 1.1167206534835417, + "learning_rate": 1.078732669654454e-05, + "loss": 0.20517288148403168, + "step": 3776 + }, + { + "epoch": 1.0029212587969725, + "grad_norm": 1.1055374385175383, + "learning_rate": 1.0782949485344385e-05, + "loss": 0.2634897530078888, + "step": 3777 + }, + { + "epoch": 1.0031868277785154, + "grad_norm": 1.3696848286677328, + "learning_rate": 1.0778572123200804e-05, + "loss": 0.2743223309516907, + "step": 3778 + }, + { + "epoch": 1.0034523967600584, + "grad_norm": 0.9930991365195264, + "learning_rate": 1.0774194610957695e-05, + "loss": 0.24595436453819275, + "step": 3779 + }, + { + "epoch": 1.0037179657416013, + "grad_norm": 
1.0885778480679946, + "learning_rate": 1.0769816949459002e-05, + "loss": 0.2508128881454468, + "step": 3780 + }, + { + "epoch": 1.0039835347231443, + "grad_norm": 1.1243431648812525, + "learning_rate": 1.0765439139548677e-05, + "loss": 0.2326367199420929, + "step": 3781 + }, + { + "epoch": 1.0042491037046872, + "grad_norm": 1.1514050771182385, + "learning_rate": 1.0761061182070716e-05, + "loss": 0.2888404130935669, + "step": 3782 + }, + { + "epoch": 1.0045146726862302, + "grad_norm": 1.1399638718055765, + "learning_rate": 1.0756683077869133e-05, + "loss": 0.2804296612739563, + "step": 3783 + }, + { + "epoch": 1.0047802416677731, + "grad_norm": 1.1286027319524963, + "learning_rate": 1.0752304827787979e-05, + "loss": 0.2644953429698944, + "step": 3784 + }, + { + "epoch": 1.005045810649316, + "grad_norm": 1.2396532451569051, + "learning_rate": 1.0747926432671323e-05, + "loss": 0.297788143157959, + "step": 3785 + }, + { + "epoch": 1.005311379630859, + "grad_norm": 1.065071455363874, + "learning_rate": 1.0743547893363276e-05, + "loss": 0.2644156515598297, + "step": 3786 + }, + { + "epoch": 1.005576948612402, + "grad_norm": 1.1640867578019738, + "learning_rate": 1.073916921070796e-05, + "loss": 0.23818905651569366, + "step": 3787 + }, + { + "epoch": 1.005842517593945, + "grad_norm": 1.11872081222192, + "learning_rate": 1.0734790385549538e-05, + "loss": 0.2544933259487152, + "step": 3788 + }, + { + "epoch": 1.006108086575488, + "grad_norm": 1.0836442452511366, + "learning_rate": 1.0730411418732198e-05, + "loss": 0.2569275498390198, + "step": 3789 + }, + { + "epoch": 1.0063736555570308, + "grad_norm": 1.0348585374954582, + "learning_rate": 1.0726032311100153e-05, + "loss": 0.2248159945011139, + "step": 3790 + }, + { + "epoch": 1.0066392245385738, + "grad_norm": 1.1242207493876892, + "learning_rate": 1.072165306349764e-05, + "loss": 0.25541940331459045, + "step": 3791 + }, + { + "epoch": 1.0069047935201167, + "grad_norm": 9.328291099250833, + "learning_rate": 
1.0717273676768924e-05, + "loss": 0.24429568648338318, + "step": 3792 + }, + { + "epoch": 1.0071703625016597, + "grad_norm": 1.0574884647737486, + "learning_rate": 1.0712894151758306e-05, + "loss": 0.2586621344089508, + "step": 3793 + }, + { + "epoch": 1.0074359314832027, + "grad_norm": 1.165205157800888, + "learning_rate": 1.0708514489310103e-05, + "loss": 0.28685104846954346, + "step": 3794 + }, + { + "epoch": 1.0077015004647458, + "grad_norm": 1.1536672746294196, + "learning_rate": 1.0704134690268661e-05, + "loss": 0.2847924530506134, + "step": 3795 + }, + { + "epoch": 1.0079670694462888, + "grad_norm": 1.1168453704329862, + "learning_rate": 1.0699754755478358e-05, + "loss": 0.24646440148353577, + "step": 3796 + }, + { + "epoch": 1.0082326384278317, + "grad_norm": 1.217438590106057, + "learning_rate": 1.0695374685783586e-05, + "loss": 0.22286385297775269, + "step": 3797 + }, + { + "epoch": 1.0084982074093747, + "grad_norm": 1.1352166249232278, + "learning_rate": 1.069099448202878e-05, + "loss": 0.2524179518222809, + "step": 3798 + }, + { + "epoch": 1.0087637763909176, + "grad_norm": 1.109981913009372, + "learning_rate": 1.0686614145058387e-05, + "loss": 0.2625758647918701, + "step": 3799 + }, + { + "epoch": 1.0090293453724606, + "grad_norm": 1.0622342238121125, + "learning_rate": 1.0682233675716884e-05, + "loss": 0.25318068265914917, + "step": 3800 + }, + { + "epoch": 1.0092949143540035, + "grad_norm": 1.073699024276181, + "learning_rate": 1.0677853074848774e-05, + "loss": 0.24224570393562317, + "step": 3801 + }, + { + "epoch": 1.0095604833355465, + "grad_norm": 1.1995813349182267, + "learning_rate": 1.0673472343298588e-05, + "loss": 0.28595417737960815, + "step": 3802 + }, + { + "epoch": 1.0098260523170894, + "grad_norm": 1.1558738404506108, + "learning_rate": 1.0669091481910874e-05, + "loss": 0.26894015073776245, + "step": 3803 + }, + { + "epoch": 1.0100916212986324, + "grad_norm": 1.0901744125075639, + "learning_rate": 1.0664710491530214e-05, + "loss": 
0.2605208158493042, + "step": 3804 + }, + { + "epoch": 1.0103571902801753, + "grad_norm": 1.082458382717597, + "learning_rate": 1.0660329373001212e-05, + "loss": 0.2595113515853882, + "step": 3805 + }, + { + "epoch": 1.0106227592617183, + "grad_norm": 1.2467081294979763, + "learning_rate": 1.0655948127168494e-05, + "loss": 0.27478674054145813, + "step": 3806 + }, + { + "epoch": 1.0108883282432612, + "grad_norm": 1.0742167098010935, + "learning_rate": 1.0651566754876715e-05, + "loss": 0.2587064504623413, + "step": 3807 + }, + { + "epoch": 1.0111538972248042, + "grad_norm": 1.0593019665426413, + "learning_rate": 1.064718525697055e-05, + "loss": 0.2420537769794464, + "step": 3808 + }, + { + "epoch": 1.0114194662063472, + "grad_norm": 1.1660072059036033, + "learning_rate": 1.0642803634294699e-05, + "loss": 0.29424652457237244, + "step": 3809 + }, + { + "epoch": 1.01168503518789, + "grad_norm": 1.0902934718743655, + "learning_rate": 1.0638421887693887e-05, + "loss": 0.25162142515182495, + "step": 3810 + }, + { + "epoch": 1.011950604169433, + "grad_norm": 1.1456242703963635, + "learning_rate": 1.0634040018012865e-05, + "loss": 0.25661247968673706, + "step": 3811 + }, + { + "epoch": 1.012216173150976, + "grad_norm": 1.0060634238068926, + "learning_rate": 1.0629658026096408e-05, + "loss": 0.2042091339826584, + "step": 3812 + }, + { + "epoch": 1.012481742132519, + "grad_norm": 1.0129340658577524, + "learning_rate": 1.0625275912789307e-05, + "loss": 0.22496266663074493, + "step": 3813 + }, + { + "epoch": 1.012747311114062, + "grad_norm": 1.1382961966722176, + "learning_rate": 1.0620893678936385e-05, + "loss": 0.23609521985054016, + "step": 3814 + }, + { + "epoch": 1.0130128800956049, + "grad_norm": 1.2645443214744188, + "learning_rate": 1.0616511325382486e-05, + "loss": 0.2561722993850708, + "step": 3815 + }, + { + "epoch": 1.0132784490771478, + "grad_norm": 1.1379816472778304, + "learning_rate": 1.0612128852972474e-05, + "loss": 0.2617529630661011, + "step": 3816 + }, + { + 
"epoch": 1.0135440180586908, + "grad_norm": 1.1862833237483508, + "learning_rate": 1.060774626255124e-05, + "loss": 0.2633543014526367, + "step": 3817 + }, + { + "epoch": 1.0138095870402337, + "grad_norm": 1.0263666085354948, + "learning_rate": 1.0603363554963693e-05, + "loss": 0.19401729106903076, + "step": 3818 + }, + { + "epoch": 1.0140751560217767, + "grad_norm": 1.0891094169836097, + "learning_rate": 1.0598980731054765e-05, + "loss": 0.2583369016647339, + "step": 3819 + }, + { + "epoch": 1.0143407250033196, + "grad_norm": 1.1826598806695992, + "learning_rate": 1.0594597791669419e-05, + "loss": 0.26138922572135925, + "step": 3820 + }, + { + "epoch": 1.0146062939848626, + "grad_norm": 1.1580137447688548, + "learning_rate": 1.0590214737652632e-05, + "loss": 0.2506800591945648, + "step": 3821 + }, + { + "epoch": 1.0148718629664055, + "grad_norm": 1.032579662550809, + "learning_rate": 1.0585831569849405e-05, + "loss": 0.21569974720478058, + "step": 3822 + }, + { + "epoch": 1.0151374319479485, + "grad_norm": 1.37079648056154, + "learning_rate": 1.0581448289104759e-05, + "loss": 0.2765602767467499, + "step": 3823 + }, + { + "epoch": 1.0154030009294914, + "grad_norm": 1.2046968903946047, + "learning_rate": 1.0577064896263743e-05, + "loss": 0.25180384516716003, + "step": 3824 + }, + { + "epoch": 1.0156685699110344, + "grad_norm": 1.0796182560924539, + "learning_rate": 1.0572681392171417e-05, + "loss": 0.24164071679115295, + "step": 3825 + }, + { + "epoch": 1.0159341388925773, + "grad_norm": 1.1523354919316235, + "learning_rate": 1.0568297777672875e-05, + "loss": 0.24206972122192383, + "step": 3826 + }, + { + "epoch": 1.0161997078741203, + "grad_norm": 1.115771237946875, + "learning_rate": 1.0563914053613227e-05, + "loss": 0.24563468992710114, + "step": 3827 + }, + { + "epoch": 1.0164652768556632, + "grad_norm": 1.121826691352643, + "learning_rate": 1.0559530220837593e-05, + "loss": 0.23226243257522583, + "step": 3828 + }, + { + "epoch": 1.0167308458372062, + 
"grad_norm": 1.4499652400392462, + "learning_rate": 1.0555146280191137e-05, + "loss": 0.2245083749294281, + "step": 3829 + }, + { + "epoch": 1.0169964148187491, + "grad_norm": 1.1230707875328865, + "learning_rate": 1.0550762232519023e-05, + "loss": 0.24455049633979797, + "step": 3830 + }, + { + "epoch": 1.017261983800292, + "grad_norm": 1.1434011419253403, + "learning_rate": 1.0546378078666448e-05, + "loss": 0.2540651857852936, + "step": 3831 + }, + { + "epoch": 1.017527552781835, + "grad_norm": 1.222189193306495, + "learning_rate": 1.0541993819478622e-05, + "loss": 0.23392565548419952, + "step": 3832 + }, + { + "epoch": 1.017793121763378, + "grad_norm": 1.239236731837986, + "learning_rate": 1.053760945580078e-05, + "loss": 0.21601927280426025, + "step": 3833 + }, + { + "epoch": 1.018058690744921, + "grad_norm": 1.1697918037357793, + "learning_rate": 1.0533224988478176e-05, + "loss": 0.24622616171836853, + "step": 3834 + }, + { + "epoch": 1.018324259726464, + "grad_norm": 1.186224891573799, + "learning_rate": 1.0528840418356086e-05, + "loss": 0.2774650752544403, + "step": 3835 + }, + { + "epoch": 1.0185898287080069, + "grad_norm": 1.1218094293898884, + "learning_rate": 1.0524455746279795e-05, + "loss": 0.22323890030384064, + "step": 3836 + }, + { + "epoch": 1.0188553976895498, + "grad_norm": 1.0569207532138136, + "learning_rate": 1.0520070973094622e-05, + "loss": 0.21901552379131317, + "step": 3837 + }, + { + "epoch": 1.0191209666710928, + "grad_norm": 1.1936231752235407, + "learning_rate": 1.0515686099645901e-05, + "loss": 0.3037784695625305, + "step": 3838 + }, + { + "epoch": 1.0193865356526357, + "grad_norm": 1.0847362828180318, + "learning_rate": 1.0511301126778984e-05, + "loss": 0.22658365964889526, + "step": 3839 + }, + { + "epoch": 1.0196521046341787, + "grad_norm": 1.09040618490447, + "learning_rate": 1.0506916055339237e-05, + "loss": 0.23144160211086273, + "step": 3840 + }, + { + "epoch": 1.0199176736157216, + "grad_norm": 1.28339134317777, + 
"learning_rate": 1.0502530886172055e-05, + "loss": 0.25658899545669556, + "step": 3841 + }, + { + "epoch": 1.0201832425972646, + "grad_norm": 0.9689646092731519, + "learning_rate": 1.0498145620122845e-05, + "loss": 0.19658756256103516, + "step": 3842 + }, + { + "epoch": 1.0204488115788075, + "grad_norm": 1.0949311372526576, + "learning_rate": 1.049376025803703e-05, + "loss": 0.19045208394527435, + "step": 3843 + }, + { + "epoch": 1.0207143805603505, + "grad_norm": 1.1626763108379607, + "learning_rate": 1.0489374800760066e-05, + "loss": 0.2577810287475586, + "step": 3844 + }, + { + "epoch": 1.0209799495418934, + "grad_norm": 1.1521055149329589, + "learning_rate": 1.048498924913741e-05, + "loss": 0.2807403802871704, + "step": 3845 + }, + { + "epoch": 1.0212455185234364, + "grad_norm": 1.2275557893789377, + "learning_rate": 1.0480603604014545e-05, + "loss": 0.2710269093513489, + "step": 3846 + }, + { + "epoch": 1.0215110875049793, + "grad_norm": 1.173604136076929, + "learning_rate": 1.0476217866236974e-05, + "loss": 0.2560620903968811, + "step": 3847 + }, + { + "epoch": 1.0217766564865223, + "grad_norm": 1.1571778426612858, + "learning_rate": 1.0471832036650217e-05, + "loss": 0.2599894404411316, + "step": 3848 + }, + { + "epoch": 1.0220422254680652, + "grad_norm": 1.1339420848197217, + "learning_rate": 1.046744611609981e-05, + "loss": 0.2411944717168808, + "step": 3849 + }, + { + "epoch": 1.0223077944496084, + "grad_norm": 1.1528658942490468, + "learning_rate": 1.0463060105431303e-05, + "loss": 0.25216251611709595, + "step": 3850 + }, + { + "epoch": 1.0225733634311513, + "grad_norm": 1.1884423925105638, + "learning_rate": 1.0458674005490263e-05, + "loss": 0.255629301071167, + "step": 3851 + }, + { + "epoch": 1.0228389324126943, + "grad_norm": 1.0777718220336832, + "learning_rate": 1.0454287817122291e-05, + "loss": 0.24032849073410034, + "step": 3852 + }, + { + "epoch": 1.0231045013942373, + "grad_norm": 1.1154013609024198, + "learning_rate": 1.0449901541172983e-05, + 
"loss": 0.23188306391239166, + "step": 3853 + }, + { + "epoch": 1.0233700703757802, + "grad_norm": 1.149374478972437, + "learning_rate": 1.0445515178487965e-05, + "loss": 0.2718146741390228, + "step": 3854 + }, + { + "epoch": 1.0236356393573232, + "grad_norm": 1.460691184866812, + "learning_rate": 1.0441128729912876e-05, + "loss": 0.30279839038848877, + "step": 3855 + }, + { + "epoch": 1.023901208338866, + "grad_norm": 1.0711762201816422, + "learning_rate": 1.0436742196293368e-05, + "loss": 0.2185024917125702, + "step": 3856 + }, + { + "epoch": 1.024166777320409, + "grad_norm": 1.2737960148140446, + "learning_rate": 1.0432355578475118e-05, + "loss": 0.2956481873989105, + "step": 3857 + }, + { + "epoch": 1.024432346301952, + "grad_norm": 1.1913794327080105, + "learning_rate": 1.0427968877303809e-05, + "loss": 0.28460678458213806, + "step": 3858 + }, + { + "epoch": 1.024697915283495, + "grad_norm": 1.1716718579119476, + "learning_rate": 1.0423582093625146e-05, + "loss": 0.24597057700157166, + "step": 3859 + }, + { + "epoch": 1.024963484265038, + "grad_norm": 0.987642591779768, + "learning_rate": 1.0419195228284856e-05, + "loss": 0.23986583948135376, + "step": 3860 + }, + { + "epoch": 1.0252290532465809, + "grad_norm": 1.0867576400643644, + "learning_rate": 1.0414808282128668e-05, + "loss": 0.2489446997642517, + "step": 3861 + }, + { + "epoch": 1.0254946222281238, + "grad_norm": 1.1200031637603385, + "learning_rate": 1.0410421256002334e-05, + "loss": 0.26777884364128113, + "step": 3862 + }, + { + "epoch": 1.0257601912096668, + "grad_norm": 1.1645962699086565, + "learning_rate": 1.0406034150751625e-05, + "loss": 0.23506489396095276, + "step": 3863 + }, + { + "epoch": 1.0260257601912097, + "grad_norm": 1.1861093965134106, + "learning_rate": 1.040164696722232e-05, + "loss": 0.2526484429836273, + "step": 3864 + }, + { + "epoch": 1.0262913291727527, + "grad_norm": 1.1320109702434422, + "learning_rate": 1.0397259706260216e-05, + "loss": 0.2179267853498459, + "step": 3865 + 
}, + { + "epoch": 1.0265568981542956, + "grad_norm": 1.0267487594121727, + "learning_rate": 1.0392872368711126e-05, + "loss": 0.2431088387966156, + "step": 3866 + }, + { + "epoch": 1.0268224671358386, + "grad_norm": 1.1394336459602463, + "learning_rate": 1.0388484955420877e-05, + "loss": 0.26101407408714294, + "step": 3867 + }, + { + "epoch": 1.0270880361173815, + "grad_norm": 1.0741553283028158, + "learning_rate": 1.0384097467235308e-05, + "loss": 0.23780573904514313, + "step": 3868 + }, + { + "epoch": 1.0273536050989245, + "grad_norm": 1.467981467949694, + "learning_rate": 1.0379709905000278e-05, + "loss": 0.2469894289970398, + "step": 3869 + }, + { + "epoch": 1.0276191740804674, + "grad_norm": 1.074989572738127, + "learning_rate": 1.0375322269561658e-05, + "loss": 0.21271926164627075, + "step": 3870 + }, + { + "epoch": 1.0278847430620104, + "grad_norm": 1.1192343716648714, + "learning_rate": 1.0370934561765331e-05, + "loss": 0.22995726764202118, + "step": 3871 + }, + { + "epoch": 1.0281503120435533, + "grad_norm": 1.2051770162428763, + "learning_rate": 1.0366546782457196e-05, + "loss": 0.27448171377182007, + "step": 3872 + }, + { + "epoch": 1.0284158810250963, + "grad_norm": 1.232887313588547, + "learning_rate": 1.0362158932483165e-05, + "loss": 0.25459539890289307, + "step": 3873 + }, + { + "epoch": 1.0286814500066392, + "grad_norm": 1.1436601222318827, + "learning_rate": 1.0357771012689162e-05, + "loss": 0.23213380575180054, + "step": 3874 + }, + { + "epoch": 1.0289470189881822, + "grad_norm": 1.107979602389345, + "learning_rate": 1.0353383023921127e-05, + "loss": 0.2219776064157486, + "step": 3875 + }, + { + "epoch": 1.0292125879697251, + "grad_norm": 1.2445278934711803, + "learning_rate": 1.0348994967025012e-05, + "loss": 0.27059125900268555, + "step": 3876 + }, + { + "epoch": 1.029478156951268, + "grad_norm": 1.2314072238589235, + "learning_rate": 1.034460684284678e-05, + "loss": 0.26921501755714417, + "step": 3877 + }, + { + "epoch": 1.029743725932811, + 
"grad_norm": 1.153389282583655, + "learning_rate": 1.0340218652232419e-05, + "loss": 0.24727991223335266, + "step": 3878 + }, + { + "epoch": 1.030009294914354, + "grad_norm": 1.2105369925319034, + "learning_rate": 1.0335830396027912e-05, + "loss": 0.26276054978370667, + "step": 3879 + }, + { + "epoch": 1.030274863895897, + "grad_norm": 1.1222835146983237, + "learning_rate": 1.0331442075079268e-05, + "loss": 0.25906458497047424, + "step": 3880 + }, + { + "epoch": 1.03054043287744, + "grad_norm": 1.1936099182612667, + "learning_rate": 1.0327053690232498e-05, + "loss": 0.2708794176578522, + "step": 3881 + }, + { + "epoch": 1.0308060018589829, + "grad_norm": 1.1283814494585969, + "learning_rate": 1.0322665242333634e-05, + "loss": 0.24968653917312622, + "step": 3882 + }, + { + "epoch": 1.0310715708405258, + "grad_norm": 1.1912763351930955, + "learning_rate": 1.0318276732228716e-05, + "loss": 0.2669135332107544, + "step": 3883 + }, + { + "epoch": 1.0313371398220688, + "grad_norm": 1.0733368423352447, + "learning_rate": 1.0313888160763799e-05, + "loss": 0.24173730611801147, + "step": 3884 + }, + { + "epoch": 1.0316027088036117, + "grad_norm": 1.4084549111395024, + "learning_rate": 1.0309499528784948e-05, + "loss": 0.27513059973716736, + "step": 3885 + }, + { + "epoch": 1.0318682777851547, + "grad_norm": 1.163470416419209, + "learning_rate": 1.0305110837138235e-05, + "loss": 0.2512688934803009, + "step": 3886 + }, + { + "epoch": 1.0321338467666976, + "grad_norm": 1.100016135139411, + "learning_rate": 1.0300722086669753e-05, + "loss": 0.2584962844848633, + "step": 3887 + }, + { + "epoch": 1.0323994157482406, + "grad_norm": 1.1125458904355436, + "learning_rate": 1.0296333278225599e-05, + "loss": 0.23692303895950317, + "step": 3888 + }, + { + "epoch": 1.0326649847297835, + "grad_norm": 1.1981051682884363, + "learning_rate": 1.0291944412651884e-05, + "loss": 0.2570871114730835, + "step": 3889 + }, + { + "epoch": 1.0329305537113265, + "grad_norm": 1.1839354606788588, + 
"learning_rate": 1.028755549079473e-05, + "loss": 0.2896367609500885, + "step": 3890 + }, + { + "epoch": 1.0331961226928694, + "grad_norm": 0.958593784491898, + "learning_rate": 1.0283166513500267e-05, + "loss": 0.19990365207195282, + "step": 3891 + }, + { + "epoch": 1.0334616916744124, + "grad_norm": 1.1157517117826752, + "learning_rate": 1.0278777481614639e-05, + "loss": 0.25235646963119507, + "step": 3892 + }, + { + "epoch": 1.0337272606559553, + "grad_norm": 1.1808927381569394, + "learning_rate": 1.0274388395984003e-05, + "loss": 0.23675012588500977, + "step": 3893 + }, + { + "epoch": 1.0339928296374983, + "grad_norm": 1.1370597202642294, + "learning_rate": 1.026999925745452e-05, + "loss": 0.250516414642334, + "step": 3894 + }, + { + "epoch": 1.0342583986190412, + "grad_norm": 1.0692414219621886, + "learning_rate": 1.0265610066872365e-05, + "loss": 0.24573490023612976, + "step": 3895 + }, + { + "epoch": 1.0345239676005842, + "grad_norm": 1.085358990363196, + "learning_rate": 1.026122082508372e-05, + "loss": 0.2473086714744568, + "step": 3896 + }, + { + "epoch": 1.0347895365821271, + "grad_norm": 1.162338198859519, + "learning_rate": 1.0256831532934783e-05, + "loss": 0.26546406745910645, + "step": 3897 + }, + { + "epoch": 1.03505510556367, + "grad_norm": 1.1034436628854154, + "learning_rate": 1.0252442191271754e-05, + "loss": 0.2565246522426605, + "step": 3898 + }, + { + "epoch": 1.035320674545213, + "grad_norm": 1.0272875416109402, + "learning_rate": 1.0248052800940846e-05, + "loss": 0.24923476576805115, + "step": 3899 + }, + { + "epoch": 1.035586243526756, + "grad_norm": 1.1519345059696067, + "learning_rate": 1.0243663362788286e-05, + "loss": 0.3079240322113037, + "step": 3900 + }, + { + "epoch": 1.035851812508299, + "grad_norm": 1.0586971174066726, + "learning_rate": 1.0239273877660302e-05, + "loss": 0.2482951581478119, + "step": 3901 + }, + { + "epoch": 1.036117381489842, + "grad_norm": 1.1495296797401515, + "learning_rate": 1.0234884346403138e-05, + "loss": 
0.2626204192638397, + "step": 3902 + }, + { + "epoch": 1.0363829504713848, + "grad_norm": 1.0578834148114886, + "learning_rate": 1.023049476986304e-05, + "loss": 0.23181654512882233, + "step": 3903 + }, + { + "epoch": 1.0366485194529278, + "grad_norm": 1.2527800012652353, + "learning_rate": 1.0226105148886272e-05, + "loss": 0.29164040088653564, + "step": 3904 + }, + { + "epoch": 1.0369140884344707, + "grad_norm": 1.034136654365203, + "learning_rate": 1.0221715484319094e-05, + "loss": 0.22025801241397858, + "step": 3905 + }, + { + "epoch": 1.0371796574160137, + "grad_norm": 1.1162047929812215, + "learning_rate": 1.021732577700779e-05, + "loss": 0.2819385826587677, + "step": 3906 + }, + { + "epoch": 1.0374452263975567, + "grad_norm": 1.0524498644463125, + "learning_rate": 1.0212936027798637e-05, + "loss": 0.24709002673625946, + "step": 3907 + }, + { + "epoch": 1.0377107953790998, + "grad_norm": 0.9984579723832369, + "learning_rate": 1.0208546237537928e-05, + "loss": 0.22570034861564636, + "step": 3908 + }, + { + "epoch": 1.0379763643606428, + "grad_norm": 1.1543900299803864, + "learning_rate": 1.0204156407071964e-05, + "loss": 0.25642865896224976, + "step": 3909 + }, + { + "epoch": 1.0382419333421857, + "grad_norm": 1.1657404882715603, + "learning_rate": 1.0199766537247053e-05, + "loss": 0.25970256328582764, + "step": 3910 + }, + { + "epoch": 1.0385075023237287, + "grad_norm": 1.1347864223586095, + "learning_rate": 1.019537662890951e-05, + "loss": 0.2560003101825714, + "step": 3911 + }, + { + "epoch": 1.0387730713052716, + "grad_norm": 1.3160565196765366, + "learning_rate": 1.0190986682905656e-05, + "loss": 0.28138649463653564, + "step": 3912 + }, + { + "epoch": 1.0390386402868146, + "grad_norm": 1.4353879235637104, + "learning_rate": 1.0186596700081825e-05, + "loss": 0.23531222343444824, + "step": 3913 + }, + { + "epoch": 1.0393042092683575, + "grad_norm": 1.1850676655471586, + "learning_rate": 1.018220668128435e-05, + "loss": 0.24912862479686737, + "step": 3914 + 
}, + { + "epoch": 1.0395697782499005, + "grad_norm": 1.0811585337632708, + "learning_rate": 1.0177816627359575e-05, + "loss": 0.24188724160194397, + "step": 3915 + }, + { + "epoch": 1.0398353472314434, + "grad_norm": 1.2093489820950423, + "learning_rate": 1.0173426539153853e-05, + "loss": 0.2709474563598633, + "step": 3916 + }, + { + "epoch": 1.0401009162129864, + "grad_norm": 1.1793292324294091, + "learning_rate": 1.0169036417513538e-05, + "loss": 0.2400204837322235, + "step": 3917 + }, + { + "epoch": 1.0403664851945293, + "grad_norm": 1.0489256907825586, + "learning_rate": 1.0164646263284993e-05, + "loss": 0.2687132954597473, + "step": 3918 + }, + { + "epoch": 1.0406320541760723, + "grad_norm": 1.1628887826217675, + "learning_rate": 1.0160256077314592e-05, + "loss": 0.25139346718788147, + "step": 3919 + }, + { + "epoch": 1.0408976231576152, + "grad_norm": 1.1762633281473511, + "learning_rate": 1.0155865860448712e-05, + "loss": 0.25873464345932007, + "step": 3920 + }, + { + "epoch": 1.0411631921391582, + "grad_norm": 1.1207165962030725, + "learning_rate": 1.0151475613533732e-05, + "loss": 0.2510434687137604, + "step": 3921 + }, + { + "epoch": 1.0414287611207012, + "grad_norm": 1.2260247662339232, + "learning_rate": 1.0147085337416036e-05, + "loss": 0.24567106366157532, + "step": 3922 + }, + { + "epoch": 1.041694330102244, + "grad_norm": 1.1642096823951156, + "learning_rate": 1.0142695032942024e-05, + "loss": 0.25028282403945923, + "step": 3923 + }, + { + "epoch": 1.041959899083787, + "grad_norm": 1.140963361472911, + "learning_rate": 1.0138304700958096e-05, + "loss": 0.23542484641075134, + "step": 3924 + }, + { + "epoch": 1.04222546806533, + "grad_norm": 1.2475887570620718, + "learning_rate": 1.0133914342310649e-05, + "loss": 0.28974449634552, + "step": 3925 + }, + { + "epoch": 1.042491037046873, + "grad_norm": 1.0648736453755918, + "learning_rate": 1.0129523957846097e-05, + "loss": 0.23417247831821442, + "step": 3926 + }, + { + "epoch": 1.042756606028416, + 
"grad_norm": 1.1427047582178407, + "learning_rate": 1.0125133548410852e-05, + "loss": 0.23247018456459045, + "step": 3927 + }, + { + "epoch": 1.0430221750099589, + "grad_norm": 1.1496713132119072, + "learning_rate": 1.0120743114851337e-05, + "loss": 0.23860129714012146, + "step": 3928 + }, + { + "epoch": 1.0432877439915018, + "grad_norm": 1.1567405333157526, + "learning_rate": 1.0116352658013973e-05, + "loss": 0.2609105706214905, + "step": 3929 + }, + { + "epoch": 1.0435533129730448, + "grad_norm": 1.2453984448185509, + "learning_rate": 1.0111962178745187e-05, + "loss": 0.2559507489204407, + "step": 3930 + }, + { + "epoch": 1.0438188819545877, + "grad_norm": 1.2247288020965454, + "learning_rate": 1.0107571677891415e-05, + "loss": 0.2708527147769928, + "step": 3931 + }, + { + "epoch": 1.0440844509361307, + "grad_norm": 1.2373037230453465, + "learning_rate": 1.0103181156299091e-05, + "loss": 0.25884875655174255, + "step": 3932 + }, + { + "epoch": 1.0443500199176736, + "grad_norm": 1.3022673165052032, + "learning_rate": 1.0098790614814658e-05, + "loss": 0.2631877660751343, + "step": 3933 + }, + { + "epoch": 1.0446155888992166, + "grad_norm": 1.0267097797291302, + "learning_rate": 1.0094400054284559e-05, + "loss": 0.27179086208343506, + "step": 3934 + }, + { + "epoch": 1.0448811578807595, + "grad_norm": 2.1081344450494144, + "learning_rate": 1.0090009475555245e-05, + "loss": 0.21690386533737183, + "step": 3935 + }, + { + "epoch": 1.0451467268623025, + "grad_norm": 1.0188398651288513, + "learning_rate": 1.0085618879473162e-05, + "loss": 0.20192815363407135, + "step": 3936 + }, + { + "epoch": 1.0454122958438454, + "grad_norm": 1.213624997308106, + "learning_rate": 1.0081228266884773e-05, + "loss": 0.2680777907371521, + "step": 3937 + }, + { + "epoch": 1.0456778648253884, + "grad_norm": 1.1871222610891168, + "learning_rate": 1.007683763863653e-05, + "loss": 0.2566579580307007, + "step": 3938 + }, + { + "epoch": 1.0459434338069313, + "grad_norm": 1.1229802475790265, + 
"learning_rate": 1.0072446995574895e-05, + "loss": 0.2508152723312378, + "step": 3939 + }, + { + "epoch": 1.0462090027884743, + "grad_norm": 1.0850640213400236, + "learning_rate": 1.0068056338546335e-05, + "loss": 0.2880190908908844, + "step": 3940 + }, + { + "epoch": 1.0464745717700172, + "grad_norm": 1.1129549761108044, + "learning_rate": 1.0063665668397316e-05, + "loss": 0.2646787464618683, + "step": 3941 + }, + { + "epoch": 1.0467401407515602, + "grad_norm": 1.1116528447502043, + "learning_rate": 1.0059274985974305e-05, + "loss": 0.2327616810798645, + "step": 3942 + }, + { + "epoch": 1.0470057097331031, + "grad_norm": 1.1644185595792014, + "learning_rate": 1.0054884292123778e-05, + "loss": 0.24756258726119995, + "step": 3943 + }, + { + "epoch": 1.047271278714646, + "grad_norm": 1.1010853288322209, + "learning_rate": 1.0050493587692207e-05, + "loss": 0.23657771944999695, + "step": 3944 + }, + { + "epoch": 1.047536847696189, + "grad_norm": 1.1386107444709148, + "learning_rate": 1.0046102873526068e-05, + "loss": 0.2541351616382599, + "step": 3945 + }, + { + "epoch": 1.047802416677732, + "grad_norm": 1.0912263009271301, + "learning_rate": 1.0041712150471839e-05, + "loss": 0.2330317348241806, + "step": 3946 + }, + { + "epoch": 1.048067985659275, + "grad_norm": 1.0696190454357721, + "learning_rate": 1.0037321419375997e-05, + "loss": 0.23411181569099426, + "step": 3947 + }, + { + "epoch": 1.048333554640818, + "grad_norm": 1.1223872975815399, + "learning_rate": 1.0032930681085028e-05, + "loss": 0.2605017125606537, + "step": 3948 + }, + { + "epoch": 1.0485991236223609, + "grad_norm": 1.1766579775240698, + "learning_rate": 1.0028539936445407e-05, + "loss": 0.28651514649391174, + "step": 3949 + }, + { + "epoch": 1.0488646926039038, + "grad_norm": 1.1469362905517786, + "learning_rate": 1.0024149186303628e-05, + "loss": 0.22912876307964325, + "step": 3950 + }, + { + "epoch": 1.0491302615854468, + "grad_norm": 1.206814749340921, + "learning_rate": 1.001975843150617e-05, + 
"loss": 0.24032847583293915, + "step": 3951 + }, + { + "epoch": 1.0493958305669897, + "grad_norm": 1.0089656289438405, + "learning_rate": 1.0015367672899521e-05, + "loss": 0.17826229333877563, + "step": 3952 + }, + { + "epoch": 1.0496613995485327, + "grad_norm": 1.1440301784208975, + "learning_rate": 1.0010976911330163e-05, + "loss": 0.2619745433330536, + "step": 3953 + }, + { + "epoch": 1.0499269685300756, + "grad_norm": 1.1124743886634039, + "learning_rate": 1.0006586147644585e-05, + "loss": 0.24104374647140503, + "step": 3954 + }, + { + "epoch": 1.0501925375116186, + "grad_norm": 1.2465051058358483, + "learning_rate": 1.0002195382689277e-05, + "loss": 0.22913998365402222, + "step": 3955 + }, + { + "epoch": 1.0504581064931615, + "grad_norm": 1.2288244416278613, + "learning_rate": 9.997804617310724e-06, + "loss": 0.2625126838684082, + "step": 3956 + }, + { + "epoch": 1.0507236754747045, + "grad_norm": 1.1016811290492863, + "learning_rate": 9.993413852355416e-06, + "loss": 0.23098430037498474, + "step": 3957 + }, + { + "epoch": 1.0509892444562474, + "grad_norm": 1.2581954843436995, + "learning_rate": 9.98902308866984e-06, + "loss": 0.2866731882095337, + "step": 3958 + }, + { + "epoch": 1.0512548134377904, + "grad_norm": 1.2595027481112393, + "learning_rate": 9.984632327100482e-06, + "loss": 0.2520306706428528, + "step": 3959 + }, + { + "epoch": 1.0515203824193333, + "grad_norm": 1.2731218614589663, + "learning_rate": 9.980241568493834e-06, + "loss": 0.29688766598701477, + "step": 3960 + }, + { + "epoch": 1.0517859514008763, + "grad_norm": 1.2865298416208544, + "learning_rate": 9.975850813696375e-06, + "loss": 0.2876695990562439, + "step": 3961 + }, + { + "epoch": 1.0520515203824194, + "grad_norm": 1.1190033835182807, + "learning_rate": 9.971460063554595e-06, + "loss": 0.2402629554271698, + "step": 3962 + }, + { + "epoch": 1.0523170893639624, + "grad_norm": 1.288030170241207, + "learning_rate": 9.967069318914977e-06, + "loss": 0.32080164551734924, + "step": 3963 + 
}, + { + "epoch": 1.0525826583455054, + "grad_norm": 1.3484684025161604, + "learning_rate": 9.962678580624008e-06, + "loss": 0.2642936110496521, + "step": 3964 + }, + { + "epoch": 1.0528482273270483, + "grad_norm": 1.1668064537758471, + "learning_rate": 9.958287849528163e-06, + "loss": 0.255870521068573, + "step": 3965 + }, + { + "epoch": 1.0531137963085913, + "grad_norm": 1.1779058124731279, + "learning_rate": 9.953897126473933e-06, + "loss": 0.2695184350013733, + "step": 3966 + }, + { + "epoch": 1.0533793652901342, + "grad_norm": 1.1937956388734083, + "learning_rate": 9.949506412307795e-06, + "loss": 0.24576464295387268, + "step": 3967 + }, + { + "epoch": 1.0536449342716772, + "grad_norm": 1.210893055599799, + "learning_rate": 9.945115707876224e-06, + "loss": 0.26517459750175476, + "step": 3968 + }, + { + "epoch": 1.05391050325322, + "grad_norm": 1.261309936483727, + "learning_rate": 9.940725014025696e-06, + "loss": 0.30468082427978516, + "step": 3969 + }, + { + "epoch": 1.054176072234763, + "grad_norm": 1.1007633858966879, + "learning_rate": 9.936334331602687e-06, + "loss": 0.25299298763275146, + "step": 3970 + }, + { + "epoch": 1.054441641216306, + "grad_norm": 1.1621642625136148, + "learning_rate": 9.931943661453668e-06, + "loss": 0.2659488320350647, + "step": 3971 + }, + { + "epoch": 1.054707210197849, + "grad_norm": 1.129768041847351, + "learning_rate": 9.92755300442511e-06, + "loss": 0.25957295298576355, + "step": 3972 + }, + { + "epoch": 1.054972779179392, + "grad_norm": 1.0969185518732962, + "learning_rate": 9.923162361363476e-06, + "loss": 0.2416645884513855, + "step": 3973 + }, + { + "epoch": 1.0552383481609349, + "grad_norm": 1.1032067417924427, + "learning_rate": 9.91877173311523e-06, + "loss": 0.2627662122249603, + "step": 3974 + }, + { + "epoch": 1.0555039171424778, + "grad_norm": 1.1485553701369502, + "learning_rate": 9.91438112052684e-06, + "loss": 0.2876631021499634, + "step": 3975 + }, + { + "epoch": 1.0557694861240208, + "grad_norm": 
1.1306607772682384, + "learning_rate": 9.90999052444476e-06, + "loss": 0.28336596488952637, + "step": 3976 + }, + { + "epoch": 1.0560350551055637, + "grad_norm": 1.266085815857313, + "learning_rate": 9.905599945715443e-06, + "loss": 0.2970484495162964, + "step": 3977 + }, + { + "epoch": 1.0563006240871067, + "grad_norm": 1.188464425479595, + "learning_rate": 9.901209385185345e-06, + "loss": 0.27202755212783813, + "step": 3978 + }, + { + "epoch": 1.0565661930686496, + "grad_norm": 1.0823738866829473, + "learning_rate": 9.896818843700912e-06, + "loss": 0.2702459990978241, + "step": 3979 + }, + { + "epoch": 1.0568317620501926, + "grad_norm": 1.2166105195755876, + "learning_rate": 9.89242832210859e-06, + "loss": 0.26057881116867065, + "step": 3980 + }, + { + "epoch": 1.0570973310317355, + "grad_norm": 1.1526398422075472, + "learning_rate": 9.888037821254816e-06, + "loss": 0.24006876349449158, + "step": 3981 + }, + { + "epoch": 1.0573629000132785, + "grad_norm": 1.0864441989704317, + "learning_rate": 9.883647341986032e-06, + "loss": 0.2437625676393509, + "step": 3982 + }, + { + "epoch": 1.0576284689948214, + "grad_norm": 1.0572722810626467, + "learning_rate": 9.879256885148666e-06, + "loss": 0.24256819486618042, + "step": 3983 + }, + { + "epoch": 1.0578940379763644, + "grad_norm": 1.2008491436753201, + "learning_rate": 9.874866451589151e-06, + "loss": 0.2714581787586212, + "step": 3984 + }, + { + "epoch": 1.0581596069579073, + "grad_norm": 1.1859043120388024, + "learning_rate": 9.870476042153907e-06, + "loss": 0.30309075117111206, + "step": 3985 + }, + { + "epoch": 1.0584251759394503, + "grad_norm": 1.3001941243887445, + "learning_rate": 9.866085657689355e-06, + "loss": 0.2938288450241089, + "step": 3986 + }, + { + "epoch": 1.0586907449209932, + "grad_norm": 1.1041962963159588, + "learning_rate": 9.86169529904191e-06, + "loss": 0.23748518526554108, + "step": 3987 + }, + { + "epoch": 1.0589563139025362, + "grad_norm": 1.2345572480055271, + "learning_rate": 
9.857304967057977e-06, + "loss": 0.2883969247341156, + "step": 3988 + }, + { + "epoch": 1.0592218828840791, + "grad_norm": 1.0871048681541509, + "learning_rate": 9.852914662583966e-06, + "loss": 0.28301289677619934, + "step": 3989 + }, + { + "epoch": 1.059487451865622, + "grad_norm": 1.0733060702724175, + "learning_rate": 9.848524386466273e-06, + "loss": 0.22616548836231232, + "step": 3990 + }, + { + "epoch": 1.059753020847165, + "grad_norm": 1.06530549901144, + "learning_rate": 9.844134139551291e-06, + "loss": 0.2282804250717163, + "step": 3991 + }, + { + "epoch": 1.060018589828708, + "grad_norm": 1.154557745213229, + "learning_rate": 9.839743922685408e-06, + "loss": 0.2407834678888321, + "step": 3992 + }, + { + "epoch": 1.060284158810251, + "grad_norm": 1.0504099183304738, + "learning_rate": 9.835353736715007e-06, + "loss": 0.22690361738204956, + "step": 3993 + }, + { + "epoch": 1.060549727791794, + "grad_norm": 1.529267187296219, + "learning_rate": 9.830963582486465e-06, + "loss": 0.23291411995887756, + "step": 3994 + }, + { + "epoch": 1.0608152967733369, + "grad_norm": 1.0804914844168854, + "learning_rate": 9.82657346084615e-06, + "loss": 0.24524198472499847, + "step": 3995 + }, + { + "epoch": 1.0610808657548798, + "grad_norm": 1.130929241291739, + "learning_rate": 9.822183372640426e-06, + "loss": 0.22087743878364563, + "step": 3996 + }, + { + "epoch": 1.0613464347364228, + "grad_norm": 1.1374060021264791, + "learning_rate": 9.817793318715652e-06, + "loss": 0.2459079772233963, + "step": 3997 + }, + { + "epoch": 1.0616120037179657, + "grad_norm": 1.1393890830478974, + "learning_rate": 9.813403299918178e-06, + "loss": 0.24429920315742493, + "step": 3998 + }, + { + "epoch": 1.0618775726995087, + "grad_norm": 1.140499707599593, + "learning_rate": 9.809013317094345e-06, + "loss": 0.2332335114479065, + "step": 3999 + }, + { + "epoch": 1.0621431416810516, + "grad_norm": 1.2157908167694267, + "learning_rate": 9.804623371090493e-06, + "loss": 0.2861659526824951, + 
"step": 4000 + }, + { + "epoch": 1.0624087106625946, + "grad_norm": 1.1293440606459217, + "learning_rate": 9.800233462752949e-06, + "loss": 0.22731532156467438, + "step": 4001 + }, + { + "epoch": 1.0626742796441375, + "grad_norm": 1.127775309467411, + "learning_rate": 9.795843592928036e-06, + "loss": 0.245025634765625, + "step": 4002 + }, + { + "epoch": 1.0629398486256805, + "grad_norm": 1.2380242649872155, + "learning_rate": 9.791453762462075e-06, + "loss": 0.2826273441314697, + "step": 4003 + }, + { + "epoch": 1.0632054176072234, + "grad_norm": 1.1330484645300947, + "learning_rate": 9.787063972201368e-06, + "loss": 0.24737229943275452, + "step": 4004 + }, + { + "epoch": 1.0634709865887664, + "grad_norm": 1.3814870803010457, + "learning_rate": 9.782674222992214e-06, + "loss": 0.23368477821350098, + "step": 4005 + }, + { + "epoch": 1.0637365555703093, + "grad_norm": 1.2631953536046527, + "learning_rate": 9.778284515680908e-06, + "loss": 0.2754492461681366, + "step": 4006 + }, + { + "epoch": 1.0640021245518523, + "grad_norm": 1.1906091191722363, + "learning_rate": 9.773894851113732e-06, + "loss": 0.2814168334007263, + "step": 4007 + }, + { + "epoch": 1.0642676935333952, + "grad_norm": 1.1594492512554253, + "learning_rate": 9.769505230136962e-06, + "loss": 0.25388047099113464, + "step": 4008 + }, + { + "epoch": 1.0645332625149382, + "grad_norm": 1.2618382745485697, + "learning_rate": 9.765115653596867e-06, + "loss": 0.25435230135917664, + "step": 4009 + }, + { + "epoch": 1.0647988314964811, + "grad_norm": 1.2251032153283614, + "learning_rate": 9.760726122339698e-06, + "loss": 0.265840083360672, + "step": 4010 + }, + { + "epoch": 1.065064400478024, + "grad_norm": 1.1297656349054435, + "learning_rate": 9.756336637211716e-06, + "loss": 0.2533451020717621, + "step": 4011 + }, + { + "epoch": 1.065329969459567, + "grad_norm": 1.0890158421111886, + "learning_rate": 9.751947199059155e-06, + "loss": 0.25214290618896484, + "step": 4012 + }, + { + "epoch": 1.06559553844111, + 
"grad_norm": 1.0603532415232781, + "learning_rate": 9.74755780872825e-06, + "loss": 0.25039419531822205, + "step": 4013 + }, + { + "epoch": 1.065861107422653, + "grad_norm": 1.0177623632775965, + "learning_rate": 9.74316846706522e-06, + "loss": 0.21251091361045837, + "step": 4014 + }, + { + "epoch": 1.066126676404196, + "grad_norm": 1.123294230398497, + "learning_rate": 9.738779174916281e-06, + "loss": 0.25898969173431396, + "step": 4015 + }, + { + "epoch": 1.0663922453857388, + "grad_norm": 1.1054663361669936, + "learning_rate": 9.734389933127639e-06, + "loss": 0.2655499577522278, + "step": 4016 + }, + { + "epoch": 1.0666578143672818, + "grad_norm": 1.1153507141873742, + "learning_rate": 9.730000742545485e-06, + "loss": 0.2221338450908661, + "step": 4017 + }, + { + "epoch": 1.0669233833488247, + "grad_norm": 1.1746716643835395, + "learning_rate": 9.725611604016002e-06, + "loss": 0.2567589581012726, + "step": 4018 + }, + { + "epoch": 1.0671889523303677, + "grad_norm": 1.1090772377521565, + "learning_rate": 9.721222518385361e-06, + "loss": 0.24440976977348328, + "step": 4019 + }, + { + "epoch": 1.0674545213119107, + "grad_norm": 1.061787642846094, + "learning_rate": 9.716833486499735e-06, + "loss": 0.2229192852973938, + "step": 4020 + }, + { + "epoch": 1.0677200902934538, + "grad_norm": 1.1014121727705226, + "learning_rate": 9.712444509205273e-06, + "loss": 0.26231470704078674, + "step": 4021 + }, + { + "epoch": 1.0679856592749968, + "grad_norm": 1.2531191320236732, + "learning_rate": 9.708055587348119e-06, + "loss": 0.25099092721939087, + "step": 4022 + }, + { + "epoch": 1.0682512282565397, + "grad_norm": 1.1402160070516023, + "learning_rate": 9.703666721774403e-06, + "loss": 0.22979633510112762, + "step": 4023 + }, + { + "epoch": 1.0685167972380827, + "grad_norm": 1.09571485621585, + "learning_rate": 9.699277913330252e-06, + "loss": 0.2361093908548355, + "step": 4024 + }, + { + "epoch": 1.0687823662196256, + "grad_norm": 1.0765448804717204, + "learning_rate": 
9.694889162861768e-06, + "loss": 0.2390863001346588, + "step": 4025 + }, + { + "epoch": 1.0690479352011686, + "grad_norm": 1.2569917808844517, + "learning_rate": 9.690500471215057e-06, + "loss": 0.24917885661125183, + "step": 4026 + }, + { + "epoch": 1.0693135041827115, + "grad_norm": 1.1387127210628816, + "learning_rate": 9.686111839236206e-06, + "loss": 0.24215272068977356, + "step": 4027 + }, + { + "epoch": 1.0695790731642545, + "grad_norm": 1.2809085503832063, + "learning_rate": 9.681723267771284e-06, + "loss": 0.27874231338500977, + "step": 4028 + }, + { + "epoch": 1.0698446421457974, + "grad_norm": 1.1707122559783085, + "learning_rate": 9.677334757666368e-06, + "loss": 0.24076086282730103, + "step": 4029 + }, + { + "epoch": 1.0701102111273404, + "grad_norm": 1.1092369229920938, + "learning_rate": 9.672946309767504e-06, + "loss": 0.2444242238998413, + "step": 4030 + }, + { + "epoch": 1.0703757801088833, + "grad_norm": 1.2086874522857378, + "learning_rate": 9.668557924920735e-06, + "loss": 0.2737279236316681, + "step": 4031 + }, + { + "epoch": 1.0706413490904263, + "grad_norm": 1.1006436240463247, + "learning_rate": 9.664169603972091e-06, + "loss": 0.24105575680732727, + "step": 4032 + }, + { + "epoch": 1.0709069180719692, + "grad_norm": 1.336482466569566, + "learning_rate": 9.659781347767584e-06, + "loss": 0.27791836857795715, + "step": 4033 + }, + { + "epoch": 1.0711724870535122, + "grad_norm": 1.1518461528529822, + "learning_rate": 9.655393157153221e-06, + "loss": 0.255472868680954, + "step": 4034 + }, + { + "epoch": 1.0714380560350552, + "grad_norm": 1.371220848551681, + "learning_rate": 9.651005032974994e-06, + "loss": 0.2523707151412964, + "step": 4035 + }, + { + "epoch": 1.071703625016598, + "grad_norm": 1.235756547113907, + "learning_rate": 9.64661697607888e-06, + "loss": 0.24584606289863586, + "step": 4036 + }, + { + "epoch": 1.071969193998141, + "grad_norm": 1.1497174260677319, + "learning_rate": 9.64222898731084e-06, + "loss": 0.25182732939720154, + 
"step": 4037 + }, + { + "epoch": 1.072234762979684, + "grad_norm": 1.0822892740683951, + "learning_rate": 9.637841067516837e-06, + "loss": 0.254008412361145, + "step": 4038 + }, + { + "epoch": 1.072500331961227, + "grad_norm": 1.080204167750926, + "learning_rate": 9.633453217542806e-06, + "loss": 0.2314324826002121, + "step": 4039 + }, + { + "epoch": 1.07276590094277, + "grad_norm": 1.1139945732367915, + "learning_rate": 9.62906543823467e-06, + "loss": 0.2256058305501938, + "step": 4040 + }, + { + "epoch": 1.0730314699243129, + "grad_norm": 1.283214941862177, + "learning_rate": 9.624677730438344e-06, + "loss": 0.2577894330024719, + "step": 4041 + }, + { + "epoch": 1.0732970389058558, + "grad_norm": 1.0911199623079508, + "learning_rate": 9.620290094999723e-06, + "loss": 0.23520560562610626, + "step": 4042 + }, + { + "epoch": 1.0735626078873988, + "grad_norm": 1.1791405346126818, + "learning_rate": 9.615902532764695e-06, + "loss": 0.2472849190235138, + "step": 4043 + }, + { + "epoch": 1.0738281768689417, + "grad_norm": 1.2195787110249676, + "learning_rate": 9.611515044579128e-06, + "loss": 0.25053414702415466, + "step": 4044 + }, + { + "epoch": 1.0740937458504847, + "grad_norm": 1.1090102650773974, + "learning_rate": 9.607127631288879e-06, + "loss": 0.24229007959365845, + "step": 4045 + }, + { + "epoch": 1.0743593148320276, + "grad_norm": 1.4628298980675831, + "learning_rate": 9.602740293739786e-06, + "loss": 0.2793073058128357, + "step": 4046 + }, + { + "epoch": 1.0746248838135706, + "grad_norm": 1.225079236387791, + "learning_rate": 9.598353032777682e-06, + "loss": 0.24547399580478668, + "step": 4047 + }, + { + "epoch": 1.0748904527951135, + "grad_norm": 1.1980997957436126, + "learning_rate": 9.593965849248378e-06, + "loss": 0.2776937186717987, + "step": 4048 + }, + { + "epoch": 1.0751560217766565, + "grad_norm": 1.0781858695117066, + "learning_rate": 9.589578743997668e-06, + "loss": 0.22677727043628693, + "step": 4049 + }, + { + "epoch": 1.0754215907581994, + 
"grad_norm": 1.4867723677136682, + "learning_rate": 9.585191717871336e-06, + "loss": 0.23254704475402832, + "step": 4050 + }, + { + "epoch": 1.0756871597397424, + "grad_norm": 1.3243435003953368, + "learning_rate": 9.580804771715148e-06, + "loss": 0.2899828255176544, + "step": 4051 + }, + { + "epoch": 1.0759527287212853, + "grad_norm": 1.1397018772236696, + "learning_rate": 9.576417906374856e-06, + "loss": 0.24632850289344788, + "step": 4052 + }, + { + "epoch": 1.0762182977028283, + "grad_norm": 1.2322214200527608, + "learning_rate": 9.572031122696196e-06, + "loss": 0.2661561369895935, + "step": 4053 + }, + { + "epoch": 1.0764838666843712, + "grad_norm": 1.1394013200357536, + "learning_rate": 9.567644421524889e-06, + "loss": 0.22364279627799988, + "step": 4054 + }, + { + "epoch": 1.0767494356659142, + "grad_norm": 1.5026366502842776, + "learning_rate": 9.563257803706635e-06, + "loss": 0.26748427748680115, + "step": 4055 + }, + { + "epoch": 1.0770150046474571, + "grad_norm": 1.1794922225625246, + "learning_rate": 9.55887127008713e-06, + "loss": 0.22851283848285675, + "step": 4056 + }, + { + "epoch": 1.077280573629, + "grad_norm": 1.1340260741391435, + "learning_rate": 9.554484821512037e-06, + "loss": 0.2456260323524475, + "step": 4057 + }, + { + "epoch": 1.077546142610543, + "grad_norm": 1.2884657617459025, + "learning_rate": 9.55009845882702e-06, + "loss": 0.2556169629096985, + "step": 4058 + }, + { + "epoch": 1.077811711592086, + "grad_norm": 1.274618544457263, + "learning_rate": 9.545712182877714e-06, + "loss": 0.280727744102478, + "step": 4059 + }, + { + "epoch": 1.078077280573629, + "grad_norm": 1.1205087247319334, + "learning_rate": 9.54132599450974e-06, + "loss": 0.25315386056900024, + "step": 4060 + }, + { + "epoch": 1.078342849555172, + "grad_norm": 1.1990539773915618, + "learning_rate": 9.536939894568704e-06, + "loss": 0.21985477209091187, + "step": 4061 + }, + { + "epoch": 1.0786084185367149, + "grad_norm": 1.1575613416248978, + "learning_rate": 
9.532553883900196e-06, + "loss": 0.24329043924808502, + "step": 4062 + }, + { + "epoch": 1.0788739875182578, + "grad_norm": 1.173950465827748, + "learning_rate": 9.528167963349786e-06, + "loss": 0.2362256497144699, + "step": 4063 + }, + { + "epoch": 1.0791395564998008, + "grad_norm": 1.1458704347110154, + "learning_rate": 9.523782133763027e-06, + "loss": 0.23685476183891296, + "step": 4064 + }, + { + "epoch": 1.0794051254813437, + "grad_norm": 1.2383774104342302, + "learning_rate": 9.519396395985456e-06, + "loss": 0.26232481002807617, + "step": 4065 + }, + { + "epoch": 1.0796706944628867, + "grad_norm": 1.2768574792534622, + "learning_rate": 9.515010750862594e-06, + "loss": 0.25196313858032227, + "step": 4066 + }, + { + "epoch": 1.0799362634444296, + "grad_norm": 1.082792256362845, + "learning_rate": 9.510625199239939e-06, + "loss": 0.22520464658737183, + "step": 4067 + }, + { + "epoch": 1.0802018324259726, + "grad_norm": 1.190229461562689, + "learning_rate": 9.506239741962971e-06, + "loss": 0.27422505617141724, + "step": 4068 + }, + { + "epoch": 1.0804674014075155, + "grad_norm": 1.3120430811123187, + "learning_rate": 9.50185437987716e-06, + "loss": 0.2646682560443878, + "step": 4069 + }, + { + "epoch": 1.0807329703890585, + "grad_norm": 1.3425819541318131, + "learning_rate": 9.497469113827949e-06, + "loss": 0.2661365866661072, + "step": 4070 + }, + { + "epoch": 1.0809985393706014, + "grad_norm": 1.1101351469883673, + "learning_rate": 9.493083944660766e-06, + "loss": 0.23156839609146118, + "step": 4071 + }, + { + "epoch": 1.0812641083521444, + "grad_norm": 1.1805541153651362, + "learning_rate": 9.488698873221021e-06, + "loss": 0.25353243947029114, + "step": 4072 + }, + { + "epoch": 1.0815296773336873, + "grad_norm": 1.2862671823918606, + "learning_rate": 9.484313900354099e-06, + "loss": 0.27488404512405396, + "step": 4073 + }, + { + "epoch": 1.0817952463152305, + "grad_norm": 1.4041005997261422, + "learning_rate": 9.479929026905378e-06, + "loss": 
0.2580753564834595, + "step": 4074 + }, + { + "epoch": 1.0820608152967734, + "grad_norm": 1.1405056260482733, + "learning_rate": 9.475544253720206e-06, + "loss": 0.2425471544265747, + "step": 4075 + }, + { + "epoch": 1.0823263842783164, + "grad_norm": 1.2040355319488043, + "learning_rate": 9.471159581643918e-06, + "loss": 0.25268295407295227, + "step": 4076 + }, + { + "epoch": 1.0825919532598594, + "grad_norm": 1.1573228524057126, + "learning_rate": 9.466775011521825e-06, + "loss": 0.2683602571487427, + "step": 4077 + }, + { + "epoch": 1.0828575222414023, + "grad_norm": 1.1300610618916742, + "learning_rate": 9.462390544199221e-06, + "loss": 0.24945034086704254, + "step": 4078 + }, + { + "epoch": 1.0831230912229453, + "grad_norm": 1.1698494765527112, + "learning_rate": 9.458006180521379e-06, + "loss": 0.21784156560897827, + "step": 4079 + }, + { + "epoch": 1.0833886602044882, + "grad_norm": 1.136268907040887, + "learning_rate": 9.453621921333554e-06, + "loss": 0.22704020142555237, + "step": 4080 + }, + { + "epoch": 1.0836542291860312, + "grad_norm": 1.1373990713388034, + "learning_rate": 9.449237767480979e-06, + "loss": 0.2532106637954712, + "step": 4081 + }, + { + "epoch": 1.0839197981675741, + "grad_norm": 1.1568862012297532, + "learning_rate": 9.444853719808864e-06, + "loss": 0.27809134125709534, + "step": 4082 + }, + { + "epoch": 1.084185367149117, + "grad_norm": 1.2102387789201872, + "learning_rate": 9.440469779162407e-06, + "loss": 0.25704264640808105, + "step": 4083 + }, + { + "epoch": 1.08445093613066, + "grad_norm": 1.1827141084910668, + "learning_rate": 9.436085946386778e-06, + "loss": 0.2656276226043701, + "step": 4084 + }, + { + "epoch": 1.084716505112203, + "grad_norm": 1.256991317445651, + "learning_rate": 9.431702222327126e-06, + "loss": 0.277826726436615, + "step": 4085 + }, + { + "epoch": 1.084982074093746, + "grad_norm": 1.2975495041461134, + "learning_rate": 9.427318607828584e-06, + "loss": 0.24656976759433746, + "step": 4086 + }, + { + "epoch": 
1.0852476430752889, + "grad_norm": 1.1974770836803283, + "learning_rate": 9.42293510373626e-06, + "loss": 0.2498110830783844, + "step": 4087 + }, + { + "epoch": 1.0855132120568318, + "grad_norm": 1.1492935678310237, + "learning_rate": 9.418551710895243e-06, + "loss": 0.24574093520641327, + "step": 4088 + }, + { + "epoch": 1.0857787810383748, + "grad_norm": 1.2274895872775384, + "learning_rate": 9.414168430150601e-06, + "loss": 0.25271761417388916, + "step": 4089 + }, + { + "epoch": 1.0860443500199177, + "grad_norm": 1.1759358027679858, + "learning_rate": 9.409785262347373e-06, + "loss": 0.29269370436668396, + "step": 4090 + }, + { + "epoch": 1.0863099190014607, + "grad_norm": 1.1247973273146177, + "learning_rate": 9.405402208330581e-06, + "loss": 0.244449645280838, + "step": 4091 + }, + { + "epoch": 1.0865754879830036, + "grad_norm": 1.186787867713906, + "learning_rate": 9.401019268945237e-06, + "loss": 0.23785406351089478, + "step": 4092 + }, + { + "epoch": 1.0868410569645466, + "grad_norm": 1.1479686632621091, + "learning_rate": 9.39663644503631e-06, + "loss": 0.2493479996919632, + "step": 4093 + }, + { + "epoch": 1.0871066259460895, + "grad_norm": 1.1474347559215512, + "learning_rate": 9.392253737448764e-06, + "loss": 0.23758000135421753, + "step": 4094 + }, + { + "epoch": 1.0873721949276325, + "grad_norm": 1.0946885138749496, + "learning_rate": 9.387871147027528e-06, + "loss": 0.22560475766658783, + "step": 4095 + }, + { + "epoch": 1.0876377639091754, + "grad_norm": 1.1552533162715968, + "learning_rate": 9.383488674617515e-06, + "loss": 0.2558273673057556, + "step": 4096 + }, + { + "epoch": 1.0879033328907184, + "grad_norm": 1.2619180705972233, + "learning_rate": 9.379106321063618e-06, + "loss": 0.2822023034095764, + "step": 4097 + }, + { + "epoch": 1.0881689018722613, + "grad_norm": 1.2076346653444254, + "learning_rate": 9.374724087210698e-06, + "loss": 0.2596978545188904, + "step": 4098 + }, + { + "epoch": 1.0884344708538043, + "grad_norm": 
1.6785014002913365, + "learning_rate": 9.370341973903597e-06, + "loss": 0.25353628396987915, + "step": 4099 + }, + { + "epoch": 1.0887000398353472, + "grad_norm": 1.2184499887942242, + "learning_rate": 9.365959981987135e-06, + "loss": 0.2547294497489929, + "step": 4100 + }, + { + "epoch": 1.0889656088168902, + "grad_norm": 1.40658558629773, + "learning_rate": 9.361578112306115e-06, + "loss": 0.2688470780849457, + "step": 4101 + }, + { + "epoch": 1.0892311777984331, + "grad_norm": 1.207208011814592, + "learning_rate": 9.357196365705303e-06, + "loss": 0.25772029161453247, + "step": 4102 + }, + { + "epoch": 1.089496746779976, + "grad_norm": 1.3552039168974384, + "learning_rate": 9.352814743029454e-06, + "loss": 0.2875550091266632, + "step": 4103 + }, + { + "epoch": 1.089762315761519, + "grad_norm": 1.4164869081453233, + "learning_rate": 9.34843324512329e-06, + "loss": 0.23085735738277435, + "step": 4104 + }, + { + "epoch": 1.090027884743062, + "grad_norm": 1.2013725541896922, + "learning_rate": 9.34405187283151e-06, + "loss": 0.2607901096343994, + "step": 4105 + }, + { + "epoch": 1.090293453724605, + "grad_norm": 1.1738523720935938, + "learning_rate": 9.339670626998791e-06, + "loss": 0.26165345311164856, + "step": 4106 + }, + { + "epoch": 1.090559022706148, + "grad_norm": 1.1931234826270498, + "learning_rate": 9.335289508469789e-06, + "loss": 0.27884238958358765, + "step": 4107 + }, + { + "epoch": 1.0908245916876909, + "grad_norm": 1.283025870689831, + "learning_rate": 9.33090851808913e-06, + "loss": 0.2689289152622223, + "step": 4108 + }, + { + "epoch": 1.0910901606692338, + "grad_norm": 1.2574326426613287, + "learning_rate": 9.326527656701414e-06, + "loss": 0.2633207440376282, + "step": 4109 + }, + { + "epoch": 1.0913557296507768, + "grad_norm": 1.1611202948336292, + "learning_rate": 9.322146925151226e-06, + "loss": 0.26001888513565063, + "step": 4110 + }, + { + "epoch": 1.0916212986323197, + "grad_norm": 1.1436383156785508, + "learning_rate": 9.31776632428312e-06, 
+ "loss": 0.2739099860191345, + "step": 4111 + }, + { + "epoch": 1.0918868676138627, + "grad_norm": 1.1080458686771364, + "learning_rate": 9.313385854941616e-06, + "loss": 0.24885550141334534, + "step": 4112 + }, + { + "epoch": 1.0921524365954056, + "grad_norm": 1.1643870148920956, + "learning_rate": 9.309005517971222e-06, + "loss": 0.2609873414039612, + "step": 4113 + }, + { + "epoch": 1.0924180055769486, + "grad_norm": 1.427636157796487, + "learning_rate": 9.304625314216415e-06, + "loss": 0.28853538632392883, + "step": 4114 + }, + { + "epoch": 1.0926835745584915, + "grad_norm": 1.072833070391428, + "learning_rate": 9.300245244521647e-06, + "loss": 0.2629924714565277, + "step": 4115 + }, + { + "epoch": 1.0929491435400345, + "grad_norm": 1.1804644749067619, + "learning_rate": 9.295865309731342e-06, + "loss": 0.2687820494174957, + "step": 4116 + }, + { + "epoch": 1.0932147125215774, + "grad_norm": 1.0831905202820669, + "learning_rate": 9.2914855106899e-06, + "loss": 0.2293676733970642, + "step": 4117 + }, + { + "epoch": 1.0934802815031204, + "grad_norm": 1.1645005992728827, + "learning_rate": 9.287105848241694e-06, + "loss": 0.25261443853378296, + "step": 4118 + }, + { + "epoch": 1.0937458504846633, + "grad_norm": 1.1209341991417805, + "learning_rate": 9.282726323231077e-06, + "loss": 0.26238197088241577, + "step": 4119 + }, + { + "epoch": 1.0940114194662063, + "grad_norm": 1.1230838898563178, + "learning_rate": 9.278346936502364e-06, + "loss": 0.25718310475349426, + "step": 4120 + }, + { + "epoch": 1.0942769884477492, + "grad_norm": 1.1872711264618019, + "learning_rate": 9.273967688899849e-06, + "loss": 0.23810459673404694, + "step": 4121 + }, + { + "epoch": 1.0945425574292922, + "grad_norm": 1.0680734314830214, + "learning_rate": 9.269588581267804e-06, + "loss": 0.2197081446647644, + "step": 4122 + }, + { + "epoch": 1.0948081264108351, + "grad_norm": 1.1043223190124707, + "learning_rate": 9.265209614450463e-06, + "loss": 0.2429335117340088, + "step": 4123 + }, + { 
+ "epoch": 1.095073695392378, + "grad_norm": 1.1380552272436657, + "learning_rate": 9.260830789292043e-06, + "loss": 0.23028087615966797, + "step": 4124 + }, + { + "epoch": 1.095339264373921, + "grad_norm": 1.2203393500716264, + "learning_rate": 9.25645210663673e-06, + "loss": 0.2783699035644531, + "step": 4125 + }, + { + "epoch": 1.095604833355464, + "grad_norm": 1.1686978964802806, + "learning_rate": 9.25207356732868e-06, + "loss": 0.25055867433547974, + "step": 4126 + }, + { + "epoch": 1.095870402337007, + "grad_norm": 1.2313132067115398, + "learning_rate": 9.247695172212026e-06, + "loss": 0.28629350662231445, + "step": 4127 + }, + { + "epoch": 1.09613597131855, + "grad_norm": 1.2403423880097748, + "learning_rate": 9.24331692213087e-06, + "loss": 0.2626604735851288, + "step": 4128 + }, + { + "epoch": 1.0964015403000928, + "grad_norm": 1.2478078302425437, + "learning_rate": 9.238938817929288e-06, + "loss": 0.237881600856781, + "step": 4129 + }, + { + "epoch": 1.0966671092816358, + "grad_norm": 1.144955023428898, + "learning_rate": 9.234560860451325e-06, + "loss": 0.2602109909057617, + "step": 4130 + }, + { + "epoch": 1.0969326782631788, + "grad_norm": 1.1775071297104545, + "learning_rate": 9.230183050541001e-06, + "loss": 0.2721475064754486, + "step": 4131 + }, + { + "epoch": 1.0971982472447217, + "grad_norm": 1.7664052681173497, + "learning_rate": 9.225805389042307e-06, + "loss": 0.25844910740852356, + "step": 4132 + }, + { + "epoch": 1.0974638162262647, + "grad_norm": 1.1612334633259545, + "learning_rate": 9.221427876799201e-06, + "loss": 0.26671040058135986, + "step": 4133 + }, + { + "epoch": 1.0977293852078078, + "grad_norm": 1.3116748641368057, + "learning_rate": 9.21705051465562e-06, + "loss": 0.2610115706920624, + "step": 4134 + }, + { + "epoch": 1.0979949541893508, + "grad_norm": 1.1348320206960383, + "learning_rate": 9.212673303455464e-06, + "loss": 0.2518802881240845, + "step": 4135 + }, + { + "epoch": 1.0982605231708937, + "grad_norm": 
1.2313324732863455, + "learning_rate": 9.20829624404261e-06, + "loss": 0.28600364923477173, + "step": 4136 + }, + { + "epoch": 1.0985260921524367, + "grad_norm": 1.0787729379648288, + "learning_rate": 9.203919337260903e-06, + "loss": 0.2649504840373993, + "step": 4137 + }, + { + "epoch": 1.0987916611339796, + "grad_norm": 1.0717018301402161, + "learning_rate": 9.199542583954159e-06, + "loss": 0.22613298892974854, + "step": 4138 + }, + { + "epoch": 1.0990572301155226, + "grad_norm": 1.1049408193201318, + "learning_rate": 9.195165984966163e-06, + "loss": 0.22546961903572083, + "step": 4139 + }, + { + "epoch": 1.0993227990970655, + "grad_norm": 1.1132579479037434, + "learning_rate": 9.190789541140675e-06, + "loss": 0.20618169009685516, + "step": 4140 + }, + { + "epoch": 1.0995883680786085, + "grad_norm": 1.1910818165933836, + "learning_rate": 9.18641325332142e-06, + "loss": 0.2434382289648056, + "step": 4141 + }, + { + "epoch": 1.0998539370601514, + "grad_norm": 1.0160349259469954, + "learning_rate": 9.182037122352092e-06, + "loss": 0.19114840030670166, + "step": 4142 + }, + { + "epoch": 1.1001195060416944, + "grad_norm": 1.371175220167047, + "learning_rate": 9.17766114907636e-06, + "loss": 0.2793614864349365, + "step": 4143 + }, + { + "epoch": 1.1003850750232373, + "grad_norm": 1.3230746818872392, + "learning_rate": 9.173285334337863e-06, + "loss": 0.2908466160297394, + "step": 4144 + }, + { + "epoch": 1.1006506440047803, + "grad_norm": 1.1707475106499343, + "learning_rate": 9.168909678980199e-06, + "loss": 0.260933518409729, + "step": 4145 + }, + { + "epoch": 1.1009162129863233, + "grad_norm": 1.170079737982666, + "learning_rate": 9.16453418384695e-06, + "loss": 0.2819761037826538, + "step": 4146 + }, + { + "epoch": 1.1011817819678662, + "grad_norm": 1.251357168283767, + "learning_rate": 9.160158849781657e-06, + "loss": 0.25290411710739136, + "step": 4147 + }, + { + "epoch": 1.1014473509494092, + "grad_norm": 1.0782378998536035, + "learning_rate": 
9.155783677627831e-06, + "loss": 0.21255841851234436, + "step": 4148 + }, + { + "epoch": 1.101712919930952, + "grad_norm": 0.9808101112826028, + "learning_rate": 9.151408668228958e-06, + "loss": 0.20631751418113708, + "step": 4149 + }, + { + "epoch": 1.101978488912495, + "grad_norm": 1.0273447794760797, + "learning_rate": 9.147033822428484e-06, + "loss": 0.20976273715496063, + "step": 4150 + }, + { + "epoch": 1.102244057894038, + "grad_norm": 1.0193138467531315, + "learning_rate": 9.142659141069828e-06, + "loss": 0.21464477479457855, + "step": 4151 + }, + { + "epoch": 1.102509626875581, + "grad_norm": 1.182770191723374, + "learning_rate": 9.13828462499638e-06, + "loss": 0.2262338101863861, + "step": 4152 + }, + { + "epoch": 1.102775195857124, + "grad_norm": 1.2057409707570275, + "learning_rate": 9.133910275051493e-06, + "loss": 0.26331469416618347, + "step": 4153 + }, + { + "epoch": 1.1030407648386669, + "grad_norm": 1.1729382721759571, + "learning_rate": 9.129536092078488e-06, + "loss": 0.26280921697616577, + "step": 4154 + }, + { + "epoch": 1.1033063338202098, + "grad_norm": 1.1474203361843618, + "learning_rate": 9.12516207692066e-06, + "loss": 0.2527182698249817, + "step": 4155 + }, + { + "epoch": 1.1035719028017528, + "grad_norm": 1.114868090084267, + "learning_rate": 9.120788230421267e-06, + "loss": 0.21416455507278442, + "step": 4156 + }, + { + "epoch": 1.1038374717832957, + "grad_norm": 1.149698502937602, + "learning_rate": 9.116414553423535e-06, + "loss": 0.25882014632225037, + "step": 4157 + }, + { + "epoch": 1.1041030407648387, + "grad_norm": 1.1615644224212993, + "learning_rate": 9.112041046770653e-06, + "loss": 0.20510248839855194, + "step": 4158 + }, + { + "epoch": 1.1043686097463816, + "grad_norm": 1.372282887646487, + "learning_rate": 9.107667711305786e-06, + "loss": 0.2348058819770813, + "step": 4159 + }, + { + "epoch": 1.1046341787279246, + "grad_norm": 1.2389958643414019, + "learning_rate": 9.10329454787206e-06, + "loss": 0.24561384320259094, + 
"step": 4160 + }, + { + "epoch": 1.1048997477094675, + "grad_norm": 1.133562757165387, + "learning_rate": 9.098921557312573e-06, + "loss": 0.23025226593017578, + "step": 4161 + }, + { + "epoch": 1.1051653166910105, + "grad_norm": 1.2483870007074676, + "learning_rate": 9.094548740470375e-06, + "loss": 0.2724589705467224, + "step": 4162 + }, + { + "epoch": 1.1054308856725534, + "grad_norm": 1.2319217483915181, + "learning_rate": 9.090176098188504e-06, + "loss": 0.25196704268455505, + "step": 4163 + }, + { + "epoch": 1.1056964546540964, + "grad_norm": 1.0723466269314343, + "learning_rate": 9.085803631309953e-06, + "loss": 0.22673696279525757, + "step": 4164 + }, + { + "epoch": 1.1059620236356393, + "grad_norm": 1.3129015386402236, + "learning_rate": 9.081431340677679e-06, + "loss": 0.23913519084453583, + "step": 4165 + }, + { + "epoch": 1.1062275926171823, + "grad_norm": 1.3859005835374885, + "learning_rate": 9.07705922713461e-06, + "loss": 0.2723861336708069, + "step": 4166 + }, + { + "epoch": 1.1064931615987252, + "grad_norm": 1.15651219284811, + "learning_rate": 9.072687291523636e-06, + "loss": 0.262167364358902, + "step": 4167 + }, + { + "epoch": 1.1067587305802682, + "grad_norm": 1.4186208937810438, + "learning_rate": 9.068315534687615e-06, + "loss": 0.2394658625125885, + "step": 4168 + }, + { + "epoch": 1.1070242995618111, + "grad_norm": 1.116555661084851, + "learning_rate": 9.063943957469373e-06, + "loss": 0.2547619938850403, + "step": 4169 + }, + { + "epoch": 1.107289868543354, + "grad_norm": 1.1242129377429575, + "learning_rate": 9.059572560711697e-06, + "loss": 0.24057570099830627, + "step": 4170 + }, + { + "epoch": 1.107555437524897, + "grad_norm": 1.057297781351654, + "learning_rate": 9.055201345257331e-06, + "loss": 0.21729445457458496, + "step": 4171 + }, + { + "epoch": 1.10782100650644, + "grad_norm": 1.2310508574302907, + "learning_rate": 9.05083031194901e-06, + "loss": 0.26590001583099365, + "step": 4172 + }, + { + "epoch": 1.108086575487983, + 
"grad_norm": 1.2932563576951384, + "learning_rate": 9.04645946162941e-06, + "loss": 0.26114848256111145, + "step": 4173 + }, + { + "epoch": 1.108352144469526, + "grad_norm": 1.1776684059902396, + "learning_rate": 9.04208879514118e-06, + "loss": 0.2255469262599945, + "step": 4174 + }, + { + "epoch": 1.1086177134510689, + "grad_norm": 1.1791871226781019, + "learning_rate": 9.037718313326932e-06, + "loss": 0.2597671151161194, + "step": 4175 + }, + { + "epoch": 1.1088832824326118, + "grad_norm": 1.1140795273935102, + "learning_rate": 9.033348017029247e-06, + "loss": 0.24820469319820404, + "step": 4176 + }, + { + "epoch": 1.1091488514141548, + "grad_norm": 1.2459789693741423, + "learning_rate": 9.028977907090661e-06, + "loss": 0.23886600136756897, + "step": 4177 + }, + { + "epoch": 1.1094144203956977, + "grad_norm": 1.091274384086243, + "learning_rate": 9.024607984353682e-06, + "loss": 0.24204152822494507, + "step": 4178 + }, + { + "epoch": 1.1096799893772407, + "grad_norm": 1.0934112812518066, + "learning_rate": 9.02023824966078e-06, + "loss": 0.23246638476848602, + "step": 4179 + }, + { + "epoch": 1.1099455583587836, + "grad_norm": 1.124332043141092, + "learning_rate": 9.015868703854386e-06, + "loss": 0.25057342648506165, + "step": 4180 + }, + { + "epoch": 1.1102111273403266, + "grad_norm": 1.117105393632997, + "learning_rate": 9.011499347776902e-06, + "loss": 0.2316257357597351, + "step": 4181 + }, + { + "epoch": 1.1104766963218695, + "grad_norm": 1.4294765240232425, + "learning_rate": 9.007130182270685e-06, + "loss": 0.24824783205986023, + "step": 4182 + }, + { + "epoch": 1.1107422653034125, + "grad_norm": 1.1667528236187257, + "learning_rate": 9.002761208178059e-06, + "loss": 0.25174480676651, + "step": 4183 + }, + { + "epoch": 1.1110078342849554, + "grad_norm": 1.0615254217045484, + "learning_rate": 8.998392426341313e-06, + "loss": 0.22364717721939087, + "step": 4184 + }, + { + "epoch": 1.1112734032664984, + "grad_norm": 1.0478203412338092, + "learning_rate": 
8.994023837602694e-06, + "loss": 0.2205432504415512, + "step": 4185 + }, + { + "epoch": 1.1115389722480415, + "grad_norm": 1.4181125559874541, + "learning_rate": 8.989655442804413e-06, + "loss": 0.23303675651550293, + "step": 4186 + }, + { + "epoch": 1.1118045412295845, + "grad_norm": 1.2558407878646785, + "learning_rate": 8.985287242788646e-06, + "loss": 0.3003222644329071, + "step": 4187 + }, + { + "epoch": 1.1120701102111274, + "grad_norm": 1.146183553652687, + "learning_rate": 8.980919238397532e-06, + "loss": 0.2734413146972656, + "step": 4188 + }, + { + "epoch": 1.1123356791926704, + "grad_norm": 1.200748942223162, + "learning_rate": 8.976551430473166e-06, + "loss": 0.24086692929267883, + "step": 4189 + }, + { + "epoch": 1.1126012481742134, + "grad_norm": 1.2277073829430902, + "learning_rate": 8.972183819857618e-06, + "loss": 0.2531188130378723, + "step": 4190 + }, + { + "epoch": 1.1128668171557563, + "grad_norm": 1.1067327267341682, + "learning_rate": 8.96781640739291e-06, + "loss": 0.25059640407562256, + "step": 4191 + }, + { + "epoch": 1.1131323861372993, + "grad_norm": 1.1987793097859372, + "learning_rate": 8.963449193921023e-06, + "loss": 0.22427335381507874, + "step": 4192 + }, + { + "epoch": 1.1133979551188422, + "grad_norm": 1.1842662472837817, + "learning_rate": 8.959082180283906e-06, + "loss": 0.28835898637771606, + "step": 4193 + }, + { + "epoch": 1.1136635241003852, + "grad_norm": 1.1161865281550452, + "learning_rate": 8.954715367323468e-06, + "loss": 0.23919034004211426, + "step": 4194 + }, + { + "epoch": 1.1139290930819281, + "grad_norm": 1.186821665962327, + "learning_rate": 8.950348755881578e-06, + "loss": 0.24583986401557922, + "step": 4195 + }, + { + "epoch": 1.114194662063471, + "grad_norm": 1.2519292440490923, + "learning_rate": 8.94598234680007e-06, + "loss": 0.23869696259498596, + "step": 4196 + }, + { + "epoch": 1.114460231045014, + "grad_norm": 1.1662462204488522, + "learning_rate": 8.941616140920734e-06, + "loss": 0.2672434449195862, + 
"step": 4197 + }, + { + "epoch": 1.114725800026557, + "grad_norm": 1.2253961517889995, + "learning_rate": 8.937250139085322e-06, + "loss": 0.2660336494445801, + "step": 4198 + }, + { + "epoch": 1.1149913690081, + "grad_norm": 1.1608224464613695, + "learning_rate": 8.932884342135552e-06, + "loss": 0.26461780071258545, + "step": 4199 + }, + { + "epoch": 1.1152569379896429, + "grad_norm": 1.1632580978978435, + "learning_rate": 8.928518750913094e-06, + "loss": 0.22947481274604797, + "step": 4200 + }, + { + "epoch": 1.1155225069711858, + "grad_norm": 1.116659758904741, + "learning_rate": 8.924153366259584e-06, + "loss": 0.22715970873832703, + "step": 4201 + }, + { + "epoch": 1.1157880759527288, + "grad_norm": 1.3785482068816968, + "learning_rate": 8.919788189016618e-06, + "loss": 0.2994215190410614, + "step": 4202 + }, + { + "epoch": 1.1160536449342717, + "grad_norm": 1.158412598714371, + "learning_rate": 8.915423220025747e-06, + "loss": 0.2290656566619873, + "step": 4203 + }, + { + "epoch": 1.1163192139158147, + "grad_norm": 1.093685203516635, + "learning_rate": 8.911058460128489e-06, + "loss": 0.22284844517707825, + "step": 4204 + }, + { + "epoch": 1.1165847828973576, + "grad_norm": 1.0534371355750514, + "learning_rate": 8.906693910166316e-06, + "loss": 0.2095392495393753, + "step": 4205 + }, + { + "epoch": 1.1168503518789006, + "grad_norm": 1.197609739800315, + "learning_rate": 8.902329570980665e-06, + "loss": 0.25098133087158203, + "step": 4206 + }, + { + "epoch": 1.1171159208604435, + "grad_norm": 1.1630125842119448, + "learning_rate": 8.897965443412923e-06, + "loss": 0.24768148362636566, + "step": 4207 + }, + { + "epoch": 1.1173814898419865, + "grad_norm": 1.1213395777051767, + "learning_rate": 8.89360152830445e-06, + "loss": 0.22255480289459229, + "step": 4208 + }, + { + "epoch": 1.1176470588235294, + "grad_norm": 1.2306365389400118, + "learning_rate": 8.889237826496551e-06, + "loss": 0.23721200227737427, + "step": 4209 + }, + { + "epoch": 1.1179126278050724, + 
"grad_norm": 1.1422779685655824, + "learning_rate": 8.8848743388305e-06, + "loss": 0.25002530217170715, + "step": 4210 + }, + { + "epoch": 1.1181781967866153, + "grad_norm": 1.2862841308153614, + "learning_rate": 8.880511066147524e-06, + "loss": 0.27188029885292053, + "step": 4211 + }, + { + "epoch": 1.1184437657681583, + "grad_norm": 1.1517061730387759, + "learning_rate": 8.876148009288813e-06, + "loss": 0.23056066036224365, + "step": 4212 + }, + { + "epoch": 1.1187093347497012, + "grad_norm": 1.172676602980077, + "learning_rate": 8.87178516909551e-06, + "loss": 0.2336079478263855, + "step": 4213 + }, + { + "epoch": 1.1189749037312442, + "grad_norm": 1.1868473876345316, + "learning_rate": 8.86742254640872e-06, + "loss": 0.27449533343315125, + "step": 4214 + }, + { + "epoch": 1.1192404727127871, + "grad_norm": 1.1500112066365369, + "learning_rate": 8.863060142069508e-06, + "loss": 0.24714893102645874, + "step": 4215 + }, + { + "epoch": 1.11950604169433, + "grad_norm": 1.072070573678295, + "learning_rate": 8.858697956918886e-06, + "loss": 0.2155439257621765, + "step": 4216 + }, + { + "epoch": 1.119771610675873, + "grad_norm": 1.1798452175680678, + "learning_rate": 8.854335991797842e-06, + "loss": 0.23189155757427216, + "step": 4217 + }, + { + "epoch": 1.120037179657416, + "grad_norm": 1.0773206236657924, + "learning_rate": 8.849974247547307e-06, + "loss": 0.23413527011871338, + "step": 4218 + }, + { + "epoch": 1.120302748638959, + "grad_norm": 1.1991513784988423, + "learning_rate": 8.845612725008173e-06, + "loss": 0.2569039463996887, + "step": 4219 + }, + { + "epoch": 1.120568317620502, + "grad_norm": 1.1795807532964264, + "learning_rate": 8.84125142502129e-06, + "loss": 0.2699541449546814, + "step": 4220 + }, + { + "epoch": 1.1208338866020449, + "grad_norm": 1.1092727759218166, + "learning_rate": 8.836890348427468e-06, + "loss": 0.27172449231147766, + "step": 4221 + }, + { + "epoch": 1.1210994555835878, + "grad_norm": 1.2315684717645485, + "learning_rate": 
8.83252949606747e-06, + "loss": 0.2839444875717163, + "step": 4222 + }, + { + "epoch": 1.1213650245651308, + "grad_norm": 1.1676850588618106, + "learning_rate": 8.828168868782013e-06, + "loss": 0.22782178223133087, + "step": 4223 + }, + { + "epoch": 1.1216305935466737, + "grad_norm": 1.132889704492098, + "learning_rate": 8.82380846741178e-06, + "loss": 0.2567726671695709, + "step": 4224 + }, + { + "epoch": 1.1218961625282167, + "grad_norm": 1.1872540675130212, + "learning_rate": 8.8194482927974e-06, + "loss": 0.25879523158073425, + "step": 4225 + }, + { + "epoch": 1.1221617315097596, + "grad_norm": 1.0193477801534692, + "learning_rate": 8.815088345779466e-06, + "loss": 0.22109058499336243, + "step": 4226 + }, + { + "epoch": 1.1224273004913026, + "grad_norm": 1.1414592493281657, + "learning_rate": 8.810728627198526e-06, + "loss": 0.23615925014019012, + "step": 4227 + }, + { + "epoch": 1.1226928694728455, + "grad_norm": 1.160290266155045, + "learning_rate": 8.806369137895081e-06, + "loss": 0.2751353085041046, + "step": 4228 + }, + { + "epoch": 1.1229584384543885, + "grad_norm": 1.2566953981709197, + "learning_rate": 8.802009878709587e-06, + "loss": 0.2361963391304016, + "step": 4229 + }, + { + "epoch": 1.1232240074359314, + "grad_norm": 1.186723455251228, + "learning_rate": 8.79765085048246e-06, + "loss": 0.22435930371284485, + "step": 4230 + }, + { + "epoch": 1.1234895764174744, + "grad_norm": 1.1759467333820823, + "learning_rate": 8.79329205405407e-06, + "loss": 0.2355855256319046, + "step": 4231 + }, + { + "epoch": 1.1237551453990173, + "grad_norm": 1.1450490838951077, + "learning_rate": 8.78893349026474e-06, + "loss": 0.24127572774887085, + "step": 4232 + }, + { + "epoch": 1.1240207143805603, + "grad_norm": 1.222656849347683, + "learning_rate": 8.784575159954748e-06, + "loss": 0.2677989602088928, + "step": 4233 + }, + { + "epoch": 1.1242862833621032, + "grad_norm": 1.109384474337522, + "learning_rate": 8.78021706396433e-06, + "loss": 0.2283135950565338, + "step": 
4234 + }, + { + "epoch": 1.1245518523436462, + "grad_norm": 1.1669732456316693, + "learning_rate": 8.775859203133678e-06, + "loss": 0.2686103582382202, + "step": 4235 + }, + { + "epoch": 1.1248174213251891, + "grad_norm": 1.3869789172842044, + "learning_rate": 8.771501578302934e-06, + "loss": 0.2638726234436035, + "step": 4236 + }, + { + "epoch": 1.125082990306732, + "grad_norm": 1.0752600847920544, + "learning_rate": 8.767144190312196e-06, + "loss": 0.2517441511154175, + "step": 4237 + }, + { + "epoch": 1.125348559288275, + "grad_norm": 1.1903096570499558, + "learning_rate": 8.762787040001518e-06, + "loss": 0.2593642771244049, + "step": 4238 + }, + { + "epoch": 1.125614128269818, + "grad_norm": 1.123653942868709, + "learning_rate": 8.758430128210908e-06, + "loss": 0.23758336901664734, + "step": 4239 + }, + { + "epoch": 1.125879697251361, + "grad_norm": 1.182033088729647, + "learning_rate": 8.754073455780327e-06, + "loss": 0.2557980716228485, + "step": 4240 + }, + { + "epoch": 1.126145266232904, + "grad_norm": 1.1182311632466304, + "learning_rate": 8.74971702354969e-06, + "loss": 0.2484067678451538, + "step": 4241 + }, + { + "epoch": 1.1264108352144468, + "grad_norm": 1.121886097833982, + "learning_rate": 8.745360832358864e-06, + "loss": 0.23103098571300507, + "step": 4242 + }, + { + "epoch": 1.1266764041959898, + "grad_norm": 1.1856800379472048, + "learning_rate": 8.741004883047667e-06, + "loss": 0.2630731463432312, + "step": 4243 + }, + { + "epoch": 1.1269419731775328, + "grad_norm": 1.1814851216743405, + "learning_rate": 8.736649176455885e-06, + "loss": 0.2413114309310913, + "step": 4244 + }, + { + "epoch": 1.1272075421590757, + "grad_norm": 1.1465608986560651, + "learning_rate": 8.732293713423243e-06, + "loss": 0.22463169693946838, + "step": 4245 + }, + { + "epoch": 1.1274731111406187, + "grad_norm": 1.1943136125759177, + "learning_rate": 8.727938494789421e-06, + "loss": 0.23641429841518402, + "step": 4246 + }, + { + "epoch": 1.1277386801221616, + "grad_norm": 
1.399290186521162, + "learning_rate": 8.723583521394054e-06, + "loss": 0.2547767162322998, + "step": 4247 + }, + { + "epoch": 1.1280042491037048, + "grad_norm": 1.1274578262359225, + "learning_rate": 8.719228794076733e-06, + "loss": 0.25753074884414673, + "step": 4248 + }, + { + "epoch": 1.1282698180852477, + "grad_norm": 1.2581544322188265, + "learning_rate": 8.714874313676992e-06, + "loss": 0.30602240562438965, + "step": 4249 + }, + { + "epoch": 1.1285353870667907, + "grad_norm": 1.3693509289176364, + "learning_rate": 8.710520081034328e-06, + "loss": 0.28336623311042786, + "step": 4250 + }, + { + "epoch": 1.1288009560483336, + "grad_norm": 1.179198933472593, + "learning_rate": 8.706166096988185e-06, + "loss": 0.24065867066383362, + "step": 4251 + }, + { + "epoch": 1.1290665250298766, + "grad_norm": 1.1350442144429624, + "learning_rate": 8.701812362377954e-06, + "loss": 0.25674968957901, + "step": 4252 + }, + { + "epoch": 1.1293320940114195, + "grad_norm": 1.0526431620404462, + "learning_rate": 8.697458878042992e-06, + "loss": 0.21502923965454102, + "step": 4253 + }, + { + "epoch": 1.1295976629929625, + "grad_norm": 1.199807552125115, + "learning_rate": 8.693105644822598e-06, + "loss": 0.26848286390304565, + "step": 4254 + }, + { + "epoch": 1.1298632319745054, + "grad_norm": 1.1632395937948599, + "learning_rate": 8.688752663556022e-06, + "loss": 0.24283824861049652, + "step": 4255 + }, + { + "epoch": 1.1301288009560484, + "grad_norm": 1.231861138079484, + "learning_rate": 8.684399935082468e-06, + "loss": 0.2511506974697113, + "step": 4256 + }, + { + "epoch": 1.1303943699375913, + "grad_norm": 1.1293067099587706, + "learning_rate": 8.68004746024109e-06, + "loss": 0.23932483792304993, + "step": 4257 + }, + { + "epoch": 1.1306599389191343, + "grad_norm": 1.229437521917496, + "learning_rate": 8.675695239870993e-06, + "loss": 0.30030694603919983, + "step": 4258 + }, + { + "epoch": 1.1309255079006773, + "grad_norm": 1.1154596754627621, + "learning_rate": 
8.671343274811238e-06, + "loss": 0.24699059128761292, + "step": 4259 + }, + { + "epoch": 1.1311910768822202, + "grad_norm": 1.1288414782501015, + "learning_rate": 8.666991565900827e-06, + "loss": 0.26828041672706604, + "step": 4260 + }, + { + "epoch": 1.1314566458637632, + "grad_norm": 1.0765132569205758, + "learning_rate": 8.662640113978717e-06, + "loss": 0.2372082769870758, + "step": 4261 + }, + { + "epoch": 1.131722214845306, + "grad_norm": 1.2100447285144145, + "learning_rate": 8.658288919883824e-06, + "loss": 0.26367881894111633, + "step": 4262 + }, + { + "epoch": 1.131987783826849, + "grad_norm": 1.1035052537421275, + "learning_rate": 8.653937984455007e-06, + "loss": 0.2287222146987915, + "step": 4263 + }, + { + "epoch": 1.132253352808392, + "grad_norm": 1.1417963040520365, + "learning_rate": 8.649587308531067e-06, + "loss": 0.244521826505661, + "step": 4264 + }, + { + "epoch": 1.132518921789935, + "grad_norm": 1.2243689126496846, + "learning_rate": 8.64523689295077e-06, + "loss": 0.26912257075309753, + "step": 4265 + }, + { + "epoch": 1.132784490771478, + "grad_norm": 1.2384832947619873, + "learning_rate": 8.64088673855282e-06, + "loss": 0.23002780973911285, + "step": 4266 + }, + { + "epoch": 1.1330500597530209, + "grad_norm": 1.253742603342847, + "learning_rate": 8.636536846175878e-06, + "loss": 0.2561958432197571, + "step": 4267 + }, + { + "epoch": 1.1333156287345638, + "grad_norm": 1.2156026453092519, + "learning_rate": 8.63218721665855e-06, + "loss": 0.25553008913993835, + "step": 4268 + }, + { + "epoch": 1.1335811977161068, + "grad_norm": 1.1992385112791626, + "learning_rate": 8.627837850839398e-06, + "loss": 0.1992083340883255, + "step": 4269 + }, + { + "epoch": 1.1338467666976497, + "grad_norm": 1.3643398602160783, + "learning_rate": 8.62348874955692e-06, + "loss": 0.23075388371944427, + "step": 4270 + }, + { + "epoch": 1.1341123356791927, + "grad_norm": 1.1072751580070286, + "learning_rate": 8.619139913649582e-06, + "loss": 0.23691913485527039, + 
"step": 4271 + }, + { + "epoch": 1.1343779046607356, + "grad_norm": 1.2656689209279672, + "learning_rate": 8.61479134395578e-06, + "loss": 0.2536017894744873, + "step": 4272 + }, + { + "epoch": 1.1346434736422786, + "grad_norm": 1.2870409796681632, + "learning_rate": 8.61044304131387e-06, + "loss": 0.3014161288738251, + "step": 4273 + }, + { + "epoch": 1.1349090426238215, + "grad_norm": 1.1669055614665604, + "learning_rate": 8.606095006562156e-06, + "loss": 0.26333582401275635, + "step": 4274 + }, + { + "epoch": 1.1351746116053645, + "grad_norm": 1.2370251285176135, + "learning_rate": 8.601747240538883e-06, + "loss": 0.23796264827251434, + "step": 4275 + }, + { + "epoch": 1.1354401805869074, + "grad_norm": 1.1989417705813543, + "learning_rate": 8.597399744082251e-06, + "loss": 0.23737141489982605, + "step": 4276 + }, + { + "epoch": 1.1357057495684504, + "grad_norm": 1.1281376384049915, + "learning_rate": 8.593052518030407e-06, + "loss": 0.21073032915592194, + "step": 4277 + }, + { + "epoch": 1.1359713185499933, + "grad_norm": 1.2935455290015059, + "learning_rate": 8.588705563221444e-06, + "loss": 0.2597163915634155, + "step": 4278 + }, + { + "epoch": 1.1362368875315363, + "grad_norm": 1.137636804234172, + "learning_rate": 8.584358880493402e-06, + "loss": 0.24541154503822327, + "step": 4279 + }, + { + "epoch": 1.1365024565130792, + "grad_norm": 1.1331800338594176, + "learning_rate": 8.580012470684273e-06, + "loss": 0.19294027984142303, + "step": 4280 + }, + { + "epoch": 1.1367680254946222, + "grad_norm": 1.2387583554091215, + "learning_rate": 8.575666334631994e-06, + "loss": 0.26909738779067993, + "step": 4281 + }, + { + "epoch": 1.1370335944761651, + "grad_norm": 1.2850664046416893, + "learning_rate": 8.571320473174444e-06, + "loss": 0.2550502121448517, + "step": 4282 + }, + { + "epoch": 1.137299163457708, + "grad_norm": 1.138070930000495, + "learning_rate": 8.566974887149461e-06, + "loss": 0.2256634682416916, + "step": 4283 + }, + { + "epoch": 1.137564732439251, + 
"grad_norm": 1.3289753418379673, + "learning_rate": 8.562629577394817e-06, + "loss": 0.26154983043670654, + "step": 4284 + }, + { + "epoch": 1.137830301420794, + "grad_norm": 1.2426566834274124, + "learning_rate": 8.558284544748239e-06, + "loss": 0.24685145914554596, + "step": 4285 + }, + { + "epoch": 1.138095870402337, + "grad_norm": 1.177162412641928, + "learning_rate": 8.553939790047396e-06, + "loss": 0.2584421932697296, + "step": 4286 + }, + { + "epoch": 1.13836143938388, + "grad_norm": 1.2486541463378953, + "learning_rate": 8.549595314129907e-06, + "loss": 0.24582788348197937, + "step": 4287 + }, + { + "epoch": 1.1386270083654229, + "grad_norm": 1.1978925998644077, + "learning_rate": 8.545251117833334e-06, + "loss": 0.26023977994918823, + "step": 4288 + }, + { + "epoch": 1.1388925773469658, + "grad_norm": 1.2566090334130535, + "learning_rate": 8.54090720199519e-06, + "loss": 0.25575515627861023, + "step": 4289 + }, + { + "epoch": 1.1391581463285088, + "grad_norm": 1.2234599227483165, + "learning_rate": 8.53656356745293e-06, + "loss": 0.2784460783004761, + "step": 4290 + }, + { + "epoch": 1.1394237153100517, + "grad_norm": 1.11922615590049, + "learning_rate": 8.532220215043953e-06, + "loss": 0.24723297357559204, + "step": 4291 + }, + { + "epoch": 1.1396892842915947, + "grad_norm": 1.1960822646368614, + "learning_rate": 8.52787714560561e-06, + "loss": 0.24694418907165527, + "step": 4292 + }, + { + "epoch": 1.1399548532731376, + "grad_norm": 1.2073723964066632, + "learning_rate": 8.52353435997519e-06, + "loss": 0.19976040720939636, + "step": 4293 + }, + { + "epoch": 1.1402204222546806, + "grad_norm": 1.0875644999756633, + "learning_rate": 8.519191858989932e-06, + "loss": 0.21742458641529083, + "step": 4294 + }, + { + "epoch": 1.1404859912362235, + "grad_norm": 1.2040315384402727, + "learning_rate": 8.514849643487018e-06, + "loss": 0.26382917165756226, + "step": 4295 + }, + { + "epoch": 1.1407515602177665, + "grad_norm": 1.3073789721234685, + "learning_rate": 
8.510507714303577e-06, + "loss": 0.30778488516807556, + "step": 4296 + }, + { + "epoch": 1.1410171291993096, + "grad_norm": 1.0727267660957265, + "learning_rate": 8.506166072276681e-06, + "loss": 0.20894449949264526, + "step": 4297 + }, + { + "epoch": 1.1412826981808526, + "grad_norm": 1.2119089915252295, + "learning_rate": 8.50182471824335e-06, + "loss": 0.2389567494392395, + "step": 4298 + }, + { + "epoch": 1.1415482671623955, + "grad_norm": 1.0286533711803312, + "learning_rate": 8.497483653040545e-06, + "loss": 0.20531126856803894, + "step": 4299 + }, + { + "epoch": 1.1418138361439385, + "grad_norm": 1.2153067733576255, + "learning_rate": 8.49314287750517e-06, + "loss": 0.2577363848686218, + "step": 4300 + }, + { + "epoch": 1.1420794051254815, + "grad_norm": 1.211343687077752, + "learning_rate": 8.488802392474076e-06, + "loss": 0.24225997924804688, + "step": 4301 + }, + { + "epoch": 1.1423449741070244, + "grad_norm": 1.2698570110354703, + "learning_rate": 8.484462198784058e-06, + "loss": 0.26494917273521423, + "step": 4302 + }, + { + "epoch": 1.1426105430885674, + "grad_norm": 1.2988704892129896, + "learning_rate": 8.480122297271855e-06, + "loss": 0.24903994798660278, + "step": 4303 + }, + { + "epoch": 1.1428761120701103, + "grad_norm": 1.1681075442122268, + "learning_rate": 8.475782688774147e-06, + "loss": 0.25291907787323, + "step": 4304 + }, + { + "epoch": 1.1431416810516533, + "grad_norm": 1.1301459507046017, + "learning_rate": 8.47144337412756e-06, + "loss": 0.22958475351333618, + "step": 4305 + }, + { + "epoch": 1.1434072500331962, + "grad_norm": 1.175766015682232, + "learning_rate": 8.46710435416866e-06, + "loss": 0.2305452972650528, + "step": 4306 + }, + { + "epoch": 1.1436728190147392, + "grad_norm": 1.2105790475425935, + "learning_rate": 8.462765629733965e-06, + "loss": 0.25028055906295776, + "step": 4307 + }, + { + "epoch": 1.1439383879962821, + "grad_norm": 1.2809924485725674, + "learning_rate": 8.458427201659926e-06, + "loss": 0.24873222410678864, + 
"step": 4308 + }, + { + "epoch": 1.144203956977825, + "grad_norm": 1.2345010944986379, + "learning_rate": 8.454089070782943e-06, + "loss": 0.23396535217761993, + "step": 4309 + }, + { + "epoch": 1.144469525959368, + "grad_norm": 1.1955062282547588, + "learning_rate": 8.449751237939354e-06, + "loss": 0.27120494842529297, + "step": 4310 + }, + { + "epoch": 1.144735094940911, + "grad_norm": 1.182924840045628, + "learning_rate": 8.445413703965441e-06, + "loss": 0.2734759449958801, + "step": 4311 + }, + { + "epoch": 1.145000663922454, + "grad_norm": 1.1584309667252248, + "learning_rate": 8.441076469697434e-06, + "loss": 0.25353512167930603, + "step": 4312 + }, + { + "epoch": 1.1452662329039969, + "grad_norm": 1.1913513856414861, + "learning_rate": 8.436739535971497e-06, + "loss": 0.23851020634174347, + "step": 4313 + }, + { + "epoch": 1.1455318018855398, + "grad_norm": 1.2006838398252668, + "learning_rate": 8.432402903623741e-06, + "loss": 0.26320093870162964, + "step": 4314 + }, + { + "epoch": 1.1457973708670828, + "grad_norm": 1.1065666799118796, + "learning_rate": 8.428066573490211e-06, + "loss": 0.23859955370426178, + "step": 4315 + }, + { + "epoch": 1.1460629398486257, + "grad_norm": 1.197716796975668, + "learning_rate": 8.423730546406911e-06, + "loss": 0.2636772096157074, + "step": 4316 + }, + { + "epoch": 1.1463285088301687, + "grad_norm": 1.2459962038175347, + "learning_rate": 8.419394823209773e-06, + "loss": 0.2656415104866028, + "step": 4317 + }, + { + "epoch": 1.1465940778117116, + "grad_norm": 1.2225993542972535, + "learning_rate": 8.41505940473467e-06, + "loss": 0.2872830033302307, + "step": 4318 + }, + { + "epoch": 1.1468596467932546, + "grad_norm": 1.4653362839323858, + "learning_rate": 8.410724291817422e-06, + "loss": 0.229783833026886, + "step": 4319 + }, + { + "epoch": 1.1471252157747975, + "grad_norm": 4.273944826146497, + "learning_rate": 8.406389485293786e-06, + "loss": 0.24418675899505615, + "step": 4320 + }, + { + "epoch": 1.1473907847563405, + 
"grad_norm": 1.2385236183806463, + "learning_rate": 8.402054985999464e-06, + "loss": 0.2535584270954132, + "step": 4321 + }, + { + "epoch": 1.1476563537378834, + "grad_norm": 1.2116145926695832, + "learning_rate": 8.397720794770093e-06, + "loss": 0.23207828402519226, + "step": 4322 + }, + { + "epoch": 1.1479219227194264, + "grad_norm": 1.8129143471218838, + "learning_rate": 8.393386912441257e-06, + "loss": 0.27990391850471497, + "step": 4323 + }, + { + "epoch": 1.1481874917009693, + "grad_norm": 1.059877272327032, + "learning_rate": 8.38905333984847e-06, + "loss": 0.2098318189382553, + "step": 4324 + }, + { + "epoch": 1.1484530606825123, + "grad_norm": 1.1462464609840002, + "learning_rate": 8.384720077827204e-06, + "loss": 0.25303804874420166, + "step": 4325 + }, + { + "epoch": 1.1487186296640552, + "grad_norm": 1.0794728099252306, + "learning_rate": 8.380387127212858e-06, + "loss": 0.23481838405132294, + "step": 4326 + }, + { + "epoch": 1.1489841986455982, + "grad_norm": 1.1782142095551065, + "learning_rate": 8.376054488840771e-06, + "loss": 0.24842356145381927, + "step": 4327 + }, + { + "epoch": 1.1492497676271411, + "grad_norm": 1.136832039914945, + "learning_rate": 8.37172216354623e-06, + "loss": 0.23927366733551025, + "step": 4328 + }, + { + "epoch": 1.149515336608684, + "grad_norm": 1.1577812724546028, + "learning_rate": 8.367390152164448e-06, + "loss": 0.23836453258991241, + "step": 4329 + }, + { + "epoch": 1.149780905590227, + "grad_norm": 1.2492179140984832, + "learning_rate": 8.36305845553059e-06, + "loss": 0.2562161982059479, + "step": 4330 + }, + { + "epoch": 1.15004647457177, + "grad_norm": 1.120151700121908, + "learning_rate": 8.358727074479755e-06, + "loss": 0.21255920827388763, + "step": 4331 + }, + { + "epoch": 1.150312043553313, + "grad_norm": 1.1011600870179878, + "learning_rate": 8.354396009846985e-06, + "loss": 0.24200043082237244, + "step": 4332 + }, + { + "epoch": 1.150577612534856, + "grad_norm": 1.1644551235897023, + "learning_rate": 
8.35006526246725e-06, + "loss": 0.23582379519939423, + "step": 4333 + }, + { + "epoch": 1.1508431815163989, + "grad_norm": 1.093546349726341, + "learning_rate": 8.34573483317548e-06, + "loss": 0.21554499864578247, + "step": 4334 + }, + { + "epoch": 1.1511087504979418, + "grad_norm": 1.2460346716976907, + "learning_rate": 8.341404722806525e-06, + "loss": 0.2789759039878845, + "step": 4335 + }, + { + "epoch": 1.1513743194794848, + "grad_norm": 1.212813860768853, + "learning_rate": 8.337074932195175e-06, + "loss": 0.24677832424640656, + "step": 4336 + }, + { + "epoch": 1.1516398884610277, + "grad_norm": 1.2351497128261646, + "learning_rate": 8.332745462176166e-06, + "loss": 0.28122392296791077, + "step": 4337 + }, + { + "epoch": 1.1519054574425707, + "grad_norm": 1.2447069177647443, + "learning_rate": 8.328416313584169e-06, + "loss": 0.23219403624534607, + "step": 4338 + }, + { + "epoch": 1.1521710264241136, + "grad_norm": 1.1258797089625292, + "learning_rate": 8.324087487253792e-06, + "loss": 0.19928379356861115, + "step": 4339 + }, + { + "epoch": 1.1524365954056566, + "grad_norm": 1.2737910298174706, + "learning_rate": 8.31975898401958e-06, + "loss": 0.27730467915534973, + "step": 4340 + }, + { + "epoch": 1.1527021643871995, + "grad_norm": 1.3906235348842741, + "learning_rate": 8.315430804716022e-06, + "loss": 0.25462737679481506, + "step": 4341 + }, + { + "epoch": 1.1529677333687425, + "grad_norm": 1.1703737499238527, + "learning_rate": 8.311102950177533e-06, + "loss": 0.2363007366657257, + "step": 4342 + }, + { + "epoch": 1.1532333023502854, + "grad_norm": 1.2498285131266695, + "learning_rate": 8.306775421238482e-06, + "loss": 0.2648352384567261, + "step": 4343 + }, + { + "epoch": 1.1534988713318284, + "grad_norm": 1.394847110607811, + "learning_rate": 8.302448218733158e-06, + "loss": 0.25645309686660767, + "step": 4344 + }, + { + "epoch": 1.1537644403133713, + "grad_norm": 1.2178564426244172, + "learning_rate": 8.298121343495797e-06, + "loss": 
0.22962522506713867, + "step": 4345 + }, + { + "epoch": 1.1540300092949143, + "grad_norm": 1.132403649349265, + "learning_rate": 8.293794796360569e-06, + "loss": 0.21269623935222626, + "step": 4346 + }, + { + "epoch": 1.1542955782764572, + "grad_norm": 1.1646919704485588, + "learning_rate": 8.289468578161581e-06, + "loss": 0.2518436014652252, + "step": 4347 + }, + { + "epoch": 1.1545611472580002, + "grad_norm": 1.193830808481187, + "learning_rate": 8.285142689732877e-06, + "loss": 0.2318439483642578, + "step": 4348 + }, + { + "epoch": 1.1548267162395431, + "grad_norm": 1.0953821300718658, + "learning_rate": 8.280817131908438e-06, + "loss": 0.2278512567281723, + "step": 4349 + }, + { + "epoch": 1.155092285221086, + "grad_norm": 1.3446091578493078, + "learning_rate": 8.27649190552218e-06, + "loss": 0.2521114945411682, + "step": 4350 + }, + { + "epoch": 1.155357854202629, + "grad_norm": 1.1722019112748296, + "learning_rate": 8.272167011407955e-06, + "loss": 0.2565760016441345, + "step": 4351 + }, + { + "epoch": 1.155623423184172, + "grad_norm": 1.3209067321897832, + "learning_rate": 8.267842450399552e-06, + "loss": 0.2603546679019928, + "step": 4352 + }, + { + "epoch": 1.155888992165715, + "grad_norm": 1.1697050726438265, + "learning_rate": 8.263518223330698e-06, + "loss": 0.2175855189561844, + "step": 4353 + }, + { + "epoch": 1.156154561147258, + "grad_norm": 1.1937135661774867, + "learning_rate": 8.25919433103505e-06, + "loss": 0.24521774053573608, + "step": 4354 + }, + { + "epoch": 1.1564201301288008, + "grad_norm": 1.3267445452853517, + "learning_rate": 8.254870774346203e-06, + "loss": 0.29673823714256287, + "step": 4355 + }, + { + "epoch": 1.1566856991103438, + "grad_norm": 1.260162624950344, + "learning_rate": 8.25054755409769e-06, + "loss": 0.26994144916534424, + "step": 4356 + }, + { + "epoch": 1.1569512680918868, + "grad_norm": 1.1578908727655277, + "learning_rate": 8.246224671122974e-06, + "loss": 0.2545935809612274, + "step": 4357 + }, + { + "epoch": 
1.1572168370734297, + "grad_norm": 1.1469888258961152, + "learning_rate": 8.241902126255458e-06, + "loss": 0.23589034378528595, + "step": 4358 + }, + { + "epoch": 1.1574824060549727, + "grad_norm": 1.229284708155894, + "learning_rate": 8.237579920328478e-06, + "loss": 0.2617190480232239, + "step": 4359 + }, + { + "epoch": 1.1577479750365158, + "grad_norm": 1.2741716320060574, + "learning_rate": 8.233258054175302e-06, + "loss": 0.3092418313026428, + "step": 4360 + }, + { + "epoch": 1.1580135440180588, + "grad_norm": 1.1377305602079475, + "learning_rate": 8.228936528629138e-06, + "loss": 0.22873908281326294, + "step": 4361 + }, + { + "epoch": 1.1582791129996017, + "grad_norm": 1.0592847205754, + "learning_rate": 8.224615344523123e-06, + "loss": 0.22549089789390564, + "step": 4362 + }, + { + "epoch": 1.1585446819811447, + "grad_norm": 1.0288617285826194, + "learning_rate": 8.22029450269033e-06, + "loss": 0.19141459465026855, + "step": 4363 + }, + { + "epoch": 1.1588102509626876, + "grad_norm": 1.1679333849265336, + "learning_rate": 8.21597400396377e-06, + "loss": 0.24277547001838684, + "step": 4364 + }, + { + "epoch": 1.1590758199442306, + "grad_norm": 1.1463053400858605, + "learning_rate": 8.21165384917638e-06, + "loss": 0.2429513931274414, + "step": 4365 + }, + { + "epoch": 1.1593413889257735, + "grad_norm": 1.0775583631999657, + "learning_rate": 8.207334039161035e-06, + "loss": 0.24710172414779663, + "step": 4366 + }, + { + "epoch": 1.1596069579073165, + "grad_norm": 1.1226530732908067, + "learning_rate": 8.203014574750546e-06, + "loss": 0.2553783357143402, + "step": 4367 + }, + { + "epoch": 1.1598725268888594, + "grad_norm": 1.1664625510577165, + "learning_rate": 8.198695456777653e-06, + "loss": 0.2558436095714569, + "step": 4368 + }, + { + "epoch": 1.1601380958704024, + "grad_norm": 1.093371491828669, + "learning_rate": 8.19437668607503e-06, + "loss": 0.20780377089977264, + "step": 4369 + }, + { + "epoch": 1.1604036648519453, + "grad_norm": 1.0184271240235683, + 
"learning_rate": 8.190058263475288e-06, + "loss": 0.22397254407405853, + "step": 4370 + }, + { + "epoch": 1.1606692338334883, + "grad_norm": 1.1123966470918765, + "learning_rate": 8.185740189810967e-06, + "loss": 0.2763773798942566, + "step": 4371 + }, + { + "epoch": 1.1609348028150313, + "grad_norm": 1.234569017856286, + "learning_rate": 8.181422465914541e-06, + "loss": 0.2801940441131592, + "step": 4372 + }, + { + "epoch": 1.1612003717965742, + "grad_norm": 1.3078225086374202, + "learning_rate": 8.177105092618413e-06, + "loss": 0.20949441194534302, + "step": 4373 + }, + { + "epoch": 1.1614659407781172, + "grad_norm": 1.020800458401727, + "learning_rate": 8.172788070754927e-06, + "loss": 0.24503354728221893, + "step": 4374 + }, + { + "epoch": 1.16173150975966, + "grad_norm": 1.212252624187319, + "learning_rate": 8.16847140115635e-06, + "loss": 0.256147563457489, + "step": 4375 + }, + { + "epoch": 1.161997078741203, + "grad_norm": 1.079933692504349, + "learning_rate": 8.164155084654886e-06, + "loss": 0.2178848683834076, + "step": 4376 + }, + { + "epoch": 1.162262647722746, + "grad_norm": 1.0121292441974634, + "learning_rate": 8.159839122082668e-06, + "loss": 0.22624582052230835, + "step": 4377 + }, + { + "epoch": 1.162528216704289, + "grad_norm": 1.0294597777179986, + "learning_rate": 8.155523514271764e-06, + "loss": 0.2184191346168518, + "step": 4378 + }, + { + "epoch": 1.162793785685832, + "grad_norm": 1.2825595051682412, + "learning_rate": 8.151208262054175e-06, + "loss": 0.2623840868473053, + "step": 4379 + }, + { + "epoch": 1.1630593546673749, + "grad_norm": 1.2529929341607686, + "learning_rate": 8.14689336626183e-06, + "loss": 0.27181199193000793, + "step": 4380 + }, + { + "epoch": 1.1633249236489178, + "grad_norm": 1.282994089786083, + "learning_rate": 8.142578827726587e-06, + "loss": 0.2791554629802704, + "step": 4381 + }, + { + "epoch": 1.1635904926304608, + "grad_norm": 1.221608581014812, + "learning_rate": 8.13826464728024e-06, + "loss": 
0.2466641068458557, + "step": 4382 + }, + { + "epoch": 1.1638560616120037, + "grad_norm": 0.9724735599541757, + "learning_rate": 8.133950825754511e-06, + "loss": 0.1951724737882614, + "step": 4383 + }, + { + "epoch": 1.1641216305935467, + "grad_norm": 1.2462068833977051, + "learning_rate": 8.129637363981056e-06, + "loss": 0.2520062029361725, + "step": 4384 + }, + { + "epoch": 1.1643871995750896, + "grad_norm": 1.230128345167748, + "learning_rate": 8.12532426279146e-06, + "loss": 0.24101334810256958, + "step": 4385 + }, + { + "epoch": 1.1646527685566326, + "grad_norm": 1.244671245504639, + "learning_rate": 8.121011523017235e-06, + "loss": 0.2741190791130066, + "step": 4386 + }, + { + "epoch": 1.1649183375381755, + "grad_norm": 1.1570746383559662, + "learning_rate": 8.116699145489822e-06, + "loss": 0.2575281858444214, + "step": 4387 + }, + { + "epoch": 1.1651839065197185, + "grad_norm": 1.157233381368316, + "learning_rate": 8.112387131040608e-06, + "loss": 0.2557298243045807, + "step": 4388 + }, + { + "epoch": 1.1654494755012614, + "grad_norm": 1.2560692108341776, + "learning_rate": 8.108075480500892e-06, + "loss": 0.27485036849975586, + "step": 4389 + }, + { + "epoch": 1.1657150444828044, + "grad_norm": 1.2517544472207511, + "learning_rate": 8.103764194701909e-06, + "loss": 0.26458340883255005, + "step": 4390 + }, + { + "epoch": 1.1659806134643473, + "grad_norm": 1.2310585386329624, + "learning_rate": 8.099453274474827e-06, + "loss": 0.2281840592622757, + "step": 4391 + }, + { + "epoch": 1.1662461824458903, + "grad_norm": 1.2367230880082285, + "learning_rate": 8.095142720650739e-06, + "loss": 0.24956555664539337, + "step": 4392 + }, + { + "epoch": 1.1665117514274332, + "grad_norm": 1.109202461245095, + "learning_rate": 8.090832534060671e-06, + "loss": 0.22619420289993286, + "step": 4393 + }, + { + "epoch": 1.1667773204089762, + "grad_norm": 1.2922206575995636, + "learning_rate": 8.086522715535571e-06, + "loss": 0.2780688405036926, + "step": 4394 + }, + { + "epoch": 
1.1670428893905191, + "grad_norm": 1.2699378735794575, + "learning_rate": 8.082213265906323e-06, + "loss": 0.2600886821746826, + "step": 4395 + }, + { + "epoch": 1.167308458372062, + "grad_norm": 1.244234758234162, + "learning_rate": 8.077904186003736e-06, + "loss": 0.25049078464508057, + "step": 4396 + }, + { + "epoch": 1.167574027353605, + "grad_norm": 1.2327544821473595, + "learning_rate": 8.073595476658558e-06, + "loss": 0.27745798230171204, + "step": 4397 + }, + { + "epoch": 1.167839596335148, + "grad_norm": 1.1682547274263488, + "learning_rate": 8.069287138701452e-06, + "loss": 0.2191929668188095, + "step": 4398 + }, + { + "epoch": 1.168105165316691, + "grad_norm": 1.297306908163856, + "learning_rate": 8.064979172963014e-06, + "loss": 0.24307313561439514, + "step": 4399 + }, + { + "epoch": 1.168370734298234, + "grad_norm": 1.1837345133145987, + "learning_rate": 8.060671580273772e-06, + "loss": 0.23036238551139832, + "step": 4400 + }, + { + "epoch": 1.1686363032797769, + "grad_norm": 1.096627050675377, + "learning_rate": 8.056364361464176e-06, + "loss": 0.2394433617591858, + "step": 4401 + }, + { + "epoch": 1.1689018722613198, + "grad_norm": 1.183557399538609, + "learning_rate": 8.052057517364608e-06, + "loss": 0.24099211394786835, + "step": 4402 + }, + { + "epoch": 1.1691674412428628, + "grad_norm": 1.1293667282926971, + "learning_rate": 8.047751048805376e-06, + "loss": 0.22036173939704895, + "step": 4403 + }, + { + "epoch": 1.1694330102244057, + "grad_norm": 1.185484128157471, + "learning_rate": 8.043444956616717e-06, + "loss": 0.22400429844856262, + "step": 4404 + }, + { + "epoch": 1.1696985792059487, + "grad_norm": 1.0594769241160498, + "learning_rate": 8.039139241628792e-06, + "loss": 0.21649131178855896, + "step": 4405 + }, + { + "epoch": 1.1699641481874916, + "grad_norm": 1.150957898906185, + "learning_rate": 8.034833904671698e-06, + "loss": 0.23412205278873444, + "step": 4406 + }, + { + "epoch": 1.1702297171690346, + "grad_norm": 1.2025485392569255, + 
"learning_rate": 8.030528946575453e-06, + "loss": 0.23822304606437683, + "step": 4407 + }, + { + "epoch": 1.1704952861505775, + "grad_norm": 1.2929661052617345, + "learning_rate": 8.026224368169998e-06, + "loss": 0.29250186681747437, + "step": 4408 + }, + { + "epoch": 1.1707608551321207, + "grad_norm": 1.4098437716027425, + "learning_rate": 8.021920170285205e-06, + "loss": 0.26794207096099854, + "step": 4409 + }, + { + "epoch": 1.1710264241136636, + "grad_norm": 1.2469013694849018, + "learning_rate": 8.017616353750874e-06, + "loss": 0.2573787271976471, + "step": 4410 + }, + { + "epoch": 1.1712919930952066, + "grad_norm": 1.1835378975512396, + "learning_rate": 8.01331291939673e-06, + "loss": 0.2744356691837311, + "step": 4411 + }, + { + "epoch": 1.1715575620767495, + "grad_norm": 1.4542599881672131, + "learning_rate": 8.009009868052424e-06, + "loss": 0.2582886815071106, + "step": 4412 + }, + { + "epoch": 1.1718231310582925, + "grad_norm": 1.1766031171819216, + "learning_rate": 8.004707200547534e-06, + "loss": 0.2553568482398987, + "step": 4413 + }, + { + "epoch": 1.1720887000398355, + "grad_norm": 1.144579662849428, + "learning_rate": 8.00040491771156e-06, + "loss": 0.2670289874076843, + "step": 4414 + }, + { + "epoch": 1.1723542690213784, + "grad_norm": 1.1520006084984327, + "learning_rate": 7.99610302037394e-06, + "loss": 0.215460866689682, + "step": 4415 + }, + { + "epoch": 1.1726198380029214, + "grad_norm": 1.2764670908026035, + "learning_rate": 7.991801509364023e-06, + "loss": 0.26481571793556213, + "step": 4416 + }, + { + "epoch": 1.1728854069844643, + "grad_norm": 1.0239999030663398, + "learning_rate": 7.98750038551109e-06, + "loss": 0.2060776650905609, + "step": 4417 + }, + { + "epoch": 1.1731509759660073, + "grad_norm": 1.147707044406535, + "learning_rate": 7.983199649644349e-06, + "loss": 0.2401561588048935, + "step": 4418 + }, + { + "epoch": 1.1734165449475502, + "grad_norm": 1.3064882111410037, + "learning_rate": 7.978899302592927e-06, + "loss": 
0.2545842230319977, + "step": 4419 + }, + { + "epoch": 1.1736821139290932, + "grad_norm": 1.199445262296627, + "learning_rate": 7.974599345185884e-06, + "loss": 0.29925093054771423, + "step": 4420 + }, + { + "epoch": 1.1739476829106361, + "grad_norm": 1.7583031900565322, + "learning_rate": 7.9702997782522e-06, + "loss": 0.23944757878780365, + "step": 4421 + }, + { + "epoch": 1.174213251892179, + "grad_norm": 1.057746400765015, + "learning_rate": 7.96600060262078e-06, + "loss": 0.23745761811733246, + "step": 4422 + }, + { + "epoch": 1.174478820873722, + "grad_norm": 1.1164780002442092, + "learning_rate": 7.961701819120453e-06, + "loss": 0.22170330584049225, + "step": 4423 + }, + { + "epoch": 1.174744389855265, + "grad_norm": 1.2607094160663312, + "learning_rate": 7.95740342857998e-06, + "loss": 0.2645890712738037, + "step": 4424 + }, + { + "epoch": 1.175009958836808, + "grad_norm": 1.2171129338535713, + "learning_rate": 7.953105431828032e-06, + "loss": 0.25232207775115967, + "step": 4425 + }, + { + "epoch": 1.1752755278183509, + "grad_norm": 1.20503293579659, + "learning_rate": 7.948807829693219e-06, + "loss": 0.2656644880771637, + "step": 4426 + }, + { + "epoch": 1.1755410967998938, + "grad_norm": 1.069230366230624, + "learning_rate": 7.944510623004063e-06, + "loss": 0.25290653109550476, + "step": 4427 + }, + { + "epoch": 1.1758066657814368, + "grad_norm": 1.1825821036814732, + "learning_rate": 7.940213812589018e-06, + "loss": 0.27464741468429565, + "step": 4428 + }, + { + "epoch": 1.1760722347629797, + "grad_norm": 1.4910942744639428, + "learning_rate": 7.935917399276455e-06, + "loss": 0.2562064528465271, + "step": 4429 + }, + { + "epoch": 1.1763378037445227, + "grad_norm": 1.2720371671465533, + "learning_rate": 7.931621383894676e-06, + "loss": 0.267793208360672, + "step": 4430 + }, + { + "epoch": 1.1766033727260656, + "grad_norm": 1.1490167098873316, + "learning_rate": 7.9273257672719e-06, + "loss": 0.23651085793972015, + "step": 4431 + }, + { + "epoch": 
1.1768689417076086, + "grad_norm": 1.0804412076412697, + "learning_rate": 7.923030550236267e-06, + "loss": 0.23691008985042572, + "step": 4432 + }, + { + "epoch": 1.1771345106891515, + "grad_norm": 1.1540873295746452, + "learning_rate": 7.918735733615852e-06, + "loss": 0.24495704472064972, + "step": 4433 + }, + { + "epoch": 1.1774000796706945, + "grad_norm": 1.4423069413713672, + "learning_rate": 7.91444131823864e-06, + "loss": 0.25423017144203186, + "step": 4434 + }, + { + "epoch": 1.1776656486522374, + "grad_norm": 1.1113893983435537, + "learning_rate": 7.910147304932548e-06, + "loss": 0.22870390117168427, + "step": 4435 + }, + { + "epoch": 1.1779312176337804, + "grad_norm": 1.0473620824498977, + "learning_rate": 7.905853694525405e-06, + "loss": 0.23037508130073547, + "step": 4436 + }, + { + "epoch": 1.1781967866153233, + "grad_norm": 1.2886040363623328, + "learning_rate": 7.901560487844973e-06, + "loss": 0.31184864044189453, + "step": 4437 + }, + { + "epoch": 1.1784623555968663, + "grad_norm": 1.302197101799982, + "learning_rate": 7.89726768571893e-06, + "loss": 0.24140426516532898, + "step": 4438 + }, + { + "epoch": 1.1787279245784092, + "grad_norm": 1.2134032336682008, + "learning_rate": 7.892975288974877e-06, + "loss": 0.25602301955223083, + "step": 4439 + }, + { + "epoch": 1.1789934935599522, + "grad_norm": 1.1868063067331378, + "learning_rate": 7.888683298440339e-06, + "loss": 0.2717514932155609, + "step": 4440 + }, + { + "epoch": 1.1792590625414952, + "grad_norm": 1.1670818939848298, + "learning_rate": 7.884391714942757e-06, + "loss": 0.252475380897522, + "step": 4441 + }, + { + "epoch": 1.179524631523038, + "grad_norm": 1.161546405047816, + "learning_rate": 7.880100539309506e-06, + "loss": 0.24777942895889282, + "step": 4442 + }, + { + "epoch": 1.179790200504581, + "grad_norm": 1.194146333188245, + "learning_rate": 7.875809772367867e-06, + "loss": 0.25111010670661926, + "step": 4443 + }, + { + "epoch": 1.180055769486124, + "grad_norm": 1.163412583383914, 
+ "learning_rate": 7.87151941494505e-06, + "loss": 0.26183217763900757, + "step": 4444 + }, + { + "epoch": 1.180321338467667, + "grad_norm": 1.2974065116766642, + "learning_rate": 7.867229467868189e-06, + "loss": 0.27538490295410156, + "step": 4445 + }, + { + "epoch": 1.18058690744921, + "grad_norm": 1.078206017492716, + "learning_rate": 7.862939931964333e-06, + "loss": 0.2192106693983078, + "step": 4446 + }, + { + "epoch": 1.1808524764307529, + "grad_norm": 1.2415747879020278, + "learning_rate": 7.858650808060453e-06, + "loss": 0.26506057381629944, + "step": 4447 + }, + { + "epoch": 1.1811180454122958, + "grad_norm": 1.103375758703505, + "learning_rate": 7.854362096983443e-06, + "loss": 0.2345719337463379, + "step": 4448 + }, + { + "epoch": 1.1813836143938388, + "grad_norm": 1.1651284585435833, + "learning_rate": 7.850073799560114e-06, + "loss": 0.21404311060905457, + "step": 4449 + }, + { + "epoch": 1.1816491833753817, + "grad_norm": 1.1572235550991925, + "learning_rate": 7.8457859166172e-06, + "loss": 0.24332138895988464, + "step": 4450 + }, + { + "epoch": 1.1819147523569247, + "grad_norm": 1.1687901862394692, + "learning_rate": 7.841498448981354e-06, + "loss": 0.25025150179862976, + "step": 4451 + }, + { + "epoch": 1.1821803213384676, + "grad_norm": 1.167419454587793, + "learning_rate": 7.837211397479152e-06, + "loss": 0.21918940544128418, + "step": 4452 + }, + { + "epoch": 1.1824458903200106, + "grad_norm": 1.1517463754639392, + "learning_rate": 7.832924762937083e-06, + "loss": 0.24976079165935516, + "step": 4453 + }, + { + "epoch": 1.1827114593015535, + "grad_norm": 1.1165052000707918, + "learning_rate": 7.828638546181565e-06, + "loss": 0.21146243810653687, + "step": 4454 + }, + { + "epoch": 1.1829770282830965, + "grad_norm": 1.1110608449393633, + "learning_rate": 7.824352748038924e-06, + "loss": 0.22921445965766907, + "step": 4455 + }, + { + "epoch": 1.1832425972646394, + "grad_norm": 1.1833669908026252, + "learning_rate": 7.820067369335413e-06, + "loss": 
0.24401478469371796, + "step": 4456 + }, + { + "epoch": 1.1835081662461824, + "grad_norm": 1.2543977272663969, + "learning_rate": 7.815782410897209e-06, + "loss": 0.2717207074165344, + "step": 4457 + }, + { + "epoch": 1.1837737352277253, + "grad_norm": 1.0934075655453726, + "learning_rate": 7.81149787355039e-06, + "loss": 0.20752058923244476, + "step": 4458 + }, + { + "epoch": 1.1840393042092683, + "grad_norm": 1.3448722481333402, + "learning_rate": 7.807213758120965e-06, + "loss": 0.31095850467681885, + "step": 4459 + }, + { + "epoch": 1.1843048731908112, + "grad_norm": 1.1769654791590503, + "learning_rate": 7.802930065434874e-06, + "loss": 0.23761102557182312, + "step": 4460 + }, + { + "epoch": 1.1845704421723542, + "grad_norm": 1.3225327364557968, + "learning_rate": 7.798646796317952e-06, + "loss": 0.2509460151195526, + "step": 4461 + }, + { + "epoch": 1.1848360111538971, + "grad_norm": 1.472525937697874, + "learning_rate": 7.794363951595966e-06, + "loss": 0.25903213024139404, + "step": 4462 + }, + { + "epoch": 1.18510158013544, + "grad_norm": 1.1904413554334654, + "learning_rate": 7.790081532094596e-06, + "loss": 0.23304736614227295, + "step": 4463 + }, + { + "epoch": 1.185367149116983, + "grad_norm": 1.311875765456408, + "learning_rate": 7.785799538639445e-06, + "loss": 0.28707265853881836, + "step": 4464 + }, + { + "epoch": 1.185632718098526, + "grad_norm": 1.0202920254712324, + "learning_rate": 7.781517972056028e-06, + "loss": 0.20282745361328125, + "step": 4465 + }, + { + "epoch": 1.185898287080069, + "grad_norm": 1.2606153791729335, + "learning_rate": 7.777236833169782e-06, + "loss": 0.24056631326675415, + "step": 4466 + }, + { + "epoch": 1.186163856061612, + "grad_norm": 1.4946194524955894, + "learning_rate": 7.772956122806058e-06, + "loss": 0.2677255868911743, + "step": 4467 + }, + { + "epoch": 1.1864294250431549, + "grad_norm": 1.2681064192856966, + "learning_rate": 7.768675841790124e-06, + "loss": 0.22032876312732697, + "step": 4468 + }, + { + "epoch": 
1.1866949940246978, + "grad_norm": 1.3138325978828467, + "learning_rate": 7.764395990947177e-06, + "loss": 0.2980336546897888, + "step": 4469 + }, + { + "epoch": 1.1869605630062408, + "grad_norm": 1.2624280680532078, + "learning_rate": 7.760116571102314e-06, + "loss": 0.2562638521194458, + "step": 4470 + }, + { + "epoch": 1.1872261319877837, + "grad_norm": 1.2207997545500016, + "learning_rate": 7.755837583080561e-06, + "loss": 0.262576699256897, + "step": 4471 + }, + { + "epoch": 1.1874917009693267, + "grad_norm": 1.2672893771429377, + "learning_rate": 7.751559027706858e-06, + "loss": 0.2654029130935669, + "step": 4472 + }, + { + "epoch": 1.1877572699508698, + "grad_norm": 1.2996444615622489, + "learning_rate": 7.747280905806051e-06, + "loss": 0.2946662902832031, + "step": 4473 + }, + { + "epoch": 1.1880228389324128, + "grad_norm": 1.193974235945654, + "learning_rate": 7.743003218202921e-06, + "loss": 0.25140905380249023, + "step": 4474 + }, + { + "epoch": 1.1882884079139557, + "grad_norm": 1.2240016583398612, + "learning_rate": 7.738725965722149e-06, + "loss": 0.2601654529571533, + "step": 4475 + }, + { + "epoch": 1.1885539768954987, + "grad_norm": 1.9675422662507516, + "learning_rate": 7.73444914918834e-06, + "loss": 0.2639954090118408, + "step": 4476 + }, + { + "epoch": 1.1888195458770416, + "grad_norm": 1.174151986382161, + "learning_rate": 7.730172769426014e-06, + "loss": 0.23391291499137878, + "step": 4477 + }, + { + "epoch": 1.1890851148585846, + "grad_norm": 2.254589386622623, + "learning_rate": 7.725896827259613e-06, + "loss": 0.2912144958972931, + "step": 4478 + }, + { + "epoch": 1.1893506838401275, + "grad_norm": 1.0905445077469016, + "learning_rate": 7.72162132351348e-06, + "loss": 0.23867549002170563, + "step": 4479 + }, + { + "epoch": 1.1896162528216705, + "grad_norm": 1.1124853975848743, + "learning_rate": 7.717346259011888e-06, + "loss": 0.22434742748737335, + "step": 4480 + }, + { + "epoch": 1.1898818218032134, + "grad_norm": 1.2440839352544732, + 
"learning_rate": 7.713071634579017e-06, + "loss": 0.2504398822784424, + "step": 4481 + }, + { + "epoch": 1.1901473907847564, + "grad_norm": 1.1759629506533034, + "learning_rate": 7.70879745103896e-06, + "loss": 0.24887195229530334, + "step": 4482 + }, + { + "epoch": 1.1904129597662994, + "grad_norm": 1.2603454999195398, + "learning_rate": 7.704523709215732e-06, + "loss": 0.2730141580104828, + "step": 4483 + }, + { + "epoch": 1.1906785287478423, + "grad_norm": 1.2285382464481551, + "learning_rate": 7.70025040993326e-06, + "loss": 0.22197315096855164, + "step": 4484 + }, + { + "epoch": 1.1909440977293853, + "grad_norm": 1.2004564929121084, + "learning_rate": 7.695977554015387e-06, + "loss": 0.2852731943130493, + "step": 4485 + }, + { + "epoch": 1.1912096667109282, + "grad_norm": 1.2815387200597224, + "learning_rate": 7.691705142285863e-06, + "loss": 0.2577238976955414, + "step": 4486 + }, + { + "epoch": 1.1914752356924712, + "grad_norm": 1.066499567502605, + "learning_rate": 7.68743317556837e-06, + "loss": 0.23510503768920898, + "step": 4487 + }, + { + "epoch": 1.191740804674014, + "grad_norm": 1.557745891642732, + "learning_rate": 7.683161654686486e-06, + "loss": 0.2553985118865967, + "step": 4488 + }, + { + "epoch": 1.192006373655557, + "grad_norm": 1.1965147913981737, + "learning_rate": 7.67889058046371e-06, + "loss": 0.2778642475605011, + "step": 4489 + }, + { + "epoch": 1.1922719426371, + "grad_norm": 1.1622951487110165, + "learning_rate": 7.674619953723455e-06, + "loss": 0.24740618467330933, + "step": 4490 + }, + { + "epoch": 1.192537511618643, + "grad_norm": 1.1598996003550786, + "learning_rate": 7.670349775289047e-06, + "loss": 0.2453901171684265, + "step": 4491 + }, + { + "epoch": 1.192803080600186, + "grad_norm": 1.1444233008842855, + "learning_rate": 7.666080045983726e-06, + "loss": 0.2336064875125885, + "step": 4492 + }, + { + "epoch": 1.1930686495817289, + "grad_norm": 1.18047841753512, + "learning_rate": 7.661810766630648e-06, + "loss": 
0.2375800907611847, + "step": 4493 + }, + { + "epoch": 1.1933342185632718, + "grad_norm": 1.1241813274405275, + "learning_rate": 7.657541938052876e-06, + "loss": 0.21272733807563782, + "step": 4494 + }, + { + "epoch": 1.1935997875448148, + "grad_norm": 1.1531042348696576, + "learning_rate": 7.65327356107339e-06, + "loss": 0.26597708463668823, + "step": 4495 + }, + { + "epoch": 1.1938653565263577, + "grad_norm": 1.1715955143508257, + "learning_rate": 7.649005636515088e-06, + "loss": 0.267806738615036, + "step": 4496 + }, + { + "epoch": 1.1941309255079007, + "grad_norm": 1.1812545197713797, + "learning_rate": 7.64473816520077e-06, + "loss": 0.2260194569826126, + "step": 4497 + }, + { + "epoch": 1.1943964944894436, + "grad_norm": 1.298416110387325, + "learning_rate": 7.640471147953157e-06, + "loss": 0.24523532390594482, + "step": 4498 + }, + { + "epoch": 1.1946620634709866, + "grad_norm": 1.1020194586485352, + "learning_rate": 7.636204585594879e-06, + "loss": 0.23230910301208496, + "step": 4499 + }, + { + "epoch": 1.1949276324525295, + "grad_norm": 1.1141631171804318, + "learning_rate": 7.631938478948478e-06, + "loss": 0.23322705924510956, + "step": 4500 + }, + { + "epoch": 1.1951932014340725, + "grad_norm": 1.3011711597097497, + "learning_rate": 7.6276728288364086e-06, + "loss": 0.25614386796951294, + "step": 4501 + }, + { + "epoch": 1.1954587704156154, + "grad_norm": 1.2188058731839337, + "learning_rate": 7.62340763608104e-06, + "loss": 0.22921821475028992, + "step": 4502 + }, + { + "epoch": 1.1957243393971584, + "grad_norm": 1.1538976889459698, + "learning_rate": 7.619142901504649e-06, + "loss": 0.25528913736343384, + "step": 4503 + }, + { + "epoch": 1.1959899083787013, + "grad_norm": 1.1730292690453887, + "learning_rate": 7.614878625929425e-06, + "loss": 0.2528502643108368, + "step": 4504 + }, + { + "epoch": 1.1962554773602443, + "grad_norm": 1.2636827238002009, + "learning_rate": 7.610614810177474e-06, + "loss": 0.2519027590751648, + "step": 4505 + }, + { + 
"epoch": 1.1965210463417872, + "grad_norm": 1.3563109831905724, + "learning_rate": 7.606351455070808e-06, + "loss": 0.2895655333995819, + "step": 4506 + }, + { + "epoch": 1.1967866153233302, + "grad_norm": 1.2317858842714817, + "learning_rate": 7.6020885614313515e-06, + "loss": 0.24588793516159058, + "step": 4507 + }, + { + "epoch": 1.1970521843048731, + "grad_norm": 1.3148149004868621, + "learning_rate": 7.597826130080938e-06, + "loss": 0.2996830940246582, + "step": 4508 + }, + { + "epoch": 1.197317753286416, + "grad_norm": 1.2289139982746875, + "learning_rate": 7.593564161841318e-06, + "loss": 0.2654343247413635, + "step": 4509 + }, + { + "epoch": 1.197583322267959, + "grad_norm": 1.2104660234722762, + "learning_rate": 7.589302657534144e-06, + "loss": 0.24949109554290771, + "step": 4510 + }, + { + "epoch": 1.197848891249502, + "grad_norm": 1.1785955409512114, + "learning_rate": 7.5850416179809886e-06, + "loss": 0.23205731809139252, + "step": 4511 + }, + { + "epoch": 1.198114460231045, + "grad_norm": 3.351023225066079, + "learning_rate": 7.580781044003324e-06, + "loss": 0.232904314994812, + "step": 4512 + }, + { + "epoch": 1.198380029212588, + "grad_norm": 1.0569352775404934, + "learning_rate": 7.576520936422542e-06, + "loss": 0.25071364641189575, + "step": 4513 + }, + { + "epoch": 1.1986455981941309, + "grad_norm": 1.3613643273685416, + "learning_rate": 7.572261296059944e-06, + "loss": 0.2574467658996582, + "step": 4514 + }, + { + "epoch": 1.1989111671756738, + "grad_norm": 1.1866331959407248, + "learning_rate": 7.568002123736735e-06, + "loss": 0.23134055733680725, + "step": 4515 + }, + { + "epoch": 1.1991767361572168, + "grad_norm": 1.093870770411857, + "learning_rate": 7.5637434202740334e-06, + "loss": 0.22163332998752594, + "step": 4516 + }, + { + "epoch": 1.1994423051387597, + "grad_norm": 1.182308432196374, + "learning_rate": 7.559485186492868e-06, + "loss": 0.2665749788284302, + "step": 4517 + }, + { + "epoch": 1.1997078741203027, + "grad_norm": 
1.0758759053634162, + "learning_rate": 7.555227423214174e-06, + "loss": 0.2237103432416916, + "step": 4518 + }, + { + "epoch": 1.1999734431018456, + "grad_norm": 1.2216323349035507, + "learning_rate": 7.550970131258801e-06, + "loss": 0.23287461698055267, + "step": 4519 + }, + { + "epoch": 1.2002390120833886, + "grad_norm": 1.1237156855078405, + "learning_rate": 7.5467133114475025e-06, + "loss": 0.2296323925256729, + "step": 4520 + }, + { + "epoch": 1.2005045810649315, + "grad_norm": 1.0900498705064874, + "learning_rate": 7.542456964600944e-06, + "loss": 0.21358339488506317, + "step": 4521 + }, + { + "epoch": 1.2007701500464747, + "grad_norm": 1.2516498821908515, + "learning_rate": 7.5382010915396954e-06, + "loss": 0.2355872094631195, + "step": 4522 + }, + { + "epoch": 1.2010357190280176, + "grad_norm": 1.2039029354448443, + "learning_rate": 7.5339456930842455e-06, + "loss": 0.25397661328315735, + "step": 4523 + }, + { + "epoch": 1.2013012880095606, + "grad_norm": 1.1762399479435963, + "learning_rate": 7.52969077005498e-06, + "loss": 0.26658257842063904, + "step": 4524 + }, + { + "epoch": 1.2015668569911035, + "grad_norm": 1.1889790145170218, + "learning_rate": 7.525436323272201e-06, + "loss": 0.27207136154174805, + "step": 4525 + }, + { + "epoch": 1.2018324259726465, + "grad_norm": 1.1867510172835751, + "learning_rate": 7.521182353556114e-06, + "loss": 0.25889313220977783, + "step": 4526 + }, + { + "epoch": 1.2020979949541895, + "grad_norm": 1.3095753328357655, + "learning_rate": 7.516928861726834e-06, + "loss": 0.272185742855072, + "step": 4527 + }, + { + "epoch": 1.2023635639357324, + "grad_norm": 1.156226984644319, + "learning_rate": 7.512675848604385e-06, + "loss": 0.25371503829956055, + "step": 4528 + }, + { + "epoch": 1.2026291329172754, + "grad_norm": 1.2028831911106082, + "learning_rate": 7.5084233150086964e-06, + "loss": 0.2554902732372284, + "step": 4529 + }, + { + "epoch": 1.2028947018988183, + "grad_norm": 1.1714528701705076, + "learning_rate": 
7.50417126175961e-06, + "loss": 0.22007369995117188, + "step": 4530 + }, + { + "epoch": 1.2031602708803613, + "grad_norm": 1.2057968317835202, + "learning_rate": 7.499919689676861e-06, + "loss": 0.27492445707321167, + "step": 4531 + }, + { + "epoch": 1.2034258398619042, + "grad_norm": 1.1229280499713745, + "learning_rate": 7.4956685995801144e-06, + "loss": 0.2321021854877472, + "step": 4532 + }, + { + "epoch": 1.2036914088434472, + "grad_norm": 1.1735641467762012, + "learning_rate": 7.491417992288927e-06, + "loss": 0.25410759449005127, + "step": 4533 + }, + { + "epoch": 1.2039569778249901, + "grad_norm": 1.0638924164212193, + "learning_rate": 7.487167868622765e-06, + "loss": 0.2080576866865158, + "step": 4534 + }, + { + "epoch": 1.204222546806533, + "grad_norm": 1.115815492341061, + "learning_rate": 7.482918229401001e-06, + "loss": 0.2333327978849411, + "step": 4535 + }, + { + "epoch": 1.204488115788076, + "grad_norm": 1.1999209092526242, + "learning_rate": 7.478669075442917e-06, + "loss": 0.23160479962825775, + "step": 4536 + }, + { + "epoch": 1.204753684769619, + "grad_norm": 1.2136747509439494, + "learning_rate": 7.474420407567699e-06, + "loss": 0.2627696394920349, + "step": 4537 + }, + { + "epoch": 1.205019253751162, + "grad_norm": 1.0694648198090266, + "learning_rate": 7.470172226594441e-06, + "loss": 0.18656940758228302, + "step": 4538 + }, + { + "epoch": 1.2052848227327049, + "grad_norm": 1.2245138263513848, + "learning_rate": 7.465924533342139e-06, + "loss": 0.2749083340167999, + "step": 4539 + }, + { + "epoch": 1.2055503917142478, + "grad_norm": 1.3944907322006155, + "learning_rate": 7.461677328629696e-06, + "loss": 0.27484387159347534, + "step": 4540 + }, + { + "epoch": 1.2058159606957908, + "grad_norm": 1.254197138569937, + "learning_rate": 7.457430613275934e-06, + "loss": 0.26357588171958923, + "step": 4541 + }, + { + "epoch": 1.2060815296773337, + "grad_norm": 1.2004336778554112, + "learning_rate": 7.453184388099559e-06, + "loss": 0.23495343327522278, 
+ "step": 4542 + }, + { + "epoch": 1.2063470986588767, + "grad_norm": 1.2123259782755003, + "learning_rate": 7.4489386539192e-06, + "loss": 0.253970205783844, + "step": 4543 + }, + { + "epoch": 1.2066126676404196, + "grad_norm": 1.1523820852778563, + "learning_rate": 7.444693411553383e-06, + "loss": 0.24919062852859497, + "step": 4544 + }, + { + "epoch": 1.2068782366219626, + "grad_norm": 1.2181666045865969, + "learning_rate": 7.440448661820536e-06, + "loss": 0.24373450875282288, + "step": 4545 + }, + { + "epoch": 1.2071438056035055, + "grad_norm": 1.3762501451890354, + "learning_rate": 7.436204405539002e-06, + "loss": 0.24739482998847961, + "step": 4546 + }, + { + "epoch": 1.2074093745850485, + "grad_norm": 1.2982074074943253, + "learning_rate": 7.4319606435270195e-06, + "loss": 0.27041494846343994, + "step": 4547 + }, + { + "epoch": 1.2076749435665914, + "grad_norm": 1.1359942984852744, + "learning_rate": 7.427717376602739e-06, + "loss": 0.23243938386440277, + "step": 4548 + }, + { + "epoch": 1.2079405125481344, + "grad_norm": 1.3118758722508392, + "learning_rate": 7.423474605584206e-06, + "loss": 0.2346343696117401, + "step": 4549 + }, + { + "epoch": 1.2082060815296773, + "grad_norm": 1.1819354183035133, + "learning_rate": 7.419232331289385e-06, + "loss": 0.2587367296218872, + "step": 4550 + }, + { + "epoch": 1.2084716505112203, + "grad_norm": 1.195922174249915, + "learning_rate": 7.414990554536134e-06, + "loss": 0.2552938461303711, + "step": 4551 + }, + { + "epoch": 1.2087372194927632, + "grad_norm": 1.2688216449772127, + "learning_rate": 7.410749276142221e-06, + "loss": 0.2693648040294647, + "step": 4552 + }, + { + "epoch": 1.2090027884743062, + "grad_norm": 1.1997939452425357, + "learning_rate": 7.406508496925307e-06, + "loss": 0.21543294191360474, + "step": 4553 + }, + { + "epoch": 1.2092683574558492, + "grad_norm": 1.2385892147047024, + "learning_rate": 7.402268217702966e-06, + "loss": 0.2913009524345398, + "step": 4554 + }, + { + "epoch": 
1.209533926437392, + "grad_norm": 1.0671356100150298, + "learning_rate": 7.398028439292675e-06, + "loss": 0.23279520869255066, + "step": 4555 + }, + { + "epoch": 1.209799495418935, + "grad_norm": 1.0946575444558022, + "learning_rate": 7.393789162511815e-06, + "loss": 0.25086939334869385, + "step": 4556 + }, + { + "epoch": 1.210065064400478, + "grad_norm": 1.0964890001200192, + "learning_rate": 7.389550388177662e-06, + "loss": 0.21704714000225067, + "step": 4557 + }, + { + "epoch": 1.210330633382021, + "grad_norm": 1.126699331966135, + "learning_rate": 7.3853121171074115e-06, + "loss": 0.230219304561615, + "step": 4558 + }, + { + "epoch": 1.210596202363564, + "grad_norm": 1.1809668678269754, + "learning_rate": 7.381074350118149e-06, + "loss": 0.26073017716407776, + "step": 4559 + }, + { + "epoch": 1.2108617713451069, + "grad_norm": 1.2065072762311946, + "learning_rate": 7.376837088026863e-06, + "loss": 0.25186216831207275, + "step": 4560 + }, + { + "epoch": 1.2111273403266498, + "grad_norm": 1.3978877577958326, + "learning_rate": 7.372600331650449e-06, + "loss": 0.28719040751457214, + "step": 4561 + }, + { + "epoch": 1.2113929093081928, + "grad_norm": 1.16073083909203, + "learning_rate": 7.368364081805704e-06, + "loss": 0.23972755670547485, + "step": 4562 + }, + { + "epoch": 1.2116584782897357, + "grad_norm": 1.096919114864748, + "learning_rate": 7.364128339309326e-06, + "loss": 0.23053769767284393, + "step": 4563 + }, + { + "epoch": 1.2119240472712787, + "grad_norm": 1.2910615683085556, + "learning_rate": 7.359893104977917e-06, + "loss": 0.25124189257621765, + "step": 4564 + }, + { + "epoch": 1.2121896162528216, + "grad_norm": 1.1863697592423188, + "learning_rate": 7.355658379627981e-06, + "loss": 0.2243686318397522, + "step": 4565 + }, + { + "epoch": 1.2124551852343646, + "grad_norm": 1.244591161752608, + "learning_rate": 7.3514241640759175e-06, + "loss": 0.26047343015670776, + "step": 4566 + }, + { + "epoch": 1.2127207542159075, + "grad_norm": 1.1775978450301259, 
+ "learning_rate": 7.3471904591380434e-06, + "loss": 0.23603469133377075, + "step": 4567 + }, + { + "epoch": 1.2129863231974505, + "grad_norm": 1.2261707581126196, + "learning_rate": 7.342957265630561e-06, + "loss": 0.31320711970329285, + "step": 4568 + }, + { + "epoch": 1.2132518921789934, + "grad_norm": 1.22464158648852, + "learning_rate": 7.338724584369581e-06, + "loss": 0.22159788012504578, + "step": 4569 + }, + { + "epoch": 1.2135174611605364, + "grad_norm": 1.1206153371836056, + "learning_rate": 7.334492416171114e-06, + "loss": 0.21992239356040955, + "step": 4570 + }, + { + "epoch": 1.2137830301420793, + "grad_norm": 1.3229661253734524, + "learning_rate": 7.330260761851071e-06, + "loss": 0.20708827674388885, + "step": 4571 + }, + { + "epoch": 1.2140485991236223, + "grad_norm": 1.1899658624900848, + "learning_rate": 7.326029622225269e-06, + "loss": 0.2846507132053375, + "step": 4572 + }, + { + "epoch": 1.2143141681051652, + "grad_norm": 1.2218224134688922, + "learning_rate": 7.321798998109417e-06, + "loss": 0.24903801083564758, + "step": 4573 + }, + { + "epoch": 1.2145797370867082, + "grad_norm": 1.1817295734811926, + "learning_rate": 7.317568890319134e-06, + "loss": 0.23426681756973267, + "step": 4574 + }, + { + "epoch": 1.2148453060682511, + "grad_norm": 1.1685993771040228, + "learning_rate": 7.31333929966993e-06, + "loss": 0.2374490350484848, + "step": 4575 + }, + { + "epoch": 1.215110875049794, + "grad_norm": 1.13335327598736, + "learning_rate": 7.309110226977223e-06, + "loss": 0.24035832285881042, + "step": 4576 + }, + { + "epoch": 1.215376444031337, + "grad_norm": 1.2837405582571324, + "learning_rate": 7.30488167305633e-06, + "loss": 0.21872258186340332, + "step": 4577 + }, + { + "epoch": 1.21564201301288, + "grad_norm": 1.3425258296129825, + "learning_rate": 7.300653638722463e-06, + "loss": 0.2940255403518677, + "step": 4578 + }, + { + "epoch": 1.215907581994423, + "grad_norm": 1.1158795437619367, + "learning_rate": 7.29642612479074e-06, + "loss": 
0.20970892906188965, + "step": 4579 + }, + { + "epoch": 1.216173150975966, + "grad_norm": 1.1571301789790744, + "learning_rate": 7.292199132076175e-06, + "loss": 0.21217449009418488, + "step": 4580 + }, + { + "epoch": 1.2164387199575089, + "grad_norm": 1.2448503896532135, + "learning_rate": 7.28797266139368e-06, + "loss": 0.2463359832763672, + "step": 4581 + }, + { + "epoch": 1.2167042889390518, + "grad_norm": 1.132320428820701, + "learning_rate": 7.283746713558071e-06, + "loss": 0.21921415627002716, + "step": 4582 + }, + { + "epoch": 1.2169698579205948, + "grad_norm": 1.2437376760058587, + "learning_rate": 7.279521289384059e-06, + "loss": 0.2412380576133728, + "step": 4583 + }, + { + "epoch": 1.2172354269021377, + "grad_norm": 1.180878934188553, + "learning_rate": 7.275296389686258e-06, + "loss": 0.2558564245700836, + "step": 4584 + }, + { + "epoch": 1.2175009958836809, + "grad_norm": 1.2566060880081307, + "learning_rate": 7.271072015279179e-06, + "loss": 0.2548869848251343, + "step": 4585 + }, + { + "epoch": 1.2177665648652238, + "grad_norm": 1.4407566508510072, + "learning_rate": 7.2668481669772304e-06, + "loss": 0.22183407843112946, + "step": 4586 + }, + { + "epoch": 1.2180321338467668, + "grad_norm": 1.20165829214997, + "learning_rate": 7.262624845594721e-06, + "loss": 0.24722473323345184, + "step": 4587 + }, + { + "epoch": 1.2182977028283097, + "grad_norm": 1.190564524584547, + "learning_rate": 7.258402051945858e-06, + "loss": 0.2678988575935364, + "step": 4588 + }, + { + "epoch": 1.2185632718098527, + "grad_norm": 1.187777405395345, + "learning_rate": 7.2541797868447435e-06, + "loss": 0.2116469144821167, + "step": 4589 + }, + { + "epoch": 1.2188288407913956, + "grad_norm": 1.2500071795758152, + "learning_rate": 7.249958051105383e-06, + "loss": 0.23897933959960938, + "step": 4590 + }, + { + "epoch": 1.2190944097729386, + "grad_norm": 1.2473885744661077, + "learning_rate": 7.245736845541676e-06, + "loss": 0.25434061884880066, + "step": 4591 + }, + { + "epoch": 
1.2193599787544815, + "grad_norm": 1.2108382272450464, + "learning_rate": 7.2415161709674235e-06, + "loss": 0.2602628469467163, + "step": 4592 + }, + { + "epoch": 1.2196255477360245, + "grad_norm": 3.1633443202169764, + "learning_rate": 7.2372960281963165e-06, + "loss": 0.2519065737724304, + "step": 4593 + }, + { + "epoch": 1.2198911167175674, + "grad_norm": 1.550903602515833, + "learning_rate": 7.233076418041954e-06, + "loss": 0.24404102563858032, + "step": 4594 + }, + { + "epoch": 1.2201566856991104, + "grad_norm": 1.1561711817096534, + "learning_rate": 7.228857341317825e-06, + "loss": 0.23633979260921478, + "step": 4595 + }, + { + "epoch": 1.2204222546806534, + "grad_norm": 1.2128002082313463, + "learning_rate": 7.224638798837319e-06, + "loss": 0.2513781189918518, + "step": 4596 + }, + { + "epoch": 1.2206878236621963, + "grad_norm": 1.2409533600026899, + "learning_rate": 7.220420791413721e-06, + "loss": 0.23270189762115479, + "step": 4597 + }, + { + "epoch": 1.2209533926437393, + "grad_norm": 1.2503409564498669, + "learning_rate": 7.21620331986021e-06, + "loss": 0.2770010530948639, + "step": 4598 + }, + { + "epoch": 1.2212189616252822, + "grad_norm": 1.1284522462719728, + "learning_rate": 7.2119863849898684e-06, + "loss": 0.2312745451927185, + "step": 4599 + }, + { + "epoch": 1.2214845306068252, + "grad_norm": 1.2725314186948387, + "learning_rate": 7.20776998761567e-06, + "loss": 0.231276735663414, + "step": 4600 + }, + { + "epoch": 1.221750099588368, + "grad_norm": 1.1715742737590393, + "learning_rate": 7.203554128550486e-06, + "loss": 0.24927708506584167, + "step": 4601 + }, + { + "epoch": 1.222015668569911, + "grad_norm": 1.1138441718661785, + "learning_rate": 7.199338808607084e-06, + "loss": 0.23033373057842255, + "step": 4602 + }, + { + "epoch": 1.222281237551454, + "grad_norm": 1.2545098885673684, + "learning_rate": 7.195124028598131e-06, + "loss": 0.24003425240516663, + "step": 4603 + }, + { + "epoch": 1.222546806532997, + "grad_norm": 1.1872708193619057, 
+ "learning_rate": 7.190909789336185e-06, + "loss": 0.22648809850215912, + "step": 4604 + }, + { + "epoch": 1.22281237551454, + "grad_norm": 1.2511860493227276, + "learning_rate": 7.1866960916337006e-06, + "loss": 0.2605816125869751, + "step": 4605 + }, + { + "epoch": 1.2230779444960829, + "grad_norm": 1.1424629632361756, + "learning_rate": 7.1824829363030305e-06, + "loss": 0.21549202501773834, + "step": 4606 + }, + { + "epoch": 1.2233435134776258, + "grad_norm": 1.1532084986944064, + "learning_rate": 7.17827032415642e-06, + "loss": 0.23113220930099487, + "step": 4607 + }, + { + "epoch": 1.2236090824591688, + "grad_norm": 1.1649312720163907, + "learning_rate": 7.174058256006012e-06, + "loss": 0.22736643254756927, + "step": 4608 + }, + { + "epoch": 1.2238746514407117, + "grad_norm": 1.172011833362534, + "learning_rate": 7.169846732663845e-06, + "loss": 0.2686663866043091, + "step": 4609 + }, + { + "epoch": 1.2241402204222547, + "grad_norm": 1.1555217624379808, + "learning_rate": 7.1656357549418485e-06, + "loss": 0.1980462670326233, + "step": 4610 + }, + { + "epoch": 1.2244057894037976, + "grad_norm": 1.2401629806715768, + "learning_rate": 7.161425323651846e-06, + "loss": 0.22997641563415527, + "step": 4611 + }, + { + "epoch": 1.2246713583853406, + "grad_norm": 1.3367939845671126, + "learning_rate": 7.157215439605567e-06, + "loss": 0.28781357407569885, + "step": 4612 + }, + { + "epoch": 1.2249369273668835, + "grad_norm": 1.2895382897388425, + "learning_rate": 7.153006103614624e-06, + "loss": 0.22558270394802094, + "step": 4613 + }, + { + "epoch": 1.2252024963484265, + "grad_norm": 1.1860196927831441, + "learning_rate": 7.148797316490527e-06, + "loss": 0.2435922622680664, + "step": 4614 + }, + { + "epoch": 1.2254680653299694, + "grad_norm": 1.2828543438888096, + "learning_rate": 7.14458907904468e-06, + "loss": 0.27840936183929443, + "step": 4615 + }, + { + "epoch": 1.2257336343115124, + "grad_norm": 1.2350405670943831, + "learning_rate": 7.1403813920883825e-06, + 
"loss": 0.2775651812553406, + "step": 4616 + }, + { + "epoch": 1.2259992032930553, + "grad_norm": 1.2738452228129284, + "learning_rate": 7.136174256432828e-06, + "loss": 0.2430988848209381, + "step": 4617 + }, + { + "epoch": 1.2262647722745983, + "grad_norm": 1.0618083363199646, + "learning_rate": 7.131967672889101e-06, + "loss": 0.2018759697675705, + "step": 4618 + }, + { + "epoch": 1.2265303412561412, + "grad_norm": 1.2320094058432127, + "learning_rate": 7.127761642268179e-06, + "loss": 0.25314825773239136, + "step": 4619 + }, + { + "epoch": 1.2267959102376842, + "grad_norm": 1.409693024729639, + "learning_rate": 7.123556165380935e-06, + "loss": 0.2542746365070343, + "step": 4620 + }, + { + "epoch": 1.2270614792192271, + "grad_norm": 1.2571649384815597, + "learning_rate": 7.119351243038142e-06, + "loss": 0.2912300229072571, + "step": 4621 + }, + { + "epoch": 1.22732704820077, + "grad_norm": 1.3877507856901592, + "learning_rate": 7.115146876050454e-06, + "loss": 0.26893284916877747, + "step": 4622 + }, + { + "epoch": 1.227592617182313, + "grad_norm": 1.3833428208823224, + "learning_rate": 7.110943065228425e-06, + "loss": 0.2711215317249298, + "step": 4623 + }, + { + "epoch": 1.227858186163856, + "grad_norm": 1.346165350849743, + "learning_rate": 7.106739811382501e-06, + "loss": 0.25530266761779785, + "step": 4624 + }, + { + "epoch": 1.228123755145399, + "grad_norm": 1.268299981159743, + "learning_rate": 7.102537115323018e-06, + "loss": 0.2547178864479065, + "step": 4625 + }, + { + "epoch": 1.228389324126942, + "grad_norm": 1.5802606545447795, + "learning_rate": 7.0983349778602064e-06, + "loss": 0.27973634004592896, + "step": 4626 + }, + { + "epoch": 1.2286548931084849, + "grad_norm": 1.205257873334912, + "learning_rate": 7.0941333998041884e-06, + "loss": 0.24066339433193207, + "step": 4627 + }, + { + "epoch": 1.2289204620900278, + "grad_norm": 1.1798307734371165, + "learning_rate": 7.0899323819649816e-06, + "loss": 0.24305742979049683, + "step": 4628 + }, + { + 
"epoch": 1.2291860310715708, + "grad_norm": 1.163221794708842, + "learning_rate": 7.085731925152484e-06, + "loss": 0.22478783130645752, + "step": 4629 + }, + { + "epoch": 1.2294516000531137, + "grad_norm": 1.1812808698189172, + "learning_rate": 7.081532030176506e-06, + "loss": 0.24995659291744232, + "step": 4630 + }, + { + "epoch": 1.2297171690346567, + "grad_norm": 1.1575900439946216, + "learning_rate": 7.077332697846733e-06, + "loss": 0.2579454183578491, + "step": 4631 + }, + { + "epoch": 1.2299827380161996, + "grad_norm": 1.2378373931288529, + "learning_rate": 7.073133928972745e-06, + "loss": 0.2513299286365509, + "step": 4632 + }, + { + "epoch": 1.2302483069977426, + "grad_norm": 1.0751310135047412, + "learning_rate": 7.068935724364016e-06, + "loss": 0.23344315588474274, + "step": 4633 + }, + { + "epoch": 1.2305138759792857, + "grad_norm": 1.1882346043976466, + "learning_rate": 7.064738084829912e-06, + "loss": 0.26750341057777405, + "step": 4634 + }, + { + "epoch": 1.2307794449608287, + "grad_norm": 1.1622882344241228, + "learning_rate": 7.0605410111796855e-06, + "loss": 0.22424373030662537, + "step": 4635 + }, + { + "epoch": 1.2310450139423716, + "grad_norm": 1.0711348851881108, + "learning_rate": 7.056344504222485e-06, + "loss": 0.24261844158172607, + "step": 4636 + }, + { + "epoch": 1.2313105829239146, + "grad_norm": 1.1382788327638453, + "learning_rate": 7.052148564767347e-06, + "loss": 0.22273704409599304, + "step": 4637 + }, + { + "epoch": 1.2315761519054576, + "grad_norm": 1.217398110209698, + "learning_rate": 7.047953193623195e-06, + "loss": 0.23726603388786316, + "step": 4638 + }, + { + "epoch": 1.2318417208870005, + "grad_norm": 1.1961933626954258, + "learning_rate": 7.043758391598856e-06, + "loss": 0.2612340748310089, + "step": 4639 + }, + { + "epoch": 1.2321072898685435, + "grad_norm": 1.3828917417203295, + "learning_rate": 7.039564159503034e-06, + "loss": 0.25722867250442505, + "step": 4640 + }, + { + "epoch": 1.2323728588500864, + "grad_norm": 
1.2106898963951274, + "learning_rate": 7.035370498144325e-06, + "loss": 0.25940731167793274, + "step": 4641 + }, + { + "epoch": 1.2326384278316294, + "grad_norm": 1.1431229158704634, + "learning_rate": 7.03117740833122e-06, + "loss": 0.2328685224056244, + "step": 4642 + }, + { + "epoch": 1.2329039968131723, + "grad_norm": 1.360549509974518, + "learning_rate": 7.0269848908720965e-06, + "loss": 0.3019352853298187, + "step": 4643 + }, + { + "epoch": 1.2331695657947153, + "grad_norm": 1.370123584713732, + "learning_rate": 7.022792946575222e-06, + "loss": 0.2665002942085266, + "step": 4644 + }, + { + "epoch": 1.2334351347762582, + "grad_norm": 1.2172549009924116, + "learning_rate": 7.018601576248755e-06, + "loss": 0.2425101399421692, + "step": 4645 + }, + { + "epoch": 1.2337007037578012, + "grad_norm": 1.2088470091841177, + "learning_rate": 7.014410780700743e-06, + "loss": 0.23319771885871887, + "step": 4646 + }, + { + "epoch": 1.2339662727393441, + "grad_norm": 1.1714631765087196, + "learning_rate": 7.010220560739116e-06, + "loss": 0.23033195734024048, + "step": 4647 + }, + { + "epoch": 1.234231841720887, + "grad_norm": 1.211199620492339, + "learning_rate": 7.006030917171707e-06, + "loss": 0.24682006239891052, + "step": 4648 + }, + { + "epoch": 1.23449741070243, + "grad_norm": 1.2881207045369418, + "learning_rate": 7.001841850806228e-06, + "loss": 0.25566285848617554, + "step": 4649 + }, + { + "epoch": 1.234762979683973, + "grad_norm": 1.32329780476303, + "learning_rate": 6.9976533624502784e-06, + "loss": 0.2791779339313507, + "step": 4650 + }, + { + "epoch": 1.235028548665516, + "grad_norm": 1.3093366388831746, + "learning_rate": 6.993465452911352e-06, + "loss": 0.25597846508026123, + "step": 4651 + }, + { + "epoch": 1.2352941176470589, + "grad_norm": 1.197170425293823, + "learning_rate": 6.9892781229968275e-06, + "loss": 0.24034728109836578, + "step": 4652 + }, + { + "epoch": 1.2355596866286018, + "grad_norm": 1.2583607623295634, + "learning_rate": 
6.985091373513972e-06, + "loss": 0.2209509015083313, + "step": 4653 + }, + { + "epoch": 1.2358252556101448, + "grad_norm": 1.298261075070858, + "learning_rate": 6.980905205269942e-06, + "loss": 0.29106947779655457, + "step": 4654 + }, + { + "epoch": 1.2360908245916877, + "grad_norm": 1.226505577270481, + "learning_rate": 6.976719619071782e-06, + "loss": 0.24014753103256226, + "step": 4655 + }, + { + "epoch": 1.2363563935732307, + "grad_norm": 1.2297022971330018, + "learning_rate": 6.972534615726422e-06, + "loss": 0.27135470509529114, + "step": 4656 + }, + { + "epoch": 1.2366219625547736, + "grad_norm": 1.2219120714336154, + "learning_rate": 6.968350196040683e-06, + "loss": 0.23386257886886597, + "step": 4657 + }, + { + "epoch": 1.2368875315363166, + "grad_norm": 1.1452987159774544, + "learning_rate": 6.964166360821271e-06, + "loss": 0.23119661211967468, + "step": 4658 + }, + { + "epoch": 1.2371531005178595, + "grad_norm": 1.1767967288021879, + "learning_rate": 6.959983110874782e-06, + "loss": 0.2399922013282776, + "step": 4659 + }, + { + "epoch": 1.2374186694994025, + "grad_norm": 1.0521231856668218, + "learning_rate": 6.9558004470076944e-06, + "loss": 0.18323534727096558, + "step": 4660 + }, + { + "epoch": 1.2376842384809454, + "grad_norm": 1.1985431375912965, + "learning_rate": 6.951618370026378e-06, + "loss": 0.25683268904685974, + "step": 4661 + }, + { + "epoch": 1.2379498074624884, + "grad_norm": 1.307367140627743, + "learning_rate": 6.947436880737089e-06, + "loss": 0.2861499786376953, + "step": 4662 + }, + { + "epoch": 1.2382153764440313, + "grad_norm": 1.3831407282476516, + "learning_rate": 6.943255979945965e-06, + "loss": 0.28021398186683655, + "step": 4663 + }, + { + "epoch": 1.2384809454255743, + "grad_norm": 1.2940713851528283, + "learning_rate": 6.939075668459039e-06, + "loss": 0.2739776074886322, + "step": 4664 + }, + { + "epoch": 1.2387465144071172, + "grad_norm": 1.3433235944815516, + "learning_rate": 6.934895947082221e-06, + "loss": 
0.26015231013298035, + "step": 4665 + }, + { + "epoch": 1.2390120833886602, + "grad_norm": 1.3230400884249285, + "learning_rate": 6.930716816621317e-06, + "loss": 0.2572113871574402, + "step": 4666 + }, + { + "epoch": 1.2392776523702032, + "grad_norm": 1.266134559335497, + "learning_rate": 6.926538277882012e-06, + "loss": 0.24094708263874054, + "step": 4667 + }, + { + "epoch": 1.239543221351746, + "grad_norm": 1.1175335748548278, + "learning_rate": 6.92236033166988e-06, + "loss": 0.22803835570812225, + "step": 4668 + }, + { + "epoch": 1.239808790333289, + "grad_norm": 1.1198379137737728, + "learning_rate": 6.9181829787903774e-06, + "loss": 0.23672322928905487, + "step": 4669 + }, + { + "epoch": 1.240074359314832, + "grad_norm": 1.3356297624894082, + "learning_rate": 6.91400622004885e-06, + "loss": 0.2568579912185669, + "step": 4670 + }, + { + "epoch": 1.240339928296375, + "grad_norm": 1.1768710116388783, + "learning_rate": 6.909830056250527e-06, + "loss": 0.25267845392227173, + "step": 4671 + }, + { + "epoch": 1.240605497277918, + "grad_norm": 1.2702969549109802, + "learning_rate": 6.905654488200524e-06, + "loss": 0.30336999893188477, + "step": 4672 + }, + { + "epoch": 1.2408710662594609, + "grad_norm": 1.17710991443045, + "learning_rate": 6.901479516703842e-06, + "loss": 0.2741299867630005, + "step": 4673 + }, + { + "epoch": 1.2411366352410038, + "grad_norm": 1.276658372251755, + "learning_rate": 6.897305142565363e-06, + "loss": 0.2896823585033417, + "step": 4674 + }, + { + "epoch": 1.2414022042225468, + "grad_norm": 1.2718591233587666, + "learning_rate": 6.8931313665898625e-06, + "loss": 0.23102329671382904, + "step": 4675 + }, + { + "epoch": 1.2416677732040897, + "grad_norm": 1.3209479857777737, + "learning_rate": 6.8889581895819915e-06, + "loss": 0.2600775361061096, + "step": 4676 + }, + { + "epoch": 1.2419333421856327, + "grad_norm": 1.1932453661715805, + "learning_rate": 6.884785612346291e-06, + "loss": 0.23589132726192474, + "step": 4677 + }, + { + "epoch": 
1.2421989111671756, + "grad_norm": 1.155454248544126, + "learning_rate": 6.880613635687184e-06, + "loss": 0.24419361352920532, + "step": 4678 + }, + { + "epoch": 1.2424644801487186, + "grad_norm": 1.1323309321599895, + "learning_rate": 6.876442260408977e-06, + "loss": 0.23267227411270142, + "step": 4679 + }, + { + "epoch": 1.2427300491302615, + "grad_norm": 1.2244929254620942, + "learning_rate": 6.8722714873158635e-06, + "loss": 0.2507064938545227, + "step": 4680 + }, + { + "epoch": 1.2429956181118045, + "grad_norm": 1.2079227486812785, + "learning_rate": 6.868101317211922e-06, + "loss": 0.2529929280281067, + "step": 4681 + }, + { + "epoch": 1.2432611870933474, + "grad_norm": 1.1627205371245832, + "learning_rate": 6.863931750901107e-06, + "loss": 0.23255379498004913, + "step": 4682 + }, + { + "epoch": 1.2435267560748904, + "grad_norm": 1.1997195000446994, + "learning_rate": 6.859762789187259e-06, + "loss": 0.22757332026958466, + "step": 4683 + }, + { + "epoch": 1.2437923250564333, + "grad_norm": 1.2115398233652928, + "learning_rate": 6.8555944328741145e-06, + "loss": 0.2578364312648773, + "step": 4684 + }, + { + "epoch": 1.2440578940379763, + "grad_norm": 1.1854445431935166, + "learning_rate": 6.851426682765278e-06, + "loss": 0.27568408846855164, + "step": 4685 + }, + { + "epoch": 1.2443234630195192, + "grad_norm": 1.19754548578965, + "learning_rate": 6.847259539664244e-06, + "loss": 0.25595831871032715, + "step": 4686 + }, + { + "epoch": 1.2445890320010622, + "grad_norm": 1.1807617266458326, + "learning_rate": 6.843093004374386e-06, + "loss": 0.2195426970720291, + "step": 4687 + }, + { + "epoch": 1.2448546009826051, + "grad_norm": 1.1623631531241645, + "learning_rate": 6.838927077698967e-06, + "loss": 0.23247741162776947, + "step": 4688 + }, + { + "epoch": 1.245120169964148, + "grad_norm": 1.2953467781322094, + "learning_rate": 6.834761760441127e-06, + "loss": 0.26149916648864746, + "step": 4689 + }, + { + "epoch": 1.245385738945691, + "grad_norm": 
1.1310243964126157, + "learning_rate": 6.830597053403885e-06, + "loss": 0.2521447241306305, + "step": 4690 + }, + { + "epoch": 1.245651307927234, + "grad_norm": 1.1803812700297758, + "learning_rate": 6.826432957390155e-06, + "loss": 0.23401981592178345, + "step": 4691 + }, + { + "epoch": 1.245916876908777, + "grad_norm": 1.3114713754211442, + "learning_rate": 6.822269473202714e-06, + "loss": 0.25341230630874634, + "step": 4692 + }, + { + "epoch": 1.24618244589032, + "grad_norm": 1.2025537581570156, + "learning_rate": 6.818106601644248e-06, + "loss": 0.2513907551765442, + "step": 4693 + }, + { + "epoch": 1.2464480148718629, + "grad_norm": 1.2263403478965602, + "learning_rate": 6.8139443435173005e-06, + "loss": 0.2682073414325714, + "step": 4694 + }, + { + "epoch": 1.2467135838534058, + "grad_norm": 1.1801313342439474, + "learning_rate": 6.809782699624308e-06, + "loss": 0.22726872563362122, + "step": 4695 + }, + { + "epoch": 1.2469791528349488, + "grad_norm": 1.3004812874511507, + "learning_rate": 6.805621670767588e-06, + "loss": 0.24184030294418335, + "step": 4696 + }, + { + "epoch": 1.247244721816492, + "grad_norm": 1.0395051535883466, + "learning_rate": 6.801461257749334e-06, + "loss": 0.203639417886734, + "step": 4697 + }, + { + "epoch": 1.2475102907980349, + "grad_norm": 1.1786557175840897, + "learning_rate": 6.797301461371626e-06, + "loss": 0.2170606106519699, + "step": 4698 + }, + { + "epoch": 1.2477758597795778, + "grad_norm": 1.1231113548110434, + "learning_rate": 6.7931422824364245e-06, + "loss": 0.2225056290626526, + "step": 4699 + }, + { + "epoch": 1.2480414287611208, + "grad_norm": 1.1702414518259399, + "learning_rate": 6.788983721745569e-06, + "loss": 0.2388974130153656, + "step": 4700 + }, + { + "epoch": 1.2483069977426637, + "grad_norm": 1.14649445863332, + "learning_rate": 6.784825780100776e-06, + "loss": 0.2291644811630249, + "step": 4701 + }, + { + "epoch": 1.2485725667242067, + "grad_norm": 1.3474164807852358, + "learning_rate": 
6.7806684583036595e-06, + "loss": 0.23793739080429077, + "step": 4702 + }, + { + "epoch": 1.2488381357057496, + "grad_norm": 1.2839354787463726, + "learning_rate": 6.776511757155695e-06, + "loss": 0.2756902277469635, + "step": 4703 + }, + { + "epoch": 1.2491037046872926, + "grad_norm": 1.3039866822855, + "learning_rate": 6.772355677458249e-06, + "loss": 0.25046268105506897, + "step": 4704 + }, + { + "epoch": 1.2493692736688355, + "grad_norm": 1.3053078100109528, + "learning_rate": 6.7682002200125575e-06, + "loss": 0.238486647605896, + "step": 4705 + }, + { + "epoch": 1.2496348426503785, + "grad_norm": 1.1855651210182463, + "learning_rate": 6.764045385619751e-06, + "loss": 0.2366628348827362, + "step": 4706 + }, + { + "epoch": 1.2499004116319214, + "grad_norm": 1.21176387977239, + "learning_rate": 6.759891175080827e-06, + "loss": 0.24825221300125122, + "step": 4707 + }, + { + "epoch": 1.2501659806134644, + "grad_norm": 1.2922207381934139, + "learning_rate": 6.755737589196673e-06, + "loss": 0.2304186224937439, + "step": 4708 + }, + { + "epoch": 1.2504315495950074, + "grad_norm": 1.200468035859197, + "learning_rate": 6.7515846287680476e-06, + "loss": 0.2824471592903137, + "step": 4709 + }, + { + "epoch": 1.2506971185765503, + "grad_norm": 1.1994302764371214, + "learning_rate": 6.747432294595591e-06, + "loss": 0.23130697011947632, + "step": 4710 + }, + { + "epoch": 1.2509626875580933, + "grad_norm": 1.3183641444794993, + "learning_rate": 6.7432805874798334e-06, + "loss": 0.28371602296829224, + "step": 4711 + }, + { + "epoch": 1.2512282565396362, + "grad_norm": 1.1529924861272876, + "learning_rate": 6.739129508221167e-06, + "loss": 0.23452092707157135, + "step": 4712 + }, + { + "epoch": 1.2514938255211792, + "grad_norm": 1.245806995398341, + "learning_rate": 6.734979057619873e-06, + "loss": 0.22486859560012817, + "step": 4713 + }, + { + "epoch": 1.2517593945027221, + "grad_norm": 1.3481589110906722, + "learning_rate": 6.730829236476111e-06, + "loss": 0.2818532884120941, 
+ "step": 4714 + }, + { + "epoch": 1.252024963484265, + "grad_norm": 1.172531442878329, + "learning_rate": 6.7266800455899125e-06, + "loss": 0.2060810923576355, + "step": 4715 + }, + { + "epoch": 1.252290532465808, + "grad_norm": 1.2183128764116598, + "learning_rate": 6.722531485761199e-06, + "loss": 0.2183244377374649, + "step": 4716 + }, + { + "epoch": 1.252556101447351, + "grad_norm": 1.2596677279915016, + "learning_rate": 6.71838355778976e-06, + "loss": 0.24757327139377594, + "step": 4717 + }, + { + "epoch": 1.252821670428894, + "grad_norm": 1.3267776765958388, + "learning_rate": 6.714236262475268e-06, + "loss": 0.3058333396911621, + "step": 4718 + }, + { + "epoch": 1.2530872394104369, + "grad_norm": 1.1893155452841293, + "learning_rate": 6.71008960061727e-06, + "loss": 0.24095620214939117, + "step": 4719 + }, + { + "epoch": 1.2533528083919798, + "grad_norm": 1.3050165159615794, + "learning_rate": 6.705943573015199e-06, + "loss": 0.25614839792251587, + "step": 4720 + }, + { + "epoch": 1.2536183773735228, + "grad_norm": 1.2537185610498753, + "learning_rate": 6.701798180468356e-06, + "loss": 0.22295254468917847, + "step": 4721 + }, + { + "epoch": 1.2538839463550657, + "grad_norm": 1.1724661677534984, + "learning_rate": 6.697653423775926e-06, + "loss": 0.24783796072006226, + "step": 4722 + }, + { + "epoch": 1.2541495153366087, + "grad_norm": 1.5676339911360846, + "learning_rate": 6.693509303736969e-06, + "loss": 0.19702200591564178, + "step": 4723 + }, + { + "epoch": 1.2544150843181516, + "grad_norm": 1.2713976115459882, + "learning_rate": 6.689365821150421e-06, + "loss": 0.2539074122905731, + "step": 4724 + }, + { + "epoch": 1.2546806532996946, + "grad_norm": 1.2015875463338734, + "learning_rate": 6.6852229768150976e-06, + "loss": 0.2480372041463852, + "step": 4725 + }, + { + "epoch": 1.2549462222812375, + "grad_norm": 1.1742876462412417, + "learning_rate": 6.68108077152969e-06, + "loss": 0.2231048047542572, + "step": 4726 + }, + { + "epoch": 1.2552117912627805, 
+ "grad_norm": 1.1571308721577904, + "learning_rate": 6.676939206092766e-06, + "loss": 0.260783851146698, + "step": 4727 + }, + { + "epoch": 1.2554773602443234, + "grad_norm": 1.2569537102203152, + "learning_rate": 6.67279828130277e-06, + "loss": 0.24069254100322723, + "step": 4728 + }, + { + "epoch": 1.2557429292258664, + "grad_norm": 1.1732343490674524, + "learning_rate": 6.668657997958027e-06, + "loss": 0.2578867971897125, + "step": 4729 + }, + { + "epoch": 1.2560084982074093, + "grad_norm": 1.102080552368197, + "learning_rate": 6.664518356856732e-06, + "loss": 0.20724457502365112, + "step": 4730 + }, + { + "epoch": 1.2562740671889523, + "grad_norm": 1.1527224778451435, + "learning_rate": 6.6603793587969586e-06, + "loss": 0.23107580840587616, + "step": 4731 + }, + { + "epoch": 1.2565396361704952, + "grad_norm": 1.123633807819834, + "learning_rate": 6.656241004576659e-06, + "loss": 0.2481832504272461, + "step": 4732 + }, + { + "epoch": 1.2568052051520382, + "grad_norm": 1.1353422900728998, + "learning_rate": 6.652103294993657e-06, + "loss": 0.2219698578119278, + "step": 4733 + }, + { + "epoch": 1.2570707741335811, + "grad_norm": 1.1538807443087884, + "learning_rate": 6.647966230845655e-06, + "loss": 0.2245863974094391, + "step": 4734 + }, + { + "epoch": 1.257336343115124, + "grad_norm": 1.1991392114731283, + "learning_rate": 6.643829812930231e-06, + "loss": 0.2086387574672699, + "step": 4735 + }, + { + "epoch": 1.257601912096667, + "grad_norm": 1.1702949625685939, + "learning_rate": 6.6396940420448355e-06, + "loss": 0.23484499752521515, + "step": 4736 + }, + { + "epoch": 1.25786748107821, + "grad_norm": 1.1449620939429583, + "learning_rate": 6.635558918986797e-06, + "loss": 0.22011062502861023, + "step": 4737 + }, + { + "epoch": 1.258133050059753, + "grad_norm": 1.240312422577115, + "learning_rate": 6.631424444553319e-06, + "loss": 0.2426830381155014, + "step": 4738 + }, + { + "epoch": 1.258398619041296, + "grad_norm": 1.2472398676845469, + "learning_rate": 
6.627290619541481e-06, + "loss": 0.2702174484729767, + "step": 4739 + }, + { + "epoch": 1.2586641880228389, + "grad_norm": 1.4005529994015682, + "learning_rate": 6.623157444748234e-06, + "loss": 0.26594820618629456, + "step": 4740 + }, + { + "epoch": 1.2589297570043818, + "grad_norm": 1.2550785934224764, + "learning_rate": 6.619024920970405e-06, + "loss": 0.2546013593673706, + "step": 4741 + }, + { + "epoch": 1.2591953259859248, + "grad_norm": 1.425429985784882, + "learning_rate": 6.614893049004696e-06, + "loss": 0.27207985520362854, + "step": 4742 + }, + { + "epoch": 1.259460894967468, + "grad_norm": 1.4445692953489113, + "learning_rate": 6.610761829647685e-06, + "loss": 0.2640937566757202, + "step": 4743 + }, + { + "epoch": 1.2597264639490109, + "grad_norm": 1.4095791296432063, + "learning_rate": 6.60663126369582e-06, + "loss": 0.2890278697013855, + "step": 4744 + }, + { + "epoch": 1.2599920329305538, + "grad_norm": 1.1225606468440805, + "learning_rate": 6.602501351945425e-06, + "loss": 0.24610492587089539, + "step": 4745 + }, + { + "epoch": 1.2602576019120968, + "grad_norm": 1.5273064552741338, + "learning_rate": 6.598372095192699e-06, + "loss": 0.24946746230125427, + "step": 4746 + }, + { + "epoch": 1.2605231708936397, + "grad_norm": 1.0546449518544165, + "learning_rate": 6.594243494233717e-06, + "loss": 0.2369944453239441, + "step": 4747 + }, + { + "epoch": 1.2607887398751827, + "grad_norm": 1.180556169492091, + "learning_rate": 6.590115549864421e-06, + "loss": 0.20980143547058105, + "step": 4748 + }, + { + "epoch": 1.2610543088567256, + "grad_norm": 1.1524244978042124, + "learning_rate": 6.5859882628806315e-06, + "loss": 0.22930344939231873, + "step": 4749 + }, + { + "epoch": 1.2613198778382686, + "grad_norm": 1.1353386909454481, + "learning_rate": 6.5818616340780405e-06, + "loss": 0.22352416813373566, + "step": 4750 + }, + { + "epoch": 1.2615854468198116, + "grad_norm": 1.0615225488277533, + "learning_rate": 6.577735664252214e-06, + "loss": 
0.2049327939748764, + "step": 4751 + }, + { + "epoch": 1.2618510158013545, + "grad_norm": 1.3420243952278277, + "learning_rate": 6.573610354198587e-06, + "loss": 0.21858355402946472, + "step": 4752 + }, + { + "epoch": 1.2621165847828975, + "grad_norm": 1.1248247337478985, + "learning_rate": 6.5694857047124786e-06, + "loss": 0.225118950009346, + "step": 4753 + }, + { + "epoch": 1.2623821537644404, + "grad_norm": 1.1623337764465298, + "learning_rate": 6.565361716589063e-06, + "loss": 0.25780409574508667, + "step": 4754 + }, + { + "epoch": 1.2626477227459834, + "grad_norm": 1.1580907073042885, + "learning_rate": 6.5612383906233964e-06, + "loss": 0.23507939279079437, + "step": 4755 + }, + { + "epoch": 1.2629132917275263, + "grad_norm": 1.1733914893757196, + "learning_rate": 6.557115727610417e-06, + "loss": 0.27884477376937866, + "step": 4756 + }, + { + "epoch": 1.2631788607090693, + "grad_norm": 1.145599873702901, + "learning_rate": 6.552993728344921e-06, + "loss": 0.2564120888710022, + "step": 4757 + }, + { + "epoch": 1.2634444296906122, + "grad_norm": 1.3139857622357067, + "learning_rate": 6.548872393621578e-06, + "loss": 0.259651243686676, + "step": 4758 + }, + { + "epoch": 1.2637099986721552, + "grad_norm": 1.2930462493551071, + "learning_rate": 6.544751724234937e-06, + "loss": 0.23473814129829407, + "step": 4759 + }, + { + "epoch": 1.2639755676536981, + "grad_norm": 1.4411652435541018, + "learning_rate": 6.540631720979411e-06, + "loss": 0.2447129189968109, + "step": 4760 + }, + { + "epoch": 1.264241136635241, + "grad_norm": 1.1968236723875711, + "learning_rate": 6.536512384649294e-06, + "loss": 0.22695237398147583, + "step": 4761 + }, + { + "epoch": 1.264506705616784, + "grad_norm": 1.117214929215876, + "learning_rate": 6.532393716038738e-06, + "loss": 0.24303656816482544, + "step": 4762 + }, + { + "epoch": 1.264772274598327, + "grad_norm": 1.2106972269991043, + "learning_rate": 6.528275715941776e-06, + "loss": 0.23911908268928528, + "step": 4763 + }, + { + 
"epoch": 1.26503784357987, + "grad_norm": 1.0480584899589354, + "learning_rate": 6.524158385152309e-06, + "loss": 0.19766747951507568, + "step": 4764 + }, + { + "epoch": 1.2653034125614129, + "grad_norm": 1.390914844473808, + "learning_rate": 6.520041724464114e-06, + "loss": 0.24074134230613708, + "step": 4765 + }, + { + "epoch": 1.2655689815429558, + "grad_norm": 1.3379815630375766, + "learning_rate": 6.515925734670834e-06, + "loss": 0.27557867765426636, + "step": 4766 + }, + { + "epoch": 1.2658345505244988, + "grad_norm": 1.3286252957995823, + "learning_rate": 6.511810416565979e-06, + "loss": 0.24387787282466888, + "step": 4767 + }, + { + "epoch": 1.2661001195060417, + "grad_norm": 1.4234035593814256, + "learning_rate": 6.507695770942939e-06, + "loss": 0.27863091230392456, + "step": 4768 + }, + { + "epoch": 1.2663656884875847, + "grad_norm": 1.1364646133588507, + "learning_rate": 6.503581798594965e-06, + "loss": 0.23589591681957245, + "step": 4769 + }, + { + "epoch": 1.2666312574691276, + "grad_norm": 1.1932509985997282, + "learning_rate": 6.499468500315185e-06, + "loss": 0.22869807481765747, + "step": 4770 + }, + { + "epoch": 1.2668968264506706, + "grad_norm": 1.2498634762148577, + "learning_rate": 6.495355876896592e-06, + "loss": 0.2351568192243576, + "step": 4771 + }, + { + "epoch": 1.2671623954322135, + "grad_norm": 1.1271253337210285, + "learning_rate": 6.491243929132052e-06, + "loss": 0.2291228175163269, + "step": 4772 + }, + { + "epoch": 1.2674279644137565, + "grad_norm": 1.2013953219342957, + "learning_rate": 6.487132657814297e-06, + "loss": 0.23203743994235992, + "step": 4773 + }, + { + "epoch": 1.2676935333952994, + "grad_norm": 1.0887907712326863, + "learning_rate": 6.483022063735938e-06, + "loss": 0.22035656869411469, + "step": 4774 + }, + { + "epoch": 1.2679591023768424, + "grad_norm": 1.1270651148723736, + "learning_rate": 6.478912147689448e-06, + "loss": 0.21576716005802155, + "step": 4775 + }, + { + "epoch": 1.2682246713583853, + "grad_norm": 
1.3174966546949713, + "learning_rate": 6.474802910467171e-06, + "loss": 0.27764660120010376, + "step": 4776 + }, + { + "epoch": 1.2684902403399283, + "grad_norm": 1.2418434137314485, + "learning_rate": 6.4706943528613135e-06, + "loss": 0.23715822398662567, + "step": 4777 + }, + { + "epoch": 1.2687558093214713, + "grad_norm": 1.1794293567561218, + "learning_rate": 6.4665864756639606e-06, + "loss": 0.27764302492141724, + "step": 4778 + }, + { + "epoch": 1.2690213783030142, + "grad_norm": 1.2157630211554828, + "learning_rate": 6.4624792796670624e-06, + "loss": 0.21634885668754578, + "step": 4779 + }, + { + "epoch": 1.2692869472845572, + "grad_norm": 1.2217447541656432, + "learning_rate": 6.458372765662438e-06, + "loss": 0.27262234687805176, + "step": 4780 + }, + { + "epoch": 1.2695525162661, + "grad_norm": 1.1716437260315133, + "learning_rate": 6.454266934441775e-06, + "loss": 0.2219458371400833, + "step": 4781 + }, + { + "epoch": 1.269818085247643, + "grad_norm": 1.2515340549821425, + "learning_rate": 6.450161786796625e-06, + "loss": 0.22181497514247894, + "step": 4782 + }, + { + "epoch": 1.270083654229186, + "grad_norm": 1.1858127036353512, + "learning_rate": 6.446057323518422e-06, + "loss": 0.22642338275909424, + "step": 4783 + }, + { + "epoch": 1.270349223210729, + "grad_norm": 1.2243357553110101, + "learning_rate": 6.441953545398451e-06, + "loss": 0.239711195230484, + "step": 4784 + }, + { + "epoch": 1.270614792192272, + "grad_norm": 1.29507599792429, + "learning_rate": 6.437850453227872e-06, + "loss": 0.2422255128622055, + "step": 4785 + }, + { + "epoch": 1.2708803611738149, + "grad_norm": 1.3013507424737665, + "learning_rate": 6.433748047797715e-06, + "loss": 0.23184439539909363, + "step": 4786 + }, + { + "epoch": 1.2711459301553578, + "grad_norm": 1.3032581886502261, + "learning_rate": 6.429646329898873e-06, + "loss": 0.2737428843975067, + "step": 4787 + }, + { + "epoch": 1.2714114991369008, + "grad_norm": 1.2565288812855064, + "learning_rate": 
6.4255453003221115e-06, + "loss": 0.23565897345542908, + "step": 4788 + }, + { + "epoch": 1.2716770681184437, + "grad_norm": 1.3665497750328797, + "learning_rate": 6.421444959858059e-06, + "loss": 0.24349254369735718, + "step": 4789 + }, + { + "epoch": 1.2719426370999867, + "grad_norm": 1.2050219186384792, + "learning_rate": 6.4173453092972115e-06, + "loss": 0.2637769281864166, + "step": 4790 + }, + { + "epoch": 1.2722082060815296, + "grad_norm": 1.0381858832581394, + "learning_rate": 6.413246349429934e-06, + "loss": 0.21420228481292725, + "step": 4791 + }, + { + "epoch": 1.2724737750630726, + "grad_norm": 1.1333618917642097, + "learning_rate": 6.409148081046461e-06, + "loss": 0.25270405411720276, + "step": 4792 + }, + { + "epoch": 1.2727393440446155, + "grad_norm": 1.270676964933882, + "learning_rate": 6.405050504936887e-06, + "loss": 0.2710546851158142, + "step": 4793 + }, + { + "epoch": 1.2730049130261585, + "grad_norm": 1.1608891040490155, + "learning_rate": 6.400953621891178e-06, + "loss": 0.2388489842414856, + "step": 4794 + }, + { + "epoch": 1.2732704820077014, + "grad_norm": 1.1600463634666516, + "learning_rate": 6.396857432699164e-06, + "loss": 0.24581485986709595, + "step": 4795 + }, + { + "epoch": 1.2735360509892444, + "grad_norm": 1.18464881130754, + "learning_rate": 6.3927619381505404e-06, + "loss": 0.24219104647636414, + "step": 4796 + }, + { + "epoch": 1.2738016199707873, + "grad_norm": 1.0878857914267965, + "learning_rate": 6.388667139034873e-06, + "loss": 0.22722014784812927, + "step": 4797 + }, + { + "epoch": 1.2740671889523303, + "grad_norm": 1.275017638940232, + "learning_rate": 6.384573036141589e-06, + "loss": 0.25177234411239624, + "step": 4798 + }, + { + "epoch": 1.2743327579338732, + "grad_norm": 1.2824350948041237, + "learning_rate": 6.380479630259983e-06, + "loss": 0.2291412651538849, + "step": 4799 + }, + { + "epoch": 1.2745983269154162, + "grad_norm": 1.3215047708165757, + "learning_rate": 6.376386922179216e-06, + "loss": 
0.2528606951236725, + "step": 4800 + }, + { + "epoch": 1.2748638958969591, + "grad_norm": 1.11001311385955, + "learning_rate": 6.372294912688315e-06, + "loss": 0.21383032202720642, + "step": 4801 + }, + { + "epoch": 1.275129464878502, + "grad_norm": 1.2162134010863295, + "learning_rate": 6.368203602576168e-06, + "loss": 0.2538087069988251, + "step": 4802 + }, + { + "epoch": 1.275395033860045, + "grad_norm": 1.2127822206191197, + "learning_rate": 6.364112992631537e-06, + "loss": 0.24437417089939117, + "step": 4803 + }, + { + "epoch": 1.275660602841588, + "grad_norm": 1.1678428848154245, + "learning_rate": 6.360023083643036e-06, + "loss": 0.2347753942012787, + "step": 4804 + }, + { + "epoch": 1.275926171823131, + "grad_norm": 1.226812886332051, + "learning_rate": 6.3559338763991576e-06, + "loss": 0.271645188331604, + "step": 4805 + }, + { + "epoch": 1.276191740804674, + "grad_norm": 1.2088165730060163, + "learning_rate": 6.35184537168825e-06, + "loss": 0.2465275228023529, + "step": 4806 + }, + { + "epoch": 1.2764573097862169, + "grad_norm": 1.216147524532817, + "learning_rate": 6.347757570298527e-06, + "loss": 0.26494044065475464, + "step": 4807 + }, + { + "epoch": 1.2767228787677598, + "grad_norm": 3.360286997098956, + "learning_rate": 6.343670473018071e-06, + "loss": 0.28292080760002136, + "step": 4808 + }, + { + "epoch": 1.2769884477493028, + "grad_norm": 1.2160142828428218, + "learning_rate": 6.339584080634824e-06, + "loss": 0.2525850534439087, + "step": 4809 + }, + { + "epoch": 1.2772540167308457, + "grad_norm": 1.224576908350391, + "learning_rate": 6.335498393936597e-06, + "loss": 0.22056345641613007, + "step": 4810 + }, + { + "epoch": 1.2775195857123887, + "grad_norm": 1.1603347806824698, + "learning_rate": 6.331413413711061e-06, + "loss": 0.23081058263778687, + "step": 4811 + }, + { + "epoch": 1.2777851546939316, + "grad_norm": 1.2309265633693007, + "learning_rate": 6.327329140745751e-06, + "loss": 0.2722470760345459, + "step": 4812 + }, + { + "epoch": 
1.2780507236754748, + "grad_norm": 1.2598117885787161, + "learning_rate": 6.32324557582807e-06, + "loss": 0.24454641342163086, + "step": 4813 + }, + { + "epoch": 1.2783162926570177, + "grad_norm": 1.2713820573097572, + "learning_rate": 6.319162719745277e-06, + "loss": 0.21884413063526154, + "step": 4814 + }, + { + "epoch": 1.2785818616385607, + "grad_norm": 1.276590514388197, + "learning_rate": 6.3150805732845e-06, + "loss": 0.2737545669078827, + "step": 4815 + }, + { + "epoch": 1.2788474306201036, + "grad_norm": 1.1747258996206047, + "learning_rate": 6.31099913723273e-06, + "loss": 0.2478230595588684, + "step": 4816 + }, + { + "epoch": 1.2791129996016466, + "grad_norm": 1.2461752717378811, + "learning_rate": 6.306918412376817e-06, + "loss": 0.2508094310760498, + "step": 4817 + }, + { + "epoch": 1.2793785685831895, + "grad_norm": 1.267840547546021, + "learning_rate": 6.302838399503477e-06, + "loss": 0.24666383862495422, + "step": 4818 + }, + { + "epoch": 1.2796441375647325, + "grad_norm": 1.176059099377582, + "learning_rate": 6.298759099399292e-06, + "loss": 0.27833491563796997, + "step": 4819 + }, + { + "epoch": 1.2799097065462754, + "grad_norm": 1.1948595147219725, + "learning_rate": 6.294680512850699e-06, + "loss": 0.23092475533485413, + "step": 4820 + }, + { + "epoch": 1.2801752755278184, + "grad_norm": 1.1935160504644853, + "learning_rate": 6.290602640644005e-06, + "loss": 0.2714667022228241, + "step": 4821 + }, + { + "epoch": 1.2804408445093614, + "grad_norm": 1.1769422055863235, + "learning_rate": 6.286525483565373e-06, + "loss": 0.23292411863803864, + "step": 4822 + }, + { + "epoch": 1.2807064134909043, + "grad_norm": 1.1322856806053188, + "learning_rate": 6.282449042400831e-06, + "loss": 0.23809143900871277, + "step": 4823 + }, + { + "epoch": 1.2809719824724473, + "grad_norm": 1.0235534573008647, + "learning_rate": 6.278373317936269e-06, + "loss": 0.22593267261981964, + "step": 4824 + }, + { + "epoch": 1.2812375514539902, + "grad_norm": 1.2491300300411192, 
+ "learning_rate": 6.274298310957439e-06, + "loss": 0.26024624705314636, + "step": 4825 + }, + { + "epoch": 1.2815031204355332, + "grad_norm": 1.138185007529017, + "learning_rate": 6.270224022249957e-06, + "loss": 0.22418126463890076, + "step": 4826 + }, + { + "epoch": 1.2817686894170761, + "grad_norm": 1.2374650134400174, + "learning_rate": 6.266150452599288e-06, + "loss": 0.26452577114105225, + "step": 4827 + }, + { + "epoch": 1.282034258398619, + "grad_norm": 1.2453587043668277, + "learning_rate": 6.262077602790779e-06, + "loss": 0.24412381649017334, + "step": 4828 + }, + { + "epoch": 1.282299827380162, + "grad_norm": 1.1670875672055734, + "learning_rate": 6.258005473609623e-06, + "loss": 0.22476118803024292, + "step": 4829 + }, + { + "epoch": 1.282565396361705, + "grad_norm": 1.1744502576491334, + "learning_rate": 6.25393406584088e-06, + "loss": 0.2208547294139862, + "step": 4830 + }, + { + "epoch": 1.282830965343248, + "grad_norm": 1.340282271944368, + "learning_rate": 6.249863380269467e-06, + "loss": 0.2903650999069214, + "step": 4831 + }, + { + "epoch": 1.2830965343247909, + "grad_norm": 1.2018727401561922, + "learning_rate": 6.245793417680168e-06, + "loss": 0.24413639307022095, + "step": 4832 + }, + { + "epoch": 1.2833621033063338, + "grad_norm": 1.162422850806728, + "learning_rate": 6.241724178857621e-06, + "loss": 0.2193944752216339, + "step": 4833 + }, + { + "epoch": 1.2836276722878768, + "grad_norm": 1.2159517583191957, + "learning_rate": 6.237655664586326e-06, + "loss": 0.22847513854503632, + "step": 4834 + }, + { + "epoch": 1.2838932412694197, + "grad_norm": 1.4211501406512423, + "learning_rate": 6.233587875650648e-06, + "loss": 0.269639253616333, + "step": 4835 + }, + { + "epoch": 1.2841588102509627, + "grad_norm": 1.3153478129856002, + "learning_rate": 6.229520812834801e-06, + "loss": 0.26329392194747925, + "step": 4836 + }, + { + "epoch": 1.2844243792325056, + "grad_norm": 1.0811891602166492, + "learning_rate": 6.225454476922877e-06, + "loss": 
0.18800514936447144, + "step": 4837 + }, + { + "epoch": 1.2846899482140486, + "grad_norm": 1.2987987933289529, + "learning_rate": 6.2213888686988125e-06, + "loss": 0.2617965340614319, + "step": 4838 + }, + { + "epoch": 1.2849555171955915, + "grad_norm": 1.2029687476094635, + "learning_rate": 6.217323988946411e-06, + "loss": 0.22468717396259308, + "step": 4839 + }, + { + "epoch": 1.2852210861771345, + "grad_norm": 1.2126923104659393, + "learning_rate": 6.213259838449333e-06, + "loss": 0.22465646266937256, + "step": 4840 + }, + { + "epoch": 1.2854866551586774, + "grad_norm": 1.243457795287806, + "learning_rate": 6.209196417991096e-06, + "loss": 0.2655075490474701, + "step": 4841 + }, + { + "epoch": 1.2857522241402204, + "grad_norm": 1.2818071805394324, + "learning_rate": 6.205133728355081e-06, + "loss": 0.25313282012939453, + "step": 4842 + }, + { + "epoch": 1.2860177931217633, + "grad_norm": 1.2136879668034726, + "learning_rate": 6.201071770324527e-06, + "loss": 0.23176322877407074, + "step": 4843 + }, + { + "epoch": 1.2862833621033063, + "grad_norm": 1.3628911983979357, + "learning_rate": 6.197010544682531e-06, + "loss": 0.27396953105926514, + "step": 4844 + }, + { + "epoch": 1.2865489310848492, + "grad_norm": 1.2333432651370633, + "learning_rate": 6.192950052212046e-06, + "loss": 0.24966171383857727, + "step": 4845 + }, + { + "epoch": 1.2868145000663922, + "grad_norm": 1.184789059228899, + "learning_rate": 6.188890293695895e-06, + "loss": 0.23290866613388062, + "step": 4846 + }, + { + "epoch": 1.2870800690479351, + "grad_norm": 1.2080105834836115, + "learning_rate": 6.184831269916749e-06, + "loss": 0.2368975132703781, + "step": 4847 + }, + { + "epoch": 1.287345638029478, + "grad_norm": 1.35199057217418, + "learning_rate": 6.180772981657139e-06, + "loss": 0.25305312871932983, + "step": 4848 + }, + { + "epoch": 1.287611207011021, + "grad_norm": 1.1825950927599171, + "learning_rate": 6.176715429699452e-06, + "loss": 0.22752982378005981, + "step": 4849 + }, + { + 
"epoch": 1.287876775992564, + "grad_norm": 1.152582857494987, + "learning_rate": 6.1726586148259395e-06, + "loss": 0.22426503896713257, + "step": 4850 + }, + { + "epoch": 1.288142344974107, + "grad_norm": 1.2203273234703247, + "learning_rate": 6.168602537818706e-06, + "loss": 0.21261993050575256, + "step": 4851 + }, + { + "epoch": 1.28840791395565, + "grad_norm": 1.1907151660933317, + "learning_rate": 6.1645471994597185e-06, + "loss": 0.237461656332016, + "step": 4852 + }, + { + "epoch": 1.2886734829371929, + "grad_norm": 1.113120156932308, + "learning_rate": 6.160492600530794e-06, + "loss": 0.1926390826702118, + "step": 4853 + }, + { + "epoch": 1.2889390519187358, + "grad_norm": 1.6824005161064397, + "learning_rate": 6.156438741813608e-06, + "loss": 0.22673740983009338, + "step": 4854 + }, + { + "epoch": 1.289204620900279, + "grad_norm": 1.1453361708789405, + "learning_rate": 6.15238562408971e-06, + "loss": 0.22148582339286804, + "step": 4855 + }, + { + "epoch": 1.289470189881822, + "grad_norm": 1.3581323367394031, + "learning_rate": 6.148333248140483e-06, + "loss": 0.28319716453552246, + "step": 4856 + }, + { + "epoch": 1.289735758863365, + "grad_norm": 1.4367360633574449, + "learning_rate": 6.14428161474718e-06, + "loss": 0.23505647480487823, + "step": 4857 + }, + { + "epoch": 1.2900013278449078, + "grad_norm": 1.2052965186154045, + "learning_rate": 6.140230724690908e-06, + "loss": 0.24323523044586182, + "step": 4858 + }, + { + "epoch": 1.2902668968264508, + "grad_norm": 1.2357784405363281, + "learning_rate": 6.136180578752629e-06, + "loss": 0.22818386554718018, + "step": 4859 + }, + { + "epoch": 1.2905324658079937, + "grad_norm": 1.2670464740614045, + "learning_rate": 6.132131177713165e-06, + "loss": 0.24285198748111725, + "step": 4860 + }, + { + "epoch": 1.2907980347895367, + "grad_norm": 1.1369753370104339, + "learning_rate": 6.128082522353194e-06, + "loss": 0.24115213751792908, + "step": 4861 + }, + { + "epoch": 1.2910636037710796, + "grad_norm": 
1.2213111344560537, + "learning_rate": 6.124034613453247e-06, + "loss": 0.21564510464668274, + "step": 4862 + }, + { + "epoch": 1.2913291727526226, + "grad_norm": 1.299973209896211, + "learning_rate": 6.119987451793711e-06, + "loss": 0.2329743504524231, + "step": 4863 + }, + { + "epoch": 1.2915947417341656, + "grad_norm": 1.2218786239106318, + "learning_rate": 6.115941038154835e-06, + "loss": 0.2161208689212799, + "step": 4864 + }, + { + "epoch": 1.2918603107157085, + "grad_norm": 1.2078035628631776, + "learning_rate": 6.111895373316721e-06, + "loss": 0.22765520215034485, + "step": 4865 + }, + { + "epoch": 1.2921258796972515, + "grad_norm": 1.2199257873933993, + "learning_rate": 6.107850458059322e-06, + "loss": 0.25506818294525146, + "step": 4866 + }, + { + "epoch": 1.2923914486787944, + "grad_norm": 1.2014544077782259, + "learning_rate": 6.1038062931624505e-06, + "loss": 0.22543852031230927, + "step": 4867 + }, + { + "epoch": 1.2926570176603374, + "grad_norm": 1.282222410309602, + "learning_rate": 6.099762879405776e-06, + "loss": 0.24295030534267426, + "step": 4868 + }, + { + "epoch": 1.2929225866418803, + "grad_norm": 1.2221545432256802, + "learning_rate": 6.095720217568819e-06, + "loss": 0.2385009229183197, + "step": 4869 + }, + { + "epoch": 1.2931881556234233, + "grad_norm": 1.119514297375773, + "learning_rate": 6.091678308430956e-06, + "loss": 0.21410472691059113, + "step": 4870 + }, + { + "epoch": 1.2934537246049662, + "grad_norm": 1.299309717988783, + "learning_rate": 6.087637152771422e-06, + "loss": 0.25934773683547974, + "step": 4871 + }, + { + "epoch": 1.2937192935865092, + "grad_norm": 1.1783576597419445, + "learning_rate": 6.0835967513693e-06, + "loss": 0.24584373831748962, + "step": 4872 + }, + { + "epoch": 1.2939848625680521, + "grad_norm": 1.3413866916188153, + "learning_rate": 6.079557105003537e-06, + "loss": 0.2403055876493454, + "step": 4873 + }, + { + "epoch": 1.294250431549595, + "grad_norm": 1.2348806886655737, + "learning_rate": 
6.075518214452927e-06, + "loss": 0.23861736059188843, + "step": 4874 + }, + { + "epoch": 1.294516000531138, + "grad_norm": 1.2099712971645404, + "learning_rate": 6.071480080496119e-06, + "loss": 0.21356427669525146, + "step": 4875 + }, + { + "epoch": 1.294781569512681, + "grad_norm": 1.314183683224707, + "learning_rate": 6.067442703911621e-06, + "loss": 0.2835869789123535, + "step": 4876 + }, + { + "epoch": 1.295047138494224, + "grad_norm": 1.1868362719294436, + "learning_rate": 6.063406085477788e-06, + "loss": 0.24233242869377136, + "step": 4877 + }, + { + "epoch": 1.2953127074757669, + "grad_norm": 1.2596980829406919, + "learning_rate": 6.059370225972834e-06, + "loss": 0.24986369907855988, + "step": 4878 + }, + { + "epoch": 1.2955782764573098, + "grad_norm": 1.2583930460503605, + "learning_rate": 6.055335126174826e-06, + "loss": 0.2445756494998932, + "step": 4879 + }, + { + "epoch": 1.2958438454388528, + "grad_norm": 1.0635663336037695, + "learning_rate": 6.0513007868616825e-06, + "loss": 0.21331898868083954, + "step": 4880 + }, + { + "epoch": 1.2961094144203957, + "grad_norm": 1.1578193819974294, + "learning_rate": 6.047267208811174e-06, + "loss": 0.2782329320907593, + "step": 4881 + }, + { + "epoch": 1.2963749834019387, + "grad_norm": 2.326385436360766, + "learning_rate": 6.043234392800932e-06, + "loss": 0.20866765081882477, + "step": 4882 + }, + { + "epoch": 1.2966405523834816, + "grad_norm": 1.3211750202424803, + "learning_rate": 6.039202339608432e-06, + "loss": 0.2517815828323364, + "step": 4883 + }, + { + "epoch": 1.2969061213650246, + "grad_norm": 1.283845753322191, + "learning_rate": 6.03517105001101e-06, + "loss": 0.2617926597595215, + "step": 4884 + }, + { + "epoch": 1.2971716903465675, + "grad_norm": 1.3255504140080887, + "learning_rate": 6.0311405247858465e-06, + "loss": 0.24753305315971375, + "step": 4885 + }, + { + "epoch": 1.2974372593281105, + "grad_norm": 1.1805849927447047, + "learning_rate": 6.027110764709982e-06, + "loss": 0.19791719317436218, 
+ "step": 4886 + }, + { + "epoch": 1.2977028283096534, + "grad_norm": 1.236398594932959, + "learning_rate": 6.023081770560307e-06, + "loss": 0.243608757853508, + "step": 4887 + }, + { + "epoch": 1.2979683972911964, + "grad_norm": 1.3652744342035896, + "learning_rate": 6.019053543113564e-06, + "loss": 0.20469853281974792, + "step": 4888 + }, + { + "epoch": 1.2982339662727393, + "grad_norm": 1.4682720215540639, + "learning_rate": 6.015026083146345e-06, + "loss": 0.25613903999328613, + "step": 4889 + }, + { + "epoch": 1.2984995352542823, + "grad_norm": 1.236223607561111, + "learning_rate": 6.010999391435097e-06, + "loss": 0.23349006474018097, + "step": 4890 + }, + { + "epoch": 1.2987651042358253, + "grad_norm": 1.1137410591057113, + "learning_rate": 6.006973468756124e-06, + "loss": 0.23646268248558044, + "step": 4891 + }, + { + "epoch": 1.2990306732173682, + "grad_norm": 1.2845979720118916, + "learning_rate": 6.002948315885572e-06, + "loss": 0.2371794581413269, + "step": 4892 + }, + { + "epoch": 1.2992962421989112, + "grad_norm": 1.1150236044260142, + "learning_rate": 5.998923933599443e-06, + "loss": 0.23791949450969696, + "step": 4893 + }, + { + "epoch": 1.299561811180454, + "grad_norm": 1.2865838186648229, + "learning_rate": 5.994900322673593e-06, + "loss": 0.26923009753227234, + "step": 4894 + }, + { + "epoch": 1.299827380161997, + "grad_norm": 1.2724647699376699, + "learning_rate": 5.990877483883723e-06, + "loss": 0.20164884626865387, + "step": 4895 + }, + { + "epoch": 1.30009294914354, + "grad_norm": 1.1263986142938482, + "learning_rate": 5.986855418005393e-06, + "loss": 0.22345462441444397, + "step": 4896 + }, + { + "epoch": 1.300358518125083, + "grad_norm": 1.2936789930425872, + "learning_rate": 5.982834125814007e-06, + "loss": 0.26678675413131714, + "step": 4897 + }, + { + "epoch": 1.300624087106626, + "grad_norm": 1.3112472329084983, + "learning_rate": 5.978813608084825e-06, + "loss": 0.24674496054649353, + "step": 4898 + }, + { + "epoch": 1.3008896560881689, 
+ "grad_norm": 1.3746634467420622, + "learning_rate": 5.974793865592947e-06, + "loss": 0.2804900109767914, + "step": 4899 + }, + { + "epoch": 1.3011552250697118, + "grad_norm": 1.3113866221822363, + "learning_rate": 5.970774899113345e-06, + "loss": 0.2413155734539032, + "step": 4900 + }, + { + "epoch": 1.3014207940512548, + "grad_norm": 1.139036608300987, + "learning_rate": 5.96675670942082e-06, + "loss": 0.21217301487922668, + "step": 4901 + }, + { + "epoch": 1.3016863630327977, + "grad_norm": 1.2012277530250777, + "learning_rate": 5.962739297290035e-06, + "loss": 0.23362940549850464, + "step": 4902 + }, + { + "epoch": 1.3019519320143407, + "grad_norm": 1.251148135143295, + "learning_rate": 5.958722663495499e-06, + "loss": 0.2669242322444916, + "step": 4903 + }, + { + "epoch": 1.3022175009958836, + "grad_norm": 1.2365395348631665, + "learning_rate": 5.95470680881157e-06, + "loss": 0.2234608232975006, + "step": 4904 + }, + { + "epoch": 1.3024830699774266, + "grad_norm": 1.2441781101215288, + "learning_rate": 5.95069173401246e-06, + "loss": 0.25150394439697266, + "step": 4905 + }, + { + "epoch": 1.3027486389589695, + "grad_norm": 1.127228294882686, + "learning_rate": 5.9466774398722264e-06, + "loss": 0.2408430427312851, + "step": 4906 + }, + { + "epoch": 1.3030142079405125, + "grad_norm": 1.1200862415380408, + "learning_rate": 5.942663927164776e-06, + "loss": 0.2197013795375824, + "step": 4907 + }, + { + "epoch": 1.3032797769220554, + "grad_norm": 1.1474317141184802, + "learning_rate": 5.938651196663865e-06, + "loss": 0.2224964201450348, + "step": 4908 + }, + { + "epoch": 1.3035453459035984, + "grad_norm": 1.313380369558454, + "learning_rate": 5.934639249143108e-06, + "loss": 0.26466232538223267, + "step": 4909 + }, + { + "epoch": 1.3038109148851413, + "grad_norm": 1.2910852400248352, + "learning_rate": 5.930628085375958e-06, + "loss": 0.257996141910553, + "step": 4910 + }, + { + "epoch": 1.3040764838666843, + "grad_norm": 1.2056479933898356, + "learning_rate": 
5.92661770613572e-06, + "loss": 0.21995162963867188, + "step": 4911 + }, + { + "epoch": 1.3043420528482272, + "grad_norm": 1.3003100511120855, + "learning_rate": 5.922608112195546e-06, + "loss": 0.26007258892059326, + "step": 4912 + }, + { + "epoch": 1.3046076218297702, + "grad_norm": 1.2951583817832037, + "learning_rate": 5.918599304328442e-06, + "loss": 0.25168827176094055, + "step": 4913 + }, + { + "epoch": 1.3048731908113131, + "grad_norm": 1.1932184000685677, + "learning_rate": 5.9145912833072535e-06, + "loss": 0.24686852097511292, + "step": 4914 + }, + { + "epoch": 1.305138759792856, + "grad_norm": 1.1951264683753895, + "learning_rate": 5.910584049904684e-06, + "loss": 0.247032031416893, + "step": 4915 + }, + { + "epoch": 1.305404328774399, + "grad_norm": 1.1517786776797445, + "learning_rate": 5.906577604893278e-06, + "loss": 0.21644674241542816, + "step": 4916 + }, + { + "epoch": 1.305669897755942, + "grad_norm": 1.3685662184124912, + "learning_rate": 5.9025719490454304e-06, + "loss": 0.28093478083610535, + "step": 4917 + }, + { + "epoch": 1.305935466737485, + "grad_norm": 1.2246452754262638, + "learning_rate": 5.898567083133389e-06, + "loss": 0.23731757700443268, + "step": 4918 + }, + { + "epoch": 1.306201035719028, + "grad_norm": 1.1125400405938466, + "learning_rate": 5.894563007929243e-06, + "loss": 0.20725491642951965, + "step": 4919 + }, + { + "epoch": 1.3064666047005709, + "grad_norm": 1.3186749566879576, + "learning_rate": 5.89055972420493e-06, + "loss": 0.2509433329105377, + "step": 4920 + }, + { + "epoch": 1.3067321736821138, + "grad_norm": 1.2793911736037649, + "learning_rate": 5.886557232732235e-06, + "loss": 0.2611580491065979, + "step": 4921 + }, + { + "epoch": 1.3069977426636568, + "grad_norm": 1.1754660821918204, + "learning_rate": 5.882555534282792e-06, + "loss": 0.20567595958709717, + "step": 4922 + }, + { + "epoch": 1.3072633116451997, + "grad_norm": 1.2179299933591687, + "learning_rate": 5.878554629628081e-06, + "loss": 
0.22851137816905975, + "step": 4923 + }, + { + "epoch": 1.3075288806267427, + "grad_norm": 1.2283350051517878, + "learning_rate": 5.874554519539431e-06, + "loss": 0.24295902252197266, + "step": 4924 + }, + { + "epoch": 1.3077944496082856, + "grad_norm": 1.4565590371796837, + "learning_rate": 5.870555204788013e-06, + "loss": 0.29564642906188965, + "step": 4925 + }, + { + "epoch": 1.3080600185898288, + "grad_norm": 1.1906652754397118, + "learning_rate": 5.8665566861448465e-06, + "loss": 0.2399739921092987, + "step": 4926 + }, + { + "epoch": 1.3083255875713717, + "grad_norm": 1.2056826487968673, + "learning_rate": 5.862558964380806e-06, + "loss": 0.23882555961608887, + "step": 4927 + }, + { + "epoch": 1.3085911565529147, + "grad_norm": 1.2167231777259742, + "learning_rate": 5.858562040266599e-06, + "loss": 0.2510842978954315, + "step": 4928 + }, + { + "epoch": 1.3088567255344576, + "grad_norm": 1.3760419048772665, + "learning_rate": 5.854565914572787e-06, + "loss": 0.257358193397522, + "step": 4929 + }, + { + "epoch": 1.3091222945160006, + "grad_norm": 1.1144476904886809, + "learning_rate": 5.850570588069775e-06, + "loss": 0.23228219151496887, + "step": 4930 + }, + { + "epoch": 1.3093878634975435, + "grad_norm": 1.2711888334314898, + "learning_rate": 5.846576061527818e-06, + "loss": 0.2234456092119217, + "step": 4931 + }, + { + "epoch": 1.3096534324790865, + "grad_norm": 1.1978737759145446, + "learning_rate": 5.842582335717009e-06, + "loss": 0.2273438423871994, + "step": 4932 + }, + { + "epoch": 1.3099190014606295, + "grad_norm": 1.2382395020505186, + "learning_rate": 5.838589411407294e-06, + "loss": 0.2423306405544281, + "step": 4933 + }, + { + "epoch": 1.3101845704421724, + "grad_norm": 1.2388376015521172, + "learning_rate": 5.834597289368463e-06, + "loss": 0.266438364982605, + "step": 4934 + }, + { + "epoch": 1.3104501394237154, + "grad_norm": 1.2553012161793193, + "learning_rate": 5.830605970370142e-06, + "loss": 0.2469342052936554, + "step": 4935 + }, + { + 
"epoch": 1.3107157084052583, + "grad_norm": 1.2077087937137967, + "learning_rate": 5.8266154551818225e-06, + "loss": 0.2834509611129761, + "step": 4936 + }, + { + "epoch": 1.3109812773868013, + "grad_norm": 1.3037377411135151, + "learning_rate": 5.822625744572821e-06, + "loss": 0.2615162134170532, + "step": 4937 + }, + { + "epoch": 1.3112468463683442, + "grad_norm": 1.1529903033018742, + "learning_rate": 5.818636839312309e-06, + "loss": 0.2247931957244873, + "step": 4938 + }, + { + "epoch": 1.3115124153498872, + "grad_norm": 1.162136486746663, + "learning_rate": 5.814648740169299e-06, + "loss": 0.23759335279464722, + "step": 4939 + }, + { + "epoch": 1.3117779843314301, + "grad_norm": 1.2647326324758852, + "learning_rate": 5.8106614479126515e-06, + "loss": 0.23381784558296204, + "step": 4940 + }, + { + "epoch": 1.312043553312973, + "grad_norm": 1.2132087226777075, + "learning_rate": 5.8066749633110675e-06, + "loss": 0.2671264410018921, + "step": 4941 + }, + { + "epoch": 1.312309122294516, + "grad_norm": 1.09997395594631, + "learning_rate": 5.8026892871330944e-06, + "loss": 0.226065531373024, + "step": 4942 + }, + { + "epoch": 1.312574691276059, + "grad_norm": 1.3057172624305828, + "learning_rate": 5.798704420147124e-06, + "loss": 0.2654735743999481, + "step": 4943 + }, + { + "epoch": 1.312840260257602, + "grad_norm": 1.2538641402604982, + "learning_rate": 5.794720363121389e-06, + "loss": 0.23757833242416382, + "step": 4944 + }, + { + "epoch": 1.3131058292391449, + "grad_norm": 1.2131030914710175, + "learning_rate": 5.790737116823975e-06, + "loss": 0.2561591565608978, + "step": 4945 + }, + { + "epoch": 1.3133713982206878, + "grad_norm": 1.1698592689009908, + "learning_rate": 5.7867546820227995e-06, + "loss": 0.22105304896831512, + "step": 4946 + }, + { + "epoch": 1.3136369672022308, + "grad_norm": 1.190016500907537, + "learning_rate": 5.7827730594856325e-06, + "loss": 0.2485857605934143, + "step": 4947 + }, + { + "epoch": 1.3139025361837737, + "grad_norm": 
1.2087719424455774, + "learning_rate": 5.7787922499800804e-06, + "loss": 0.21256676316261292, + "step": 4948 + }, + { + "epoch": 1.3141681051653167, + "grad_norm": 1.2561271472593831, + "learning_rate": 5.774812254273604e-06, + "loss": 0.2700715661048889, + "step": 4949 + }, + { + "epoch": 1.3144336741468596, + "grad_norm": 1.072264118800501, + "learning_rate": 5.770833073133488e-06, + "loss": 0.22239381074905396, + "step": 4950 + }, + { + "epoch": 1.3146992431284026, + "grad_norm": 1.2811464089131772, + "learning_rate": 5.766854707326878e-06, + "loss": 0.22973249852657318, + "step": 4951 + }, + { + "epoch": 1.3149648121099455, + "grad_norm": 1.3904264621036453, + "learning_rate": 5.762877157620751e-06, + "loss": 0.27923673391342163, + "step": 4952 + }, + { + "epoch": 1.3152303810914885, + "grad_norm": 1.1321859486950596, + "learning_rate": 5.758900424781939e-06, + "loss": 0.23142218589782715, + "step": 4953 + }, + { + "epoch": 1.3154959500730314, + "grad_norm": 1.2732500147617782, + "learning_rate": 5.754924509577107e-06, + "loss": 0.23697996139526367, + "step": 4954 + }, + { + "epoch": 1.3157615190545744, + "grad_norm": 1.2838523265227373, + "learning_rate": 5.750949412772764e-06, + "loss": 0.27600961923599243, + "step": 4955 + }, + { + "epoch": 1.3160270880361173, + "grad_norm": 1.1644607269636458, + "learning_rate": 5.74697513513526e-06, + "loss": 0.2300705760717392, + "step": 4956 + }, + { + "epoch": 1.3162926570176603, + "grad_norm": 1.2927833273456342, + "learning_rate": 5.743001677430791e-06, + "loss": 0.2771111726760864, + "step": 4957 + }, + { + "epoch": 1.3165582259992032, + "grad_norm": 1.2582954956741819, + "learning_rate": 5.739029040425391e-06, + "loss": 0.2195657342672348, + "step": 4958 + }, + { + "epoch": 1.3168237949807462, + "grad_norm": 1.3450534906440017, + "learning_rate": 5.735057224884939e-06, + "loss": 0.2877159118652344, + "step": 4959 + }, + { + "epoch": 1.3170893639622892, + "grad_norm": 1.2211564124942835, + "learning_rate": 
5.731086231575154e-06, + "loss": 0.264115571975708, + "step": 4960 + }, + { + "epoch": 1.317354932943832, + "grad_norm": 1.1286607753384608, + "learning_rate": 5.727116061261593e-06, + "loss": 0.22574637830257416, + "step": 4961 + }, + { + "epoch": 1.317620501925375, + "grad_norm": 1.3177978069758023, + "learning_rate": 5.723146714709664e-06, + "loss": 0.26063698530197144, + "step": 4962 + }, + { + "epoch": 1.317886070906918, + "grad_norm": 1.2211473527893268, + "learning_rate": 5.719178192684611e-06, + "loss": 0.26272428035736084, + "step": 4963 + }, + { + "epoch": 1.318151639888461, + "grad_norm": 1.257373941755789, + "learning_rate": 5.715210495951513e-06, + "loss": 0.27188578248023987, + "step": 4964 + }, + { + "epoch": 1.318417208870004, + "grad_norm": 1.2786927551317604, + "learning_rate": 5.711243625275296e-06, + "loss": 0.26374363899230957, + "step": 4965 + }, + { + "epoch": 1.3186827778515469, + "grad_norm": 1.2469422291735242, + "learning_rate": 5.7072775814207275e-06, + "loss": 0.24819093942642212, + "step": 4966 + }, + { + "epoch": 1.3189483468330898, + "grad_norm": 1.3834225319345155, + "learning_rate": 5.703312365152412e-06, + "loss": 0.24387019872665405, + "step": 4967 + }, + { + "epoch": 1.319213915814633, + "grad_norm": 1.2919715806670669, + "learning_rate": 5.699347977234799e-06, + "loss": 0.2198091745376587, + "step": 4968 + }, + { + "epoch": 1.319479484796176, + "grad_norm": 1.3500197578827224, + "learning_rate": 5.695384418432174e-06, + "loss": 0.24349649250507355, + "step": 4969 + }, + { + "epoch": 1.319745053777719, + "grad_norm": 1.238323956307032, + "learning_rate": 5.691421689508661e-06, + "loss": 0.2330506294965744, + "step": 4970 + }, + { + "epoch": 1.3200106227592618, + "grad_norm": 1.2015417123740977, + "learning_rate": 5.687459791228234e-06, + "loss": 0.22821848094463348, + "step": 4971 + }, + { + "epoch": 1.3202761917408048, + "grad_norm": 1.1813366864368284, + "learning_rate": 5.683498724354699e-06, + "loss": 0.2342798113822937, + 
"step": 4972 + }, + { + "epoch": 1.3205417607223477, + "grad_norm": 1.0659168750954966, + "learning_rate": 5.679538489651702e-06, + "loss": 0.19689922034740448, + "step": 4973 + }, + { + "epoch": 1.3208073297038907, + "grad_norm": 1.1808385090527131, + "learning_rate": 5.675579087882727e-06, + "loss": 0.23910056054592133, + "step": 4974 + }, + { + "epoch": 1.3210728986854336, + "grad_norm": 1.381638431012013, + "learning_rate": 5.671620519811105e-06, + "loss": 0.25725993514060974, + "step": 4975 + }, + { + "epoch": 1.3213384676669766, + "grad_norm": 1.3528699347449313, + "learning_rate": 5.667662786199997e-06, + "loss": 0.3030434250831604, + "step": 4976 + }, + { + "epoch": 1.3216040366485196, + "grad_norm": 1.1182092617897728, + "learning_rate": 5.6637058878124075e-06, + "loss": 0.223737433552742, + "step": 4977 + }, + { + "epoch": 1.3218696056300625, + "grad_norm": 1.07766141822832, + "learning_rate": 5.659749825411183e-06, + "loss": 0.21480265259742737, + "step": 4978 + }, + { + "epoch": 1.3221351746116055, + "grad_norm": 1.2398269968997129, + "learning_rate": 5.655794599759001e-06, + "loss": 0.23288744688034058, + "step": 4979 + }, + { + "epoch": 1.3224007435931484, + "grad_norm": 1.3344080514533678, + "learning_rate": 5.651840211618387e-06, + "loss": 0.23701068758964539, + "step": 4980 + }, + { + "epoch": 1.3226663125746914, + "grad_norm": 1.2102834630940547, + "learning_rate": 5.647886661751698e-06, + "loss": 0.22164157032966614, + "step": 4981 + }, + { + "epoch": 1.3229318815562343, + "grad_norm": 1.2096538262244674, + "learning_rate": 5.643933950921132e-06, + "loss": 0.23426607251167297, + "step": 4982 + }, + { + "epoch": 1.3231974505377773, + "grad_norm": 1.1880047089826309, + "learning_rate": 5.6399820798887266e-06, + "loss": 0.2567834258079529, + "step": 4983 + }, + { + "epoch": 1.3234630195193202, + "grad_norm": 1.3013809826248692, + "learning_rate": 5.6360310494163525e-06, + "loss": 0.2713038921356201, + "step": 4984 + }, + { + "epoch": 
1.3237285885008632, + "grad_norm": 1.2908080991459006, + "learning_rate": 5.632080860265725e-06, + "loss": 0.2548249661922455, + "step": 4985 + }, + { + "epoch": 1.3239941574824061, + "grad_norm": 1.3471244082770852, + "learning_rate": 5.628131513198392e-06, + "loss": 0.2442832589149475, + "step": 4986 + }, + { + "epoch": 1.324259726463949, + "grad_norm": 1.3063670062134878, + "learning_rate": 5.6241830089757435e-06, + "loss": 0.24654853343963623, + "step": 4987 + }, + { + "epoch": 1.324525295445492, + "grad_norm": 1.2792033582455469, + "learning_rate": 5.620235348358997e-06, + "loss": 0.2802797853946686, + "step": 4988 + }, + { + "epoch": 1.324790864427035, + "grad_norm": 1.0588655062771883, + "learning_rate": 5.616288532109225e-06, + "loss": 0.18801404535770416, + "step": 4989 + }, + { + "epoch": 1.325056433408578, + "grad_norm": 1.2235746865490262, + "learning_rate": 5.6123425609873235e-06, + "loss": 0.2685382068157196, + "step": 4990 + }, + { + "epoch": 1.3253220023901209, + "grad_norm": 1.1873888072876837, + "learning_rate": 5.608397435754029e-06, + "loss": 0.23479774594306946, + "step": 4991 + }, + { + "epoch": 1.3255875713716638, + "grad_norm": 1.2164455244711625, + "learning_rate": 5.604453157169914e-06, + "loss": 0.24198031425476074, + "step": 4992 + }, + { + "epoch": 1.3258531403532068, + "grad_norm": 1.3448749532595476, + "learning_rate": 5.60050972599539e-06, + "loss": 0.25523462891578674, + "step": 4993 + }, + { + "epoch": 1.3261187093347497, + "grad_norm": 1.1695382845281797, + "learning_rate": 5.596567142990703e-06, + "loss": 0.23196743428707123, + "step": 4994 + }, + { + "epoch": 1.3263842783162927, + "grad_norm": 1.3145586744837223, + "learning_rate": 5.592625408915939e-06, + "loss": 0.29365748167037964, + "step": 4995 + }, + { + "epoch": 1.3266498472978356, + "grad_norm": 1.1946134760289593, + "learning_rate": 5.588684524531014e-06, + "loss": 0.24509185552597046, + "step": 4996 + }, + { + "epoch": 1.3269154162793786, + "grad_norm": 
1.3358300509723116, + "learning_rate": 5.584744490595687e-06, + "loss": 0.27032390236854553, + "step": 4997 + }, + { + "epoch": 1.3271809852609215, + "grad_norm": 1.1645416268641489, + "learning_rate": 5.580805307869549e-06, + "loss": 0.24401508271694183, + "step": 4998 + }, + { + "epoch": 1.3274465542424645, + "grad_norm": 1.1506901325018217, + "learning_rate": 5.576866977112028e-06, + "loss": 0.2216658741235733, + "step": 4999 + }, + { + "epoch": 1.3277121232240074, + "grad_norm": 1.1830944265124126, + "learning_rate": 5.5729294990823875e-06, + "loss": 0.24545373022556305, + "step": 5000 + }, + { + "epoch": 1.3279776922055504, + "grad_norm": 1.377548009409137, + "learning_rate": 5.568992874539728e-06, + "loss": 0.260816752910614, + "step": 5001 + }, + { + "epoch": 1.3282432611870933, + "grad_norm": 1.1392730403811622, + "learning_rate": 5.565057104242984e-06, + "loss": 0.1850551962852478, + "step": 5002 + }, + { + "epoch": 1.3285088301686363, + "grad_norm": 2.1232949408605624, + "learning_rate": 5.561122188950923e-06, + "loss": 0.26854407787323, + "step": 5003 + }, + { + "epoch": 1.3287743991501793, + "grad_norm": 1.1591208934359583, + "learning_rate": 5.557188129422153e-06, + "loss": 0.24294906854629517, + "step": 5004 + }, + { + "epoch": 1.3290399681317222, + "grad_norm": 1.1880501452095942, + "learning_rate": 5.553254926415114e-06, + "loss": 0.2533603310585022, + "step": 5005 + }, + { + "epoch": 1.3293055371132652, + "grad_norm": 1.1756183262516449, + "learning_rate": 5.549322580688077e-06, + "loss": 0.2082313448190689, + "step": 5006 + }, + { + "epoch": 1.329571106094808, + "grad_norm": 1.1602290025540025, + "learning_rate": 5.545391092999158e-06, + "loss": 0.24265842139720917, + "step": 5007 + }, + { + "epoch": 1.329836675076351, + "grad_norm": 1.2321490774961563, + "learning_rate": 5.541460464106301e-06, + "loss": 0.2483578324317932, + "step": 5008 + }, + { + "epoch": 1.330102244057894, + "grad_norm": 1.2798509363454456, + "learning_rate": 
5.537530694767281e-06, + "loss": 0.2769540548324585, + "step": 5009 + }, + { + "epoch": 1.330367813039437, + "grad_norm": 1.1781048091325885, + "learning_rate": 5.533601785739714e-06, + "loss": 0.2132025957107544, + "step": 5010 + }, + { + "epoch": 1.33063338202098, + "grad_norm": 1.2726887496075767, + "learning_rate": 5.529673737781047e-06, + "loss": 0.25223806500434875, + "step": 5011 + }, + { + "epoch": 1.3308989510025229, + "grad_norm": 1.13329365262538, + "learning_rate": 5.52574655164856e-06, + "loss": 0.22631296515464783, + "step": 5012 + }, + { + "epoch": 1.3311645199840658, + "grad_norm": 1.1821255064699665, + "learning_rate": 5.5218202280993725e-06, + "loss": 0.23756693303585052, + "step": 5013 + }, + { + "epoch": 1.3314300889656088, + "grad_norm": 1.2775335630974591, + "learning_rate": 5.517894767890427e-06, + "loss": 0.24746376276016235, + "step": 5014 + }, + { + "epoch": 1.3316956579471517, + "grad_norm": 1.105165815318004, + "learning_rate": 5.513970171778504e-06, + "loss": 0.21463070809841156, + "step": 5015 + }, + { + "epoch": 1.3319612269286947, + "grad_norm": 1.2090979668871258, + "learning_rate": 5.510046440520228e-06, + "loss": 0.21256107091903687, + "step": 5016 + }, + { + "epoch": 1.3322267959102376, + "grad_norm": 1.1963664670778913, + "learning_rate": 5.506123574872044e-06, + "loss": 0.25800254940986633, + "step": 5017 + }, + { + "epoch": 1.3324923648917806, + "grad_norm": 1.2726257558813519, + "learning_rate": 5.502201575590236e-06, + "loss": 0.2421891689300537, + "step": 5018 + }, + { + "epoch": 1.3327579338733235, + "grad_norm": 1.3181283061442692, + "learning_rate": 5.498280443430917e-06, + "loss": 0.24375903606414795, + "step": 5019 + }, + { + "epoch": 1.3330235028548665, + "grad_norm": 1.2419078132332353, + "learning_rate": 5.494360179150033e-06, + "loss": 0.22173303365707397, + "step": 5020 + }, + { + "epoch": 1.3332890718364094, + "grad_norm": 1.1754676882141941, + "learning_rate": 5.49044078350337e-06, + "loss": 0.24005022644996643, 
+ "step": 5021 + }, + { + "epoch": 1.3335546408179524, + "grad_norm": 1.194558748352182, + "learning_rate": 5.486522257246538e-06, + "loss": 0.2600201964378357, + "step": 5022 + }, + { + "epoch": 1.3338202097994953, + "grad_norm": 1.2112657273591712, + "learning_rate": 5.482604601134984e-06, + "loss": 0.22889836132526398, + "step": 5023 + }, + { + "epoch": 1.3340857787810383, + "grad_norm": 1.151722502872684, + "learning_rate": 5.478687815923981e-06, + "loss": 0.25045812129974365, + "step": 5024 + }, + { + "epoch": 1.3343513477625812, + "grad_norm": 1.2499612320902753, + "learning_rate": 5.474771902368646e-06, + "loss": 0.24649837613105774, + "step": 5025 + }, + { + "epoch": 1.3346169167441242, + "grad_norm": 1.1975824340507155, + "learning_rate": 5.470856861223919e-06, + "loss": 0.23994389176368713, + "step": 5026 + }, + { + "epoch": 1.3348824857256671, + "grad_norm": 1.2488470912807048, + "learning_rate": 5.466942693244572e-06, + "loss": 0.24381600320339203, + "step": 5027 + }, + { + "epoch": 1.33514805470721, + "grad_norm": 1.1770895947351019, + "learning_rate": 5.463029399185217e-06, + "loss": 0.22110486030578613, + "step": 5028 + }, + { + "epoch": 1.335413623688753, + "grad_norm": 1.2878634690011452, + "learning_rate": 5.459116979800281e-06, + "loss": 0.25733259320259094, + "step": 5029 + }, + { + "epoch": 1.335679192670296, + "grad_norm": 1.2598918710105835, + "learning_rate": 5.4552054358440355e-06, + "loss": 0.22853803634643555, + "step": 5030 + }, + { + "epoch": 1.335944761651839, + "grad_norm": 1.3118793520277159, + "learning_rate": 5.451294768070581e-06, + "loss": 0.27503639459609985, + "step": 5031 + }, + { + "epoch": 1.336210330633382, + "grad_norm": 1.2721314541046291, + "learning_rate": 5.447384977233849e-06, + "loss": 0.27931997179985046, + "step": 5032 + }, + { + "epoch": 1.3364758996149249, + "grad_norm": 1.2287817779118972, + "learning_rate": 5.443476064087596e-06, + "loss": 0.2477954626083374, + "step": 5033 + }, + { + "epoch": 
1.3367414685964678, + "grad_norm": 1.2204002745504476, + "learning_rate": 5.439568029385422e-06, + "loss": 0.2195623219013214, + "step": 5034 + }, + { + "epoch": 1.3370070375780108, + "grad_norm": 1.230653492520276, + "learning_rate": 5.435660873880747e-06, + "loss": 0.22160238027572632, + "step": 5035 + }, + { + "epoch": 1.3372726065595537, + "grad_norm": 1.6764380815480615, + "learning_rate": 5.4317545983268235e-06, + "loss": 0.24107405543327332, + "step": 5036 + }, + { + "epoch": 1.3375381755410967, + "grad_norm": 1.2985203082435115, + "learning_rate": 5.427849203476738e-06, + "loss": 0.2480086386203766, + "step": 5037 + }, + { + "epoch": 1.3378037445226398, + "grad_norm": 1.2654518356324462, + "learning_rate": 5.4239446900834005e-06, + "loss": 0.22476691007614136, + "step": 5038 + }, + { + "epoch": 1.3380693135041828, + "grad_norm": 1.217906592075979, + "learning_rate": 5.420041058899559e-06, + "loss": 0.23685473203659058, + "step": 5039 + }, + { + "epoch": 1.3383348824857257, + "grad_norm": 1.215790635675812, + "learning_rate": 5.416138310677784e-06, + "loss": 0.27753746509552, + "step": 5040 + }, + { + "epoch": 1.3386004514672687, + "grad_norm": 1.2682075315501737, + "learning_rate": 5.412236446170482e-06, + "loss": 0.22446027398109436, + "step": 5041 + }, + { + "epoch": 1.3388660204488116, + "grad_norm": 1.2214424011593596, + "learning_rate": 5.4083354661298816e-06, + "loss": 0.2535285949707031, + "step": 5042 + }, + { + "epoch": 1.3391315894303546, + "grad_norm": 1.2982364680013232, + "learning_rate": 5.4044353713080565e-06, + "loss": 0.2412964254617691, + "step": 5043 + }, + { + "epoch": 1.3393971584118975, + "grad_norm": 1.3092797704576777, + "learning_rate": 5.4005361624568895e-06, + "loss": 0.23863038420677185, + "step": 5044 + }, + { + "epoch": 1.3396627273934405, + "grad_norm": 1.159506578977356, + "learning_rate": 5.396637840328105e-06, + "loss": 0.22741727530956268, + "step": 5045 + }, + { + "epoch": 1.3399282963749835, + "grad_norm": 
1.285452356277395, + "learning_rate": 5.392740405673251e-06, + "loss": 0.2497379630804062, + "step": 5046 + }, + { + "epoch": 1.3401938653565264, + "grad_norm": 1.2401289485061215, + "learning_rate": 5.388843859243712e-06, + "loss": 0.19558298587799072, + "step": 5047 + }, + { + "epoch": 1.3404594343380694, + "grad_norm": 1.2074615239750155, + "learning_rate": 5.3849482017906914e-06, + "loss": 0.2266748994588852, + "step": 5048 + }, + { + "epoch": 1.3407250033196123, + "grad_norm": 1.2657162316868396, + "learning_rate": 5.381053434065229e-06, + "loss": 0.2410028576850891, + "step": 5049 + }, + { + "epoch": 1.3409905723011553, + "grad_norm": 1.301692886719208, + "learning_rate": 5.37715955681819e-06, + "loss": 0.23965512216091156, + "step": 5050 + }, + { + "epoch": 1.3412561412826982, + "grad_norm": 1.1756365557449155, + "learning_rate": 5.373266570800262e-06, + "loss": 0.22440138459205627, + "step": 5051 + }, + { + "epoch": 1.3415217102642412, + "grad_norm": 1.2562473271519534, + "learning_rate": 5.369374476761975e-06, + "loss": 0.2509710192680359, + "step": 5052 + }, + { + "epoch": 1.3417872792457841, + "grad_norm": 1.3381440207626536, + "learning_rate": 5.365483275453677e-06, + "loss": 0.26555800437927246, + "step": 5053 + }, + { + "epoch": 1.342052848227327, + "grad_norm": 1.2240809600669689, + "learning_rate": 5.361592967625544e-06, + "loss": 0.23089733719825745, + "step": 5054 + }, + { + "epoch": 1.34231841720887, + "grad_norm": 1.1178692263054482, + "learning_rate": 5.357703554027582e-06, + "loss": 0.2040700763463974, + "step": 5055 + }, + { + "epoch": 1.342583986190413, + "grad_norm": 1.309704975193781, + "learning_rate": 5.353815035409624e-06, + "loss": 0.23539039492607117, + "step": 5056 + }, + { + "epoch": 1.342849555171956, + "grad_norm": 1.7065922202358847, + "learning_rate": 5.3499274125213294e-06, + "loss": 0.2190464437007904, + "step": 5057 + }, + { + "epoch": 1.3431151241534989, + "grad_norm": 1.1478595499251703, + "learning_rate": 
5.346040686112189e-06, + "loss": 0.21557429432868958, + "step": 5058 + }, + { + "epoch": 1.3433806931350418, + "grad_norm": 1.1934269644730748, + "learning_rate": 5.342154856931515e-06, + "loss": 0.24398267269134521, + "step": 5059 + }, + { + "epoch": 1.3436462621165848, + "grad_norm": 1.1089059625649784, + "learning_rate": 5.338269925728451e-06, + "loss": 0.21652038395404816, + "step": 5060 + }, + { + "epoch": 1.3439118310981277, + "grad_norm": 1.1937531358219302, + "learning_rate": 5.334385893251966e-06, + "loss": 0.2031325101852417, + "step": 5061 + }, + { + "epoch": 1.3441774000796707, + "grad_norm": 1.1621991357090053, + "learning_rate": 5.330502760250853e-06, + "loss": 0.2484835982322693, + "step": 5062 + }, + { + "epoch": 1.3444429690612136, + "grad_norm": 1.2657742595884374, + "learning_rate": 5.326620527473737e-06, + "loss": 0.23698699474334717, + "step": 5063 + }, + { + "epoch": 1.3447085380427566, + "grad_norm": 1.2000433743668328, + "learning_rate": 5.322739195669065e-06, + "loss": 0.23928484320640564, + "step": 5064 + }, + { + "epoch": 1.3449741070242995, + "grad_norm": 1.1828146199314795, + "learning_rate": 5.318858765585115e-06, + "loss": 0.22679512202739716, + "step": 5065 + }, + { + "epoch": 1.3452396760058425, + "grad_norm": 1.2334385564497414, + "learning_rate": 5.314979237969984e-06, + "loss": 0.2115025818347931, + "step": 5066 + }, + { + "epoch": 1.3455052449873854, + "grad_norm": 1.261129899382787, + "learning_rate": 5.311100613571603e-06, + "loss": 0.2441834807395935, + "step": 5067 + }, + { + "epoch": 1.3457708139689284, + "grad_norm": 1.2722125718860966, + "learning_rate": 5.307222893137722e-06, + "loss": 0.2549205720424652, + "step": 5068 + }, + { + "epoch": 1.3460363829504713, + "grad_norm": 1.179054242584843, + "learning_rate": 5.3033460774159185e-06, + "loss": 0.24652990698814392, + "step": 5069 + }, + { + "epoch": 1.3463019519320143, + "grad_norm": 1.2062419936470874, + "learning_rate": 5.299470167153602e-06, + "loss": 
0.2403775006532669, + "step": 5070 + }, + { + "epoch": 1.3465675209135572, + "grad_norm": 1.1208895570259512, + "learning_rate": 5.295595163097999e-06, + "loss": 0.2215663194656372, + "step": 5071 + }, + { + "epoch": 1.3468330898951002, + "grad_norm": 1.2914937229567889, + "learning_rate": 5.291721065996167e-06, + "loss": 0.2567424774169922, + "step": 5072 + }, + { + "epoch": 1.3470986588766432, + "grad_norm": 1.0608079556396839, + "learning_rate": 5.287847876594984e-06, + "loss": 0.21162359416484833, + "step": 5073 + }, + { + "epoch": 1.347364227858186, + "grad_norm": 1.221049341797181, + "learning_rate": 5.283975595641155e-06, + "loss": 0.21851085126399994, + "step": 5074 + }, + { + "epoch": 1.347629796839729, + "grad_norm": 1.2935501467753354, + "learning_rate": 5.280104223881212e-06, + "loss": 0.2491171509027481, + "step": 5075 + }, + { + "epoch": 1.347895365821272, + "grad_norm": 1.2921255335421646, + "learning_rate": 5.276233762061507e-06, + "loss": 0.22467780113220215, + "step": 5076 + }, + { + "epoch": 1.348160934802815, + "grad_norm": 1.159790816626821, + "learning_rate": 5.272364210928223e-06, + "loss": 0.24531611800193787, + "step": 5077 + }, + { + "epoch": 1.348426503784358, + "grad_norm": 1.2178282841242851, + "learning_rate": 5.268495571227361e-06, + "loss": 0.2582520544528961, + "step": 5078 + }, + { + "epoch": 1.3486920727659009, + "grad_norm": 1.2175282778251775, + "learning_rate": 5.264627843704749e-06, + "loss": 0.21180811524391174, + "step": 5079 + }, + { + "epoch": 1.348957641747444, + "grad_norm": 1.2942378328530906, + "learning_rate": 5.2607610291060406e-06, + "loss": 0.27026671171188354, + "step": 5080 + }, + { + "epoch": 1.349223210728987, + "grad_norm": 1.1721525183169563, + "learning_rate": 5.256895128176712e-06, + "loss": 0.22954419255256653, + "step": 5081 + }, + { + "epoch": 1.34948877971053, + "grad_norm": 1.3561853541918854, + "learning_rate": 5.253030141662063e-06, + "loss": 0.24064484238624573, + "step": 5082 + }, + { + "epoch": 
1.349754348692073, + "grad_norm": 1.1245550279116328, + "learning_rate": 5.249166070307218e-06, + "loss": 0.1981196105480194, + "step": 5083 + }, + { + "epoch": 1.3500199176736158, + "grad_norm": 1.0881909699390468, + "learning_rate": 5.2453029148571226e-06, + "loss": 0.19882233440876007, + "step": 5084 + }, + { + "epoch": 1.3502854866551588, + "grad_norm": 1.2123536275051694, + "learning_rate": 5.24144067605655e-06, + "loss": 0.2409907579421997, + "step": 5085 + }, + { + "epoch": 1.3505510556367017, + "grad_norm": 1.2197874501412473, + "learning_rate": 5.237579354650092e-06, + "loss": 0.2205093652009964, + "step": 5086 + }, + { + "epoch": 1.3508166246182447, + "grad_norm": 1.4716074796051495, + "learning_rate": 5.233718951382163e-06, + "loss": 0.2283058911561966, + "step": 5087 + }, + { + "epoch": 1.3510821935997877, + "grad_norm": 1.2561007307780203, + "learning_rate": 5.229859466997012e-06, + "loss": 0.25584474205970764, + "step": 5088 + }, + { + "epoch": 1.3513477625813306, + "grad_norm": 1.1491167817661179, + "learning_rate": 5.226000902238696e-06, + "loss": 0.22516845166683197, + "step": 5089 + }, + { + "epoch": 1.3516133315628736, + "grad_norm": 1.2604818786719383, + "learning_rate": 5.222143257851102e-06, + "loss": 0.23440764844417572, + "step": 5090 + }, + { + "epoch": 1.3518789005444165, + "grad_norm": 1.2156754572685655, + "learning_rate": 5.218286534577938e-06, + "loss": 0.25858962535858154, + "step": 5091 + }, + { + "epoch": 1.3521444695259595, + "grad_norm": 1.1425154357949754, + "learning_rate": 5.214430733162736e-06, + "loss": 0.20676326751708984, + "step": 5092 + }, + { + "epoch": 1.3524100385075024, + "grad_norm": 1.1266241214136956, + "learning_rate": 5.210575854348853e-06, + "loss": 0.21892425417900085, + "step": 5093 + }, + { + "epoch": 1.3526756074890454, + "grad_norm": 1.2379350388596377, + "learning_rate": 5.206721898879454e-06, + "loss": 0.2538335919380188, + "step": 5094 + }, + { + "epoch": 1.3529411764705883, + "grad_norm": 
1.2059035716196298, + "learning_rate": 5.202868867497542e-06, + "loss": 0.24750448763370514, + "step": 5095 + }, + { + "epoch": 1.3532067454521313, + "grad_norm": 1.2602608504342458, + "learning_rate": 5.199016760945931e-06, + "loss": 0.2569364011287689, + "step": 5096 + }, + { + "epoch": 1.3534723144336742, + "grad_norm": 0.9860855220263709, + "learning_rate": 5.19516557996727e-06, + "loss": 0.16788914799690247, + "step": 5097 + }, + { + "epoch": 1.3537378834152172, + "grad_norm": 1.0020852845957948, + "learning_rate": 5.191315325304018e-06, + "loss": 0.19006651639938354, + "step": 5098 + }, + { + "epoch": 1.3540034523967601, + "grad_norm": 1.187896658740898, + "learning_rate": 5.1874659976984575e-06, + "loss": 0.23474551737308502, + "step": 5099 + }, + { + "epoch": 1.354269021378303, + "grad_norm": 1.2829971661643687, + "learning_rate": 5.183617597892694e-06, + "loss": 0.26601099967956543, + "step": 5100 + }, + { + "epoch": 1.354534590359846, + "grad_norm": 1.1758855450162613, + "learning_rate": 5.179770126628654e-06, + "loss": 0.24207550287246704, + "step": 5101 + }, + { + "epoch": 1.354800159341389, + "grad_norm": 1.2535446057143411, + "learning_rate": 5.175923584648083e-06, + "loss": 0.2538307309150696, + "step": 5102 + }, + { + "epoch": 1.355065728322932, + "grad_norm": 1.1865818667829109, + "learning_rate": 5.172077972692553e-06, + "loss": 0.23073242604732513, + "step": 5103 + }, + { + "epoch": 1.3553312973044749, + "grad_norm": 1.348848385270533, + "learning_rate": 5.168233291503448e-06, + "loss": 0.2634595036506653, + "step": 5104 + }, + { + "epoch": 1.3555968662860178, + "grad_norm": 1.225057907199874, + "learning_rate": 5.1643895418219744e-06, + "loss": 0.23282350599765778, + "step": 5105 + }, + { + "epoch": 1.3558624352675608, + "grad_norm": 1.333152685269679, + "learning_rate": 5.160546724389172e-06, + "loss": 0.2543700933456421, + "step": 5106 + }, + { + "epoch": 1.3561280042491037, + "grad_norm": 1.1449256417555271, + "learning_rate": 
5.1567048399458855e-06, + "loss": 0.2005772739648819, + "step": 5107 + }, + { + "epoch": 1.3563935732306467, + "grad_norm": 1.2429630346358373, + "learning_rate": 5.152863889232787e-06, + "loss": 0.2367073893547058, + "step": 5108 + }, + { + "epoch": 1.3566591422121896, + "grad_norm": 1.2839253544945022, + "learning_rate": 5.14902387299036e-06, + "loss": 0.25600770115852356, + "step": 5109 + }, + { + "epoch": 1.3569247111937326, + "grad_norm": 1.198566513294344, + "learning_rate": 5.145184791958918e-06, + "loss": 0.21678754687309265, + "step": 5110 + }, + { + "epoch": 1.3571902801752755, + "grad_norm": 1.3894724787206996, + "learning_rate": 5.141346646878591e-06, + "loss": 0.265438973903656, + "step": 5111 + }, + { + "epoch": 1.3574558491568185, + "grad_norm": 1.1239736089383028, + "learning_rate": 5.13750943848933e-06, + "loss": 0.24246999621391296, + "step": 5112 + }, + { + "epoch": 1.3577214181383614, + "grad_norm": 1.299396280421792, + "learning_rate": 5.133673167530899e-06, + "loss": 0.25401771068573, + "step": 5113 + }, + { + "epoch": 1.3579869871199044, + "grad_norm": 1.2329813534125698, + "learning_rate": 5.129837834742885e-06, + "loss": 0.2698017656803131, + "step": 5114 + }, + { + "epoch": 1.3582525561014474, + "grad_norm": 1.2787210937788358, + "learning_rate": 5.126003440864703e-06, + "loss": 0.27006995677948, + "step": 5115 + }, + { + "epoch": 1.3585181250829903, + "grad_norm": 1.2695682196385796, + "learning_rate": 5.122169986635575e-06, + "loss": 0.2370866984128952, + "step": 5116 + }, + { + "epoch": 1.3587836940645333, + "grad_norm": 1.3031561376922138, + "learning_rate": 5.1183374727945425e-06, + "loss": 0.24017807841300964, + "step": 5117 + }, + { + "epoch": 1.3590492630460762, + "grad_norm": 1.1487956614446662, + "learning_rate": 5.114505900080473e-06, + "loss": 0.21664533019065857, + "step": 5118 + }, + { + "epoch": 1.3593148320276192, + "grad_norm": 4.246209132455192, + "learning_rate": 5.110675269232046e-06, + "loss": 0.24561598896980286, + 
"step": 5119 + }, + { + "epoch": 1.359580401009162, + "grad_norm": 1.3902415348604562, + "learning_rate": 5.106845580987763e-06, + "loss": 0.26678937673568726, + "step": 5120 + }, + { + "epoch": 1.359845969990705, + "grad_norm": 1.354168350096278, + "learning_rate": 5.103016836085943e-06, + "loss": 0.21919070184230804, + "step": 5121 + }, + { + "epoch": 1.360111538972248, + "grad_norm": 1.3057665036353723, + "learning_rate": 5.099189035264722e-06, + "loss": 0.24887943267822266, + "step": 5122 + }, + { + "epoch": 1.360377107953791, + "grad_norm": 1.2017875007060346, + "learning_rate": 5.0953621792620556e-06, + "loss": 0.23597784340381622, + "step": 5123 + }, + { + "epoch": 1.360642676935334, + "grad_norm": 1.2098630506546966, + "learning_rate": 5.091536268815717e-06, + "loss": 0.21265193819999695, + "step": 5124 + }, + { + "epoch": 1.3609082459168769, + "grad_norm": 1.3606980074054404, + "learning_rate": 5.0877113046632945e-06, + "loss": 0.29837465286254883, + "step": 5125 + }, + { + "epoch": 1.3611738148984198, + "grad_norm": 1.1915793844006848, + "learning_rate": 5.0838872875421975e-06, + "loss": 0.2324269413948059, + "step": 5126 + }, + { + "epoch": 1.3614393838799628, + "grad_norm": 1.0970197687294143, + "learning_rate": 5.080064218189652e-06, + "loss": 0.19149541854858398, + "step": 5127 + }, + { + "epoch": 1.3617049528615057, + "grad_norm": 1.1710303609542994, + "learning_rate": 5.0762420973427e-06, + "loss": 0.247644305229187, + "step": 5128 + }, + { + "epoch": 1.3619705218430487, + "grad_norm": 1.1403838601028529, + "learning_rate": 5.0724209257382006e-06, + "loss": 0.2272202968597412, + "step": 5129 + }, + { + "epoch": 1.3622360908245916, + "grad_norm": 1.2012952880900256, + "learning_rate": 5.068600704112832e-06, + "loss": 0.25735989212989807, + "step": 5130 + }, + { + "epoch": 1.3625016598061346, + "grad_norm": 1.1771555574179005, + "learning_rate": 5.064781433203086e-06, + "loss": 0.19970473647117615, + "step": 5131 + }, + { + "epoch": 
1.3627672287876775, + "grad_norm": 1.2156620394191346, + "learning_rate": 5.060963113745272e-06, + "loss": 0.24289372563362122, + "step": 5132 + }, + { + "epoch": 1.3630327977692205, + "grad_norm": 1.2352988713677027, + "learning_rate": 5.0571457464755226e-06, + "loss": 0.2757350504398346, + "step": 5133 + }, + { + "epoch": 1.3632983667507634, + "grad_norm": 1.2115447809386193, + "learning_rate": 5.053329332129777e-06, + "loss": 0.24552851915359497, + "step": 5134 + }, + { + "epoch": 1.3635639357323064, + "grad_norm": 1.1546263092618338, + "learning_rate": 5.049513871443797e-06, + "loss": 0.22152797877788544, + "step": 5135 + }, + { + "epoch": 1.3638295047138493, + "grad_norm": 1.2567398712194906, + "learning_rate": 5.045699365153155e-06, + "loss": 0.27098602056503296, + "step": 5136 + }, + { + "epoch": 1.3640950736953923, + "grad_norm": 1.201852433475055, + "learning_rate": 5.041885813993246e-06, + "loss": 0.21275216341018677, + "step": 5137 + }, + { + "epoch": 1.3643606426769352, + "grad_norm": 1.3326670101473788, + "learning_rate": 5.038073218699275e-06, + "loss": 0.2510162591934204, + "step": 5138 + }, + { + "epoch": 1.3646262116584782, + "grad_norm": 1.2702563681918038, + "learning_rate": 5.034261580006269e-06, + "loss": 0.23203429579734802, + "step": 5139 + }, + { + "epoch": 1.3648917806400211, + "grad_norm": 1.137285489869793, + "learning_rate": 5.030450898649064e-06, + "loss": 0.22178995609283447, + "step": 5140 + }, + { + "epoch": 1.365157349621564, + "grad_norm": 1.2415754400243457, + "learning_rate": 5.026641175362316e-06, + "loss": 0.2567412257194519, + "step": 5141 + }, + { + "epoch": 1.365422918603107, + "grad_norm": 1.232487080143156, + "learning_rate": 5.022832410880494e-06, + "loss": 0.21939827501773834, + "step": 5142 + }, + { + "epoch": 1.36568848758465, + "grad_norm": 1.4733425270104286, + "learning_rate": 5.019024605937882e-06, + "loss": 0.2325637936592102, + "step": 5143 + }, + { + "epoch": 1.365954056566193, + "grad_norm": 1.266575596941496, 
+ "learning_rate": 5.015217761268582e-06, + "loss": 0.2416393756866455, + "step": 5144 + }, + { + "epoch": 1.366219625547736, + "grad_norm": 1.289260413423763, + "learning_rate": 5.011411877606507e-06, + "loss": 0.2439568042755127, + "step": 5145 + }, + { + "epoch": 1.3664851945292789, + "grad_norm": 1.1439689034996021, + "learning_rate": 5.007606955685387e-06, + "loss": 0.2495957612991333, + "step": 5146 + }, + { + "epoch": 1.3667507635108218, + "grad_norm": 1.1937127912858143, + "learning_rate": 5.003802996238766e-06, + "loss": 0.23415328562259674, + "step": 5147 + }, + { + "epoch": 1.3670163324923648, + "grad_norm": 1.26410321081345, + "learning_rate": 5.000000000000003e-06, + "loss": 0.2637922465801239, + "step": 5148 + }, + { + "epoch": 1.3672819014739077, + "grad_norm": 1.243307173830296, + "learning_rate": 4.9961979677022696e-06, + "loss": 0.2319526970386505, + "step": 5149 + }, + { + "epoch": 1.3675474704554509, + "grad_norm": 1.2115383829826751, + "learning_rate": 4.992396900078551e-06, + "loss": 0.2338445484638214, + "step": 5150 + }, + { + "epoch": 1.3678130394369938, + "grad_norm": 1.1683439299091893, + "learning_rate": 4.988596797861654e-06, + "loss": 0.19041961431503296, + "step": 5151 + }, + { + "epoch": 1.3680786084185368, + "grad_norm": 1.233073404450011, + "learning_rate": 4.984797661784191e-06, + "loss": 0.2698138952255249, + "step": 5152 + }, + { + "epoch": 1.3683441774000797, + "grad_norm": 1.2592426315358647, + "learning_rate": 4.980999492578588e-06, + "loss": 0.2208167165517807, + "step": 5153 + }, + { + "epoch": 1.3686097463816227, + "grad_norm": 1.1935159953807641, + "learning_rate": 4.9772022909770915e-06, + "loss": 0.2515152096748352, + "step": 5154 + }, + { + "epoch": 1.3688753153631656, + "grad_norm": 1.3110804278343313, + "learning_rate": 4.973406057711755e-06, + "loss": 0.2393365204334259, + "step": 5155 + }, + { + "epoch": 1.3691408843447086, + "grad_norm": 1.302037077529998, + "learning_rate": 4.969610793514446e-06, + "loss": 
0.24546492099761963, + "step": 5156 + }, + { + "epoch": 1.3694064533262515, + "grad_norm": 1.5300417364025873, + "learning_rate": 4.965816499116849e-06, + "loss": 0.252412348985672, + "step": 5157 + }, + { + "epoch": 1.3696720223077945, + "grad_norm": 1.1552882128683561, + "learning_rate": 4.962023175250461e-06, + "loss": 0.22654281556606293, + "step": 5158 + }, + { + "epoch": 1.3699375912893375, + "grad_norm": 1.2873880265204376, + "learning_rate": 4.958230822646581e-06, + "loss": 0.2542813718318939, + "step": 5159 + }, + { + "epoch": 1.3702031602708804, + "grad_norm": 1.2851879635778218, + "learning_rate": 4.9544394420363395e-06, + "loss": 0.25376224517822266, + "step": 5160 + }, + { + "epoch": 1.3704687292524234, + "grad_norm": 1.252574665809313, + "learning_rate": 4.950649034150666e-06, + "loss": 0.21911674737930298, + "step": 5161 + }, + { + "epoch": 1.3707342982339663, + "grad_norm": 1.3527776455922371, + "learning_rate": 4.946859599720308e-06, + "loss": 0.2805126905441284, + "step": 5162 + }, + { + "epoch": 1.3709998672155093, + "grad_norm": 1.1716388954292443, + "learning_rate": 4.943071139475824e-06, + "loss": 0.2189590483903885, + "step": 5163 + }, + { + "epoch": 1.3712654361970522, + "grad_norm": 1.2218109142926636, + "learning_rate": 4.939283654147582e-06, + "loss": 0.21837599575519562, + "step": 5164 + }, + { + "epoch": 1.3715310051785952, + "grad_norm": 1.2779646624690562, + "learning_rate": 4.935497144465766e-06, + "loss": 0.25090983510017395, + "step": 5165 + }, + { + "epoch": 1.3717965741601381, + "grad_norm": 1.1988734011828608, + "learning_rate": 4.93171161116037e-06, + "loss": 0.22028754651546478, + "step": 5166 + }, + { + "epoch": 1.372062143141681, + "grad_norm": 1.1554753760684375, + "learning_rate": 4.927927054961201e-06, + "loss": 0.20097196102142334, + "step": 5167 + }, + { + "epoch": 1.372327712123224, + "grad_norm": 1.209557738779129, + "learning_rate": 4.924143476597872e-06, + "loss": 0.230082705616951, + "step": 5168 + }, + { + 
"epoch": 1.372593281104767, + "grad_norm": 1.1549715219295726, + "learning_rate": 4.920360876799821e-06, + "loss": 0.23701804876327515, + "step": 5169 + }, + { + "epoch": 1.37285885008631, + "grad_norm": 1.2740998730652584, + "learning_rate": 4.9165792562962834e-06, + "loss": 0.22357231378555298, + "step": 5170 + }, + { + "epoch": 1.3731244190678529, + "grad_norm": 1.2042473616661704, + "learning_rate": 4.912798615816312e-06, + "loss": 0.2533026337623596, + "step": 5171 + }, + { + "epoch": 1.3733899880493958, + "grad_norm": 1.3342025781776312, + "learning_rate": 4.90901895608877e-06, + "loss": 0.24878138303756714, + "step": 5172 + }, + { + "epoch": 1.3736555570309388, + "grad_norm": 1.5415419516618216, + "learning_rate": 4.905240277842335e-06, + "loss": 0.22641420364379883, + "step": 5173 + }, + { + "epoch": 1.3739211260124817, + "grad_norm": 1.2916997982097302, + "learning_rate": 4.901462581805483e-06, + "loss": 0.24495793879032135, + "step": 5174 + }, + { + "epoch": 1.3741866949940247, + "grad_norm": 1.3531795848957913, + "learning_rate": 4.897685868706512e-06, + "loss": 0.2688868045806885, + "step": 5175 + }, + { + "epoch": 1.3744522639755676, + "grad_norm": 1.2828126418821555, + "learning_rate": 4.893910139273531e-06, + "loss": 0.25796642899513245, + "step": 5176 + }, + { + "epoch": 1.3747178329571106, + "grad_norm": 1.4091718050104127, + "learning_rate": 4.890135394234451e-06, + "loss": 0.27557405829429626, + "step": 5177 + }, + { + "epoch": 1.3749834019386535, + "grad_norm": 1.620605499986823, + "learning_rate": 4.886361634317004e-06, + "loss": 0.23553809523582458, + "step": 5178 + }, + { + "epoch": 1.3752489709201965, + "grad_norm": 1.2608742989736732, + "learning_rate": 4.882588860248725e-06, + "loss": 0.2454400360584259, + "step": 5179 + }, + { + "epoch": 1.3755145399017394, + "grad_norm": 1.1743865548501493, + "learning_rate": 4.878817072756959e-06, + "loss": 0.19460657238960266, + "step": 5180 + }, + { + "epoch": 1.3757801088832824, + "grad_norm": 
1.2528300475452, + "learning_rate": 4.875046272568863e-06, + "loss": 0.24833449721336365, + "step": 5181 + }, + { + "epoch": 1.3760456778648253, + "grad_norm": 1.3263672125712147, + "learning_rate": 4.871276460411403e-06, + "loss": 0.2774161994457245, + "step": 5182 + }, + { + "epoch": 1.3763112468463683, + "grad_norm": 2.6268834337513667, + "learning_rate": 4.867507637011353e-06, + "loss": 0.2277964949607849, + "step": 5183 + }, + { + "epoch": 1.3765768158279112, + "grad_norm": 1.8924198767245841, + "learning_rate": 4.863739803095299e-06, + "loss": 0.2176733911037445, + "step": 5184 + }, + { + "epoch": 1.3768423848094542, + "grad_norm": 1.3153810073025014, + "learning_rate": 4.859972959389634e-06, + "loss": 0.23529113829135895, + "step": 5185 + }, + { + "epoch": 1.3771079537909972, + "grad_norm": 1.3909544444662505, + "learning_rate": 4.856207106620557e-06, + "loss": 0.2646695077419281, + "step": 5186 + }, + { + "epoch": 1.37737352277254, + "grad_norm": 1.2095108180861869, + "learning_rate": 4.852442245514093e-06, + "loss": 0.23179873824119568, + "step": 5187 + }, + { + "epoch": 1.377639091754083, + "grad_norm": 1.1084014698771758, + "learning_rate": 4.84867837679605e-06, + "loss": 0.2127494066953659, + "step": 5188 + }, + { + "epoch": 1.377904660735626, + "grad_norm": 1.2275201950569183, + "learning_rate": 4.844915501192062e-06, + "loss": 0.2204679548740387, + "step": 5189 + }, + { + "epoch": 1.378170229717169, + "grad_norm": 1.2078653060668294, + "learning_rate": 4.841153619427567e-06, + "loss": 0.20271794497966766, + "step": 5190 + }, + { + "epoch": 1.378435798698712, + "grad_norm": 1.4269963155687142, + "learning_rate": 4.837392732227811e-06, + "loss": 0.2785792052745819, + "step": 5191 + }, + { + "epoch": 1.3787013676802549, + "grad_norm": 1.2501319487764966, + "learning_rate": 4.8336328403178486e-06, + "loss": 0.24904468655586243, + "step": 5192 + }, + { + "epoch": 1.378966936661798, + "grad_norm": 1.1230965332904321, + "learning_rate": 
4.829873944422544e-06, + "loss": 0.20045346021652222, + "step": 5193 + }, + { + "epoch": 1.379232505643341, + "grad_norm": 1.1339816903135191, + "learning_rate": 4.826116045266565e-06, + "loss": 0.21814313530921936, + "step": 5194 + }, + { + "epoch": 1.379498074624884, + "grad_norm": 1.236126479276255, + "learning_rate": 4.82235914357439e-06, + "loss": 0.2408592253923416, + "step": 5195 + }, + { + "epoch": 1.379763643606427, + "grad_norm": 1.1229995433845732, + "learning_rate": 4.818603240070311e-06, + "loss": 0.21453416347503662, + "step": 5196 + }, + { + "epoch": 1.3800292125879698, + "grad_norm": 1.2915687788203387, + "learning_rate": 4.814848335478418e-06, + "loss": 0.2578599154949188, + "step": 5197 + }, + { + "epoch": 1.3802947815695128, + "grad_norm": 1.0696662022967476, + "learning_rate": 4.811094430522613e-06, + "loss": 0.1980094015598297, + "step": 5198 + }, + { + "epoch": 1.3805603505510557, + "grad_norm": 1.202740960535961, + "learning_rate": 4.807341525926604e-06, + "loss": 0.24620960652828217, + "step": 5199 + }, + { + "epoch": 1.3808259195325987, + "grad_norm": 1.2486655803425535, + "learning_rate": 4.803589622413908e-06, + "loss": 0.23525282740592957, + "step": 5200 + }, + { + "epoch": 1.3810914885141417, + "grad_norm": 1.1657735912575689, + "learning_rate": 4.799838720707847e-06, + "loss": 0.2277744859457016, + "step": 5201 + }, + { + "epoch": 1.3813570574956846, + "grad_norm": 1.2927728942283212, + "learning_rate": 4.796088821531549e-06, + "loss": 0.2727074921131134, + "step": 5202 + }, + { + "epoch": 1.3816226264772276, + "grad_norm": 1.2370931993726209, + "learning_rate": 4.7923399256079525e-06, + "loss": 0.21686753630638123, + "step": 5203 + }, + { + "epoch": 1.3818881954587705, + "grad_norm": 1.2572583885252075, + "learning_rate": 4.788592033659799e-06, + "loss": 0.2841380834579468, + "step": 5204 + }, + { + "epoch": 1.3821537644403135, + "grad_norm": 1.1157272204593003, + "learning_rate": 4.78484514640964e-06, + "loss": 0.24577853083610535, + 
"step": 5205 + }, + { + "epoch": 1.3824193334218564, + "grad_norm": 1.2077705032221964, + "learning_rate": 4.7810992645798285e-06, + "loss": 0.22289782762527466, + "step": 5206 + }, + { + "epoch": 1.3826849024033994, + "grad_norm": 1.1476107334002954, + "learning_rate": 4.7773543888925274e-06, + "loss": 0.2223999947309494, + "step": 5207 + }, + { + "epoch": 1.3829504713849423, + "grad_norm": 1.2183085137487102, + "learning_rate": 4.773610520069706e-06, + "loss": 0.23938870429992676, + "step": 5208 + }, + { + "epoch": 1.3832160403664853, + "grad_norm": 1.219370193725879, + "learning_rate": 4.769867658833136e-06, + "loss": 0.260856568813324, + "step": 5209 + }, + { + "epoch": 1.3834816093480282, + "grad_norm": 1.2333269697463725, + "learning_rate": 4.766125805904398e-06, + "loss": 0.23602089285850525, + "step": 5210 + }, + { + "epoch": 1.3837471783295712, + "grad_norm": 1.156747833138865, + "learning_rate": 4.762384962004877e-06, + "loss": 0.22543978691101074, + "step": 5211 + }, + { + "epoch": 1.3840127473111141, + "grad_norm": 1.3639051201807257, + "learning_rate": 4.758645127855763e-06, + "loss": 0.2432224452495575, + "step": 5212 + }, + { + "epoch": 1.384278316292657, + "grad_norm": 1.3947016936895973, + "learning_rate": 4.754906304178049e-06, + "loss": 0.22764597833156586, + "step": 5213 + }, + { + "epoch": 1.3845438852742, + "grad_norm": 1.2064067504011344, + "learning_rate": 4.751168491692541e-06, + "loss": 0.22503387928009033, + "step": 5214 + }, + { + "epoch": 1.384809454255743, + "grad_norm": 1.1066861130484609, + "learning_rate": 4.747431691119846e-06, + "loss": 0.21889932453632355, + "step": 5215 + }, + { + "epoch": 1.385075023237286, + "grad_norm": 1.3903278318809302, + "learning_rate": 4.743695903180372e-06, + "loss": 0.2695825695991516, + "step": 5216 + }, + { + "epoch": 1.3853405922188289, + "grad_norm": 1.2921759622470506, + "learning_rate": 4.739961128594336e-06, + "loss": 0.265118271112442, + "step": 5217 + }, + { + "epoch": 1.3856061612003718, + 
"grad_norm": 1.1349207398090602, + "learning_rate": 4.736227368081757e-06, + "loss": 0.2050788253545761, + "step": 5218 + }, + { + "epoch": 1.3858717301819148, + "grad_norm": 1.23951121142384, + "learning_rate": 4.7324946223624625e-06, + "loss": 0.274588406085968, + "step": 5219 + }, + { + "epoch": 1.3861372991634577, + "grad_norm": 1.209560473571303, + "learning_rate": 4.728762892156079e-06, + "loss": 0.2242514044046402, + "step": 5220 + }, + { + "epoch": 1.3864028681450007, + "grad_norm": 1.1337174836883812, + "learning_rate": 4.725032178182042e-06, + "loss": 0.19989261031150818, + "step": 5221 + }, + { + "epoch": 1.3866684371265436, + "grad_norm": 1.1989339880554155, + "learning_rate": 4.721302481159588e-06, + "loss": 0.24409207701683044, + "step": 5222 + }, + { + "epoch": 1.3869340061080866, + "grad_norm": 1.2425140627800753, + "learning_rate": 4.71757380180776e-06, + "loss": 0.25146353244781494, + "step": 5223 + }, + { + "epoch": 1.3871995750896295, + "grad_norm": 1.245669068902739, + "learning_rate": 4.713846140845401e-06, + "loss": 0.23076622188091278, + "step": 5224 + }, + { + "epoch": 1.3874651440711725, + "grad_norm": 1.1122357580396618, + "learning_rate": 4.7101194989911635e-06, + "loss": 0.2159188687801361, + "step": 5225 + }, + { + "epoch": 1.3877307130527154, + "grad_norm": 1.433039209205417, + "learning_rate": 4.706393876963497e-06, + "loss": 0.24891307950019836, + "step": 5226 + }, + { + "epoch": 1.3879962820342584, + "grad_norm": 1.2167285098476437, + "learning_rate": 4.702669275480659e-06, + "loss": 0.26254773139953613, + "step": 5227 + }, + { + "epoch": 1.3882618510158014, + "grad_norm": 1.0872799599118763, + "learning_rate": 4.698945695260709e-06, + "loss": 0.19589121639728546, + "step": 5228 + }, + { + "epoch": 1.3885274199973443, + "grad_norm": 1.273899860234835, + "learning_rate": 4.695223137021509e-06, + "loss": 0.23796147108078003, + "step": 5229 + }, + { + "epoch": 1.3887929889788873, + "grad_norm": 1.1566738109261303, + "learning_rate": 
4.6915016014807235e-06, + "loss": 0.21211156249046326, + "step": 5230 + }, + { + "epoch": 1.3890585579604302, + "grad_norm": 1.1477189909918881, + "learning_rate": 4.687781089355817e-06, + "loss": 0.22418555617332458, + "step": 5231 + }, + { + "epoch": 1.3893241269419732, + "grad_norm": 1.1999712861158167, + "learning_rate": 4.68406160136407e-06, + "loss": 0.24140511453151703, + "step": 5232 + }, + { + "epoch": 1.389589695923516, + "grad_norm": 1.3515422291949701, + "learning_rate": 4.68034313822255e-06, + "loss": 0.2863473892211914, + "step": 5233 + }, + { + "epoch": 1.389855264905059, + "grad_norm": 1.1002404477789451, + "learning_rate": 4.676625700648133e-06, + "loss": 0.21283546090126038, + "step": 5234 + }, + { + "epoch": 1.390120833886602, + "grad_norm": 1.311958297113244, + "learning_rate": 4.672909289357498e-06, + "loss": 0.2701990008354187, + "step": 5235 + }, + { + "epoch": 1.390386402868145, + "grad_norm": 1.1672674472381515, + "learning_rate": 4.669193905067124e-06, + "loss": 0.23807264864444733, + "step": 5236 + }, + { + "epoch": 1.390651971849688, + "grad_norm": 1.3282268361230456, + "learning_rate": 4.665479548493298e-06, + "loss": 0.22204206883907318, + "step": 5237 + }, + { + "epoch": 1.3909175408312309, + "grad_norm": 1.2590492281878678, + "learning_rate": 4.661766220352098e-06, + "loss": 0.22389569878578186, + "step": 5238 + }, + { + "epoch": 1.3911831098127738, + "grad_norm": 1.2844920522393721, + "learning_rate": 4.65805392135941e-06, + "loss": 0.23752997815608978, + "step": 5239 + }, + { + "epoch": 1.3914486787943168, + "grad_norm": 1.8677910056359206, + "learning_rate": 4.654342652230921e-06, + "loss": 0.24055880308151245, + "step": 5240 + }, + { + "epoch": 1.3917142477758597, + "grad_norm": 1.2030621240735913, + "learning_rate": 4.6506324136821255e-06, + "loss": 0.22136151790618896, + "step": 5241 + }, + { + "epoch": 1.3919798167574027, + "grad_norm": 1.299031121789001, + "learning_rate": 4.646923206428311e-06, + "loss": 0.2616429924964905, 
+ "step": 5242 + }, + { + "epoch": 1.3922453857389456, + "grad_norm": 1.218734267375269, + "learning_rate": 4.643215031184569e-06, + "loss": 0.24827662110328674, + "step": 5243 + }, + { + "epoch": 1.3925109547204886, + "grad_norm": 1.3223478407487963, + "learning_rate": 4.639507888665792e-06, + "loss": 0.21999669075012207, + "step": 5244 + }, + { + "epoch": 1.3927765237020315, + "grad_norm": 1.3241857590600639, + "learning_rate": 4.6358017795866715e-06, + "loss": 0.24511300027370453, + "step": 5245 + }, + { + "epoch": 1.3930420926835745, + "grad_norm": 1.2459535025826622, + "learning_rate": 4.632096704661704e-06, + "loss": 0.2410753220319748, + "step": 5246 + }, + { + "epoch": 1.3933076616651174, + "grad_norm": 1.157173292152249, + "learning_rate": 4.628392664605184e-06, + "loss": 0.2160021960735321, + "step": 5247 + }, + { + "epoch": 1.3935732306466604, + "grad_norm": 1.2204303717623475, + "learning_rate": 4.624689660131204e-06, + "loss": 0.22672782838344574, + "step": 5248 + }, + { + "epoch": 1.3938387996282033, + "grad_norm": 1.3056904555347544, + "learning_rate": 4.620987691953659e-06, + "loss": 0.25474926829338074, + "step": 5249 + }, + { + "epoch": 1.3941043686097463, + "grad_norm": 1.3078938706976893, + "learning_rate": 4.617286760786252e-06, + "loss": 0.2449323832988739, + "step": 5250 + }, + { + "epoch": 1.3943699375912892, + "grad_norm": 1.4350253205296164, + "learning_rate": 4.613586867342473e-06, + "loss": 0.23727643489837646, + "step": 5251 + }, + { + "epoch": 1.3946355065728322, + "grad_norm": 1.492440797106639, + "learning_rate": 4.609888012335624e-06, + "loss": 0.23727962374687195, + "step": 5252 + }, + { + "epoch": 1.3949010755543751, + "grad_norm": 1.1595482332609377, + "learning_rate": 4.60619019647879e-06, + "loss": 0.21957805752754211, + "step": 5253 + }, + { + "epoch": 1.395166644535918, + "grad_norm": 1.1972608851584254, + "learning_rate": 4.6024934204848745e-06, + "loss": 0.24184471368789673, + "step": 5254 + }, + { + "epoch": 
1.395432213517461, + "grad_norm": 1.2654091836286674, + "learning_rate": 4.598797685066568e-06, + "loss": 0.239216148853302, + "step": 5255 + }, + { + "epoch": 1.395697782499004, + "grad_norm": 1.1503034311319646, + "learning_rate": 4.595102990936367e-06, + "loss": 0.17741018533706665, + "step": 5256 + }, + { + "epoch": 1.395963351480547, + "grad_norm": 1.2669115039567294, + "learning_rate": 4.591409338806566e-06, + "loss": 0.26139867305755615, + "step": 5257 + }, + { + "epoch": 1.39622892046209, + "grad_norm": 1.1295627244433792, + "learning_rate": 4.587716729389251e-06, + "loss": 0.23689255118370056, + "step": 5258 + }, + { + "epoch": 1.3964944894436329, + "grad_norm": 1.3449494333614898, + "learning_rate": 4.584025163396323e-06, + "loss": 0.22679267823696136, + "step": 5259 + }, + { + "epoch": 1.3967600584251758, + "grad_norm": 1.4665032620533849, + "learning_rate": 4.580334641539467e-06, + "loss": 0.2743435204029083, + "step": 5260 + }, + { + "epoch": 1.3970256274067188, + "grad_norm": 1.166091966014122, + "learning_rate": 4.5766451645301735e-06, + "loss": 0.22738990187644958, + "step": 5261 + }, + { + "epoch": 1.3972911963882617, + "grad_norm": 1.2398512539901747, + "learning_rate": 4.57295673307973e-06, + "loss": 0.24826082587242126, + "step": 5262 + }, + { + "epoch": 1.3975567653698049, + "grad_norm": 1.2172880570038314, + "learning_rate": 4.569269347899222e-06, + "loss": 0.23121042549610138, + "step": 5263 + }, + { + "epoch": 1.3978223343513478, + "grad_norm": 2.1881918032824443, + "learning_rate": 4.5655830096995345e-06, + "loss": 0.21382957696914673, + "step": 5264 + }, + { + "epoch": 1.3980879033328908, + "grad_norm": 1.6700623666107715, + "learning_rate": 4.561897719191349e-06, + "loss": 0.24439184367656708, + "step": 5265 + }, + { + "epoch": 1.3983534723144337, + "grad_norm": 1.1734120938371422, + "learning_rate": 4.558213477085148e-06, + "loss": 0.2106003314256668, + "step": 5266 + }, + { + "epoch": 1.3986190412959767, + "grad_norm": 
1.568387486793487, + "learning_rate": 4.554530284091209e-06, + "loss": 0.3073291480541229, + "step": 5267 + }, + { + "epoch": 1.3988846102775196, + "grad_norm": 1.226744359266016, + "learning_rate": 4.550848140919606e-06, + "loss": 0.2448226660490036, + "step": 5268 + }, + { + "epoch": 1.3991501792590626, + "grad_norm": 1.4434974870419186, + "learning_rate": 4.5471670482802165e-06, + "loss": 0.25378671288490295, + "step": 5269 + }, + { + "epoch": 1.3994157482406056, + "grad_norm": 1.243366792714921, + "learning_rate": 4.5434870068827086e-06, + "loss": 0.2735089659690857, + "step": 5270 + }, + { + "epoch": 1.3996813172221485, + "grad_norm": 1.3983115308066707, + "learning_rate": 4.539808017436552e-06, + "loss": 0.2530548870563507, + "step": 5271 + }, + { + "epoch": 1.3999468862036915, + "grad_norm": 1.2566722493021396, + "learning_rate": 4.536130080651015e-06, + "loss": 0.23692254722118378, + "step": 5272 + }, + { + "epoch": 1.4002124551852344, + "grad_norm": 1.257120121799197, + "learning_rate": 4.532453197235155e-06, + "loss": 0.24554882943630219, + "step": 5273 + }, + { + "epoch": 1.4004780241667774, + "grad_norm": 1.2106096425654094, + "learning_rate": 4.528777367897837e-06, + "loss": 0.20152084529399872, + "step": 5274 + }, + { + "epoch": 1.4007435931483203, + "grad_norm": 1.207683737630722, + "learning_rate": 4.525102593347714e-06, + "loss": 0.20908965170383453, + "step": 5275 + }, + { + "epoch": 1.4010091621298633, + "grad_norm": 1.2398706056963738, + "learning_rate": 4.521428874293238e-06, + "loss": 0.23158209025859833, + "step": 5276 + }, + { + "epoch": 1.4012747311114062, + "grad_norm": 1.2494835342931663, + "learning_rate": 4.517756211442664e-06, + "loss": 0.2483675330877304, + "step": 5277 + }, + { + "epoch": 1.4015403000929492, + "grad_norm": 1.1662936164598174, + "learning_rate": 4.514084605504035e-06, + "loss": 0.23435397446155548, + "step": 5278 + }, + { + "epoch": 1.4018058690744921, + "grad_norm": 1.242534131664269, + "learning_rate": 
4.510414057185195e-06, + "loss": 0.2605316936969757, + "step": 5279 + }, + { + "epoch": 1.402071438056035, + "grad_norm": 1.148911142729499, + "learning_rate": 4.506744567193782e-06, + "loss": 0.2279929518699646, + "step": 5280 + }, + { + "epoch": 1.402337007037578, + "grad_norm": 1.1849060379752767, + "learning_rate": 4.503076136237228e-06, + "loss": 0.23011639714241028, + "step": 5281 + }, + { + "epoch": 1.402602576019121, + "grad_norm": 1.1735153050753564, + "learning_rate": 4.499408765022765e-06, + "loss": 0.213611900806427, + "step": 5282 + }, + { + "epoch": 1.402868145000664, + "grad_norm": 1.3225078215525052, + "learning_rate": 4.495742454257418e-06, + "loss": 0.25555503368377686, + "step": 5283 + }, + { + "epoch": 1.4031337139822069, + "grad_norm": 1.331030123703595, + "learning_rate": 4.4920772046480095e-06, + "loss": 0.2694614827632904, + "step": 5284 + }, + { + "epoch": 1.4033992829637498, + "grad_norm": 1.3958578164403037, + "learning_rate": 4.4884130169011565e-06, + "loss": 0.2160607874393463, + "step": 5285 + }, + { + "epoch": 1.4036648519452928, + "grad_norm": 1.4996515147203022, + "learning_rate": 4.48474989172327e-06, + "loss": 0.2556128203868866, + "step": 5286 + }, + { + "epoch": 1.4039304209268357, + "grad_norm": 1.2506403611380352, + "learning_rate": 4.481087829820558e-06, + "loss": 0.2251313328742981, + "step": 5287 + }, + { + "epoch": 1.4041959899083787, + "grad_norm": 1.380992563161254, + "learning_rate": 4.477426831899024e-06, + "loss": 0.26856666803359985, + "step": 5288 + }, + { + "epoch": 1.4044615588899216, + "grad_norm": 1.2429158128712894, + "learning_rate": 4.473766898664464e-06, + "loss": 0.25573840737342834, + "step": 5289 + }, + { + "epoch": 1.4047271278714646, + "grad_norm": 1.2559748496125192, + "learning_rate": 4.4701080308224685e-06, + "loss": 0.26519301533699036, + "step": 5290 + }, + { + "epoch": 1.4049926968530075, + "grad_norm": 1.5959863642176566, + "learning_rate": 4.466450229078427e-06, + "loss": 0.2329619824886322, + 
"step": 5291 + }, + { + "epoch": 1.4052582658345505, + "grad_norm": 1.208485124140325, + "learning_rate": 4.4627934941375185e-06, + "loss": 0.2243901491165161, + "step": 5292 + }, + { + "epoch": 1.4055238348160934, + "grad_norm": 1.2042065274178317, + "learning_rate": 4.45913782670472e-06, + "loss": 0.22516998648643494, + "step": 5293 + }, + { + "epoch": 1.4057894037976364, + "grad_norm": 1.2427926273641645, + "learning_rate": 4.455483227484796e-06, + "loss": 0.25573113560676575, + "step": 5294 + }, + { + "epoch": 1.4060549727791793, + "grad_norm": 1.3935629686917204, + "learning_rate": 4.451829697182317e-06, + "loss": 0.2568536698818207, + "step": 5295 + }, + { + "epoch": 1.4063205417607223, + "grad_norm": 1.293797792298673, + "learning_rate": 4.448177236501638e-06, + "loss": 0.24510663747787476, + "step": 5296 + }, + { + "epoch": 1.4065861107422652, + "grad_norm": 1.3445763390180965, + "learning_rate": 4.444525846146911e-06, + "loss": 0.24890470504760742, + "step": 5297 + }, + { + "epoch": 1.4068516797238082, + "grad_norm": 1.3096169257052843, + "learning_rate": 4.440875526822081e-06, + "loss": 0.21442994475364685, + "step": 5298 + }, + { + "epoch": 1.4071172487053512, + "grad_norm": 1.2628911672392604, + "learning_rate": 4.437226279230884e-06, + "loss": 0.24281370639801025, + "step": 5299 + }, + { + "epoch": 1.407382817686894, + "grad_norm": 1.2336479145010515, + "learning_rate": 4.433578104076853e-06, + "loss": 0.19542500376701355, + "step": 5300 + }, + { + "epoch": 1.407648386668437, + "grad_norm": 1.256359230599367, + "learning_rate": 4.429931002063315e-06, + "loss": 0.22688990831375122, + "step": 5301 + }, + { + "epoch": 1.40791395564998, + "grad_norm": 1.3692436485711592, + "learning_rate": 4.42628497389339e-06, + "loss": 0.2520858347415924, + "step": 5302 + }, + { + "epoch": 1.408179524631523, + "grad_norm": 1.1723697651028326, + "learning_rate": 4.42264002026998e-06, + "loss": 0.237991064786911, + "step": 5303 + }, + { + "epoch": 1.408445093613066, + 
"grad_norm": 1.1277997255078087, + "learning_rate": 4.418996141895797e-06, + "loss": 0.20164436101913452, + "step": 5304 + }, + { + "epoch": 1.408710662594609, + "grad_norm": 1.2657361694815492, + "learning_rate": 4.415353339473338e-06, + "loss": 0.24009189009666443, + "step": 5305 + }, + { + "epoch": 1.408976231576152, + "grad_norm": 1.138145945953283, + "learning_rate": 4.411711613704889e-06, + "loss": 0.23170322179794312, + "step": 5306 + }, + { + "epoch": 1.409241800557695, + "grad_norm": 1.2244077415708243, + "learning_rate": 4.408070965292534e-06, + "loss": 0.2280617356300354, + "step": 5307 + }, + { + "epoch": 1.409507369539238, + "grad_norm": 1.2724409466040383, + "learning_rate": 4.404431394938145e-06, + "loss": 0.21982887387275696, + "step": 5308 + }, + { + "epoch": 1.409772938520781, + "grad_norm": 1.265647410959733, + "learning_rate": 4.40079290334339e-06, + "loss": 0.25295430421829224, + "step": 5309 + }, + { + "epoch": 1.4100385075023238, + "grad_norm": 1.1099961782761754, + "learning_rate": 4.397155491209727e-06, + "loss": 0.20109041035175323, + "step": 5310 + }, + { + "epoch": 1.4103040764838668, + "grad_norm": 1.3436616824827443, + "learning_rate": 4.393519159238405e-06, + "loss": 0.2487715482711792, + "step": 5311 + }, + { + "epoch": 1.4105696454654097, + "grad_norm": 1.1475311486694626, + "learning_rate": 4.389883908130465e-06, + "loss": 0.2031790167093277, + "step": 5312 + }, + { + "epoch": 1.4108352144469527, + "grad_norm": 1.277969729475343, + "learning_rate": 4.386249738586744e-06, + "loss": 0.23029211163520813, + "step": 5313 + }, + { + "epoch": 1.4111007834284957, + "grad_norm": 1.2100830863469687, + "learning_rate": 4.382616651307866e-06, + "loss": 0.23080995678901672, + "step": 5314 + }, + { + "epoch": 1.4113663524100386, + "grad_norm": 1.2376227742095711, + "learning_rate": 4.378984646994248e-06, + "loss": 0.2450534999370575, + "step": 5315 + }, + { + "epoch": 1.4116319213915816, + "grad_norm": 1.266655148641824, + "learning_rate": 
4.375353726346094e-06, + "loss": 0.24349799752235413, + "step": 5316 + }, + { + "epoch": 1.4118974903731245, + "grad_norm": 1.2696628766548714, + "learning_rate": 4.371723890063411e-06, + "loss": 0.2431599199771881, + "step": 5317 + }, + { + "epoch": 1.4121630593546675, + "grad_norm": 1.3688178233929764, + "learning_rate": 4.368095138845978e-06, + "loss": 0.2051251232624054, + "step": 5318 + }, + { + "epoch": 1.4124286283362104, + "grad_norm": 1.1726447102511934, + "learning_rate": 4.36446747339338e-06, + "loss": 0.21346575021743774, + "step": 5319 + }, + { + "epoch": 1.4126941973177534, + "grad_norm": 1.2726406383058895, + "learning_rate": 4.360840894404989e-06, + "loss": 0.22193217277526855, + "step": 5320 + }, + { + "epoch": 1.4129597662992963, + "grad_norm": 1.2762131056761095, + "learning_rate": 4.357215402579961e-06, + "loss": 0.2112501859664917, + "step": 5321 + }, + { + "epoch": 1.4132253352808393, + "grad_norm": 1.1864412536946314, + "learning_rate": 4.3535909986172565e-06, + "loss": 0.2648766040802002, + "step": 5322 + }, + { + "epoch": 1.4134909042623822, + "grad_norm": 1.1533413783243194, + "learning_rate": 4.349967683215614e-06, + "loss": 0.22139690816402435, + "step": 5323 + }, + { + "epoch": 1.4137564732439252, + "grad_norm": 1.0259028802936685, + "learning_rate": 4.346345457073568e-06, + "loss": 0.21558481454849243, + "step": 5324 + }, + { + "epoch": 1.4140220422254681, + "grad_norm": 1.2763949378052617, + "learning_rate": 4.342724320889438e-06, + "loss": 0.2013886272907257, + "step": 5325 + }, + { + "epoch": 1.414287611207011, + "grad_norm": 1.2216640015824227, + "learning_rate": 4.3391042753613375e-06, + "loss": 0.2428729385137558, + "step": 5326 + }, + { + "epoch": 1.414553180188554, + "grad_norm": 1.2385329501903242, + "learning_rate": 4.3354853211871696e-06, + "loss": 0.20930354297161102, + "step": 5327 + }, + { + "epoch": 1.414818749170097, + "grad_norm": 1.1373474530618315, + "learning_rate": 4.331867459064623e-06, + "loss": 
0.18988853693008423, + "step": 5328 + }, + { + "epoch": 1.41508431815164, + "grad_norm": 1.2833653393491664, + "learning_rate": 4.328250689691182e-06, + "loss": 0.24618801474571228, + "step": 5329 + }, + { + "epoch": 1.4153498871331829, + "grad_norm": 1.2635824567099267, + "learning_rate": 4.324635013764113e-06, + "loss": 0.23857265710830688, + "step": 5330 + }, + { + "epoch": 1.4156154561147258, + "grad_norm": 1.3200622076177175, + "learning_rate": 4.321020431980483e-06, + "loss": 0.21869014203548431, + "step": 5331 + }, + { + "epoch": 1.4158810250962688, + "grad_norm": 1.2317649692424293, + "learning_rate": 4.317406945037138e-06, + "loss": 0.2508969008922577, + "step": 5332 + }, + { + "epoch": 1.4161465940778117, + "grad_norm": 1.2114692744130235, + "learning_rate": 4.313794553630711e-06, + "loss": 0.2406233549118042, + "step": 5333 + }, + { + "epoch": 1.4164121630593547, + "grad_norm": 1.3314396378070763, + "learning_rate": 4.310183258457632e-06, + "loss": 0.2376224398612976, + "step": 5334 + }, + { + "epoch": 1.4166777320408976, + "grad_norm": 1.4802475566731417, + "learning_rate": 4.306573060214115e-06, + "loss": 0.2818688750267029, + "step": 5335 + }, + { + "epoch": 1.4169433010224406, + "grad_norm": 1.2248721858463099, + "learning_rate": 4.302963959596165e-06, + "loss": 0.2279777228832245, + "step": 5336 + }, + { + "epoch": 1.4172088700039835, + "grad_norm": 1.3681495314955672, + "learning_rate": 4.299355957299573e-06, + "loss": 0.2652052640914917, + "step": 5337 + }, + { + "epoch": 1.4174744389855265, + "grad_norm": 1.2814638931564002, + "learning_rate": 4.2957490540199185e-06, + "loss": 0.24415750801563263, + "step": 5338 + }, + { + "epoch": 1.4177400079670694, + "grad_norm": 1.2028147011593575, + "learning_rate": 4.292143250452569e-06, + "loss": 0.2318287044763565, + "step": 5339 + }, + { + "epoch": 1.4180055769486124, + "grad_norm": 1.1621443407054215, + "learning_rate": 4.288538547292685e-06, + "loss": 0.19914361834526062, + "step": 5340 + }, + { + 
"epoch": 1.4182711459301554, + "grad_norm": 1.2533818722517012, + "learning_rate": 4.2849349452352095e-06, + "loss": 0.22550678253173828, + "step": 5341 + }, + { + "epoch": 1.4185367149116983, + "grad_norm": 1.3481328868952585, + "learning_rate": 4.281332444974874e-06, + "loss": 0.25001436471939087, + "step": 5342 + }, + { + "epoch": 1.4188022838932413, + "grad_norm": 1.2557895781680242, + "learning_rate": 4.277731047206197e-06, + "loss": 0.24873407185077667, + "step": 5343 + }, + { + "epoch": 1.4190678528747842, + "grad_norm": 1.2532145662207181, + "learning_rate": 4.274130752623487e-06, + "loss": 0.25732600688934326, + "step": 5344 + }, + { + "epoch": 1.4193334218563272, + "grad_norm": 1.1956499236331526, + "learning_rate": 4.270531561920836e-06, + "loss": 0.1894054263830185, + "step": 5345 + }, + { + "epoch": 1.4195989908378701, + "grad_norm": 1.2861805940078326, + "learning_rate": 4.2669334757921284e-06, + "loss": 0.2632025480270386, + "step": 5346 + }, + { + "epoch": 1.419864559819413, + "grad_norm": 1.1223708980675566, + "learning_rate": 4.2633364949310315e-06, + "loss": 0.22106415033340454, + "step": 5347 + }, + { + "epoch": 1.420130128800956, + "grad_norm": 1.2191554963858982, + "learning_rate": 4.259740620031e-06, + "loss": 0.2246699184179306, + "step": 5348 + }, + { + "epoch": 1.420395697782499, + "grad_norm": 1.2377251567235985, + "learning_rate": 4.256145851785277e-06, + "loss": 0.2335890382528305, + "step": 5349 + }, + { + "epoch": 1.420661266764042, + "grad_norm": 1.3200881727026734, + "learning_rate": 4.252552190886892e-06, + "loss": 0.25485220551490784, + "step": 5350 + }, + { + "epoch": 1.4209268357455849, + "grad_norm": 1.406483107573335, + "learning_rate": 4.248959638028659e-06, + "loss": 0.26234719157218933, + "step": 5351 + }, + { + "epoch": 1.4211924047271278, + "grad_norm": 1.1946878328095272, + "learning_rate": 4.245368193903181e-06, + "loss": 0.22083795070648193, + "step": 5352 + }, + { + "epoch": 1.4214579737086708, + "grad_norm": 
1.288602079194267, + "learning_rate": 4.241777859202846e-06, + "loss": 0.1886332929134369, + "step": 5353 + }, + { + "epoch": 1.4217235426902137, + "grad_norm": 1.506700165302322, + "learning_rate": 4.238188634619826e-06, + "loss": 0.26154160499572754, + "step": 5354 + }, + { + "epoch": 1.4219891116717567, + "grad_norm": 1.1472960297751262, + "learning_rate": 4.234600520846085e-06, + "loss": 0.24761158227920532, + "step": 5355 + }, + { + "epoch": 1.4222546806532996, + "grad_norm": 1.154393443673505, + "learning_rate": 4.2310135185733625e-06, + "loss": 0.20936736464500427, + "step": 5356 + }, + { + "epoch": 1.4225202496348426, + "grad_norm": 1.15600424022186, + "learning_rate": 4.227427628493198e-06, + "loss": 0.2173127979040146, + "step": 5357 + }, + { + "epoch": 1.4227858186163855, + "grad_norm": 1.217414245555098, + "learning_rate": 4.223842851296907e-06, + "loss": 0.2598559260368347, + "step": 5358 + }, + { + "epoch": 1.4230513875979285, + "grad_norm": 1.224021391863692, + "learning_rate": 4.22025918767559e-06, + "loss": 0.23701196908950806, + "step": 5359 + }, + { + "epoch": 1.4233169565794714, + "grad_norm": 1.2134140712383175, + "learning_rate": 4.216676638320135e-06, + "loss": 0.26052403450012207, + "step": 5360 + }, + { + "epoch": 1.4235825255610144, + "grad_norm": 1.2465682642545985, + "learning_rate": 4.213095203921217e-06, + "loss": 0.2464584857225418, + "step": 5361 + }, + { + "epoch": 1.4238480945425573, + "grad_norm": 1.2646547527576821, + "learning_rate": 4.209514885169294e-06, + "loss": 0.25889426469802856, + "step": 5362 + }, + { + "epoch": 1.4241136635241003, + "grad_norm": 1.2990812156107416, + "learning_rate": 4.2059356827546076e-06, + "loss": 0.26529380679130554, + "step": 5363 + }, + { + "epoch": 1.4243792325056432, + "grad_norm": 1.1509506747022789, + "learning_rate": 4.202357597367187e-06, + "loss": 0.2284630388021469, + "step": 5364 + }, + { + "epoch": 1.4246448014871862, + "grad_norm": 1.1509689814009059, + "learning_rate": 
4.198780629696845e-06, + "loss": 0.2361873984336853, + "step": 5365 + }, + { + "epoch": 1.4249103704687291, + "grad_norm": 1.2489364054166838, + "learning_rate": 4.195204780433179e-06, + "loss": 0.2473624348640442, + "step": 5366 + }, + { + "epoch": 1.425175939450272, + "grad_norm": 1.2584581044476912, + "learning_rate": 4.19163005026557e-06, + "loss": 0.24852773547172546, + "step": 5367 + }, + { + "epoch": 1.425441508431815, + "grad_norm": 1.413523972125062, + "learning_rate": 4.188056439883183e-06, + "loss": 0.28409647941589355, + "step": 5368 + }, + { + "epoch": 1.425707077413358, + "grad_norm": 1.2672381227374172, + "learning_rate": 4.18448394997497e-06, + "loss": 0.2500985562801361, + "step": 5369 + }, + { + "epoch": 1.425972646394901, + "grad_norm": 1.2421534737421158, + "learning_rate": 4.1809125812296635e-06, + "loss": 0.23475977778434753, + "step": 5370 + }, + { + "epoch": 1.426238215376444, + "grad_norm": 1.3107626948919207, + "learning_rate": 4.177342334335782e-06, + "loss": 0.22925345599651337, + "step": 5371 + }, + { + "epoch": 1.4265037843579869, + "grad_norm": 1.1701714137905739, + "learning_rate": 4.173773209981627e-06, + "loss": 0.24463894963264465, + "step": 5372 + }, + { + "epoch": 1.4267693533395298, + "grad_norm": 1.2600839330793319, + "learning_rate": 4.170205208855281e-06, + "loss": 0.2451590746641159, + "step": 5373 + }, + { + "epoch": 1.4270349223210728, + "grad_norm": 1.192456234510782, + "learning_rate": 4.166638331644613e-06, + "loss": 0.21078437566757202, + "step": 5374 + }, + { + "epoch": 1.427300491302616, + "grad_norm": 1.1548728286132999, + "learning_rate": 4.163072579037279e-06, + "loss": 0.21466529369354248, + "step": 5375 + }, + { + "epoch": 1.4275660602841589, + "grad_norm": 1.3327200015078104, + "learning_rate": 4.159507951720713e-06, + "loss": 0.20103147625923157, + "step": 5376 + }, + { + "epoch": 1.4278316292657018, + "grad_norm": 1.2634022835060015, + "learning_rate": 4.15594445038213e-06, + "loss": 0.2618871331214905, + 
"step": 5377 + }, + { + "epoch": 1.4280971982472448, + "grad_norm": 1.314150540124243, + "learning_rate": 4.152382075708534e-06, + "loss": 0.2496388852596283, + "step": 5378 + }, + { + "epoch": 1.4283627672287877, + "grad_norm": 1.2776066314767451, + "learning_rate": 4.148820828386707e-06, + "loss": 0.2663899064064026, + "step": 5379 + }, + { + "epoch": 1.4286283362103307, + "grad_norm": 1.223751737565641, + "learning_rate": 4.145260709103216e-06, + "loss": 0.23617541790008545, + "step": 5380 + }, + { + "epoch": 1.4288939051918736, + "grad_norm": 1.2184450229688006, + "learning_rate": 4.141701718544411e-06, + "loss": 0.200006365776062, + "step": 5381 + }, + { + "epoch": 1.4291594741734166, + "grad_norm": 1.2899877428495155, + "learning_rate": 4.138143857396425e-06, + "loss": 0.22707203030586243, + "step": 5382 + }, + { + "epoch": 1.4294250431549596, + "grad_norm": 1.210998695531734, + "learning_rate": 4.134587126345162e-06, + "loss": 0.23903624713420868, + "step": 5383 + }, + { + "epoch": 1.4296906121365025, + "grad_norm": 1.56990305006701, + "learning_rate": 4.131031526076329e-06, + "loss": 0.2308908998966217, + "step": 5384 + }, + { + "epoch": 1.4299561811180455, + "grad_norm": 1.2125776866133393, + "learning_rate": 4.127477057275398e-06, + "loss": 0.18762601912021637, + "step": 5385 + }, + { + "epoch": 1.4302217500995884, + "grad_norm": 1.3670823879917342, + "learning_rate": 4.123923720627633e-06, + "loss": 0.281406044960022, + "step": 5386 + }, + { + "epoch": 1.4304873190811314, + "grad_norm": 1.24677960623226, + "learning_rate": 4.120371516818071e-06, + "loss": 0.24858589470386505, + "step": 5387 + }, + { + "epoch": 1.4307528880626743, + "grad_norm": 1.2017896897650255, + "learning_rate": 4.116820446531538e-06, + "loss": 0.22179371118545532, + "step": 5388 + }, + { + "epoch": 1.4310184570442173, + "grad_norm": 1.1523445225939053, + "learning_rate": 4.113270510452636e-06, + "loss": 0.22086869180202484, + "step": 5389 + }, + { + "epoch": 1.4312840260257602, + 
"grad_norm": 1.295626323300653, + "learning_rate": 4.109721709265753e-06, + "loss": 0.231503427028656, + "step": 5390 + }, + { + "epoch": 1.4315495950073032, + "grad_norm": 1.31237620612278, + "learning_rate": 4.106174043655054e-06, + "loss": 0.255252867937088, + "step": 5391 + }, + { + "epoch": 1.4318151639888461, + "grad_norm": 1.2773394357808008, + "learning_rate": 4.1026275143044854e-06, + "loss": 0.23336587846279144, + "step": 5392 + }, + { + "epoch": 1.432080732970389, + "grad_norm": 1.3267952754600625, + "learning_rate": 4.099082121897783e-06, + "loss": 0.2468583881855011, + "step": 5393 + }, + { + "epoch": 1.432346301951932, + "grad_norm": 1.2137255679394872, + "learning_rate": 4.095537867118452e-06, + "loss": 0.21211153268814087, + "step": 5394 + }, + { + "epoch": 1.432611870933475, + "grad_norm": 1.2552061461264346, + "learning_rate": 4.091994750649783e-06, + "loss": 0.23173204064369202, + "step": 5395 + }, + { + "epoch": 1.432877439915018, + "grad_norm": 1.2420339991667666, + "learning_rate": 4.088452773174853e-06, + "loss": 0.2606658935546875, + "step": 5396 + }, + { + "epoch": 1.4331430088965609, + "grad_norm": 1.2141954954044303, + "learning_rate": 4.084911935376502e-06, + "loss": 0.21198314428329468, + "step": 5397 + }, + { + "epoch": 1.4334085778781038, + "grad_norm": 1.273859413406427, + "learning_rate": 4.08137223793737e-06, + "loss": 0.216193288564682, + "step": 5398 + }, + { + "epoch": 1.4336741468596468, + "grad_norm": 1.3862686522767422, + "learning_rate": 4.077833681539866e-06, + "loss": 0.27767330408096313, + "step": 5399 + }, + { + "epoch": 1.4339397158411897, + "grad_norm": 1.193043888736233, + "learning_rate": 4.0742962668661826e-06, + "loss": 0.21584349870681763, + "step": 5400 + }, + { + "epoch": 1.4342052848227327, + "grad_norm": 1.2801175216615184, + "learning_rate": 4.070759994598288e-06, + "loss": 0.220070481300354, + "step": 5401 + }, + { + "epoch": 1.4344708538042756, + "grad_norm": 1.4276288870785, + "learning_rate": 
4.067224865417941e-06, + "loss": 0.26035353541374207, + "step": 5402 + }, + { + "epoch": 1.4347364227858186, + "grad_norm": 1.1784144309393945, + "learning_rate": 4.063690880006671e-06, + "loss": 0.23704876005649567, + "step": 5403 + }, + { + "epoch": 1.4350019917673615, + "grad_norm": 1.2793709287846655, + "learning_rate": 4.060158039045785e-06, + "loss": 0.2345760464668274, + "step": 5404 + }, + { + "epoch": 1.4352675607489045, + "grad_norm": 1.2583985201804126, + "learning_rate": 4.056626343216377e-06, + "loss": 0.21307331323623657, + "step": 5405 + }, + { + "epoch": 1.4355331297304474, + "grad_norm": 1.2401804894465362, + "learning_rate": 4.053095793199313e-06, + "loss": 0.22029465436935425, + "step": 5406 + }, + { + "epoch": 1.4357986987119904, + "grad_norm": 1.3865770800537958, + "learning_rate": 4.049566389675244e-06, + "loss": 0.23419252038002014, + "step": 5407 + }, + { + "epoch": 1.4360642676935333, + "grad_norm": 1.2114754283066453, + "learning_rate": 4.046038133324595e-06, + "loss": 0.21648669242858887, + "step": 5408 + }, + { + "epoch": 1.4363298366750763, + "grad_norm": 1.3682353450989566, + "learning_rate": 4.042511024827573e-06, + "loss": 0.2343464195728302, + "step": 5409 + }, + { + "epoch": 1.4365954056566193, + "grad_norm": 1.28417678054491, + "learning_rate": 4.0389850648641615e-06, + "loss": 0.20108605921268463, + "step": 5410 + }, + { + "epoch": 1.4368609746381622, + "grad_norm": 1.2806759093192033, + "learning_rate": 4.0354602541141315e-06, + "loss": 0.21885806322097778, + "step": 5411 + }, + { + "epoch": 1.4371265436197052, + "grad_norm": 1.276580988371958, + "learning_rate": 4.031936593257017e-06, + "loss": 0.2382376492023468, + "step": 5412 + }, + { + "epoch": 1.437392112601248, + "grad_norm": 1.1333519329501958, + "learning_rate": 4.028414082972141e-06, + "loss": 0.21434128284454346, + "step": 5413 + }, + { + "epoch": 1.437657681582791, + "grad_norm": 1.2161992893188567, + "learning_rate": 4.024892723938601e-06, + "loss": 
0.2345191240310669, + "step": 5414 + }, + { + "epoch": 1.437923250564334, + "grad_norm": 1.309666461481554, + "learning_rate": 4.021372516835273e-06, + "loss": 0.2478899210691452, + "step": 5415 + }, + { + "epoch": 1.438188819545877, + "grad_norm": 1.2593045594203824, + "learning_rate": 4.017853462340813e-06, + "loss": 0.21356827020645142, + "step": 5416 + }, + { + "epoch": 1.4384543885274201, + "grad_norm": 1.3891493537034765, + "learning_rate": 4.014335561133652e-06, + "loss": 0.26329827308654785, + "step": 5417 + }, + { + "epoch": 1.438719957508963, + "grad_norm": 1.3689872343615141, + "learning_rate": 4.010818813892e-06, + "loss": 0.25880998373031616, + "step": 5418 + }, + { + "epoch": 1.438985526490506, + "grad_norm": 1.2738388972586026, + "learning_rate": 4.007303221293844e-06, + "loss": 0.22749441862106323, + "step": 5419 + }, + { + "epoch": 1.439251095472049, + "grad_norm": 1.2267331489472144, + "learning_rate": 4.00378878401695e-06, + "loss": 0.2242615520954132, + "step": 5420 + }, + { + "epoch": 1.439516664453592, + "grad_norm": 1.168704950265394, + "learning_rate": 4.000275502738862e-06, + "loss": 0.19751839339733124, + "step": 5421 + }, + { + "epoch": 1.439782233435135, + "grad_norm": 1.4000090999513362, + "learning_rate": 3.996763378136895e-06, + "loss": 0.27319905161857605, + "step": 5422 + }, + { + "epoch": 1.4400478024166778, + "grad_norm": 1.1483039760635705, + "learning_rate": 3.993252410888149e-06, + "loss": 0.21676769852638245, + "step": 5423 + }, + { + "epoch": 1.4403133713982208, + "grad_norm": 1.222649759682682, + "learning_rate": 3.989742601669494e-06, + "loss": 0.22788718342781067, + "step": 5424 + }, + { + "epoch": 1.4405789403797638, + "grad_norm": 1.1800102666876688, + "learning_rate": 3.986233951157581e-06, + "loss": 0.23224875330924988, + "step": 5425 + }, + { + "epoch": 1.4408445093613067, + "grad_norm": 1.3242271211713557, + "learning_rate": 3.982726460028836e-06, + "loss": 0.23625247180461884, + "step": 5426 + }, + { + "epoch": 
1.4411100783428497, + "grad_norm": 1.237043381628487, + "learning_rate": 3.979220128959463e-06, + "loss": 0.2092093527317047, + "step": 5427 + }, + { + "epoch": 1.4413756473243926, + "grad_norm": 1.164989095324882, + "learning_rate": 3.975714958625442e-06, + "loss": 0.22196070849895477, + "step": 5428 + }, + { + "epoch": 1.4416412163059356, + "grad_norm": 1.248575755705502, + "learning_rate": 3.972210949702525e-06, + "loss": 0.21276375651359558, + "step": 5429 + }, + { + "epoch": 1.4419067852874785, + "grad_norm": 1.2714203744447936, + "learning_rate": 3.968708102866247e-06, + "loss": 0.22150103747844696, + "step": 5430 + }, + { + "epoch": 1.4421723542690215, + "grad_norm": 1.2519929176778726, + "learning_rate": 3.965206418791914e-06, + "loss": 0.24529573321342468, + "step": 5431 + }, + { + "epoch": 1.4424379232505644, + "grad_norm": 1.3331662749929607, + "learning_rate": 3.961705898154609e-06, + "loss": 0.24349135160446167, + "step": 5432 + }, + { + "epoch": 1.4427034922321074, + "grad_norm": 1.3094668545917496, + "learning_rate": 3.9582065416291926e-06, + "loss": 0.23481428623199463, + "step": 5433 + }, + { + "epoch": 1.4429690612136503, + "grad_norm": 1.2664431166747565, + "learning_rate": 3.954708349890299e-06, + "loss": 0.2366936057806015, + "step": 5434 + }, + { + "epoch": 1.4432346301951933, + "grad_norm": 1.2699903819491114, + "learning_rate": 3.951211323612336e-06, + "loss": 0.24792322516441345, + "step": 5435 + }, + { + "epoch": 1.4435001991767362, + "grad_norm": 1.1943208090894295, + "learning_rate": 3.947715463469493e-06, + "loss": 0.22601652145385742, + "step": 5436 + }, + { + "epoch": 1.4437657681582792, + "grad_norm": 1.1333130191791405, + "learning_rate": 3.9442207701357235e-06, + "loss": 0.19603165984153748, + "step": 5437 + }, + { + "epoch": 1.4440313371398221, + "grad_norm": 1.26512939224431, + "learning_rate": 3.940727244284772e-06, + "loss": 0.22619353234767914, + "step": 5438 + }, + { + "epoch": 1.444296906121365, + "grad_norm": 
1.3207139711857465, + "learning_rate": 3.937234886590146e-06, + "loss": 0.24836638569831848, + "step": 5439 + }, + { + "epoch": 1.444562475102908, + "grad_norm": 1.2114237797025103, + "learning_rate": 3.933743697725129e-06, + "loss": 0.21585768461227417, + "step": 5440 + }, + { + "epoch": 1.444828044084451, + "grad_norm": 1.2037953387653635, + "learning_rate": 3.930253678362784e-06, + "loss": 0.20876167714595795, + "step": 5441 + }, + { + "epoch": 1.445093613065994, + "grad_norm": 1.2825218153573943, + "learning_rate": 3.926764829175943e-06, + "loss": 0.24337999522686005, + "step": 5442 + }, + { + "epoch": 1.4453591820475369, + "grad_norm": 1.2238662957767994, + "learning_rate": 3.9232771508372155e-06, + "loss": 0.2511219084262848, + "step": 5443 + }, + { + "epoch": 1.4456247510290798, + "grad_norm": 1.2796769482653771, + "learning_rate": 3.919790644018986e-06, + "loss": 0.26257213950157166, + "step": 5444 + }, + { + "epoch": 1.4458903200106228, + "grad_norm": 1.3570371082898334, + "learning_rate": 3.91630530939341e-06, + "loss": 0.2720959782600403, + "step": 5445 + }, + { + "epoch": 1.4461558889921657, + "grad_norm": 1.2897968589877258, + "learning_rate": 3.912821147632421e-06, + "loss": 0.23849177360534668, + "step": 5446 + }, + { + "epoch": 1.4464214579737087, + "grad_norm": 1.2539273982781811, + "learning_rate": 3.909338159407722e-06, + "loss": 0.2366214245557785, + "step": 5447 + }, + { + "epoch": 1.4466870269552516, + "grad_norm": 1.21348130376658, + "learning_rate": 3.905856345390793e-06, + "loss": 0.21905584633350372, + "step": 5448 + }, + { + "epoch": 1.4469525959367946, + "grad_norm": 1.3001423574977207, + "learning_rate": 3.902375706252887e-06, + "loss": 0.23964065313339233, + "step": 5449 + }, + { + "epoch": 1.4472181649183375, + "grad_norm": 1.2161208716702177, + "learning_rate": 3.89889624266503e-06, + "loss": 0.22246500849723816, + "step": 5450 + }, + { + "epoch": 1.4474837338998805, + "grad_norm": 1.2845367508241097, + "learning_rate": 
3.895417955298022e-06, + "loss": 0.22980710864067078, + "step": 5451 + }, + { + "epoch": 1.4477493028814234, + "grad_norm": 1.4690832477509688, + "learning_rate": 3.8919408448224346e-06, + "loss": 0.21276253461837769, + "step": 5452 + }, + { + "epoch": 1.4480148718629664, + "grad_norm": 1.3515036942552143, + "learning_rate": 3.888464911908616e-06, + "loss": 0.23925542831420898, + "step": 5453 + }, + { + "epoch": 1.4482804408445094, + "grad_norm": 1.1871457723177183, + "learning_rate": 3.884990157226683e-06, + "loss": 0.21528369188308716, + "step": 5454 + }, + { + "epoch": 1.4485460098260523, + "grad_norm": 1.2673056278722348, + "learning_rate": 3.8815165814465235e-06, + "loss": 0.24563542008399963, + "step": 5455 + }, + { + "epoch": 1.4488115788075953, + "grad_norm": 1.2561210989748839, + "learning_rate": 3.87804418523781e-06, + "loss": 0.2721150517463684, + "step": 5456 + }, + { + "epoch": 1.4490771477891382, + "grad_norm": 1.3721328159682122, + "learning_rate": 3.874572969269976e-06, + "loss": 0.23716527223587036, + "step": 5457 + }, + { + "epoch": 1.4493427167706812, + "grad_norm": 1.5185790933002854, + "learning_rate": 3.871102934212231e-06, + "loss": 0.2182254046201706, + "step": 5458 + }, + { + "epoch": 1.4496082857522241, + "grad_norm": 1.233204842662738, + "learning_rate": 3.867634080733557e-06, + "loss": 0.2179020643234253, + "step": 5459 + }, + { + "epoch": 1.449873854733767, + "grad_norm": 1.2633976965193632, + "learning_rate": 3.864166409502706e-06, + "loss": 0.22901684045791626, + "step": 5460 + }, + { + "epoch": 1.45013942371531, + "grad_norm": 1.209132482684757, + "learning_rate": 3.860699921188211e-06, + "loss": 0.2287352979183197, + "step": 5461 + }, + { + "epoch": 1.450404992696853, + "grad_norm": 1.214494370780124, + "learning_rate": 3.85723461645836e-06, + "loss": 0.2448873668909073, + "step": 5462 + }, + { + "epoch": 1.450670561678396, + "grad_norm": 1.323933009108344, + "learning_rate": 3.85377049598123e-06, + "loss": 0.2693510055541992, + 
"step": 5463 + }, + { + "epoch": 1.4509361306599389, + "grad_norm": 1.1826355120377283, + "learning_rate": 3.8503075604246554e-06, + "loss": 0.25414884090423584, + "step": 5464 + }, + { + "epoch": 1.4512016996414818, + "grad_norm": 1.3400776704302024, + "learning_rate": 3.846845810456258e-06, + "loss": 0.27798837423324585, + "step": 5465 + }, + { + "epoch": 1.4514672686230248, + "grad_norm": 1.3109571985733361, + "learning_rate": 3.8433852467434175e-06, + "loss": 0.23348593711853027, + "step": 5466 + }, + { + "epoch": 1.4517328376045677, + "grad_norm": 1.148921292979252, + "learning_rate": 3.839925869953292e-06, + "loss": 0.20993635058403015, + "step": 5467 + }, + { + "epoch": 1.4519984065861107, + "grad_norm": 1.1967150813107374, + "learning_rate": 3.836467680752808e-06, + "loss": 0.225263774394989, + "step": 5468 + }, + { + "epoch": 1.4522639755676536, + "grad_norm": 4.549069881323283, + "learning_rate": 3.833010679808662e-06, + "loss": 0.2481595277786255, + "step": 5469 + }, + { + "epoch": 1.4525295445491966, + "grad_norm": 1.098861894900169, + "learning_rate": 3.829554867787324e-06, + "loss": 0.20755310356616974, + "step": 5470 + }, + { + "epoch": 1.4527951135307395, + "grad_norm": 1.3031978879220207, + "learning_rate": 3.826100245355034e-06, + "loss": 0.22124455869197845, + "step": 5471 + }, + { + "epoch": 1.4530606825122825, + "grad_norm": 1.1779333046553406, + "learning_rate": 3.822646813177803e-06, + "loss": 0.23461398482322693, + "step": 5472 + }, + { + "epoch": 1.4533262514938254, + "grad_norm": 1.123494857736561, + "learning_rate": 3.819194571921407e-06, + "loss": 0.22890526056289673, + "step": 5473 + }, + { + "epoch": 1.4535918204753684, + "grad_norm": 1.1163449125196687, + "learning_rate": 3.815743522251406e-06, + "loss": 0.23236533999443054, + "step": 5474 + }, + { + "epoch": 1.4538573894569113, + "grad_norm": 1.204733497516731, + "learning_rate": 3.8122936648331164e-06, + "loss": 0.2192365825176239, + "step": 5475 + }, + { + "epoch": 
1.4541229584384543, + "grad_norm": 1.3061324350348682, + "learning_rate": 3.8088450003316346e-06, + "loss": 0.23970162868499756, + "step": 5476 + }, + { + "epoch": 1.4543885274199972, + "grad_norm": 1.256131451943752, + "learning_rate": 3.8053975294118163e-06, + "loss": 0.24270984530448914, + "step": 5477 + }, + { + "epoch": 1.4546540964015402, + "grad_norm": 1.1616491435133687, + "learning_rate": 3.801951252738295e-06, + "loss": 0.22228944301605225, + "step": 5478 + }, + { + "epoch": 1.4549196653830831, + "grad_norm": 1.2998939083384287, + "learning_rate": 3.7985061709754735e-06, + "loss": 0.25029584765434265, + "step": 5479 + }, + { + "epoch": 1.455185234364626, + "grad_norm": 1.1546196330858232, + "learning_rate": 3.795062284787522e-06, + "loss": 0.23831725120544434, + "step": 5480 + }, + { + "epoch": 1.455450803346169, + "grad_norm": 1.2698177511587796, + "learning_rate": 3.7916195948383817e-06, + "loss": 0.2571605145931244, + "step": 5481 + }, + { + "epoch": 1.455716372327712, + "grad_norm": 1.4321109332673951, + "learning_rate": 3.7881781017917586e-06, + "loss": 0.2660857141017914, + "step": 5482 + }, + { + "epoch": 1.455981941309255, + "grad_norm": 1.3406733437493707, + "learning_rate": 3.7847378063111394e-06, + "loss": 0.2468302845954895, + "step": 5483 + }, + { + "epoch": 1.456247510290798, + "grad_norm": 1.363296358111954, + "learning_rate": 3.7812987090597696e-06, + "loss": 0.2559482753276825, + "step": 5484 + }, + { + "epoch": 1.4565130792723409, + "grad_norm": 1.2144737578388247, + "learning_rate": 3.7778608107006654e-06, + "loss": 0.24484393000602722, + "step": 5485 + }, + { + "epoch": 1.4567786482538838, + "grad_norm": 1.1782087302857855, + "learning_rate": 3.774424111896614e-06, + "loss": 0.2376541644334793, + "step": 5486 + }, + { + "epoch": 1.4570442172354268, + "grad_norm": 1.1748479481028287, + "learning_rate": 3.770988613310169e-06, + "loss": 0.22265875339508057, + "step": 5487 + }, + { + "epoch": 1.45730978621697, + "grad_norm": 
1.2316185421612622, + "learning_rate": 3.7675543156036555e-06, + "loss": 0.2511552572250366, + "step": 5488 + }, + { + "epoch": 1.457575355198513, + "grad_norm": 1.2601957381413438, + "learning_rate": 3.764121219439165e-06, + "loss": 0.2412843108177185, + "step": 5489 + }, + { + "epoch": 1.4578409241800558, + "grad_norm": 1.2622123015546969, + "learning_rate": 3.760689325478559e-06, + "loss": 0.26342809200286865, + "step": 5490 + }, + { + "epoch": 1.4581064931615988, + "grad_norm": 1.2994089172948287, + "learning_rate": 3.7572586343834638e-06, + "loss": 0.23315641283988953, + "step": 5491 + }, + { + "epoch": 1.4583720621431417, + "grad_norm": 1.0927170518216454, + "learning_rate": 3.753829146815279e-06, + "loss": 0.24148929119110107, + "step": 5492 + }, + { + "epoch": 1.4586376311246847, + "grad_norm": 1.363697618202234, + "learning_rate": 3.750400863435166e-06, + "loss": 0.22838115692138672, + "step": 5493 + }, + { + "epoch": 1.4589032001062276, + "grad_norm": 1.2083898158968958, + "learning_rate": 3.746973784904061e-06, + "loss": 0.21669608354568481, + "step": 5494 + }, + { + "epoch": 1.4591687690877706, + "grad_norm": 1.4819576271076944, + "learning_rate": 3.743547911882662e-06, + "loss": 0.25619322061538696, + "step": 5495 + }, + { + "epoch": 1.4594343380693136, + "grad_norm": 1.2058542987095502, + "learning_rate": 3.7401232450314384e-06, + "loss": 0.23629480600357056, + "step": 5496 + }, + { + "epoch": 1.4596999070508565, + "grad_norm": 1.189438722154431, + "learning_rate": 3.7366997850106245e-06, + "loss": 0.21799582242965698, + "step": 5497 + }, + { + "epoch": 1.4599654760323995, + "grad_norm": 1.372571579127378, + "learning_rate": 3.733277532480223e-06, + "loss": 0.2582590579986572, + "step": 5498 + }, + { + "epoch": 1.4602310450139424, + "grad_norm": 1.1675281771435806, + "learning_rate": 3.729856488100003e-06, + "loss": 0.23641736805438995, + "step": 5499 + }, + { + "epoch": 1.4604966139954854, + "grad_norm": 1.3024331747300109, + "learning_rate": 
3.7264366525295e-06, + "loss": 0.24150417745113373, + "step": 5500 + }, + { + "epoch": 1.4607621829770283, + "grad_norm": 1.2012687985267718, + "learning_rate": 3.7230180264280245e-06, + "loss": 0.2474009394645691, + "step": 5501 + }, + { + "epoch": 1.4610277519585713, + "grad_norm": 1.3411668359609863, + "learning_rate": 3.7196006104546435e-06, + "loss": 0.269604355096817, + "step": 5502 + }, + { + "epoch": 1.4612933209401142, + "grad_norm": 1.3014753471077654, + "learning_rate": 3.716184405268194e-06, + "loss": 0.24324679374694824, + "step": 5503 + }, + { + "epoch": 1.4615588899216572, + "grad_norm": 1.1306865007600708, + "learning_rate": 3.7127694115272805e-06, + "loss": 0.2249709963798523, + "step": 5504 + }, + { + "epoch": 1.4618244589032001, + "grad_norm": 1.2915165646779034, + "learning_rate": 3.7093556298902734e-06, + "loss": 0.2560918629169464, + "step": 5505 + }, + { + "epoch": 1.462090027884743, + "grad_norm": 1.154084739271703, + "learning_rate": 3.705943061015309e-06, + "loss": 0.22693020105361938, + "step": 5506 + }, + { + "epoch": 1.462355596866286, + "grad_norm": 1.2640727525169442, + "learning_rate": 3.702531705560292e-06, + "loss": 0.2617371678352356, + "step": 5507 + }, + { + "epoch": 1.462621165847829, + "grad_norm": 1.2561844307954502, + "learning_rate": 3.6991215641828903e-06, + "loss": 0.2314397394657135, + "step": 5508 + }, + { + "epoch": 1.462886734829372, + "grad_norm": 1.1063207547372251, + "learning_rate": 3.6957126375405383e-06, + "loss": 0.23186162114143372, + "step": 5509 + }, + { + "epoch": 1.4631523038109149, + "grad_norm": 1.2602306615156422, + "learning_rate": 3.6923049262904375e-06, + "loss": 0.21775083243846893, + "step": 5510 + }, + { + "epoch": 1.4634178727924578, + "grad_norm": 1.2619669881473867, + "learning_rate": 3.688898431089556e-06, + "loss": 0.24707889556884766, + "step": 5511 + }, + { + "epoch": 1.4636834417740008, + "grad_norm": 1.0923805026421214, + "learning_rate": 3.6854931525946237e-06, + "loss": 
0.1941150575876236, + "step": 5512 + }, + { + "epoch": 1.4639490107555437, + "grad_norm": 1.0123090946182933, + "learning_rate": 3.6820890914621376e-06, + "loss": 0.17808857560157776, + "step": 5513 + }, + { + "epoch": 1.4642145797370867, + "grad_norm": 1.2139965705715394, + "learning_rate": 3.678686248348363e-06, + "loss": 0.2150077074766159, + "step": 5514 + }, + { + "epoch": 1.4644801487186296, + "grad_norm": 1.4267562521267494, + "learning_rate": 3.6752846239093276e-06, + "loss": 0.2605292797088623, + "step": 5515 + }, + { + "epoch": 1.4647457177001726, + "grad_norm": 1.202920213288267, + "learning_rate": 3.671884218800822e-06, + "loss": 0.22481867671012878, + "step": 5516 + }, + { + "epoch": 1.4650112866817155, + "grad_norm": 5.588780783186036, + "learning_rate": 3.668485033678406e-06, + "loss": 0.24453294277191162, + "step": 5517 + }, + { + "epoch": 1.4652768556632585, + "grad_norm": 1.379432138271627, + "learning_rate": 3.6650870691973996e-06, + "loss": 0.2672286033630371, + "step": 5518 + }, + { + "epoch": 1.4655424246448014, + "grad_norm": 1.2625747265975353, + "learning_rate": 3.661690326012897e-06, + "loss": 0.2514987587928772, + "step": 5519 + }, + { + "epoch": 1.4658079936263444, + "grad_norm": 1.3337549906693908, + "learning_rate": 3.6582948047797438e-06, + "loss": 0.25671514868736267, + "step": 5520 + }, + { + "epoch": 1.4660735626078873, + "grad_norm": 1.3535247420304835, + "learning_rate": 3.654900506152561e-06, + "loss": 0.25485602021217346, + "step": 5521 + }, + { + "epoch": 1.4663391315894303, + "grad_norm": 1.1813027271086827, + "learning_rate": 3.6515074307857257e-06, + "loss": 0.23556292057037354, + "step": 5522 + }, + { + "epoch": 1.4666047005709733, + "grad_norm": 1.15604598759747, + "learning_rate": 3.6481155793333855e-06, + "loss": 0.23347696661949158, + "step": 5523 + }, + { + "epoch": 1.4668702695525162, + "grad_norm": 1.218328581124676, + "learning_rate": 3.6447249524494466e-06, + "loss": 0.2405884712934494, + "step": 5524 + }, + { + 
"epoch": 1.4671358385340592, + "grad_norm": 1.2423110513745568, + "learning_rate": 3.6413355507875845e-06, + "loss": 0.23668336868286133, + "step": 5525 + }, + { + "epoch": 1.467401407515602, + "grad_norm": 1.207526661238473, + "learning_rate": 3.6379473750012375e-06, + "loss": 0.25534945726394653, + "step": 5526 + }, + { + "epoch": 1.467666976497145, + "grad_norm": 1.267472887202726, + "learning_rate": 3.634560425743596e-06, + "loss": 0.22227410972118378, + "step": 5527 + }, + { + "epoch": 1.467932545478688, + "grad_norm": 1.4853214348875312, + "learning_rate": 3.631174703667636e-06, + "loss": 0.23395927250385284, + "step": 5528 + }, + { + "epoch": 1.468198114460231, + "grad_norm": 1.2396534638298151, + "learning_rate": 3.6277902094260785e-06, + "loss": 0.23419208824634552, + "step": 5529 + }, + { + "epoch": 1.4684636834417741, + "grad_norm": 1.3441597355302621, + "learning_rate": 3.6244069436714158e-06, + "loss": 0.22185654938220978, + "step": 5530 + }, + { + "epoch": 1.468729252423317, + "grad_norm": 1.2489989202798994, + "learning_rate": 3.621024907055901e-06, + "loss": 0.2705134153366089, + "step": 5531 + }, + { + "epoch": 1.46899482140486, + "grad_norm": 1.23195362246657, + "learning_rate": 3.617644100231551e-06, + "loss": 0.23426109552383423, + "step": 5532 + }, + { + "epoch": 1.469260390386403, + "grad_norm": 1.2477206941188708, + "learning_rate": 3.6142645238501462e-06, + "loss": 0.25527146458625793, + "step": 5533 + }, + { + "epoch": 1.469525959367946, + "grad_norm": 1.1030456616341389, + "learning_rate": 3.610886178563228e-06, + "loss": 0.1882668435573578, + "step": 5534 + }, + { + "epoch": 1.469791528349489, + "grad_norm": 1.2622509171219458, + "learning_rate": 3.607509065022101e-06, + "loss": 0.24060532450675964, + "step": 5535 + }, + { + "epoch": 1.4700570973310318, + "grad_norm": 1.2245038712856335, + "learning_rate": 3.6041331838778325e-06, + "loss": 0.23555803298950195, + "step": 5536 + }, + { + "epoch": 1.4703226663125748, + "grad_norm": 
1.2192798079575136, + "learning_rate": 3.6007585357812557e-06, + "loss": 0.23126551508903503, + "step": 5537 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 1.139497037450913, + "learning_rate": 3.597385121382961e-06, + "loss": 0.24203836917877197, + "step": 5538 + }, + { + "epoch": 1.4708538042756607, + "grad_norm": 1.2467383616518404, + "learning_rate": 3.5940129413333046e-06, + "loss": 0.239767923951149, + "step": 5539 + }, + { + "epoch": 1.4711193732572037, + "grad_norm": 1.158137574546163, + "learning_rate": 3.5906419962824002e-06, + "loss": 0.24732957780361176, + "step": 5540 + }, + { + "epoch": 1.4713849422387466, + "grad_norm": 1.2722296085836442, + "learning_rate": 3.587272286880131e-06, + "loss": 0.2296421229839325, + "step": 5541 + }, + { + "epoch": 1.4716505112202896, + "grad_norm": 1.2453973567418024, + "learning_rate": 3.583903813776132e-06, + "loss": 0.2339775711297989, + "step": 5542 + }, + { + "epoch": 1.4719160802018325, + "grad_norm": 1.194940832073201, + "learning_rate": 3.5805365776198052e-06, + "loss": 0.230351984500885, + "step": 5543 + }, + { + "epoch": 1.4721816491833755, + "grad_norm": 1.2792126719917591, + "learning_rate": 3.5771705790603163e-06, + "loss": 0.2501414716243744, + "step": 5544 + }, + { + "epoch": 1.4724472181649184, + "grad_norm": 1.2327284472179139, + "learning_rate": 3.5738058187465864e-06, + "loss": 0.23387153446674347, + "step": 5545 + }, + { + "epoch": 1.4727127871464614, + "grad_norm": 1.2921618045206031, + "learning_rate": 3.570442297327307e-06, + "loss": 0.23874594271183014, + "step": 5546 + }, + { + "epoch": 1.4729783561280043, + "grad_norm": 1.2841826918754735, + "learning_rate": 3.5670800154509245e-06, + "loss": 0.21867451071739197, + "step": 5547 + }, + { + "epoch": 1.4732439251095473, + "grad_norm": 1.2937830650411482, + "learning_rate": 3.563718973765644e-06, + "loss": 0.24124100804328918, + "step": 5548 + }, + { + "epoch": 1.4735094940910902, + "grad_norm": 1.2156419794246578, + "learning_rate": 
3.5603591729194377e-06, + "loss": 0.22185327112674713, + "step": 5549 + }, + { + "epoch": 1.4737750630726332, + "grad_norm": 1.1571779294098303, + "learning_rate": 3.5570006135600345e-06, + "loss": 0.21193793416023254, + "step": 5550 + }, + { + "epoch": 1.4740406320541761, + "grad_norm": 1.3939617841899903, + "learning_rate": 3.553643296334924e-06, + "loss": 0.2615143656730652, + "step": 5551 + }, + { + "epoch": 1.474306201035719, + "grad_norm": 1.1936451275051074, + "learning_rate": 3.5502872218913597e-06, + "loss": 0.24937541782855988, + "step": 5552 + }, + { + "epoch": 1.474571770017262, + "grad_norm": 1.0736225386439564, + "learning_rate": 3.5469323908763507e-06, + "loss": 0.22849224507808685, + "step": 5553 + }, + { + "epoch": 1.474837338998805, + "grad_norm": 1.6488166459783042, + "learning_rate": 3.5435788039366657e-06, + "loss": 0.2209717333316803, + "step": 5554 + }, + { + "epoch": 1.475102907980348, + "grad_norm": 1.2992665215674652, + "learning_rate": 3.5402264617188453e-06, + "loss": 0.2529235780239105, + "step": 5555 + }, + { + "epoch": 1.4753684769618909, + "grad_norm": 1.2133685762997675, + "learning_rate": 3.536875364869181e-06, + "loss": 0.2045450657606125, + "step": 5556 + }, + { + "epoch": 1.4756340459434338, + "grad_norm": 1.0591536248970717, + "learning_rate": 3.5335255140337167e-06, + "loss": 0.1973644196987152, + "step": 5557 + }, + { + "epoch": 1.4758996149249768, + "grad_norm": 1.3059187006673687, + "learning_rate": 3.5301769098582685e-06, + "loss": 0.27417299151420593, + "step": 5558 + }, + { + "epoch": 1.4761651839065197, + "grad_norm": 1.2500382678843112, + "learning_rate": 3.5268295529884077e-06, + "loss": 0.24541756510734558, + "step": 5559 + }, + { + "epoch": 1.4764307528880627, + "grad_norm": 1.4461383875060436, + "learning_rate": 3.5234834440694655e-06, + "loss": 0.25785958766937256, + "step": 5560 + }, + { + "epoch": 1.4766963218696056, + "grad_norm": 1.1676448271023605, + "learning_rate": 3.5201385837465307e-06, + "loss": 
0.21099212765693665, + "step": 5561 + }, + { + "epoch": 1.4769618908511486, + "grad_norm": 1.1787333048605453, + "learning_rate": 3.5167949726644545e-06, + "loss": 0.26023173332214355, + "step": 5562 + }, + { + "epoch": 1.4772274598326915, + "grad_norm": 1.6670162101301063, + "learning_rate": 3.5134526114678426e-06, + "loss": 0.22882963716983795, + "step": 5563 + }, + { + "epoch": 1.4774930288142345, + "grad_norm": 1.312450944331431, + "learning_rate": 3.5101115008010677e-06, + "loss": 0.21987251937389374, + "step": 5564 + }, + { + "epoch": 1.4777585977957775, + "grad_norm": 1.163985983495263, + "learning_rate": 3.506771641308255e-06, + "loss": 0.2169610857963562, + "step": 5565 + }, + { + "epoch": 1.4780241667773204, + "grad_norm": 4.440133890295746, + "learning_rate": 3.50343303363329e-06, + "loss": 0.22723034024238586, + "step": 5566 + }, + { + "epoch": 1.4782897357588634, + "grad_norm": 1.2392064660120468, + "learning_rate": 3.5000956784198157e-06, + "loss": 0.23738276958465576, + "step": 5567 + }, + { + "epoch": 1.4785553047404063, + "grad_norm": 1.1818266174210303, + "learning_rate": 3.496759576311235e-06, + "loss": 0.19922251999378204, + "step": 5568 + }, + { + "epoch": 1.4788208737219493, + "grad_norm": 1.294067668946831, + "learning_rate": 3.4934247279507092e-06, + "loss": 0.22529268264770508, + "step": 5569 + }, + { + "epoch": 1.4790864427034922, + "grad_norm": 1.3551359298814187, + "learning_rate": 3.4900911339811583e-06, + "loss": 0.26758015155792236, + "step": 5570 + }, + { + "epoch": 1.4793520116850352, + "grad_norm": 1.2627897957153122, + "learning_rate": 3.48675879504526e-06, + "loss": 0.24752648174762726, + "step": 5571 + }, + { + "epoch": 1.4796175806665781, + "grad_norm": 1.3085621441307098, + "learning_rate": 3.483427711785449e-06, + "loss": 0.25337618589401245, + "step": 5572 + }, + { + "epoch": 1.479883149648121, + "grad_norm": 1.3543288061594618, + "learning_rate": 3.480097884843919e-06, + "loss": 0.24504786729812622, + "step": 5573 + }, + { 
+ "epoch": 1.480148718629664, + "grad_norm": 1.1750849317955903, + "learning_rate": 3.4767693148626223e-06, + "loss": 0.21255145967006683, + "step": 5574 + }, + { + "epoch": 1.480414287611207, + "grad_norm": 1.2853041773936769, + "learning_rate": 3.473442002483267e-06, + "loss": 0.2501891553401947, + "step": 5575 + }, + { + "epoch": 1.48067985659275, + "grad_norm": 1.195974425335747, + "learning_rate": 3.4701159483473202e-06, + "loss": 0.25276634097099304, + "step": 5576 + }, + { + "epoch": 1.4809454255742929, + "grad_norm": 1.427206116406706, + "learning_rate": 3.4667911530960052e-06, + "loss": 0.2760567367076874, + "step": 5577 + }, + { + "epoch": 1.4812109945558358, + "grad_norm": 1.2442739080424003, + "learning_rate": 3.463467617370305e-06, + "loss": 0.22686481475830078, + "step": 5578 + }, + { + "epoch": 1.4814765635373788, + "grad_norm": 1.2374194002920247, + "learning_rate": 3.4601453418109554e-06, + "loss": 0.23262599110603333, + "step": 5579 + }, + { + "epoch": 1.4817421325189217, + "grad_norm": 1.2263890428702933, + "learning_rate": 3.4568243270584545e-06, + "loss": 0.22231365740299225, + "step": 5580 + }, + { + "epoch": 1.4820077015004647, + "grad_norm": 1.2193067799394695, + "learning_rate": 3.4535045737530504e-06, + "loss": 0.22237855195999146, + "step": 5581 + }, + { + "epoch": 1.4822732704820076, + "grad_norm": 1.208437884817879, + "learning_rate": 3.4501860825347587e-06, + "loss": 0.2260412871837616, + "step": 5582 + }, + { + "epoch": 1.4825388394635506, + "grad_norm": 1.3488909026023506, + "learning_rate": 3.4468688540433425e-06, + "loss": 0.2133496105670929, + "step": 5583 + }, + { + "epoch": 1.4828044084450935, + "grad_norm": 1.231358912436915, + "learning_rate": 3.4435528889183245e-06, + "loss": 0.24750375747680664, + "step": 5584 + }, + { + "epoch": 1.4830699774266365, + "grad_norm": 1.2053641188090713, + "learning_rate": 3.440238187798983e-06, + "loss": 0.23673412203788757, + "step": 5585 + }, + { + "epoch": 1.4833355464081794, + "grad_norm": 
1.312048381493266, + "learning_rate": 3.436924751324354e-06, + "loss": 0.2505243420600891, + "step": 5586 + }, + { + "epoch": 1.4836011153897224, + "grad_norm": 1.2769153596955758, + "learning_rate": 3.433612580133229e-06, + "loss": 0.276151180267334, + "step": 5587 + }, + { + "epoch": 1.4838666843712653, + "grad_norm": 1.0245497892529305, + "learning_rate": 3.430301674864154e-06, + "loss": 0.1756816953420639, + "step": 5588 + }, + { + "epoch": 1.4841322533528083, + "grad_norm": 1.2667973514811224, + "learning_rate": 3.4269920361554342e-06, + "loss": 0.25901898741722107, + "step": 5589 + }, + { + "epoch": 1.4843978223343512, + "grad_norm": 1.2034260428652863, + "learning_rate": 3.4236836646451286e-06, + "loss": 0.21196085214614868, + "step": 5590 + }, + { + "epoch": 1.4846633913158942, + "grad_norm": 1.2887221468811698, + "learning_rate": 3.4203765609710525e-06, + "loss": 0.24153128266334534, + "step": 5591 + }, + { + "epoch": 1.4849289602974372, + "grad_norm": 1.2285562462634616, + "learning_rate": 3.4170707257707757e-06, + "loss": 0.25715887546539307, + "step": 5592 + }, + { + "epoch": 1.48519452927898, + "grad_norm": 1.430212837200284, + "learning_rate": 3.413766159681624e-06, + "loss": 0.2920379042625427, + "step": 5593 + }, + { + "epoch": 1.485460098260523, + "grad_norm": 1.2173970332611068, + "learning_rate": 3.41046286334068e-06, + "loss": 0.22127456963062286, + "step": 5594 + }, + { + "epoch": 1.485725667242066, + "grad_norm": 1.2534339617557788, + "learning_rate": 3.4071608373847786e-06, + "loss": 0.23103584349155426, + "step": 5595 + }, + { + "epoch": 1.485991236223609, + "grad_norm": 1.2999427041349472, + "learning_rate": 3.403860082450513e-06, + "loss": 0.29068222641944885, + "step": 5596 + }, + { + "epoch": 1.486256805205152, + "grad_norm": 1.2532608064541852, + "learning_rate": 3.4005605991742296e-06, + "loss": 0.23703888058662415, + "step": 5597 + }, + { + "epoch": 1.4865223741866949, + "grad_norm": 1.4039489349034764, + "learning_rate": 
3.3972623881920296e-06, + "loss": 0.23348261415958405, + "step": 5598 + }, + { + "epoch": 1.4867879431682378, + "grad_norm": 1.1603139615742908, + "learning_rate": 3.3939654501397645e-06, + "loss": 0.24733223021030426, + "step": 5599 + }, + { + "epoch": 1.487053512149781, + "grad_norm": 1.1220204153088178, + "learning_rate": 3.3906697856530548e-06, + "loss": 0.22576835751533508, + "step": 5600 + }, + { + "epoch": 1.487319081131324, + "grad_norm": 1.1809335952834177, + "learning_rate": 3.3873753953672593e-06, + "loss": 0.20863527059555054, + "step": 5601 + }, + { + "epoch": 1.487584650112867, + "grad_norm": 1.1823379745083873, + "learning_rate": 3.384082279917499e-06, + "loss": 0.2299712598323822, + "step": 5602 + }, + { + "epoch": 1.4878502190944098, + "grad_norm": 1.1858521746021262, + "learning_rate": 3.380790439938648e-06, + "loss": 0.23058944940567017, + "step": 5603 + }, + { + "epoch": 1.4881157880759528, + "grad_norm": 1.1304663814123712, + "learning_rate": 3.3774998760653344e-06, + "loss": 0.20307201147079468, + "step": 5604 + }, + { + "epoch": 1.4883813570574957, + "grad_norm": 1.112411027996001, + "learning_rate": 3.3742105889319388e-06, + "loss": 0.2296266108751297, + "step": 5605 + }, + { + "epoch": 1.4886469260390387, + "grad_norm": 1.3206442060716181, + "learning_rate": 3.370922579172601e-06, + "loss": 0.22702309489250183, + "step": 5606 + }, + { + "epoch": 1.4889124950205816, + "grad_norm": 1.4590848907033545, + "learning_rate": 3.3676358474212035e-06, + "loss": 0.30432331562042236, + "step": 5607 + }, + { + "epoch": 1.4891780640021246, + "grad_norm": 1.201356120373459, + "learning_rate": 3.3643503943113907e-06, + "loss": 0.2488052248954773, + "step": 5608 + }, + { + "epoch": 1.4894436329836676, + "grad_norm": 1.2096846483257637, + "learning_rate": 3.361066220476564e-06, + "loss": 0.2221754938364029, + "step": 5609 + }, + { + "epoch": 1.4897092019652105, + "grad_norm": 1.289556223007011, + "learning_rate": 3.3577833265498728e-06, + "loss": 
0.2547761797904968, + "step": 5610 + }, + { + "epoch": 1.4899747709467535, + "grad_norm": 1.3306628367975963, + "learning_rate": 3.3545017131642164e-06, + "loss": 0.21811938285827637, + "step": 5611 + }, + { + "epoch": 1.4902403399282964, + "grad_norm": 1.4022029015386877, + "learning_rate": 3.3512213809522554e-06, + "loss": 0.30436158180236816, + "step": 5612 + }, + { + "epoch": 1.4905059089098394, + "grad_norm": 1.2224150283856856, + "learning_rate": 3.3479423305463953e-06, + "loss": 0.2053622156381607, + "step": 5613 + }, + { + "epoch": 1.4907714778913823, + "grad_norm": 1.3026832238379669, + "learning_rate": 3.344664562578801e-06, + "loss": 0.2017601728439331, + "step": 5614 + }, + { + "epoch": 1.4910370468729253, + "grad_norm": 1.2856046275416113, + "learning_rate": 3.341388077681387e-06, + "loss": 0.23668046295642853, + "step": 5615 + }, + { + "epoch": 1.4913026158544682, + "grad_norm": 1.1460002150937032, + "learning_rate": 3.338112876485821e-06, + "loss": 0.20016951858997345, + "step": 5616 + }, + { + "epoch": 1.4915681848360112, + "grad_norm": 1.3606548245166536, + "learning_rate": 3.3348389596235177e-06, + "loss": 0.25477850437164307, + "step": 5617 + }, + { + "epoch": 1.4918337538175541, + "grad_norm": 1.2758175160721472, + "learning_rate": 3.3315663277256594e-06, + "loss": 0.24063366651535034, + "step": 5618 + }, + { + "epoch": 1.492099322799097, + "grad_norm": 1.2737128535751616, + "learning_rate": 3.328294981423165e-06, + "loss": 0.23443251848220825, + "step": 5619 + }, + { + "epoch": 1.49236489178064, + "grad_norm": 1.1580169148577781, + "learning_rate": 3.325024921346717e-06, + "loss": 0.21191264688968658, + "step": 5620 + }, + { + "epoch": 1.492630460762183, + "grad_norm": 1.213323558189925, + "learning_rate": 3.3217561481267367e-06, + "loss": 0.22062326967716217, + "step": 5621 + }, + { + "epoch": 1.492896029743726, + "grad_norm": 1.1757529457487401, + "learning_rate": 3.318488662393409e-06, + "loss": 0.2235480695962906, + "step": 5622 + }, + { + 
"epoch": 1.4931615987252689, + "grad_norm": 1.2611472240425432, + "learning_rate": 3.315222464776665e-06, + "loss": 0.26665517687797546, + "step": 5623 + }, + { + "epoch": 1.4934271677068118, + "grad_norm": 1.270220596773442, + "learning_rate": 3.3119575559061902e-06, + "loss": 0.24300602078437805, + "step": 5624 + }, + { + "epoch": 1.4936927366883548, + "grad_norm": 1.2622444254847978, + "learning_rate": 3.308693936411421e-06, + "loss": 0.25441884994506836, + "step": 5625 + }, + { + "epoch": 1.4939583056698977, + "grad_norm": 1.2781695234171213, + "learning_rate": 3.3054316069215407e-06, + "loss": 0.23236152529716492, + "step": 5626 + }, + { + "epoch": 1.4942238746514407, + "grad_norm": 1.2299113342509724, + "learning_rate": 3.3021705680654946e-06, + "loss": 0.24535568058490753, + "step": 5627 + }, + { + "epoch": 1.4944894436329836, + "grad_norm": 1.3635919919461823, + "learning_rate": 3.29891082047197e-06, + "loss": 0.2542986273765564, + "step": 5628 + }, + { + "epoch": 1.4947550126145266, + "grad_norm": 1.3442816383357798, + "learning_rate": 3.295652364769407e-06, + "loss": 0.26490268111228943, + "step": 5629 + }, + { + "epoch": 1.4950205815960695, + "grad_norm": 1.2455944135633985, + "learning_rate": 3.292395201585997e-06, + "loss": 0.25576913356781006, + "step": 5630 + }, + { + "epoch": 1.4952861505776125, + "grad_norm": 1.321982811797117, + "learning_rate": 3.2891393315496846e-06, + "loss": 0.2930823266506195, + "step": 5631 + }, + { + "epoch": 1.4955517195591554, + "grad_norm": 1.3029577245101889, + "learning_rate": 3.285884755288161e-06, + "loss": 0.2426074892282486, + "step": 5632 + }, + { + "epoch": 1.4958172885406984, + "grad_norm": 1.1912484566122454, + "learning_rate": 3.2826314734288713e-06, + "loss": 0.24090878665447235, + "step": 5633 + }, + { + "epoch": 1.4960828575222413, + "grad_norm": 1.291391881665867, + "learning_rate": 3.2793794865990092e-06, + "loss": 0.26155173778533936, + "step": 5634 + }, + { + "epoch": 1.4963484265037843, + "grad_norm": 
1.2581171617638447, + "learning_rate": 3.2761287954255195e-06, + "loss": 0.2594009041786194, + "step": 5635 + }, + { + "epoch": 1.4966139954853273, + "grad_norm": 1.248912763921314, + "learning_rate": 3.2728794005350972e-06, + "loss": 0.24434763193130493, + "step": 5636 + }, + { + "epoch": 1.4968795644668702, + "grad_norm": 1.3459414061970596, + "learning_rate": 3.269631302554188e-06, + "loss": 0.2622208297252655, + "step": 5637 + }, + { + "epoch": 1.4971451334484132, + "grad_norm": 1.2222057610309294, + "learning_rate": 3.266384502108987e-06, + "loss": 0.18913154304027557, + "step": 5638 + }, + { + "epoch": 1.497410702429956, + "grad_norm": 1.260519406868159, + "learning_rate": 3.263138999825437e-06, + "loss": 0.2610907554626465, + "step": 5639 + }, + { + "epoch": 1.497676271411499, + "grad_norm": 1.2585537664404678, + "learning_rate": 3.2598947963292337e-06, + "loss": 0.25841569900512695, + "step": 5640 + }, + { + "epoch": 1.497941840393042, + "grad_norm": 1.1680179490188496, + "learning_rate": 3.256651892245822e-06, + "loss": 0.2066381573677063, + "step": 5641 + }, + { + "epoch": 1.4982074093745852, + "grad_norm": 1.1877407935219242, + "learning_rate": 3.253410288200396e-06, + "loss": 0.23956719040870667, + "step": 5642 + }, + { + "epoch": 1.4984729783561281, + "grad_norm": 1.1996406642135662, + "learning_rate": 3.250169984817897e-06, + "loss": 0.23999394476413727, + "step": 5643 + }, + { + "epoch": 1.498738547337671, + "grad_norm": 1.4056134439986134, + "learning_rate": 3.2469309827230156e-06, + "loss": 0.24273940920829773, + "step": 5644 + }, + { + "epoch": 1.499004116319214, + "grad_norm": 1.193555704549332, + "learning_rate": 3.2436932825401977e-06, + "loss": 0.2212621569633484, + "step": 5645 + }, + { + "epoch": 1.499269685300757, + "grad_norm": 1.293874995027958, + "learning_rate": 3.2404568848936325e-06, + "loss": 0.2487148940563202, + "step": 5646 + }, + { + "epoch": 1.4995352542823, + "grad_norm": 1.2610121684030642, + "learning_rate": 
3.237221790407259e-06, + "loss": 0.29314422607421875, + "step": 5647 + }, + { + "epoch": 1.499800823263843, + "grad_norm": 1.1765702458871505, + "learning_rate": 3.233987999704763e-06, + "loss": 0.22727417945861816, + "step": 5648 + }, + { + "epoch": 1.5000663922453858, + "grad_norm": 1.1578089091098656, + "learning_rate": 3.230755513409585e-06, + "loss": 0.18877442181110382, + "step": 5649 + }, + { + "epoch": 1.5003319612269288, + "grad_norm": 1.2855274132536632, + "learning_rate": 3.2275243321449068e-06, + "loss": 0.2504552900791168, + "step": 5650 + }, + { + "epoch": 1.5005975302084718, + "grad_norm": 1.1905373910388852, + "learning_rate": 3.224294456533663e-06, + "loss": 0.23579174280166626, + "step": 5651 + }, + { + "epoch": 1.5008630991900147, + "grad_norm": 1.3692203179408873, + "learning_rate": 3.221065887198537e-06, + "loss": 0.29236793518066406, + "step": 5652 + }, + { + "epoch": 1.5011286681715577, + "grad_norm": 1.3245217175369617, + "learning_rate": 3.2178386247619577e-06, + "loss": 0.2735568881034851, + "step": 5653 + }, + { + "epoch": 1.5013942371531006, + "grad_norm": 1.240462888838021, + "learning_rate": 3.214612669846103e-06, + "loss": 0.2391616702079773, + "step": 5654 + }, + { + "epoch": 1.5016598061346436, + "grad_norm": 1.3766117264936455, + "learning_rate": 3.2113880230729e-06, + "loss": 0.24532485008239746, + "step": 5655 + }, + { + "epoch": 1.5019253751161865, + "grad_norm": 1.3310069624279295, + "learning_rate": 3.2081646850640215e-06, + "loss": 0.2605767250061035, + "step": 5656 + }, + { + "epoch": 1.5021909440977295, + "grad_norm": 1.2109489933208193, + "learning_rate": 3.2049426564408893e-06, + "loss": 0.2651350200176239, + "step": 5657 + }, + { + "epoch": 1.5024565130792724, + "grad_norm": 1.3305800775425032, + "learning_rate": 3.2017219378246734e-06, + "loss": 0.2719389498233795, + "step": 5658 + }, + { + "epoch": 1.5027220820608154, + "grad_norm": 1.2359239723239188, + "learning_rate": 3.198502529836288e-06, + "loss": 
0.23077815771102905, + "step": 5659 + }, + { + "epoch": 1.5029876510423583, + "grad_norm": 1.0838054114896152, + "learning_rate": 3.1952844330964007e-06, + "loss": 0.21954959630966187, + "step": 5660 + }, + { + "epoch": 1.5032532200239013, + "grad_norm": 1.3480229773492907, + "learning_rate": 3.1920676482254186e-06, + "loss": 0.28229185938835144, + "step": 5661 + }, + { + "epoch": 1.5035187890054442, + "grad_norm": 1.2587796771658648, + "learning_rate": 3.1888521758435e-06, + "loss": 0.24612295627593994, + "step": 5662 + }, + { + "epoch": 1.5037843579869872, + "grad_norm": 1.2649379995915024, + "learning_rate": 3.185638016570555e-06, + "loss": 0.24191413819789886, + "step": 5663 + }, + { + "epoch": 1.5040499269685301, + "grad_norm": 1.225446339219085, + "learning_rate": 3.1824251710262323e-06, + "loss": 0.2427935004234314, + "step": 5664 + }, + { + "epoch": 1.504315495950073, + "grad_norm": 1.2595635392757376, + "learning_rate": 3.17921363982993e-06, + "loss": 0.2600318193435669, + "step": 5665 + }, + { + "epoch": 1.504581064931616, + "grad_norm": 1.2817020254494476, + "learning_rate": 3.1760034236007954e-06, + "loss": 0.25215205550193787, + "step": 5666 + }, + { + "epoch": 1.504846633913159, + "grad_norm": 1.2568573714231897, + "learning_rate": 3.1727945229577183e-06, + "loss": 0.24460548162460327, + "step": 5667 + }, + { + "epoch": 1.505112202894702, + "grad_norm": 1.2881955251422392, + "learning_rate": 3.169586938519338e-06, + "loss": 0.2812577486038208, + "step": 5668 + }, + { + "epoch": 1.5053777718762449, + "grad_norm": 1.1272225605105841, + "learning_rate": 3.166380670904039e-06, + "loss": 0.23297616839408875, + "step": 5669 + }, + { + "epoch": 1.5056433408577878, + "grad_norm": 1.1954331932042688, + "learning_rate": 3.163175720729954e-06, + "loss": 0.21659572422504425, + "step": 5670 + }, + { + "epoch": 1.5059089098393308, + "grad_norm": 1.2142230208725098, + "learning_rate": 3.1599720886149508e-06, + "loss": 0.22246181964874268, + "step": 5671 + }, + { + 
"epoch": 1.5061744788208737, + "grad_norm": 1.132636194795227, + "learning_rate": 3.1567697751766624e-06, + "loss": 0.20020918548107147, + "step": 5672 + }, + { + "epoch": 1.5064400478024167, + "grad_norm": 1.363041735701654, + "learning_rate": 3.1535687810324523e-06, + "loss": 0.25693628191947937, + "step": 5673 + }, + { + "epoch": 1.5067056167839596, + "grad_norm": 1.5250673507385644, + "learning_rate": 3.150369106799436e-06, + "loss": 0.21841923892498016, + "step": 5674 + }, + { + "epoch": 1.5069711857655026, + "grad_norm": 1.1710254495806258, + "learning_rate": 3.1471707530944707e-06, + "loss": 0.18131780624389648, + "step": 5675 + }, + { + "epoch": 1.5072367547470455, + "grad_norm": 1.180596749481675, + "learning_rate": 3.143973720534164e-06, + "loss": 0.22510449588298798, + "step": 5676 + }, + { + "epoch": 1.5075023237285885, + "grad_norm": 1.3952546557365002, + "learning_rate": 3.1407780097348627e-06, + "loss": 0.23721462488174438, + "step": 5677 + }, + { + "epoch": 1.5077678927101315, + "grad_norm": 1.2200574848273704, + "learning_rate": 3.1375836213126653e-06, + "loss": 0.24281899631023407, + "step": 5678 + }, + { + "epoch": 1.5080334616916744, + "grad_norm": 1.3211068465604292, + "learning_rate": 3.134390555883412e-06, + "loss": 0.23910081386566162, + "step": 5679 + }, + { + "epoch": 1.5082990306732174, + "grad_norm": 1.357027881520108, + "learning_rate": 3.1311988140626825e-06, + "loss": 0.2635132670402527, + "step": 5680 + }, + { + "epoch": 1.5085645996547603, + "grad_norm": 1.239638674575543, + "learning_rate": 3.1280083964658147e-06, + "loss": 0.24802634119987488, + "step": 5681 + }, + { + "epoch": 1.5088301686363033, + "grad_norm": 1.3861680174510138, + "learning_rate": 3.1248193037078823e-06, + "loss": 0.24081437289714813, + "step": 5682 + }, + { + "epoch": 1.5090957376178462, + "grad_norm": 1.2124748227090532, + "learning_rate": 3.121631536403701e-06, + "loss": 0.19550001621246338, + "step": 5683 + }, + { + "epoch": 1.5093613065993892, + 
"grad_norm": 1.309177755877421, + "learning_rate": 3.118445095167837e-06, + "loss": 0.2397807538509369, + "step": 5684 + }, + { + "epoch": 1.5096268755809321, + "grad_norm": 1.2243819490197418, + "learning_rate": 3.115259980614602e-06, + "loss": 0.2185651659965515, + "step": 5685 + }, + { + "epoch": 1.509892444562475, + "grad_norm": 1.2555724014592389, + "learning_rate": 3.1120761933580414e-06, + "loss": 0.22214055061340332, + "step": 5686 + }, + { + "epoch": 1.510158013544018, + "grad_norm": 1.4127254863789025, + "learning_rate": 3.108893734011955e-06, + "loss": 0.23971091210842133, + "step": 5687 + }, + { + "epoch": 1.510423582525561, + "grad_norm": 1.3331222718828735, + "learning_rate": 3.1057126031898843e-06, + "loss": 0.26458197832107544, + "step": 5688 + }, + { + "epoch": 1.510689151507104, + "grad_norm": 1.3487790050882777, + "learning_rate": 3.1025328015051093e-06, + "loss": 0.23730339109897614, + "step": 5689 + }, + { + "epoch": 1.5109547204886469, + "grad_norm": 1.2964784198979393, + "learning_rate": 3.0993543295706653e-06, + "loss": 0.21981677412986755, + "step": 5690 + }, + { + "epoch": 1.5112202894701898, + "grad_norm": 1.1812817656913812, + "learning_rate": 3.0961771879993206e-06, + "loss": 0.21984878182411194, + "step": 5691 + }, + { + "epoch": 1.5114858584517328, + "grad_norm": 1.2732802047873515, + "learning_rate": 3.093001377403592e-06, + "loss": 0.23086440563201904, + "step": 5692 + }, + { + "epoch": 1.5117514274332757, + "grad_norm": 2.3681680891314953, + "learning_rate": 3.0898268983957368e-06, + "loss": 0.2355024814605713, + "step": 5693 + }, + { + "epoch": 1.5120169964148187, + "grad_norm": 1.3061363772251866, + "learning_rate": 3.0866537515877584e-06, + "loss": 0.21210229396820068, + "step": 5694 + }, + { + "epoch": 1.5122825653963616, + "grad_norm": 1.3436771657394675, + "learning_rate": 3.0834819375914003e-06, + "loss": 0.2387622594833374, + "step": 5695 + }, + { + "epoch": 1.5125481343779046, + "grad_norm": 1.3482258979232278, + 
"learning_rate": 3.0803114570181527e-06, + "loss": 0.23822402954101562, + "step": 5696 + }, + { + "epoch": 1.5128137033594475, + "grad_norm": 1.3248058910768958, + "learning_rate": 3.0771423104792454e-06, + "loss": 0.26844173669815063, + "step": 5697 + }, + { + "epoch": 1.5130792723409905, + "grad_norm": 1.2131778927640824, + "learning_rate": 3.07397449858565e-06, + "loss": 0.23288767039775848, + "step": 5698 + }, + { + "epoch": 1.5133448413225334, + "grad_norm": 1.2716046597052009, + "learning_rate": 3.0708080219480896e-06, + "loss": 0.23273086547851562, + "step": 5699 + }, + { + "epoch": 1.5136104103040764, + "grad_norm": 1.4240236624695346, + "learning_rate": 3.067642881177023e-06, + "loss": 0.2505509555339813, + "step": 5700 + }, + { + "epoch": 1.5138759792856193, + "grad_norm": 1.1441752919653974, + "learning_rate": 3.0644790768826473e-06, + "loss": 0.22801508009433746, + "step": 5701 + }, + { + "epoch": 1.5141415482671623, + "grad_norm": 1.1462347465841034, + "learning_rate": 3.061316609674908e-06, + "loss": 0.2110593169927597, + "step": 5702 + }, + { + "epoch": 1.5144071172487052, + "grad_norm": 1.2145033288630525, + "learning_rate": 3.0581554801634927e-06, + "loss": 0.22201795876026154, + "step": 5703 + }, + { + "epoch": 1.5146726862302482, + "grad_norm": 1.2993896506173446, + "learning_rate": 3.054995688957829e-06, + "loss": 0.23104460537433624, + "step": 5704 + }, + { + "epoch": 1.5149382552117912, + "grad_norm": 1.5590161841107484, + "learning_rate": 3.0518372366670877e-06, + "loss": 0.23373261094093323, + "step": 5705 + }, + { + "epoch": 1.515203824193334, + "grad_norm": 1.368121139637646, + "learning_rate": 3.0486801239001806e-06, + "loss": 0.2404957264661789, + "step": 5706 + }, + { + "epoch": 1.515469393174877, + "grad_norm": 1.2346548477581518, + "learning_rate": 3.0455243512657606e-06, + "loss": 0.23209382593631744, + "step": 5707 + }, + { + "epoch": 1.51573496215642, + "grad_norm": 1.156984368318911, + "learning_rate": 3.042369919372228e-06, + 
"loss": 0.218237042427063, + "step": 5708 + }, + { + "epoch": 1.516000531137963, + "grad_norm": 12.380411974697722, + "learning_rate": 3.039216828827717e-06, + "loss": 0.25025027990341187, + "step": 5709 + }, + { + "epoch": 1.516266100119506, + "grad_norm": 1.3454644235463973, + "learning_rate": 3.036065080240106e-06, + "loss": 0.24729448556900024, + "step": 5710 + }, + { + "epoch": 1.5165316691010489, + "grad_norm": 1.246980236713752, + "learning_rate": 3.032914674217017e-06, + "loss": 0.23614796996116638, + "step": 5711 + }, + { + "epoch": 1.5167972380825918, + "grad_norm": 1.1947534591327391, + "learning_rate": 3.029765611365808e-06, + "loss": 0.2313452661037445, + "step": 5712 + }, + { + "epoch": 1.5170628070641348, + "grad_norm": 1.2169352172923076, + "learning_rate": 3.0266178922935842e-06, + "loss": 0.22152003645896912, + "step": 5713 + }, + { + "epoch": 1.5173283760456777, + "grad_norm": 1.3132034423317465, + "learning_rate": 3.0234715176071874e-06, + "loss": 0.25942179560661316, + "step": 5714 + }, + { + "epoch": 1.5175939450272207, + "grad_norm": 1.213532583392701, + "learning_rate": 3.0203264879132e-06, + "loss": 0.25030237436294556, + "step": 5715 + }, + { + "epoch": 1.5178595140087636, + "grad_norm": 1.212709044397772, + "learning_rate": 3.0171828038179497e-06, + "loss": 0.2025807797908783, + "step": 5716 + }, + { + "epoch": 1.5181250829903066, + "grad_norm": 1.3035190960753136, + "learning_rate": 3.014040465927499e-06, + "loss": 0.20455190539360046, + "step": 5717 + }, + { + "epoch": 1.5183906519718495, + "grad_norm": 1.2171025232725439, + "learning_rate": 3.010899474847655e-06, + "loss": 0.24197113513946533, + "step": 5718 + }, + { + "epoch": 1.5186562209533925, + "grad_norm": 1.243656057613246, + "learning_rate": 3.007759831183964e-06, + "loss": 0.22290384769439697, + "step": 5719 + }, + { + "epoch": 1.5189217899349357, + "grad_norm": 1.133911078511842, + "learning_rate": 3.0046215355417117e-06, + "loss": 0.23087520897388458, + "step": 5720 + }, + { 
+ "epoch": 1.5191873589164786, + "grad_norm": 1.3329430419316783, + "learning_rate": 3.0014845885259236e-06, + "loss": 0.24425405263900757, + "step": 5721 + }, + { + "epoch": 1.5194529278980216, + "grad_norm": 1.310265396817766, + "learning_rate": 2.9983489907413675e-06, + "loss": 0.24888862669467926, + "step": 5722 + }, + { + "epoch": 1.5197184968795645, + "grad_norm": 1.3023172954247402, + "learning_rate": 2.9952147427925493e-06, + "loss": 0.23556756973266602, + "step": 5723 + }, + { + "epoch": 1.5199840658611075, + "grad_norm": 1.3924872169111115, + "learning_rate": 2.992081845283715e-06, + "loss": 0.2532619833946228, + "step": 5724 + }, + { + "epoch": 1.5202496348426504, + "grad_norm": 1.3351422936737996, + "learning_rate": 2.988950298818848e-06, + "loss": 0.2574974000453949, + "step": 5725 + }, + { + "epoch": 1.5205152038241934, + "grad_norm": 1.1244851887087242, + "learning_rate": 2.9858201040016775e-06, + "loss": 0.21997734904289246, + "step": 5726 + }, + { + "epoch": 1.5207807728057363, + "grad_norm": 1.3952335702566243, + "learning_rate": 2.982691261435666e-06, + "loss": 0.2174127697944641, + "step": 5727 + }, + { + "epoch": 1.5210463417872793, + "grad_norm": 1.4277294646697747, + "learning_rate": 2.979563771724019e-06, + "loss": 0.22455093264579773, + "step": 5728 + }, + { + "epoch": 1.5213119107688222, + "grad_norm": 1.2606427849530746, + "learning_rate": 2.976437635469678e-06, + "loss": 0.270727276802063, + "step": 5729 + }, + { + "epoch": 1.5215774797503652, + "grad_norm": 1.1901052998095392, + "learning_rate": 2.9733128532753254e-06, + "loss": 0.2233714610338211, + "step": 5730 + }, + { + "epoch": 1.5218430487319081, + "grad_norm": 1.364720864117707, + "learning_rate": 2.970189425743383e-06, + "loss": 0.23599566519260406, + "step": 5731 + }, + { + "epoch": 1.522108617713451, + "grad_norm": 1.2707197493270106, + "learning_rate": 2.967067353476011e-06, + "loss": 0.23598654568195343, + "step": 5732 + }, + { + "epoch": 1.522374186694994, + "grad_norm": 
1.1793549120144597, + "learning_rate": 2.963946637075107e-06, + "loss": 0.205197274684906, + "step": 5733 + }, + { + "epoch": 1.522639755676537, + "grad_norm": 1.1887492971446227, + "learning_rate": 2.9608272771423073e-06, + "loss": 0.23581506311893463, + "step": 5734 + }, + { + "epoch": 1.52290532465808, + "grad_norm": 1.2937911951812968, + "learning_rate": 2.9577092742789915e-06, + "loss": 0.2088197022676468, + "step": 5735 + }, + { + "epoch": 1.5231708936396229, + "grad_norm": 1.2943182118738674, + "learning_rate": 2.95459262908627e-06, + "loss": 0.22607067227363586, + "step": 5736 + }, + { + "epoch": 1.5234364626211658, + "grad_norm": 1.1748118237242067, + "learning_rate": 2.951477342164998e-06, + "loss": 0.22242344915866852, + "step": 5737 + }, + { + "epoch": 1.5237020316027088, + "grad_norm": 1.3280405020263697, + "learning_rate": 2.9483634141157636e-06, + "loss": 0.25626271963119507, + "step": 5738 + }, + { + "epoch": 1.5239676005842517, + "grad_norm": 1.2212084732536523, + "learning_rate": 2.9452508455388975e-06, + "loss": 0.2241421341896057, + "step": 5739 + }, + { + "epoch": 1.5242331695657947, + "grad_norm": 1.5088982481303157, + "learning_rate": 2.9421396370344648e-06, + "loss": 0.2191103994846344, + "step": 5740 + }, + { + "epoch": 1.5244987385473376, + "grad_norm": 1.2411878451658047, + "learning_rate": 2.9390297892022703e-06, + "loss": 0.26252660155296326, + "step": 5741 + }, + { + "epoch": 1.5247643075288806, + "grad_norm": 1.3964551352557335, + "learning_rate": 2.9359213026418567e-06, + "loss": 0.21522507071495056, + "step": 5742 + }, + { + "epoch": 1.5250298765104235, + "grad_norm": 1.0905013771622027, + "learning_rate": 2.932814177952499e-06, + "loss": 0.20159044861793518, + "step": 5743 + }, + { + "epoch": 1.5252954454919665, + "grad_norm": 1.138416177249403, + "learning_rate": 2.929708415733221e-06, + "loss": 0.22679558396339417, + "step": 5744 + }, + { + "epoch": 1.5255610144735094, + "grad_norm": 1.199157018703913, + "learning_rate": 
2.926604016582776e-06, + "loss": 0.2315664291381836, + "step": 5745 + }, + { + "epoch": 1.5258265834550524, + "grad_norm": 1.2568252329386058, + "learning_rate": 2.923500981099652e-06, + "loss": 0.229634091258049, + "step": 5746 + }, + { + "epoch": 1.5260921524365954, + "grad_norm": 1.2179751735416722, + "learning_rate": 2.9203993098820793e-06, + "loss": 0.20657674968242645, + "step": 5747 + }, + { + "epoch": 1.5263577214181385, + "grad_norm": 1.2447733239425043, + "learning_rate": 2.9172990035280237e-06, + "loss": 0.2306358814239502, + "step": 5748 + }, + { + "epoch": 1.5266232903996815, + "grad_norm": 1.2950411042959078, + "learning_rate": 2.9142000626351875e-06, + "loss": 0.2608031928539276, + "step": 5749 + }, + { + "epoch": 1.5268888593812244, + "grad_norm": 1.337100599856471, + "learning_rate": 2.911102487801013e-06, + "loss": 0.24675670266151428, + "step": 5750 + }, + { + "epoch": 1.5271544283627674, + "grad_norm": 1.3568337572597398, + "learning_rate": 2.908006279622667e-06, + "loss": 0.22544966638088226, + "step": 5751 + }, + { + "epoch": 1.5274199973443103, + "grad_norm": 1.3214418017258782, + "learning_rate": 2.904911438697071e-06, + "loss": 0.2328556478023529, + "step": 5752 + }, + { + "epoch": 1.5276855663258533, + "grad_norm": 1.25396823790717, + "learning_rate": 2.901817965620871e-06, + "loss": 0.2316005825996399, + "step": 5753 + }, + { + "epoch": 1.5279511353073962, + "grad_norm": 1.2976508240318196, + "learning_rate": 2.8987258609904522e-06, + "loss": 0.2332756370306015, + "step": 5754 + }, + { + "epoch": 1.5282167042889392, + "grad_norm": 1.3432276903845415, + "learning_rate": 2.8956351254019355e-06, + "loss": 0.24855142831802368, + "step": 5755 + }, + { + "epoch": 1.5284822732704821, + "grad_norm": 1.2138875439685706, + "learning_rate": 2.8925457594511775e-06, + "loss": 0.18745368719100952, + "step": 5756 + }, + { + "epoch": 1.528747842252025, + "grad_norm": 1.877743895818308, + "learning_rate": 2.889457763733774e-06, + "loss": 
0.22402942180633545, + "step": 5757 + }, + { + "epoch": 1.529013411233568, + "grad_norm": 1.292567134146249, + "learning_rate": 2.886371138845051e-06, + "loss": 0.2156108319759369, + "step": 5758 + }, + { + "epoch": 1.529278980215111, + "grad_norm": 1.2848231417758293, + "learning_rate": 2.883285885380076e-06, + "loss": 0.22866520285606384, + "step": 5759 + }, + { + "epoch": 1.529544549196654, + "grad_norm": 1.2907471990668473, + "learning_rate": 2.880202003933645e-06, + "loss": 0.2486938238143921, + "step": 5760 + }, + { + "epoch": 1.529810118178197, + "grad_norm": 1.34098643692872, + "learning_rate": 2.877119495100301e-06, + "loss": 0.2565295696258545, + "step": 5761 + }, + { + "epoch": 1.5300756871597399, + "grad_norm": 1.1480290388256142, + "learning_rate": 2.8740383594743116e-06, + "loss": 0.21510455012321472, + "step": 5762 + }, + { + "epoch": 1.5303412561412828, + "grad_norm": 1.266250058472157, + "learning_rate": 2.8709585976496825e-06, + "loss": 0.2122025489807129, + "step": 5763 + }, + { + "epoch": 1.5306068251228258, + "grad_norm": 1.3017513152107745, + "learning_rate": 2.8678802102201575e-06, + "loss": 0.24274399876594543, + "step": 5764 + }, + { + "epoch": 1.5308723941043687, + "grad_norm": 1.4573413266326471, + "learning_rate": 2.864803197779216e-06, + "loss": 0.22325341403484344, + "step": 5765 + }, + { + "epoch": 1.5311379630859117, + "grad_norm": 1.3303976558080437, + "learning_rate": 2.8617275609200625e-06, + "loss": 0.25205284357070923, + "step": 5766 + }, + { + "epoch": 1.5314035320674546, + "grad_norm": 1.2638986714524767, + "learning_rate": 2.8586533002356465e-06, + "loss": 0.2047557830810547, + "step": 5767 + }, + { + "epoch": 1.5316691010489976, + "grad_norm": 1.2195584514594966, + "learning_rate": 2.8555804163186508e-06, + "loss": 0.2166992425918579, + "step": 5768 + }, + { + "epoch": 1.5319346700305405, + "grad_norm": 1.2333416807696795, + "learning_rate": 2.8525089097614867e-06, + "loss": 0.26253193616867065, + "step": 5769 + }, + { + 
"epoch": 1.5322002390120835, + "grad_norm": 1.2030637435961495, + "learning_rate": 2.8494387811563108e-06, + "loss": 0.23307687044143677, + "step": 5770 + }, + { + "epoch": 1.5324658079936264, + "grad_norm": 1.2191481171426857, + "learning_rate": 2.8463700310950047e-06, + "loss": 0.22128549218177795, + "step": 5771 + }, + { + "epoch": 1.5327313769751694, + "grad_norm": 1.272136705974986, + "learning_rate": 2.8433026601691883e-06, + "loss": 0.21966281533241272, + "step": 5772 + }, + { + "epoch": 1.5329969459567123, + "grad_norm": 1.341088625881783, + "learning_rate": 2.840236668970213e-06, + "loss": 0.22869305312633514, + "step": 5773 + }, + { + "epoch": 1.5332625149382553, + "grad_norm": 1.2257027323986465, + "learning_rate": 2.837172058089167e-06, + "loss": 0.21431279182434082, + "step": 5774 + }, + { + "epoch": 1.5335280839197982, + "grad_norm": 1.3512853622822856, + "learning_rate": 2.8341088281168693e-06, + "loss": 0.24610282480716705, + "step": 5775 + }, + { + "epoch": 1.5337936529013412, + "grad_norm": 1.3400303957635655, + "learning_rate": 2.8310469796438767e-06, + "loss": 0.24414925277233124, + "step": 5776 + }, + { + "epoch": 1.5340592218828841, + "grad_norm": 1.3597459613858938, + "learning_rate": 2.8279865132604766e-06, + "loss": 0.2330513596534729, + "step": 5777 + }, + { + "epoch": 1.534324790864427, + "grad_norm": 1.2551411616890042, + "learning_rate": 2.8249274295566863e-06, + "loss": 0.23048308491706848, + "step": 5778 + }, + { + "epoch": 1.53459035984597, + "grad_norm": 1.2566974883874766, + "learning_rate": 2.821869729122273e-06, + "loss": 0.2411375492811203, + "step": 5779 + }, + { + "epoch": 1.534855928827513, + "grad_norm": 1.384873838300398, + "learning_rate": 2.818813412546715e-06, + "loss": 0.22985543310642242, + "step": 5780 + }, + { + "epoch": 1.535121497809056, + "grad_norm": 1.320574666083159, + "learning_rate": 2.815758480419235e-06, + "loss": 0.20867247879505157, + "step": 5781 + }, + { + "epoch": 1.5353870667905989, + "grad_norm": 
2.0414068761810182, + "learning_rate": 2.8127049333287913e-06, + "loss": 0.26378586888313293, + "step": 5782 + }, + { + "epoch": 1.5356526357721418, + "grad_norm": 1.552041032509997, + "learning_rate": 2.8096527718640687e-06, + "loss": 0.2690306305885315, + "step": 5783 + }, + { + "epoch": 1.5359182047536848, + "grad_norm": 1.1602606034579108, + "learning_rate": 2.8066019966134907e-06, + "loss": 0.22226165235042572, + "step": 5784 + }, + { + "epoch": 1.5361837737352277, + "grad_norm": 1.2201060637055436, + "learning_rate": 2.803552608165209e-06, + "loss": 0.23370322585105896, + "step": 5785 + }, + { + "epoch": 1.5364493427167707, + "grad_norm": 1.3067141176486328, + "learning_rate": 2.8005046071071107e-06, + "loss": 0.26137909293174744, + "step": 5786 + }, + { + "epoch": 1.5367149116983136, + "grad_norm": 1.3588127622676833, + "learning_rate": 2.7974579940268096e-06, + "loss": 0.22630617022514343, + "step": 5787 + }, + { + "epoch": 1.5369804806798566, + "grad_norm": 1.2356618590652273, + "learning_rate": 2.7944127695116663e-06, + "loss": 0.22641140222549438, + "step": 5788 + }, + { + "epoch": 1.5372460496613995, + "grad_norm": 1.266648551925957, + "learning_rate": 2.791368934148757e-06, + "loss": 0.19647541642189026, + "step": 5789 + }, + { + "epoch": 1.5375116186429425, + "grad_norm": 1.212906210017999, + "learning_rate": 2.788326488524901e-06, + "loss": 0.22399532794952393, + "step": 5790 + }, + { + "epoch": 1.5377771876244855, + "grad_norm": 1.2862970389756843, + "learning_rate": 2.7852854332266434e-06, + "loss": 0.22549685835838318, + "step": 5791 + }, + { + "epoch": 1.5380427566060284, + "grad_norm": 1.168406987557996, + "learning_rate": 2.7822457688402637e-06, + "loss": 0.2129821628332138, + "step": 5792 + }, + { + "epoch": 1.5383083255875714, + "grad_norm": 1.2301298306170827, + "learning_rate": 2.7792074959517755e-06, + "loss": 0.25330638885498047, + "step": 5793 + }, + { + "epoch": 1.5385738945691143, + "grad_norm": 1.3148661968254225, + "learning_rate": 
2.7761706151469204e-06, + "loss": 0.2413945198059082, + "step": 5794 + }, + { + "epoch": 1.5388394635506573, + "grad_norm": 1.2551515744231165, + "learning_rate": 2.773135127011174e-06, + "loss": 0.21930523216724396, + "step": 5795 + }, + { + "epoch": 1.5391050325322002, + "grad_norm": 1.2506577052831476, + "learning_rate": 2.7701010321297416e-06, + "loss": 0.25499141216278076, + "step": 5796 + }, + { + "epoch": 1.5393706015137432, + "grad_norm": 1.1567311669751301, + "learning_rate": 2.7670683310875613e-06, + "loss": 0.19475680589675903, + "step": 5797 + }, + { + "epoch": 1.5396361704952861, + "grad_norm": 1.3159422945276043, + "learning_rate": 2.7640370244693026e-06, + "loss": 0.22155825793743134, + "step": 5798 + }, + { + "epoch": 1.539901739476829, + "grad_norm": 1.1818601031709017, + "learning_rate": 2.761007112859365e-06, + "loss": 0.2146138846874237, + "step": 5799 + }, + { + "epoch": 1.540167308458372, + "grad_norm": 1.146035478957987, + "learning_rate": 2.7579785968418804e-06, + "loss": 0.22698411345481873, + "step": 5800 + }, + { + "epoch": 1.540432877439915, + "grad_norm": 1.2904710642906891, + "learning_rate": 2.75495147700071e-06, + "loss": 0.23889532685279846, + "step": 5801 + }, + { + "epoch": 1.540698446421458, + "grad_norm": 1.2353012354195356, + "learning_rate": 2.7519257539194488e-06, + "loss": 0.2514609694480896, + "step": 5802 + }, + { + "epoch": 1.5409640154030009, + "grad_norm": 1.2405153867334813, + "learning_rate": 2.7489014281814185e-06, + "loss": 0.22332100570201874, + "step": 5803 + }, + { + "epoch": 1.5412295843845438, + "grad_norm": 1.1768236369414826, + "learning_rate": 2.745878500369673e-06, + "loss": 0.21316683292388916, + "step": 5804 + }, + { + "epoch": 1.5414951533660868, + "grad_norm": 1.2446325297163028, + "learning_rate": 2.742856971066996e-06, + "loss": 0.2228018194437027, + "step": 5805 + }, + { + "epoch": 1.5417607223476297, + "grad_norm": 1.3243067869686356, + "learning_rate": 2.7398368408559084e-06, + "loss": 
0.22217239439487457, + "step": 5806 + }, + { + "epoch": 1.5420262913291727, + "grad_norm": 1.331116794742511, + "learning_rate": 2.736818110318652e-06, + "loss": 0.21147233247756958, + "step": 5807 + }, + { + "epoch": 1.5422918603107156, + "grad_norm": 1.2851526092309566, + "learning_rate": 2.7338007800372024e-06, + "loss": 0.23844698071479797, + "step": 5808 + }, + { + "epoch": 1.5425574292922586, + "grad_norm": 1.3238454632326748, + "learning_rate": 2.7307848505932653e-06, + "loss": 0.2361423820257187, + "step": 5809 + }, + { + "epoch": 1.5428229982738015, + "grad_norm": 1.1977956377916248, + "learning_rate": 2.727770322568277e-06, + "loss": 0.21585656702518463, + "step": 5810 + }, + { + "epoch": 1.5430885672553445, + "grad_norm": 1.172295737533699, + "learning_rate": 2.724757196543403e-06, + "loss": 0.233969584107399, + "step": 5811 + }, + { + "epoch": 1.5433541362368874, + "grad_norm": 1.3309852612756656, + "learning_rate": 2.7217454730995363e-06, + "loss": 0.25040164589881897, + "step": 5812 + }, + { + "epoch": 1.5436197052184304, + "grad_norm": 1.5198455877328005, + "learning_rate": 2.7187351528173046e-06, + "loss": 0.25848713517189026, + "step": 5813 + }, + { + "epoch": 1.5438852741999733, + "grad_norm": 1.409976572144199, + "learning_rate": 2.715726236277061e-06, + "loss": 0.22255051136016846, + "step": 5814 + }, + { + "epoch": 1.5441508431815163, + "grad_norm": 1.1799889920310853, + "learning_rate": 2.7127187240588883e-06, + "loss": 0.1882694661617279, + "step": 5815 + }, + { + "epoch": 1.5444164121630592, + "grad_norm": 1.178741445510241, + "learning_rate": 2.7097126167426002e-06, + "loss": 0.20070400834083557, + "step": 5816 + }, + { + "epoch": 1.5446819811446022, + "grad_norm": 1.2959554460073714, + "learning_rate": 2.706707914907739e-06, + "loss": 0.25316092371940613, + "step": 5817 + }, + { + "epoch": 1.5449475501261452, + "grad_norm": 1.334925654094324, + "learning_rate": 2.703704619133576e-06, + "loss": 0.24665585160255432, + "step": 5818 + }, + { + 
"epoch": 1.545213119107688, + "grad_norm": 1.290703779819622, + "learning_rate": 2.7007027299991095e-06, + "loss": 0.24172846972942352, + "step": 5819 + }, + { + "epoch": 1.545478688089231, + "grad_norm": 1.2781945872260183, + "learning_rate": 2.6977022480830708e-06, + "loss": 0.2405129075050354, + "step": 5820 + }, + { + "epoch": 1.545744257070774, + "grad_norm": 1.075296946307477, + "learning_rate": 2.694703173963914e-06, + "loss": 0.19716276228427887, + "step": 5821 + }, + { + "epoch": 1.546009826052317, + "grad_norm": 1.1434881656258093, + "learning_rate": 2.6917055082198284e-06, + "loss": 0.20343703031539917, + "step": 5822 + }, + { + "epoch": 1.54627539503386, + "grad_norm": 1.5985849963050902, + "learning_rate": 2.688709251428725e-06, + "loss": 0.24382619559764862, + "step": 5823 + }, + { + "epoch": 1.5465409640154029, + "grad_norm": 1.7314575476063523, + "learning_rate": 2.6857144041682514e-06, + "loss": 0.2962399423122406, + "step": 5824 + }, + { + "epoch": 1.5468065329969458, + "grad_norm": 1.2699118659079873, + "learning_rate": 2.6827209670157774e-06, + "loss": 0.24034687876701355, + "step": 5825 + }, + { + "epoch": 1.5470721019784888, + "grad_norm": 1.3757632125147359, + "learning_rate": 2.6797289405484016e-06, + "loss": 0.2575085163116455, + "step": 5826 + }, + { + "epoch": 1.5473376709600317, + "grad_norm": 1.556424910652697, + "learning_rate": 2.6767383253429515e-06, + "loss": 0.2586629092693329, + "step": 5827 + }, + { + "epoch": 1.5476032399415747, + "grad_norm": 1.096117045688234, + "learning_rate": 2.6737491219759815e-06, + "loss": 0.18447624146938324, + "step": 5828 + }, + { + "epoch": 1.5478688089231176, + "grad_norm": 1.3930188378643134, + "learning_rate": 2.670761331023779e-06, + "loss": 0.244853213429451, + "step": 5829 + }, + { + "epoch": 1.5481343779046606, + "grad_norm": 1.3163693020327074, + "learning_rate": 2.66777495306235e-06, + "loss": 0.24641919136047363, + "step": 5830 + }, + { + "epoch": 1.5483999468862035, + "grad_norm": 
1.4086337954424433, + "learning_rate": 2.6647899886674323e-06, + "loss": 0.2364550232887268, + "step": 5831 + }, + { + "epoch": 1.5486655158677467, + "grad_norm": 1.1695450852938096, + "learning_rate": 2.6618064384144925e-06, + "loss": 0.17760278284549713, + "step": 5832 + }, + { + "epoch": 1.5489310848492897, + "grad_norm": 1.1988872335295608, + "learning_rate": 2.6588243028787274e-06, + "loss": 0.18571510910987854, + "step": 5833 + }, + { + "epoch": 1.5491966538308326, + "grad_norm": 1.2537289047953852, + "learning_rate": 2.655843582635057e-06, + "loss": 0.23693162202835083, + "step": 5834 + }, + { + "epoch": 1.5494622228123756, + "grad_norm": 1.3552352092705502, + "learning_rate": 2.652864278258126e-06, + "loss": 0.26481011509895325, + "step": 5835 + }, + { + "epoch": 1.5497277917939185, + "grad_norm": 1.4182429828127188, + "learning_rate": 2.6498863903223115e-06, + "loss": 0.23405003547668457, + "step": 5836 + }, + { + "epoch": 1.5499933607754615, + "grad_norm": 2.5576796684815686, + "learning_rate": 2.6469099194017144e-06, + "loss": 0.20662814378738403, + "step": 5837 + }, + { + "epoch": 1.5502589297570044, + "grad_norm": 1.3124069479853646, + "learning_rate": 2.6439348660701634e-06, + "loss": 0.2722313404083252, + "step": 5838 + }, + { + "epoch": 1.5505244987385474, + "grad_norm": 1.3906100112719377, + "learning_rate": 2.6409612309012134e-06, + "loss": 0.2288864552974701, + "step": 5839 + }, + { + "epoch": 1.5507900677200903, + "grad_norm": 1.322570753297788, + "learning_rate": 2.6379890144681464e-06, + "loss": 0.2286190539598465, + "step": 5840 + }, + { + "epoch": 1.5510556367016333, + "grad_norm": 1.2231420705695173, + "learning_rate": 2.6350182173439666e-06, + "loss": 0.22478938102722168, + "step": 5841 + }, + { + "epoch": 1.5513212056831762, + "grad_norm": 1.415848841276022, + "learning_rate": 2.6320488401014166e-06, + "loss": 0.2520615756511688, + "step": 5842 + }, + { + "epoch": 1.5515867746647192, + "grad_norm": 1.3741284890856262, + "learning_rate": 
2.629080883312952e-06, + "loss": 0.2121289074420929, + "step": 5843 + }, + { + "epoch": 1.5518523436462621, + "grad_norm": 1.3092311759839703, + "learning_rate": 2.6261143475507656e-06, + "loss": 0.2252352237701416, + "step": 5844 + }, + { + "epoch": 1.552117912627805, + "grad_norm": 1.191285245143269, + "learning_rate": 2.6231492333867626e-06, + "loss": 0.21188892424106598, + "step": 5845 + }, + { + "epoch": 1.552383481609348, + "grad_norm": 1.1276138403597054, + "learning_rate": 2.6201855413925857e-06, + "loss": 0.21534699201583862, + "step": 5846 + }, + { + "epoch": 1.552649050590891, + "grad_norm": 1.2849885490704696, + "learning_rate": 2.6172232721395998e-06, + "loss": 0.21781614422798157, + "step": 5847 + }, + { + "epoch": 1.552914619572434, + "grad_norm": 1.3317886914724781, + "learning_rate": 2.6142624261988947e-06, + "loss": 0.2476508915424347, + "step": 5848 + }, + { + "epoch": 1.5531801885539769, + "grad_norm": 1.3439658215829489, + "learning_rate": 2.611303004141287e-06, + "loss": 0.2692151665687561, + "step": 5849 + }, + { + "epoch": 1.5534457575355198, + "grad_norm": 1.2839746536411722, + "learning_rate": 2.6083450065373163e-06, + "loss": 0.24868687987327576, + "step": 5850 + }, + { + "epoch": 1.5537113265170628, + "grad_norm": 1.2704813852574235, + "learning_rate": 2.6053884339572543e-06, + "loss": 0.24215853214263916, + "step": 5851 + }, + { + "epoch": 1.5539768954986057, + "grad_norm": 1.2100819665594098, + "learning_rate": 2.602433286971091e-06, + "loss": 0.2157444804906845, + "step": 5852 + }, + { + "epoch": 1.5542424644801487, + "grad_norm": 1.369237575424674, + "learning_rate": 2.599479566148544e-06, + "loss": 0.22152379155158997, + "step": 5853 + }, + { + "epoch": 1.5545080334616916, + "grad_norm": 1.1930490692336162, + "learning_rate": 2.596527272059055e-06, + "loss": 0.2278299182653427, + "step": 5854 + }, + { + "epoch": 1.5547736024432346, + "grad_norm": 1.406485645097326, + "learning_rate": 2.593576405271793e-06, + "loss": 
0.23183950781822205, + "step": 5855 + }, + { + "epoch": 1.5550391714247775, + "grad_norm": 1.209726796816396, + "learning_rate": 2.5906269663556484e-06, + "loss": 0.22167566418647766, + "step": 5856 + }, + { + "epoch": 1.5553047404063205, + "grad_norm": 1.1790986825354977, + "learning_rate": 2.5876789558792403e-06, + "loss": 0.24111366271972656, + "step": 5857 + }, + { + "epoch": 1.5555703093878634, + "grad_norm": 1.1706391072024214, + "learning_rate": 2.5847323744109087e-06, + "loss": 0.2090388983488083, + "step": 5858 + }, + { + "epoch": 1.5558358783694064, + "grad_norm": 1.2588154614837785, + "learning_rate": 2.58178722251872e-06, + "loss": 0.2087189108133316, + "step": 5859 + }, + { + "epoch": 1.5561014473509496, + "grad_norm": 1.300626487965864, + "learning_rate": 2.578843500770465e-06, + "loss": 0.2277342677116394, + "step": 5860 + }, + { + "epoch": 1.5563670163324925, + "grad_norm": 1.3517116904487896, + "learning_rate": 2.57590120973366e-06, + "loss": 0.2204241305589676, + "step": 5861 + }, + { + "epoch": 1.5566325853140355, + "grad_norm": 1.213807933631201, + "learning_rate": 2.5729603499755416e-06, + "loss": 0.2138606607913971, + "step": 5862 + }, + { + "epoch": 1.5568981542955784, + "grad_norm": 1.4669648743657906, + "learning_rate": 2.5700209220630733e-06, + "loss": 0.21257862448692322, + "step": 5863 + }, + { + "epoch": 1.5571637232771214, + "grad_norm": 1.2314998246120414, + "learning_rate": 2.5670829265629437e-06, + "loss": 0.20991909503936768, + "step": 5864 + }, + { + "epoch": 1.5574292922586643, + "grad_norm": 1.294980658460416, + "learning_rate": 2.5641463640415633e-06, + "loss": 0.23745422065258026, + "step": 5865 + }, + { + "epoch": 1.5576948612402073, + "grad_norm": 1.2425796180120088, + "learning_rate": 2.561211235065065e-06, + "loss": 0.21482989192008972, + "step": 5866 + }, + { + "epoch": 1.5579604302217502, + "grad_norm": 1.008120888370748, + "learning_rate": 2.558277540199309e-06, + "loss": 0.17866572737693787, + "step": 5867 + }, + { + 
"epoch": 1.5582259992032932, + "grad_norm": 1.2966262005019353, + "learning_rate": 2.555345280009872e-06, + "loss": 0.223822683095932, + "step": 5868 + }, + { + "epoch": 1.5584915681848361, + "grad_norm": 1.339606961190666, + "learning_rate": 2.552414455062068e-06, + "loss": 0.2293519228696823, + "step": 5869 + }, + { + "epoch": 1.558757137166379, + "grad_norm": 1.3023504432012787, + "learning_rate": 2.5494850659209203e-06, + "loss": 0.2556726038455963, + "step": 5870 + }, + { + "epoch": 1.559022706147922, + "grad_norm": 1.255574464472328, + "learning_rate": 2.546557113151181e-06, + "loss": 0.26891303062438965, + "step": 5871 + }, + { + "epoch": 1.559288275129465, + "grad_norm": 1.1754509839553133, + "learning_rate": 2.5436305973173257e-06, + "loss": 0.19510813057422638, + "step": 5872 + }, + { + "epoch": 1.559553844111008, + "grad_norm": 1.2819966401856495, + "learning_rate": 2.5407055189835518e-06, + "loss": 0.22906547784805298, + "step": 5873 + }, + { + "epoch": 1.559819413092551, + "grad_norm": 1.3121165067922245, + "learning_rate": 2.5377818787137788e-06, + "loss": 0.25452786684036255, + "step": 5874 + }, + { + "epoch": 1.5600849820740939, + "grad_norm": 1.2743199898597464, + "learning_rate": 2.5348596770716503e-06, + "loss": 0.205597922205925, + "step": 5875 + }, + { + "epoch": 1.5603505510556368, + "grad_norm": 1.3020148941868286, + "learning_rate": 2.5319389146205344e-06, + "loss": 0.24009352922439575, + "step": 5876 + }, + { + "epoch": 1.5606161200371798, + "grad_norm": 1.433983972963341, + "learning_rate": 2.5290195919235173e-06, + "loss": 0.23381268978118896, + "step": 5877 + }, + { + "epoch": 1.5608816890187227, + "grad_norm": 1.1554092234943296, + "learning_rate": 2.52610170954341e-06, + "loss": 0.2267276644706726, + "step": 5878 + }, + { + "epoch": 1.5611472580002657, + "grad_norm": 1.2742422977156036, + "learning_rate": 2.5231852680427482e-06, + "loss": 0.24330289661884308, + "step": 5879 + }, + { + "epoch": 1.5614128269818086, + "grad_norm": 
1.2802855767249914, + "learning_rate": 2.5202702679837852e-06, + "loss": 0.24877145886421204, + "step": 5880 + }, + { + "epoch": 1.5616783959633516, + "grad_norm": 1.1377670913842177, + "learning_rate": 2.5173567099285e-06, + "loss": 0.20410388708114624, + "step": 5881 + }, + { + "epoch": 1.5619439649448945, + "grad_norm": 1.2268765869469427, + "learning_rate": 2.514444594438591e-06, + "loss": 0.21524877846240997, + "step": 5882 + }, + { + "epoch": 1.5622095339264375, + "grad_norm": 1.1986269244208958, + "learning_rate": 2.5115339220754796e-06, + "loss": 0.18785043060779572, + "step": 5883 + }, + { + "epoch": 1.5624751029079804, + "grad_norm": 1.3539528047627718, + "learning_rate": 2.5086246934003113e-06, + "loss": 0.21200208365917206, + "step": 5884 + }, + { + "epoch": 1.5627406718895234, + "grad_norm": 1.6373531833898813, + "learning_rate": 2.5057169089739485e-06, + "loss": 0.20752021670341492, + "step": 5885 + }, + { + "epoch": 1.5630062408710663, + "grad_norm": 1.1717071963534185, + "learning_rate": 2.502810569356976e-06, + "loss": 0.21395736932754517, + "step": 5886 + }, + { + "epoch": 1.5632718098526093, + "grad_norm": 1.2664848714228343, + "learning_rate": 2.499905675109707e-06, + "loss": 0.26949262619018555, + "step": 5887 + }, + { + "epoch": 1.5635373788341522, + "grad_norm": 1.5283985889023297, + "learning_rate": 2.497002226792169e-06, + "loss": 0.2309839278459549, + "step": 5888 + }, + { + "epoch": 1.5638029478156952, + "grad_norm": 1.2596143819163301, + "learning_rate": 2.4941002249641123e-06, + "loss": 0.24415400624275208, + "step": 5889 + }, + { + "epoch": 1.5640685167972381, + "grad_norm": 1.3074402223027564, + "learning_rate": 2.4911996701850083e-06, + "loss": 0.23493322730064392, + "step": 5890 + }, + { + "epoch": 1.564334085778781, + "grad_norm": 1.260748243658743, + "learning_rate": 2.488300563014049e-06, + "loss": 0.23824438452720642, + "step": 5891 + }, + { + "epoch": 1.564599654760324, + "grad_norm": 1.2534870916273309, + "learning_rate": 
2.4854029040101503e-06, + "loss": 0.2523414194583893, + "step": 5892 + }, + { + "epoch": 1.564865223741867, + "grad_norm": 1.2879106186872462, + "learning_rate": 2.482506693731944e-06, + "loss": 0.21360887587070465, + "step": 5893 + }, + { + "epoch": 1.56513079272341, + "grad_norm": 1.1951820042572139, + "learning_rate": 2.47961193273779e-06, + "loss": 0.21182934939861298, + "step": 5894 + }, + { + "epoch": 1.5653963617049529, + "grad_norm": 1.4293886797193323, + "learning_rate": 2.4767186215857542e-06, + "loss": 0.23104771971702576, + "step": 5895 + }, + { + "epoch": 1.5656619306864958, + "grad_norm": 1.2606491547398977, + "learning_rate": 2.473826760833643e-06, + "loss": 0.22297397255897522, + "step": 5896 + }, + { + "epoch": 1.5659274996680388, + "grad_norm": 1.176802218612286, + "learning_rate": 2.4709363510389684e-06, + "loss": 0.21597865223884583, + "step": 5897 + }, + { + "epoch": 1.5661930686495817, + "grad_norm": 1.4303555951561693, + "learning_rate": 2.468047392758969e-06, + "loss": 0.27620527148246765, + "step": 5898 + }, + { + "epoch": 1.5664586376311247, + "grad_norm": 1.373809252877093, + "learning_rate": 2.465159886550601e-06, + "loss": 0.25262463092803955, + "step": 5899 + }, + { + "epoch": 1.5667242066126676, + "grad_norm": 1.376719462816966, + "learning_rate": 2.462273832970542e-06, + "loss": 0.2729034125804901, + "step": 5900 + }, + { + "epoch": 1.5669897755942106, + "grad_norm": 1.3637563490895455, + "learning_rate": 2.459389232575188e-06, + "loss": 0.2313854992389679, + "step": 5901 + }, + { + "epoch": 1.5672553445757536, + "grad_norm": 1.3202318144066494, + "learning_rate": 2.456506085920658e-06, + "loss": 0.22513791918754578, + "step": 5902 + }, + { + "epoch": 1.5675209135572965, + "grad_norm": 1.3152362934287614, + "learning_rate": 2.4536243935627856e-06, + "loss": 0.2658824026584625, + "step": 5903 + }, + { + "epoch": 1.5677864825388395, + "grad_norm": 1.1721087348112986, + "learning_rate": 2.4507441560571275e-06, + "loss": 
0.21781010925769806, + "step": 5904 + }, + { + "epoch": 1.5680520515203824, + "grad_norm": 1.3393030222309363, + "learning_rate": 2.4478653739589632e-06, + "loss": 0.21047937870025635, + "step": 5905 + }, + { + "epoch": 1.5683176205019254, + "grad_norm": 1.2196979825563006, + "learning_rate": 2.4449880478232858e-06, + "loss": 0.21674057841300964, + "step": 5906 + }, + { + "epoch": 1.5685831894834683, + "grad_norm": 1.200112520021674, + "learning_rate": 2.44211217820481e-06, + "loss": 0.22062627971172333, + "step": 5907 + }, + { + "epoch": 1.5688487584650113, + "grad_norm": 1.3158234051142574, + "learning_rate": 2.439237765657968e-06, + "loss": 0.22440886497497559, + "step": 5908 + }, + { + "epoch": 1.5691143274465542, + "grad_norm": 1.129873307165861, + "learning_rate": 2.4363648107369175e-06, + "loss": 0.21888123452663422, + "step": 5909 + }, + { + "epoch": 1.5693798964280972, + "grad_norm": 1.2586007199788052, + "learning_rate": 2.433493313995524e-06, + "loss": 0.23104462027549744, + "step": 5910 + }, + { + "epoch": 1.5696454654096401, + "grad_norm": 1.427902558182486, + "learning_rate": 2.4306232759873803e-06, + "loss": 0.23032237589359283, + "step": 5911 + }, + { + "epoch": 1.569911034391183, + "grad_norm": 1.3780752776280365, + "learning_rate": 2.4277546972657974e-06, + "loss": 0.2588527202606201, + "step": 5912 + }, + { + "epoch": 1.570176603372726, + "grad_norm": 1.4647042397629928, + "learning_rate": 2.424887578383799e-06, + "loss": 0.2845698893070221, + "step": 5913 + }, + { + "epoch": 1.570442172354269, + "grad_norm": 1.338246310760916, + "learning_rate": 2.4220219198941384e-06, + "loss": 0.23010894656181335, + "step": 5914 + }, + { + "epoch": 1.570707741335812, + "grad_norm": 1.3783426416349442, + "learning_rate": 2.419157722349278e-06, + "loss": 0.2623594403266907, + "step": 5915 + }, + { + "epoch": 1.5709733103173549, + "grad_norm": 1.2349976574308903, + "learning_rate": 2.416294986301401e-06, + "loss": 0.2107153981924057, + "step": 5916 + }, + { + 
"epoch": 1.5712388792988978, + "grad_norm": 1.3633626366853218, + "learning_rate": 2.413433712302409e-06, + "loss": 0.2115003615617752, + "step": 5917 + }, + { + "epoch": 1.5715044482804408, + "grad_norm": 1.3738602333573011, + "learning_rate": 2.410573900903921e-06, + "loss": 0.22406762838363647, + "step": 5918 + }, + { + "epoch": 1.5717700172619837, + "grad_norm": 1.3017270649216575, + "learning_rate": 2.407715552657277e-06, + "loss": 0.24878525733947754, + "step": 5919 + }, + { + "epoch": 1.5720355862435267, + "grad_norm": 1.5003273963811, + "learning_rate": 2.404858668113532e-06, + "loss": 0.24546805024147034, + "step": 5920 + }, + { + "epoch": 1.5723011552250696, + "grad_norm": 1.5650848412040055, + "learning_rate": 2.402003247823459e-06, + "loss": 0.23430263996124268, + "step": 5921 + }, + { + "epoch": 1.5725667242066126, + "grad_norm": 1.3939131226044492, + "learning_rate": 2.399149292337547e-06, + "loss": 0.26935267448425293, + "step": 5922 + }, + { + "epoch": 1.5728322931881555, + "grad_norm": 1.1554138984093538, + "learning_rate": 2.3962968022060097e-06, + "loss": 0.21104472875595093, + "step": 5923 + }, + { + "epoch": 1.5730978621696985, + "grad_norm": 1.147816084956367, + "learning_rate": 2.3934457779787755e-06, + "loss": 0.17162750661373138, + "step": 5924 + }, + { + "epoch": 1.5733634311512414, + "grad_norm": 1.2036391990293953, + "learning_rate": 2.390596220205481e-06, + "loss": 0.22233474254608154, + "step": 5925 + }, + { + "epoch": 1.5736290001327844, + "grad_norm": 1.456348691360017, + "learning_rate": 2.387748129435491e-06, + "loss": 0.2326992005109787, + "step": 5926 + }, + { + "epoch": 1.5738945691143273, + "grad_norm": 1.2656294085970974, + "learning_rate": 2.3849015062178835e-06, + "loss": 0.245779350399971, + "step": 5927 + }, + { + "epoch": 1.5741601380958703, + "grad_norm": 1.2198185109849795, + "learning_rate": 2.382056351101454e-06, + "loss": 0.24269379675388336, + "step": 5928 + }, + { + "epoch": 1.5744257070774133, + "grad_norm": 
1.2241918308854736, + "learning_rate": 2.3792126646347138e-06, + "loss": 0.23644019663333893, + "step": 5929 + }, + { + "epoch": 1.5746912760589562, + "grad_norm": 1.2680435600362268, + "learning_rate": 2.376370447365893e-06, + "loss": 0.254330575466156, + "step": 5930 + }, + { + "epoch": 1.5749568450404992, + "grad_norm": 1.4146409212378834, + "learning_rate": 2.373529699842936e-06, + "loss": 0.2728506922721863, + "step": 5931 + }, + { + "epoch": 1.575222414022042, + "grad_norm": 1.3627178065769006, + "learning_rate": 2.3706904226135087e-06, + "loss": 0.23671439290046692, + "step": 5932 + }, + { + "epoch": 1.575487983003585, + "grad_norm": 1.409873356618632, + "learning_rate": 2.367852616224989e-06, + "loss": 0.24205748736858368, + "step": 5933 + }, + { + "epoch": 1.575753551985128, + "grad_norm": 1.2728197754861583, + "learning_rate": 2.3650162812244725e-06, + "loss": 0.1915436089038849, + "step": 5934 + }, + { + "epoch": 1.576019120966671, + "grad_norm": 1.2091326643578577, + "learning_rate": 2.3621814181587697e-06, + "loss": 0.23453299701213837, + "step": 5935 + }, + { + "epoch": 1.576284689948214, + "grad_norm": 1.3060415308267561, + "learning_rate": 2.3593480275744106e-06, + "loss": 0.24066327512264252, + "step": 5936 + }, + { + "epoch": 1.5765502589297569, + "grad_norm": 1.246429396187596, + "learning_rate": 2.356516110017639e-06, + "loss": 0.22510530054569244, + "step": 5937 + }, + { + "epoch": 1.5768158279112998, + "grad_norm": 1.2889494549478113, + "learning_rate": 2.3536856660344144e-06, + "loss": 0.22967353463172913, + "step": 5938 + }, + { + "epoch": 1.5770813968928428, + "grad_norm": 1.2404139099674472, + "learning_rate": 2.3508566961704127e-06, + "loss": 0.2299107313156128, + "step": 5939 + }, + { + "epoch": 1.5773469658743857, + "grad_norm": 1.2560783974284127, + "learning_rate": 2.3480292009710282e-06, + "loss": 0.23418918251991272, + "step": 5940 + }, + { + "epoch": 1.5776125348559287, + "grad_norm": 1.2857056044544095, + "learning_rate": 
2.3452031809813657e-06, + "loss": 0.26528510451316833, + "step": 5941 + }, + { + "epoch": 1.5778781038374716, + "grad_norm": 1.1247059842406957, + "learning_rate": 2.342378636746251e-06, + "loss": 0.21878717839717865, + "step": 5942 + }, + { + "epoch": 1.5781436728190146, + "grad_norm": 1.1637472196421235, + "learning_rate": 2.339555568810221e-06, + "loss": 0.19697530567646027, + "step": 5943 + }, + { + "epoch": 1.5784092418005577, + "grad_norm": 1.3422665805434115, + "learning_rate": 2.3367339777175313e-06, + "loss": 0.24812257289886475, + "step": 5944 + }, + { + "epoch": 1.5786748107821007, + "grad_norm": 1.3285793357341238, + "learning_rate": 2.3339138640121504e-06, + "loss": 0.27651745080947876, + "step": 5945 + }, + { + "epoch": 1.5789403797636437, + "grad_norm": 1.308131821171991, + "learning_rate": 2.3310952282377643e-06, + "loss": 0.2651634216308594, + "step": 5946 + }, + { + "epoch": 1.5792059487451866, + "grad_norm": 1.3163549633798883, + "learning_rate": 2.328278070937772e-06, + "loss": 0.23799028992652893, + "step": 5947 + }, + { + "epoch": 1.5794715177267296, + "grad_norm": 1.4229706240812914, + "learning_rate": 2.3254623926552867e-06, + "loss": 0.2528802752494812, + "step": 5948 + }, + { + "epoch": 1.5797370867082725, + "grad_norm": 1.2071666314804592, + "learning_rate": 2.322648193933137e-06, + "loss": 0.23819346725940704, + "step": 5949 + }, + { + "epoch": 1.5800026556898155, + "grad_norm": 1.2694222057013376, + "learning_rate": 2.319835475313873e-06, + "loss": 0.2510845959186554, + "step": 5950 + }, + { + "epoch": 1.5802682246713584, + "grad_norm": 1.0731141255180743, + "learning_rate": 2.31702423733975e-06, + "loss": 0.20156612992286682, + "step": 5951 + }, + { + "epoch": 1.5805337936529014, + "grad_norm": 1.320010192923148, + "learning_rate": 2.3142144805527413e-06, + "loss": 0.23375174403190613, + "step": 5952 + }, + { + "epoch": 1.5807993626344443, + "grad_norm": 1.187058092026163, + "learning_rate": 2.311406205494535e-06, + "loss": 
0.2378280758857727, + "step": 5953 + }, + { + "epoch": 1.5810649316159873, + "grad_norm": 1.4550533599389408, + "learning_rate": 2.308599412706535e-06, + "loss": 0.2087683081626892, + "step": 5954 + }, + { + "epoch": 1.5813305005975302, + "grad_norm": 1.2856302099767283, + "learning_rate": 2.3057941027298557e-06, + "loss": 0.2228693962097168, + "step": 5955 + }, + { + "epoch": 1.5815960695790732, + "grad_norm": 1.4738789364963756, + "learning_rate": 2.302990276105329e-06, + "loss": 0.22694727778434753, + "step": 5956 + }, + { + "epoch": 1.5818616385606161, + "grad_norm": 1.2486840544551192, + "learning_rate": 2.300187933373499e-06, + "loss": 0.22996942698955536, + "step": 5957 + }, + { + "epoch": 1.582127207542159, + "grad_norm": 1.331719034245123, + "learning_rate": 2.2973870750746253e-06, + "loss": 0.2440253496170044, + "step": 5958 + }, + { + "epoch": 1.582392776523702, + "grad_norm": 1.3266637203740035, + "learning_rate": 2.2945877017486782e-06, + "loss": 0.2507309019565582, + "step": 5959 + }, + { + "epoch": 1.582658345505245, + "grad_norm": 2.8683041985739677, + "learning_rate": 2.2917898139353467e-06, + "loss": 0.24790918827056885, + "step": 5960 + }, + { + "epoch": 1.582923914486788, + "grad_norm": 1.4168604850261965, + "learning_rate": 2.2889934121740287e-06, + "loss": 0.22106975317001343, + "step": 5961 + }, + { + "epoch": 1.5831894834683309, + "grad_norm": 1.5726662217531726, + "learning_rate": 2.2861984970038385e-06, + "loss": 0.2410939633846283, + "step": 5962 + }, + { + "epoch": 1.5834550524498738, + "grad_norm": 1.1559016560001114, + "learning_rate": 2.283405068963601e-06, + "loss": 0.22821484506130219, + "step": 5963 + }, + { + "epoch": 1.5837206214314168, + "grad_norm": 1.2324685594628142, + "learning_rate": 2.2806131285918588e-06, + "loss": 0.21425281465053558, + "step": 5964 + }, + { + "epoch": 1.5839861904129597, + "grad_norm": 1.2434376170807215, + "learning_rate": 2.277822676426863e-06, + "loss": 0.22428902983665466, + "step": 5965 + }, + { + 
"epoch": 1.5842517593945027, + "grad_norm": 1.4592375031786005, + "learning_rate": 2.27503371300658e-06, + "loss": 0.2986769676208496, + "step": 5966 + }, + { + "epoch": 1.5845173283760456, + "grad_norm": 1.4384957681975041, + "learning_rate": 2.272246238868687e-06, + "loss": 0.24697065353393555, + "step": 5967 + }, + { + "epoch": 1.5847828973575886, + "grad_norm": 1.3175254870878064, + "learning_rate": 2.269460254550583e-06, + "loss": 0.23725461959838867, + "step": 5968 + }, + { + "epoch": 1.5850484663391315, + "grad_norm": 1.5010497616053564, + "learning_rate": 2.2666757605893664e-06, + "loss": 0.2661248445510864, + "step": 5969 + }, + { + "epoch": 1.5853140353206745, + "grad_norm": 1.2390278830143426, + "learning_rate": 2.263892757521858e-06, + "loss": 0.23328733444213867, + "step": 5970 + }, + { + "epoch": 1.5855796043022174, + "grad_norm": 1.2547818797647754, + "learning_rate": 2.2611112458845873e-06, + "loss": 0.22886580228805542, + "step": 5971 + }, + { + "epoch": 1.5858451732837606, + "grad_norm": 1.1882681583888588, + "learning_rate": 2.2583312262137966e-06, + "loss": 0.25051698088645935, + "step": 5972 + }, + { + "epoch": 1.5861107422653036, + "grad_norm": 1.2988472953319592, + "learning_rate": 2.2555526990454413e-06, + "loss": 0.2400815784931183, + "step": 5973 + }, + { + "epoch": 1.5863763112468465, + "grad_norm": 1.1598677166947555, + "learning_rate": 2.2527756649151912e-06, + "loss": 0.2212347537279129, + "step": 5974 + }, + { + "epoch": 1.5866418802283895, + "grad_norm": 1.355013417523964, + "learning_rate": 2.2500001243584204e-06, + "loss": 0.3002026379108429, + "step": 5975 + }, + { + "epoch": 1.5869074492099324, + "grad_norm": 1.1899701199057289, + "learning_rate": 2.2472260779102185e-06, + "loss": 0.19813531637191772, + "step": 5976 + }, + { + "epoch": 1.5871730181914754, + "grad_norm": 1.2404972223723234, + "learning_rate": 2.2444535261053968e-06, + "loss": 0.2233983874320984, + "step": 5977 + }, + { + "epoch": 1.5874385871730183, + "grad_norm": 
1.417840431772693, + "learning_rate": 2.2416824694784676e-06, + "loss": 0.26059988141059875, + "step": 5978 + }, + { + "epoch": 1.5877041561545613, + "grad_norm": 1.2961846276739968, + "learning_rate": 2.2389129085636573e-06, + "loss": 0.23058606684207916, + "step": 5979 + }, + { + "epoch": 1.5879697251361042, + "grad_norm": 1.3397298592095879, + "learning_rate": 2.236144843894904e-06, + "loss": 0.2414383739233017, + "step": 5980 + }, + { + "epoch": 1.5882352941176472, + "grad_norm": 1.2013757541083616, + "learning_rate": 2.23337827600586e-06, + "loss": 0.21688291430473328, + "step": 5981 + }, + { + "epoch": 1.5885008630991901, + "grad_norm": 1.2977536190104755, + "learning_rate": 2.2306132054298847e-06, + "loss": 0.24297408759593964, + "step": 5982 + }, + { + "epoch": 1.588766432080733, + "grad_norm": 1.449081017944755, + "learning_rate": 2.227849632700052e-06, + "loss": 0.2655821442604065, + "step": 5983 + }, + { + "epoch": 1.589032001062276, + "grad_norm": 1.2305338711146763, + "learning_rate": 2.225087558349146e-06, + "loss": 0.20545080304145813, + "step": 5984 + }, + { + "epoch": 1.589297570043819, + "grad_norm": 1.470607418959754, + "learning_rate": 2.2223269829096593e-06, + "loss": 0.24151475727558136, + "step": 5985 + }, + { + "epoch": 1.589563139025362, + "grad_norm": 1.2194062039730535, + "learning_rate": 2.2195679069138043e-06, + "loss": 0.2294519543647766, + "step": 5986 + }, + { + "epoch": 1.589828708006905, + "grad_norm": 1.3319096935394759, + "learning_rate": 2.2168103308934953e-06, + "loss": 0.2041824758052826, + "step": 5987 + }, + { + "epoch": 1.5900942769884479, + "grad_norm": 1.181577384258167, + "learning_rate": 2.21405425538036e-06, + "loss": 0.1856188029050827, + "step": 5988 + }, + { + "epoch": 1.5903598459699908, + "grad_norm": 1.2644853901124522, + "learning_rate": 2.2112996809057395e-06, + "loss": 0.24337685108184814, + "step": 5989 + }, + { + "epoch": 1.5906254149515338, + "grad_norm": 1.1714048449744126, + "learning_rate": 
2.20854660800068e-06, + "loss": 0.2201787382364273, + "step": 5990 + }, + { + "epoch": 1.5908909839330767, + "grad_norm": 1.322531300676563, + "learning_rate": 2.2057950371959427e-06, + "loss": 0.23505619168281555, + "step": 5991 + }, + { + "epoch": 1.5911565529146197, + "grad_norm": 1.4085526679551708, + "learning_rate": 2.203044969021997e-06, + "loss": 0.19528049230575562, + "step": 5992 + }, + { + "epoch": 1.5914221218961626, + "grad_norm": 1.2299879902160842, + "learning_rate": 2.2002964040090256e-06, + "loss": 0.22281290590763092, + "step": 5993 + }, + { + "epoch": 1.5916876908777056, + "grad_norm": 1.310771483519368, + "learning_rate": 2.1975493426869155e-06, + "loss": 0.19606761634349823, + "step": 5994 + }, + { + "epoch": 1.5919532598592485, + "grad_norm": 1.2570005315725017, + "learning_rate": 2.1948037855852733e-06, + "loss": 0.22559323906898499, + "step": 5995 + }, + { + "epoch": 1.5922188288407915, + "grad_norm": 1.2326545276620708, + "learning_rate": 2.192059733233408e-06, + "loss": 0.20417393743991852, + "step": 5996 + }, + { + "epoch": 1.5924843978223344, + "grad_norm": 1.351064737074131, + "learning_rate": 2.18931718616034e-06, + "loss": 0.2579960525035858, + "step": 5997 + }, + { + "epoch": 1.5927499668038774, + "grad_norm": 1.2980140620122547, + "learning_rate": 2.1865761448948e-06, + "loss": 0.23339781165122986, + "step": 5998 + }, + { + "epoch": 1.5930155357854203, + "grad_norm": 1.2588476812522966, + "learning_rate": 2.1838366099652274e-06, + "loss": 0.2368197739124298, + "step": 5999 + }, + { + "epoch": 1.5932811047669633, + "grad_norm": 1.2980274155826699, + "learning_rate": 2.1810985818997743e-06, + "loss": 0.2225847840309143, + "step": 6000 + }, + { + "epoch": 1.5935466737485062, + "grad_norm": 1.3094945647641514, + "learning_rate": 2.1783620612263e-06, + "loss": 0.2426701784133911, + "step": 6001 + }, + { + "epoch": 1.5938122427300492, + "grad_norm": 1.284834767608695, + "learning_rate": 2.175627048472372e-06, + "loss": 
0.23647268116474152, + "step": 6002 + }, + { + "epoch": 1.5940778117115921, + "grad_norm": 1.2525920428706867, + "learning_rate": 2.1728935441652687e-06, + "loss": 0.22843337059020996, + "step": 6003 + }, + { + "epoch": 1.594343380693135, + "grad_norm": 1.1786632019087344, + "learning_rate": 2.1701615488319785e-06, + "loss": 0.21524465084075928, + "step": 6004 + }, + { + "epoch": 1.594608949674678, + "grad_norm": 1.225831889373155, + "learning_rate": 2.167431062999197e-06, + "loss": 0.2160830795764923, + "step": 6005 + }, + { + "epoch": 1.594874518656221, + "grad_norm": 1.238709201727011, + "learning_rate": 2.1647020871933288e-06, + "loss": 0.2321595996618271, + "step": 6006 + }, + { + "epoch": 1.595140087637764, + "grad_norm": 1.164283210992047, + "learning_rate": 2.1619746219404916e-06, + "loss": 0.21255026757717133, + "step": 6007 + }, + { + "epoch": 1.5954056566193069, + "grad_norm": 1.3822319128280973, + "learning_rate": 2.1592486677665047e-06, + "loss": 0.22851255536079407, + "step": 6008 + }, + { + "epoch": 1.5956712256008498, + "grad_norm": 1.3982384304626327, + "learning_rate": 2.1565242251969022e-06, + "loss": 0.23844364285469055, + "step": 6009 + }, + { + "epoch": 1.5959367945823928, + "grad_norm": 1.3184134341650149, + "learning_rate": 2.153801294756924e-06, + "loss": 0.2592385411262512, + "step": 6010 + }, + { + "epoch": 1.5962023635639357, + "grad_norm": 1.221300094567036, + "learning_rate": 2.151079876971519e-06, + "loss": 0.22163718938827515, + "step": 6011 + }, + { + "epoch": 1.5964679325454787, + "grad_norm": 1.1840952132259899, + "learning_rate": 2.1483599723653415e-06, + "loss": 0.1960998773574829, + "step": 6012 + }, + { + "epoch": 1.5967335015270216, + "grad_norm": 1.1732770789502442, + "learning_rate": 2.145641581462762e-06, + "loss": 0.20811150968074799, + "step": 6013 + }, + { + "epoch": 1.5969990705085646, + "grad_norm": 1.2065470685478314, + "learning_rate": 2.1429247047878534e-06, + "loss": 0.23184621334075928, + "step": 6014 + }, + { + 
"epoch": 1.5972646394901076, + "grad_norm": 1.3338850940720004, + "learning_rate": 2.1402093428643942e-06, + "loss": 0.22043758630752563, + "step": 6015 + }, + { + "epoch": 1.5975302084716505, + "grad_norm": 1.1736165993383876, + "learning_rate": 2.137495496215878e-06, + "loss": 0.18621152639389038, + "step": 6016 + }, + { + "epoch": 1.5977957774531935, + "grad_norm": 1.332636421894691, + "learning_rate": 2.1347831653654995e-06, + "loss": 0.2422473132610321, + "step": 6017 + }, + { + "epoch": 1.5980613464347364, + "grad_norm": 1.5933227500597664, + "learning_rate": 2.132072350836164e-06, + "loss": 0.2147202491760254, + "step": 6018 + }, + { + "epoch": 1.5983269154162794, + "grad_norm": 1.5455916288717333, + "learning_rate": 2.1293630531504873e-06, + "loss": 0.23091933131217957, + "step": 6019 + }, + { + "epoch": 1.5985924843978223, + "grad_norm": 1.290869089573798, + "learning_rate": 2.1266552728307876e-06, + "loss": 0.220037579536438, + "step": 6020 + }, + { + "epoch": 1.5988580533793653, + "grad_norm": 1.3343924424387823, + "learning_rate": 2.1239490103990946e-06, + "loss": 0.25520551204681396, + "step": 6021 + }, + { + "epoch": 1.5991236223609082, + "grad_norm": 1.412222062207012, + "learning_rate": 2.1212442663771427e-06, + "loss": 0.23216915130615234, + "step": 6022 + }, + { + "epoch": 1.5993891913424512, + "grad_norm": 1.381515312381825, + "learning_rate": 2.118541041286374e-06, + "loss": 0.22098806500434875, + "step": 6023 + }, + { + "epoch": 1.5996547603239941, + "grad_norm": 1.4609594644715316, + "learning_rate": 2.11583933564794e-06, + "loss": 0.261300265789032, + "step": 6024 + }, + { + "epoch": 1.599920329305537, + "grad_norm": 1.2095539498781858, + "learning_rate": 2.113139149982698e-06, + "loss": 0.20427154004573822, + "step": 6025 + }, + { + "epoch": 1.60018589828708, + "grad_norm": 1.2158101663646808, + "learning_rate": 2.110440484811209e-06, + "loss": 0.20700547099113464, + "step": 6026 + }, + { + "epoch": 1.600451467268623, + "grad_norm": 
1.4331467444820847, + "learning_rate": 2.1077433406537475e-06, + "loss": 0.2789752185344696, + "step": 6027 + }, + { + "epoch": 1.600717036250166, + "grad_norm": 1.2991321976135584, + "learning_rate": 2.1050477180302885e-06, + "loss": 0.2205841988325119, + "step": 6028 + }, + { + "epoch": 1.6009826052317089, + "grad_norm": 1.3197920849647402, + "learning_rate": 2.1023536174605184e-06, + "loss": 0.24921822547912598, + "step": 6029 + }, + { + "epoch": 1.6012481742132518, + "grad_norm": 2.014197229906981, + "learning_rate": 2.0996610394638228e-06, + "loss": 0.2516329288482666, + "step": 6030 + }, + { + "epoch": 1.6015137431947948, + "grad_norm": 1.2656936665142342, + "learning_rate": 2.096969984559306e-06, + "loss": 0.21832503378391266, + "step": 6031 + }, + { + "epoch": 1.6017793121763377, + "grad_norm": 1.530808592055088, + "learning_rate": 2.094280453265769e-06, + "loss": 0.2499273419380188, + "step": 6032 + }, + { + "epoch": 1.6020448811578807, + "grad_norm": 1.167125195859278, + "learning_rate": 2.09159244610172e-06, + "loss": 0.21701282262802124, + "step": 6033 + }, + { + "epoch": 1.6023104501394236, + "grad_norm": 1.2536801575307182, + "learning_rate": 2.0889059635853783e-06, + "loss": 0.24446213245391846, + "step": 6034 + }, + { + "epoch": 1.6025760191209666, + "grad_norm": 1.412317581200794, + "learning_rate": 2.0862210062346622e-06, + "loss": 0.27299973368644714, + "step": 6035 + }, + { + "epoch": 1.6028415881025095, + "grad_norm": 1.320945278338079, + "learning_rate": 2.0835375745672027e-06, + "loss": 0.2384832501411438, + "step": 6036 + }, + { + "epoch": 1.6031071570840525, + "grad_norm": 1.340788170535406, + "learning_rate": 2.0808556691003335e-06, + "loss": 0.2563338875770569, + "step": 6037 + }, + { + "epoch": 1.6033727260655954, + "grad_norm": 1.5240284764155023, + "learning_rate": 2.0781752903510954e-06, + "loss": 0.29148975014686584, + "step": 6038 + }, + { + "epoch": 1.6036382950471384, + "grad_norm": 1.1673304070468655, + "learning_rate": 
2.0754964388362264e-06, + "loss": 0.24276503920555115, + "step": 6039 + }, + { + "epoch": 1.6039038640286813, + "grad_norm": 1.2629655044665746, + "learning_rate": 2.0728191150721866e-06, + "loss": 0.1863931119441986, + "step": 6040 + }, + { + "epoch": 1.6041694330102243, + "grad_norm": 1.1731073698012655, + "learning_rate": 2.0701433195751286e-06, + "loss": 0.21270868182182312, + "step": 6041 + }, + { + "epoch": 1.6044350019917673, + "grad_norm": 1.2780583308550695, + "learning_rate": 2.0674690528609155e-06, + "loss": 0.21542516350746155, + "step": 6042 + }, + { + "epoch": 1.6047005709733102, + "grad_norm": 1.256432235067539, + "learning_rate": 2.0647963154451124e-06, + "loss": 0.23099860548973083, + "step": 6043 + }, + { + "epoch": 1.6049661399548532, + "grad_norm": 1.1769565332020941, + "learning_rate": 2.062125107842993e-06, + "loss": 0.22757291793823242, + "step": 6044 + }, + { + "epoch": 1.605231708936396, + "grad_norm": 1.317404807729369, + "learning_rate": 2.0594554305695346e-06, + "loss": 0.2370409518480301, + "step": 6045 + }, + { + "epoch": 1.605497277917939, + "grad_norm": 1.1803781252235817, + "learning_rate": 2.0567872841394186e-06, + "loss": 0.21620309352874756, + "step": 6046 + }, + { + "epoch": 1.605762846899482, + "grad_norm": 1.2191738819977833, + "learning_rate": 2.0541206690670324e-06, + "loss": 0.22821158170700073, + "step": 6047 + }, + { + "epoch": 1.606028415881025, + "grad_norm": 1.385940331470305, + "learning_rate": 2.0514555858664663e-06, + "loss": 0.24930253624916077, + "step": 6048 + }, + { + "epoch": 1.606293984862568, + "grad_norm": 1.3966922562239508, + "learning_rate": 2.048792035051521e-06, + "loss": 0.2491561770439148, + "step": 6049 + }, + { + "epoch": 1.6065595538441109, + "grad_norm": 1.3037697337655914, + "learning_rate": 2.046130017135697e-06, + "loss": 0.20652002096176147, + "step": 6050 + }, + { + "epoch": 1.6068251228256538, + "grad_norm": 1.1970911046995705, + "learning_rate": 2.0434695326321975e-06, + "loss": 
0.25670793652534485, + "step": 6051 + }, + { + "epoch": 1.6070906918071968, + "grad_norm": 1.2469219040368793, + "learning_rate": 2.0408105820539328e-06, + "loss": 0.2328418493270874, + "step": 6052 + }, + { + "epoch": 1.6073562607887397, + "grad_norm": 1.2657559287734064, + "learning_rate": 2.0381531659135213e-06, + "loss": 0.20811162889003754, + "step": 6053 + }, + { + "epoch": 1.6076218297702827, + "grad_norm": 1.2637409014709644, + "learning_rate": 2.0354972847232756e-06, + "loss": 0.24068522453308105, + "step": 6054 + }, + { + "epoch": 1.6078873987518256, + "grad_norm": 1.3537388998191249, + "learning_rate": 2.032842938995221e-06, + "loss": 0.2519197463989258, + "step": 6055 + }, + { + "epoch": 1.6081529677333686, + "grad_norm": 1.349413355425799, + "learning_rate": 2.030190129241083e-06, + "loss": 0.2293267697095871, + "step": 6056 + }, + { + "epoch": 1.6084185367149118, + "grad_norm": 1.8474927483406436, + "learning_rate": 2.027538855972291e-06, + "loss": 0.22398510575294495, + "step": 6057 + }, + { + "epoch": 1.6086841056964547, + "grad_norm": 1.4186878733418118, + "learning_rate": 2.0248891196999833e-06, + "loss": 0.23074102401733398, + "step": 6058 + }, + { + "epoch": 1.6089496746779977, + "grad_norm": 1.352152679115686, + "learning_rate": 2.0222409209349957e-06, + "loss": 0.2618173658847809, + "step": 6059 + }, + { + "epoch": 1.6092152436595406, + "grad_norm": 1.2898742263880296, + "learning_rate": 2.0195942601878703e-06, + "loss": 0.25361114740371704, + "step": 6060 + }, + { + "epoch": 1.6094808126410836, + "grad_norm": 1.2270527625039152, + "learning_rate": 2.016949137968851e-06, + "loss": 0.2276519238948822, + "step": 6061 + }, + { + "epoch": 1.6097463816226265, + "grad_norm": 1.3155356069823825, + "learning_rate": 2.0143055547878863e-06, + "loss": 0.20834363996982574, + "step": 6062 + }, + { + "epoch": 1.6100119506041695, + "grad_norm": 1.348708703656222, + "learning_rate": 2.011663511154628e-06, + "loss": 0.2579394578933716, + "step": 6063 + }, + { 
+ "epoch": 1.6102775195857124, + "grad_norm": 1.2574503425710122, + "learning_rate": 2.009023007578431e-06, + "loss": 0.22118912637233734, + "step": 6064 + }, + { + "epoch": 1.6105430885672554, + "grad_norm": 1.1631210187007555, + "learning_rate": 2.0063840445683537e-06, + "loss": 0.1881515383720398, + "step": 6065 + }, + { + "epoch": 1.6108086575487983, + "grad_norm": 1.2884662240297928, + "learning_rate": 2.003746622633155e-06, + "loss": 0.2270805984735489, + "step": 6066 + }, + { + "epoch": 1.6110742265303413, + "grad_norm": 1.4261065534360056, + "learning_rate": 2.0011107422813013e-06, + "loss": 0.26356351375579834, + "step": 6067 + }, + { + "epoch": 1.6113397955118842, + "grad_norm": 1.2506363457624738, + "learning_rate": 1.9984764040209615e-06, + "loss": 0.22937676310539246, + "step": 6068 + }, + { + "epoch": 1.6116053644934272, + "grad_norm": 1.329188800311282, + "learning_rate": 1.99584360836e-06, + "loss": 0.25062739849090576, + "step": 6069 + }, + { + "epoch": 1.6118709334749701, + "grad_norm": 1.1593663351806502, + "learning_rate": 1.993212355805989e-06, + "loss": 0.2031324952840805, + "step": 6070 + }, + { + "epoch": 1.612136502456513, + "grad_norm": 1.3722085699931008, + "learning_rate": 1.990582646866206e-06, + "loss": 0.25769656896591187, + "step": 6071 + }, + { + "epoch": 1.612402071438056, + "grad_norm": 1.3184109520906713, + "learning_rate": 1.987954482047626e-06, + "loss": 0.23856252431869507, + "step": 6072 + }, + { + "epoch": 1.612667640419599, + "grad_norm": 1.3452730145342116, + "learning_rate": 1.9853278618569284e-06, + "loss": 0.2336723804473877, + "step": 6073 + }, + { + "epoch": 1.612933209401142, + "grad_norm": 1.3427497614935235, + "learning_rate": 1.9827027868004942e-06, + "loss": 0.22327622771263123, + "step": 6074 + }, + { + "epoch": 1.6131987783826849, + "grad_norm": 1.302817235652594, + "learning_rate": 1.980079257384405e-06, + "loss": 0.26695019006729126, + "step": 6075 + }, + { + "epoch": 1.6134643473642278, + "grad_norm": 
1.174792834468628, + "learning_rate": 1.9774572741144514e-06, + "loss": 0.2467387616634369, + "step": 6076 + }, + { + "epoch": 1.6137299163457708, + "grad_norm": 1.3974546997540778, + "learning_rate": 1.9748368374961193e-06, + "loss": 0.25473737716674805, + "step": 6077 + }, + { + "epoch": 1.6139954853273137, + "grad_norm": 1.295354894556923, + "learning_rate": 1.972217948034596e-06, + "loss": 0.25508594512939453, + "step": 6078 + }, + { + "epoch": 1.6142610543088567, + "grad_norm": 1.2627621502033493, + "learning_rate": 1.969600606234774e-06, + "loss": 0.23020131886005402, + "step": 6079 + }, + { + "epoch": 1.6145266232903996, + "grad_norm": 1.2036992831321345, + "learning_rate": 1.9669848126012447e-06, + "loss": 0.249805748462677, + "step": 6080 + }, + { + "epoch": 1.6147921922719426, + "grad_norm": 1.2304217597704168, + "learning_rate": 1.964370567638303e-06, + "loss": 0.2377707064151764, + "step": 6081 + }, + { + "epoch": 1.6150577612534855, + "grad_norm": 1.3812388616949685, + "learning_rate": 1.9617578718499452e-06, + "loss": 0.28656789660453796, + "step": 6082 + }, + { + "epoch": 1.6153233302350285, + "grad_norm": 1.3083477730508752, + "learning_rate": 1.9591467257398668e-06, + "loss": 0.22079989314079285, + "step": 6083 + }, + { + "epoch": 1.6155888992165715, + "grad_norm": 1.048982897357468, + "learning_rate": 1.9565371298114666e-06, + "loss": 0.1993042230606079, + "step": 6084 + }, + { + "epoch": 1.6158544681981146, + "grad_norm": 1.1837758778278344, + "learning_rate": 1.9539290845678438e-06, + "loss": 0.20818357169628143, + "step": 6085 + }, + { + "epoch": 1.6161200371796576, + "grad_norm": 1.2192677831294998, + "learning_rate": 1.9513225905117996e-06, + "loss": 0.20531761646270752, + "step": 6086 + }, + { + "epoch": 1.6163856061612005, + "grad_norm": 1.2499003349392819, + "learning_rate": 1.948717648145834e-06, + "loss": 0.23414376378059387, + "step": 6087 + }, + { + "epoch": 1.6166511751427435, + "grad_norm": 1.2073482694002922, + "learning_rate": 
1.9461142579721493e-06, + "loss": 0.2025471031665802, + "step": 6088 + }, + { + "epoch": 1.6169167441242864, + "grad_norm": 1.4729414889087271, + "learning_rate": 1.943512420492649e-06, + "loss": 0.19130446016788483, + "step": 6089 + }, + { + "epoch": 1.6171823131058294, + "grad_norm": 1.1947055473554775, + "learning_rate": 1.940912136208938e-06, + "loss": 0.21637848019599915, + "step": 6090 + }, + { + "epoch": 1.6174478820873723, + "grad_norm": 1.301401884532825, + "learning_rate": 1.9383134056223176e-06, + "loss": 0.26844075322151184, + "step": 6091 + }, + { + "epoch": 1.6177134510689153, + "grad_norm": 1.1755891449306313, + "learning_rate": 1.935716229233794e-06, + "loss": 0.19573305547237396, + "step": 6092 + }, + { + "epoch": 1.6179790200504582, + "grad_norm": 1.2705214543802177, + "learning_rate": 1.93312060754407e-06, + "loss": 0.22705954313278198, + "step": 6093 + }, + { + "epoch": 1.6182445890320012, + "grad_norm": 1.279170245457384, + "learning_rate": 1.9305265410535545e-06, + "loss": 0.2505400478839874, + "step": 6094 + }, + { + "epoch": 1.6185101580135441, + "grad_norm": 1.2108711177458409, + "learning_rate": 1.927934030262353e-06, + "loss": 0.2328193187713623, + "step": 6095 + }, + { + "epoch": 1.618775726995087, + "grad_norm": 1.2588974628750198, + "learning_rate": 1.9253430756702674e-06, + "loss": 0.23876577615737915, + "step": 6096 + }, + { + "epoch": 1.61904129597663, + "grad_norm": 1.3685755624123837, + "learning_rate": 1.9227536777768063e-06, + "loss": 0.2390732318162918, + "step": 6097 + }, + { + "epoch": 1.619306864958173, + "grad_norm": 1.3858306009370809, + "learning_rate": 1.9201658370811736e-06, + "loss": 0.25231993198394775, + "step": 6098 + }, + { + "epoch": 1.619572433939716, + "grad_norm": 1.2520374949609627, + "learning_rate": 1.917579554082274e-06, + "loss": 0.21527352929115295, + "step": 6099 + }, + { + "epoch": 1.619838002921259, + "grad_norm": 1.2236250632687489, + "learning_rate": 1.9149948292787133e-06, + "loss": 
0.21394580602645874, + "step": 6100 + }, + { + "epoch": 1.6201035719028019, + "grad_norm": 1.3465338603905943, + "learning_rate": 1.912411663168796e-06, + "loss": 0.26093196868896484, + "step": 6101 + }, + { + "epoch": 1.6203691408843448, + "grad_norm": 1.3518497357465815, + "learning_rate": 1.9098300562505266e-06, + "loss": 0.2631412744522095, + "step": 6102 + }, + { + "epoch": 1.6206347098658878, + "grad_norm": 1.3007944720423297, + "learning_rate": 1.9072500090216073e-06, + "loss": 0.270250141620636, + "step": 6103 + }, + { + "epoch": 1.6209002788474307, + "grad_norm": 1.3385737712068424, + "learning_rate": 1.9046715219794397e-06, + "loss": 0.22944031655788422, + "step": 6104 + }, + { + "epoch": 1.6211658478289737, + "grad_norm": 1.2125488505372424, + "learning_rate": 1.902094595621129e-06, + "loss": 0.24429070949554443, + "step": 6105 + }, + { + "epoch": 1.6214314168105166, + "grad_norm": 1.2581532570405378, + "learning_rate": 1.8995192304434729e-06, + "loss": 0.25656238198280334, + "step": 6106 + }, + { + "epoch": 1.6216969857920596, + "grad_norm": 1.3466122688772229, + "learning_rate": 1.8969454269429743e-06, + "loss": 0.2575233280658722, + "step": 6107 + }, + { + "epoch": 1.6219625547736025, + "grad_norm": 1.245984919504028, + "learning_rate": 1.8943731856158299e-06, + "loss": 0.24881063401699066, + "step": 6108 + }, + { + "epoch": 1.6222281237551455, + "grad_norm": 1.2845731125917577, + "learning_rate": 1.8918025069579382e-06, + "loss": 0.23353847861289978, + "step": 6109 + }, + { + "epoch": 1.6224936927366884, + "grad_norm": 1.2505489106727152, + "learning_rate": 1.8892333914648953e-06, + "loss": 0.21085457503795624, + "step": 6110 + }, + { + "epoch": 1.6227592617182314, + "grad_norm": 1.4134001131082032, + "learning_rate": 1.8866658396319947e-06, + "loss": 0.28600943088531494, + "step": 6111 + }, + { + "epoch": 1.6230248306997743, + "grad_norm": 1.1689838110439057, + "learning_rate": 1.8840998519542352e-06, + "loss": 0.22580507397651672, + "step": 6112 + 
}, + { + "epoch": 1.6232903996813173, + "grad_norm": 1.212526750953587, + "learning_rate": 1.8815354289263066e-06, + "loss": 0.19310800731182098, + "step": 6113 + }, + { + "epoch": 1.6235559686628602, + "grad_norm": 1.3020905454433194, + "learning_rate": 1.8789725710425988e-06, + "loss": 0.21633204817771912, + "step": 6114 + }, + { + "epoch": 1.6238215376444032, + "grad_norm": 1.4315370828946672, + "learning_rate": 1.8764112787972e-06, + "loss": 0.21346023678779602, + "step": 6115 + }, + { + "epoch": 1.6240871066259461, + "grad_norm": 1.21392020481053, + "learning_rate": 1.8738515526838986e-06, + "loss": 0.21206694841384888, + "step": 6116 + }, + { + "epoch": 1.624352675607489, + "grad_norm": 1.3197096686410696, + "learning_rate": 1.8712933931961773e-06, + "loss": 0.2135339230298996, + "step": 6117 + }, + { + "epoch": 1.624618244589032, + "grad_norm": 1.2484635869956482, + "learning_rate": 1.8687368008272243e-06, + "loss": 0.2168758660554886, + "step": 6118 + }, + { + "epoch": 1.624883813570575, + "grad_norm": 1.1804251189525716, + "learning_rate": 1.866181776069914e-06, + "loss": 0.20825617015361786, + "step": 6119 + }, + { + "epoch": 1.625149382552118, + "grad_norm": 1.291082575518304, + "learning_rate": 1.863628319416826e-06, + "loss": 0.25367867946624756, + "step": 6120 + }, + { + "epoch": 1.625414951533661, + "grad_norm": 1.3053498393136334, + "learning_rate": 1.8610764313602404e-06, + "loss": 0.21604284644126892, + "step": 6121 + }, + { + "epoch": 1.6256805205152038, + "grad_norm": 1.2871138327885168, + "learning_rate": 1.8585261123921283e-06, + "loss": 0.2324865758419037, + "step": 6122 + }, + { + "epoch": 1.6259460894967468, + "grad_norm": 1.2467444217539543, + "learning_rate": 1.8559773630041632e-06, + "loss": 0.2077629417181015, + "step": 6123 + }, + { + "epoch": 1.6262116584782897, + "grad_norm": 1.1704936500874914, + "learning_rate": 1.8534301836877122e-06, + "loss": 0.19919469952583313, + "step": 6124 + }, + { + "epoch": 1.6264772274598327, + 
"grad_norm": 1.1998850682672693, + "learning_rate": 1.8508845749338412e-06, + "loss": 0.21069160103797913, + "step": 6125 + }, + { + "epoch": 1.6267427964413756, + "grad_norm": 1.218804714337499, + "learning_rate": 1.8483405372333152e-06, + "loss": 0.2286640703678131, + "step": 6126 + }, + { + "epoch": 1.6270083654229186, + "grad_norm": 1.33630910648056, + "learning_rate": 1.8457980710765932e-06, + "loss": 0.2430541068315506, + "step": 6127 + }, + { + "epoch": 1.6272739344044616, + "grad_norm": 1.3713498598627625, + "learning_rate": 1.8432571769538344e-06, + "loss": 0.21875709295272827, + "step": 6128 + }, + { + "epoch": 1.6275395033860045, + "grad_norm": 1.4416966555618131, + "learning_rate": 1.8407178553548876e-06, + "loss": 0.22591018676757812, + "step": 6129 + }, + { + "epoch": 1.6278050723675475, + "grad_norm": 1.362917465597037, + "learning_rate": 1.8381801067693129e-06, + "loss": 0.25429075956344604, + "step": 6130 + }, + { + "epoch": 1.6280706413490904, + "grad_norm": 1.31452454626215, + "learning_rate": 1.8356439316863528e-06, + "loss": 0.2437858283519745, + "step": 6131 + }, + { + "epoch": 1.6283362103306334, + "grad_norm": 1.2489983792436092, + "learning_rate": 1.8331093305949532e-06, + "loss": 0.24196262657642365, + "step": 6132 + }, + { + "epoch": 1.6286017793121763, + "grad_norm": 1.3756170241894088, + "learning_rate": 1.8305763039837576e-06, + "loss": 0.25779271125793457, + "step": 6133 + }, + { + "epoch": 1.6288673482937193, + "grad_norm": 1.223955710903011, + "learning_rate": 1.8280448523410987e-06, + "loss": 0.23418015241622925, + "step": 6134 + }, + { + "epoch": 1.6291329172752622, + "grad_norm": 1.3748973147827792, + "learning_rate": 1.8255149761550128e-06, + "loss": 0.2670775353908539, + "step": 6135 + }, + { + "epoch": 1.6293984862568052, + "grad_norm": 1.423176544673552, + "learning_rate": 1.822986675913231e-06, + "loss": 0.29342639446258545, + "step": 6136 + }, + { + "epoch": 1.6296640552383481, + "grad_norm": 1.244422511511833, + 
"learning_rate": 1.8204599521031785e-06, + "loss": 0.22768062353134155, + "step": 6137 + }, + { + "epoch": 1.629929624219891, + "grad_norm": 1.6355607569945512, + "learning_rate": 1.817934805211976e-06, + "loss": 0.23938167095184326, + "step": 6138 + }, + { + "epoch": 1.630195193201434, + "grad_norm": 1.311916117620117, + "learning_rate": 1.8154112357264474e-06, + "loss": 0.1982264518737793, + "step": 6139 + }, + { + "epoch": 1.630460762182977, + "grad_norm": 1.3026965235969699, + "learning_rate": 1.8128892441331047e-06, + "loss": 0.23591312766075134, + "step": 6140 + }, + { + "epoch": 1.63072633116452, + "grad_norm": 1.259123916156089, + "learning_rate": 1.8103688309181567e-06, + "loss": 0.20317673683166504, + "step": 6141 + }, + { + "epoch": 1.6309919001460629, + "grad_norm": 1.2846300858550195, + "learning_rate": 1.8078499965675112e-06, + "loss": 0.233676478266716, + "step": 6142 + }, + { + "epoch": 1.6312574691276058, + "grad_norm": 1.3296785293607047, + "learning_rate": 1.8053327415667688e-06, + "loss": 0.22850775718688965, + "step": 6143 + }, + { + "epoch": 1.6315230381091488, + "grad_norm": 1.2850656633806874, + "learning_rate": 1.8028170664012268e-06, + "loss": 0.2603572607040405, + "step": 6144 + }, + { + "epoch": 1.6317886070906917, + "grad_norm": 1.3208849168125785, + "learning_rate": 1.8003029715558773e-06, + "loss": 0.27881523966789246, + "step": 6145 + }, + { + "epoch": 1.6320541760722347, + "grad_norm": 1.225668329292659, + "learning_rate": 1.797790457515406e-06, + "loss": 0.21744176745414734, + "step": 6146 + }, + { + "epoch": 1.6323197450537776, + "grad_norm": 1.2220588910103882, + "learning_rate": 1.7952795247642008e-06, + "loss": 0.20449542999267578, + "step": 6147 + }, + { + "epoch": 1.6325853140353206, + "grad_norm": 1.3015735321136237, + "learning_rate": 1.7927701737863402e-06, + "loss": 0.25641053915023804, + "step": 6148 + }, + { + "epoch": 1.6328508830168635, + "grad_norm": 1.294201240106412, + "learning_rate": 1.7902624050655914e-06, + 
"loss": 0.23583751916885376, + "step": 6149 + }, + { + "epoch": 1.6331164519984065, + "grad_norm": 1.4310897316272893, + "learning_rate": 1.787756219085427e-06, + "loss": 0.2709866762161255, + "step": 6150 + }, + { + "epoch": 1.6333820209799494, + "grad_norm": 1.2536554341378991, + "learning_rate": 1.785251616329009e-06, + "loss": 0.233103945851326, + "step": 6151 + }, + { + "epoch": 1.6336475899614924, + "grad_norm": 1.2660813048243769, + "learning_rate": 1.7827485972791957e-06, + "loss": 0.2665184438228607, + "step": 6152 + }, + { + "epoch": 1.6339131589430353, + "grad_norm": 1.2551185732946457, + "learning_rate": 1.7802471624185392e-06, + "loss": 0.20934605598449707, + "step": 6153 + }, + { + "epoch": 1.6341787279245783, + "grad_norm": 1.2179362426676639, + "learning_rate": 1.7777473122292866e-06, + "loss": 0.2102464735507965, + "step": 6154 + }, + { + "epoch": 1.6344442969061213, + "grad_norm": 1.2289784110367914, + "learning_rate": 1.7752490471933769e-06, + "loss": 0.22889986634254456, + "step": 6155 + }, + { + "epoch": 1.6347098658876642, + "grad_norm": 1.3627659705359922, + "learning_rate": 1.772752367792452e-06, + "loss": 0.2261584997177124, + "step": 6156 + }, + { + "epoch": 1.6349754348692072, + "grad_norm": 1.2186249427048736, + "learning_rate": 1.7702572745078395e-06, + "loss": 0.21456710994243622, + "step": 6157 + }, + { + "epoch": 1.63524100385075, + "grad_norm": 1.1535452073956258, + "learning_rate": 1.7677637678205627e-06, + "loss": 0.22762097418308258, + "step": 6158 + }, + { + "epoch": 1.635506572832293, + "grad_norm": 1.306484526102534, + "learning_rate": 1.7652718482113417e-06, + "loss": 0.24772633612155914, + "step": 6159 + }, + { + "epoch": 1.635772141813836, + "grad_norm": 1.3290630048425123, + "learning_rate": 1.7627815161605887e-06, + "loss": 0.22980757057666779, + "step": 6160 + }, + { + "epoch": 1.636037710795379, + "grad_norm": 1.1593602123779645, + "learning_rate": 1.760292772148411e-06, + "loss": 0.19560125470161438, + "step": 6161 + 
}, + { + "epoch": 1.636303279776922, + "grad_norm": 1.388673809129743, + "learning_rate": 1.7578056166546086e-06, + "loss": 0.23733064532279968, + "step": 6162 + }, + { + "epoch": 1.6365688487584649, + "grad_norm": 1.2026681813349183, + "learning_rate": 1.7553200501586743e-06, + "loss": 0.21064560115337372, + "step": 6163 + }, + { + "epoch": 1.6368344177400078, + "grad_norm": 1.3444341606502546, + "learning_rate": 1.7528360731397986e-06, + "loss": 0.26709994673728943, + "step": 6164 + }, + { + "epoch": 1.6370999867215508, + "grad_norm": 1.2755110888757868, + "learning_rate": 1.750353686076861e-06, + "loss": 0.26555943489074707, + "step": 6165 + }, + { + "epoch": 1.6373655557030937, + "grad_norm": 1.3299250322981557, + "learning_rate": 1.7478728894484375e-06, + "loss": 0.24480760097503662, + "step": 6166 + }, + { + "epoch": 1.6376311246846367, + "grad_norm": 1.2560095314061934, + "learning_rate": 1.7453936837327967e-06, + "loss": 0.2170884907245636, + "step": 6167 + }, + { + "epoch": 1.6378966936661796, + "grad_norm": 1.340756013397369, + "learning_rate": 1.7429160694078983e-06, + "loss": 0.24728982150554657, + "step": 6168 + }, + { + "epoch": 1.6381622626477228, + "grad_norm": 1.1911402182063675, + "learning_rate": 1.7404400469513994e-06, + "loss": 0.20886945724487305, + "step": 6169 + }, + { + "epoch": 1.6384278316292658, + "grad_norm": 1.2150445755778985, + "learning_rate": 1.7379656168406467e-06, + "loss": 0.1892474740743637, + "step": 6170 + }, + { + "epoch": 1.6386934006108087, + "grad_norm": 1.3004801024505461, + "learning_rate": 1.7354927795526821e-06, + "loss": 0.24953782558441162, + "step": 6171 + }, + { + "epoch": 1.6389589695923517, + "grad_norm": 1.2292705802712374, + "learning_rate": 1.7330215355642377e-06, + "loss": 0.2311600148677826, + "step": 6172 + }, + { + "epoch": 1.6392245385738946, + "grad_norm": 1.2596864005467026, + "learning_rate": 1.73055188535174e-06, + "loss": 0.24018675088882446, + "step": 6173 + }, + { + "epoch": 1.6394901075554376, + 
"grad_norm": 1.3394449685829455, + "learning_rate": 1.7280838293913116e-06, + "loss": 0.22607022523880005, + "step": 6174 + }, + { + "epoch": 1.6397556765369805, + "grad_norm": 1.2860534255043978, + "learning_rate": 1.7256173681587619e-06, + "loss": 0.23725482821464539, + "step": 6175 + }, + { + "epoch": 1.6400212455185235, + "grad_norm": 1.2500709715234832, + "learning_rate": 1.723152502129597e-06, + "loss": 0.241235613822937, + "step": 6176 + }, + { + "epoch": 1.6402868145000664, + "grad_norm": 1.2070755501863832, + "learning_rate": 1.7206892317790136e-06, + "loss": 0.2150690108537674, + "step": 6177 + }, + { + "epoch": 1.6405523834816094, + "grad_norm": 1.2557873581014805, + "learning_rate": 1.7182275575819007e-06, + "loss": 0.22133421897888184, + "step": 6178 + }, + { + "epoch": 1.6408179524631523, + "grad_norm": 1.1297884729403, + "learning_rate": 1.7157674800128399e-06, + "loss": 0.1937463879585266, + "step": 6179 + }, + { + "epoch": 1.6410835214446953, + "grad_norm": 1.0851305240668396, + "learning_rate": 1.7133089995461062e-06, + "loss": 0.18938027322292328, + "step": 6180 + }, + { + "epoch": 1.6413490904262382, + "grad_norm": 1.2621430482402598, + "learning_rate": 1.7108521166556646e-06, + "loss": 0.23577997088432312, + "step": 6181 + }, + { + "epoch": 1.6416146594077812, + "grad_norm": 1.2915526813468403, + "learning_rate": 1.7083968318151734e-06, + "loss": 0.2712448537349701, + "step": 6182 + }, + { + "epoch": 1.6418802283893241, + "grad_norm": 1.276409938985324, + "learning_rate": 1.7059431454979825e-06, + "loss": 0.24242255091667175, + "step": 6183 + }, + { + "epoch": 1.642145797370867, + "grad_norm": 1.3152058895449834, + "learning_rate": 1.7034910581771347e-06, + "loss": 0.22521010041236877, + "step": 6184 + }, + { + "epoch": 1.64241136635241, + "grad_norm": 1.3840145244958133, + "learning_rate": 1.7010405703253618e-06, + "loss": 0.22026273608207703, + "step": 6185 + }, + { + "epoch": 1.642676935333953, + "grad_norm": 1.458737402535225, + 
"learning_rate": 1.6985916824150894e-06, + "loss": 0.22726528346538544, + "step": 6186 + }, + { + "epoch": 1.642942504315496, + "grad_norm": 1.3396783040947258, + "learning_rate": 1.6961443949184353e-06, + "loss": 0.25172409415245056, + "step": 6187 + }, + { + "epoch": 1.6432080732970389, + "grad_norm": 1.1393591185728944, + "learning_rate": 1.6936987083072065e-06, + "loss": 0.21173113584518433, + "step": 6188 + }, + { + "epoch": 1.6434736422785818, + "grad_norm": 1.3589729407555038, + "learning_rate": 1.6912546230529036e-06, + "loss": 0.22596749663352966, + "step": 6189 + }, + { + "epoch": 1.6437392112601248, + "grad_norm": 1.3604263454917045, + "learning_rate": 1.6888121396267166e-06, + "loss": 0.2749077081680298, + "step": 6190 + }, + { + "epoch": 1.6440047802416677, + "grad_norm": 2.5555069132462283, + "learning_rate": 1.6863712584995252e-06, + "loss": 0.22150780260562897, + "step": 6191 + }, + { + "epoch": 1.6442703492232107, + "grad_norm": 1.2838243253096144, + "learning_rate": 1.6839319801419073e-06, + "loss": 0.23437368869781494, + "step": 6192 + }, + { + "epoch": 1.6445359182047536, + "grad_norm": 1.3069256977628543, + "learning_rate": 1.681494305024125e-06, + "loss": 0.22949008643627167, + "step": 6193 + }, + { + "epoch": 1.6448014871862966, + "grad_norm": 1.2956112975441718, + "learning_rate": 1.6790582336161332e-06, + "loss": 0.24147525429725647, + "step": 6194 + }, + { + "epoch": 1.6450670561678395, + "grad_norm": 1.180082798545332, + "learning_rate": 1.6766237663875773e-06, + "loss": 0.2001456618309021, + "step": 6195 + }, + { + "epoch": 1.6453326251493825, + "grad_norm": 1.2710753216206616, + "learning_rate": 1.674190903807794e-06, + "loss": 0.17668186128139496, + "step": 6196 + }, + { + "epoch": 1.6455981941309257, + "grad_norm": 1.369840319031622, + "learning_rate": 1.6717596463458107e-06, + "loss": 0.24585255980491638, + "step": 6197 + }, + { + "epoch": 1.6458637631124686, + "grad_norm": 1.2328642285488454, + "learning_rate": 
1.6693299944703479e-06, + "loss": 0.2234572172164917, + "step": 6198 + }, + { + "epoch": 1.6461293320940116, + "grad_norm": 1.2369910191993496, + "learning_rate": 1.6669019486498083e-06, + "loss": 0.2007240653038025, + "step": 6199 + }, + { + "epoch": 1.6463949010755545, + "grad_norm": 1.317383450933259, + "learning_rate": 1.6644755093522913e-06, + "loss": 0.21926215291023254, + "step": 6200 + }, + { + "epoch": 1.6466604700570975, + "grad_norm": 1.3404302006039666, + "learning_rate": 1.662050677045589e-06, + "loss": 0.24797898530960083, + "step": 6201 + }, + { + "epoch": 1.6469260390386404, + "grad_norm": 1.285343354391859, + "learning_rate": 1.65962745219718e-06, + "loss": 0.22087037563323975, + "step": 6202 + }, + { + "epoch": 1.6471916080201834, + "grad_norm": 1.2765781805195457, + "learning_rate": 1.6572058352742327e-06, + "loss": 0.23073960840702057, + "step": 6203 + }, + { + "epoch": 1.6474571770017263, + "grad_norm": 1.3644493807061109, + "learning_rate": 1.6547858267436056e-06, + "loss": 0.2430298924446106, + "step": 6204 + }, + { + "epoch": 1.6477227459832693, + "grad_norm": 1.286198443262182, + "learning_rate": 1.6523674270718493e-06, + "loss": 0.23337247967720032, + "step": 6205 + }, + { + "epoch": 1.6479883149648122, + "grad_norm": 1.2144238817830517, + "learning_rate": 1.6499506367252016e-06, + "loss": 0.22141093015670776, + "step": 6206 + }, + { + "epoch": 1.6482538839463552, + "grad_norm": 1.280282959866893, + "learning_rate": 1.647535456169591e-06, + "loss": 0.23247988522052765, + "step": 6207 + }, + { + "epoch": 1.6485194529278981, + "grad_norm": 1.3728921390628253, + "learning_rate": 1.6451218858706374e-06, + "loss": 0.2659391760826111, + "step": 6208 + }, + { + "epoch": 1.648785021909441, + "grad_norm": 1.2534645715863684, + "learning_rate": 1.642709926293644e-06, + "loss": 0.2154998630285263, + "step": 6209 + }, + { + "epoch": 1.649050590890984, + "grad_norm": 1.322825591754104, + "learning_rate": 1.6402995779036146e-06, + "loss": 
0.20363599061965942, + "step": 6210 + }, + { + "epoch": 1.649316159872527, + "grad_norm": 1.3775669953664806, + "learning_rate": 1.6378908411652328e-06, + "loss": 0.23388779163360596, + "step": 6211 + }, + { + "epoch": 1.64958172885407, + "grad_norm": 1.205059730534318, + "learning_rate": 1.6354837165428772e-06, + "loss": 0.20465341210365295, + "step": 6212 + }, + { + "epoch": 1.649847297835613, + "grad_norm": 1.2409004364034002, + "learning_rate": 1.6330782045006088e-06, + "loss": 0.2233584225177765, + "step": 6213 + }, + { + "epoch": 1.6501128668171559, + "grad_norm": 1.313264623251788, + "learning_rate": 1.6306743055021834e-06, + "loss": 0.2880077064037323, + "step": 6214 + }, + { + "epoch": 1.6503784357986988, + "grad_norm": 1.2769524753658168, + "learning_rate": 1.6282720200110458e-06, + "loss": 0.23332230746746063, + "step": 6215 + }, + { + "epoch": 1.6506440047802418, + "grad_norm": 1.2682336609825682, + "learning_rate": 1.6258713484903266e-06, + "loss": 0.22191204130649567, + "step": 6216 + }, + { + "epoch": 1.6509095737617847, + "grad_norm": 1.2899982671052521, + "learning_rate": 1.6234722914028478e-06, + "loss": 0.2403659224510193, + "step": 6217 + }, + { + "epoch": 1.6511751427433277, + "grad_norm": 1.2823746538865957, + "learning_rate": 1.6210748492111161e-06, + "loss": 0.2230256348848343, + "step": 6218 + }, + { + "epoch": 1.6514407117248706, + "grad_norm": 1.233703409456991, + "learning_rate": 1.6186790223773375e-06, + "loss": 0.2086302787065506, + "step": 6219 + }, + { + "epoch": 1.6517062807064136, + "grad_norm": 1.2696219439991872, + "learning_rate": 1.6162848113633934e-06, + "loss": 0.22336703538894653, + "step": 6220 + }, + { + "epoch": 1.6519718496879565, + "grad_norm": 1.2026474951561137, + "learning_rate": 1.6138922166308613e-06, + "loss": 0.2354746013879776, + "step": 6221 + }, + { + "epoch": 1.6522374186694995, + "grad_norm": 1.212799588563382, + "learning_rate": 1.6115012386410045e-06, + "loss": 0.23983564972877502, + "step": 6222 + }, + { 
+ "epoch": 1.6525029876510424, + "grad_norm": 1.3394195242071623, + "learning_rate": 1.6091118778547765e-06, + "loss": 0.25468897819519043, + "step": 6223 + }, + { + "epoch": 1.6527685566325854, + "grad_norm": 1.2085737685975797, + "learning_rate": 1.6067241347328166e-06, + "loss": 0.2225346863269806, + "step": 6224 + }, + { + "epoch": 1.6530341256141283, + "grad_norm": 1.4474708027397767, + "learning_rate": 1.6043380097354543e-06, + "loss": 0.28801992535591125, + "step": 6225 + }, + { + "epoch": 1.6532996945956713, + "grad_norm": 1.1308003259460488, + "learning_rate": 1.6019535033227063e-06, + "loss": 0.1869816929101944, + "step": 6226 + }, + { + "epoch": 1.6535652635772142, + "grad_norm": 1.3022141110443597, + "learning_rate": 1.5995706159542768e-06, + "loss": 0.2569049894809723, + "step": 6227 + }, + { + "epoch": 1.6538308325587572, + "grad_norm": 1.2689496619282572, + "learning_rate": 1.5971893480895583e-06, + "loss": 0.19138488173484802, + "step": 6228 + }, + { + "epoch": 1.6540964015403001, + "grad_norm": 1.2583553251304942, + "learning_rate": 1.5948097001876318e-06, + "loss": 0.23107777535915375, + "step": 6229 + }, + { + "epoch": 1.654361970521843, + "grad_norm": 1.4140324563807463, + "learning_rate": 1.5924316727072652e-06, + "loss": 0.21682313084602356, + "step": 6230 + }, + { + "epoch": 1.654627539503386, + "grad_norm": 1.6445896965406597, + "learning_rate": 1.5900552661069135e-06, + "loss": 0.27629974484443665, + "step": 6231 + }, + { + "epoch": 1.654893108484929, + "grad_norm": 1.2060133562172235, + "learning_rate": 1.587680480844721e-06, + "loss": 0.21919876337051392, + "step": 6232 + }, + { + "epoch": 1.655158677466472, + "grad_norm": 1.4827934801999716, + "learning_rate": 1.5853073173785183e-06, + "loss": 0.2556184232234955, + "step": 6233 + }, + { + "epoch": 1.655424246448015, + "grad_norm": 1.1362954303327644, + "learning_rate": 1.5829357761658214e-06, + "loss": 0.1904449462890625, + "step": 6234 + }, + { + "epoch": 1.6556898154295578, + 
"grad_norm": 1.2410374365127181, + "learning_rate": 1.5805658576638372e-06, + "loss": 0.1991434246301651, + "step": 6235 + }, + { + "epoch": 1.6559553844111008, + "grad_norm": 1.4428347821081515, + "learning_rate": 1.5781975623294554e-06, + "loss": 0.2609177231788635, + "step": 6236 + }, + { + "epoch": 1.6562209533926437, + "grad_norm": 1.276051044481299, + "learning_rate": 1.575830890619261e-06, + "loss": 0.2481592893600464, + "step": 6237 + }, + { + "epoch": 1.6564865223741867, + "grad_norm": 1.2930470444266673, + "learning_rate": 1.5734658429895156e-06, + "loss": 0.23855090141296387, + "step": 6238 + }, + { + "epoch": 1.6567520913557297, + "grad_norm": 1.326739898505445, + "learning_rate": 1.5711024198961745e-06, + "loss": 0.2480623573064804, + "step": 6239 + }, + { + "epoch": 1.6570176603372726, + "grad_norm": 1.4145385747738486, + "learning_rate": 1.5687406217948775e-06, + "loss": 0.2504739463329315, + "step": 6240 + }, + { + "epoch": 1.6572832293188156, + "grad_norm": 1.1843269954841462, + "learning_rate": 1.5663804491409506e-06, + "loss": 0.2068580538034439, + "step": 6241 + }, + { + "epoch": 1.6575487983003585, + "grad_norm": 1.45151426190796, + "learning_rate": 1.5640219023894077e-06, + "loss": 0.2448163628578186, + "step": 6242 + }, + { + "epoch": 1.6578143672819015, + "grad_norm": 1.3391765527579818, + "learning_rate": 1.5616649819949492e-06, + "loss": 0.2514716386795044, + "step": 6243 + }, + { + "epoch": 1.6580799362634444, + "grad_norm": 1.1884099966156902, + "learning_rate": 1.559309688411962e-06, + "loss": 0.2067629098892212, + "step": 6244 + }, + { + "epoch": 1.6583455052449874, + "grad_norm": 1.2042735442206352, + "learning_rate": 1.5569560220945168e-06, + "loss": 0.22909750044345856, + "step": 6245 + }, + { + "epoch": 1.6586110742265303, + "grad_norm": 1.4646403481954997, + "learning_rate": 1.5546039834963745e-06, + "loss": 0.203629732131958, + "step": 6246 + }, + { + "epoch": 1.6588766432080733, + "grad_norm": 1.2050936311763847, + 
"learning_rate": 1.552253573070981e-06, + "loss": 0.21919086575508118, + "step": 6247 + }, + { + "epoch": 1.6591422121896162, + "grad_norm": 1.4379501702554756, + "learning_rate": 1.549904791271466e-06, + "loss": 0.2535661458969116, + "step": 6248 + }, + { + "epoch": 1.6594077811711592, + "grad_norm": 1.2609582047884877, + "learning_rate": 1.5475576385506475e-06, + "loss": 0.224460631608963, + "step": 6249 + }, + { + "epoch": 1.6596733501527021, + "grad_norm": 1.2625738742925756, + "learning_rate": 1.5452121153610288e-06, + "loss": 0.21925818920135498, + "step": 6250 + }, + { + "epoch": 1.659938919134245, + "grad_norm": 1.2787763694898493, + "learning_rate": 1.5428682221547997e-06, + "loss": 0.2100696563720703, + "step": 6251 + }, + { + "epoch": 1.660204488115788, + "grad_norm": 1.3484219674096825, + "learning_rate": 1.540525959383834e-06, + "loss": 0.25982293486595154, + "step": 6252 + }, + { + "epoch": 1.660470057097331, + "grad_norm": 1.2527966644905648, + "learning_rate": 1.538185327499694e-06, + "loss": 0.23615162074565887, + "step": 6253 + }, + { + "epoch": 1.660735626078874, + "grad_norm": 1.2738910414784854, + "learning_rate": 1.5358463269536218e-06, + "loss": 0.2454022467136383, + "step": 6254 + }, + { + "epoch": 1.6610011950604169, + "grad_norm": 1.3825181535789863, + "learning_rate": 1.5335089581965556e-06, + "loss": 0.2330605536699295, + "step": 6255 + }, + { + "epoch": 1.6612667640419598, + "grad_norm": 1.2169082012465264, + "learning_rate": 1.5311732216791087e-06, + "loss": 0.23193006217479706, + "step": 6256 + }, + { + "epoch": 1.6615323330235028, + "grad_norm": 1.2690481284418431, + "learning_rate": 1.5288391178515838e-06, + "loss": 0.23254770040512085, + "step": 6257 + }, + { + "epoch": 1.6617979020050457, + "grad_norm": 1.2246821396199268, + "learning_rate": 1.5265066471639701e-06, + "loss": 0.23240572214126587, + "step": 6258 + }, + { + "epoch": 1.6620634709865887, + "grad_norm": 1.3414134094293932, + "learning_rate": 1.5241758100659386e-06, + 
"loss": 0.2765730619430542, + "step": 6259 + }, + { + "epoch": 1.6623290399681316, + "grad_norm": 1.2956291225041994, + "learning_rate": 1.5218466070068472e-06, + "loss": 0.26366496086120605, + "step": 6260 + }, + { + "epoch": 1.6625946089496746, + "grad_norm": 1.240730160583952, + "learning_rate": 1.5195190384357405e-06, + "loss": 0.22322653234004974, + "step": 6261 + }, + { + "epoch": 1.6628601779312175, + "grad_norm": 1.2433877123660553, + "learning_rate": 1.5171931048013466e-06, + "loss": 0.24144116044044495, + "step": 6262 + }, + { + "epoch": 1.6631257469127605, + "grad_norm": 1.3783130308299147, + "learning_rate": 1.5148688065520734e-06, + "loss": 0.24559618532657623, + "step": 6263 + }, + { + "epoch": 1.6633913158943034, + "grad_norm": 1.3258590224160887, + "learning_rate": 1.5125461441360223e-06, + "loss": 0.24337056279182434, + "step": 6264 + }, + { + "epoch": 1.6636568848758464, + "grad_norm": 1.3292875380649603, + "learning_rate": 1.5102251180009752e-06, + "loss": 0.2733612358570099, + "step": 6265 + }, + { + "epoch": 1.6639224538573893, + "grad_norm": 1.2329811544038785, + "learning_rate": 1.5079057285943976e-06, + "loss": 0.2116459757089615, + "step": 6266 + }, + { + "epoch": 1.6641880228389323, + "grad_norm": 1.2335642813115397, + "learning_rate": 1.5055879763634407e-06, + "loss": 0.21221664547920227, + "step": 6267 + }, + { + "epoch": 1.6644535918204753, + "grad_norm": 1.2500150658336624, + "learning_rate": 1.503271861754939e-06, + "loss": 0.21166589856147766, + "step": 6268 + }, + { + "epoch": 1.6647191608020182, + "grad_norm": 1.5113123418333367, + "learning_rate": 1.5009573852154136e-06, + "loss": 0.2652161121368408, + "step": 6269 + }, + { + "epoch": 1.6649847297835612, + "grad_norm": 1.262834880378694, + "learning_rate": 1.4986445471910672e-06, + "loss": 0.22142267227172852, + "step": 6270 + }, + { + "epoch": 1.665250298765104, + "grad_norm": 1.4442965183949772, + "learning_rate": 1.4963333481277874e-06, + "loss": 0.2307332456111908, + "step": 
6271 + }, + { + "epoch": 1.665515867746647, + "grad_norm": 1.411326986781179, + "learning_rate": 1.494023788471144e-06, + "loss": 0.2669411897659302, + "step": 6272 + }, + { + "epoch": 1.66578143672819, + "grad_norm": 1.2823998109594834, + "learning_rate": 1.4917158686663992e-06, + "loss": 0.2468804121017456, + "step": 6273 + }, + { + "epoch": 1.666047005709733, + "grad_norm": 1.2639666166307362, + "learning_rate": 1.4894095891584882e-06, + "loss": 0.24152463674545288, + "step": 6274 + }, + { + "epoch": 1.666312574691276, + "grad_norm": 1.098201760932299, + "learning_rate": 1.4871049503920353e-06, + "loss": 0.1966545283794403, + "step": 6275 + }, + { + "epoch": 1.6665781436728189, + "grad_norm": 1.2773845282560163, + "learning_rate": 1.4848019528113477e-06, + "loss": 0.24772626161575317, + "step": 6276 + }, + { + "epoch": 1.6668437126543618, + "grad_norm": 1.3731672204722256, + "learning_rate": 1.4825005968604189e-06, + "loss": 0.22138851881027222, + "step": 6277 + }, + { + "epoch": 1.6671092816359048, + "grad_norm": 1.2245583238686863, + "learning_rate": 1.4802008829829172e-06, + "loss": 0.24345465004444122, + "step": 6278 + }, + { + "epoch": 1.6673748506174477, + "grad_norm": 1.3209828849983516, + "learning_rate": 1.477902811622205e-06, + "loss": 0.22862716019153595, + "step": 6279 + }, + { + "epoch": 1.6676404195989907, + "grad_norm": 1.2914770883474422, + "learning_rate": 1.4756063832213207e-06, + "loss": 0.2763083577156067, + "step": 6280 + }, + { + "epoch": 1.6679059885805336, + "grad_norm": 1.3142139937070516, + "learning_rate": 1.4733115982229885e-06, + "loss": 0.24631357192993164, + "step": 6281 + }, + { + "epoch": 1.6681715575620768, + "grad_norm": 1.322429969576976, + "learning_rate": 1.4710184570696184e-06, + "loss": 0.22650030255317688, + "step": 6282 + }, + { + "epoch": 1.6684371265436198, + "grad_norm": 1.3243342318873437, + "learning_rate": 1.4687269602033006e-06, + "loss": 0.2455909103155136, + "step": 6283 + }, + { + "epoch": 1.6687026955251627, + 
"grad_norm": 1.3711517369784783, + "learning_rate": 1.4664371080658079e-06, + "loss": 0.25625506043434143, + "step": 6284 + }, + { + "epoch": 1.6689682645067057, + "grad_norm": 1.1450036681372322, + "learning_rate": 1.4641489010985954e-06, + "loss": 0.22178369760513306, + "step": 6285 + }, + { + "epoch": 1.6692338334882486, + "grad_norm": 1.2644620602089436, + "learning_rate": 1.4618623397428055e-06, + "loss": 0.23936234414577484, + "step": 6286 + }, + { + "epoch": 1.6694994024697916, + "grad_norm": 1.2667144776178243, + "learning_rate": 1.459577424439258e-06, + "loss": 0.21629829704761505, + "step": 6287 + }, + { + "epoch": 1.6697649714513345, + "grad_norm": 1.3486786043134158, + "learning_rate": 1.457294155628457e-06, + "loss": 0.238427072763443, + "step": 6288 + }, + { + "epoch": 1.6700305404328775, + "grad_norm": 1.412674472973442, + "learning_rate": 1.4550125337505926e-06, + "loss": 0.23168250918388367, + "step": 6289 + }, + { + "epoch": 1.6702961094144204, + "grad_norm": 1.3185872633193214, + "learning_rate": 1.45273255924553e-06, + "loss": 0.25518402457237244, + "step": 6290 + }, + { + "epoch": 1.6705616783959634, + "grad_norm": 1.2092220747685465, + "learning_rate": 1.450454232552826e-06, + "loss": 0.2488553822040558, + "step": 6291 + }, + { + "epoch": 1.6708272473775063, + "grad_norm": 1.4309048190710245, + "learning_rate": 1.448177554111716e-06, + "loss": 0.2684085965156555, + "step": 6292 + }, + { + "epoch": 1.6710928163590493, + "grad_norm": 1.3645105519242562, + "learning_rate": 1.4459025243611124e-06, + "loss": 0.24627447128295898, + "step": 6293 + }, + { + "epoch": 1.6713583853405922, + "grad_norm": 1.2960987120962004, + "learning_rate": 1.4436291437396156e-06, + "loss": 0.24725376069545746, + "step": 6294 + }, + { + "epoch": 1.6716239543221352, + "grad_norm": 1.2752333210419433, + "learning_rate": 1.4413574126855067e-06, + "loss": 0.23488914966583252, + "step": 6295 + }, + { + "epoch": 1.6718895233036781, + "grad_norm": 1.2385365684534737, + 
"learning_rate": 1.4390873316367492e-06, + "loss": 0.2031177133321762, + "step": 6296 + }, + { + "epoch": 1.672155092285221, + "grad_norm": 1.265889760948498, + "learning_rate": 1.4368189010309874e-06, + "loss": 0.25378018617630005, + "step": 6297 + }, + { + "epoch": 1.672420661266764, + "grad_norm": 1.2443137764428682, + "learning_rate": 1.434552121305548e-06, + "loss": 0.21305282413959503, + "step": 6298 + }, + { + "epoch": 1.672686230248307, + "grad_norm": 1.1925787762252436, + "learning_rate": 1.432286992897437e-06, + "loss": 0.20908987522125244, + "step": 6299 + }, + { + "epoch": 1.67295179922985, + "grad_norm": 1.2228377563088515, + "learning_rate": 1.4300235162433496e-06, + "loss": 0.21945340931415558, + "step": 6300 + }, + { + "epoch": 1.6732173682113929, + "grad_norm": 1.3659267409445854, + "learning_rate": 1.4277616917796544e-06, + "loss": 0.22096669673919678, + "step": 6301 + }, + { + "epoch": 1.6734829371929358, + "grad_norm": 1.2773291306452106, + "learning_rate": 1.425501519942406e-06, + "loss": 0.2233850657939911, + "step": 6302 + }, + { + "epoch": 1.6737485061744788, + "grad_norm": 1.2672720076411363, + "learning_rate": 1.423243001167337e-06, + "loss": 0.21432995796203613, + "step": 6303 + }, + { + "epoch": 1.6740140751560217, + "grad_norm": 1.3864014459258447, + "learning_rate": 1.4209861358898636e-06, + "loss": 0.2649557590484619, + "step": 6304 + }, + { + "epoch": 1.6742796441375647, + "grad_norm": 1.2642836811067808, + "learning_rate": 1.418730924545083e-06, + "loss": 0.24918347597122192, + "step": 6305 + }, + { + "epoch": 1.6745452131191076, + "grad_norm": 1.3089175693989048, + "learning_rate": 1.4164773675677745e-06, + "loss": 0.24121029675006866, + "step": 6306 + }, + { + "epoch": 1.6748107821006506, + "grad_norm": 1.2569762960026158, + "learning_rate": 1.4142254653923949e-06, + "loss": 0.24401789903640747, + "step": 6307 + }, + { + "epoch": 1.6750763510821935, + "grad_norm": 1.3272546708188746, + "learning_rate": 1.4119752184530867e-06, + 
"loss": 0.2374853938817978, + "step": 6308 + }, + { + "epoch": 1.6753419200637365, + "grad_norm": 1.2973848864698938, + "learning_rate": 1.4097266271836695e-06, + "loss": 0.2351088970899582, + "step": 6309 + }, + { + "epoch": 1.6756074890452797, + "grad_norm": 1.301417674196528, + "learning_rate": 1.407479692017647e-06, + "loss": 0.19560754299163818, + "step": 6310 + }, + { + "epoch": 1.6758730580268226, + "grad_norm": 1.390250023674765, + "learning_rate": 1.405234413388199e-06, + "loss": 0.24124252796173096, + "step": 6311 + }, + { + "epoch": 1.6761386270083656, + "grad_norm": 1.3742469305206364, + "learning_rate": 1.4029907917281903e-06, + "loss": 0.2208215445280075, + "step": 6312 + }, + { + "epoch": 1.6764041959899085, + "grad_norm": 1.2125662977366807, + "learning_rate": 1.4007488274701653e-06, + "loss": 0.23888292908668518, + "step": 6313 + }, + { + "epoch": 1.6766697649714515, + "grad_norm": 1.2936432356109655, + "learning_rate": 1.3985085210463479e-06, + "loss": 0.24079063534736633, + "step": 6314 + }, + { + "epoch": 1.6769353339529944, + "grad_norm": 1.2011852751375642, + "learning_rate": 1.3962698728886414e-06, + "loss": 0.18975606560707092, + "step": 6315 + }, + { + "epoch": 1.6772009029345374, + "grad_norm": 1.322599968285396, + "learning_rate": 1.3940328834286333e-06, + "loss": 0.201214998960495, + "step": 6316 + }, + { + "epoch": 1.6774664719160803, + "grad_norm": 1.2090909210103018, + "learning_rate": 1.3917975530975836e-06, + "loss": 0.20079322159290314, + "step": 6317 + }, + { + "epoch": 1.6777320408976233, + "grad_norm": 1.2732868066143843, + "learning_rate": 1.3895638823264447e-06, + "loss": 0.23593586683273315, + "step": 6318 + }, + { + "epoch": 1.6779976098791662, + "grad_norm": 1.3931846809533017, + "learning_rate": 1.3873318715458383e-06, + "loss": 0.26574259996414185, + "step": 6319 + }, + { + "epoch": 1.6782631788607092, + "grad_norm": 1.252943610173436, + "learning_rate": 1.3851015211860696e-06, + "loss": 0.20573323965072632, + "step": 
6320 + }, + { + "epoch": 1.6785287478422521, + "grad_norm": 1.4484920974875073, + "learning_rate": 1.3828728316771244e-06, + "loss": 0.25610506534576416, + "step": 6321 + }, + { + "epoch": 1.678794316823795, + "grad_norm": 1.330338299337135, + "learning_rate": 1.380645803448668e-06, + "loss": 0.2138693630695343, + "step": 6322 + }, + { + "epoch": 1.679059885805338, + "grad_norm": 1.1479105398064924, + "learning_rate": 1.3784204369300447e-06, + "loss": 0.21522866189479828, + "step": 6323 + }, + { + "epoch": 1.679325454786881, + "grad_norm": 1.441538971613898, + "learning_rate": 1.376196732550279e-06, + "loss": 0.25622743368148804, + "step": 6324 + }, + { + "epoch": 1.679591023768424, + "grad_norm": 1.354050705773023, + "learning_rate": 1.3739746907380757e-06, + "loss": 0.18025386333465576, + "step": 6325 + }, + { + "epoch": 1.679856592749967, + "grad_norm": 1.1665775097977176, + "learning_rate": 1.3717543119218168e-06, + "loss": 0.18785078823566437, + "step": 6326 + }, + { + "epoch": 1.6801221617315099, + "grad_norm": 1.3771154706722653, + "learning_rate": 1.3695355965295653e-06, + "loss": 0.24682481586933136, + "step": 6327 + }, + { + "epoch": 1.6803877307130528, + "grad_norm": 1.2994385931646761, + "learning_rate": 1.3673185449890647e-06, + "loss": 0.2193487137556076, + "step": 6328 + }, + { + "epoch": 1.6806532996945958, + "grad_norm": 1.2960131024456552, + "learning_rate": 1.3651031577277351e-06, + "loss": 0.24963265657424927, + "step": 6329 + }, + { + "epoch": 1.6809188686761387, + "grad_norm": 1.2714587333981215, + "learning_rate": 1.3628894351726785e-06, + "loss": 0.21473057568073273, + "step": 6330 + }, + { + "epoch": 1.6811844376576817, + "grad_norm": 1.4508064568072063, + "learning_rate": 1.3606773777506731e-06, + "loss": 0.2539534866809845, + "step": 6331 + }, + { + "epoch": 1.6814500066392246, + "grad_norm": 1.5049767699399101, + "learning_rate": 1.3584669858881771e-06, + "loss": 0.2671799659729004, + "step": 6332 + }, + { + "epoch": 1.6817155756207676, 
+ "grad_norm": 1.211295376852026, + "learning_rate": 1.3562582600113295e-06, + "loss": 0.24291013181209564, + "step": 6333 + }, + { + "epoch": 1.6819811446023105, + "grad_norm": 1.3672105989135315, + "learning_rate": 1.354051200545946e-06, + "loss": 0.24249233305454254, + "step": 6334 + }, + { + "epoch": 1.6822467135838535, + "grad_norm": 1.2855842039831968, + "learning_rate": 1.351845807917519e-06, + "loss": 0.21647261083126068, + "step": 6335 + }, + { + "epoch": 1.6825122825653964, + "grad_norm": 1.2764605035604815, + "learning_rate": 1.349642082551227e-06, + "loss": 0.2348332703113556, + "step": 6336 + }, + { + "epoch": 1.6827778515469394, + "grad_norm": 1.3049495455341118, + "learning_rate": 1.34744002487192e-06, + "loss": 0.22503259778022766, + "step": 6337 + }, + { + "epoch": 1.6830434205284823, + "grad_norm": 1.3236190891705721, + "learning_rate": 1.3452396353041286e-06, + "loss": 0.2397763580083847, + "step": 6338 + }, + { + "epoch": 1.6833089895100253, + "grad_norm": 1.156426557066381, + "learning_rate": 1.3430409142720624e-06, + "loss": 0.23345956206321716, + "step": 6339 + }, + { + "epoch": 1.6835745584915682, + "grad_norm": 1.1932341696009043, + "learning_rate": 1.3408438621996088e-06, + "loss": 0.19660598039627075, + "step": 6340 + }, + { + "epoch": 1.6838401274731112, + "grad_norm": 1.262928020262074, + "learning_rate": 1.3386484795103327e-06, + "loss": 0.19148695468902588, + "step": 6341 + }, + { + "epoch": 1.6841056964546541, + "grad_norm": 1.2112774084067142, + "learning_rate": 1.3364547666274819e-06, + "loss": 0.2078169733285904, + "step": 6342 + }, + { + "epoch": 1.684371265436197, + "grad_norm": 1.3703852622718744, + "learning_rate": 1.3342627239739715e-06, + "loss": 0.23122575879096985, + "step": 6343 + }, + { + "epoch": 1.68463683441774, + "grad_norm": 1.350523705417422, + "learning_rate": 1.3320723519724032e-06, + "loss": 0.2744083106517792, + "step": 6344 + }, + { + "epoch": 1.684902403399283, + "grad_norm": 1.3462449472678248, + 
"learning_rate": 1.3298836510450597e-06, + "loss": 0.26361098885536194, + "step": 6345 + }, + { + "epoch": 1.685167972380826, + "grad_norm": 1.2550654654863131, + "learning_rate": 1.3276966216138932e-06, + "loss": 0.21833205223083496, + "step": 6346 + }, + { + "epoch": 1.685433541362369, + "grad_norm": 1.306325021058624, + "learning_rate": 1.3255112641005374e-06, + "loss": 0.22075100243091583, + "step": 6347 + }, + { + "epoch": 1.6856991103439118, + "grad_norm": 1.4286786068270776, + "learning_rate": 1.3233275789263034e-06, + "loss": 0.24352343380451202, + "step": 6348 + }, + { + "epoch": 1.6859646793254548, + "grad_norm": 1.5476580340833483, + "learning_rate": 1.3211455665121808e-06, + "loss": 0.2331303060054779, + "step": 6349 + }, + { + "epoch": 1.6862302483069977, + "grad_norm": 1.398559395598541, + "learning_rate": 1.3189652272788356e-06, + "loss": 0.2511689066886902, + "step": 6350 + }, + { + "epoch": 1.6864958172885407, + "grad_norm": 1.1704691076383393, + "learning_rate": 1.3167865616466113e-06, + "loss": 0.18535873293876648, + "step": 6351 + }, + { + "epoch": 1.6867613862700837, + "grad_norm": 1.3097469055952822, + "learning_rate": 1.3146095700355289e-06, + "loss": 0.23924914002418518, + "step": 6352 + }, + { + "epoch": 1.6870269552516266, + "grad_norm": 1.1591649275755667, + "learning_rate": 1.3124342528652845e-06, + "loss": 0.19710025191307068, + "step": 6353 + }, + { + "epoch": 1.6872925242331696, + "grad_norm": 1.393629731020981, + "learning_rate": 1.3102606105552585e-06, + "loss": 0.21439281105995178, + "step": 6354 + }, + { + "epoch": 1.6875580932147125, + "grad_norm": 1.3051512833867451, + "learning_rate": 1.3080886435245e-06, + "loss": 0.2647722363471985, + "step": 6355 + }, + { + "epoch": 1.6878236621962555, + "grad_norm": 2.6038516980586355, + "learning_rate": 1.3059183521917396e-06, + "loss": 0.2202019840478897, + "step": 6356 + }, + { + "epoch": 1.6880892311777984, + "grad_norm": 1.3022104210295473, + "learning_rate": 1.3037497369753871e-06, + 
"loss": 0.25833001732826233, + "step": 6357 + }, + { + "epoch": 1.6883548001593414, + "grad_norm": 1.1906464618269579, + "learning_rate": 1.3015827982935192e-06, + "loss": 0.19984321296215057, + "step": 6358 + }, + { + "epoch": 1.6886203691408843, + "grad_norm": 1.3347301103088016, + "learning_rate": 1.2994175365638996e-06, + "loss": 0.2190552055835724, + "step": 6359 + }, + { + "epoch": 1.6888859381224273, + "grad_norm": 1.265894337049371, + "learning_rate": 1.2972539522039652e-06, + "loss": 0.26262593269348145, + "step": 6360 + }, + { + "epoch": 1.6891515071039702, + "grad_norm": 1.285416913994909, + "learning_rate": 1.2950920456308292e-06, + "loss": 0.2665651738643646, + "step": 6361 + }, + { + "epoch": 1.6894170760855132, + "grad_norm": 1.213162722605336, + "learning_rate": 1.2929318172612803e-06, + "loss": 0.22369208931922913, + "step": 6362 + }, + { + "epoch": 1.6896826450670561, + "grad_norm": 1.2234073567984471, + "learning_rate": 1.2907732675117878e-06, + "loss": 0.21063543856143951, + "step": 6363 + }, + { + "epoch": 1.689948214048599, + "grad_norm": 1.3608426715056905, + "learning_rate": 1.2886163967984944e-06, + "loss": 0.2303045690059662, + "step": 6364 + }, + { + "epoch": 1.690213783030142, + "grad_norm": 1.1473656525455074, + "learning_rate": 1.2864612055372182e-06, + "loss": 0.20185884833335876, + "step": 6365 + }, + { + "epoch": 1.690479352011685, + "grad_norm": 1.2673026097919315, + "learning_rate": 1.284307694143455e-06, + "loss": 0.22900527715682983, + "step": 6366 + }, + { + "epoch": 1.690744920993228, + "grad_norm": 1.2373147270640896, + "learning_rate": 1.282155863032377e-06, + "loss": 0.21405862271785736, + "step": 6367 + }, + { + "epoch": 1.6910104899747709, + "grad_norm": 1.3139606008654157, + "learning_rate": 1.2800057126188304e-06, + "loss": 0.26143258810043335, + "step": 6368 + }, + { + "epoch": 1.6912760589563138, + "grad_norm": 1.319330305112879, + "learning_rate": 1.2778572433173397e-06, + "loss": 0.24437926709651947, + "step": 6369 
+ }, + { + "epoch": 1.6915416279378568, + "grad_norm": 1.1954155676954614, + "learning_rate": 1.275710455542104e-06, + "loss": 0.24862337112426758, + "step": 6370 + }, + { + "epoch": 1.6918071969193997, + "grad_norm": 1.2264107157331223, + "learning_rate": 1.2735653497069978e-06, + "loss": 0.2146604359149933, + "step": 6371 + }, + { + "epoch": 1.6920727659009427, + "grad_norm": 1.3217815480091177, + "learning_rate": 1.2714219262255777e-06, + "loss": 0.2525256872177124, + "step": 6372 + }, + { + "epoch": 1.6923383348824856, + "grad_norm": 1.289957068010404, + "learning_rate": 1.2692801855110638e-06, + "loss": 0.23462912440299988, + "step": 6373 + }, + { + "epoch": 1.6926039038640286, + "grad_norm": 1.3468375801476438, + "learning_rate": 1.2671401279763595e-06, + "loss": 0.21551170945167542, + "step": 6374 + }, + { + "epoch": 1.6928694728455715, + "grad_norm": 1.4457180200872415, + "learning_rate": 1.2650017540340454e-06, + "loss": 0.24094407260417938, + "step": 6375 + }, + { + "epoch": 1.6931350418271145, + "grad_norm": 1.2168123169553724, + "learning_rate": 1.2628650640963736e-06, + "loss": 0.23101133108139038, + "step": 6376 + }, + { + "epoch": 1.6934006108086574, + "grad_norm": 1.4830646801660192, + "learning_rate": 1.2607300585752724e-06, + "loss": 0.2513899803161621, + "step": 6377 + }, + { + "epoch": 1.6936661797902004, + "grad_norm": 1.417144859782869, + "learning_rate": 1.258596737882345e-06, + "loss": 0.2490600198507309, + "step": 6378 + }, + { + "epoch": 1.6939317487717434, + "grad_norm": 1.3403225341914131, + "learning_rate": 1.256465102428872e-06, + "loss": 0.25767675042152405, + "step": 6379 + }, + { + "epoch": 1.6941973177532863, + "grad_norm": 1.2775246675329248, + "learning_rate": 1.254335152625804e-06, + "loss": 0.2231348305940628, + "step": 6380 + }, + { + "epoch": 1.6944628867348293, + "grad_norm": 1.4410136520558763, + "learning_rate": 1.2522068888837758e-06, + "loss": 0.25873979926109314, + "step": 6381 + }, + { + "epoch": 1.6947284557163722, + 
"grad_norm": 1.4111151195923193, + "learning_rate": 1.2500803116130887e-06, + "loss": 0.2848423421382904, + "step": 6382 + }, + { + "epoch": 1.6949940246979152, + "grad_norm": 1.1110125207312456, + "learning_rate": 1.247955421223721e-06, + "loss": 0.21343804895877838, + "step": 6383 + }, + { + "epoch": 1.695259593679458, + "grad_norm": 1.3025436504976033, + "learning_rate": 1.245832218125328e-06, + "loss": 0.23080062866210938, + "step": 6384 + }, + { + "epoch": 1.695525162661001, + "grad_norm": 1.3020267493975237, + "learning_rate": 1.2437107027272376e-06, + "loss": 0.2397225797176361, + "step": 6385 + }, + { + "epoch": 1.695790731642544, + "grad_norm": 1.3120966348534624, + "learning_rate": 1.2415908754384532e-06, + "loss": 0.22798654437065125, + "step": 6386 + }, + { + "epoch": 1.696056300624087, + "grad_norm": 1.3399304326822938, + "learning_rate": 1.2394727366676518e-06, + "loss": 0.2534061074256897, + "step": 6387 + }, + { + "epoch": 1.69632186960563, + "grad_norm": 1.2269756633197797, + "learning_rate": 1.2373562868231858e-06, + "loss": 0.2127036452293396, + "step": 6388 + }, + { + "epoch": 1.6965874385871729, + "grad_norm": 1.341525895521795, + "learning_rate": 1.2352415263130813e-06, + "loss": 0.22341205179691315, + "step": 6389 + }, + { + "epoch": 1.6968530075687158, + "grad_norm": 1.316572711467383, + "learning_rate": 1.2331284555450406e-06, + "loss": 0.2435426563024521, + "step": 6390 + }, + { + "epoch": 1.6971185765502588, + "grad_norm": 1.3203864338710647, + "learning_rate": 1.2310170749264383e-06, + "loss": 0.24652531743049622, + "step": 6391 + }, + { + "epoch": 1.6973841455318017, + "grad_norm": 1.251250109623578, + "learning_rate": 1.228907384864323e-06, + "loss": 0.24172671139240265, + "step": 6392 + }, + { + "epoch": 1.6976497145133447, + "grad_norm": 1.293405881850453, + "learning_rate": 1.2267993857654182e-06, + "loss": 0.21534420549869537, + "step": 6393 + }, + { + "epoch": 1.6979152834948879, + "grad_norm": 2.1259133697182575, + 
"learning_rate": 1.2246930780361221e-06, + "loss": 0.2617778182029724, + "step": 6394 + }, + { + "epoch": 1.6981808524764308, + "grad_norm": 1.1793022391098469, + "learning_rate": 1.2225884620825046e-06, + "loss": 0.20388583838939667, + "step": 6395 + }, + { + "epoch": 1.6984464214579738, + "grad_norm": 1.289033320527503, + "learning_rate": 1.220485538310312e-06, + "loss": 0.23714327812194824, + "step": 6396 + }, + { + "epoch": 1.6987119904395167, + "grad_norm": 1.3592785135687544, + "learning_rate": 1.2183843071249634e-06, + "loss": 0.2495463341474533, + "step": 6397 + }, + { + "epoch": 1.6989775594210597, + "grad_norm": 1.2730498991215184, + "learning_rate": 1.2162847689315483e-06, + "loss": 0.2419012188911438, + "step": 6398 + }, + { + "epoch": 1.6992431284026026, + "grad_norm": 1.2226640861076554, + "learning_rate": 1.214186924134838e-06, + "loss": 0.23392438888549805, + "step": 6399 + }, + { + "epoch": 1.6995086973841456, + "grad_norm": 1.3210458214149883, + "learning_rate": 1.2120907731392695e-06, + "loss": 0.22855526208877563, + "step": 6400 + }, + { + "epoch": 1.6997742663656885, + "grad_norm": 1.2152782326664608, + "learning_rate": 1.2099963163489558e-06, + "loss": 0.22393949329853058, + "step": 6401 + }, + { + "epoch": 1.7000398353472315, + "grad_norm": 1.3855673404796554, + "learning_rate": 1.2079035541676832e-06, + "loss": 0.2539960741996765, + "step": 6402 + }, + { + "epoch": 1.7003054043287744, + "grad_norm": 1.3330270743987416, + "learning_rate": 1.2058124869989129e-06, + "loss": 0.23716852068901062, + "step": 6403 + }, + { + "epoch": 1.7005709733103174, + "grad_norm": 1.347782549245642, + "learning_rate": 1.2037231152457773e-06, + "loss": 0.24658545851707458, + "step": 6404 + }, + { + "epoch": 1.7008365422918603, + "grad_norm": 1.2494300647338343, + "learning_rate": 1.201635439311083e-06, + "loss": 0.2316630333662033, + "step": 6405 + }, + { + "epoch": 1.7011021112734033, + "grad_norm": 1.0834142572483991, + "learning_rate": 1.1995494595973089e-06, 
+ "loss": 0.20434345304965973, + "step": 6406 + }, + { + "epoch": 1.7013676802549462, + "grad_norm": 1.3445140884275912, + "learning_rate": 1.197465176506607e-06, + "loss": 0.2585931420326233, + "step": 6407 + }, + { + "epoch": 1.7016332492364892, + "grad_norm": 1.2567668360829787, + "learning_rate": 1.1953825904408033e-06, + "loss": 0.23007069528102875, + "step": 6408 + }, + { + "epoch": 1.7018988182180321, + "grad_norm": 1.2770978609777501, + "learning_rate": 1.1933017018013948e-06, + "loss": 0.21822810173034668, + "step": 6409 + }, + { + "epoch": 1.702164387199575, + "grad_norm": 1.2875752799081717, + "learning_rate": 1.1912225109895526e-06, + "loss": 0.241228848695755, + "step": 6410 + }, + { + "epoch": 1.702429956181118, + "grad_norm": 1.3509759956774154, + "learning_rate": 1.1891450184061203e-06, + "loss": 0.28803908824920654, + "step": 6411 + }, + { + "epoch": 1.702695525162661, + "grad_norm": 1.3018941028318989, + "learning_rate": 1.1870692244516147e-06, + "loss": 0.2387516349554062, + "step": 6412 + }, + { + "epoch": 1.702961094144204, + "grad_norm": 1.2538051398244094, + "learning_rate": 1.1849951295262242e-06, + "loss": 0.19774140417575836, + "step": 6413 + }, + { + "epoch": 1.7032266631257469, + "grad_norm": 1.269953409174644, + "learning_rate": 1.1829227340298088e-06, + "loss": 0.22842247784137726, + "step": 6414 + }, + { + "epoch": 1.7034922321072898, + "grad_norm": 1.1987695898844528, + "learning_rate": 1.1808520383619015e-06, + "loss": 0.21994739770889282, + "step": 6415 + }, + { + "epoch": 1.7037578010888328, + "grad_norm": 1.2719096074486522, + "learning_rate": 1.1787830429217084e-06, + "loss": 0.22328051924705505, + "step": 6416 + }, + { + "epoch": 1.7040233700703757, + "grad_norm": 1.3583279531737376, + "learning_rate": 1.1767157481081092e-06, + "loss": 0.26704326272010803, + "step": 6417 + }, + { + "epoch": 1.7042889390519187, + "grad_norm": 1.2796404749500392, + "learning_rate": 1.174650154319653e-06, + "loss": 0.2148481160402298, + "step": 
6418 + }, + { + "epoch": 1.7045545080334616, + "grad_norm": 1.1912742761204351, + "learning_rate": 1.1725862619545625e-06, + "loss": 0.21731218695640564, + "step": 6419 + }, + { + "epoch": 1.7048200770150046, + "grad_norm": 1.3502505047017879, + "learning_rate": 1.1705240714107301e-06, + "loss": 0.20832043886184692, + "step": 6420 + }, + { + "epoch": 1.7050856459965475, + "grad_norm": 1.2922565511595965, + "learning_rate": 1.1684635830857249e-06, + "loss": 0.21739046275615692, + "step": 6421 + }, + { + "epoch": 1.7053512149780907, + "grad_norm": 1.3041232291639149, + "learning_rate": 1.1664047973767811e-06, + "loss": 0.23972246050834656, + "step": 6422 + }, + { + "epoch": 1.7056167839596337, + "grad_norm": 1.2420174603299015, + "learning_rate": 1.1643477146808092e-06, + "loss": 0.2471289187669754, + "step": 6423 + }, + { + "epoch": 1.7058823529411766, + "grad_norm": 1.2148999014811244, + "learning_rate": 1.1622923353943916e-06, + "loss": 0.2014283537864685, + "step": 6424 + }, + { + "epoch": 1.7061479219227196, + "grad_norm": 1.1799937956162947, + "learning_rate": 1.1602386599137782e-06, + "loss": 0.21680915355682373, + "step": 6425 + }, + { + "epoch": 1.7064134909042625, + "grad_norm": 1.2221660563202492, + "learning_rate": 1.158186688634898e-06, + "loss": 0.2101205736398697, + "step": 6426 + }, + { + "epoch": 1.7066790598858055, + "grad_norm": 1.2879683442276364, + "learning_rate": 1.1561364219533444e-06, + "loss": 0.22114071249961853, + "step": 6427 + }, + { + "epoch": 1.7069446288673484, + "grad_norm": 1.2910925736026095, + "learning_rate": 1.1540878602643858e-06, + "loss": 0.20608706772327423, + "step": 6428 + }, + { + "epoch": 1.7072101978488914, + "grad_norm": 1.2486066037383718, + "learning_rate": 1.1520410039629593e-06, + "loss": 0.2247905433177948, + "step": 6429 + }, + { + "epoch": 1.7074757668304343, + "grad_norm": 1.1718742986299986, + "learning_rate": 1.1499958534436751e-06, + "loss": 0.22623226046562195, + "step": 6430 + }, + { + "epoch": 
1.7077413358119773, + "grad_norm": 1.2776253558863635, + "learning_rate": 1.1479524091008142e-06, + "loss": 0.2063906192779541, + "step": 6431 + }, + { + "epoch": 1.7080069047935202, + "grad_norm": 1.4035125322254989, + "learning_rate": 1.1459106713283286e-06, + "loss": 0.2787795960903168, + "step": 6432 + }, + { + "epoch": 1.7082724737750632, + "grad_norm": 1.2096674582385407, + "learning_rate": 1.1438706405198419e-06, + "loss": 0.23090440034866333, + "step": 6433 + }, + { + "epoch": 1.7085380427566061, + "grad_norm": 1.288319877687408, + "learning_rate": 1.141832317068645e-06, + "loss": 0.23690670728683472, + "step": 6434 + }, + { + "epoch": 1.708803611738149, + "grad_norm": 1.2499926164056985, + "learning_rate": 1.1397957013677064e-06, + "loss": 0.209202378988266, + "step": 6435 + }, + { + "epoch": 1.709069180719692, + "grad_norm": 1.2311768368116, + "learning_rate": 1.1377607938096635e-06, + "loss": 0.22541575133800507, + "step": 6436 + }, + { + "epoch": 1.709334749701235, + "grad_norm": 1.3505125458173146, + "learning_rate": 1.1357275947868162e-06, + "loss": 0.2460884153842926, + "step": 6437 + }, + { + "epoch": 1.709600318682778, + "grad_norm": 1.195327574575731, + "learning_rate": 1.1336961046911443e-06, + "loss": 0.21967202425003052, + "step": 6438 + }, + { + "epoch": 1.709865887664321, + "grad_norm": 1.346022527152768, + "learning_rate": 1.1316663239142954e-06, + "loss": 0.23619329929351807, + "step": 6439 + }, + { + "epoch": 1.7101314566458639, + "grad_norm": 1.3033234842407981, + "learning_rate": 1.129638252847587e-06, + "loss": 0.24563436210155487, + "step": 6440 + }, + { + "epoch": 1.7103970256274068, + "grad_norm": 1.3840933006905622, + "learning_rate": 1.1276118918820068e-06, + "loss": 0.25508859753608704, + "step": 6441 + }, + { + "epoch": 1.7106625946089498, + "grad_norm": 1.3406379279103604, + "learning_rate": 1.1255872414082136e-06, + "loss": 0.24761545658111572, + "step": 6442 + }, + { + "epoch": 1.7109281635904927, + "grad_norm": 
4.632018568484065, + "learning_rate": 1.1235643018165344e-06, + "loss": 0.2355962097644806, + "step": 6443 + }, + { + "epoch": 1.7111937325720357, + "grad_norm": 1.3274457548497118, + "learning_rate": 1.1215430734969723e-06, + "loss": 0.2534273862838745, + "step": 6444 + }, + { + "epoch": 1.7114593015535786, + "grad_norm": 1.2846712625276346, + "learning_rate": 1.1195235568391938e-06, + "loss": 0.2756424844264984, + "step": 6445 + }, + { + "epoch": 1.7117248705351216, + "grad_norm": 1.2126020570228762, + "learning_rate": 1.1175057522325383e-06, + "loss": 0.2198309451341629, + "step": 6446 + }, + { + "epoch": 1.7119904395166645, + "grad_norm": 1.2343738377988847, + "learning_rate": 1.1154896600660136e-06, + "loss": 0.21767666935920715, + "step": 6447 + }, + { + "epoch": 1.7122560084982075, + "grad_norm": 1.4965895030859304, + "learning_rate": 1.1134752807283e-06, + "loss": 0.2679128348827362, + "step": 6448 + }, + { + "epoch": 1.7125215774797504, + "grad_norm": 1.292131622576057, + "learning_rate": 1.1114626146077457e-06, + "loss": 0.2268792986869812, + "step": 6449 + }, + { + "epoch": 1.7127871464612934, + "grad_norm": 1.224637524783582, + "learning_rate": 1.109451662092369e-06, + "loss": 0.21585378050804138, + "step": 6450 + }, + { + "epoch": 1.7130527154428363, + "grad_norm": 1.3157463227820392, + "learning_rate": 1.1074424235698567e-06, + "loss": 0.2258647382259369, + "step": 6451 + }, + { + "epoch": 1.7133182844243793, + "grad_norm": 1.3742268123946286, + "learning_rate": 1.1054348994275677e-06, + "loss": 0.2456682175397873, + "step": 6452 + }, + { + "epoch": 1.7135838534059222, + "grad_norm": 1.4853732102975625, + "learning_rate": 1.1034290900525279e-06, + "loss": 0.22897745668888092, + "step": 6453 + }, + { + "epoch": 1.7138494223874652, + "grad_norm": 1.133114987282755, + "learning_rate": 1.101424995831435e-06, + "loss": 0.1910650134086609, + "step": 6454 + }, + { + "epoch": 1.7141149913690081, + "grad_norm": 1.2728981818199352, + "learning_rate": 
1.0994226171506529e-06, + "loss": 0.2519158720970154, + "step": 6455 + }, + { + "epoch": 1.714380560350551, + "grad_norm": 1.259309948081026, + "learning_rate": 1.0974219543962184e-06, + "loss": 0.24191951751708984, + "step": 6456 + }, + { + "epoch": 1.714646129332094, + "grad_norm": 1.3159238719963862, + "learning_rate": 1.0954230079538352e-06, + "loss": 0.2560814619064331, + "step": 6457 + }, + { + "epoch": 1.714911698313637, + "grad_norm": 1.2640782659289207, + "learning_rate": 1.0934257782088763e-06, + "loss": 0.22969035804271698, + "step": 6458 + }, + { + "epoch": 1.71517726729518, + "grad_norm": 1.3584917562872394, + "learning_rate": 1.0914302655463837e-06, + "loss": 0.26114046573638916, + "step": 6459 + }, + { + "epoch": 1.715442836276723, + "grad_norm": 1.2235177756044688, + "learning_rate": 1.0894364703510685e-06, + "loss": 0.21457752585411072, + "step": 6460 + }, + { + "epoch": 1.7157084052582658, + "grad_norm": 1.164559577491723, + "learning_rate": 1.0874443930073098e-06, + "loss": 0.19998760521411896, + "step": 6461 + }, + { + "epoch": 1.7159739742398088, + "grad_norm": 1.2278101157674874, + "learning_rate": 1.0854540338991615e-06, + "loss": 0.2379671037197113, + "step": 6462 + }, + { + "epoch": 1.7162395432213517, + "grad_norm": 1.3827652808641404, + "learning_rate": 1.0834653934103367e-06, + "loss": 0.2236609309911728, + "step": 6463 + }, + { + "epoch": 1.7165051122028947, + "grad_norm": 1.2673726734268553, + "learning_rate": 1.0814784719242234e-06, + "loss": 0.22507379949092865, + "step": 6464 + }, + { + "epoch": 1.7167706811844377, + "grad_norm": 1.3174434539455087, + "learning_rate": 1.079493269823877e-06, + "loss": 0.22138816118240356, + "step": 6465 + }, + { + "epoch": 1.7170362501659806, + "grad_norm": 1.3880746036316538, + "learning_rate": 1.0775097874920204e-06, + "loss": 0.227338969707489, + "step": 6466 + }, + { + "epoch": 1.7173018191475236, + "grad_norm": 1.2588670866885754, + "learning_rate": 1.0755280253110466e-06, + "loss": 
0.23694375157356262, + "step": 6467 + }, + { + "epoch": 1.7175673881290665, + "grad_norm": 1.365387614603678, + "learning_rate": 1.0735479836630136e-06, + "loss": 0.26219409704208374, + "step": 6468 + }, + { + "epoch": 1.7178329571106095, + "grad_norm": 1.20539748496599, + "learning_rate": 1.0715696629296524e-06, + "loss": 0.22215887904167175, + "step": 6469 + }, + { + "epoch": 1.7180985260921524, + "grad_norm": 1.3543481839639284, + "learning_rate": 1.0695930634923602e-06, + "loss": 0.25434768199920654, + "step": 6470 + }, + { + "epoch": 1.7183640950736954, + "grad_norm": 1.1809119822759757, + "learning_rate": 1.0676181857321998e-06, + "loss": 0.2092076987028122, + "step": 6471 + }, + { + "epoch": 1.7186296640552383, + "grad_norm": 1.330663320526799, + "learning_rate": 1.0656450300299048e-06, + "loss": 0.2710237503051758, + "step": 6472 + }, + { + "epoch": 1.7188952330367813, + "grad_norm": 1.2715188060789504, + "learning_rate": 1.0636735967658785e-06, + "loss": 0.2533886432647705, + "step": 6473 + }, + { + "epoch": 1.7191608020183242, + "grad_norm": 1.2174102707049457, + "learning_rate": 1.0617038863201878e-06, + "loss": 0.2545754909515381, + "step": 6474 + }, + { + "epoch": 1.7194263709998672, + "grad_norm": 1.2560655592374788, + "learning_rate": 1.0597358990725703e-06, + "loss": 0.26010993123054504, + "step": 6475 + }, + { + "epoch": 1.7196919399814101, + "grad_norm": 1.2632076366916114, + "learning_rate": 1.0577696354024314e-06, + "loss": 0.22529907524585724, + "step": 6476 + }, + { + "epoch": 1.719957508962953, + "grad_norm": 1.157260113755536, + "learning_rate": 1.0558050956888433e-06, + "loss": 0.1897469311952591, + "step": 6477 + }, + { + "epoch": 1.720223077944496, + "grad_norm": 1.31651804495616, + "learning_rate": 1.0538422803105441e-06, + "loss": 0.24663670361042023, + "step": 6478 + }, + { + "epoch": 1.720488646926039, + "grad_norm": 1.343902959790046, + "learning_rate": 1.0518811896459423e-06, + "loss": 0.2462892383337021, + "step": 6479 + }, + { + 
"epoch": 1.720754215907582, + "grad_norm": 1.117431347891292, + "learning_rate": 1.0499218240731157e-06, + "loss": 0.18652144074440002, + "step": 6480 + }, + { + "epoch": 1.7210197848891249, + "grad_norm": 1.2234103731079693, + "learning_rate": 1.0479641839698052e-06, + "loss": 0.24614468216896057, + "step": 6481 + }, + { + "epoch": 1.7212853538706678, + "grad_norm": 1.2632894895468527, + "learning_rate": 1.046008269713421e-06, + "loss": 0.27925312519073486, + "step": 6482 + }, + { + "epoch": 1.7215509228522108, + "grad_norm": 1.3426272887839532, + "learning_rate": 1.0440540816810395e-06, + "loss": 0.2626710832118988, + "step": 6483 + }, + { + "epoch": 1.7218164918337537, + "grad_norm": 1.2982212521269376, + "learning_rate": 1.042101620249405e-06, + "loss": 0.23039895296096802, + "step": 6484 + }, + { + "epoch": 1.7220820608152967, + "grad_norm": 1.2564768074123291, + "learning_rate": 1.0401508857949295e-06, + "loss": 0.19559775292873383, + "step": 6485 + }, + { + "epoch": 1.7223476297968396, + "grad_norm": 1.222035384596064, + "learning_rate": 1.0382018786936943e-06, + "loss": 0.24982990324497223, + "step": 6486 + }, + { + "epoch": 1.7226131987783826, + "grad_norm": 1.356827120814655, + "learning_rate": 1.0362545993214402e-06, + "loss": 0.26212313771247864, + "step": 6487 + }, + { + "epoch": 1.7228787677599255, + "grad_norm": 1.2583181328160484, + "learning_rate": 1.0343090480535788e-06, + "loss": 0.22827446460723877, + "step": 6488 + }, + { + "epoch": 1.7231443367414685, + "grad_norm": 1.3650470156220376, + "learning_rate": 1.032365225265196e-06, + "loss": 0.2710435390472412, + "step": 6489 + }, + { + "epoch": 1.7234099057230114, + "grad_norm": 1.560435811081079, + "learning_rate": 1.030423131331033e-06, + "loss": 0.25116702914237976, + "step": 6490 + }, + { + "epoch": 1.7236754747045544, + "grad_norm": 1.2598369270207033, + "learning_rate": 1.0284827666255048e-06, + "loss": 0.1980481743812561, + "step": 6491 + }, + { + "epoch": 1.7239410436860974, + "grad_norm": 
1.3159445178277585, + "learning_rate": 1.0265441315226898e-06, + "loss": 0.2777971625328064, + "step": 6492 + }, + { + "epoch": 1.7242066126676403, + "grad_norm": 1.3290253215924488, + "learning_rate": 1.0246072263963336e-06, + "loss": 0.23041702806949615, + "step": 6493 + }, + { + "epoch": 1.7244721816491833, + "grad_norm": 1.2761862568921072, + "learning_rate": 1.0226720516198495e-06, + "loss": 0.21428728103637695, + "step": 6494 + }, + { + "epoch": 1.7247377506307262, + "grad_norm": 1.2965072992275601, + "learning_rate": 1.020738607566316e-06, + "loss": 0.22577518224716187, + "step": 6495 + }, + { + "epoch": 1.7250033196122692, + "grad_norm": 1.2489154030372867, + "learning_rate": 1.0188068946084783e-06, + "loss": 0.21080979704856873, + "step": 6496 + }, + { + "epoch": 1.7252688885938121, + "grad_norm": 1.1941107816051266, + "learning_rate": 1.0168769131187472e-06, + "loss": 0.21232858300209045, + "step": 6497 + }, + { + "epoch": 1.725534457575355, + "grad_norm": 1.3035016990745079, + "learning_rate": 1.0149486634692019e-06, + "loss": 0.25525614619255066, + "step": 6498 + }, + { + "epoch": 1.725800026556898, + "grad_norm": 1.2742578592858531, + "learning_rate": 1.0130221460315858e-06, + "loss": 0.26291778683662415, + "step": 6499 + }, + { + "epoch": 1.726065595538441, + "grad_norm": 1.1747703502148148, + "learning_rate": 1.011097361177308e-06, + "loss": 0.21314382553100586, + "step": 6500 + }, + { + "epoch": 1.726331164519984, + "grad_norm": 1.3027182735878766, + "learning_rate": 1.0091743092774474e-06, + "loss": 0.2106419950723648, + "step": 6501 + }, + { + "epoch": 1.7265967335015269, + "grad_norm": 1.2753206037657139, + "learning_rate": 1.0072529907027407e-06, + "loss": 0.22456032037734985, + "step": 6502 + }, + { + "epoch": 1.7268623024830698, + "grad_norm": 2.1059170179774807, + "learning_rate": 1.0053334058235975e-06, + "loss": 0.2301097959280014, + "step": 6503 + }, + { + "epoch": 1.7271278714646128, + "grad_norm": 1.4062353485935484, + "learning_rate": 
1.0034155550100922e-06, + "loss": 0.21207617223262787, + "step": 6504 + }, + { + "epoch": 1.7273934404461557, + "grad_norm": 1.3379977808716934, + "learning_rate": 1.0014994386319621e-06, + "loss": 0.24378664791584015, + "step": 6505 + }, + { + "epoch": 1.727659009427699, + "grad_norm": 1.402146752515372, + "learning_rate": 9.995850570586107e-07, + "loss": 0.24914023280143738, + "step": 6506 + }, + { + "epoch": 1.7279245784092419, + "grad_norm": 1.2949159811476645, + "learning_rate": 9.976724106591128e-07, + "loss": 0.23235921561717987, + "step": 6507 + }, + { + "epoch": 1.7281901473907848, + "grad_norm": 1.295455173430887, + "learning_rate": 9.957614998022015e-07, + "loss": 0.22441455721855164, + "step": 6508 + }, + { + "epoch": 1.7284557163723278, + "grad_norm": 1.4195770964317103, + "learning_rate": 9.93852324856278e-07, + "loss": 0.2559920847415924, + "step": 6509 + }, + { + "epoch": 1.7287212853538707, + "grad_norm": 1.2106097617539484, + "learning_rate": 9.919448861894088e-07, + "loss": 0.21378321945667267, + "step": 6510 + }, + { + "epoch": 1.7289868543354137, + "grad_norm": 1.223247289196822, + "learning_rate": 9.900391841693247e-07, + "loss": 0.23622627556324005, + "step": 6511 + }, + { + "epoch": 1.7292524233169566, + "grad_norm": 1.2354266119490807, + "learning_rate": 9.88135219163424e-07, + "loss": 0.217013418674469, + "step": 6512 + }, + { + "epoch": 1.7295179922984996, + "grad_norm": 1.342902376475473, + "learning_rate": 9.862329915387669e-07, + "loss": 0.2221517264842987, + "step": 6513 + }, + { + "epoch": 1.7297835612800425, + "grad_norm": 1.3136496001371853, + "learning_rate": 9.84332501662083e-07, + "loss": 0.24377144873142242, + "step": 6514 + }, + { + "epoch": 1.7300491302615855, + "grad_norm": 1.2574348774674273, + "learning_rate": 9.824337498997593e-07, + "loss": 0.23368799686431885, + "step": 6515 + }, + { + "epoch": 1.7303146992431284, + "grad_norm": 1.1949944292188206, + "learning_rate": 9.805367366178608e-07, + "loss": 0.23061680793762207, 
+ "step": 6516 + }, + { + "epoch": 1.7305802682246714, + "grad_norm": 1.2715048223769598, + "learning_rate": 9.78641462182104e-07, + "loss": 0.24157950282096863, + "step": 6517 + }, + { + "epoch": 1.7308458372062143, + "grad_norm": 1.3248165077712177, + "learning_rate": 9.76747926957875e-07, + "loss": 0.2122395783662796, + "step": 6518 + }, + { + "epoch": 1.7311114061877573, + "grad_norm": 1.320024810941134, + "learning_rate": 9.748561313102266e-07, + "loss": 0.2351134717464447, + "step": 6519 + }, + { + "epoch": 1.7313769751693002, + "grad_norm": 1.2421546716744003, + "learning_rate": 9.729660756038738e-07, + "loss": 0.22462692856788635, + "step": 6520 + }, + { + "epoch": 1.7316425441508432, + "grad_norm": 1.191887437920794, + "learning_rate": 9.710777602031985e-07, + "loss": 0.2140806019306183, + "step": 6521 + }, + { + "epoch": 1.7319081131323861, + "grad_norm": 1.1138928252794336, + "learning_rate": 9.691911854722447e-07, + "loss": 0.22256694734096527, + "step": 6522 + }, + { + "epoch": 1.732173682113929, + "grad_norm": 1.3703383963226383, + "learning_rate": 9.673063517747216e-07, + "loss": 0.26044604182243347, + "step": 6523 + }, + { + "epoch": 1.732439251095472, + "grad_norm": 1.2598416492801234, + "learning_rate": 9.65423259474001e-07, + "loss": 0.22553196549415588, + "step": 6524 + }, + { + "epoch": 1.732704820077015, + "grad_norm": 1.351471142700479, + "learning_rate": 9.635419089331255e-07, + "loss": 0.2240113914012909, + "step": 6525 + }, + { + "epoch": 1.732970389058558, + "grad_norm": 1.1814437793767476, + "learning_rate": 9.616623005147952e-07, + "loss": 0.2239987701177597, + "step": 6526 + }, + { + "epoch": 1.7332359580401009, + "grad_norm": 1.3385972692968178, + "learning_rate": 9.597844345813746e-07, + "loss": 0.2779507040977478, + "step": 6527 + }, + { + "epoch": 1.7335015270216438, + "grad_norm": 1.24243402144453, + "learning_rate": 9.57908311494896e-07, + "loss": 0.20211297273635864, + "step": 6528 + }, + { + "epoch": 1.7337670960031868, + 
"grad_norm": 1.3764658259437736, + "learning_rate": 9.560339316170542e-07, + "loss": 0.2552817165851593, + "step": 6529 + }, + { + "epoch": 1.7340326649847297, + "grad_norm": 1.2797541334315956, + "learning_rate": 9.54161295309206e-07, + "loss": 0.248790442943573, + "step": 6530 + }, + { + "epoch": 1.7342982339662727, + "grad_norm": 1.2952054804389268, + "learning_rate": 9.522904029323754e-07, + "loss": 0.22865381836891174, + "step": 6531 + }, + { + "epoch": 1.7345638029478156, + "grad_norm": 1.2248102039230788, + "learning_rate": 9.504212548472458e-07, + "loss": 0.212583988904953, + "step": 6532 + }, + { + "epoch": 1.7348293719293586, + "grad_norm": 1.3834113478738954, + "learning_rate": 9.48553851414169e-07, + "loss": 0.24632221460342407, + "step": 6533 + }, + { + "epoch": 1.7350949409109018, + "grad_norm": 1.2843254083507383, + "learning_rate": 9.466881929931582e-07, + "loss": 0.2264299988746643, + "step": 6534 + }, + { + "epoch": 1.7353605098924447, + "grad_norm": 1.1969400150248917, + "learning_rate": 9.4482427994389e-07, + "loss": 0.21560585498809814, + "step": 6535 + }, + { + "epoch": 1.7356260788739877, + "grad_norm": 1.2133784097522973, + "learning_rate": 9.429621126257038e-07, + "loss": 0.24358224868774414, + "step": 6536 + }, + { + "epoch": 1.7358916478555306, + "grad_norm": 1.2714225965713206, + "learning_rate": 9.411016913976045e-07, + "loss": 0.23307816684246063, + "step": 6537 + }, + { + "epoch": 1.7361572168370736, + "grad_norm": 1.3040669928143356, + "learning_rate": 9.392430166182597e-07, + "loss": 0.28001490235328674, + "step": 6538 + }, + { + "epoch": 1.7364227858186165, + "grad_norm": 1.271471324412232, + "learning_rate": 9.373860886459996e-07, + "loss": 0.22544093430042267, + "step": 6539 + }, + { + "epoch": 1.7366883548001595, + "grad_norm": 1.196472605989987, + "learning_rate": 9.355309078388186e-07, + "loss": 0.2066478282213211, + "step": 6540 + }, + { + "epoch": 1.7369539237817024, + "grad_norm": 1.3162468805281542, + "learning_rate": 
9.336774745543697e-07, + "loss": 0.21185964345932007, + "step": 6541 + }, + { + "epoch": 1.7372194927632454, + "grad_norm": 1.2806137892507987, + "learning_rate": 9.318257891499793e-07, + "loss": 0.2337890863418579, + "step": 6542 + }, + { + "epoch": 1.7374850617447883, + "grad_norm": 1.3468215205180822, + "learning_rate": 9.299758519826274e-07, + "loss": 0.2430594563484192, + "step": 6543 + }, + { + "epoch": 1.7377506307263313, + "grad_norm": 1.4072339591675835, + "learning_rate": 9.281276634089609e-07, + "loss": 0.24799269437789917, + "step": 6544 + }, + { + "epoch": 1.7380161997078742, + "grad_norm": 1.3533264573117185, + "learning_rate": 9.26281223785287e-07, + "loss": 0.24756166338920593, + "step": 6545 + }, + { + "epoch": 1.7382817686894172, + "grad_norm": 1.281195516970091, + "learning_rate": 9.244365334675787e-07, + "loss": 0.23465190827846527, + "step": 6546 + }, + { + "epoch": 1.7385473376709601, + "grad_norm": 1.22953964144765, + "learning_rate": 9.225935928114716e-07, + "loss": 0.2039640098810196, + "step": 6547 + }, + { + "epoch": 1.738812906652503, + "grad_norm": 1.3426382286400422, + "learning_rate": 9.207524021722602e-07, + "loss": 0.22304412722587585, + "step": 6548 + }, + { + "epoch": 1.739078475634046, + "grad_norm": 1.2253196898929546, + "learning_rate": 9.189129619049064e-07, + "loss": 0.19985908269882202, + "step": 6549 + }, + { + "epoch": 1.739344044615589, + "grad_norm": 1.3354963919439176, + "learning_rate": 9.17075272364032e-07, + "loss": 0.2335432469844818, + "step": 6550 + }, + { + "epoch": 1.739609613597132, + "grad_norm": 1.6822196536181961, + "learning_rate": 9.152393339039223e-07, + "loss": 0.2313593327999115, + "step": 6551 + }, + { + "epoch": 1.739875182578675, + "grad_norm": 1.310977344619443, + "learning_rate": 9.134051468785243e-07, + "loss": 0.2320600152015686, + "step": 6552 + }, + { + "epoch": 1.7401407515602179, + "grad_norm": 1.0942022372096942, + "learning_rate": 9.115727116414475e-07, + "loss": 0.1870848387479782, + 
"step": 6553 + }, + { + "epoch": 1.7404063205417608, + "grad_norm": 1.340037469005655, + "learning_rate": 9.097420285459635e-07, + "loss": 0.22922812402248383, + "step": 6554 + }, + { + "epoch": 1.7406718895233038, + "grad_norm": 1.3705243227438364, + "learning_rate": 9.079130979450068e-07, + "loss": 0.2505050301551819, + "step": 6555 + }, + { + "epoch": 1.7409374585048467, + "grad_norm": 1.3187608464438627, + "learning_rate": 9.060859201911732e-07, + "loss": 0.20445439219474792, + "step": 6556 + }, + { + "epoch": 1.7412030274863897, + "grad_norm": 1.1489822386745985, + "learning_rate": 9.042604956367218e-07, + "loss": 0.22338441014289856, + "step": 6557 + }, + { + "epoch": 1.7414685964679326, + "grad_norm": 1.2900464387857213, + "learning_rate": 9.024368246335735e-07, + "loss": 0.24923941493034363, + "step": 6558 + }, + { + "epoch": 1.7417341654494756, + "grad_norm": 1.3383952744906746, + "learning_rate": 9.006149075333071e-07, + "loss": 0.22842931747436523, + "step": 6559 + }, + { + "epoch": 1.7419997344310185, + "grad_norm": 1.391145524863548, + "learning_rate": 8.987947446871703e-07, + "loss": 0.22451579570770264, + "step": 6560 + }, + { + "epoch": 1.7422653034125615, + "grad_norm": 1.3218089225892669, + "learning_rate": 8.969763364460682e-07, + "loss": 0.2521047592163086, + "step": 6561 + }, + { + "epoch": 1.7425308723941044, + "grad_norm": 1.1675892500249985, + "learning_rate": 8.951596831605691e-07, + "loss": 0.25001099705696106, + "step": 6562 + }, + { + "epoch": 1.7427964413756474, + "grad_norm": 1.175521207104519, + "learning_rate": 8.933447851809007e-07, + "loss": 0.19592508673667908, + "step": 6563 + }, + { + "epoch": 1.7430620103571903, + "grad_norm": 1.399887131584603, + "learning_rate": 8.915316428569554e-07, + "loss": 0.2785179018974304, + "step": 6564 + }, + { + "epoch": 1.7433275793387333, + "grad_norm": 1.1688351316361159, + "learning_rate": 8.897202565382845e-07, + "loss": 0.20700594782829285, + "step": 6565 + }, + { + "epoch": 
1.7435931483202762, + "grad_norm": 1.2225569857896341, + "learning_rate": 8.879106265741044e-07, + "loss": 0.253167062997818, + "step": 6566 + }, + { + "epoch": 1.7438587173018192, + "grad_norm": 1.4278912909015264, + "learning_rate": 8.861027533132859e-07, + "loss": 0.27672937512397766, + "step": 6567 + }, + { + "epoch": 1.7441242862833621, + "grad_norm": 1.3136368448280313, + "learning_rate": 8.842966371043671e-07, + "loss": 0.23050950467586517, + "step": 6568 + }, + { + "epoch": 1.744389855264905, + "grad_norm": 1.2790658189865058, + "learning_rate": 8.824922782955481e-07, + "loss": 0.23529425263404846, + "step": 6569 + }, + { + "epoch": 1.744655424246448, + "grad_norm": 1.2887213562899031, + "learning_rate": 8.806896772346873e-07, + "loss": 0.21803250908851624, + "step": 6570 + }, + { + "epoch": 1.744920993227991, + "grad_norm": 1.3669961004756481, + "learning_rate": 8.788888342693047e-07, + "loss": 0.24237293004989624, + "step": 6571 + }, + { + "epoch": 1.745186562209534, + "grad_norm": 1.1957319745445254, + "learning_rate": 8.770897497465803e-07, + "loss": 0.2008107602596283, + "step": 6572 + }, + { + "epoch": 1.745452131191077, + "grad_norm": 1.2693790937709173, + "learning_rate": 8.752924240133587e-07, + "loss": 0.23106279969215393, + "step": 6573 + }, + { + "epoch": 1.7457177001726198, + "grad_norm": 1.377716829660982, + "learning_rate": 8.734968574161406e-07, + "loss": 0.23726215958595276, + "step": 6574 + }, + { + "epoch": 1.7459832691541628, + "grad_norm": 1.211024095215965, + "learning_rate": 8.717030503010915e-07, + "loss": 0.26349812746047974, + "step": 6575 + }, + { + "epoch": 1.7462488381357057, + "grad_norm": 1.2871963140003055, + "learning_rate": 8.699110030140367e-07, + "loss": 0.23226451873779297, + "step": 6576 + }, + { + "epoch": 1.7465144071172487, + "grad_norm": 1.3173524718115384, + "learning_rate": 8.68120715900459e-07, + "loss": 0.22188402712345123, + "step": 6577 + }, + { + "epoch": 1.7467799760987917, + "grad_norm": 1.2367242455559135, 
+ "learning_rate": 8.663321893055087e-07, + "loss": 0.21238234639167786, + "step": 6578 + }, + { + "epoch": 1.7470455450803346, + "grad_norm": 1.3423960800972676, + "learning_rate": 8.645454235739903e-07, + "loss": 0.2700675427913666, + "step": 6579 + }, + { + "epoch": 1.7473111140618776, + "grad_norm": 1.2737029023524005, + "learning_rate": 8.627604190503714e-07, + "loss": 0.24463894963264465, + "step": 6580 + }, + { + "epoch": 1.7475766830434205, + "grad_norm": 1.2537801110870739, + "learning_rate": 8.609771760787822e-07, + "loss": 0.23429079353809357, + "step": 6581 + }, + { + "epoch": 1.7478422520249635, + "grad_norm": 1.342775712878445, + "learning_rate": 8.591956950030067e-07, + "loss": 0.21767663955688477, + "step": 6582 + }, + { + "epoch": 1.7481078210065064, + "grad_norm": 1.3390334282971272, + "learning_rate": 8.574159761664957e-07, + "loss": 0.2499813735485077, + "step": 6583 + }, + { + "epoch": 1.7483733899880494, + "grad_norm": 1.471955255689367, + "learning_rate": 8.556380199123582e-07, + "loss": 0.28065958619117737, + "step": 6584 + }, + { + "epoch": 1.7486389589695923, + "grad_norm": 1.3012440070718, + "learning_rate": 8.538618265833621e-07, + "loss": 0.2166985273361206, + "step": 6585 + }, + { + "epoch": 1.7489045279511353, + "grad_norm": 1.2228700023368582, + "learning_rate": 8.520873965219356e-07, + "loss": 0.22835782170295715, + "step": 6586 + }, + { + "epoch": 1.7491700969326782, + "grad_norm": 1.2209097376008975, + "learning_rate": 8.503147300701709e-07, + "loss": 0.23575961589813232, + "step": 6587 + }, + { + "epoch": 1.7494356659142212, + "grad_norm": 1.1275514661567778, + "learning_rate": 8.485438275698154e-07, + "loss": 0.183369442820549, + "step": 6588 + }, + { + "epoch": 1.7497012348957641, + "grad_norm": 1.519810508178025, + "learning_rate": 8.467746893622786e-07, + "loss": 0.2731352746486664, + "step": 6589 + }, + { + "epoch": 1.749966803877307, + "grad_norm": 1.2913957246056922, + "learning_rate": 8.450073157886296e-07, + "loss": 
0.20177578926086426, + "step": 6590 + }, + { + "epoch": 1.75023237285885, + "grad_norm": 1.2742798574628598, + "learning_rate": 8.432417071895982e-07, + "loss": 0.21672385931015015, + "step": 6591 + }, + { + "epoch": 1.750497941840393, + "grad_norm": 1.370933216008306, + "learning_rate": 8.414778639055699e-07, + "loss": 0.2503831386566162, + "step": 6592 + }, + { + "epoch": 1.750763510821936, + "grad_norm": 1.2884133202144494, + "learning_rate": 8.397157862765959e-07, + "loss": 0.2427521049976349, + "step": 6593 + }, + { + "epoch": 1.7510290798034789, + "grad_norm": 1.3424141731181953, + "learning_rate": 8.379554746423824e-07, + "loss": 0.23128533363342285, + "step": 6594 + }, + { + "epoch": 1.7512946487850218, + "grad_norm": 1.2353999110478557, + "learning_rate": 8.361969293422967e-07, + "loss": 0.2470957189798355, + "step": 6595 + }, + { + "epoch": 1.7515602177665648, + "grad_norm": 1.3335789710762707, + "learning_rate": 8.344401507153665e-07, + "loss": 0.29447510838508606, + "step": 6596 + }, + { + "epoch": 1.7518257867481077, + "grad_norm": 1.197223419032368, + "learning_rate": 8.326851391002777e-07, + "loss": 0.21585828065872192, + "step": 6597 + }, + { + "epoch": 1.7520913557296507, + "grad_norm": 1.2653558688292899, + "learning_rate": 8.30931894835375e-07, + "loss": 0.24081121385097504, + "step": 6598 + }, + { + "epoch": 1.7523569247111936, + "grad_norm": 1.3408805119391818, + "learning_rate": 8.291804182586638e-07, + "loss": 0.23052063584327698, + "step": 6599 + }, + { + "epoch": 1.7526224936927366, + "grad_norm": 1.2126901970374089, + "learning_rate": 8.274307097078093e-07, + "loss": 0.19008183479309082, + "step": 6600 + }, + { + "epoch": 1.7528880626742795, + "grad_norm": 1.3285441470167585, + "learning_rate": 8.25682769520132e-07, + "loss": 0.2632960379123688, + "step": 6601 + }, + { + "epoch": 1.7531536316558225, + "grad_norm": 1.4350439941988302, + "learning_rate": 8.239365980326175e-07, + "loss": 0.25958624482154846, + "step": 6602 + }, + { + "epoch": 
1.7534192006373654, + "grad_norm": 1.304275360361708, + "learning_rate": 8.221921955819035e-07, + "loss": 0.22370605170726776, + "step": 6603 + }, + { + "epoch": 1.7536847696189084, + "grad_norm": 1.2385957043075924, + "learning_rate": 8.204495625042919e-07, + "loss": 0.22018703818321228, + "step": 6604 + }, + { + "epoch": 1.7539503386004514, + "grad_norm": 1.3626754196729718, + "learning_rate": 8.187086991357418e-07, + "loss": 0.26802191138267517, + "step": 6605 + }, + { + "epoch": 1.7542159075819943, + "grad_norm": 1.5313825040978437, + "learning_rate": 8.169696058118725e-07, + "loss": 0.21560518443584442, + "step": 6606 + }, + { + "epoch": 1.7544814765635373, + "grad_norm": 1.270508998157205, + "learning_rate": 8.152322828679593e-07, + "loss": 0.23222430050373077, + "step": 6607 + }, + { + "epoch": 1.7547470455450802, + "grad_norm": 1.1542994886817455, + "learning_rate": 8.134967306389374e-07, + "loss": 0.17638427019119263, + "step": 6608 + }, + { + "epoch": 1.7550126145266232, + "grad_norm": 1.3257823658984844, + "learning_rate": 8.117629494594015e-07, + "loss": 0.21539513766765594, + "step": 6609 + }, + { + "epoch": 1.7552781835081661, + "grad_norm": 1.3431199934216977, + "learning_rate": 8.100309396636031e-07, + "loss": 0.2265736162662506, + "step": 6610 + }, + { + "epoch": 1.755543752489709, + "grad_norm": 1.3478032961337874, + "learning_rate": 8.083007015854549e-07, + "loss": 0.2688787281513214, + "step": 6611 + }, + { + "epoch": 1.755809321471252, + "grad_norm": 1.3027271078273857, + "learning_rate": 8.065722355585249e-07, + "loss": 0.19756367802619934, + "step": 6612 + }, + { + "epoch": 1.756074890452795, + "grad_norm": 1.3749986253881121, + "learning_rate": 8.048455419160405e-07, + "loss": 0.19934290647506714, + "step": 6613 + }, + { + "epoch": 1.756340459434338, + "grad_norm": 1.5756000064179743, + "learning_rate": 8.031206209908904e-07, + "loss": 0.2523588538169861, + "step": 6614 + }, + { + "epoch": 1.7566060284158809, + "grad_norm": 
1.2988900493114706, + "learning_rate": 8.01397473115616e-07, + "loss": 0.22825747728347778, + "step": 6615 + }, + { + "epoch": 1.7568715973974238, + "grad_norm": 1.3238944187902402, + "learning_rate": 7.996760986224228e-07, + "loss": 0.24525251984596252, + "step": 6616 + }, + { + "epoch": 1.7571371663789668, + "grad_norm": 1.366323962207031, + "learning_rate": 7.979564978431687e-07, + "loss": 0.21883559226989746, + "step": 6617 + }, + { + "epoch": 1.7574027353605097, + "grad_norm": 1.5827948860142422, + "learning_rate": 7.96238671109374e-07, + "loss": 0.2642098069190979, + "step": 6618 + }, + { + "epoch": 1.757668304342053, + "grad_norm": 1.3345016667633411, + "learning_rate": 7.945226187522159e-07, + "loss": 0.24094998836517334, + "step": 6619 + }, + { + "epoch": 1.7579338733235959, + "grad_norm": 1.2243450261876818, + "learning_rate": 7.928083411025278e-07, + "loss": 0.2225762903690338, + "step": 6620 + }, + { + "epoch": 1.7581994423051388, + "grad_norm": 1.2991544127435968, + "learning_rate": 7.910958384908041e-07, + "loss": 0.26722851395606995, + "step": 6621 + }, + { + "epoch": 1.7584650112866818, + "grad_norm": 1.3206157533666447, + "learning_rate": 7.893851112471907e-07, + "loss": 0.2176910787820816, + "step": 6622 + }, + { + "epoch": 1.7587305802682247, + "grad_norm": 1.3618122023344794, + "learning_rate": 7.876761597015003e-07, + "loss": 0.20261354744434357, + "step": 6623 + }, + { + "epoch": 1.7589961492497677, + "grad_norm": 1.1728416456458601, + "learning_rate": 7.859689841831975e-07, + "loss": 0.23314467072486877, + "step": 6624 + }, + { + "epoch": 1.7592617182313106, + "grad_norm": 1.3115277523344588, + "learning_rate": 7.842635850214054e-07, + "loss": 0.19854989647865295, + "step": 6625 + }, + { + "epoch": 1.7595272872128536, + "grad_norm": 1.2614486006783794, + "learning_rate": 7.825599625449043e-07, + "loss": 0.2422565519809723, + "step": 6626 + }, + { + "epoch": 1.7597928561943965, + "grad_norm": 1.342773057026848, + "learning_rate": 
7.808581170821328e-07, + "loss": 0.27029529213905334, + "step": 6627 + }, + { + "epoch": 1.7600584251759395, + "grad_norm": 1.1918292148332001, + "learning_rate": 7.791580489611872e-07, + "loss": 0.23596832156181335, + "step": 6628 + }, + { + "epoch": 1.7603239941574824, + "grad_norm": 1.2062344481848934, + "learning_rate": 7.774597585098198e-07, + "loss": 0.218271404504776, + "step": 6629 + }, + { + "epoch": 1.7605895631390254, + "grad_norm": 1.3762692469809215, + "learning_rate": 7.75763246055441e-07, + "loss": 0.2551255226135254, + "step": 6630 + }, + { + "epoch": 1.7608551321205683, + "grad_norm": 1.3049962391533094, + "learning_rate": 7.740685119251179e-07, + "loss": 0.24410653114318848, + "step": 6631 + }, + { + "epoch": 1.7611207011021113, + "grad_norm": 1.2577276419448338, + "learning_rate": 7.723755564455771e-07, + "loss": 0.23044872283935547, + "step": 6632 + }, + { + "epoch": 1.7613862700836542, + "grad_norm": 1.334208934461724, + "learning_rate": 7.706843799431985e-07, + "loss": 0.24569427967071533, + "step": 6633 + }, + { + "epoch": 1.7616518390651972, + "grad_norm": 1.1605227177029394, + "learning_rate": 7.689949827440224e-07, + "loss": 0.200277179479599, + "step": 6634 + }, + { + "epoch": 1.7619174080467401, + "grad_norm": 1.1742759165978003, + "learning_rate": 7.673073651737428e-07, + "loss": 0.19217821955680847, + "step": 6635 + }, + { + "epoch": 1.762182977028283, + "grad_norm": 1.281151649074766, + "learning_rate": 7.656215275577151e-07, + "loss": 0.227005273103714, + "step": 6636 + }, + { + "epoch": 1.762448546009826, + "grad_norm": 1.2211778988331632, + "learning_rate": 7.639374702209468e-07, + "loss": 0.21359863877296448, + "step": 6637 + }, + { + "epoch": 1.762714114991369, + "grad_norm": 1.267969218396632, + "learning_rate": 7.62255193488105e-07, + "loss": 0.24056711792945862, + "step": 6638 + }, + { + "epoch": 1.762979683972912, + "grad_norm": 1.28035138481303, + "learning_rate": 7.605746976835127e-07, + "loss": 0.20897413790225983, + 
"step": 6639 + }, + { + "epoch": 1.763245252954455, + "grad_norm": 1.2567764889990254, + "learning_rate": 7.588959831311493e-07, + "loss": 0.20395967364311218, + "step": 6640 + }, + { + "epoch": 1.7635108219359978, + "grad_norm": 1.4827108993688454, + "learning_rate": 7.572190501546517e-07, + "loss": 0.2334095984697342, + "step": 6641 + }, + { + "epoch": 1.7637763909175408, + "grad_norm": 1.3358734576215814, + "learning_rate": 7.555438990773134e-07, + "loss": 0.23892858624458313, + "step": 6642 + }, + { + "epoch": 1.7640419598990837, + "grad_norm": 1.3063666339869877, + "learning_rate": 7.538705302220839e-07, + "loss": 0.23515449464321136, + "step": 6643 + }, + { + "epoch": 1.7643075288806267, + "grad_norm": 1.1919354046726482, + "learning_rate": 7.521989439115674e-07, + "loss": 0.19728611409664154, + "step": 6644 + }, + { + "epoch": 1.7645730978621696, + "grad_norm": 1.2609989060636697, + "learning_rate": 7.505291404680281e-07, + "loss": 0.22277355194091797, + "step": 6645 + }, + { + "epoch": 1.7648386668437126, + "grad_norm": 1.2129119488866849, + "learning_rate": 7.488611202133822e-07, + "loss": 0.24117602407932281, + "step": 6646 + }, + { + "epoch": 1.7651042358252558, + "grad_norm": 1.3643314179100876, + "learning_rate": 7.471948834692045e-07, + "loss": 0.24675750732421875, + "step": 6647 + }, + { + "epoch": 1.7653698048067987, + "grad_norm": 1.3261352525807495, + "learning_rate": 7.455304305567279e-07, + "loss": 0.2413899004459381, + "step": 6648 + }, + { + "epoch": 1.7656353737883417, + "grad_norm": 1.3357210816225529, + "learning_rate": 7.438677617968348e-07, + "loss": 0.22125428915023804, + "step": 6649 + }, + { + "epoch": 1.7659009427698846, + "grad_norm": 1.2099689083776513, + "learning_rate": 7.422068775100732e-07, + "loss": 0.205051988363266, + "step": 6650 + }, + { + "epoch": 1.7661665117514276, + "grad_norm": 1.2734255069971199, + "learning_rate": 7.405477780166415e-07, + "loss": 0.23711715638637543, + "step": 6651 + }, + { + "epoch": 
1.7664320807329705, + "grad_norm": 1.4063590395204508, + "learning_rate": 7.388904636363914e-07, + "loss": 0.2591046988964081, + "step": 6652 + }, + { + "epoch": 1.7666976497145135, + "grad_norm": 1.4323150626725398, + "learning_rate": 7.372349346888363e-07, + "loss": 0.24837243556976318, + "step": 6653 + }, + { + "epoch": 1.7669632186960564, + "grad_norm": 1.1492996795155954, + "learning_rate": 7.35581191493141e-07, + "loss": 0.20910412073135376, + "step": 6654 + }, + { + "epoch": 1.7672287876775994, + "grad_norm": 1.113119722429438, + "learning_rate": 7.339292343681282e-07, + "loss": 0.2056204229593277, + "step": 6655 + }, + { + "epoch": 1.7674943566591423, + "grad_norm": 1.2927092177897141, + "learning_rate": 7.322790636322764e-07, + "loss": 0.2496742308139801, + "step": 6656 + }, + { + "epoch": 1.7677599256406853, + "grad_norm": 1.3571185149739835, + "learning_rate": 7.306306796037188e-07, + "loss": 0.24432921409606934, + "step": 6657 + }, + { + "epoch": 1.7680254946222282, + "grad_norm": 1.3006085174415165, + "learning_rate": 7.289840826002414e-07, + "loss": 0.2492775321006775, + "step": 6658 + }, + { + "epoch": 1.7682910636037712, + "grad_norm": 1.3256617876861967, + "learning_rate": 7.273392729392936e-07, + "loss": 0.22673827409744263, + "step": 6659 + }, + { + "epoch": 1.7685566325853141, + "grad_norm": 1.3730978211523115, + "learning_rate": 7.25696250937975e-07, + "loss": 0.2225622981786728, + "step": 6660 + }, + { + "epoch": 1.768822201566857, + "grad_norm": 1.2296766172450786, + "learning_rate": 7.240550169130378e-07, + "loss": 0.24896883964538574, + "step": 6661 + }, + { + "epoch": 1.7690877705484, + "grad_norm": 1.2103035123370711, + "learning_rate": 7.224155711808923e-07, + "loss": 0.2395302951335907, + "step": 6662 + }, + { + "epoch": 1.769353339529943, + "grad_norm": 1.2658162555194572, + "learning_rate": 7.207779140576066e-07, + "loss": 0.2255886197090149, + "step": 6663 + }, + { + "epoch": 1.769618908511486, + "grad_norm": 1.2518907529925698, + 
"learning_rate": 7.191420458589005e-07, + "loss": 0.24029678106307983, + "step": 6664 + }, + { + "epoch": 1.769884477493029, + "grad_norm": 1.1016484922093457, + "learning_rate": 7.175079669001506e-07, + "loss": 0.19399142265319824, + "step": 6665 + }, + { + "epoch": 1.7701500464745719, + "grad_norm": 1.2291425924678119, + "learning_rate": 7.158756774963882e-07, + "loss": 0.24569162726402283, + "step": 6666 + }, + { + "epoch": 1.7704156154561148, + "grad_norm": 1.2180012837263907, + "learning_rate": 7.142451779622971e-07, + "loss": 0.2484329342842102, + "step": 6667 + }, + { + "epoch": 1.7706811844376578, + "grad_norm": 1.2505833357389051, + "learning_rate": 7.126164686122216e-07, + "loss": 0.24423512816429138, + "step": 6668 + }, + { + "epoch": 1.7709467534192007, + "grad_norm": 1.1277554918017485, + "learning_rate": 7.109895497601571e-07, + "loss": 0.20146678388118744, + "step": 6669 + }, + { + "epoch": 1.7712123224007437, + "grad_norm": 1.2945002187740315, + "learning_rate": 7.093644217197526e-07, + "loss": 0.23329001665115356, + "step": 6670 + }, + { + "epoch": 1.7714778913822866, + "grad_norm": 1.1689758736288713, + "learning_rate": 7.077410848043165e-07, + "loss": 0.2290019690990448, + "step": 6671 + }, + { + "epoch": 1.7717434603638296, + "grad_norm": 1.2744441159542537, + "learning_rate": 7.061195393268061e-07, + "loss": 0.2329377382993698, + "step": 6672 + }, + { + "epoch": 1.7720090293453725, + "grad_norm": 1.1430677052322078, + "learning_rate": 7.04499785599837e-07, + "loss": 0.21513575315475464, + "step": 6673 + }, + { + "epoch": 1.7722745983269155, + "grad_norm": 1.1659646021132744, + "learning_rate": 7.028818239356794e-07, + "loss": 0.19022463262081146, + "step": 6674 + }, + { + "epoch": 1.7725401673084584, + "grad_norm": 1.2837523861206293, + "learning_rate": 7.012656546462571e-07, + "loss": 0.2097887396812439, + "step": 6675 + }, + { + "epoch": 1.7728057362900014, + "grad_norm": 1.3991640357566577, + "learning_rate": 6.996512780431486e-07, + "loss": 
0.2559792101383209, + "step": 6676 + }, + { + "epoch": 1.7730713052715443, + "grad_norm": 1.3219531410357084, + "learning_rate": 6.980386944375849e-07, + "loss": 0.24624274671077728, + "step": 6677 + }, + { + "epoch": 1.7733368742530873, + "grad_norm": 1.2405076465604956, + "learning_rate": 6.964279041404553e-07, + "loss": 0.22904372215270996, + "step": 6678 + }, + { + "epoch": 1.7736024432346302, + "grad_norm": 1.216707646052236, + "learning_rate": 6.948189074623002e-07, + "loss": 0.20808623731136322, + "step": 6679 + }, + { + "epoch": 1.7738680122161732, + "grad_norm": 1.229477200185015, + "learning_rate": 6.932117047133158e-07, + "loss": 0.1931435763835907, + "step": 6680 + }, + { + "epoch": 1.7741335811977161, + "grad_norm": 1.2962984681963328, + "learning_rate": 6.91606296203351e-07, + "loss": 0.22938531637191772, + "step": 6681 + }, + { + "epoch": 1.774399150179259, + "grad_norm": 1.2921857742770726, + "learning_rate": 6.900026822419103e-07, + "loss": 0.240365132689476, + "step": 6682 + }, + { + "epoch": 1.774664719160802, + "grad_norm": 1.3560359754116593, + "learning_rate": 6.8840086313815e-07, + "loss": 0.26665499806404114, + "step": 6683 + }, + { + "epoch": 1.774930288142345, + "grad_norm": 1.1827095382370005, + "learning_rate": 6.86800839200884e-07, + "loss": 0.19775834679603577, + "step": 6684 + }, + { + "epoch": 1.775195857123888, + "grad_norm": 1.2698613362606737, + "learning_rate": 6.852026107385756e-07, + "loss": 0.20334021747112274, + "step": 6685 + }, + { + "epoch": 1.775461426105431, + "grad_norm": 1.1845529296493982, + "learning_rate": 6.836061780593484e-07, + "loss": 0.20670340955257416, + "step": 6686 + }, + { + "epoch": 1.7757269950869738, + "grad_norm": 1.2940248868651125, + "learning_rate": 6.820115414709727e-07, + "loss": 0.2033209353685379, + "step": 6687 + }, + { + "epoch": 1.7759925640685168, + "grad_norm": 1.101442360403221, + "learning_rate": 6.804187012808761e-07, + "loss": 0.23827815055847168, + "step": 6688 + }, + { + "epoch": 
1.7762581330500598, + "grad_norm": 1.200357834005043, + "learning_rate": 6.788276577961394e-07, + "loss": 0.2054731547832489, + "step": 6689 + }, + { + "epoch": 1.7765237020316027, + "grad_norm": 1.3006753644657554, + "learning_rate": 6.772384113234987e-07, + "loss": 0.25553691387176514, + "step": 6690 + }, + { + "epoch": 1.7767892710131457, + "grad_norm": 1.2800516387465457, + "learning_rate": 6.756509621693385e-07, + "loss": 0.23650874197483063, + "step": 6691 + }, + { + "epoch": 1.7770548399946886, + "grad_norm": 1.2987358367196533, + "learning_rate": 6.740653106397033e-07, + "loss": 0.2353624701499939, + "step": 6692 + }, + { + "epoch": 1.7773204089762316, + "grad_norm": 1.3578478166739052, + "learning_rate": 6.724814570402871e-07, + "loss": 0.26034629344940186, + "step": 6693 + }, + { + "epoch": 1.7775859779577745, + "grad_norm": 1.2070636800070726, + "learning_rate": 6.70899401676438e-07, + "loss": 0.2272130399942398, + "step": 6694 + }, + { + "epoch": 1.7778515469393175, + "grad_norm": 1.353295285146214, + "learning_rate": 6.693191448531589e-07, + "loss": 0.27940404415130615, + "step": 6695 + }, + { + "epoch": 1.7781171159208604, + "grad_norm": 1.2726244327901954, + "learning_rate": 6.677406868751013e-07, + "loss": 0.22997702658176422, + "step": 6696 + }, + { + "epoch": 1.7783826849024034, + "grad_norm": 1.2569026906720413, + "learning_rate": 6.661640280465775e-07, + "loss": 0.22918452322483063, + "step": 6697 + }, + { + "epoch": 1.7786482538839463, + "grad_norm": 1.2456580683228033, + "learning_rate": 6.645891686715456e-07, + "loss": 0.18456090986728668, + "step": 6698 + }, + { + "epoch": 1.7789138228654893, + "grad_norm": 1.3290472252808803, + "learning_rate": 6.630161090536214e-07, + "loss": 0.23256534337997437, + "step": 6699 + }, + { + "epoch": 1.7791793918470322, + "grad_norm": 1.2224316750050632, + "learning_rate": 6.614448494960713e-07, + "loss": 0.21171879768371582, + "step": 6700 + }, + { + "epoch": 1.7794449608285752, + "grad_norm": 
1.201224789246079, + "learning_rate": 6.598753903018163e-07, + "loss": 0.21382400393486023, + "step": 6701 + }, + { + "epoch": 1.7797105298101181, + "grad_norm": 1.2240177347792593, + "learning_rate": 6.583077317734299e-07, + "loss": 0.22954748570919037, + "step": 6702 + }, + { + "epoch": 1.779976098791661, + "grad_norm": 1.519530195710278, + "learning_rate": 6.56741874213136e-07, + "loss": 0.25691086053848267, + "step": 6703 + }, + { + "epoch": 1.780241667773204, + "grad_norm": 1.4662002194098382, + "learning_rate": 6.551778179228174e-07, + "loss": 0.23413901031017303, + "step": 6704 + }, + { + "epoch": 1.780507236754747, + "grad_norm": 1.2775019242293946, + "learning_rate": 6.536155632040031e-07, + "loss": 0.2493733912706375, + "step": 6705 + }, + { + "epoch": 1.78077280573629, + "grad_norm": 1.2512747936457356, + "learning_rate": 6.520551103578776e-07, + "loss": 0.26094138622283936, + "step": 6706 + }, + { + "epoch": 1.7810383747178329, + "grad_norm": 1.3016608765448805, + "learning_rate": 6.504964596852781e-07, + "loss": 0.23509518802165985, + "step": 6707 + }, + { + "epoch": 1.7813039436993758, + "grad_norm": 1.4726929969063267, + "learning_rate": 6.489396114866942e-07, + "loss": 0.2471122294664383, + "step": 6708 + }, + { + "epoch": 1.7815695126809188, + "grad_norm": 1.3034668854019054, + "learning_rate": 6.47384566062268e-07, + "loss": 0.2363303005695343, + "step": 6709 + }, + { + "epoch": 1.7818350816624617, + "grad_norm": 1.1801501968168786, + "learning_rate": 6.458313237117953e-07, + "loss": 0.18868233263492584, + "step": 6710 + }, + { + "epoch": 1.7821006506440047, + "grad_norm": 1.3437880175802723, + "learning_rate": 6.442798847347187e-07, + "loss": 0.23380546271800995, + "step": 6711 + }, + { + "epoch": 1.7823662196255476, + "grad_norm": 1.471740030592424, + "learning_rate": 6.42730249430139e-07, + "loss": 0.24112167954444885, + "step": 6712 + }, + { + "epoch": 1.7826317886070906, + "grad_norm": 1.2664184946697812, + "learning_rate": 
6.411824180968096e-07, + "loss": 0.2397521436214447, + "step": 6713 + }, + { + "epoch": 1.7828973575886335, + "grad_norm": 1.309174308390434, + "learning_rate": 6.396363910331338e-07, + "loss": 0.23775406181812286, + "step": 6714 + }, + { + "epoch": 1.7831629265701765, + "grad_norm": 1.4327166340451307, + "learning_rate": 6.380921685371655e-07, + "loss": 0.23278602957725525, + "step": 6715 + }, + { + "epoch": 1.7834284955517195, + "grad_norm": 1.1135605228940266, + "learning_rate": 6.365497509066143e-07, + "loss": 0.20028996467590332, + "step": 6716 + }, + { + "epoch": 1.7836940645332624, + "grad_norm": 1.146963533940078, + "learning_rate": 6.35009138438839e-07, + "loss": 0.20862875878810883, + "step": 6717 + }, + { + "epoch": 1.7839596335148054, + "grad_norm": 1.3257848293601993, + "learning_rate": 6.334703314308521e-07, + "loss": 0.23522542417049408, + "step": 6718 + }, + { + "epoch": 1.7842252024963483, + "grad_norm": 1.2172150430538355, + "learning_rate": 6.319333301793173e-07, + "loss": 0.24633824825286865, + "step": 6719 + }, + { + "epoch": 1.7844907714778913, + "grad_norm": 1.3131451310460658, + "learning_rate": 6.30398134980551e-07, + "loss": 0.22141410410404205, + "step": 6720 + }, + { + "epoch": 1.7847563404594342, + "grad_norm": 1.3593079444355614, + "learning_rate": 6.288647461305186e-07, + "loss": 0.23313754796981812, + "step": 6721 + }, + { + "epoch": 1.7850219094409772, + "grad_norm": 1.2751593889081192, + "learning_rate": 6.273331639248414e-07, + "loss": 0.22015389800071716, + "step": 6722 + }, + { + "epoch": 1.7852874784225201, + "grad_norm": 1.2716859790694561, + "learning_rate": 6.258033886587911e-07, + "loss": 0.21154522895812988, + "step": 6723 + }, + { + "epoch": 1.785553047404063, + "grad_norm": 1.3319130935282857, + "learning_rate": 6.242754206272883e-07, + "loss": 0.2320503294467926, + "step": 6724 + }, + { + "epoch": 1.785818616385606, + "grad_norm": 1.2016740259413836, + "learning_rate": 6.227492601249097e-07, + "loss": 
0.21778921782970428, + "step": 6725 + }, + { + "epoch": 1.786084185367149, + "grad_norm": 1.2321504813505204, + "learning_rate": 6.212249074458776e-07, + "loss": 0.2368871569633484, + "step": 6726 + }, + { + "epoch": 1.786349754348692, + "grad_norm": 1.5195368545073897, + "learning_rate": 6.197023628840704e-07, + "loss": 0.27269479632377625, + "step": 6727 + }, + { + "epoch": 1.7866153233302349, + "grad_norm": 1.2744130185555103, + "learning_rate": 6.181816267330177e-07, + "loss": 0.2414151132106781, + "step": 6728 + }, + { + "epoch": 1.7868808923117778, + "grad_norm": 1.1197825562175172, + "learning_rate": 6.166626992858993e-07, + "loss": 0.2156972736120224, + "step": 6729 + }, + { + "epoch": 1.7871464612933208, + "grad_norm": 1.2748992996552195, + "learning_rate": 6.151455808355455e-07, + "loss": 0.2510441541671753, + "step": 6730 + }, + { + "epoch": 1.787412030274864, + "grad_norm": 1.2924509412618195, + "learning_rate": 6.136302716744402e-07, + "loss": 0.20290088653564453, + "step": 6731 + }, + { + "epoch": 1.787677599256407, + "grad_norm": 1.3705736121123597, + "learning_rate": 6.121167720947174e-07, + "loss": 0.25088101625442505, + "step": 6732 + }, + { + "epoch": 1.7879431682379499, + "grad_norm": 1.3723338572382136, + "learning_rate": 6.106050823881604e-07, + "loss": 0.2566376328468323, + "step": 6733 + }, + { + "epoch": 1.7882087372194928, + "grad_norm": 1.1043772478174716, + "learning_rate": 6.09095202846206e-07, + "loss": 0.1882714033126831, + "step": 6734 + }, + { + "epoch": 1.7884743062010358, + "grad_norm": 1.2323780172305254, + "learning_rate": 6.075871337599404e-07, + "loss": 0.18705856800079346, + "step": 6735 + }, + { + "epoch": 1.7887398751825787, + "grad_norm": 1.1976910574931858, + "learning_rate": 6.060808754201031e-07, + "loss": 0.24756133556365967, + "step": 6736 + }, + { + "epoch": 1.7890054441641217, + "grad_norm": 1.3197777974144425, + "learning_rate": 6.045764281170818e-07, + "loss": 0.2537599205970764, + "step": 6737 + }, + { + "epoch": 
1.7892710131456646, + "grad_norm": 1.330362234255321, + "learning_rate": 6.030737921409169e-07, + "loss": 0.22049202024936676, + "step": 6738 + }, + { + "epoch": 1.7895365821272076, + "grad_norm": 1.1222347914068396, + "learning_rate": 6.015729677812965e-07, + "loss": 0.20820394158363342, + "step": 6739 + }, + { + "epoch": 1.7898021511087505, + "grad_norm": 1.3153590716408405, + "learning_rate": 6.00073955327567e-07, + "loss": 0.2339879721403122, + "step": 6740 + }, + { + "epoch": 1.7900677200902935, + "grad_norm": 1.2483259153993207, + "learning_rate": 5.98576755068715e-07, + "loss": 0.22082161903381348, + "step": 6741 + }, + { + "epoch": 1.7903332890718364, + "grad_norm": 1.28162605766883, + "learning_rate": 5.97081367293385e-07, + "loss": 0.21883058547973633, + "step": 6742 + }, + { + "epoch": 1.7905988580533794, + "grad_norm": 1.1591166092235485, + "learning_rate": 5.955877922898712e-07, + "loss": 0.214680016040802, + "step": 6743 + }, + { + "epoch": 1.7908644270349223, + "grad_norm": 1.37628370977899, + "learning_rate": 5.940960303461152e-07, + "loss": 0.24533744156360626, + "step": 6744 + }, + { + "epoch": 1.7911299960164653, + "grad_norm": 1.3046535737377691, + "learning_rate": 5.926060817497137e-07, + "loss": 0.19857585430145264, + "step": 6745 + }, + { + "epoch": 1.7913955649980082, + "grad_norm": 1.4468975368000232, + "learning_rate": 5.911179467879081e-07, + "loss": 0.27493876218795776, + "step": 6746 + }, + { + "epoch": 1.7916611339795512, + "grad_norm": 1.1490145590407708, + "learning_rate": 5.896316257475954e-07, + "loss": 0.20560544729232788, + "step": 6747 + }, + { + "epoch": 1.7919267029610941, + "grad_norm": 1.2213631424870741, + "learning_rate": 5.881471189153199e-07, + "loss": 0.23559418320655823, + "step": 6748 + }, + { + "epoch": 1.792192271942637, + "grad_norm": 1.3144055462601232, + "learning_rate": 5.866644265772769e-07, + "loss": 0.23055103421211243, + "step": 6749 + }, + { + "epoch": 1.79245784092418, + "grad_norm": 1.4747052812755685, + 
"learning_rate": 5.851835490193136e-07, + "loss": 0.2780724763870239, + "step": 6750 + }, + { + "epoch": 1.792723409905723, + "grad_norm": 1.2354333862915858, + "learning_rate": 5.837044865269248e-07, + "loss": 0.20216618478298187, + "step": 6751 + }, + { + "epoch": 1.792988978887266, + "grad_norm": 1.308066661539038, + "learning_rate": 5.822272393852557e-07, + "loss": 0.2289930284023285, + "step": 6752 + }, + { + "epoch": 1.793254547868809, + "grad_norm": 1.2952454297764495, + "learning_rate": 5.80751807879103e-07, + "loss": 0.2028929740190506, + "step": 6753 + }, + { + "epoch": 1.7935201168503518, + "grad_norm": 1.2960791997009702, + "learning_rate": 5.792781922929114e-07, + "loss": 0.1964842826128006, + "step": 6754 + }, + { + "epoch": 1.7937856858318948, + "grad_norm": 1.4512315838061285, + "learning_rate": 5.77806392910778e-07, + "loss": 0.2617039084434509, + "step": 6755 + }, + { + "epoch": 1.7940512548134377, + "grad_norm": 1.325466585449178, + "learning_rate": 5.76336410016447e-07, + "loss": 0.2582395374774933, + "step": 6756 + }, + { + "epoch": 1.7943168237949807, + "grad_norm": 1.2587701407069858, + "learning_rate": 5.74868243893314e-07, + "loss": 0.23379334807395935, + "step": 6757 + }, + { + "epoch": 1.7945823927765236, + "grad_norm": 1.2979435124807637, + "learning_rate": 5.734018948244247e-07, + "loss": 0.2376977801322937, + "step": 6758 + }, + { + "epoch": 1.7948479617580668, + "grad_norm": 1.414785341098569, + "learning_rate": 5.719373630924741e-07, + "loss": 0.21816037595272064, + "step": 6759 + }, + { + "epoch": 1.7951135307396098, + "grad_norm": 1.1404163081963787, + "learning_rate": 5.704746489798063e-07, + "loss": 0.22156387567520142, + "step": 6760 + }, + { + "epoch": 1.7953790997211527, + "grad_norm": 1.195358056085369, + "learning_rate": 5.690137527684147e-07, + "loss": 0.20818129181861877, + "step": 6761 + }, + { + "epoch": 1.7956446687026957, + "grad_norm": 1.1501993150491747, + "learning_rate": 5.67554674739944e-07, + "loss": 
0.18672943115234375, + "step": 6762 + }, + { + "epoch": 1.7959102376842386, + "grad_norm": 1.2143392515173568, + "learning_rate": 5.66097415175686e-07, + "loss": 0.2023036777973175, + "step": 6763 + }, + { + "epoch": 1.7961758066657816, + "grad_norm": 1.3551091626165586, + "learning_rate": 5.646419743565845e-07, + "loss": 0.24798424541950226, + "step": 6764 + }, + { + "epoch": 1.7964413756473245, + "grad_norm": 1.2034553304236573, + "learning_rate": 5.631883525632297e-07, + "loss": 0.1885790377855301, + "step": 6765 + }, + { + "epoch": 1.7967069446288675, + "grad_norm": 1.3693229184747842, + "learning_rate": 5.617365500758631e-07, + "loss": 0.24120381474494934, + "step": 6766 + }, + { + "epoch": 1.7969725136104104, + "grad_norm": 1.2063823939207, + "learning_rate": 5.602865671743763e-07, + "loss": 0.24238690733909607, + "step": 6767 + }, + { + "epoch": 1.7972380825919534, + "grad_norm": 1.2611645650605894, + "learning_rate": 5.588384041383089e-07, + "loss": 0.22928190231323242, + "step": 6768 + }, + { + "epoch": 1.7975036515734963, + "grad_norm": 1.3148280979127052, + "learning_rate": 5.573920612468486e-07, + "loss": 0.2464730143547058, + "step": 6769 + }, + { + "epoch": 1.7977692205550393, + "grad_norm": 1.149985298163883, + "learning_rate": 5.559475387788348e-07, + "loss": 0.2167670875787735, + "step": 6770 + }, + { + "epoch": 1.7980347895365822, + "grad_norm": 1.3365719233561757, + "learning_rate": 5.545048370127526e-07, + "loss": 0.24080663919448853, + "step": 6771 + }, + { + "epoch": 1.7983003585181252, + "grad_norm": 1.3571891328346308, + "learning_rate": 5.530639562267382e-07, + "loss": 0.25481417775154114, + "step": 6772 + }, + { + "epoch": 1.7985659274996681, + "grad_norm": 1.3525822075957274, + "learning_rate": 5.51624896698576e-07, + "loss": 0.23328909277915955, + "step": 6773 + }, + { + "epoch": 1.798831496481211, + "grad_norm": 1.136424514008492, + "learning_rate": 5.50187658705702e-07, + "loss": 0.18779747188091278, + "step": 6774 + }, + { + "epoch": 
1.799097065462754, + "grad_norm": 1.3089016035676113, + "learning_rate": 5.487522425251968e-07, + "loss": 0.24840545654296875, + "step": 6775 + }, + { + "epoch": 1.799362634444297, + "grad_norm": 1.4658187281761286, + "learning_rate": 5.473186484337911e-07, + "loss": 0.2559642791748047, + "step": 6776 + }, + { + "epoch": 1.79962820342584, + "grad_norm": 1.3714243263968933, + "learning_rate": 5.458868767078673e-07, + "loss": 0.2005981206893921, + "step": 6777 + }, + { + "epoch": 1.799893772407383, + "grad_norm": 1.4085177100377464, + "learning_rate": 5.444569276234523e-07, + "loss": 0.2480883002281189, + "step": 6778 + }, + { + "epoch": 1.8001593413889259, + "grad_norm": 1.2203856732153913, + "learning_rate": 5.430288014562235e-07, + "loss": 0.23043295741081238, + "step": 6779 + }, + { + "epoch": 1.8004249103704688, + "grad_norm": 1.4245462518797845, + "learning_rate": 5.416024984815072e-07, + "loss": 0.22702521085739136, + "step": 6780 + }, + { + "epoch": 1.8006904793520118, + "grad_norm": 1.153610007644359, + "learning_rate": 5.401780189742789e-07, + "loss": 0.19955751299858093, + "step": 6781 + }, + { + "epoch": 1.8009560483335547, + "grad_norm": 1.2560139759300732, + "learning_rate": 5.387553632091591e-07, + "loss": 0.19743162393569946, + "step": 6782 + }, + { + "epoch": 1.8012216173150977, + "grad_norm": 1.3072968250539403, + "learning_rate": 5.373345314604206e-07, + "loss": 0.2262525111436844, + "step": 6783 + }, + { + "epoch": 1.8014871862966406, + "grad_norm": 1.2987858405959638, + "learning_rate": 5.359155240019809e-07, + "loss": 0.249632328748703, + "step": 6784 + }, + { + "epoch": 1.8017527552781836, + "grad_norm": 1.1804135507002813, + "learning_rate": 5.344983411074111e-07, + "loss": 0.19300231337547302, + "step": 6785 + }, + { + "epoch": 1.8020183242597265, + "grad_norm": 1.293291337799575, + "learning_rate": 5.330829830499263e-07, + "loss": 0.22256134450435638, + "step": 6786 + }, + { + "epoch": 1.8022838932412695, + "grad_norm": 1.283065855572867, + 
"learning_rate": 5.316694501023911e-07, + "loss": 0.2666356563568115, + "step": 6787 + }, + { + "epoch": 1.8025494622228124, + "grad_norm": 1.239663996945653, + "learning_rate": 5.302577425373156e-07, + "loss": 0.223050057888031, + "step": 6788 + }, + { + "epoch": 1.8028150312043554, + "grad_norm": 1.3011452698852823, + "learning_rate": 5.288478606268632e-07, + "loss": 0.2298094481229782, + "step": 6789 + }, + { + "epoch": 1.8030806001858983, + "grad_norm": 1.4761708863150307, + "learning_rate": 5.27439804642843e-07, + "loss": 0.23596417903900146, + "step": 6790 + }, + { + "epoch": 1.8033461691674413, + "grad_norm": 1.226229776793909, + "learning_rate": 5.26033574856708e-07, + "loss": 0.19501623511314392, + "step": 6791 + }, + { + "epoch": 1.8036117381489842, + "grad_norm": 1.2825838070785722, + "learning_rate": 5.246291715395657e-07, + "loss": 0.23518472909927368, + "step": 6792 + }, + { + "epoch": 1.8038773071305272, + "grad_norm": 1.1820374841237484, + "learning_rate": 5.232265949621651e-07, + "loss": 0.2251899093389511, + "step": 6793 + }, + { + "epoch": 1.8041428761120701, + "grad_norm": 1.1527654541489951, + "learning_rate": 5.218258453949099e-07, + "loss": 0.1764119267463684, + "step": 6794 + }, + { + "epoch": 1.804408445093613, + "grad_norm": 1.2895741356204065, + "learning_rate": 5.204269231078484e-07, + "loss": 0.20768773555755615, + "step": 6795 + }, + { + "epoch": 1.804674014075156, + "grad_norm": 1.3841780370828203, + "learning_rate": 5.19029828370674e-07, + "loss": 0.2115546613931656, + "step": 6796 + }, + { + "epoch": 1.804939583056699, + "grad_norm": 1.315680847185169, + "learning_rate": 5.176345614527312e-07, + "loss": 0.2465972602367401, + "step": 6797 + }, + { + "epoch": 1.805205152038242, + "grad_norm": 1.379203464130328, + "learning_rate": 5.162411226230102e-07, + "loss": 0.2359803020954132, + "step": 6798 + }, + { + "epoch": 1.805470721019785, + "grad_norm": 1.4106819634653143, + "learning_rate": 5.148495121501506e-07, + "loss": 
0.27518990635871887, + "step": 6799 + }, + { + "epoch": 1.8057362900013278, + "grad_norm": 1.3653410113402416, + "learning_rate": 5.134597303024391e-07, + "loss": 0.23914849758148193, + "step": 6800 + }, + { + "epoch": 1.8060018589828708, + "grad_norm": 1.256847668479307, + "learning_rate": 5.120717773478068e-07, + "loss": 0.21771098673343658, + "step": 6801 + }, + { + "epoch": 1.8062674279644138, + "grad_norm": 1.2716100664289411, + "learning_rate": 5.106856535538363e-07, + "loss": 0.235421285033226, + "step": 6802 + }, + { + "epoch": 1.8065329969459567, + "grad_norm": 1.4167241401735549, + "learning_rate": 5.093013591877561e-07, + "loss": 0.23973548412322998, + "step": 6803 + }, + { + "epoch": 1.8067985659274997, + "grad_norm": 1.484886222602596, + "learning_rate": 5.079188945164426e-07, + "loss": 0.24059349298477173, + "step": 6804 + }, + { + "epoch": 1.8070641349090426, + "grad_norm": 1.3840991454067133, + "learning_rate": 5.065382598064161e-07, + "loss": 0.25188207626342773, + "step": 6805 + }, + { + "epoch": 1.8073297038905856, + "grad_norm": 1.1866308474402574, + "learning_rate": 5.051594553238482e-07, + "loss": 0.20124536752700806, + "step": 6806 + }, + { + "epoch": 1.8075952728721285, + "grad_norm": 1.2234769875088154, + "learning_rate": 5.037824813345571e-07, + "loss": 0.2059330940246582, + "step": 6807 + }, + { + "epoch": 1.8078608418536715, + "grad_norm": 1.2468279665046458, + "learning_rate": 5.024073381040052e-07, + "loss": 0.2122621238231659, + "step": 6808 + }, + { + "epoch": 1.8081264108352144, + "grad_norm": 1.2203093249465347, + "learning_rate": 5.010340258973046e-07, + "loss": 0.20064303278923035, + "step": 6809 + }, + { + "epoch": 1.8083919798167574, + "grad_norm": 1.3685187895509534, + "learning_rate": 4.996625449792147e-07, + "loss": 0.24773281812667847, + "step": 6810 + }, + { + "epoch": 1.8086575487983003, + "grad_norm": 1.149837064877599, + "learning_rate": 4.982928956141375e-07, + "loss": 0.2111661732196808, + "step": 6811 + }, + { + 
"epoch": 1.8089231177798433, + "grad_norm": 1.2721912706796665, + "learning_rate": 4.969250780661306e-07, + "loss": 0.24823394417762756, + "step": 6812 + }, + { + "epoch": 1.8091886867613862, + "grad_norm": 1.410632443971984, + "learning_rate": 4.955590925988896e-07, + "loss": 0.24726605415344238, + "step": 6813 + }, + { + "epoch": 1.8094542557429292, + "grad_norm": 1.3112520269484638, + "learning_rate": 4.941949394757605e-07, + "loss": 0.2269962728023529, + "step": 6814 + }, + { + "epoch": 1.8097198247244721, + "grad_norm": 1.311172380903373, + "learning_rate": 4.928326189597377e-07, + "loss": 0.2336469292640686, + "step": 6815 + }, + { + "epoch": 1.809985393706015, + "grad_norm": 1.3372206959113173, + "learning_rate": 4.914721313134585e-07, + "loss": 0.24872124195098877, + "step": 6816 + }, + { + "epoch": 1.810250962687558, + "grad_norm": 1.3116570930981006, + "learning_rate": 4.901134767992099e-07, + "loss": 0.2484157383441925, + "step": 6817 + }, + { + "epoch": 1.810516531669101, + "grad_norm": 1.5234901533359522, + "learning_rate": 4.887566556789247e-07, + "loss": 0.24683158099651337, + "step": 6818 + }, + { + "epoch": 1.810782100650644, + "grad_norm": 1.1959899225802055, + "learning_rate": 4.874016682141802e-07, + "loss": 0.18717995285987854, + "step": 6819 + }, + { + "epoch": 1.8110476696321869, + "grad_norm": 1.2862771000886628, + "learning_rate": 4.860485146662053e-07, + "loss": 0.2220807671546936, + "step": 6820 + }, + { + "epoch": 1.8113132386137298, + "grad_norm": 1.196369102162481, + "learning_rate": 4.84697195295869e-07, + "loss": 0.2178400307893753, + "step": 6821 + }, + { + "epoch": 1.8115788075952728, + "grad_norm": 1.2250082051849178, + "learning_rate": 4.833477103636908e-07, + "loss": 0.2056645154953003, + "step": 6822 + }, + { + "epoch": 1.8118443765768157, + "grad_norm": 1.1729075702986809, + "learning_rate": 4.820000601298358e-07, + "loss": 0.21441905200481415, + "step": 6823 + }, + { + "epoch": 1.8121099455583587, + "grad_norm": 
1.4445497728186703, + "learning_rate": 4.806542448541151e-07, + "loss": 0.17688237130641937, + "step": 6824 + }, + { + "epoch": 1.8123755145399016, + "grad_norm": 1.3216659704658935, + "learning_rate": 4.793102647959847e-07, + "loss": 0.22405505180358887, + "step": 6825 + }, + { + "epoch": 1.8126410835214446, + "grad_norm": 1.4226735460298432, + "learning_rate": 4.779681202145503e-07, + "loss": 0.21617908775806427, + "step": 6826 + }, + { + "epoch": 1.8129066525029875, + "grad_norm": 1.3284639992790963, + "learning_rate": 4.766278113685596e-07, + "loss": 0.23570871353149414, + "step": 6827 + }, + { + "epoch": 1.8131722214845305, + "grad_norm": 1.222373726415007, + "learning_rate": 4.7528933851641036e-07, + "loss": 0.23806743323802948, + "step": 6828 + }, + { + "epoch": 1.8134377904660735, + "grad_norm": 1.3312930220149763, + "learning_rate": 4.739527019161405e-07, + "loss": 0.24859179556369781, + "step": 6829 + }, + { + "epoch": 1.8137033594476164, + "grad_norm": 1.2143252342774762, + "learning_rate": 4.726179018254418e-07, + "loss": 0.21314260363578796, + "step": 6830 + }, + { + "epoch": 1.8139689284291594, + "grad_norm": 1.272910058647325, + "learning_rate": 4.7128493850164715e-07, + "loss": 0.25290659070014954, + "step": 6831 + }, + { + "epoch": 1.8142344974107023, + "grad_norm": 1.1800117497978073, + "learning_rate": 4.699538122017355e-07, + "loss": 0.22606703639030457, + "step": 6832 + }, + { + "epoch": 1.8145000663922453, + "grad_norm": 1.3037958158309495, + "learning_rate": 4.6862452318233275e-07, + "loss": 0.23973071575164795, + "step": 6833 + }, + { + "epoch": 1.8147656353737882, + "grad_norm": 1.2341358358957555, + "learning_rate": 4.672970716997094e-07, + "loss": 0.2225341498851776, + "step": 6834 + }, + { + "epoch": 1.8150312043553312, + "grad_norm": 1.441833447404081, + "learning_rate": 4.6597145800978183e-07, + "loss": 0.19153356552124023, + "step": 6835 + }, + { + "epoch": 1.8152967733368741, + "grad_norm": 1.2010339801105188, + "learning_rate": 
4.646476823681145e-07, + "loss": 0.19694843888282776, + "step": 6836 + }, + { + "epoch": 1.815562342318417, + "grad_norm": 1.2719437537675773, + "learning_rate": 4.6332574502991554e-07, + "loss": 0.2353869527578354, + "step": 6837 + }, + { + "epoch": 1.81582791129996, + "grad_norm": 1.3504470280928214, + "learning_rate": 4.6200564625003775e-07, + "loss": 0.20919787883758545, + "step": 6838 + }, + { + "epoch": 1.816093480281503, + "grad_norm": 1.1775336742921327, + "learning_rate": 4.6068738628298193e-07, + "loss": 0.18352919816970825, + "step": 6839 + }, + { + "epoch": 1.816359049263046, + "grad_norm": 1.3571378213568392, + "learning_rate": 4.5937096538289147e-07, + "loss": 0.24711212515830994, + "step": 6840 + }, + { + "epoch": 1.8166246182445889, + "grad_norm": 1.2216287617055834, + "learning_rate": 4.580563838035579e-07, + "loss": 0.2350531816482544, + "step": 6841 + }, + { + "epoch": 1.8168901872261318, + "grad_norm": 1.3731447849726235, + "learning_rate": 4.5674364179841614e-07, + "loss": 0.26124465465545654, + "step": 6842 + }, + { + "epoch": 1.8171557562076748, + "grad_norm": 1.3819435677197398, + "learning_rate": 4.5543273962054934e-07, + "loss": 0.2110440880060196, + "step": 6843 + }, + { + "epoch": 1.817421325189218, + "grad_norm": 1.425540844923539, + "learning_rate": 4.5412367752268094e-07, + "loss": 0.2409415990114212, + "step": 6844 + }, + { + "epoch": 1.817686894170761, + "grad_norm": 1.2827549712815094, + "learning_rate": 4.528164557571857e-07, + "loss": 0.2280777543783188, + "step": 6845 + }, + { + "epoch": 1.8179524631523039, + "grad_norm": 1.111661347066374, + "learning_rate": 4.515110745760787e-07, + "loss": 0.201339989900589, + "step": 6846 + }, + { + "epoch": 1.8182180321338468, + "grad_norm": 1.2576623337538495, + "learning_rate": 4.5020753423102083e-07, + "loss": 0.22910752892494202, + "step": 6847 + }, + { + "epoch": 1.8184836011153898, + "grad_norm": 1.2835742527474332, + "learning_rate": 4.4890583497332327e-07, + "loss": 
0.21736779808998108, + "step": 6848 + }, + { + "epoch": 1.8187491700969327, + "grad_norm": 1.282796826855034, + "learning_rate": 4.476059770539354e-07, + "loss": 0.20898449420928955, + "step": 6849 + }, + { + "epoch": 1.8190147390784757, + "grad_norm": 1.2514312774528749, + "learning_rate": 4.463079607234555e-07, + "loss": 0.22159051895141602, + "step": 6850 + }, + { + "epoch": 1.8192803080600186, + "grad_norm": 1.290667660986327, + "learning_rate": 4.450117862321246e-07, + "loss": 0.24081172049045563, + "step": 6851 + }, + { + "epoch": 1.8195458770415616, + "grad_norm": 1.2092663587603776, + "learning_rate": 4.4371745382983164e-07, + "loss": 0.17856758832931519, + "step": 6852 + }, + { + "epoch": 1.8198114460231045, + "grad_norm": 1.2002967167521004, + "learning_rate": 4.424249637661071e-07, + "loss": 0.20796868205070496, + "step": 6853 + }, + { + "epoch": 1.8200770150046475, + "grad_norm": 1.5683273026632796, + "learning_rate": 4.4113431629013046e-07, + "loss": 0.24277149140834808, + "step": 6854 + }, + { + "epoch": 1.8203425839861904, + "grad_norm": 1.1767967505464594, + "learning_rate": 4.3984551165071944e-07, + "loss": 0.19315838813781738, + "step": 6855 + }, + { + "epoch": 1.8206081529677334, + "grad_norm": 1.2457379727303777, + "learning_rate": 4.3855855009634075e-07, + "loss": 0.20789340138435364, + "step": 6856 + }, + { + "epoch": 1.8208737219492763, + "grad_norm": 1.4246348317049922, + "learning_rate": 4.372734318751082e-07, + "loss": 0.2871186137199402, + "step": 6857 + }, + { + "epoch": 1.8211392909308193, + "grad_norm": 1.3878283876849893, + "learning_rate": 4.359901572347758e-07, + "loss": 0.2419736236333847, + "step": 6858 + }, + { + "epoch": 1.8214048599123622, + "grad_norm": 1.3237602075469659, + "learning_rate": 4.3470872642274455e-07, + "loss": 0.2190292328596115, + "step": 6859 + }, + { + "epoch": 1.8216704288939052, + "grad_norm": 1.3879953178475168, + "learning_rate": 4.3342913968605903e-07, + "loss": 0.2654367685317993, + "step": 6860 + }, + 
{ + "epoch": 1.8219359978754481, + "grad_norm": 1.3362249609314758, + "learning_rate": 4.321513972714075e-07, + "loss": 0.2536984086036682, + "step": 6861 + }, + { + "epoch": 1.822201566856991, + "grad_norm": 1.3804156416489965, + "learning_rate": 4.308754994251252e-07, + "loss": 0.260431170463562, + "step": 6862 + }, + { + "epoch": 1.822467135838534, + "grad_norm": 1.1376782237723586, + "learning_rate": 4.2960144639318855e-07, + "loss": 0.19348303973674774, + "step": 6863 + }, + { + "epoch": 1.822732704820077, + "grad_norm": 1.3505211109720399, + "learning_rate": 4.283292384212201e-07, + "loss": 0.2284386157989502, + "step": 6864 + }, + { + "epoch": 1.82299827380162, + "grad_norm": 1.2449697035186624, + "learning_rate": 4.270588757544869e-07, + "loss": 0.23439526557922363, + "step": 6865 + }, + { + "epoch": 1.823263842783163, + "grad_norm": 1.247098399621602, + "learning_rate": 4.2579035863790086e-07, + "loss": 0.2123441994190216, + "step": 6866 + }, + { + "epoch": 1.8235294117647058, + "grad_norm": 1.251423525262008, + "learning_rate": 4.245236873160163e-07, + "loss": 0.24568180739879608, + "step": 6867 + }, + { + "epoch": 1.8237949807462488, + "grad_norm": 1.4504253184377665, + "learning_rate": 4.232588620330325e-07, + "loss": 0.24078285694122314, + "step": 6868 + }, + { + "epoch": 1.8240605497277917, + "grad_norm": 1.157509101798501, + "learning_rate": 4.2199588303279414e-07, + "loss": 0.2003621608018875, + "step": 6869 + }, + { + "epoch": 1.8243261187093347, + "grad_norm": 1.3049050095763572, + "learning_rate": 4.2073475055878664e-07, + "loss": 0.21201889216899872, + "step": 6870 + }, + { + "epoch": 1.8245916876908777, + "grad_norm": 1.429124542908126, + "learning_rate": 4.1947546485414215e-07, + "loss": 0.23175427317619324, + "step": 6871 + }, + { + "epoch": 1.8248572566724208, + "grad_norm": 1.3101487536079581, + "learning_rate": 4.182180261616364e-07, + "loss": 0.2391383945941925, + "step": 6872 + }, + { + "epoch": 1.8251228256539638, + "grad_norm": 
1.341869026992186, + "learning_rate": 4.169624347236878e-07, + "loss": 0.23120146989822388, + "step": 6873 + }, + { + "epoch": 1.8253883946355067, + "grad_norm": 1.1699948636498165, + "learning_rate": 4.157086907823604e-07, + "loss": 0.22541432082653046, + "step": 6874 + }, + { + "epoch": 1.8256539636170497, + "grad_norm": 1.3354293669412138, + "learning_rate": 4.1445679457936094e-07, + "loss": 0.25613510608673096, + "step": 6875 + }, + { + "epoch": 1.8259195325985926, + "grad_norm": 1.191861909098097, + "learning_rate": 4.1320674635604186e-07, + "loss": 0.21002547442913055, + "step": 6876 + }, + { + "epoch": 1.8261851015801356, + "grad_norm": 1.230870532242656, + "learning_rate": 4.119585463533959e-07, + "loss": 0.2593066692352295, + "step": 6877 + }, + { + "epoch": 1.8264506705616785, + "grad_norm": 1.4772106156087776, + "learning_rate": 4.1071219481206184e-07, + "loss": 0.23771531879901886, + "step": 6878 + }, + { + "epoch": 1.8267162395432215, + "grad_norm": 1.3106459571340912, + "learning_rate": 4.094676919723206e-07, + "loss": 0.2069541960954666, + "step": 6879 + }, + { + "epoch": 1.8269818085247644, + "grad_norm": 1.2065450512433227, + "learning_rate": 4.082250380740993e-07, + "loss": 0.21314311027526855, + "step": 6880 + }, + { + "epoch": 1.8272473775063074, + "grad_norm": 1.2723957233809677, + "learning_rate": 4.069842333569662e-07, + "loss": 0.198696106672287, + "step": 6881 + }, + { + "epoch": 1.8275129464878503, + "grad_norm": 1.2365636263350124, + "learning_rate": 4.057452780601334e-07, + "loss": 0.22771228849887848, + "step": 6882 + }, + { + "epoch": 1.8277785154693933, + "grad_norm": 1.3935711018120034, + "learning_rate": 4.045081724224564e-07, + "loss": 0.24176150560379028, + "step": 6883 + }, + { + "epoch": 1.8280440844509362, + "grad_norm": 1.1711714123320747, + "learning_rate": 4.0327291668243785e-07, + "loss": 0.18257084488868713, + "step": 6884 + }, + { + "epoch": 1.8283096534324792, + "grad_norm": 1.7740145369201021, + "learning_rate": 
4.02039511078216e-07, + "loss": 0.2317531704902649, + "step": 6885 + }, + { + "epoch": 1.8285752224140222, + "grad_norm": 1.237685133468282, + "learning_rate": 4.008079558475797e-07, + "loss": 0.22523516416549683, + "step": 6886 + }, + { + "epoch": 1.828840791395565, + "grad_norm": 1.338469580607285, + "learning_rate": 3.995782512279578e-07, + "loss": 0.22351330518722534, + "step": 6887 + }, + { + "epoch": 1.829106360377108, + "grad_norm": 1.3272231861758204, + "learning_rate": 3.983503974564229e-07, + "loss": 0.22151902318000793, + "step": 6888 + }, + { + "epoch": 1.829371929358651, + "grad_norm": 1.2483501881623744, + "learning_rate": 3.971243947696901e-07, + "loss": 0.20800583064556122, + "step": 6889 + }, + { + "epoch": 1.829637498340194, + "grad_norm": 1.189419989304772, + "learning_rate": 3.959002434041181e-07, + "loss": 0.21332690119743347, + "step": 6890 + }, + { + "epoch": 1.829903067321737, + "grad_norm": 1.3040750377284556, + "learning_rate": 3.946779435957093e-07, + "loss": 0.2561502456665039, + "step": 6891 + }, + { + "epoch": 1.8301686363032799, + "grad_norm": 1.2150229659643972, + "learning_rate": 3.934574955801074e-07, + "loss": 0.23636910319328308, + "step": 6892 + }, + { + "epoch": 1.8304342052848228, + "grad_norm": 1.303931878967275, + "learning_rate": 3.922388995926041e-07, + "loss": 0.26683998107910156, + "step": 6893 + }, + { + "epoch": 1.8306997742663658, + "grad_norm": 1.319570373744726, + "learning_rate": 3.910221558681271e-07, + "loss": 0.2779492735862732, + "step": 6894 + }, + { + "epoch": 1.8309653432479087, + "grad_norm": 1.473106593059021, + "learning_rate": 3.8980726464125095e-07, + "loss": 0.20174488425254822, + "step": 6895 + }, + { + "epoch": 1.8312309122294517, + "grad_norm": 1.3128034885814306, + "learning_rate": 3.885942261461928e-07, + "loss": 0.21486055850982666, + "step": 6896 + }, + { + "epoch": 1.8314964812109946, + "grad_norm": 1.2201269476427121, + "learning_rate": 3.8738304061681107e-07, + "loss": 0.25637733936309814, + 
"step": 6897 + }, + { + "epoch": 1.8317620501925376, + "grad_norm": 1.3661274524986262, + "learning_rate": 3.8617370828661014e-07, + "loss": 0.2518364489078522, + "step": 6898 + }, + { + "epoch": 1.8320276191740805, + "grad_norm": 1.2902396654446358, + "learning_rate": 3.849662293887324e-07, + "loss": 0.25752246379852295, + "step": 6899 + }, + { + "epoch": 1.8322931881556235, + "grad_norm": 1.1514833439027936, + "learning_rate": 3.8376060415596826e-07, + "loss": 0.20891718566417694, + "step": 6900 + }, + { + "epoch": 1.8325587571371664, + "grad_norm": 1.378720679176223, + "learning_rate": 3.825568328207452e-07, + "loss": 0.20491960644721985, + "step": 6901 + }, + { + "epoch": 1.8328243261187094, + "grad_norm": 1.2540067790590503, + "learning_rate": 3.813549156151386e-07, + "loss": 0.22183339297771454, + "step": 6902 + }, + { + "epoch": 1.8330898951002523, + "grad_norm": 1.3321077338345055, + "learning_rate": 3.801548527708621e-07, + "loss": 0.2476987987756729, + "step": 6903 + }, + { + "epoch": 1.8333554640817953, + "grad_norm": 1.470629998110282, + "learning_rate": 3.7895664451927493e-07, + "loss": 0.26486238837242126, + "step": 6904 + }, + { + "epoch": 1.8336210330633382, + "grad_norm": 1.2524745099106778, + "learning_rate": 3.777602910913769e-07, + "loss": 0.25922873616218567, + "step": 6905 + }, + { + "epoch": 1.8338866020448812, + "grad_norm": 1.317563058388092, + "learning_rate": 3.7656579271781127e-07, + "loss": 0.22682476043701172, + "step": 6906 + }, + { + "epoch": 1.8341521710264241, + "grad_norm": 1.2391277284536568, + "learning_rate": 3.753731496288626e-07, + "loss": 0.20371592044830322, + "step": 6907 + }, + { + "epoch": 1.834417740007967, + "grad_norm": 1.2444383452097851, + "learning_rate": 3.7418236205445826e-07, + "loss": 0.23857446014881134, + "step": 6908 + }, + { + "epoch": 1.83468330898951, + "grad_norm": 2.6487436557467645, + "learning_rate": 3.729934302241689e-07, + "loss": 0.27119290828704834, + "step": 6909 + }, + { + "epoch": 
1.834948877971053, + "grad_norm": 1.254159773595776, + "learning_rate": 3.7180635436720567e-07, + "loss": 0.2354927361011505, + "step": 6910 + }, + { + "epoch": 1.835214446952596, + "grad_norm": 1.301136184663389, + "learning_rate": 3.706211347124233e-07, + "loss": 0.26378512382507324, + "step": 6911 + }, + { + "epoch": 1.835480015934139, + "grad_norm": 1.3296098934003593, + "learning_rate": 3.6943777148831907e-07, + "loss": 0.20725026726722717, + "step": 6912 + }, + { + "epoch": 1.8357455849156818, + "grad_norm": 1.2212362377090786, + "learning_rate": 3.682562649230304e-07, + "loss": 0.2049856185913086, + "step": 6913 + }, + { + "epoch": 1.8360111538972248, + "grad_norm": 1.2555620791922353, + "learning_rate": 3.6707661524433833e-07, + "loss": 0.19303423166275024, + "step": 6914 + }, + { + "epoch": 1.8362767228787678, + "grad_norm": 1.2395332139010746, + "learning_rate": 3.6589882267966445e-07, + "loss": 0.21510104835033417, + "step": 6915 + }, + { + "epoch": 1.8365422918603107, + "grad_norm": 1.1669418633603965, + "learning_rate": 3.6472288745607376e-07, + "loss": 0.1933138072490692, + "step": 6916 + }, + { + "epoch": 1.8368078608418537, + "grad_norm": 1.112367559966563, + "learning_rate": 3.6354880980027373e-07, + "loss": 0.2015206664800644, + "step": 6917 + }, + { + "epoch": 1.8370734298233966, + "grad_norm": 1.2823070307410491, + "learning_rate": 3.6237658993861114e-07, + "loss": 0.20550866425037384, + "step": 6918 + }, + { + "epoch": 1.8373389988049396, + "grad_norm": 1.3067689335737758, + "learning_rate": 3.612062280970763e-07, + "loss": 0.221620112657547, + "step": 6919 + }, + { + "epoch": 1.8376045677864825, + "grad_norm": 1.3556317520839982, + "learning_rate": 3.6003772450130315e-07, + "loss": 0.23098941147327423, + "step": 6920 + }, + { + "epoch": 1.8378701367680255, + "grad_norm": 1.147765516964157, + "learning_rate": 3.588710793765626e-07, + "loss": 0.2119837999343872, + "step": 6921 + }, + { + "epoch": 1.8381357057495684, + "grad_norm": 
1.3802709807389941, + "learning_rate": 3.5770629294777146e-07, + "loss": 0.24879229068756104, + "step": 6922 + }, + { + "epoch": 1.8384012747311114, + "grad_norm": 1.3060365647669372, + "learning_rate": 3.565433654394879e-07, + "loss": 0.18895789980888367, + "step": 6923 + }, + { + "epoch": 1.8386668437126543, + "grad_norm": 1.2553378569117732, + "learning_rate": 3.55382297075908e-07, + "loss": 0.23148275911808014, + "step": 6924 + }, + { + "epoch": 1.8389324126941973, + "grad_norm": 1.212120061404488, + "learning_rate": 3.542230880808739e-07, + "loss": 0.20919913053512573, + "step": 6925 + }, + { + "epoch": 1.8391979816757402, + "grad_norm": 1.4703495422250146, + "learning_rate": 3.53065738677868e-07, + "loss": 0.22832845151424408, + "step": 6926 + }, + { + "epoch": 1.8394635506572832, + "grad_norm": 1.2792392305491092, + "learning_rate": 3.519102490900117e-07, + "loss": 0.25866004824638367, + "step": 6927 + }, + { + "epoch": 1.8397291196388261, + "grad_norm": 1.4425441758777668, + "learning_rate": 3.507566195400691e-07, + "loss": 0.23372048139572144, + "step": 6928 + }, + { + "epoch": 1.839994688620369, + "grad_norm": 1.3100572186568338, + "learning_rate": 3.496048502504501e-07, + "loss": 0.2516997158527374, + "step": 6929 + }, + { + "epoch": 1.840260257601912, + "grad_norm": 1.3352189279547024, + "learning_rate": 3.4845494144320036e-07, + "loss": 0.21170508861541748, + "step": 6930 + }, + { + "epoch": 1.840525826583455, + "grad_norm": 1.3970465930645521, + "learning_rate": 3.473068933400081e-07, + "loss": 0.2642953395843506, + "step": 6931 + }, + { + "epoch": 1.840791395564998, + "grad_norm": 1.2429277065520816, + "learning_rate": 3.461607061622041e-07, + "loss": 0.2294994294643402, + "step": 6932 + }, + { + "epoch": 1.8410569645465409, + "grad_norm": 1.3898674163561502, + "learning_rate": 3.450163801307582e-07, + "loss": 0.2554621696472168, + "step": 6933 + }, + { + "epoch": 1.8413225335280838, + "grad_norm": 1.5251200097904765, + "learning_rate": 
3.4387391546628733e-07, + "loss": 0.2291295826435089, + "step": 6934 + }, + { + "epoch": 1.8415881025096268, + "grad_norm": 1.2253918775229307, + "learning_rate": 3.4273331238903974e-07, + "loss": 0.1996842920780182, + "step": 6935 + }, + { + "epoch": 1.8418536714911697, + "grad_norm": 1.3974356568527164, + "learning_rate": 3.415945711189128e-07, + "loss": 0.248038187623024, + "step": 6936 + }, + { + "epoch": 1.8421192404727127, + "grad_norm": 1.4224083213114915, + "learning_rate": 3.4045769187544096e-07, + "loss": 0.232235848903656, + "step": 6937 + }, + { + "epoch": 1.8423848094542556, + "grad_norm": 1.2811247103872994, + "learning_rate": 3.3932267487780333e-07, + "loss": 0.2526085376739502, + "step": 6938 + }, + { + "epoch": 1.8426503784357986, + "grad_norm": 1.324059920588895, + "learning_rate": 3.381895203448182e-07, + "loss": 0.22401389479637146, + "step": 6939 + }, + { + "epoch": 1.8429159474173415, + "grad_norm": 1.2904044842651823, + "learning_rate": 3.3705822849494195e-07, + "loss": 0.2509264647960663, + "step": 6940 + }, + { + "epoch": 1.8431815163988845, + "grad_norm": 1.2502849304352568, + "learning_rate": 3.3592879954627564e-07, + "loss": 0.2451169192790985, + "step": 6941 + }, + { + "epoch": 1.8434470853804275, + "grad_norm": 1.2774613485778883, + "learning_rate": 3.3480123371655957e-07, + "loss": 0.2361738532781601, + "step": 6942 + }, + { + "epoch": 1.8437126543619704, + "grad_norm": 1.1823675774441849, + "learning_rate": 3.3367553122317544e-07, + "loss": 0.22336295247077942, + "step": 6943 + }, + { + "epoch": 1.8439782233435134, + "grad_norm": 1.4218109729535482, + "learning_rate": 3.325516922831451e-07, + "loss": 0.22287659347057343, + "step": 6944 + }, + { + "epoch": 1.8442437923250563, + "grad_norm": 1.2819242467045069, + "learning_rate": 3.3142971711312975e-07, + "loss": 0.21845945715904236, + "step": 6945 + }, + { + "epoch": 1.8445093613065993, + "grad_norm": 1.2822597279006254, + "learning_rate": 3.303096059294364e-07, + "loss": 
0.2650350332260132, + "step": 6946 + }, + { + "epoch": 1.8447749302881422, + "grad_norm": 1.346661503925149, + "learning_rate": 3.291913589480078e-07, + "loss": 0.21282124519348145, + "step": 6947 + }, + { + "epoch": 1.8450404992696852, + "grad_norm": 1.1254422779054267, + "learning_rate": 3.280749763844293e-07, + "loss": 0.17899346351623535, + "step": 6948 + }, + { + "epoch": 1.8453060682512281, + "grad_norm": 1.3295675928838626, + "learning_rate": 3.269604584539254e-07, + "loss": 0.23462103307247162, + "step": 6949 + }, + { + "epoch": 1.845571637232771, + "grad_norm": 1.2573990354862534, + "learning_rate": 3.2584780537136206e-07, + "loss": 0.20188388228416443, + "step": 6950 + }, + { + "epoch": 1.845837206214314, + "grad_norm": 1.3823133322277716, + "learning_rate": 3.247370173512443e-07, + "loss": 0.2760109305381775, + "step": 6951 + }, + { + "epoch": 1.846102775195857, + "grad_norm": 1.1542508493730164, + "learning_rate": 3.236280946077219e-07, + "loss": 0.20977352559566498, + "step": 6952 + }, + { + "epoch": 1.8463683441774, + "grad_norm": 1.299549634983184, + "learning_rate": 3.225210373545806e-07, + "loss": 0.26468873023986816, + "step": 6953 + }, + { + "epoch": 1.8466339131589429, + "grad_norm": 1.287524526318513, + "learning_rate": 3.214158458052463e-07, + "loss": 0.2362184375524521, + "step": 6954 + }, + { + "epoch": 1.8468994821404858, + "grad_norm": 1.29131597308928, + "learning_rate": 3.2031252017278966e-07, + "loss": 0.21406327188014984, + "step": 6955 + }, + { + "epoch": 1.847165051122029, + "grad_norm": 1.4794600314925854, + "learning_rate": 3.1921106066991835e-07, + "loss": 0.2698758840560913, + "step": 6956 + }, + { + "epoch": 1.847430620103572, + "grad_norm": 1.3029413719135112, + "learning_rate": 3.1811146750898025e-07, + "loss": 0.22954389452934265, + "step": 6957 + }, + { + "epoch": 1.847696189085115, + "grad_norm": 1.149631756175727, + "learning_rate": 3.170137409019636e-07, + "loss": 0.23005755245685577, + "step": 6958 + }, + { + "epoch": 
1.8479617580666579, + "grad_norm": 1.270561680049171, + "learning_rate": 3.159178810604968e-07, + "loss": 0.22408893704414368, + "step": 6959 + }, + { + "epoch": 1.8482273270482008, + "grad_norm": 1.1761716687553918, + "learning_rate": 3.14823888195851e-07, + "loss": 0.1983698308467865, + "step": 6960 + }, + { + "epoch": 1.8484928960297438, + "grad_norm": 1.387251984339494, + "learning_rate": 3.137317625189329e-07, + "loss": 0.24643054604530334, + "step": 6961 + }, + { + "epoch": 1.8487584650112867, + "grad_norm": 1.3612119090250128, + "learning_rate": 3.1264150424029083e-07, + "loss": 0.274917870759964, + "step": 6962 + }, + { + "epoch": 1.8490240339928297, + "grad_norm": 1.2836957141365997, + "learning_rate": 3.115531135701155e-07, + "loss": 0.2129468023777008, + "step": 6963 + }, + { + "epoch": 1.8492896029743726, + "grad_norm": 1.3421884287788837, + "learning_rate": 3.1046659071823695e-07, + "loss": 0.24127928912639618, + "step": 6964 + }, + { + "epoch": 1.8495551719559156, + "grad_norm": 1.2737231627436634, + "learning_rate": 3.093819358941208e-07, + "loss": 0.2528054416179657, + "step": 6965 + }, + { + "epoch": 1.8498207409374585, + "grad_norm": 1.253824703575336, + "learning_rate": 3.0829914930687767e-07, + "loss": 0.23623798787593842, + "step": 6966 + }, + { + "epoch": 1.8500863099190015, + "grad_norm": 1.231408637511902, + "learning_rate": 3.0721823116525497e-07, + "loss": 0.20241659879684448, + "step": 6967 + }, + { + "epoch": 1.8503518789005444, + "grad_norm": 1.264350645442844, + "learning_rate": 3.0613918167764156e-07, + "loss": 0.24365916848182678, + "step": 6968 + }, + { + "epoch": 1.8506174478820874, + "grad_norm": 1.311846273217192, + "learning_rate": 3.0506200105206554e-07, + "loss": 0.2550637722015381, + "step": 6969 + }, + { + "epoch": 1.8508830168636303, + "grad_norm": 1.1438212130974086, + "learning_rate": 3.0398668949619515e-07, + "loss": 0.21531938016414642, + "step": 6970 + }, + { + "epoch": 1.8511485858451733, + "grad_norm": 
1.3468646282560623, + "learning_rate": 3.029132472173368e-07, + "loss": 0.22749900817871094, + "step": 6971 + }, + { + "epoch": 1.8514141548267162, + "grad_norm": 1.186404759445675, + "learning_rate": 3.018416744224373e-07, + "loss": 0.1826775223016739, + "step": 6972 + }, + { + "epoch": 1.8516797238082592, + "grad_norm": 1.1782373460713542, + "learning_rate": 3.0077197131808344e-07, + "loss": 0.21982814371585846, + "step": 6973 + }, + { + "epoch": 1.8519452927898021, + "grad_norm": 1.2874557997839566, + "learning_rate": 2.997041381105026e-07, + "loss": 0.23515473306179047, + "step": 6974 + }, + { + "epoch": 1.852210861771345, + "grad_norm": 1.2184369208885015, + "learning_rate": 2.9863817500556e-07, + "loss": 0.19620616734027863, + "step": 6975 + }, + { + "epoch": 1.852476430752888, + "grad_norm": 1.208715706835639, + "learning_rate": 2.975740822087603e-07, + "loss": 0.22158116102218628, + "step": 6976 + }, + { + "epoch": 1.852741999734431, + "grad_norm": 1.5176127203291871, + "learning_rate": 2.96511859925247e-07, + "loss": 0.23082244396209717, + "step": 6977 + }, + { + "epoch": 1.853007568715974, + "grad_norm": 1.286088700644728, + "learning_rate": 2.954515083598064e-07, + "loss": 0.22743141651153564, + "step": 6978 + }, + { + "epoch": 1.853273137697517, + "grad_norm": 1.3437900472909596, + "learning_rate": 2.943930277168594e-07, + "loss": 0.2329188883304596, + "step": 6979 + }, + { + "epoch": 1.8535387066790598, + "grad_norm": 1.1892741095151198, + "learning_rate": 2.9333641820047055e-07, + "loss": 0.20360302925109863, + "step": 6980 + }, + { + "epoch": 1.8538042756606028, + "grad_norm": 1.1771915113483071, + "learning_rate": 2.922816800143402e-07, + "loss": 0.1903664767742157, + "step": 6981 + }, + { + "epoch": 1.8540698446421457, + "grad_norm": 1.2252145672801615, + "learning_rate": 2.912288133618102e-07, + "loss": 0.2247854322195053, + "step": 6982 + }, + { + "epoch": 1.8543354136236887, + "grad_norm": 1.305215823982529, + "learning_rate": 
2.9017781844586035e-07, + "loss": 0.22693192958831787, + "step": 6983 + }, + { + "epoch": 1.8546009826052319, + "grad_norm": 1.3213552294005186, + "learning_rate": 2.891286954691108e-07, + "loss": 0.23769894242286682, + "step": 6984 + }, + { + "epoch": 1.8548665515867748, + "grad_norm": 1.267542763443237, + "learning_rate": 2.880814446338198e-07, + "loss": 0.23251450061798096, + "step": 6985 + }, + { + "epoch": 1.8551321205683178, + "grad_norm": 1.3253334264213772, + "learning_rate": 2.870360661418847e-07, + "loss": 0.20828741788864136, + "step": 6986 + }, + { + "epoch": 1.8553976895498607, + "grad_norm": 1.2448815733296377, + "learning_rate": 2.859925601948421e-07, + "loss": 0.2324519008398056, + "step": 6987 + }, + { + "epoch": 1.8556632585314037, + "grad_norm": 1.2799176737952995, + "learning_rate": 2.8495092699386774e-07, + "loss": 0.2166297733783722, + "step": 6988 + }, + { + "epoch": 1.8559288275129466, + "grad_norm": 1.416567928880924, + "learning_rate": 2.839111667397765e-07, + "loss": 0.2760158181190491, + "step": 6989 + }, + { + "epoch": 1.8561943964944896, + "grad_norm": 1.1117414218952344, + "learning_rate": 2.8287327963302025e-07, + "loss": 0.2263752520084381, + "step": 6990 + }, + { + "epoch": 1.8564599654760325, + "grad_norm": 1.328135206527719, + "learning_rate": 2.8183726587369455e-07, + "loss": 0.2490656077861786, + "step": 6991 + }, + { + "epoch": 1.8567255344575755, + "grad_norm": 1.4860885268210424, + "learning_rate": 2.808031256615285e-07, + "loss": 0.22495508193969727, + "step": 6992 + }, + { + "epoch": 1.8569911034391184, + "grad_norm": 1.297235121122649, + "learning_rate": 2.7977085919589253e-07, + "loss": 0.2671046853065491, + "step": 6993 + }, + { + "epoch": 1.8572566724206614, + "grad_norm": 1.2050300397617886, + "learning_rate": 2.7874046667579535e-07, + "loss": 0.19782954454421997, + "step": 6994 + }, + { + "epoch": 1.8575222414022043, + "grad_norm": 1.3009259795352104, + "learning_rate": 2.777119482998847e-07, + "loss": 
0.24458879232406616, + "step": 6995 + }, + { + "epoch": 1.8577878103837473, + "grad_norm": 1.203325902936209, + "learning_rate": 2.7668530426644637e-07, + "loss": 0.23476794362068176, + "step": 6996 + }, + { + "epoch": 1.8580533793652902, + "grad_norm": 1.3828799415147273, + "learning_rate": 2.7566053477340535e-07, + "loss": 0.2318287342786789, + "step": 6997 + }, + { + "epoch": 1.8583189483468332, + "grad_norm": 1.1075382213650395, + "learning_rate": 2.746376400183259e-07, + "loss": 0.21341973543167114, + "step": 6998 + }, + { + "epoch": 1.8585845173283762, + "grad_norm": 1.3634634009375282, + "learning_rate": 2.7361662019840916e-07, + "loss": 0.25269803404808044, + "step": 6999 + }, + { + "epoch": 1.858850086309919, + "grad_norm": 1.2242004376785176, + "learning_rate": 2.7259747551049653e-07, + "loss": 0.24590039253234863, + "step": 7000 + }, + { + "epoch": 1.859115655291462, + "grad_norm": 1.2116643717780577, + "learning_rate": 2.715802061510664e-07, + "loss": 0.19907096028327942, + "step": 7001 + }, + { + "epoch": 1.859381224273005, + "grad_norm": 1.319285786592131, + "learning_rate": 2.705648123162363e-07, + "loss": 0.24304917454719543, + "step": 7002 + }, + { + "epoch": 1.859646793254548, + "grad_norm": 1.3884525546157216, + "learning_rate": 2.6955129420176193e-07, + "loss": 0.24846915900707245, + "step": 7003 + }, + { + "epoch": 1.859912362236091, + "grad_norm": 1.365283429552511, + "learning_rate": 2.685396520030381e-07, + "loss": 0.21709200739860535, + "step": 7004 + }, + { + "epoch": 1.8601779312176339, + "grad_norm": 1.3687506828870908, + "learning_rate": 2.675298859150977e-07, + "loss": 0.28031325340270996, + "step": 7005 + }, + { + "epoch": 1.8604435001991768, + "grad_norm": 1.1527129171653896, + "learning_rate": 2.6652199613261155e-07, + "loss": 0.20367707312107086, + "step": 7006 + }, + { + "epoch": 1.8607090691807198, + "grad_norm": 1.1875101722790007, + "learning_rate": 2.6551598284988877e-07, + "loss": 0.20737403631210327, + "step": 7007 + }, + { 
+ "epoch": 1.8609746381622627, + "grad_norm": 1.3375926225189751, + "learning_rate": 2.6451184626087646e-07, + "loss": 0.2504046559333801, + "step": 7008 + }, + { + "epoch": 1.8612402071438057, + "grad_norm": 1.3403751507501938, + "learning_rate": 2.635095865591608e-07, + "loss": 0.26347339153289795, + "step": 7009 + }, + { + "epoch": 1.8615057761253486, + "grad_norm": 1.1832867553985462, + "learning_rate": 2.625092039379662e-07, + "loss": 0.2347220480442047, + "step": 7010 + }, + { + "epoch": 1.8617713451068916, + "grad_norm": 1.2487098903864389, + "learning_rate": 2.6151069859015386e-07, + "loss": 0.23565630614757538, + "step": 7011 + }, + { + "epoch": 1.8620369140884345, + "grad_norm": 1.2377624004623402, + "learning_rate": 2.605140707082243e-07, + "loss": 0.21462437510490417, + "step": 7012 + }, + { + "epoch": 1.8623024830699775, + "grad_norm": 1.2992774401284823, + "learning_rate": 2.595193204843149e-07, + "loss": 0.24224728345870972, + "step": 7013 + }, + { + "epoch": 1.8625680520515204, + "grad_norm": 1.3531530893390702, + "learning_rate": 2.5852644811020344e-07, + "loss": 0.24200880527496338, + "step": 7014 + }, + { + "epoch": 1.8628336210330634, + "grad_norm": 1.2331149203562455, + "learning_rate": 2.5753545377730227e-07, + "loss": 0.23315191268920898, + "step": 7015 + }, + { + "epoch": 1.8630991900146063, + "grad_norm": 1.4360061023192454, + "learning_rate": 2.56546337676663e-07, + "loss": 0.31112274527549744, + "step": 7016 + }, + { + "epoch": 1.8633647589961493, + "grad_norm": 1.1775380155652753, + "learning_rate": 2.555590999989754e-07, + "loss": 0.2291945070028305, + "step": 7017 + }, + { + "epoch": 1.8636303279776922, + "grad_norm": 1.3248749602779475, + "learning_rate": 2.5457374093457057e-07, + "loss": 0.2324746549129486, + "step": 7018 + }, + { + "epoch": 1.8638958969592352, + "grad_norm": 1.3333311590100283, + "learning_rate": 2.5359026067341086e-07, + "loss": 0.2585206627845764, + "step": 7019 + }, + { + "epoch": 1.8641614659407781, + 
"grad_norm": 1.254813387894953, + "learning_rate": 2.5260865940510027e-07, + "loss": 0.22986871004104614, + "step": 7020 + }, + { + "epoch": 1.864427034922321, + "grad_norm": 1.3302473304174876, + "learning_rate": 2.5162893731888074e-07, + "loss": 0.22615428268909454, + "step": 7021 + }, + { + "epoch": 1.864692603903864, + "grad_norm": 1.2311139475810073, + "learning_rate": 2.5065109460363113e-07, + "loss": 0.21324753761291504, + "step": 7022 + }, + { + "epoch": 1.864958172885407, + "grad_norm": 1.2499721276179248, + "learning_rate": 2.4967513144786736e-07, + "loss": 0.2247733324766159, + "step": 7023 + }, + { + "epoch": 1.86522374186695, + "grad_norm": 1.198842298043478, + "learning_rate": 2.4870104803974336e-07, + "loss": 0.22080597281455994, + "step": 7024 + }, + { + "epoch": 1.865489310848493, + "grad_norm": 1.3721040923851937, + "learning_rate": 2.4772884456705224e-07, + "loss": 0.23669888079166412, + "step": 7025 + }, + { + "epoch": 1.8657548798300359, + "grad_norm": 1.2946969495879501, + "learning_rate": 2.4675852121722075e-07, + "loss": 0.2320847064256668, + "step": 7026 + }, + { + "epoch": 1.8660204488115788, + "grad_norm": 1.374404266409337, + "learning_rate": 2.4579007817731925e-07, + "loss": 0.2595662474632263, + "step": 7027 + }, + { + "epoch": 1.8662860177931218, + "grad_norm": 1.2351512812852723, + "learning_rate": 2.4482351563405174e-07, + "loss": 0.22152045369148254, + "step": 7028 + }, + { + "epoch": 1.8665515867746647, + "grad_norm": 1.270416082371449, + "learning_rate": 2.4385883377375683e-07, + "loss": 0.2391948401927948, + "step": 7029 + }, + { + "epoch": 1.8668171557562077, + "grad_norm": 1.3234796115140017, + "learning_rate": 2.428960327824159e-07, + "loss": 0.23117749392986298, + "step": 7030 + }, + { + "epoch": 1.8670827247377506, + "grad_norm": 1.313106749776766, + "learning_rate": 2.41935112845646e-07, + "loss": 0.24019500613212585, + "step": 7031 + }, + { + "epoch": 1.8673482937192936, + "grad_norm": 1.253088890729472, + 
"learning_rate": 2.4097607414869995e-07, + "loss": 0.19560202956199646, + "step": 7032 + }, + { + "epoch": 1.8676138627008365, + "grad_norm": 1.3625686769003584, + "learning_rate": 2.4001891687647103e-07, + "loss": 0.23110055923461914, + "step": 7033 + }, + { + "epoch": 1.8678794316823795, + "grad_norm": 1.3388200482229684, + "learning_rate": 2.39063641213485e-07, + "loss": 0.2214709371328354, + "step": 7034 + }, + { + "epoch": 1.8681450006639224, + "grad_norm": 1.2700799842548796, + "learning_rate": 2.381102473439101e-07, + "loss": 0.22123369574546814, + "step": 7035 + }, + { + "epoch": 1.8684105696454654, + "grad_norm": 1.4629863869289934, + "learning_rate": 2.371587354515481e-07, + "loss": 0.23984813690185547, + "step": 7036 + }, + { + "epoch": 1.8686761386270083, + "grad_norm": 1.4496870886295976, + "learning_rate": 2.3620910571984124e-07, + "loss": 0.26089030504226685, + "step": 7037 + }, + { + "epoch": 1.8689417076085513, + "grad_norm": 1.2076380290124689, + "learning_rate": 2.3526135833186527e-07, + "loss": 0.2344229370355606, + "step": 7038 + }, + { + "epoch": 1.8692072765900942, + "grad_norm": 1.290620691312973, + "learning_rate": 2.34315493470334e-07, + "loss": 0.24499498307704926, + "step": 7039 + }, + { + "epoch": 1.8694728455716372, + "grad_norm": 1.2975050166282813, + "learning_rate": 2.333715113176005e-07, + "loss": 0.21971477568149567, + "step": 7040 + }, + { + "epoch": 1.8697384145531801, + "grad_norm": 1.2659856510175163, + "learning_rate": 2.3242941205565362e-07, + "loss": 0.2594453990459442, + "step": 7041 + }, + { + "epoch": 1.870003983534723, + "grad_norm": 1.3125676617059407, + "learning_rate": 2.3148919586611806e-07, + "loss": 0.24689960479736328, + "step": 7042 + }, + { + "epoch": 1.870269552516266, + "grad_norm": 1.2165345453138858, + "learning_rate": 2.3055086293025665e-07, + "loss": 0.19972509145736694, + "step": 7043 + }, + { + "epoch": 1.870535121497809, + "grad_norm": 1.2460782677559714, + "learning_rate": 2.2961441342896795e-07, + 
"loss": 0.2139236032962799, + "step": 7044 + }, + { + "epoch": 1.870800690479352, + "grad_norm": 1.196552292185578, + "learning_rate": 2.286798475427898e-07, + "loss": 0.2251984179019928, + "step": 7045 + }, + { + "epoch": 1.8710662594608949, + "grad_norm": 1.2395291577625112, + "learning_rate": 2.277471654518959e-07, + "loss": 0.24517378211021423, + "step": 7046 + }, + { + "epoch": 1.8713318284424378, + "grad_norm": 1.3048847468612028, + "learning_rate": 2.2681636733609457e-07, + "loss": 0.19115275144577026, + "step": 7047 + }, + { + "epoch": 1.8715973974239808, + "grad_norm": 1.2997607659373802, + "learning_rate": 2.2588745337483454e-07, + "loss": 0.26092633605003357, + "step": 7048 + }, + { + "epoch": 1.8718629664055237, + "grad_norm": 1.2646212726473884, + "learning_rate": 2.2496042374719807e-07, + "loss": 0.18862302601337433, + "step": 7049 + }, + { + "epoch": 1.8721285353870667, + "grad_norm": 1.1602330038245767, + "learning_rate": 2.2403527863190554e-07, + "loss": 0.20728996396064758, + "step": 7050 + }, + { + "epoch": 1.8723941043686096, + "grad_norm": 1.236025812615254, + "learning_rate": 2.231120182073143e-07, + "loss": 0.24244122207164764, + "step": 7051 + }, + { + "epoch": 1.8726596733501526, + "grad_norm": 1.205655043915546, + "learning_rate": 2.2219064265141866e-07, + "loss": 0.18956953287124634, + "step": 7052 + }, + { + "epoch": 1.8729252423316956, + "grad_norm": 1.1159089015267554, + "learning_rate": 2.2127115214184868e-07, + "loss": 0.19873176515102386, + "step": 7053 + }, + { + "epoch": 1.8731908113132385, + "grad_norm": 1.2896839736015335, + "learning_rate": 2.203535468558704e-07, + "loss": 0.23717360198497772, + "step": 7054 + }, + { + "epoch": 1.8734563802947815, + "grad_norm": 1.3203924338573048, + "learning_rate": 2.1943782697038896e-07, + "loss": 0.24051904678344727, + "step": 7055 + }, + { + "epoch": 1.8737219492763244, + "grad_norm": 1.3193670550613668, + "learning_rate": 2.1852399266194312e-07, + "loss": 0.23541691899299622, + "step": 
7056 + }, + { + "epoch": 1.8739875182578674, + "grad_norm": 1.3395958296451687, + "learning_rate": 2.1761204410671088e-07, + "loss": 0.22566163539886475, + "step": 7057 + }, + { + "epoch": 1.8742530872394103, + "grad_norm": 1.297432294479727, + "learning_rate": 2.167019814805027e-07, + "loss": 0.25771743059158325, + "step": 7058 + }, + { + "epoch": 1.8745186562209533, + "grad_norm": 1.1482951648622821, + "learning_rate": 2.1579380495876934e-07, + "loss": 0.22624637186527252, + "step": 7059 + }, + { + "epoch": 1.8747842252024962, + "grad_norm": 1.3036126318267591, + "learning_rate": 2.148875147165963e-07, + "loss": 0.24671627581119537, + "step": 7060 + }, + { + "epoch": 1.8750497941840392, + "grad_norm": 1.1983704285109544, + "learning_rate": 2.1398311092870605e-07, + "loss": 0.21607278287410736, + "step": 7061 + }, + { + "epoch": 1.8753153631655821, + "grad_norm": 1.1102939736369823, + "learning_rate": 2.1308059376945689e-07, + "loss": 0.1960655301809311, + "step": 7062 + }, + { + "epoch": 1.875580932147125, + "grad_norm": 1.2816228458436618, + "learning_rate": 2.1217996341284297e-07, + "loss": 0.22005721926689148, + "step": 7063 + }, + { + "epoch": 1.875846501128668, + "grad_norm": 1.2746284533707484, + "learning_rate": 2.1128122003249541e-07, + "loss": 0.21442776918411255, + "step": 7064 + }, + { + "epoch": 1.876112070110211, + "grad_norm": 1.1849768238897622, + "learning_rate": 2.1038436380168114e-07, + "loss": 0.23126785457134247, + "step": 7065 + }, + { + "epoch": 1.876377639091754, + "grad_norm": 1.4246070766583077, + "learning_rate": 2.094893948933041e-07, + "loss": 0.24286629259586334, + "step": 7066 + }, + { + "epoch": 1.8766432080732969, + "grad_norm": 1.3706445020134141, + "learning_rate": 2.0859631347990406e-07, + "loss": 0.25771957635879517, + "step": 7067 + }, + { + "epoch": 1.87690877705484, + "grad_norm": 1.1754559873110961, + "learning_rate": 2.0770511973365436e-07, + "loss": 0.19837790727615356, + "step": 7068 + }, + { + "epoch": 
1.877174346036383, + "grad_norm": 1.2372359407501599, + "learning_rate": 2.0681581382636984e-07, + "loss": 0.21209359169006348, + "step": 7069 + }, + { + "epoch": 1.877439915017926, + "grad_norm": 1.9178204608286211, + "learning_rate": 2.0592839592949554e-07, + "loss": 0.26641422510147095, + "step": 7070 + }, + { + "epoch": 1.877705483999469, + "grad_norm": 1.3604176831947503, + "learning_rate": 2.050428662141146e-07, + "loss": 0.21609601378440857, + "step": 7071 + }, + { + "epoch": 1.8779710529810119, + "grad_norm": 1.2861845280896875, + "learning_rate": 2.0415922485095051e-07, + "loss": 0.23642000555992126, + "step": 7072 + }, + { + "epoch": 1.8782366219625548, + "grad_norm": 1.3854568667341272, + "learning_rate": 2.0327747201035587e-07, + "loss": 0.24564675986766815, + "step": 7073 + }, + { + "epoch": 1.8785021909440978, + "grad_norm": 1.229212126818568, + "learning_rate": 2.0239760786232355e-07, + "loss": 0.20001479983329773, + "step": 7074 + }, + { + "epoch": 1.8787677599256407, + "grad_norm": 1.2817747323253132, + "learning_rate": 2.015196325764801e-07, + "loss": 0.2590208649635315, + "step": 7075 + }, + { + "epoch": 1.8790333289071837, + "grad_norm": 1.2462050168824985, + "learning_rate": 2.0064354632208904e-07, + "loss": 0.23298504948616028, + "step": 7076 + }, + { + "epoch": 1.8792988978887266, + "grad_norm": 1.2573573484068483, + "learning_rate": 1.997693492680497e-07, + "loss": 0.22409996390342712, + "step": 7077 + }, + { + "epoch": 1.8795644668702696, + "grad_norm": 1.410723892029772, + "learning_rate": 1.9889704158289724e-07, + "loss": 0.27316784858703613, + "step": 7078 + }, + { + "epoch": 1.8798300358518125, + "grad_norm": 1.2924796650338854, + "learning_rate": 1.980266234348016e-07, + "loss": 0.2271946519613266, + "step": 7079 + }, + { + "epoch": 1.8800956048333555, + "grad_norm": 1.2438429761767338, + "learning_rate": 1.9715809499156858e-07, + "loss": 0.20887964963912964, + "step": 7080 + }, + { + "epoch": 1.8803611738148984, + "grad_norm": 
1.2112268618082698, + "learning_rate": 1.9629145642064197e-07, + "loss": 0.23468685150146484, + "step": 7081 + }, + { + "epoch": 1.8806267427964414, + "grad_norm": 1.308865144497765, + "learning_rate": 1.9542670788909813e-07, + "loss": 0.21624556183815002, + "step": 7082 + }, + { + "epoch": 1.8808923117779843, + "grad_norm": 1.1751415989571612, + "learning_rate": 1.9456384956365149e-07, + "loss": 0.22328166663646698, + "step": 7083 + }, + { + "epoch": 1.8811578807595273, + "grad_norm": 1.3508603820961609, + "learning_rate": 1.93702881610649e-07, + "loss": 0.2526431381702423, + "step": 7084 + }, + { + "epoch": 1.8814234497410702, + "grad_norm": 1.3562256445660688, + "learning_rate": 1.9284380419607784e-07, + "loss": 0.23668771982192993, + "step": 7085 + }, + { + "epoch": 1.8816890187226132, + "grad_norm": 1.2668189225170288, + "learning_rate": 1.9198661748555557e-07, + "loss": 0.24710845947265625, + "step": 7086 + }, + { + "epoch": 1.8819545877041561, + "grad_norm": 1.4047256701053605, + "learning_rate": 1.911313216443389e-07, + "loss": 0.22696900367736816, + "step": 7087 + }, + { + "epoch": 1.882220156685699, + "grad_norm": 1.3717447863189725, + "learning_rate": 1.9027791683731922e-07, + "loss": 0.21652163565158844, + "step": 7088 + }, + { + "epoch": 1.882485725667242, + "grad_norm": 1.3189608691767827, + "learning_rate": 1.894264032290205e-07, + "loss": 0.2166716307401657, + "step": 7089 + }, + { + "epoch": 1.882751294648785, + "grad_norm": 1.3746931913110367, + "learning_rate": 1.8857678098360698e-07, + "loss": 0.26200050115585327, + "step": 7090 + }, + { + "epoch": 1.883016863630328, + "grad_norm": 1.2945644704190118, + "learning_rate": 1.8772905026487654e-07, + "loss": 0.2292764037847519, + "step": 7091 + }, + { + "epoch": 1.883282432611871, + "grad_norm": 1.3106590918741248, + "learning_rate": 1.8688321123625842e-07, + "loss": 0.23893016576766968, + "step": 7092 + }, + { + "epoch": 1.8835480015934138, + "grad_norm": 1.2241030970764724, + "learning_rate": 
1.860392640608244e-07, + "loss": 0.2509230673313141, + "step": 7093 + }, + { + "epoch": 1.8838135705749568, + "grad_norm": 1.2218686374923997, + "learning_rate": 1.8519720890127434e-07, + "loss": 0.24156486988067627, + "step": 7094 + }, + { + "epoch": 1.8840791395564997, + "grad_norm": 1.2859122561460798, + "learning_rate": 1.843570459199462e-07, + "loss": 0.2120019942522049, + "step": 7095 + }, + { + "epoch": 1.884344708538043, + "grad_norm": 1.6579646138710773, + "learning_rate": 1.835187752788159e-07, + "loss": 0.23400259017944336, + "step": 7096 + }, + { + "epoch": 1.8846102775195859, + "grad_norm": 1.281132346942695, + "learning_rate": 1.8268239713949087e-07, + "loss": 0.20913103222846985, + "step": 7097 + }, + { + "epoch": 1.8848758465011288, + "grad_norm": 1.3381319381686223, + "learning_rate": 1.8184791166321546e-07, + "loss": 0.24468877911567688, + "step": 7098 + }, + { + "epoch": 1.8851414154826718, + "grad_norm": 1.236616212709848, + "learning_rate": 1.8101531901086767e-07, + "loss": 0.2038918137550354, + "step": 7099 + }, + { + "epoch": 1.8854069844642147, + "grad_norm": 1.3201086548941574, + "learning_rate": 1.8018461934296239e-07, + "loss": 0.24191413819789886, + "step": 7100 + }, + { + "epoch": 1.8856725534457577, + "grad_norm": 1.277539269643606, + "learning_rate": 1.793558128196493e-07, + "loss": 0.24394474923610687, + "step": 7101 + }, + { + "epoch": 1.8859381224273006, + "grad_norm": 1.1561225023553612, + "learning_rate": 1.7852889960071063e-07, + "loss": 0.22630709409713745, + "step": 7102 + }, + { + "epoch": 1.8862036914088436, + "grad_norm": 1.5472360212555962, + "learning_rate": 1.7770387984556768e-07, + "loss": 0.23936980962753296, + "step": 7103 + }, + { + "epoch": 1.8864692603903865, + "grad_norm": 1.275471897769737, + "learning_rate": 1.768807537132733e-07, + "loss": 0.24808618426322937, + "step": 7104 + }, + { + "epoch": 1.8867348293719295, + "grad_norm": 1.273035999339445, + "learning_rate": 1.7605952136251603e-07, + "loss": 
0.23934635519981384, + "step": 7105 + }, + { + "epoch": 1.8870003983534724, + "grad_norm": 1.189686791776393, + "learning_rate": 1.7524018295162148e-07, + "loss": 0.22107656300067902, + "step": 7106 + }, + { + "epoch": 1.8872659673350154, + "grad_norm": 1.3496800848037154, + "learning_rate": 1.7442273863854553e-07, + "loss": 0.23253028094768524, + "step": 7107 + }, + { + "epoch": 1.8875315363165583, + "grad_norm": 1.3028365552765204, + "learning_rate": 1.7360718858088542e-07, + "loss": 0.2501102387905121, + "step": 7108 + }, + { + "epoch": 1.8877971052981013, + "grad_norm": 1.4057988238229884, + "learning_rate": 1.7279353293586765e-07, + "loss": 0.25537967681884766, + "step": 7109 + }, + { + "epoch": 1.8880626742796442, + "grad_norm": 2.7876746143917033, + "learning_rate": 1.7198177186035447e-07, + "loss": 0.25701045989990234, + "step": 7110 + }, + { + "epoch": 1.8883282432611872, + "grad_norm": 1.1447271563365653, + "learning_rate": 1.7117190551084628e-07, + "loss": 0.2109440565109253, + "step": 7111 + }, + { + "epoch": 1.8885938122427302, + "grad_norm": 1.2454061070152636, + "learning_rate": 1.7036393404347373e-07, + "loss": 0.22767721116542816, + "step": 7112 + }, + { + "epoch": 1.888859381224273, + "grad_norm": 1.1572937395529788, + "learning_rate": 1.6955785761400444e-07, + "loss": 0.1976814568042755, + "step": 7113 + }, + { + "epoch": 1.889124950205816, + "grad_norm": 1.1727224852039306, + "learning_rate": 1.687536763778419e-07, + "loss": 0.21109873056411743, + "step": 7114 + }, + { + "epoch": 1.889390519187359, + "grad_norm": 1.1916227822459606, + "learning_rate": 1.6795139049002095e-07, + "loss": 0.2165786623954773, + "step": 7115 + }, + { + "epoch": 1.889656088168902, + "grad_norm": 1.2917556149315792, + "learning_rate": 1.6715100010521347e-07, + "loss": 0.23962441086769104, + "step": 7116 + }, + { + "epoch": 1.889921657150445, + "grad_norm": 1.2423009900583697, + "learning_rate": 1.6635250537772596e-07, + "loss": 0.23351140320301056, + "step": 7117 + }, + 
{ + "epoch": 1.8901872261319879, + "grad_norm": 1.3034348272306633, + "learning_rate": 1.6555590646149866e-07, + "loss": 0.19999945163726807, + "step": 7118 + }, + { + "epoch": 1.8904527951135308, + "grad_norm": 1.432201467842623, + "learning_rate": 1.647612035101054e-07, + "loss": 0.27142196893692017, + "step": 7119 + }, + { + "epoch": 1.8907183640950738, + "grad_norm": 1.2861780172834696, + "learning_rate": 1.6396839667675691e-07, + "loss": 0.21525685489177704, + "step": 7120 + }, + { + "epoch": 1.8909839330766167, + "grad_norm": 3.2062699859400396, + "learning_rate": 1.631774861142965e-07, + "loss": 0.24305005371570587, + "step": 7121 + }, + { + "epoch": 1.8912495020581597, + "grad_norm": 1.2019998279555377, + "learning_rate": 1.6238847197520113e-07, + "loss": 0.23202842473983765, + "step": 7122 + }, + { + "epoch": 1.8915150710397026, + "grad_norm": 1.4409003412080332, + "learning_rate": 1.6160135441158576e-07, + "loss": 0.24373790621757507, + "step": 7123 + }, + { + "epoch": 1.8917806400212456, + "grad_norm": 1.2360359431057044, + "learning_rate": 1.6081613357519565e-07, + "loss": 0.22774222493171692, + "step": 7124 + }, + { + "epoch": 1.8920462090027885, + "grad_norm": 1.2064368847282083, + "learning_rate": 1.6003280961741196e-07, + "loss": 0.20660057663917542, + "step": 7125 + }, + { + "epoch": 1.8923117779843315, + "grad_norm": 1.3070998228758686, + "learning_rate": 1.5925138268925166e-07, + "loss": 0.23578912019729614, + "step": 7126 + }, + { + "epoch": 1.8925773469658744, + "grad_norm": 1.2737250152668298, + "learning_rate": 1.5847185294136313e-07, + "loss": 0.20852091908454895, + "step": 7127 + }, + { + "epoch": 1.8928429159474174, + "grad_norm": 1.1465883719364975, + "learning_rate": 1.5769422052403172e-07, + "loss": 0.17455898225307465, + "step": 7128 + }, + { + "epoch": 1.8931084849289603, + "grad_norm": 1.5036497092390075, + "learning_rate": 1.5691848558717638e-07, + "loss": 0.29552748799324036, + "step": 7129 + }, + { + "epoch": 1.8933740539105033, + 
"grad_norm": 1.3009458238394367, + "learning_rate": 1.5614464828034746e-07, + "loss": 0.22972649335861206, + "step": 7130 + }, + { + "epoch": 1.8936396228920462, + "grad_norm": 1.2296689152648304, + "learning_rate": 1.5537270875273348e-07, + "loss": 0.2134108692407608, + "step": 7131 + }, + { + "epoch": 1.8939051918735892, + "grad_norm": 1.4119584533896288, + "learning_rate": 1.546026671531542e-07, + "loss": 0.24145451188087463, + "step": 7132 + }, + { + "epoch": 1.8941707608551321, + "grad_norm": 1.355860353407812, + "learning_rate": 1.5383452363006534e-07, + "loss": 0.2323920726776123, + "step": 7133 + }, + { + "epoch": 1.894436329836675, + "grad_norm": 1.197617700552455, + "learning_rate": 1.5306827833155403e-07, + "loss": 0.20091015100479126, + "step": 7134 + }, + { + "epoch": 1.894701898818218, + "grad_norm": 1.370489911603159, + "learning_rate": 1.523039314053465e-07, + "loss": 0.2451317310333252, + "step": 7135 + }, + { + "epoch": 1.894967467799761, + "grad_norm": 1.2946538259097045, + "learning_rate": 1.5154148299879822e-07, + "loss": 0.22744594514369965, + "step": 7136 + }, + { + "epoch": 1.895233036781304, + "grad_norm": 1.2046527835430252, + "learning_rate": 1.5078093325889943e-07, + "loss": 0.2460673749446869, + "step": 7137 + }, + { + "epoch": 1.895498605762847, + "grad_norm": 1.4172423595206858, + "learning_rate": 1.5002228233227722e-07, + "loss": 0.2524537444114685, + "step": 7138 + }, + { + "epoch": 1.8957641747443899, + "grad_norm": 1.1840127480017744, + "learning_rate": 1.4926553036518798e-07, + "loss": 0.2056279480457306, + "step": 7139 + }, + { + "epoch": 1.8960297437259328, + "grad_norm": 1.2144930845419581, + "learning_rate": 1.485106775035261e-07, + "loss": 0.2656184732913971, + "step": 7140 + }, + { + "epoch": 1.8962953127074758, + "grad_norm": 1.1903286988332102, + "learning_rate": 1.477577238928185e-07, + "loss": 0.2190116047859192, + "step": 7141 + }, + { + "epoch": 1.8965608816890187, + "grad_norm": 1.206151177902952, + "learning_rate": 
1.4700666967822574e-07, + "loss": 0.22984017431735992, + "step": 7142 + }, + { + "epoch": 1.8968264506705617, + "grad_norm": 1.1949819121682481, + "learning_rate": 1.462575150045409e-07, + "loss": 0.17947378754615784, + "step": 7143 + }, + { + "epoch": 1.8970920196521046, + "grad_norm": 1.2649423314993642, + "learning_rate": 1.4551026001619395e-07, + "loss": 0.24965715408325195, + "step": 7144 + }, + { + "epoch": 1.8973575886336476, + "grad_norm": 1.236302993447548, + "learning_rate": 1.4476490485724526e-07, + "loss": 0.2337307333946228, + "step": 7145 + }, + { + "epoch": 1.8976231576151905, + "grad_norm": 1.2205039464348546, + "learning_rate": 1.4402144967139098e-07, + "loss": 0.22668538987636566, + "step": 7146 + }, + { + "epoch": 1.8978887265967335, + "grad_norm": 1.350785859399433, + "learning_rate": 1.4327989460196091e-07, + "loss": 0.21934781968593597, + "step": 7147 + }, + { + "epoch": 1.8981542955782764, + "grad_norm": 1.2212959594670445, + "learning_rate": 1.4254023979191844e-07, + "loss": 0.1957930624485016, + "step": 7148 + }, + { + "epoch": 1.8984198645598194, + "grad_norm": 1.1724780894008597, + "learning_rate": 1.4180248538385956e-07, + "loss": 0.22351369261741638, + "step": 7149 + }, + { + "epoch": 1.8986854335413623, + "grad_norm": 1.3930947329130605, + "learning_rate": 1.4106663152001487e-07, + "loss": 0.2603265047073364, + "step": 7150 + }, + { + "epoch": 1.8989510025229053, + "grad_norm": 1.260479860356455, + "learning_rate": 1.4033267834224873e-07, + "loss": 0.2566663324832916, + "step": 7151 + }, + { + "epoch": 1.8992165715044482, + "grad_norm": 1.2799319314175146, + "learning_rate": 1.3960062599205682e-07, + "loss": 0.23130206763744354, + "step": 7152 + }, + { + "epoch": 1.8994821404859912, + "grad_norm": 1.1757231252562024, + "learning_rate": 1.3887047461057179e-07, + "loss": 0.17946425080299377, + "step": 7153 + }, + { + "epoch": 1.8997477094675341, + "grad_norm": 1.2434099546308155, + "learning_rate": 1.3814222433855884e-07, + "loss": 
0.23946328461170197, + "step": 7154 + }, + { + "epoch": 1.900013278449077, + "grad_norm": 1.2249367291717066, + "learning_rate": 1.3741587531641566e-07, + "loss": 0.21002715826034546, + "step": 7155 + }, + { + "epoch": 1.90027884743062, + "grad_norm": 1.3062374823275615, + "learning_rate": 1.3669142768417242e-07, + "loss": 0.2121986746788025, + "step": 7156 + }, + { + "epoch": 1.900544416412163, + "grad_norm": 1.373871289837254, + "learning_rate": 1.3596888158149525e-07, + "loss": 0.26400670409202576, + "step": 7157 + }, + { + "epoch": 1.900809985393706, + "grad_norm": 1.1813353744292436, + "learning_rate": 1.3524823714768375e-07, + "loss": 0.18764406442642212, + "step": 7158 + }, + { + "epoch": 1.9010755543752489, + "grad_norm": 1.415975931925435, + "learning_rate": 1.3452949452166686e-07, + "loss": 0.2550342381000519, + "step": 7159 + }, + { + "epoch": 1.9013411233567918, + "grad_norm": 1.304366194966887, + "learning_rate": 1.3381265384201035e-07, + "loss": 0.23188576102256775, + "step": 7160 + }, + { + "epoch": 1.9016066923383348, + "grad_norm": 1.2473914592639561, + "learning_rate": 1.3309771524691372e-07, + "loss": 0.23124513030052185, + "step": 7161 + }, + { + "epoch": 1.9018722613198777, + "grad_norm": 1.2056745011797427, + "learning_rate": 1.323846788742078e-07, + "loss": 0.19941067695617676, + "step": 7162 + }, + { + "epoch": 1.9021378303014207, + "grad_norm": 1.4624998875104938, + "learning_rate": 1.316735448613593e-07, + "loss": 0.22510412335395813, + "step": 7163 + }, + { + "epoch": 1.9024033992829636, + "grad_norm": 1.2448961229015743, + "learning_rate": 1.309643133454641e-07, + "loss": 0.19102326035499573, + "step": 7164 + }, + { + "epoch": 1.9026689682645066, + "grad_norm": 1.2307397875458914, + "learning_rate": 1.3025698446325618e-07, + "loss": 0.20826731622219086, + "step": 7165 + }, + { + "epoch": 1.9029345372460496, + "grad_norm": 1.3483240422328144, + "learning_rate": 1.2955155835109757e-07, + "loss": 0.23238909244537354, + "step": 7166 + }, + { 
+ "epoch": 1.9032001062275925, + "grad_norm": 1.4338552298496805, + "learning_rate": 1.2884803514498833e-07, + "loss": 0.2635011374950409, + "step": 7167 + }, + { + "epoch": 1.9034656752091355, + "grad_norm": 1.1745725675637841, + "learning_rate": 1.281464149805578e-07, + "loss": 0.2073322981595993, + "step": 7168 + }, + { + "epoch": 1.9037312441906784, + "grad_norm": 1.2344038568124596, + "learning_rate": 1.274466979930711e-07, + "loss": 0.22091326117515564, + "step": 7169 + }, + { + "epoch": 1.9039968131722214, + "grad_norm": 1.114689842836081, + "learning_rate": 1.2674888431742472e-07, + "loss": 0.18613001704216003, + "step": 7170 + }, + { + "epoch": 1.9042623821537643, + "grad_norm": 1.2788383965135535, + "learning_rate": 1.2605297408814887e-07, + "loss": 0.2165849655866623, + "step": 7171 + }, + { + "epoch": 1.9045279511353073, + "grad_norm": 1.294203512401496, + "learning_rate": 1.2535896743940844e-07, + "loss": 0.21317794919013977, + "step": 7172 + }, + { + "epoch": 1.9047935201168502, + "grad_norm": 1.47127212987638, + "learning_rate": 1.2466686450499866e-07, + "loss": 0.25221073627471924, + "step": 7173 + }, + { + "epoch": 1.9050590890983932, + "grad_norm": 1.2647474973058104, + "learning_rate": 1.239766654183472e-07, + "loss": 0.21598559617996216, + "step": 7174 + }, + { + "epoch": 1.9053246580799361, + "grad_norm": 1.2635227030316536, + "learning_rate": 1.232883703125187e-07, + "loss": 0.2284495085477829, + "step": 7175 + }, + { + "epoch": 1.905590227061479, + "grad_norm": 1.1825527167306378, + "learning_rate": 1.2260197932020713e-07, + "loss": 0.21899332106113434, + "step": 7176 + }, + { + "epoch": 1.905855796043022, + "grad_norm": 1.3588902485974734, + "learning_rate": 1.2191749257374097e-07, + "loss": 0.2633277177810669, + "step": 7177 + }, + { + "epoch": 1.906121365024565, + "grad_norm": 1.2643904365611611, + "learning_rate": 1.2123491020508137e-07, + "loss": 0.2330140471458435, + "step": 7178 + }, + { + "epoch": 1.906386934006108, + "grad_norm": 
1.2757939155257039, + "learning_rate": 1.2055423234582087e-07, + "loss": 0.21859750151634216, + "step": 7179 + }, + { + "epoch": 1.9066525029876509, + "grad_norm": 1.3985563606047093, + "learning_rate": 1.198754591271878e-07, + "loss": 0.252164363861084, + "step": 7180 + }, + { + "epoch": 1.906918071969194, + "grad_norm": 1.4365501399575176, + "learning_rate": 1.191985906800408e-07, + "loss": 0.24968160688877106, + "step": 7181 + }, + { + "epoch": 1.907183640950737, + "grad_norm": 1.199067091736319, + "learning_rate": 1.185236271348722e-07, + "loss": 0.2083423137664795, + "step": 7182 + }, + { + "epoch": 1.90744920993228, + "grad_norm": 1.258208503364781, + "learning_rate": 1.1785056862180789e-07, + "loss": 0.2468394935131073, + "step": 7183 + }, + { + "epoch": 1.907714778913823, + "grad_norm": 1.2908738922715033, + "learning_rate": 1.1717941527060405e-07, + "loss": 0.22417521476745605, + "step": 7184 + }, + { + "epoch": 1.9079803478953659, + "grad_norm": 1.2789853859840312, + "learning_rate": 1.1651016721065167e-07, + "loss": 0.2411842793226242, + "step": 7185 + }, + { + "epoch": 1.9082459168769088, + "grad_norm": 1.311967953603668, + "learning_rate": 1.1584282457097417e-07, + "loss": 0.24650761485099792, + "step": 7186 + }, + { + "epoch": 1.9085114858584518, + "grad_norm": 1.3305923315328496, + "learning_rate": 1.1517738748022755e-07, + "loss": 0.22433717548847198, + "step": 7187 + }, + { + "epoch": 1.9087770548399947, + "grad_norm": 1.2666444248015347, + "learning_rate": 1.145138560667003e-07, + "loss": 0.20867910981178284, + "step": 7188 + }, + { + "epoch": 1.9090426238215377, + "grad_norm": 1.2511449541105855, + "learning_rate": 1.138522304583134e-07, + "loss": 0.21889618039131165, + "step": 7189 + }, + { + "epoch": 1.9093081928030806, + "grad_norm": 1.113107479716362, + "learning_rate": 1.1319251078261928e-07, + "loss": 0.19350749254226685, + "step": 7190 + }, + { + "epoch": 1.9095737617846236, + "grad_norm": 1.183265546980091, + "learning_rate": 
1.125346971668051e-07, + "loss": 0.19123657047748566, + "step": 7191 + }, + { + "epoch": 1.9098393307661665, + "grad_norm": 1.2653223306994201, + "learning_rate": 1.118787897376905e-07, + "loss": 0.21433782577514648, + "step": 7192 + }, + { + "epoch": 1.9101048997477095, + "grad_norm": 1.474925382041675, + "learning_rate": 1.1122478862172437e-07, + "loss": 0.2521187663078308, + "step": 7193 + }, + { + "epoch": 1.9103704687292524, + "grad_norm": 1.2835872924926361, + "learning_rate": 1.1057269394499248e-07, + "loss": 0.2141486555337906, + "step": 7194 + }, + { + "epoch": 1.9106360377107954, + "grad_norm": 1.271472683987379, + "learning_rate": 1.0992250583320985e-07, + "loss": 0.22960343956947327, + "step": 7195 + }, + { + "epoch": 1.9109016066923383, + "grad_norm": 1.3433609684783299, + "learning_rate": 1.092742244117262e-07, + "loss": 0.21809744834899902, + "step": 7196 + }, + { + "epoch": 1.9111671756738813, + "grad_norm": 1.248347973820862, + "learning_rate": 1.0862784980552044e-07, + "loss": 0.22418212890625, + "step": 7197 + }, + { + "epoch": 1.9114327446554242, + "grad_norm": 1.2504701200893746, + "learning_rate": 1.0798338213920845e-07, + "loss": 0.22050701081752777, + "step": 7198 + }, + { + "epoch": 1.9116983136369672, + "grad_norm": 1.206849931438756, + "learning_rate": 1.0734082153703418e-07, + "loss": 0.23200345039367676, + "step": 7199 + }, + { + "epoch": 1.9119638826185101, + "grad_norm": 1.1102825382626649, + "learning_rate": 1.0670016812287631e-07, + "loss": 0.18366631865501404, + "step": 7200 + }, + { + "epoch": 1.912229451600053, + "grad_norm": 1.2844567521026582, + "learning_rate": 1.0606142202024605e-07, + "loss": 0.24362193048000336, + "step": 7201 + }, + { + "epoch": 1.912495020581596, + "grad_norm": 1.2822631921528913, + "learning_rate": 1.0542458335228601e-07, + "loss": 0.2216200977563858, + "step": 7202 + }, + { + "epoch": 1.912760589563139, + "grad_norm": 1.0921875359661608, + "learning_rate": 1.0478965224176907e-07, + "loss": 
0.20216065645217896, + "step": 7203 + }, + { + "epoch": 1.913026158544682, + "grad_norm": 1.254966671592246, + "learning_rate": 1.041566288111051e-07, + "loss": 0.22054359316825867, + "step": 7204 + }, + { + "epoch": 1.913291727526225, + "grad_norm": 1.3532366246655447, + "learning_rate": 1.0352551318233206e-07, + "loss": 0.21569015085697174, + "step": 7205 + }, + { + "epoch": 1.9135572965077678, + "grad_norm": 1.2826756039782425, + "learning_rate": 1.028963054771226e-07, + "loss": 0.22967267036437988, + "step": 7206 + }, + { + "epoch": 1.9138228654893108, + "grad_norm": 1.3494789006319945, + "learning_rate": 1.0226900581677968e-07, + "loss": 0.2422460913658142, + "step": 7207 + }, + { + "epoch": 1.9140884344708538, + "grad_norm": 1.3606228589652338, + "learning_rate": 1.0164361432223879e-07, + "loss": 0.25891292095184326, + "step": 7208 + }, + { + "epoch": 1.914354003452397, + "grad_norm": 1.3570561855059022, + "learning_rate": 1.0102013111406905e-07, + "loss": 0.26915764808654785, + "step": 7209 + }, + { + "epoch": 1.9146195724339399, + "grad_norm": 1.3889996377213247, + "learning_rate": 1.0039855631247097e-07, + "loss": 0.2268485426902771, + "step": 7210 + }, + { + "epoch": 1.9148851414154828, + "grad_norm": 1.254622691077732, + "learning_rate": 9.977889003727647e-08, + "loss": 0.22551512718200684, + "step": 7211 + }, + { + "epoch": 1.9151507103970258, + "grad_norm": 1.233084698895248, + "learning_rate": 9.91611324079489e-08, + "loss": 0.24224743247032166, + "step": 7212 + }, + { + "epoch": 1.9154162793785687, + "grad_norm": 1.2426176239380708, + "learning_rate": 9.854528354358517e-08, + "loss": 0.19550879299640656, + "step": 7213 + }, + { + "epoch": 1.9156818483601117, + "grad_norm": 1.3449782320604147, + "learning_rate": 9.793134356291478e-08, + "loss": 0.24986523389816284, + "step": 7214 + }, + { + "epoch": 1.9159474173416546, + "grad_norm": 1.3340583070384961, + "learning_rate": 9.731931258429638e-08, + "loss": 0.2565170228481293, + "step": 7215 + }, + { + 
"epoch": 1.9162129863231976, + "grad_norm": 1.185156912642083, + "learning_rate": 9.670919072572449e-08, + "loss": 0.2166958749294281, + "step": 7216 + }, + { + "epoch": 1.9164785553047405, + "grad_norm": 1.2903999319183896, + "learning_rate": 9.610097810482166e-08, + "loss": 0.2002115249633789, + "step": 7217 + }, + { + "epoch": 1.9167441242862835, + "grad_norm": 1.1589813054229285, + "learning_rate": 9.549467483884412e-08, + "loss": 0.209486186504364, + "step": 7218 + }, + { + "epoch": 1.9170096932678264, + "grad_norm": 1.2748483155423624, + "learning_rate": 9.489028104468056e-08, + "loss": 0.22061321139335632, + "step": 7219 + }, + { + "epoch": 1.9172752622493694, + "grad_norm": 1.3916500275624957, + "learning_rate": 9.428779683885114e-08, + "loss": 0.21880047023296356, + "step": 7220 + }, + { + "epoch": 1.9175408312309123, + "grad_norm": 1.174801358834737, + "learning_rate": 9.368722233750849e-08, + "loss": 0.22674325108528137, + "step": 7221 + }, + { + "epoch": 1.9178064002124553, + "grad_norm": 1.2877078963500264, + "learning_rate": 9.308855765643332e-08, + "loss": 0.22100718319416046, + "step": 7222 + }, + { + "epoch": 1.9180719691939982, + "grad_norm": 1.3291196619762962, + "learning_rate": 9.249180291104553e-08, + "loss": 0.23105254769325256, + "step": 7223 + }, + { + "epoch": 1.9183375381755412, + "grad_norm": 1.2897395451200044, + "learning_rate": 9.189695821638755e-08, + "loss": 0.22483405470848083, + "step": 7224 + }, + { + "epoch": 1.9186031071570842, + "grad_norm": 1.0701399001286365, + "learning_rate": 9.130402368714208e-08, + "loss": 0.1939004510641098, + "step": 7225 + }, + { + "epoch": 1.918868676138627, + "grad_norm": 1.2349263677236755, + "learning_rate": 9.071299943761769e-08, + "loss": 0.21722440421581268, + "step": 7226 + }, + { + "epoch": 1.91913424512017, + "grad_norm": 1.2911544131515666, + "learning_rate": 9.012388558175877e-08, + "loss": 0.24213966727256775, + "step": 7227 + }, + { + "epoch": 1.919399814101713, + "grad_norm": 
1.2266941536480729, + "learning_rate": 8.953668223313783e-08, + "loss": 0.2305546998977661, + "step": 7228 + }, + { + "epoch": 1.919665383083256, + "grad_norm": 1.3932840646040938, + "learning_rate": 8.895138950496207e-08, + "loss": 0.2678033709526062, + "step": 7229 + }, + { + "epoch": 1.919930952064799, + "grad_norm": 1.2449965535251106, + "learning_rate": 8.836800751006791e-08, + "loss": 0.2491014301776886, + "step": 7230 + }, + { + "epoch": 1.9201965210463419, + "grad_norm": 1.2551836576043742, + "learning_rate": 8.778653636092537e-08, + "loss": 0.21837326884269714, + "step": 7231 + }, + { + "epoch": 1.9204620900278848, + "grad_norm": 1.2745391136427304, + "learning_rate": 8.72069761696348e-08, + "loss": 0.24149999022483826, + "step": 7232 + }, + { + "epoch": 1.9207276590094278, + "grad_norm": 1.3444140835580012, + "learning_rate": 8.662932704792793e-08, + "loss": 0.2124684453010559, + "step": 7233 + }, + { + "epoch": 1.9209932279909707, + "grad_norm": 1.3660213009765734, + "learning_rate": 8.60535891071712e-08, + "loss": 0.2452150285243988, + "step": 7234 + }, + { + "epoch": 1.9212587969725137, + "grad_norm": 1.2005299446152509, + "learning_rate": 8.547976245835698e-08, + "loss": 0.23598846793174744, + "step": 7235 + }, + { + "epoch": 1.9215243659540566, + "grad_norm": 1.3152974069295431, + "learning_rate": 8.490784721211454e-08, + "loss": 0.2105225920677185, + "step": 7236 + }, + { + "epoch": 1.9217899349355996, + "grad_norm": 1.4424977304862223, + "learning_rate": 8.433784347870122e-08, + "loss": 0.2585388720035553, + "step": 7237 + }, + { + "epoch": 1.9220555039171425, + "grad_norm": 1.2300698994172445, + "learning_rate": 8.376975136800691e-08, + "loss": 0.21703900396823883, + "step": 7238 + }, + { + "epoch": 1.9223210728986855, + "grad_norm": 1.2580366958382383, + "learning_rate": 8.3203570989554e-08, + "loss": 0.22771210968494415, + "step": 7239 + }, + { + "epoch": 1.9225866418802284, + "grad_norm": 1.1645003525207898, + "learning_rate": 
8.263930245249408e-08, + "loss": 0.22535575926303864, + "step": 7240 + }, + { + "epoch": 1.9228522108617714, + "grad_norm": 1.1822452042500315, + "learning_rate": 8.207694586561344e-08, + "loss": 0.2052595466375351, + "step": 7241 + }, + { + "epoch": 1.9231177798433143, + "grad_norm": 1.2683012213528768, + "learning_rate": 8.151650133732536e-08, + "loss": 0.19611456990242004, + "step": 7242 + }, + { + "epoch": 1.9233833488248573, + "grad_norm": 1.2762939262923303, + "learning_rate": 8.095796897567787e-08, + "loss": 0.20256826281547546, + "step": 7243 + }, + { + "epoch": 1.9236489178064002, + "grad_norm": 1.5444723931343434, + "learning_rate": 8.040134888835038e-08, + "loss": 0.25462138652801514, + "step": 7244 + }, + { + "epoch": 1.9239144867879432, + "grad_norm": 1.2813246309729553, + "learning_rate": 7.984664118265262e-08, + "loss": 0.27362316846847534, + "step": 7245 + }, + { + "epoch": 1.9241800557694861, + "grad_norm": 1.3526739723939418, + "learning_rate": 7.929384596552459e-08, + "loss": 0.23749098181724548, + "step": 7246 + }, + { + "epoch": 1.924445624751029, + "grad_norm": 1.3016147885306604, + "learning_rate": 7.874296334353882e-08, + "loss": 0.2472018599510193, + "step": 7247 + }, + { + "epoch": 1.924711193732572, + "grad_norm": 1.3451463766339227, + "learning_rate": 7.819399342290034e-08, + "loss": 0.23181989789009094, + "step": 7248 + }, + { + "epoch": 1.924976762714115, + "grad_norm": 1.2415200588572097, + "learning_rate": 7.764693630944231e-08, + "loss": 0.21363665163516998, + "step": 7249 + }, + { + "epoch": 1.925242331695658, + "grad_norm": 1.1849821155034532, + "learning_rate": 7.710179210863144e-08, + "loss": 0.21239221096038818, + "step": 7250 + }, + { + "epoch": 1.925507900677201, + "grad_norm": 1.4494720585200522, + "learning_rate": 7.655856092556591e-08, + "loss": 0.2643742263317108, + "step": 7251 + }, + { + "epoch": 1.9257734696587439, + "grad_norm": 1.251877664981762, + "learning_rate": 7.601724286497414e-08, + "loss": 0.2232428789138794, 
+ "step": 7252 + }, + { + "epoch": 1.9260390386402868, + "grad_norm": 1.313277386530887, + "learning_rate": 7.547783803121489e-08, + "loss": 0.2052377462387085, + "step": 7253 + }, + { + "epoch": 1.9263046076218298, + "grad_norm": 1.2540878413614547, + "learning_rate": 7.494034652827942e-08, + "loss": 0.22194740176200867, + "step": 7254 + }, + { + "epoch": 1.9265701766033727, + "grad_norm": 1.2500554609811554, + "learning_rate": 7.440476845979038e-08, + "loss": 0.22004084289073944, + "step": 7255 + }, + { + "epoch": 1.9268357455849157, + "grad_norm": 1.5480704193409933, + "learning_rate": 7.387110392899965e-08, + "loss": 0.2218078374862671, + "step": 7256 + }, + { + "epoch": 1.9271013145664586, + "grad_norm": 1.3006193889830067, + "learning_rate": 7.33393530387927e-08, + "loss": 0.23272839188575745, + "step": 7257 + }, + { + "epoch": 1.9273668835480016, + "grad_norm": 1.3119971487868216, + "learning_rate": 7.280951589168417e-08, + "loss": 0.23666653037071228, + "step": 7258 + }, + { + "epoch": 1.9276324525295445, + "grad_norm": 1.235294099691234, + "learning_rate": 7.228159258982126e-08, + "loss": 0.21946533024311066, + "step": 7259 + }, + { + "epoch": 1.9278980215110875, + "grad_norm": 1.252328485116134, + "learning_rate": 7.175558323498033e-08, + "loss": 0.22158634662628174, + "step": 7260 + }, + { + "epoch": 1.9281635904926304, + "grad_norm": 1.1330771135999202, + "learning_rate": 7.123148792857026e-08, + "loss": 0.19978654384613037, + "step": 7261 + }, + { + "epoch": 1.9284291594741734, + "grad_norm": 1.2859436875650823, + "learning_rate": 7.070930677163023e-08, + "loss": 0.21197813749313354, + "step": 7262 + }, + { + "epoch": 1.9286947284557163, + "grad_norm": 1.2611518825786316, + "learning_rate": 7.018903986483083e-08, + "loss": 0.22650468349456787, + "step": 7263 + }, + { + "epoch": 1.9289602974372593, + "grad_norm": 1.2701948406662635, + "learning_rate": 6.967068730847293e-08, + "loss": 0.22257481515407562, + "step": 7264 + }, + { + "epoch": 
1.9292258664188022, + "grad_norm": 1.3219742856760701, + "learning_rate": 6.915424920248992e-08, + "loss": 0.24899804592132568, + "step": 7265 + }, + { + "epoch": 1.9294914354003452, + "grad_norm": 1.2996576951077934, + "learning_rate": 6.863972564644328e-08, + "loss": 0.250610888004303, + "step": 7266 + }, + { + "epoch": 1.9297570043818881, + "grad_norm": 1.251137163804366, + "learning_rate": 6.81271167395292e-08, + "loss": 0.22786292433738708, + "step": 7267 + }, + { + "epoch": 1.930022573363431, + "grad_norm": 1.2890465128808872, + "learning_rate": 6.761642258056977e-08, + "loss": 0.22816789150238037, + "step": 7268 + }, + { + "epoch": 1.930288142344974, + "grad_norm": 1.3522601458627446, + "learning_rate": 6.7107643268024e-08, + "loss": 0.2589687407016754, + "step": 7269 + }, + { + "epoch": 1.930553711326517, + "grad_norm": 1.1963236616697677, + "learning_rate": 6.660077889997673e-08, + "loss": 0.2281583547592163, + "step": 7270 + }, + { + "epoch": 1.93081928030806, + "grad_norm": 1.3347065729182181, + "learning_rate": 6.60958295741454e-08, + "loss": 0.22833740711212158, + "step": 7271 + }, + { + "epoch": 1.931084849289603, + "grad_norm": 1.1611313283452582, + "learning_rate": 6.559279538787877e-08, + "loss": 0.20720313489437103, + "step": 7272 + }, + { + "epoch": 1.9313504182711458, + "grad_norm": 1.1884544288263172, + "learning_rate": 6.509167643815594e-08, + "loss": 0.17191773653030396, + "step": 7273 + }, + { + "epoch": 1.9316159872526888, + "grad_norm": 1.1354230474675757, + "learning_rate": 6.459247282158632e-08, + "loss": 0.23586943745613098, + "step": 7274 + }, + { + "epoch": 1.9318815562342317, + "grad_norm": 1.3318856895013969, + "learning_rate": 6.409518463441067e-08, + "loss": 0.21353168785572052, + "step": 7275 + }, + { + "epoch": 1.9321471252157747, + "grad_norm": 1.404937308132313, + "learning_rate": 6.359981197250009e-08, + "loss": 0.23148195445537567, + "step": 7276 + }, + { + "epoch": 1.9324126941973176, + "grad_norm": 1.3040478141172254, + 
"learning_rate": 6.310635493135709e-08, + "loss": 0.2113666534423828, + "step": 7277 + }, + { + "epoch": 1.9326782631788606, + "grad_norm": 1.3399999009479682, + "learning_rate": 6.261481360611332e-08, + "loss": 0.27689510583877563, + "step": 7278 + }, + { + "epoch": 1.9329438321604036, + "grad_norm": 1.2809237898551964, + "learning_rate": 6.2125188091533e-08, + "loss": 0.23746277391910553, + "step": 7279 + }, + { + "epoch": 1.9332094011419465, + "grad_norm": 1.4215326252349767, + "learning_rate": 6.163747848201062e-08, + "loss": 0.23123708367347717, + "step": 7280 + }, + { + "epoch": 1.9334749701234895, + "grad_norm": 1.3095914464878196, + "learning_rate": 6.115168487157097e-08, + "loss": 0.23640167713165283, + "step": 7281 + }, + { + "epoch": 1.9337405391050324, + "grad_norm": 1.3278235730632808, + "learning_rate": 6.066780735386801e-08, + "loss": 0.2259385585784912, + "step": 7282 + }, + { + "epoch": 1.9340061080865754, + "grad_norm": 1.230137664492021, + "learning_rate": 6.018584602218824e-08, + "loss": 0.219761461019516, + "step": 7283 + }, + { + "epoch": 1.9342716770681183, + "grad_norm": 1.43054331413576, + "learning_rate": 5.970580096944733e-08, + "loss": 0.24411989748477936, + "step": 7284 + }, + { + "epoch": 1.9345372460496613, + "grad_norm": 1.196712051616964, + "learning_rate": 5.922767228819459e-08, + "loss": 0.232415571808815, + "step": 7285 + }, + { + "epoch": 1.9348028150312042, + "grad_norm": 1.341424963494065, + "learning_rate": 5.875146007060517e-08, + "loss": 0.25938165187835693, + "step": 7286 + }, + { + "epoch": 1.9350683840127472, + "grad_norm": 1.253589726996753, + "learning_rate": 5.827716440848785e-08, + "loss": 0.22138425707817078, + "step": 7287 + }, + { + "epoch": 1.9353339529942901, + "grad_norm": 1.12038038288381, + "learning_rate": 5.7804785393282825e-08, + "loss": 0.19724398851394653, + "step": 7288 + }, + { + "epoch": 1.935599521975833, + "grad_norm": 1.4840167690508577, + "learning_rate": 5.7334323116056136e-08, + "loss": 
0.25307583808898926, + "step": 7289 + }, + { + "epoch": 1.935865090957376, + "grad_norm": 1.2525903433235852, + "learning_rate": 5.686577766751078e-08, + "loss": 0.2436421811580658, + "step": 7290 + }, + { + "epoch": 1.936130659938919, + "grad_norm": 1.2518328182394873, + "learning_rate": 5.6399149137973394e-08, + "loss": 0.2164984941482544, + "step": 7291 + }, + { + "epoch": 1.936396228920462, + "grad_norm": 1.2277499731042363, + "learning_rate": 5.5934437617407576e-08, + "loss": 0.22526800632476807, + "step": 7292 + }, + { + "epoch": 1.936661797902005, + "grad_norm": 2.195756796154145, + "learning_rate": 5.547164319540277e-08, + "loss": 0.27787747979164124, + "step": 7293 + }, + { + "epoch": 1.936927366883548, + "grad_norm": 1.2647979578451993, + "learning_rate": 5.5010765961179825e-08, + "loss": 0.2188001275062561, + "step": 7294 + }, + { + "epoch": 1.937192935865091, + "grad_norm": 1.2454775538056309, + "learning_rate": 5.4551806003591e-08, + "loss": 0.22620335221290588, + "step": 7295 + }, + { + "epoch": 1.937458504846634, + "grad_norm": 1.186081247005514, + "learning_rate": 5.409476341111775e-08, + "loss": 0.20357783138751984, + "step": 7296 + }, + { + "epoch": 1.937724073828177, + "grad_norm": 1.2316030990526627, + "learning_rate": 5.3639638271872906e-08, + "loss": 0.22717830538749695, + "step": 7297 + }, + { + "epoch": 1.9379896428097199, + "grad_norm": 1.1600371116406252, + "learning_rate": 5.318643067360074e-08, + "loss": 0.20139163732528687, + "step": 7298 + }, + { + "epoch": 1.9382552117912628, + "grad_norm": 1.3377291184643103, + "learning_rate": 5.273514070367247e-08, + "loss": 0.2620807886123657, + "step": 7299 + }, + { + "epoch": 1.9385207807728058, + "grad_norm": 1.2240680803779018, + "learning_rate": 5.2285768449091834e-08, + "loss": 0.2102596014738083, + "step": 7300 + }, + { + "epoch": 1.9387863497543487, + "grad_norm": 1.3057613284367482, + "learning_rate": 5.183831399649175e-08, + "loss": 0.2105238288640976, + "step": 7301 + }, + { + "epoch": 
1.9390519187358917, + "grad_norm": 1.2241670740951547, + "learning_rate": 5.1392777432138773e-08, + "loss": 0.22178848087787628, + "step": 7302 + }, + { + "epoch": 1.9393174877174346, + "grad_norm": 1.3648564311332518, + "learning_rate": 5.094915884192419e-08, + "loss": 0.23375345766544342, + "step": 7303 + }, + { + "epoch": 1.9395830566989776, + "grad_norm": 1.3411332724549108, + "learning_rate": 5.050745831137405e-08, + "loss": 0.22709332406520844, + "step": 7304 + }, + { + "epoch": 1.9398486256805205, + "grad_norm": 1.270429998105922, + "learning_rate": 5.0067675925642437e-08, + "loss": 0.2312362790107727, + "step": 7305 + }, + { + "epoch": 1.9401141946620635, + "grad_norm": 1.159162680689607, + "learning_rate": 4.962981176951376e-08, + "loss": 0.2014419138431549, + "step": 7306 + }, + { + "epoch": 1.9403797636436064, + "grad_norm": 1.4294147842238243, + "learning_rate": 4.9193865927404936e-08, + "loss": 0.23700466752052307, + "step": 7307 + }, + { + "epoch": 1.9406453326251494, + "grad_norm": 1.3814639969092575, + "learning_rate": 4.8759838483358745e-08, + "loss": 0.23362770676612854, + "step": 7308 + }, + { + "epoch": 1.9409109016066923, + "grad_norm": 1.4217349736822034, + "learning_rate": 4.832772952105269e-08, + "loss": 0.26057323813438416, + "step": 7309 + }, + { + "epoch": 1.9411764705882353, + "grad_norm": 1.1693504727058668, + "learning_rate": 4.789753912379014e-08, + "loss": 0.20954950153827667, + "step": 7310 + }, + { + "epoch": 1.9414420395697782, + "grad_norm": 1.1532528532836688, + "learning_rate": 4.746926737450919e-08, + "loss": 0.2100827842950821, + "step": 7311 + }, + { + "epoch": 1.9417076085513212, + "grad_norm": 1.2509560196931713, + "learning_rate": 4.7042914355773795e-08, + "loss": 0.216691792011261, + "step": 7312 + }, + { + "epoch": 1.9419731775328641, + "grad_norm": 1.2086430330598397, + "learning_rate": 4.6618480149780434e-08, + "loss": 0.22815749049186707, + "step": 7313 + }, + { + "epoch": 1.942238746514407, + "grad_norm": 
1.3440658280324072, + "learning_rate": 4.6195964838353646e-08, + "loss": 0.23365731537342072, + "step": 7314 + }, + { + "epoch": 1.94250431549595, + "grad_norm": 1.5301363693806977, + "learning_rate": 4.577536850295161e-08, + "loss": 0.2112172693014145, + "step": 7315 + }, + { + "epoch": 1.942769884477493, + "grad_norm": 1.1945701714854287, + "learning_rate": 4.5356691224659466e-08, + "loss": 0.21821950376033783, + "step": 7316 + }, + { + "epoch": 1.943035453459036, + "grad_norm": 1.1491339078592526, + "learning_rate": 4.4939933084192646e-08, + "loss": 0.2374412566423416, + "step": 7317 + }, + { + "epoch": 1.943301022440579, + "grad_norm": 1.3549046355713708, + "learning_rate": 4.4525094161897987e-08, + "loss": 0.2483779489994049, + "step": 7318 + }, + { + "epoch": 1.9435665914221218, + "grad_norm": 1.327945477663327, + "learning_rate": 4.411217453775152e-08, + "loss": 0.23641882836818695, + "step": 7319 + }, + { + "epoch": 1.9438321604036648, + "grad_norm": 1.3586245026219714, + "learning_rate": 4.370117429135956e-08, + "loss": 0.24779492616653442, + "step": 7320 + }, + { + "epoch": 1.944097729385208, + "grad_norm": 1.1641395539357577, + "learning_rate": 4.329209350195651e-08, + "loss": 0.20288071036338806, + "step": 7321 + }, + { + "epoch": 1.944363298366751, + "grad_norm": 1.2676649817410126, + "learning_rate": 4.288493224840928e-08, + "loss": 0.24286144971847534, + "step": 7322 + }, + { + "epoch": 1.9446288673482939, + "grad_norm": 1.3164985028745375, + "learning_rate": 4.2479690609213976e-08, + "loss": 0.22825902700424194, + "step": 7323 + }, + { + "epoch": 1.9448944363298368, + "grad_norm": 1.255280762331411, + "learning_rate": 4.207636866249587e-08, + "loss": 0.22563335299491882, + "step": 7324 + }, + { + "epoch": 1.9451600053113798, + "grad_norm": 1.2990544857906836, + "learning_rate": 4.167496648601166e-08, + "loss": 0.22853273153305054, + "step": 7325 + }, + { + "epoch": 1.9454255742929227, + "grad_norm": 1.1281442356079434, + "learning_rate": 
4.1275484157147216e-08, + "loss": 0.20790672302246094, + "step": 7326 + }, + { + "epoch": 1.9456911432744657, + "grad_norm": 1.1980029703513235, + "learning_rate": 4.087792175291649e-08, + "loss": 0.2165423035621643, + "step": 7327 + }, + { + "epoch": 1.9459567122560086, + "grad_norm": 1.3858946395294593, + "learning_rate": 4.048227934996485e-08, + "loss": 0.2605394721031189, + "step": 7328 + }, + { + "epoch": 1.9462222812375516, + "grad_norm": 1.280554987273632, + "learning_rate": 4.008855702456904e-08, + "loss": 0.22624900937080383, + "step": 7329 + }, + { + "epoch": 1.9464878502190945, + "grad_norm": 1.1967949808184344, + "learning_rate": 3.9696754852632804e-08, + "loss": 0.23086196184158325, + "step": 7330 + }, + { + "epoch": 1.9467534192006375, + "grad_norm": 1.4330145211347993, + "learning_rate": 3.9306872909691265e-08, + "loss": 0.24633410573005676, + "step": 7331 + }, + { + "epoch": 1.9470189881821804, + "grad_norm": 2.2568432653955894, + "learning_rate": 3.8918911270908745e-08, + "loss": 0.2535535395145416, + "step": 7332 + }, + { + "epoch": 1.9472845571637234, + "grad_norm": 1.3555855555438505, + "learning_rate": 3.853287001108097e-08, + "loss": 0.23904260993003845, + "step": 7333 + }, + { + "epoch": 1.9475501261452663, + "grad_norm": 1.3963340527453718, + "learning_rate": 3.814874920463063e-08, + "loss": 0.22525179386138916, + "step": 7334 + }, + { + "epoch": 1.9478156951268093, + "grad_norm": 1.415360473918547, + "learning_rate": 3.776654892561293e-08, + "loss": 0.21139883995056152, + "step": 7335 + }, + { + "epoch": 1.9480812641083523, + "grad_norm": 1.2272269269066283, + "learning_rate": 3.738626924771005e-08, + "loss": 0.21939310431480408, + "step": 7336 + }, + { + "epoch": 1.9483468330898952, + "grad_norm": 1.1845473795192814, + "learning_rate": 3.7007910244236664e-08, + "loss": 0.22852283716201782, + "step": 7337 + }, + { + "epoch": 1.9486124020714382, + "grad_norm": 1.2529721413425112, + "learning_rate": 3.663147198813666e-08, + "loss": 
0.20769211649894714, + "step": 7338 + }, + { + "epoch": 1.948877971052981, + "grad_norm": 1.216093250313145, + "learning_rate": 3.625695455198086e-08, + "loss": 0.21721890568733215, + "step": 7339 + }, + { + "epoch": 1.949143540034524, + "grad_norm": 1.261493312403511, + "learning_rate": 3.588435800797263e-08, + "loss": 0.24236848950386047, + "step": 7340 + }, + { + "epoch": 1.949409109016067, + "grad_norm": 1.21142050375974, + "learning_rate": 3.5513682427944505e-08, + "loss": 0.2300192266702652, + "step": 7341 + }, + { + "epoch": 1.94967467799761, + "grad_norm": 1.1850825722481098, + "learning_rate": 3.5144927883358215e-08, + "loss": 0.21636728942394257, + "step": 7342 + }, + { + "epoch": 1.949940246979153, + "grad_norm": 1.3000939007920165, + "learning_rate": 3.477809444530578e-08, + "loss": 0.25367966294288635, + "step": 7343 + }, + { + "epoch": 1.9502058159606959, + "grad_norm": 1.4245768388392126, + "learning_rate": 3.4413182184507285e-08, + "loss": 0.24514247477054596, + "step": 7344 + }, + { + "epoch": 1.9504713849422388, + "grad_norm": 1.1048557155163508, + "learning_rate": 3.405019117131425e-08, + "loss": 0.18460404872894287, + "step": 7345 + }, + { + "epoch": 1.9507369539237818, + "grad_norm": 1.275062396510646, + "learning_rate": 3.3689121475706244e-08, + "loss": 0.2096845805644989, + "step": 7346 + }, + { + "epoch": 1.9510025229053247, + "grad_norm": 1.2314050158221594, + "learning_rate": 3.332997316729536e-08, + "loss": 0.22435057163238525, + "step": 7347 + }, + { + "epoch": 1.9512680918868677, + "grad_norm": 1.208912476805739, + "learning_rate": 3.2972746315318436e-08, + "loss": 0.20798128843307495, + "step": 7348 + }, + { + "epoch": 1.9515336608684106, + "grad_norm": 1.2922181556866412, + "learning_rate": 3.2617440988645945e-08, + "loss": 0.23958316445350647, + "step": 7349 + }, + { + "epoch": 1.9517992298499536, + "grad_norm": 1.3799363972113297, + "learning_rate": 3.2264057255777525e-08, + "loss": 0.21934574842453003, + "step": 7350 + }, + { + 
"epoch": 1.9520647988314965, + "grad_norm": 1.2014453671941887, + "learning_rate": 3.1912595184839804e-08, + "loss": 0.24321375787258148, + "step": 7351 + }, + { + "epoch": 1.9523303678130395, + "grad_norm": 1.1661737247347086, + "learning_rate": 3.156305484359079e-08, + "loss": 0.20932736992835999, + "step": 7352 + }, + { + "epoch": 1.9525959367945824, + "grad_norm": 1.2983329607047998, + "learning_rate": 3.12154362994177e-08, + "loss": 0.19824840128421783, + "step": 7353 + }, + { + "epoch": 1.9528615057761254, + "grad_norm": 1.3128795915591134, + "learning_rate": 3.0869739619338034e-08, + "loss": 0.212745800614357, + "step": 7354 + }, + { + "epoch": 1.9531270747576683, + "grad_norm": 1.247129470001585, + "learning_rate": 3.0525964869997374e-08, + "loss": 0.23044779896736145, + "step": 7355 + }, + { + "epoch": 1.9533926437392113, + "grad_norm": 1.2323689907378315, + "learning_rate": 3.018411211767158e-08, + "loss": 0.2237459123134613, + "step": 7356 + }, + { + "epoch": 1.9536582127207542, + "grad_norm": 1.3228713238231502, + "learning_rate": 2.984418142826684e-08, + "loss": 0.2592429518699646, + "step": 7357 + }, + { + "epoch": 1.9539237817022972, + "grad_norm": 1.1444806738907807, + "learning_rate": 2.9506172867315163e-08, + "loss": 0.17559123039245605, + "step": 7358 + }, + { + "epoch": 1.9541893506838401, + "grad_norm": 1.287127142439038, + "learning_rate": 2.917008649998332e-08, + "loss": 0.24143017828464508, + "step": 7359 + }, + { + "epoch": 1.954454919665383, + "grad_norm": 1.310526275865734, + "learning_rate": 2.883592239106392e-08, + "loss": 0.23560799658298492, + "step": 7360 + }, + { + "epoch": 1.954720488646926, + "grad_norm": 1.357586181070064, + "learning_rate": 2.8503680604979878e-08, + "loss": 0.2456119805574417, + "step": 7361 + }, + { + "epoch": 1.954986057628469, + "grad_norm": 1.2143945666113656, + "learning_rate": 2.817336120578329e-08, + "loss": 0.21878069639205933, + "step": 7362 + }, + { + "epoch": 1.955251626610012, + "grad_norm": 
1.2288786099560105, + "learning_rate": 2.7844964257155438e-08, + "loss": 0.20496608316898346, + "step": 7363 + }, + { + "epoch": 1.955517195591555, + "grad_norm": 1.2067776880816419, + "learning_rate": 2.7518489822407902e-08, + "loss": 0.23219498991966248, + "step": 7364 + }, + { + "epoch": 1.9557827645730979, + "grad_norm": 1.3499865013336032, + "learning_rate": 2.7193937964481442e-08, + "loss": 0.2284272015094757, + "step": 7365 + }, + { + "epoch": 1.9560483335546408, + "grad_norm": 1.3177047034961433, + "learning_rate": 2.68713087459449e-08, + "loss": 0.22303974628448486, + "step": 7366 + }, + { + "epoch": 1.9563139025361838, + "grad_norm": 1.337791009624748, + "learning_rate": 2.655060222899741e-08, + "loss": 0.22489243745803833, + "step": 7367 + }, + { + "epoch": 1.9565794715177267, + "grad_norm": 1.2719472133739602, + "learning_rate": 2.6231818475468407e-08, + "loss": 0.27986854314804077, + "step": 7368 + }, + { + "epoch": 1.9568450404992697, + "grad_norm": 1.3884495118427658, + "learning_rate": 2.591495754681539e-08, + "loss": 0.29321208596229553, + "step": 7369 + }, + { + "epoch": 1.9571106094808126, + "grad_norm": 1.3942541242432065, + "learning_rate": 2.5600019504125053e-08, + "loss": 0.2560982406139374, + "step": 7370 + }, + { + "epoch": 1.9573761784623556, + "grad_norm": 1.4283472016053, + "learning_rate": 2.528700440811438e-08, + "loss": 0.264164537191391, + "step": 7371 + }, + { + "epoch": 1.9576417474438985, + "grad_norm": 1.1832183058517125, + "learning_rate": 2.4975912319127326e-08, + "loss": 0.2135474979877472, + "step": 7372 + }, + { + "epoch": 1.9579073164254415, + "grad_norm": 1.265205421311282, + "learning_rate": 2.466674329714036e-08, + "loss": 0.2100939154624939, + "step": 7373 + }, + { + "epoch": 1.9581728854069844, + "grad_norm": 1.395586955333931, + "learning_rate": 2.4359497401758026e-08, + "loss": 0.23327934741973877, + "step": 7374 + }, + { + "epoch": 1.9584384543885274, + "grad_norm": 1.0722904974981595, + "learning_rate": 
2.405417469221183e-08, + "loss": 0.18830639123916626, + "step": 7375 + }, + { + "epoch": 1.9587040233700703, + "grad_norm": 1.284092871282835, + "learning_rate": 2.3750775227364686e-08, + "loss": 0.2558823227882385, + "step": 7376 + }, + { + "epoch": 1.9589695923516133, + "grad_norm": 1.2598399224501151, + "learning_rate": 2.3449299065710917e-08, + "loss": 0.24241580069065094, + "step": 7377 + }, + { + "epoch": 1.9592351613331562, + "grad_norm": 1.1684337819721369, + "learning_rate": 2.3149746265368478e-08, + "loss": 0.21678534150123596, + "step": 7378 + }, + { + "epoch": 1.9595007303146992, + "grad_norm": 1.2804084693654512, + "learning_rate": 2.2852116884088947e-08, + "loss": 0.20956794917583466, + "step": 7379 + }, + { + "epoch": 1.9597662992962421, + "grad_norm": 1.2682321373225172, + "learning_rate": 2.2556410979253095e-08, + "loss": 0.2185555249452591, + "step": 7380 + }, + { + "epoch": 1.960031868277785, + "grad_norm": 1.3369178147645102, + "learning_rate": 2.226262860786643e-08, + "loss": 0.21802933514118195, + "step": 7381 + }, + { + "epoch": 1.960297437259328, + "grad_norm": 1.4565773631347612, + "learning_rate": 2.1970769826570317e-08, + "loss": 0.22842684388160706, + "step": 7382 + }, + { + "epoch": 1.960563006240871, + "grad_norm": 1.2737807469252465, + "learning_rate": 2.1680834691628627e-08, + "loss": 0.23380814492702484, + "step": 7383 + }, + { + "epoch": 1.960828575222414, + "grad_norm": 1.311531421948895, + "learning_rate": 2.1392823258938877e-08, + "loss": 0.23476335406303406, + "step": 7384 + }, + { + "epoch": 1.961094144203957, + "grad_norm": 1.2100451325455786, + "learning_rate": 2.110673558402554e-08, + "loss": 0.19657662510871887, + "step": 7385 + }, + { + "epoch": 1.9613597131854998, + "grad_norm": 1.191542044024077, + "learning_rate": 2.0822571722044494e-08, + "loss": 0.1724000722169876, + "step": 7386 + }, + { + "epoch": 1.9616252821670428, + "grad_norm": 1.3535695538712786, + "learning_rate": 2.0540331727777475e-08, + "loss": 
0.22960031032562256, + "step": 7387 + }, + { + "epoch": 1.9618908511485857, + "grad_norm": 1.4028518726902017, + "learning_rate": 2.0260015655637623e-08, + "loss": 0.2601638436317444, + "step": 7388 + }, + { + "epoch": 1.9621564201301287, + "grad_norm": 1.3907771240802078, + "learning_rate": 1.998162355966726e-08, + "loss": 0.2562445402145386, + "step": 7389 + }, + { + "epoch": 1.9624219891116716, + "grad_norm": 1.1881922077977833, + "learning_rate": 1.9705155493535688e-08, + "loss": 0.20073221623897552, + "step": 7390 + }, + { + "epoch": 1.9626875580932146, + "grad_norm": 1.2076860773847395, + "learning_rate": 1.9430611510544707e-08, + "loss": 0.18454071879386902, + "step": 7391 + }, + { + "epoch": 1.9629531270747576, + "grad_norm": 1.1878203901407238, + "learning_rate": 1.915799166362087e-08, + "loss": 0.18515023589134216, + "step": 7392 + }, + { + "epoch": 1.9632186960563005, + "grad_norm": 1.3323308983960227, + "learning_rate": 1.8887296005323242e-08, + "loss": 0.25658512115478516, + "step": 7393 + }, + { + "epoch": 1.9634842650378435, + "grad_norm": 1.4122913637661163, + "learning_rate": 1.861852458783897e-08, + "loss": 0.2219933569431305, + "step": 7394 + }, + { + "epoch": 1.9637498340193864, + "grad_norm": 1.3005286775146463, + "learning_rate": 1.8351677462983276e-08, + "loss": 0.24949616193771362, + "step": 7395 + }, + { + "epoch": 1.9640154030009294, + "grad_norm": 1.4026906711741571, + "learning_rate": 1.808675468220167e-08, + "loss": 0.24348726868629456, + "step": 7396 + }, + { + "epoch": 1.9642809719824723, + "grad_norm": 1.3848607909391346, + "learning_rate": 1.782375629656885e-08, + "loss": 0.2329033762216568, + "step": 7397 + }, + { + "epoch": 1.9645465409640153, + "grad_norm": 1.2075544796662319, + "learning_rate": 1.7562682356786488e-08, + "loss": 0.22265426814556122, + "step": 7398 + }, + { + "epoch": 1.9648121099455582, + "grad_norm": 1.2895787739524316, + "learning_rate": 1.730353291318654e-08, + "loss": 0.24438990652561188, + "step": 7399 + }, 
+ { + "epoch": 1.9650776789271012, + "grad_norm": 1.3518107746112518, + "learning_rate": 1.704630801573015e-08, + "loss": 0.2632136642932892, + "step": 7400 + }, + { + "epoch": 1.9653432479086441, + "grad_norm": 1.3377019916165274, + "learning_rate": 1.6791007714008766e-08, + "loss": 0.22230927646160126, + "step": 7401 + }, + { + "epoch": 1.965608816890187, + "grad_norm": 1.3577982430958546, + "learning_rate": 1.653763205723968e-08, + "loss": 0.26317098736763, + "step": 7402 + }, + { + "epoch": 1.96587438587173, + "grad_norm": 1.3261620865973216, + "learning_rate": 1.628618109427049e-08, + "loss": 0.23205846548080444, + "step": 7403 + }, + { + "epoch": 1.966139954853273, + "grad_norm": 1.1507090645553337, + "learning_rate": 1.6036654873579084e-08, + "loss": 0.202583909034729, + "step": 7404 + }, + { + "epoch": 1.966405523834816, + "grad_norm": 1.3959078486467311, + "learning_rate": 1.5789053443270308e-08, + "loss": 0.2579672038555145, + "step": 7405 + }, + { + "epoch": 1.966671092816359, + "grad_norm": 1.4293268160842907, + "learning_rate": 1.5543376851080428e-08, + "loss": 0.27483606338500977, + "step": 7406 + }, + { + "epoch": 1.966936661797902, + "grad_norm": 1.6466914863601023, + "learning_rate": 1.5299625144370444e-08, + "loss": 0.22510311007499695, + "step": 7407 + }, + { + "epoch": 1.967202230779445, + "grad_norm": 1.3926470224592478, + "learning_rate": 1.505779837013499e-08, + "loss": 0.24941131472587585, + "step": 7408 + }, + { + "epoch": 1.967467799760988, + "grad_norm": 1.316826202799614, + "learning_rate": 1.481789657499344e-08, + "loss": 0.22301170229911804, + "step": 7409 + }, + { + "epoch": 1.967733368742531, + "grad_norm": 1.4513024231529628, + "learning_rate": 1.4579919805198795e-08, + "loss": 0.23045194149017334, + "step": 7410 + }, + { + "epoch": 1.9679989377240739, + "grad_norm": 1.2632313332378347, + "learning_rate": 1.4343868106627689e-08, + "loss": 0.25892990827560425, + "step": 7411 + }, + { + "epoch": 1.9682645067056168, + "grad_norm": 
1.316940344896203, + "learning_rate": 1.4109741524788167e-08, + "loss": 0.23086567223072052, + "step": 7412 + }, + { + "epoch": 1.9685300756871598, + "grad_norm": 1.2838593122102535, + "learning_rate": 1.3877540104818566e-08, + "loss": 0.2514735460281372, + "step": 7413 + }, + { + "epoch": 1.9687956446687027, + "grad_norm": 1.2787980812943278, + "learning_rate": 1.3647263891484187e-08, + "loss": 0.21824213862419128, + "step": 7414 + }, + { + "epoch": 1.9690612136502457, + "grad_norm": 1.3351479110439386, + "learning_rate": 1.3418912929178407e-08, + "loss": 0.2262609452009201, + "step": 7415 + }, + { + "epoch": 1.9693267826317886, + "grad_norm": 1.2373165426791106, + "learning_rate": 1.3192487261926013e-08, + "loss": 0.23119492828845978, + "step": 7416 + }, + { + "epoch": 1.9695923516133316, + "grad_norm": 1.2213219567044962, + "learning_rate": 1.2967986933378751e-08, + "loss": 0.20173534750938416, + "step": 7417 + }, + { + "epoch": 1.9698579205948745, + "grad_norm": 1.3102471335629409, + "learning_rate": 1.2745411986816447e-08, + "loss": 0.2212662547826767, + "step": 7418 + }, + { + "epoch": 1.9701234895764175, + "grad_norm": 1.2461352597734543, + "learning_rate": 1.2524762465151442e-08, + "loss": 0.21990706026554108, + "step": 7419 + }, + { + "epoch": 1.9703890585579604, + "grad_norm": 1.2130065240866306, + "learning_rate": 1.2306038410919707e-08, + "loss": 0.18648189306259155, + "step": 7420 + }, + { + "epoch": 1.9706546275395034, + "grad_norm": 1.334350070832243, + "learning_rate": 1.2089239866289737e-08, + "loss": 0.23273484408855438, + "step": 7421 + }, + { + "epoch": 1.9709201965210463, + "grad_norm": 1.3083344252475524, + "learning_rate": 1.1874366873059206e-08, + "loss": 0.21514324843883514, + "step": 7422 + }, + { + "epoch": 1.9711857655025893, + "grad_norm": 1.2628839077455776, + "learning_rate": 1.1661419472650538e-08, + "loss": 0.2544926106929779, + "step": 7423 + }, + { + "epoch": 1.9714513344841322, + "grad_norm": 1.1881271398224822, + 
"learning_rate": 1.1450397706119776e-08, + "loss": 0.235082745552063, + "step": 7424 + }, + { + "epoch": 1.9717169034656752, + "grad_norm": 1.3712056139426412, + "learning_rate": 1.1241301614147715e-08, + "loss": 0.24777358770370483, + "step": 7425 + }, + { + "epoch": 1.9719824724472181, + "grad_norm": 1.5271853101134352, + "learning_rate": 1.1034131237045443e-08, + "loss": 0.23714174330234528, + "step": 7426 + }, + { + "epoch": 1.972248041428761, + "grad_norm": 1.3430700979817631, + "learning_rate": 1.0828886614754342e-08, + "loss": 0.24665668606758118, + "step": 7427 + }, + { + "epoch": 1.972513610410304, + "grad_norm": 1.3931055934155485, + "learning_rate": 1.062556778684276e-08, + "loss": 0.23421131074428558, + "step": 7428 + }, + { + "epoch": 1.972779179391847, + "grad_norm": 1.274566697934482, + "learning_rate": 1.0424174792508234e-08, + "loss": 0.23443526029586792, + "step": 7429 + }, + { + "epoch": 1.97304474837339, + "grad_norm": 1.3315316306417777, + "learning_rate": 1.0224707670576373e-08, + "loss": 0.24177192151546478, + "step": 7430 + }, + { + "epoch": 1.973310317354933, + "grad_norm": 1.4439736433803494, + "learning_rate": 1.002716645950197e-08, + "loss": 0.20957472920417786, + "step": 7431 + }, + { + "epoch": 1.9735758863364758, + "grad_norm": 1.2252184749081894, + "learning_rate": 9.831551197370116e-09, + "loss": 0.21594710648059845, + "step": 7432 + }, + { + "epoch": 1.9738414553180188, + "grad_norm": 1.4445839220306718, + "learning_rate": 9.637861921891756e-09, + "loss": 0.2372155487537384, + "step": 7433 + }, + { + "epoch": 1.974107024299562, + "grad_norm": 1.295551996082086, + "learning_rate": 9.446098670408132e-09, + "loss": 0.211237370967865, + "step": 7434 + }, + { + "epoch": 1.974372593281105, + "grad_norm": 1.3006326416512255, + "learning_rate": 9.256261479888562e-09, + "loss": 0.25123757123947144, + "step": 7435 + }, + { + "epoch": 1.9746381622626479, + "grad_norm": 1.2670719422156809, + "learning_rate": 9.068350386932655e-09, + "loss": 
0.23048831522464752, + "step": 7436 + }, + { + "epoch": 1.9749037312441908, + "grad_norm": 1.2157385411321804, + "learning_rate": 8.882365427765883e-09, + "loss": 0.22923544049263, + "step": 7437 + }, + { + "epoch": 1.9751693002257338, + "grad_norm": 1.1040485462060259, + "learning_rate": 8.698306638245114e-09, + "loss": 0.199529767036438, + "step": 7438 + }, + { + "epoch": 1.9754348692072767, + "grad_norm": 1.314383264088006, + "learning_rate": 8.516174053854187e-09, + "loss": 0.22778059542179108, + "step": 7439 + }, + { + "epoch": 1.9757004381888197, + "grad_norm": 1.3428968973890816, + "learning_rate": 8.335967709706128e-09, + "loss": 0.22807848453521729, + "step": 7440 + }, + { + "epoch": 1.9759660071703626, + "grad_norm": 1.3347725648799278, + "learning_rate": 8.157687640543143e-09, + "loss": 0.24764932692050934, + "step": 7441 + }, + { + "epoch": 1.9762315761519056, + "grad_norm": 1.376463462320243, + "learning_rate": 7.98133388073552e-09, + "loss": 0.22213312983512878, + "step": 7442 + }, + { + "epoch": 1.9764971451334485, + "grad_norm": 1.2799794398059858, + "learning_rate": 7.806906464281617e-09, + "loss": 0.22822709381580353, + "step": 7443 + }, + { + "epoch": 1.9767627141149915, + "grad_norm": 1.2148981447749936, + "learning_rate": 7.634405424808977e-09, + "loss": 0.2236599326133728, + "step": 7444 + }, + { + "epoch": 1.9770282830965344, + "grad_norm": 1.263255403192069, + "learning_rate": 7.463830795574334e-09, + "loss": 0.20294487476348877, + "step": 7445 + }, + { + "epoch": 1.9772938520780774, + "grad_norm": 1.3034015114742201, + "learning_rate": 7.295182609461382e-09, + "loss": 0.2187870740890503, + "step": 7446 + }, + { + "epoch": 1.9775594210596203, + "grad_norm": 1.362800468373944, + "learning_rate": 7.128460898984113e-09, + "loss": 0.2629002630710602, + "step": 7447 + }, + { + "epoch": 1.9778249900411633, + "grad_norm": 1.3155096560899557, + "learning_rate": 6.963665696285704e-09, + "loss": 0.24024136364459991, + "step": 7448 + }, + { + "epoch": 
1.9780905590227063, + "grad_norm": 1.240780926418524, + "learning_rate": 6.800797033134077e-09, + "loss": 0.22334401309490204, + "step": 7449 + }, + { + "epoch": 1.9783561280042492, + "grad_norm": 1.2853076050759633, + "learning_rate": 6.639854940930779e-09, + "loss": 0.21535055339336395, + "step": 7450 + }, + { + "epoch": 1.9786216969857922, + "grad_norm": 1.3182931470109147, + "learning_rate": 6.480839450703214e-09, + "loss": 0.26096785068511963, + "step": 7451 + }, + { + "epoch": 1.978887265967335, + "grad_norm": 1.2393293544951642, + "learning_rate": 6.323750593106859e-09, + "loss": 0.22461384534835815, + "step": 7452 + }, + { + "epoch": 1.979152834948878, + "grad_norm": 1.2999818118404687, + "learning_rate": 6.168588398426378e-09, + "loss": 0.24372713267803192, + "step": 7453 + }, + { + "epoch": 1.979418403930421, + "grad_norm": 1.2743158428703243, + "learning_rate": 6.015352896576732e-09, + "loss": 0.19544872641563416, + "step": 7454 + }, + { + "epoch": 1.979683972911964, + "grad_norm": 1.1957228310016947, + "learning_rate": 5.864044117097623e-09, + "loss": 0.22004768252372742, + "step": 7455 + }, + { + "epoch": 1.979949541893507, + "grad_norm": 1.3624679399119848, + "learning_rate": 5.714662089162381e-09, + "loss": 0.2509492337703705, + "step": 7456 + }, + { + "epoch": 1.9802151108750499, + "grad_norm": 1.1563599654889156, + "learning_rate": 5.567206841567974e-09, + "loss": 0.19315078854560852, + "step": 7457 + }, + { + "epoch": 1.9804806798565928, + "grad_norm": 1.1652222675857882, + "learning_rate": 5.421678402741659e-09, + "loss": 0.20722024142742157, + "step": 7458 + }, + { + "epoch": 1.9807462488381358, + "grad_norm": 1.2430974429352135, + "learning_rate": 5.278076800742105e-09, + "loss": 0.2041238397359848, + "step": 7459 + }, + { + "epoch": 1.9810118178196787, + "grad_norm": 1.226308526828602, + "learning_rate": 5.136402063251611e-09, + "loss": 0.21889238059520721, + "step": 7460 + }, + { + "epoch": 1.9812773868012217, + "grad_norm": 
1.2925316754685727, + "learning_rate": 4.996654217584995e-09, + "loss": 0.23580557107925415, + "step": 7461 + }, + { + "epoch": 1.9815429557827646, + "grad_norm": 1.5912986799887796, + "learning_rate": 4.858833290684039e-09, + "loss": 0.24967315793037415, + "step": 7462 + }, + { + "epoch": 1.9818085247643076, + "grad_norm": 1.3642305983011473, + "learning_rate": 4.722939309116381e-09, + "loss": 0.21802274882793427, + "step": 7463 + }, + { + "epoch": 1.9820740937458505, + "grad_norm": 1.2778589071361273, + "learning_rate": 4.588972299084393e-09, + "loss": 0.2641376554965973, + "step": 7464 + }, + { + "epoch": 1.9823396627273935, + "grad_norm": 1.181293128126433, + "learning_rate": 4.456932286412974e-09, + "loss": 0.20166629552841187, + "step": 7465 + }, + { + "epoch": 1.9826052317089364, + "grad_norm": 1.3531318882305197, + "learning_rate": 4.3268192965573164e-09, + "loss": 0.22796592116355896, + "step": 7466 + }, + { + "epoch": 1.9828708006904794, + "grad_norm": 1.1849961491022751, + "learning_rate": 4.19863335460402e-09, + "loss": 0.19833455979824066, + "step": 7467 + }, + { + "epoch": 1.9831363696720223, + "grad_norm": 1.273561592311718, + "learning_rate": 4.07237448526554e-09, + "loss": 0.23009257018566132, + "step": 7468 + }, + { + "epoch": 1.9834019386535653, + "grad_norm": 1.2188380225442625, + "learning_rate": 3.9480427128812945e-09, + "loss": 0.22418440878391266, + "step": 7469 + }, + { + "epoch": 1.9836675076351082, + "grad_norm": 1.2878640211544259, + "learning_rate": 3.825638061421e-09, + "loss": 0.2015800178050995, + "step": 7470 + }, + { + "epoch": 1.9839330766166512, + "grad_norm": 1.2488639013131106, + "learning_rate": 3.705160554485776e-09, + "loss": 0.22166767716407776, + "step": 7471 + }, + { + "epoch": 1.9841986455981941, + "grad_norm": 1.476152466944419, + "learning_rate": 3.5866102152981586e-09, + "loss": 0.3154509961605072, + "step": 7472 + }, + { + "epoch": 1.984464214579737, + "grad_norm": 1.3338840715084874, + "learning_rate": 
3.4699870667165292e-09, + "loss": 0.25891417264938354, + "step": 7473 + }, + { + "epoch": 1.98472978356128, + "grad_norm": 1.2984805204003045, + "learning_rate": 3.355291131222904e-09, + "loss": 0.24837851524353027, + "step": 7474 + }, + { + "epoch": 1.984995352542823, + "grad_norm": 1.2923319105031845, + "learning_rate": 3.2425224309307055e-09, + "loss": 0.24254213273525238, + "step": 7475 + }, + { + "epoch": 1.985260921524366, + "grad_norm": 1.3479980629574153, + "learning_rate": 3.1316809875781005e-09, + "loss": 0.24822884798049927, + "step": 7476 + }, + { + "epoch": 1.985526490505909, + "grad_norm": 1.2515754926310612, + "learning_rate": 3.022766822535772e-09, + "loss": 0.19553488492965698, + "step": 7477 + }, + { + "epoch": 1.9857920594874519, + "grad_norm": 1.289139949226706, + "learning_rate": 2.9157799568002576e-09, + "loss": 0.24758943915367126, + "step": 7478 + }, + { + "epoch": 1.9860576284689948, + "grad_norm": 1.3254058481790592, + "learning_rate": 2.810720410998391e-09, + "loss": 0.22947746515274048, + "step": 7479 + }, + { + "epoch": 1.9863231974505378, + "grad_norm": 1.1718425441422213, + "learning_rate": 2.7075882053828605e-09, + "loss": 0.20573696494102478, + "step": 7480 + }, + { + "epoch": 1.9865887664320807, + "grad_norm": 1.3248019948595686, + "learning_rate": 2.606383359837761e-09, + "loss": 0.2547800838947296, + "step": 7481 + }, + { + "epoch": 1.9868543354136237, + "grad_norm": 1.3239089800396548, + "learning_rate": 2.507105893874151e-09, + "loss": 0.22227191925048828, + "step": 7482 + }, + { + "epoch": 1.9871199043951666, + "grad_norm": 1.379027057566697, + "learning_rate": 2.409755826630056e-09, + "loss": 0.24687603116035461, + "step": 7483 + }, + { + "epoch": 1.9873854733767096, + "grad_norm": 1.3626347731044859, + "learning_rate": 2.3143331768749053e-09, + "loss": 0.23577818274497986, + "step": 7484 + }, + { + "epoch": 1.9876510423582525, + "grad_norm": 1.2429616783261994, + "learning_rate": 2.2208379630039858e-09, + "loss": 
0.23012465238571167, + "step": 7485 + }, + { + "epoch": 1.9879166113397955, + "grad_norm": 1.2667278392117014, + "learning_rate": 2.129270203043987e-09, + "loss": 0.21479251980781555, + "step": 7486 + }, + { + "epoch": 1.9881821803213384, + "grad_norm": 1.2419157692275362, + "learning_rate": 2.039629914645236e-09, + "loss": 0.24436548352241516, + "step": 7487 + }, + { + "epoch": 1.9884477493028814, + "grad_norm": 1.3198752588445606, + "learning_rate": 1.951917115091684e-09, + "loss": 0.22225134074687958, + "step": 7488 + }, + { + "epoch": 1.9887133182844243, + "grad_norm": 1.4243538533938824, + "learning_rate": 1.8661318212920275e-09, + "loss": 0.22320827841758728, + "step": 7489 + }, + { + "epoch": 1.9889788872659673, + "grad_norm": 1.3025984911365984, + "learning_rate": 1.7822740497852597e-09, + "loss": 0.2317924201488495, + "step": 7490 + }, + { + "epoch": 1.9892444562475102, + "grad_norm": 1.370204940685918, + "learning_rate": 1.700343816738448e-09, + "loss": 0.2275170385837555, + "step": 7491 + }, + { + "epoch": 1.9895100252290532, + "grad_norm": 1.652167024814656, + "learning_rate": 1.6203411379456247e-09, + "loss": 0.24541540443897247, + "step": 7492 + }, + { + "epoch": 1.9897755942105961, + "grad_norm": 1.311164124852614, + "learning_rate": 1.5422660288322288e-09, + "loss": 0.23041896522045135, + "step": 7493 + }, + { + "epoch": 1.990041163192139, + "grad_norm": 1.301476042648128, + "learning_rate": 1.4661185044484438e-09, + "loss": 0.22362437844276428, + "step": 7494 + }, + { + "epoch": 1.990306732173682, + "grad_norm": 1.1872303288026824, + "learning_rate": 1.3918985794747486e-09, + "loss": 0.22082944214344025, + "step": 7495 + }, + { + "epoch": 1.990572301155225, + "grad_norm": 1.2985516009859217, + "learning_rate": 1.3196062682208078e-09, + "loss": 0.2210516780614853, + "step": 7496 + }, + { + "epoch": 1.990837870136768, + "grad_norm": 1.2609254238659025, + "learning_rate": 1.249241584623251e-09, + "loss": 0.21891455352306366, + "step": 7497 + }, + { + 
"epoch": 1.991103439118311, + "grad_norm": 1.2687100133579783, + "learning_rate": 1.1808045422478932e-09, + "loss": 0.23363247513771057, + "step": 7498 + }, + { + "epoch": 1.9913690080998538, + "grad_norm": 1.188481032582791, + "learning_rate": 1.1142951542875146e-09, + "loss": 0.20676104724407196, + "step": 7499 + }, + { + "epoch": 1.9916345770813968, + "grad_norm": 1.2983095103442552, + "learning_rate": 1.0497134335663018e-09, + "loss": 0.23037788271903992, + "step": 7500 + }, + { + "epoch": 1.9919001460629397, + "grad_norm": 1.1706822471326355, + "learning_rate": 9.870593925320748e-10, + "loss": 0.21958573162555695, + "step": 7501 + }, + { + "epoch": 1.9921657150444827, + "grad_norm": 1.3574206120623875, + "learning_rate": 9.263330432662809e-10, + "loss": 0.23280993103981018, + "step": 7502 + }, + { + "epoch": 1.9924312840260257, + "grad_norm": 1.2662411212973668, + "learning_rate": 8.675343974762219e-10, + "loss": 0.2254818230867386, + "step": 7503 + }, + { + "epoch": 1.9926968530075686, + "grad_norm": 1.255709874874282, + "learning_rate": 8.106634664950541e-10, + "loss": 0.1850586235523224, + "step": 7504 + }, + { + "epoch": 1.9929624219891116, + "grad_norm": 1.1965362861662039, + "learning_rate": 7.557202612895609e-10, + "loss": 0.21080443263053894, + "step": 7505 + }, + { + "epoch": 1.9932279909706545, + "grad_norm": 1.2788710791805473, + "learning_rate": 7.027047924512698e-10, + "loss": 0.21604907512664795, + "step": 7506 + }, + { + "epoch": 1.9934935599521975, + "grad_norm": 1.287068201404914, + "learning_rate": 6.516170701997837e-10, + "loss": 0.24684564769268036, + "step": 7507 + }, + { + "epoch": 1.9937591289337404, + "grad_norm": 1.2013851004960618, + "learning_rate": 6.024571043861116e-10, + "loss": 0.21735510230064392, + "step": 7508 + }, + { + "epoch": 1.9940246979152834, + "grad_norm": 1.2853945699676002, + "learning_rate": 5.552249044860069e-10, + "loss": 0.23616179823875427, + "step": 7509 + }, + { + "epoch": 1.9942902668968263, + "grad_norm": 
1.280261468721699, + "learning_rate": 5.099204796066293e-10, + "loss": 0.23930129408836365, + "step": 7510 + }, + { + "epoch": 1.9945558358783693, + "grad_norm": 1.30216307212454, + "learning_rate": 4.665438384809928e-10, + "loss": 0.2354714274406433, + "step": 7511 + }, + { + "epoch": 1.9948214048599122, + "grad_norm": 1.4489462806357751, + "learning_rate": 4.250949894724077e-10, + "loss": 0.28315576910972595, + "step": 7512 + }, + { + "epoch": 1.9950869738414552, + "grad_norm": 1.1749720994980957, + "learning_rate": 3.8557394057114895e-10, + "loss": 0.19599778950214386, + "step": 7513 + }, + { + "epoch": 1.9953525428229981, + "grad_norm": 1.5080290285974376, + "learning_rate": 3.4798069939667725e-10, + "loss": 0.2295808494091034, + "step": 7514 + }, + { + "epoch": 1.995618111804541, + "grad_norm": 1.2840127096725462, + "learning_rate": 3.1231527319763864e-10, + "loss": 0.23212578892707825, + "step": 7515 + }, + { + "epoch": 1.995883680786084, + "grad_norm": 1.2763709143213344, + "learning_rate": 2.78577668847424e-10, + "loss": 0.2408447265625, + "step": 7516 + }, + { + "epoch": 1.996149249767627, + "grad_norm": 1.325995428985527, + "learning_rate": 2.4676789285305034e-10, + "loss": 0.25482073426246643, + "step": 7517 + }, + { + "epoch": 1.9964148187491702, + "grad_norm": 1.2453043840474796, + "learning_rate": 2.1688595134516932e-10, + "loss": 0.21228459477424622, + "step": 7518 + }, + { + "epoch": 1.996680387730713, + "grad_norm": 1.3949495270151018, + "learning_rate": 1.8893185008472814e-10, + "loss": 0.2467353343963623, + "step": 7519 + }, + { + "epoch": 1.996945956712256, + "grad_norm": 1.3819791453502894, + "learning_rate": 1.6290559446185962e-10, + "loss": 0.24475792050361633, + "step": 7520 + }, + { + "epoch": 1.997211525693799, + "grad_norm": 1.3766398068169023, + "learning_rate": 1.3880718949366155e-10, + "loss": 0.24821621179580688, + "step": 7521 + }, + { + "epoch": 1.997477094675342, + "grad_norm": 1.2860965423885737, + "learning_rate": 
1.1663663982530715e-10, + "loss": 0.24725303053855896, + "step": 7522 + }, + { + "epoch": 1.997742663656885, + "grad_norm": 1.2302869290522314, + "learning_rate": 9.639394973226523e-11, + "loss": 0.2319290041923523, + "step": 7523 + }, + { + "epoch": 1.9980082326384279, + "grad_norm": 1.3169058540691405, + "learning_rate": 7.807912311696974e-11, + "loss": 0.22183239459991455, + "step": 7524 + }, + { + "epoch": 1.9982738016199708, + "grad_norm": 1.3038532813647647, + "learning_rate": 6.169216350881968e-11, + "loss": 0.2154427468776703, + "step": 7525 + }, + { + "epoch": 1.9985393706015138, + "grad_norm": 1.3153427866812037, + "learning_rate": 4.723307406973021e-11, + "loss": 0.22269389033317566, + "step": 7526 + }, + { + "epoch": 1.9988049395830567, + "grad_norm": 1.1809886655167368, + "learning_rate": 3.4701857584140686e-11, + "loss": 0.20317527651786804, + "step": 7527 + }, + { + "epoch": 1.9990705085645997, + "grad_norm": 1.2813479125348537, + "learning_rate": 2.409851647011685e-11, + "loss": 0.20792551338672638, + "step": 7528 + }, + { + "epoch": 1.9993360775461426, + "grad_norm": 1.1774217019209885, + "learning_rate": 1.5423052770469072e-11, + "loss": 0.2128266990184784, + "step": 7529 + }, + { + "epoch": 1.9996016465276856, + "grad_norm": 1.2535950646579268, + "learning_rate": 8.67546815941367e-12, + "loss": 0.23220527172088623, + "step": 7530 + }, + { + "epoch": 1.9998672155092285, + "grad_norm": 1.234107937433565, + "learning_rate": 3.8557639359115826e-12, + "loss": 0.22269386053085327, + "step": 7531 + }, + { + "epoch": 2.0, + "grad_norm": 2.3086652843747557, + "learning_rate": 9.63941030329707e-13, + "loss": 0.2053365409374237, + "step": 7532 + }, + { + "epoch": 2.0, + "step": 7532, + "total_flos": 5704003196682240.0, + "train_loss": 0.29768029879729163, + "train_runtime": 98000.2149, + "train_samples_per_second": 1.229, + "train_steps_per_second": 0.077 + } + ], + "logging_steps": 1, + "max_steps": 7532, + "num_input_tokens_seen": 0, + "num_train_epochs": 
2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 5704003196682240.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9c0c92feb0b44b3362d1d98054f06b20cb57a4b7 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89857e5ce3d813c9a03825c43337cd93b1e4a595acca4834e9e4f1a47312d609 +size 6968 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..b2693cd341894222439eef94ae9f241a09d1941f Binary files /dev/null and b/training_loss.png differ